Skip to content

Commit 5f3c1bd

Browse files
Provide LISTAGG implementation (#174)
This patch provides an initial implementation of LISTAGG[1]. Notably this implementation deviates from ANSI SQL by allowing both WITHIN GROUP and the delimiter to be optional. We do so because Redshift SQL works this way and this approach is ultimately more flexible. Fixes #169. [1] https://modern-sql.com/feature/listagg
1 parent 418b963 commit 5f3c1bd

File tree

5 files changed

+207
-17
lines changed

5 files changed

+207
-17
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Check https://github.com/andygrove/sqlparser-rs/commits/master for undocumented
2020
- Support `ON { UPDATE | DELETE } { RESTRICT | CASCADE | SET NULL | NO ACTION | SET DEFAULT }` in `FOREIGN KEY` constraints (#170) - thanks @c7hm4r!
2121
- Support basic forms of `CREATE SCHEMA` and `DROP SCHEMA` (#173) - thanks @alex-dukhno!
2222
- Support `NULLS FIRST`/`LAST` in `ORDER BY` expressions (#176) - thanks @houqp!
23+
- Support `LISTAGG()` (#174) - thanks @maxcountryman!
2324

2425
### Fixed
2526
- Report an error for unterminated string literals (#165)

src/ast/mod.rs

+74
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,8 @@ pub enum Expr {
224224
/// A parenthesized subquery `(SELECT ...)`, used in expression like
225225
/// `SELECT (subquery) AS x` or `WHERE (subquery) = x`
226226
Subquery(Box<Query>),
227+
/// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)`
228+
ListAgg(ListAgg),
227229
}
228230

229231
impl fmt::Display for Expr {
@@ -299,6 +301,7 @@ impl fmt::Display for Expr {
299301
}
300302
Expr::Exists(s) => write!(f, "EXISTS ({})", s),
301303
Expr::Subquery(s) => write!(f, "({})", s),
304+
Expr::ListAgg(listagg) => write!(f, "{}", listagg),
302305
}
303306
}
304307
}
@@ -850,6 +853,77 @@ impl FromStr for FileFormat {
850853
}
851854
}
852855

856+
/// A `LISTAGG` invocation `LISTAGG( [ DISTINCT ] <expr>[, <separator> ] [ON OVERFLOW <on_overflow>] )
857+
/// [ WITHIN GROUP (ORDER BY <within_group1>[, ...] ) ]`
858+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
859+
pub struct ListAgg {
860+
pub distinct: bool,
861+
pub expr: Box<Expr>,
862+
pub separator: Option<Box<Expr>>,
863+
pub on_overflow: Option<ListAggOnOverflow>,
864+
pub within_group: Vec<OrderByExpr>,
865+
}
866+
867+
impl fmt::Display for ListAgg {
868+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
869+
write!(
870+
f,
871+
"LISTAGG({}{}",
872+
if self.distinct { "DISTINCT " } else { "" },
873+
self.expr
874+
)?;
875+
if let Some(separator) = &self.separator {
876+
write!(f, ", {}", separator)?;
877+
}
878+
if let Some(on_overflow) = &self.on_overflow {
879+
write!(f, "{}", on_overflow)?;
880+
}
881+
write!(f, ")")?;
882+
if !self.within_group.is_empty() {
883+
write!(
884+
f,
885+
" WITHIN GROUP (ORDER BY {})",
886+
display_comma_separated(&self.within_group)
887+
)?;
888+
}
889+
Ok(())
890+
}
891+
}
892+
893+
/// The `ON OVERFLOW` clause of a LISTAGG invocation
894+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
895+
pub enum ListAggOnOverflow {
896+
/// `ON OVERFLOW ERROR`
897+
Error,
898+
899+
/// `ON OVERFLOW TRUNCATE [ <filler> ] WITH[OUT] COUNT`
900+
Truncate {
901+
filler: Option<Box<Expr>>,
902+
with_count: bool,
903+
},
904+
}
905+
906+
impl fmt::Display for ListAggOnOverflow {
907+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
908+
write!(f, " ON OVERFLOW")?;
909+
match self {
910+
ListAggOnOverflow::Error => write!(f, " ERROR"),
911+
ListAggOnOverflow::Truncate { filler, with_count } => {
912+
write!(f, " TRUNCATE")?;
913+
if let Some(filler) = filler {
914+
write!(f, " {}", filler)?;
915+
}
916+
if *with_count {
917+
write!(f, " WITH")?;
918+
} else {
919+
write!(f, " WITHOUT")?;
920+
}
921+
write!(f, " COUNT")
922+
}
923+
}
924+
}
925+
}
926+
853927
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
854928
pub enum ObjectType {
855929
Table,

src/dialect/keywords.rs

+3
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ define_keywords!(
161161
END_FRAME,
162162
END_PARTITION,
163163
EQUALS,
164+
ERROR,
164165
ESCAPE,
165166
EVERY,
166167
EXCEPT,
@@ -230,6 +231,7 @@ define_keywords!(
230231
LIKE,
231232
LIKE_REGEX,
232233
LIMIT,
234+
LISTAGG,
233235
LN,
234236
LOCAL,
235237
LOCALTIME,
@@ -279,6 +281,7 @@ define_keywords!(
279281
OUT,
280282
OUTER,
281283
OVER,
284+
OVERFLOW,
282285
OVERLAPS,
283286
OVERLAY,
284287
PARAMETER,

src/parser.rs

+75-13
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ impl Parser {
191191
"EXISTS" => self.parse_exists_expr(),
192192
"EXTRACT" => self.parse_extract_expr(),
193193
"INTERVAL" => self.parse_literal_interval(),
194+
"LISTAGG" => self.parse_listagg_expr(),
194195
"NOT" => Ok(Expr::UnaryOp {
195196
op: UnaryOperator::Not,
196197
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
@@ -272,14 +273,7 @@ impl Parser {
272273

273274
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
274275
self.expect_token(&Token::LParen)?;
275-
let all = self.parse_keyword("ALL");
276-
let distinct = self.parse_keyword("DISTINCT");
277-
if all && distinct {
278-
return parser_err!(format!(
279-
"Cannot specify both ALL and DISTINCT in function: {}",
280-
name.to_string(),
281-
));
282-
}
276+
let distinct = self.parse_all_or_distinct()?;
283277
let args = self.parse_optional_args()?;
284278
let over = if self.parse_keyword("OVER") {
285279
// TBD: support window names (`OVER mywin`) in place of inline specification
@@ -423,6 +417,66 @@ impl Parser {
423417
})
424418
}
425419

420+
/// Parse a SQL LISTAGG expression, e.g. `LISTAGG(...) WITHIN GROUP (ORDER BY ...)`.
421+
pub fn parse_listagg_expr(&mut self) -> Result<Expr, ParserError> {
422+
self.expect_token(&Token::LParen)?;
423+
let distinct = self.parse_all_or_distinct()?;
424+
let expr = Box::new(self.parse_expr()?);
425+
// While ANSI SQL would require the separator, Redshift makes this optional. Here we
426+
// choose to make the separator optional as this provides the more general implementation.
427+
let separator = if self.consume_token(&Token::Comma) {
428+
Some(Box::new(self.parse_expr()?))
429+
} else {
430+
None
431+
};
432+
let on_overflow = if self.parse_keywords(vec!["ON", "OVERFLOW"]) {
433+
if self.parse_keyword("ERROR") {
434+
Some(ListAggOnOverflow::Error)
435+
} else {
436+
self.expect_keyword("TRUNCATE")?;
437+
let filler = match self.peek_token() {
438+
Some(Token::Word(kw)) if kw.keyword == "WITH" || kw.keyword == "WITHOUT" => {
439+
None
440+
}
441+
Some(Token::SingleQuotedString(_))
442+
| Some(Token::NationalStringLiteral(_))
443+
| Some(Token::HexStringLiteral(_)) => Some(Box::new(self.parse_expr()?)),
444+
_ => self.expected(
445+
"either filler, WITH, or WITHOUT in LISTAGG",
446+
self.peek_token(),
447+
)?,
448+
};
449+
let with_count = self.parse_keyword("WITH");
450+
if !with_count && !self.parse_keyword("WITHOUT") {
451+
self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?;
452+
}
453+
self.expect_keyword("COUNT")?;
454+
Some(ListAggOnOverflow::Truncate { filler, with_count })
455+
}
456+
} else {
457+
None
458+
};
459+
self.expect_token(&Token::RParen)?;
460+
// Once again ANSI SQL requires WITHIN GROUP, but Redshift does not. Again we choose the
461+
// more general implementation.
462+
let within_group = if self.parse_keywords(vec!["WITHIN", "GROUP"]) {
463+
self.expect_token(&Token::LParen)?;
464+
self.expect_keywords(&["ORDER", "BY"])?;
465+
let order_by_expr = self.parse_comma_separated(Parser::parse_order_by_expr)?;
466+
self.expect_token(&Token::RParen)?;
467+
order_by_expr
468+
} else {
469+
vec![]
470+
};
471+
Ok(Expr::ListAgg(ListAgg {
472+
distinct,
473+
expr,
474+
separator,
475+
on_overflow,
476+
within_group,
477+
}))
478+
}
479+
426480
// This function parses date/time fields for both the EXTRACT function-like
427481
// operator and interval qualifiers. EXTRACT supports a wider set of
428482
// date/time fields than interval qualifiers, so this function may need to
@@ -851,6 +905,18 @@ impl Parser {
851905
Ok(values)
852906
}
853907

908+
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
909+
/// `ParserError` if both `ALL` and `DISTINCT` are found.
910+
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
911+
let all = self.parse_keyword("ALL");
912+
let distinct = self.parse_keyword("DISTINCT");
913+
if all && distinct {
914+
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
915+
} else {
916+
Ok(distinct)
917+
}
918+
}
919+
854920
/// Parse a SQL CREATE statement
855921
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
856922
if self.parse_keyword("TABLE") {
@@ -1635,11 +1701,7 @@ impl Parser {
16351701
/// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`),
16361702
/// assuming the initial `SELECT` was already consumed
16371703
pub fn parse_select(&mut self) -> Result<Select, ParserError> {
1638-
let all = self.parse_keyword("ALL");
1639-
let distinct = self.parse_keyword("DISTINCT");
1640-
if all && distinct {
1641-
return parser_err!("Cannot specify both ALL and DISTINCT in SELECT");
1642-
}
1704+
let distinct = self.parse_all_or_distinct()?;
16431705

16441706
let top = if self.parse_keyword("TOP") {
16451707
Some(self.parse_top()?)

tests/sqlparser_common.rs

+54-4
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ fn parse_select_all() {
244244
fn parse_select_all_distinct() {
245245
let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer");
246246
assert_eq!(
247-
ParserError::ParserError("Cannot specify both ALL and DISTINCT in SELECT".to_string()),
247+
ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
248248
result.unwrap_err(),
249249
);
250250
}
@@ -357,9 +357,7 @@ fn parse_select_count_distinct() {
357357
let sql = "SELECT COUNT(ALL DISTINCT + x) FROM customer";
358358
let res = parse_sql_statements(sql);
359359
assert_eq!(
360-
ParserError::ParserError(
361-
"Cannot specify both ALL and DISTINCT in function: COUNT".to_string()
362-
),
360+
ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()),
363361
res.unwrap_err()
364362
);
365363
}
@@ -914,6 +912,58 @@ fn parse_extract() {
914912
);
915913
}
916914

915+
#[test]
916+
fn parse_listagg() {
917+
let sql = "SELECT LISTAGG(DISTINCT dateid, ', ' ON OVERFLOW TRUNCATE '%' WITHOUT COUNT) \
918+
WITHIN GROUP (ORDER BY id, username)";
919+
let select = verified_only_select(sql);
920+
921+
verified_stmt("SELECT LISTAGG(sellerid) WITHIN GROUP (ORDER BY dateid)");
922+
verified_stmt("SELECT LISTAGG(dateid)");
923+
verified_stmt("SELECT LISTAGG(DISTINCT dateid)");
924+
verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW ERROR)");
925+
verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE N'...' WITH COUNT)");
926+
verified_stmt("SELECT LISTAGG(dateid ON OVERFLOW TRUNCATE X'deadbeef' WITH COUNT)");
927+
928+
let expr = Box::new(Expr::Identifier(Ident::new("dateid")));
929+
let on_overflow = Some(ListAggOnOverflow::Truncate {
930+
filler: Some(Box::new(Expr::Value(Value::SingleQuotedString(
931+
"%".to_string(),
932+
)))),
933+
with_count: false,
934+
});
935+
let within_group = vec![
936+
OrderByExpr {
937+
expr: Expr::Identifier(Ident {
938+
value: "id".to_string(),
939+
quote_style: None,
940+
}),
941+
asc: None,
942+
nulls_first: None,
943+
},
944+
OrderByExpr {
945+
expr: Expr::Identifier(Ident {
946+
value: "username".to_string(),
947+
quote_style: None,
948+
}),
949+
asc: None,
950+
nulls_first: None,
951+
},
952+
];
953+
assert_eq!(
954+
&Expr::ListAgg(ListAgg {
955+
distinct: true,
956+
expr,
957+
separator: Some(Box::new(Expr::Value(Value::SingleQuotedString(
958+
", ".to_string()
959+
)))),
960+
on_overflow,
961+
within_group
962+
}),
963+
expr_from_projection(only(&select.projection))
964+
);
965+
}
966+
917967
#[test]
918968
fn parse_create_table() {
919969
let sql = "CREATE TABLE uk_cities (\

0 commit comments

Comments
 (0)