Skip to content

Commit 5c651f3

Browse files
authored
Merge pull request #256 from ClibMouse/Kusto-p3-disable-extra--word-for-sort
Kusto-phase3 : fix sort issues
2 parents fc581bc + fa3d497 commit 5c651f3

File tree

8 files changed

+412
-60
lines changed

8 files changed

+412
-60
lines changed

src/Parsers/Kusto/ParserKQLSort.cpp

Lines changed: 84 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,112 @@
1+
#include <format>
12
#include <Parsers/ASTLiteral.h>
2-
#include <Parsers/IParserBase.h>
3-
#include <Parsers/ExpressionListParsers.h>
43
#include <Parsers/ASTOrderByElement.h>
4+
#include <Parsers/ExpressionListParsers.h>
5+
#include <Parsers/IParserBase.h>
56
#include <Parsers/Kusto/ParserKQLQuery.h>
67
#include <Parsers/Kusto/ParserKQLSort.h>
78

89
namespace DB
910
{
1011

12+
namespace ErrorCodes
13+
{
14+
extern const int SYNTAX_ERROR;
15+
}
16+
1117
bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
1218
{
13-
bool has_dir = false;
14-
std::vector <bool> has_directions;
19+
String order_list_str;
1520
ParserOrderByExpressionList order_list;
1621
ASTPtr order_expression_list;
1722

18-
auto expr = getExprFromToken(pos);
19-
20-
Tokens tokens(expr.c_str(), expr.c_str() + expr.size());
21-
IParser::Pos new_pos(tokens, pos.max_depth);
23+
auto validate_column = [&](Pos & pos1, Pos & pos2)
24+
{
25+
if (pos2->type == TokenType::BareWord && pos1 != pos2)
26+
throw Exception(
27+
ErrorCodes::SYNTAX_ERROR,
28+
"{} does not refer to any known column, table, variable or function",
29+
String(pos2->begin, pos2->end));
2230

23-
auto pos_backup = new_pos;
24-
if (!order_list.parse(pos_backup, order_expression_list, expected))
25-
return false;
31+
return String(pos1->begin, pos2->end);
32+
};
2633

27-
while (!new_pos->isEnd() && new_pos->type != TokenType::PipeMark && new_pos->type != TokenType::Semicolon)
34+
auto format_sort_expr = [&](const Pos & pos1, const Pos & pos2)
2835
{
29-
String tmp(new_pos->begin, new_pos->end);
30-
if (tmp == "desc" or tmp == "asc")
31-
has_dir = true;
32-
33-
if (new_pos->type == TokenType::Comma)
36+
auto start_pos = pos1;
37+
auto end_pos = pos2;
38+
String column_expr, sort_dir, nulls_position;
39+
auto tmp_pos = start_pos;
40+
while (tmp_pos < end_pos)
3441
{
35-
has_directions.push_back(has_dir);
36-
has_dir = false;
42+
String tmp(tmp_pos->begin, tmp_pos->end);
43+
if (tmp == "desc" || tmp == "asc")
44+
{
45+
if (!sort_dir.empty() || !nulls_position.empty())
46+
throw Exception(ErrorCodes::SYNTAX_ERROR, "The incomplete fragment is unexpected");
47+
--tmp_pos;
48+
column_expr = validate_column(start_pos, tmp_pos);
49+
sort_dir = tmp;
50+
++tmp_pos;
51+
}
52+
if (tmp == "nulls")
53+
{
54+
if (!nulls_position.empty())
55+
throw Exception(ErrorCodes::SYNTAX_ERROR, "The incomplete fragment is unexpected");
56+
auto nulls_pos = tmp_pos;
57+
++tmp_pos;
58+
tmp = String(tmp_pos->begin, tmp_pos->end);
59+
if (tmp_pos->isEnd() || (tmp != "first" && tmp != "last"))
60+
throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid nulls position of sort operator");
61+
62+
nulls_position = "nulls " + tmp;
63+
if (column_expr.empty())
64+
{
65+
--nulls_pos;
66+
column_expr = validate_column(start_pos, nulls_pos);
67+
}
68+
}
69+
70+
++tmp_pos;
3771
}
72+
--end_pos;
73+
if (column_expr.empty())
74+
column_expr = validate_column(start_pos, end_pos);
3875

39-
++new_pos;
40-
}
41-
has_directions.push_back(has_dir);
76+
if (sort_dir.empty())
77+
sort_dir = "desc";
78+
if (nulls_position.empty())
79+
nulls_position = sort_dir == "desc" ? "nulls last" : "nulls first";
80+
return std::format("{} {} {}", getExprFromToken(column_expr, pos.max_depth), sort_dir, nulls_position);
81+
};
4282

43-
for (uint64_t i = 0; i < order_expression_list->children.size(); ++i)
83+
auto paren_count = 0;
84+
auto begin = pos;
85+
while (!pos->isEnd() && pos->type != TokenType::PipeMark && pos->type != TokenType::Semicolon)
4486
{
45-
if (!has_directions[i])
87+
if (pos->type == TokenType::ClosingRoundBracket)
88+
--paren_count;
89+
if (pos->type == TokenType::OpeningRoundBracket)
90+
++paren_count;
91+
if (pos->type == TokenType::Comma && paren_count == 0)
4692
{
47-
auto *order_expr = order_expression_list->children[i]->as<ASTOrderByElement>();
48-
order_expr->direction = -1; // default desc
49-
if (!order_expr->nulls_direction_was_explicitly_specified)
50-
order_expr->nulls_direction = -1;
51-
else
52-
order_expr->nulls_direction = order_expr->nulls_direction == 1 ? -1 : 1;
93+
auto single_sort_expr = format_sort_expr(begin, pos);
94+
order_list_str = order_list_str.empty() ? single_sort_expr : order_list_str + "," + single_sort_expr;
95+
begin = pos;
96+
++begin;
5397
}
98+
++pos;
5499
}
55100

101+
auto single_sort_expr = format_sort_expr(begin, pos);
102+
order_list_str = order_list_str.empty() ? single_sort_expr : order_list_str + "," + single_sort_expr;
103+
104+
Tokens tokens(order_list_str.c_str(), order_list_str.c_str() + order_list_str.size());
105+
IParser::Pos new_pos(tokens, pos.max_depth);
106+
107+
if (!order_list.parse(new_pos, order_expression_list, expected))
108+
return false;
109+
56110
node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(order_expression_list));
57111
return true;
58112
}

src/Parsers/tests/KQL/gtest_KQL_Distinct.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Distinct, ParserTest,
2424
},
2525
{
2626
"Customers |where Age <30 | order by Age| distinct Occupation, Education",
27-
"SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n ORDER BY Age DESC\n)"
27+
"SELECT DISTINCT\n Occupation,\n Education\nFROM\n(\n SELECT *\n FROM Customers\n WHERE Age < 30\n ORDER BY Age DESC NULLS LAST\n)"
2828
},
2929
{
3030
"Customers | project a = (Age % 10) | distinct a;",
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#include <Parsers/tests/gtest_common.h>
2+
3+
#include <Parsers/Kusto/ParserKQLQuery.h>
4+
5+
INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_Sort, ParserTest,
6+
::testing::Combine(
7+
::testing::Values(std::make_shared<DB::ParserKQLQuery>()),
8+
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
9+
{
10+
"Customers | order by FirstName",
11+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
12+
},
13+
{
14+
"Customers | order by FirstName asc",
15+
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST"
16+
},
17+
{
18+
"Customers | order by FirstName desc",
19+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
20+
},
21+
{
22+
"Customers | order by FirstName asc nulls first",
23+
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST"
24+
},
25+
{
26+
"Customers | order by FirstName asc nulls last",
27+
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS LAST"
28+
},
29+
{
30+
"Customers | order by FirstName desc nulls first",
31+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST"
32+
},
33+
{
34+
"Customers | order by FirstName desc nulls last",
35+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
36+
},
37+
{
38+
"Customers | order by FirstName nulls first",
39+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS FIRST"
40+
},
41+
{
42+
"Customers | order by FirstName nulls last",
43+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
44+
},
45+
{
46+
"Customers | order by FirstName, Age",
47+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST"
48+
},
49+
{
50+
"Customers | order by FirstName asc, Age desc",
51+
"SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS FIRST,\n Age DESC NULLS LAST"
52+
},
53+
{
54+
"Customers | order by FirstName desc, Age asc",
55+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age ASC NULLS FIRST"
56+
},
57+
{
58+
"Customers | order by FirstName asc nulls first, Age asc nulls first",
59+
"SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS FIRST,\n Age ASC NULLS FIRST"
60+
},
61+
{
62+
"Customers | order by FirstName asc nulls last, Age asc nulls last",
63+
"SELECT *\nFROM Customers\nORDER BY\n FirstName ASC NULLS LAST,\n Age ASC NULLS LAST"
64+
},
65+
{
66+
"Customers | order by FirstName desc nulls first, Age desc nulls first",
67+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS FIRST,\n Age DESC NULLS FIRST"
68+
},
69+
{
70+
"Customers | order by FirstName desc nulls last, Age desc nulls last",
71+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST"
72+
},
73+
{
74+
"Customers | order by FirstName nulls first, Age nulls first",
75+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS FIRST,\n Age DESC NULLS FIRST"
76+
},
77+
{
78+
"Customers | order by FirstName nulls last, Age nulls last",
79+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age DESC NULLS LAST"
80+
},
81+
{
82+
"Customers | order by FirstName, Age asc nulls last, LastName nulls first",
83+
"SELECT *\nFROM Customers\nORDER BY\n FirstName DESC NULLS LAST,\n Age ASC NULLS LAST,\n LastName DESC NULLS FIRST"
84+
},
85+
{
86+
"Customers | order by FirstName ASC",
87+
"throws"
88+
},
89+
{
90+
"Customers | order by FirstName DESC",
91+
"throws"
92+
},
93+
{
94+
"Customers | order by FirstName nulls",
95+
"throws"
96+
},
97+
{
98+
"Customers | order by FirstName nulls middle",
99+
"throws"
100+
},
101+
{
102+
"Customers | order by FirstName asc desc",
103+
"throws"
104+
},
105+
{
106+
"Customers | order by FirstName nulls first desc",
107+
"throws"
108+
}
109+
})));

src/Parsers/tests/KQL/gtest_KQL_StringFunctions.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_String, ParserTest,
180180
},
181181
{
182182
"Customers | project name_abbr = strcat(substring(FirstName,0,3), ' ', substring(LastName,2))| order by LastName",
183-
"SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC"
183+
"SELECT concat(ifNull(kql_tostring(if(toInt64(length(FirstName)) <= 0, '', substr(FirstName, (((0 % toInt64(length(FirstName))) + toInt64(length(FirstName))) % toInt64(length(FirstName))) + 1, 3))), ''), ifNull(kql_tostring(' '), ''), ifNull(kql_tostring(if(toInt64(length(LastName)) <= 0, '', substr(LastName, (((2 % toInt64(length(LastName))) + toInt64(length(LastName))) % toInt64(length(LastName))) + 1))), ''), '') AS name_abbr\nFROM Customers\nORDER BY LastName DESC NULLS LAST"
184184
},
185185
{
186186
"print idx1 = indexof('abcdefg','cde')",

src/Parsers/tests/KQL/gtest_KQL_TopHitter.cpp

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest,
88
::testing::ValuesIn(std::initializer_list<ParserTestCase>{
99
{
1010
"Customers | top 5 by Age",
11-
"SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5"
11+
"SELECT *\nFROM Customers\nORDER BY Age DESC NULLS LAST\nLIMIT 5"
1212
},
1313
{
1414
"Customers | top 5 by Age desc",
15-
"SELECT *\nFROM Customers\nORDER BY Age DESC\nLIMIT 5"
15+
"SELECT *\nFROM Customers\nORDER BY Age DESC NULLS LAST\nLIMIT 5"
1616
},
1717
{
1818
"Customers | top 5 by Age asc",
19-
"SELECT *\nFROM Customers\nORDER BY Age ASC\nLIMIT 5"
19+
"SELECT *\nFROM Customers\nORDER BY Age ASC NULLS FIRST\nLIMIT 5"
2020
},
2121
{
2222
"Customers | top 5 by FirstName desc nulls first",
@@ -28,30 +28,30 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery_TopHitters, ParserTest,
2828
},
2929
{
3030
"Customers | top 5 by Age | top 2 by FirstName",
31-
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC\n LIMIT 5\n)\nORDER BY FirstName DESC\nLIMIT 2"
31+
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n ORDER BY Age DESC NULLS LAST\n LIMIT 5\n)\nORDER BY FirstName DESC NULLS LAST\nLIMIT 2"
3232
},
3333
{
34-
"Customers| top-hitters a = 3 of Age by extra",
35-
"SELECT *\nFROM\n(\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC\nLIMIT 3 AS a"
34+
"Customers| top-hitters a = 3 of Age by extra",
35+
"SELECT *\nFROM\n(\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_sum_extra DESC NULLS LAST\nLIMIT 3 AS a"
3636
},
3737
{
38-
"Customers| top-hitters 3 of Age",
39-
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 3"
38+
"Customers| top-hitters 3 of Age",
39+
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC NULLS LAST\nLIMIT 3"
4040
},
4141
{
42-
"Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age",
43-
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC\nLIMIT 2"
42+
"Customers| top-hitters 3 of Age by extra | top-hitters 2 of Age",
43+
"SELECT *\nFROM\n(\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM\n (\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n )\n GROUP BY Age\n)\nORDER BY approximate_count_Age DESC NULLS LAST\nLIMIT 2"
4444
},
4545
{
46-
"Customers| top-hitters 3 of Age by extra | where Age > 30",
47-
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE Age > 30"
46+
"Customers| top-hitters 3 of Age by extra | where Age > 30",
47+
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n)\nWHERE Age > 30"
4848
},
4949
{
50-
"Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200",
51-
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200"
50+
"Customers| top-hitters 3 of Age by extra | where approximate_sum_extra < 200",
51+
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n sum(extra) AS approximate_sum_extra\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_sum_extra DESC NULLS LAST\n LIMIT 3\n)\nWHERE approximate_sum_extra < 200"
5252
},
5353
{
54-
"Customers| top-hitters 3 of Age | where approximate_count_Age > 2",
55-
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC\n LIMIT 3\n)\nWHERE approximate_count_Age > 2"
54+
"Customers| top-hitters 3 of Age | where approximate_count_Age > 2",
55+
"SELECT *\nFROM\n(\n SELECT *\n FROM\n (\n SELECT\n Age,\n count() AS approximate_count_Age\n FROM Customers\n GROUP BY Age\n )\n ORDER BY approximate_count_Age DESC NULLS LAST\n LIMIT 3\n)\nWHERE approximate_count_Age > 2"
5656
}
5757
})));

src/Parsers/tests/gtest_Parser.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -273,35 +273,31 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest,
273273
},
274274
{
275275
"Customers | sort by FirstName desc",
276-
"SELECT *\nFROM Customers\nORDER BY FirstName DESC"
276+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
277277
},
278278
{
279279
"Customers | take 3 | order by FirstName desc",
280-
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC"
280+
"SELECT *\nFROM\n(\n SELECT *\n FROM Customers\n LIMIT 3\n)\nORDER BY FirstName DESC NULLS LAST"
281281
},
282282
{
283283
"Customers | sort by FirstName asc",
284-
"SELECT *\nFROM Customers\nORDER BY FirstName ASC"
284+
"SELECT *\nFROM Customers\nORDER BY FirstName ASC NULLS FIRST"
285285
},
286286
{
287287
"Customers | sort by FirstName",
288-
"SELECT *\nFROM Customers\nORDER BY FirstName DESC"
289-
},
290-
{
291-
"Customers | order by LastName",
292-
"SELECT *\nFROM Customers\nORDER BY LastName DESC"
288+
"SELECT *\nFROM Customers\nORDER BY FirstName DESC NULLS LAST"
293289
},
294290
{
295-
"Customers | order by Age desc , FirstName asc ",
296-
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC"
291+
"Customers | order by Age desc, FirstName asc",
292+
"SELECT *\nFROM Customers\nORDER BY\n Age DESC NULLS LAST,\n FirstName ASC NULLS FIRST"
297293
},
298294
{
299295
"Customers | order by Age asc , FirstName desc",
300-
"SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC"
296+
"SELECT *\nFROM Customers\nORDER BY\n Age ASC NULLS FIRST,\n FirstName DESC NULLS LAST"
301297
},
302298
{
303299
"Customers | sort by FirstName | order by Age ",
304-
"SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName DESC"
300+
"SELECT *\nFROM Customers\nORDER BY\n Age DESC NULLS LAST,\n FirstName DESC NULLS LAST"
305301
},
306302
{
307303
"Customers | sort by FirstName nulls first",

0 commit comments

Comments
 (0)