Skip to content

Commit 947d7a0

Browse files
authored
feat: literal expressions for the textplan parser (#61)
* Adds support for literals and column names on the parser side
* Adds the datetime submodule for parsing dates
* Initial output to binary plans
* Adds the protobuf-matchers submodule for testing
1 parent edc9896 commit 947d7a0

38 files changed

+2091
-136
lines changed

.github/workflows/build_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
run: ./scripts/setup-ubuntu.sh
3939
- run: mkdir build
4040
- name: Run cmake
41-
run: cmake -Bbuild -GNinja -DCMAKE_BUILD_TYPE=Debug
41+
run: cmake -Bbuild -GNinja -DCMAKE_BUILD_TYPE=Debug -DBUILD_TZ_LIB=ON
4242
- name: Build
4343
run: ninja -C build
4444
- name: Test

.gitmodules

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,9 @@
1313
[submodule "third_party/abseil-cpp"]
1414
path = third_party/abseil-cpp
1515
url = https://github.com/abseil/abseil-cpp.git
16+
[submodule "third_party/datetime"]
17+
path = third_party/datetime
18+
url = https://github.com/HowardHinnant/date.git
19+
[submodule "third_party/protobuf-matchers"]
20+
path = third_party/protobuf-matchers
21+
url = https://github.com/EpsilonPrime/protobuf-matchers.git

.licenserc.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@ header:
1414
- '.licenserc.yaml'
1515
- 'third_party/abseil-cpp'
1616
- 'third_party/antlr4'
17+
- 'third_party/datetime'
1718
- 'third_party/fmt'
1819
- 'third_party/googletest'
20+
- 'third_party/protobuf-matchers'
1921
- 'third_party/substrait'
2022
- 'third_party/yaml-cpp'
2123
- '**/*.md'

include/substrait/expression/DecimalLiteral.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ class DecimalLiteral {
1818
static DecimalLiteral fromProto(
1919
const ::substrait::proto::Expression_Literal_Decimal& proto);
2020

21+
static DecimalLiteral
22+
fromString(const std::string& str, int32_t precision, int32_t scale);
23+
2124
// Validates that the constructed decimal has an exactly 16 byte value with
2225
// a stated precision between 1 and 38.
2326
bool isValid();
@@ -29,6 +32,9 @@ class DecimalLiteral {
2932
// a string.
3033
std::string toString();
3134

35+
// Emits a proto version of the corresponding decimal.
36+
::substrait::proto::Expression_Literal_Decimal toProto();
37+
3238
[[nodiscard]] int32_t precision() const {
3339
return precision_;
3440
}

scripts/setup-ubuntu.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ sudo --preserve-env apt install -y \
2323
protobuf-compiler \
2424
clang-format \
2525
uuid-dev \
26-
default-jre
26+
default-jre \
27+
libcurl4-openssl-dev
2728

2829
pip install cmake-format

src/substrait/common/NumberUtils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ class NumberUtils {
2424
}
2525
};
2626

27-
} // namespace io::substrait::common
27+
} // namespace io::substrait::common

src/substrait/expression/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
add_library(substrait_expression DecimalLiteral.cpp)
44

5-
target_link_libraries(substrait_expression substrait_proto absl::numeric)
5+
target_link_libraries(substrait_expression substrait_proto absl::numeric
6+
absl::strings)
67

78
if(${SUBSTRAIT_CPP_BUILD_TESTING})
89
add_subdirectory(tests)

src/substrait/expression/DecimalLiteral.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <sstream>
66

77
#include "absl/numeric/int128.h"
8+
#include "absl/strings/numbers.h"
89
#include "substrait/proto/algebra.pb.h"
910

1011
namespace io::substrait::expression {
@@ -14,7 +15,7 @@ namespace {
1415
// negate flips the sign of a two-complements value.
1516
std::string negate(const std::string& value) {
1617
std::string newValue = value;
17-
// Flip all of the bits and add one.
18+
// Flip all the bits and add one.
1819
bool carryover = true;
1920
for (char& b : newValue) {
2021
uint8_t newB = ~(static_cast<uint8_t>(b));
@@ -27,13 +28,37 @@ std::string negate(const std::string& value) {
2728
return newValue;
2829
}
2930

31+
void uint128ToBytes(const absl::uint128& value, std::uint8_t* bytes) {
32+
// Copy the low 64 bits of the uint128 value into the first 8 bytes of the
33+
// output buffer.
34+
std::memcpy(bytes, &value, 8);
35+
36+
// Copy the high 64 bits of the uint128 value into the next 8 bytes of the
37+
// output buffer.
38+
std::memcpy(bytes + 8, reinterpret_cast<const std::uint8_t*>(&value) + 8, 8);
39+
}
40+
3041
} // namespace
3142

3243
DecimalLiteral DecimalLiteral::fromProto(
3344
const ::substrait::proto::Expression_Literal_Decimal& proto) {
3445
return {proto.value(), proto.precision(), proto.scale()};
3546
}
3647

48+
DecimalLiteral DecimalLiteral::fromString(
49+
const std::string& str,
50+
int32_t precision,
51+
int32_t scale) {
52+
absl::uint128 v;
53+
if (!absl::SimpleAtoi(str, &v)) {
54+
// TODO -- Store the parse errors so that they can be examined later.
55+
return {"", 0, 0};
56+
}
57+
std::uint8_t valueBytes[16];
58+
uint128ToBytes(v, valueBytes);
59+
return {std::string((const char*)valueBytes, 16), precision, scale};
60+
}
61+
3762
bool DecimalLiteral::isValid() {
3863
return value_.size() == 16 && precision_ >= 1 && precision_ <= 38;
3964
}
@@ -92,4 +117,12 @@ std::string DecimalLiteral::toString() {
92117
return decimalString.str();
93118
}
94119

120+
::substrait::proto::Expression_Literal_Decimal DecimalLiteral::toProto() {
121+
::substrait::proto::Expression_Literal_Decimal result;
122+
result.set_value(value_);
123+
result.set_precision(precision_);
124+
result.set_scale(scale_);
125+
return result;
126+
};
127+
95128
} // namespace io::substrait::expression

src/substrait/textplan/Location.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
#include "substrait/textplan/Location.h"
44

5+
#include <functional>
6+
57
namespace io::substrait::textplan {
68

79
constexpr Location Location::kUnknownLocation(

src/substrait/textplan/SymbolTable.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ enum class SymbolType {
3030
};
3131

3232
enum class RelationType {
33+
// Logical plans
3334
kUnknown = 0,
3435
kRead = 1,
3536
kProject = 2,
@@ -40,13 +41,17 @@ enum class RelationType {
4041
kSort = 7,
4142
kFilter = 8,
4243
kSet = 9,
43-
kExchange = 10,
44-
kDdl = 11,
45-
kWrite = 12,
46-
kHashJoin = 13,
47-
kMergeJoin = 14,
48-
kReference = 15,
4944

45+
// Physical plans
46+
kHashJoin = 31,
47+
kMergeJoin = 32,
48+
49+
// Write relations, currently unreachable in Plan protos.
50+
kExchange = 50,
51+
kDdl = 51,
52+
kWrite = 52,
53+
54+
// Extensions
5055
kExtensionLeaf = 100,
5156
kExtensionSingle = 101,
5257
kExtensionMulti = 102,

src/substrait/textplan/SymbolTablePrinter.cpp

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -154,9 +154,6 @@ std::string typeToText(const ::substrait::proto::Type& type) {
154154
std::string relationToText(
155155
const SymbolTable& symbolTable,
156156
const SymbolInfo& info) {
157-
if (info.blob.type() != typeid(std::shared_ptr<RelationData>)) {
158-
return "not-yet-implemented";
159-
}
160157
auto relationData = ANY_CAST(std::shared_ptr<RelationData>, info.blob);
161158
if (relationData->relation.rel_type_case() ==
162159
::substrait::proto::Rel::REL_TYPE_NOT_SET) {
@@ -193,9 +190,6 @@ std::string outputPipelinesSection(const SymbolTable& symbolTable) {
193190
info.type != SymbolType::kRelation) {
194191
continue;
195192
}
196-
if (info.blob.type() != typeid(std::shared_ptr<RelationData>)) {
197-
return "not-yet-implemented";
198-
}
199193
auto relationData = ANY_CAST(std::shared_ptr<RelationData>, info.blob);
200194
for (auto pipelineStart : relationData->newPipelines) {
201195
auto pipeline = pipelineToPath(symbolTable, pipelineStart);
@@ -270,7 +264,7 @@ std::string outputSchemaSection(const SymbolTable& symbolTable) {
270264
return text.str();
271265
}
272266

273-
std::string outputSourceSection(const SymbolTable& symbolTable) {
267+
std::string outputSourcesSection(const SymbolTable& symbolTable) {
274268
std::stringstream text;
275269
bool hasPreviousText = false;
276270
for (const SymbolInfo& info : symbolTable) {
@@ -419,7 +413,7 @@ std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) {
419413
hasPreviousText = true;
420414
}
421415

422-
newText = outputSourceSection(symbolTable);
416+
newText = outputSourcesSection(symbolTable);
423417
if (!newText.empty()) {
424418
if (hasPreviousText) {
425419
text << "\n";
@@ -438,4 +432,20 @@ std::string SymbolTablePrinter::outputToText(const SymbolTable& symbolTable) {
438432
return text.str();
439433
}
440434

435+
::substrait::proto::Plan SymbolTablePrinter::outputToBinaryPlan(
436+
const SymbolTable& symbolTable) {
437+
::substrait::proto::Plan plan;
438+
for (const SymbolInfo& info : symbolTable) {
439+
if (info.type != SymbolType::kRelation) {
440+
continue;
441+
}
442+
auto relationData = ANY_CAST(std::shared_ptr<RelationData>, info.blob);
443+
auto relation = plan.add_relations();
444+
// TODO -- Figure out when to use rel_root and when to use rel.
445+
*relation->mutable_rel() = relationData->relation;
446+
}
447+
448+
return plan;
449+
}
450+
441451
} // namespace io::substrait::textplan

src/substrait/textplan/SymbolTablePrinter.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,18 @@
66

77
#include "SymbolTable.h"
88

9+
namespace substrait::proto {
10+
class Plan;
11+
}
12+
913
namespace io::substrait::textplan {
1014

1115
class SymbolTablePrinter {
1216
public:
1317
static std::string outputToText(const SymbolTable& symbolTable);
18+
19+
static ::substrait::proto::Plan outputToBinaryPlan(
20+
const SymbolTable& symbolTable);
1421
};
1522

1623
} // namespace io::substrait::textplan

src/substrait/textplan/converter/BasePlanProtoVisitor.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,7 @@ std::any BasePlanProtoVisitor::visitExpression(
649649
case ::substrait::proto::Expression::RexTypeCase::REX_TYPE_NOT_SET:
650650
break;
651651
}
652+
// TODO -- Use an error listener instead.
652653
SUBSTRAIT_UNSUPPORTED(
653654
"Unsupported expression type encountered: " +
654655
std::to_string(expression.rex_type_case()));

src/substrait/textplan/converter/InitialPlanProtoVisitor.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ std::any InitialPlanProtoVisitor::visitExtensionUri(
119119
std::any InitialPlanProtoVisitor::visitPlanRelation(
120120
const ::substrait::proto::PlanRel& relation) {
121121
BasePlanProtoVisitor::visitPlanRelation(relation);
122-
std::string name =
122+
const std::string name =
123123
::substrait::proto::planRelTypeCaseName(relation.rel_type_case());
124124
auto uniqueName = symbolTable_->getUniqueName(name);
125125
auto relationData = std::make_shared<RelationData>();
@@ -134,7 +134,7 @@ std::any InitialPlanProtoVisitor::visitPlanRelation(
134134

135135
std::any InitialPlanProtoVisitor::visitRelation(
136136
const ::substrait::proto::Rel& relation) {
137-
std::string name =
137+
const std::string name =
138138
::substrait::proto::relTypeCaseName(relation.rel_type_case());
139139

140140
auto previousRelationScope = currentRelationScope_;
@@ -303,7 +303,7 @@ void InitialPlanProtoVisitor::updateLocalSchema(
303303
std::nullopt,
304304
std::nullopt);
305305
}
306-
relationData->fieldReferences.push_back(symbol);
306+
relationData->fieldReferences.emplace_back(symbol);
307307
}
308308
}
309309
break;

src/substrait/textplan/converter/InitialPlanProtoVisitor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class InitialPlanProtoVisitor : public BasePlanProtoVisitor {
8282
std::shared_ptr<SymbolTable> symbolTable_;
8383
std::shared_ptr<SubstraitErrorListener> errorListener_;
8484

85-
const ::substrait::proto::Rel* currentRelationScope_;
85+
const ::substrait::proto::Rel* currentRelationScope_{nullptr};
8686
std::map<const ::substrait::proto::Rel*, const SymbolInfo*>
8787
readRelationSources_;
8888
std::map<const ::substrait::proto::Rel*, const SymbolInfo*>

src/substrait/textplan/converter/PipelineVisitor.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ namespace io::substrait::textplan {
1010

1111
class PipelineVisitor : public BasePlanProtoVisitor {
1212
public:
13-
explicit PipelineVisitor(const SymbolTable& symbol_table) {
14-
symbolTable_ = std::make_shared<SymbolTable>(symbol_table);
13+
explicit PipelineVisitor(const SymbolTable& symbolTable) {
14+
symbolTable_ = std::make_shared<SymbolTable>(symbolTable);
1515
}
1616

1717
[[nodiscard]] std::shared_ptr<const SymbolTable> getSymbolTable() const {

src/substrait/textplan/converter/PlanPrinterVisitor.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,8 @@ std::string PlanPrinterVisitor::lookupFunctionReference(
8888
if (symbol->type != SymbolType::kFunction) {
8989
continue;
9090
}
91-
if (symbol->blob.type() !=
92-
typeid(const ::substrait::proto::extensions::
93-
SimpleExtensionDeclaration_ExtensionFunction*)) {
94-
// TODO -- Implement function references for text plans.
95-
continue;
96-
}
97-
auto function = ANY_CAST(
98-
const ::substrait::proto::extensions::
99-
SimpleExtensionDeclaration_ExtensionFunction*,
100-
symbol->blob);
101-
if (function->function_anchor() == function_reference) {
91+
auto function = ANY_CAST(std::shared_ptr<FunctionData>, symbol->blob);
92+
if (function->anchor == function_reference) {
10293
return symbol->name;
10394
}
10495
}
@@ -603,6 +594,7 @@ std::any PlanPrinterVisitor::visitExpression(
603594
const ::substrait::proto::Expression& expression) {
604595
if (expression.rex_type_case() ==
605596
::substrait::proto::Expression::RexTypeCase::REX_TYPE_NOT_SET) {
597+
// TODO -- Remove this check after expressions are finished.
606598
return std::string("EXPR-NOT-YET-IMPLEMENTED");
607599
}
608600
return BasePlanProtoVisitor::visitExpression(expression);

src/substrait/textplan/parser/CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,26 @@ add_library(
88
SubstraitPlanVisitor.h
99
SubstraitPlanPipelineVisitor.cpp
1010
SubstraitPlanPipelineVisitor.h
11+
SubstraitPlanRelationVisitor.cpp
12+
SubstraitPlanRelationVisitor.h
1113
ParseText.cpp
1214
ParseText.h
1315
SubstraitParserErrorListener.cpp)
1416

15-
target_link_libraries(substrait_textplan_loader symbol_table error_listener
16-
substrait_proto textplan_grammar fmt::fmt-header-only)
17+
target_link_libraries(
18+
substrait_textplan_loader
19+
symbol_table
20+
error_listener
21+
substrait_proto
22+
substrait_type
23+
substrait_expression
24+
textplan_grammar
25+
fmt::fmt-header-only
26+
date::date
27+
date::date-tz)
1728

1829
add_executable(planparser Tool.cpp)
1930

20-
include_directories(${textplan_grammar_INCLUDE_DIRS})
2131
target_link_libraries(planparser substrait_textplan_loader error_listener)
2232

2333
if(${SUBSTRAIT_CPP_BUILD_TESTING})

0 commit comments

Comments
 (0)