Skip to content

Commit 8f2019a

Browse files
committed
Add a script to extract the list of AST fields into a table
Part of the `cst` branch, see its README for details.
1 parent f172113 commit 8f2019a

File tree

3 files changed

+3397
-0
lines changed

3 files changed

+3397
-0
lines changed

util/ast-fields.tsv

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
struct_name enum_name variant_name field_name field_type
2+
ColumnDef name Ident
3+
ColumnDef data_type DataType
4+
ColumnDef collation Option<ObjectName>
5+
ColumnDef options Vec<ColumnOptionDef>
6+
ColumnOptionDef name Option<Ident>
7+
ColumnOptionDef option ColumnOption
8+
Ident value String
9+
Ident quote_style Option<char>
10+
ObjectName unnamed Vec<Ident>
11+
WhenClause condition Expr
12+
WhenClause result Expr
13+
WindowSpec partition_by Vec<Expr>
14+
WindowSpec order_by Vec<OrderByExpr>
15+
WindowSpec window_frame Option<WindowFrame>
16+
WindowFrame units WindowFrameUnits
17+
WindowFrame start_bound WindowFrameBound
18+
WindowFrame end_bound Option<WindowFrameBound>
19+
Assignment id Ident
20+
Assignment value Expr
21+
Function name ObjectName
22+
Function args Vec<Expr>
23+
Function over Option<WindowSpec>
24+
Function distinct bool
25+
ListAgg distinct bool
26+
ListAgg expr Box<Expr>
27+
ListAgg separator Option<Box<Expr>>
28+
ListAgg on_overflow Option<ListAggOnOverflow>
29+
ListAgg within_group Vec<OrderByExpr>
30+
SqlOption name Ident
31+
SqlOption value Value
32+
Query ctes Vec<Cte>
33+
Query body SetExpr
34+
Query order_by Vec<OrderByExpr>
35+
Query limit Option<Expr>
36+
Query offset Option<Offset>
37+
Query fetch Option<Fetch>
38+
Select distinct bool
39+
Select top Option<Top>
40+
Select projection Vec<SelectItem>
41+
Select from Vec<TableWithJoins>
42+
Select selection Option<Expr>
43+
Select group_by Vec<Expr>
44+
Select having Option<Expr>
45+
Cte alias TableAlias
46+
Cte query Query
47+
TableWithJoins relation TableFactor
48+
TableWithJoins joins Vec<Join>
49+
TableAlias name Ident
50+
TableAlias columns Vec<Ident>
51+
Join relation TableFactor
52+
Join join_operator JoinOperator
53+
OrderByExpr expr Expr
54+
OrderByExpr asc Option<bool>
55+
OrderByExpr nulls_first Option<bool>
56+
Offset value Expr
57+
Offset rows OffsetRows
58+
Fetch with_ties bool
59+
Fetch percent bool
60+
Fetch quantity Option<Expr>
61+
Top with_ties bool
62+
Top percent bool
63+
Top quantity Option<Expr>
64+
Values unnamed Vec<Vec<Expr>>
65+
DataType::Char DataType Char unnamed Option<u64>
66+
DataType::Varchar DataType Varchar unnamed Option<u64>
67+
DataType::Clob DataType Clob unnamed u64
68+
DataType::Binary DataType Binary unnamed u64
69+
DataType::Varbinary DataType Varbinary unnamed u64
70+
DataType::Blob DataType Blob unnamed u64
71+
DataType::Decimal DataType Decimal unnamed Option<u64>
72+
DataType::Decimal DataType Decimal unnamed Option<u64>
73+
DataType::Float DataType Float unnamed Option<u64>
74+
DataType::Custom DataType Custom unnamed ObjectName
75+
DataType::Array DataType Array unnamed Box<DataType>
76+
AlterTableOperation::AddConstraint AlterTableOperation AddConstraint unnamed TableConstraint
77+
AlterTableOperation::DropConstraint AlterTableOperation DropConstraint name Ident
78+
TableConstraint::Unique TableConstraint Unique name Option<Ident>
79+
TableConstraint::Unique TableConstraint Unique columns Vec<Ident>
80+
TableConstraint::Unique TableConstraint Unique is_primary bool
81+
TableConstraint::ForeignKey TableConstraint ForeignKey name Option<Ident>
82+
TableConstraint::ForeignKey TableConstraint ForeignKey columns Vec<Ident>
83+
TableConstraint::ForeignKey TableConstraint ForeignKey foreign_table ObjectName
84+
TableConstraint::ForeignKey TableConstraint ForeignKey referred_columns Vec<Ident>
85+
TableConstraint::Check TableConstraint Check name Option<Ident>
86+
TableConstraint::Check TableConstraint Check expr Box<Expr>
87+
ColumnOption::Default ColumnOption Default unnamed Expr
88+
ColumnOption::Unique ColumnOption Unique is_primary bool
89+
ColumnOption::ForeignKey ColumnOption ForeignKey foreign_table ObjectName
90+
ColumnOption::ForeignKey ColumnOption ForeignKey referred_columns Vec<Ident>
91+
ColumnOption::ForeignKey ColumnOption ForeignKey on_delete Option<ReferentialAction>
92+
ColumnOption::ForeignKey ColumnOption ForeignKey on_update Option<ReferentialAction>
93+
ColumnOption::Check ColumnOption Check unnamed Expr
94+
Expr::Identifier Expr Identifier unnamed Ident
95+
Expr::QualifiedWildcard Expr QualifiedWildcard unnamed Vec<Ident>
96+
Expr::CompoundIdentifier Expr CompoundIdentifier unnamed Vec<Ident>
97+
Expr::IsNull Expr IsNull unnamed Box<Expr>
98+
Expr::IsNotNull Expr IsNotNull unnamed Box<Expr>
99+
Expr::InList Expr InList expr Box<Expr>
100+
Expr::InList Expr InList list Vec<Expr>
101+
Expr::InList Expr InList negated bool
102+
Expr::InSubquery Expr InSubquery expr Box<Expr>
103+
Expr::InSubquery Expr InSubquery subquery Box<Query>
104+
Expr::InSubquery Expr InSubquery negated bool
105+
Expr::Between Expr Between expr Box<Expr>
106+
Expr::Between Expr Between negated bool
107+
Expr::Between Expr Between low Box<Expr>
108+
Expr::Between Expr Between high Box<Expr>
109+
Expr::BinaryOp Expr BinaryOp left Box<Expr>
110+
Expr::BinaryOp Expr BinaryOp op BinaryOperator
111+
Expr::BinaryOp Expr BinaryOp right Box<Expr>
112+
Expr::UnaryOp Expr UnaryOp op UnaryOperator
113+
Expr::UnaryOp Expr UnaryOp expr Box<Expr>
114+
Expr::Cast Expr Cast expr Box<Expr>
115+
Expr::Cast Expr Cast data_type DataType
116+
Expr::Extract Expr Extract field DateTimeField
117+
Expr::Extract Expr Extract expr Box<Expr>
118+
Expr::Collate Expr Collate expr Box<Expr>
119+
Expr::Collate Expr Collate collation ObjectName
120+
Expr::Nested Expr Nested unnamed Box<Expr>
121+
Expr::Value Expr Value unnamed Value
122+
Expr::Function Expr Function unnamed Function
123+
Expr::Case Expr Case operand Option<Box<Expr>>
124+
Expr::Case Expr Case when_clauses Vec<WhenClause>
125+
Expr::Case Expr Case else_result Option<Box<Expr>>
126+
Expr::Exists Expr Exists unnamed Box<Query>
127+
Expr::Subquery Expr Subquery unnamed Box<Query>
128+
Expr::ListAgg Expr ListAgg unnamed ListAgg
129+
WindowFrameBound::Preceding WindowFrameBound Preceding unnamed Option<u64>
130+
WindowFrameBound::Following WindowFrameBound Following unnamed Option<u64>
131+
Statement::Query Statement Query unnamed Box<Query>
132+
Statement::Insert Statement Insert table_name ObjectName
133+
Statement::Insert Statement Insert columns Vec<Ident>
134+
Statement::Insert Statement Insert source Box<Query>
135+
Statement::Copy Statement Copy table_name ObjectName
136+
Statement::Copy Statement Copy columns Vec<Ident>
137+
Statement::Copy Statement Copy values Vec<Option<String>>
138+
Statement::Update Statement Update table_name ObjectName
139+
Statement::Update Statement Update assignments Vec<Assignment>
140+
Statement::Update Statement Update selection Option<Expr>
141+
Statement::Delete Statement Delete table_name ObjectName
142+
Statement::Delete Statement Delete selection Option<Expr>
143+
Statement::CreateView Statement CreateView name ObjectName
144+
Statement::CreateView Statement CreateView columns Vec<Ident>
145+
Statement::CreateView Statement CreateView query Box<Query>
146+
Statement::CreateView Statement CreateView materialized bool
147+
Statement::CreateView Statement CreateView with_options Vec<SqlOption>
148+
Statement::CreateTable Statement CreateTable name ObjectName
149+
Statement::CreateTable Statement CreateTable columns Vec<ColumnDef>
150+
Statement::CreateTable Statement CreateTable constraints Vec<TableConstraint>
151+
Statement::CreateTable Statement CreateTable with_options Vec<SqlOption>
152+
Statement::CreateTable Statement CreateTable if_not_exists bool
153+
Statement::CreateTable Statement CreateTable external bool
154+
Statement::CreateTable Statement CreateTable file_format Option<FileFormat>
155+
Statement::CreateTable Statement CreateTable location Option<String>
156+
Statement::CreateIndex Statement CreateIndex name ObjectName
157+
Statement::CreateIndex Statement CreateIndex table_name ObjectName
158+
Statement::CreateIndex Statement CreateIndex columns Vec<Ident>
159+
Statement::CreateIndex Statement CreateIndex unique bool
160+
Statement::CreateIndex Statement CreateIndex if_not_exists bool
161+
Statement::AlterTable Statement AlterTable name ObjectName
162+
Statement::AlterTable Statement AlterTable operation AlterTableOperation
163+
Statement::Drop Statement Drop object_type ObjectType
164+
Statement::Drop Statement Drop if_exists bool
165+
Statement::Drop Statement Drop names Vec<ObjectName>
166+
Statement::Drop Statement Drop cascade bool
167+
Statement::SetVariable Statement SetVariable local bool
168+
Statement::SetVariable Statement SetVariable variable Ident
169+
Statement::SetVariable Statement SetVariable value SetVariableValue
170+
Statement::ShowVariable Statement ShowVariable variable Ident
171+
Statement::ShowColumns Statement ShowColumns extended bool
172+
Statement::ShowColumns Statement ShowColumns full bool
173+
Statement::ShowColumns Statement ShowColumns table_name ObjectName
174+
Statement::ShowColumns Statement ShowColumns filter Option<ShowStatementFilter>
175+
Statement::StartTransaction Statement StartTransaction modes Vec<TransactionMode>
176+
Statement::SetTransaction Statement SetTransaction modes Vec<TransactionMode>
177+
Statement::Commit Statement Commit chain bool
178+
Statement::Rollback Statement Rollback chain bool
179+
Statement::CreateSchema Statement CreateSchema schema_name ObjectName
180+
ListAggOnOverflow::Truncate ListAggOnOverflow Truncate filler Option<Box<Expr>>
181+
ListAggOnOverflow::Truncate ListAggOnOverflow Truncate with_count bool
182+
TransactionMode::AccessMode TransactionMode AccessMode unnamed TransactionAccessMode
183+
TransactionMode::IsolationLevel TransactionMode IsolationLevel unnamed TransactionIsolationLevel
184+
ShowStatementFilter::Like ShowStatementFilter Like unnamed String
185+
ShowStatementFilter::Where ShowStatementFilter Where unnamed Expr
186+
SetVariableValue::Ident SetVariableValue Ident unnamed Ident
187+
SetVariableValue::Literal SetVariableValue Literal unnamed Value
188+
SetExpr::Select SetExpr Select unnamed Box<Select>
189+
SetExpr::Query SetExpr Query unnamed Box<Query>
190+
SetExpr::SetOperation SetExpr SetOperation op SetOperator
191+
SetExpr::SetOperation SetExpr SetOperation all bool
192+
SetExpr::SetOperation SetExpr SetOperation left Box<SetExpr>
193+
SetExpr::SetOperation SetExpr SetOperation right Box<SetExpr>
194+
SetExpr::Values SetExpr Values unnamed Values
195+
SelectItem::UnnamedExpr SelectItem UnnamedExpr unnamed Expr
196+
SelectItem::ExprWithAlias SelectItem ExprWithAlias expr Expr
197+
SelectItem::ExprWithAlias SelectItem ExprWithAlias alias Ident
198+
SelectItem::QualifiedWildcard SelectItem QualifiedWildcard unnamed ObjectName
199+
TableFactor::Table TableFactor Table name ObjectName
200+
TableFactor::Table TableFactor Table alias Option<TableAlias>
201+
TableFactor::Table TableFactor Table args Vec<Expr>
202+
TableFactor::Table TableFactor Table with_hints Vec<Expr>
203+
TableFactor::Derived TableFactor Derived lateral bool
204+
TableFactor::Derived TableFactor Derived subquery Box<Query>
205+
TableFactor::Derived TableFactor Derived alias Option<TableAlias>
206+
TableFactor::NestedJoin TableFactor NestedJoin unnamed Box<TableWithJoins>
207+
JoinOperator::Inner JoinOperator Inner unnamed JoinConstraint
208+
JoinOperator::LeftOuter JoinOperator LeftOuter unnamed JoinConstraint
209+
JoinOperator::RightOuter JoinOperator RightOuter unnamed JoinConstraint
210+
JoinOperator::FullOuter JoinOperator FullOuter unnamed JoinConstraint
211+
JoinConstraint::On JoinConstraint On unnamed Expr
212+
JoinConstraint::Using JoinConstraint Using unnamed Vec<Ident>
213+
Value::Number Value Number unnamed String
214+
Value::Number Value Number unnamed BigDecimal
215+
Value::SingleQuotedString Value SingleQuotedString unnamed String
216+
Value::NationalStringLiteral Value NationalStringLiteral unnamed String
217+
Value::HexStringLiteral Value HexStringLiteral unnamed String
218+
Value::Boolean Value Boolean unnamed bool
219+
Value::Date Value Date unnamed String
220+
Value::Time Value Time unnamed String
221+
Value::Timestamp Value Timestamp unnamed String
222+
Value::Interval Value Interval value String
223+
Value::Interval Value Interval leading_field DateTimeField
224+
Value::Interval Value Interval leading_precision Option<u64>
225+
Value::Interval Value Interval last_field Option<DateTimeField>
226+
Value::Interval Value Interval fractional_seconds_precision Option<u64>

util/ast-stats.js

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Getting this out of the source code is currently a manual process:
2+
// 1) Run `egrep -hv "//!" src/ast/*.rs > mods`
3+
// this is required, because astexplorer's parser doesn't like the `//!` parent doc-comments
4+
// 2) Paste the results into https://astexplorer.net/ (select the Rust mode)
5+
// 3) Copy the AST data back to `const json =`
6+
// 4) Copy the source code to `srcText`, escaping backslashes (`\` -> `\\`) and backticks ` -> \`
7+
// 5) Run: node util/ast-stats.js > util/ast-fields.tsv
8+
9+
const {srcText, json} = require("./real-data.js");
10+
// const {srcText, json} = require("./test-data.js");
11+
12+
/**
 * Walk the JSON object representing the AST depth-first, calling `visitor` on each "complex" node
 * (i.e. every object or array reachable from `ast`, including `ast` itself).
 *
 * Side effect: every visited node is mutated by assigning it a `path` property.
 *
 * @param {object} ast - the node (object or array) to start from
 * @param {string} path - slash-separated names of the ancestors of `ast`; pass "" for the root
 * @param {function(object): boolean} visitor - called once per visited node, after its `path`
 *     has been set; return a falsy value to skip the children of that node
 */
function walk(ast, path, visitor) {
  ast.path = path;

  if (!visitor(ast)) return;

  // Arrays have no `_type`, so they show up as "Array" in the path of their children.
  const nodeName = Array.isArray(ast) ? "Array" : ast["_type"];
  const childPath = (path ? path + "/" : "") + nodeName;
  // console.log(`@${path}: ${nodeName} ${ast.toString().substring(1,20)}`)
  for (const prop in ast) {
    const child = ast[prop];
    // Only objects/arrays are descended into; strings (including the `path`
    // assigned above), numbers and nulls are skipped.
    if (child instanceof Object) {
      walk(child, childPath, visitor);
    }
  }
}
25+
26+
/**
 * Return all AST objects matching the predicate in an array.
 * Note: for nested objects only the top-most one is included in the results,
 * because the walk does not descend into a node once it has matched.
 *
 * @param {object} ast - the node (object or array) to search, itself included
 * @param {function(object): boolean} filterFn - predicate applied to every visited node
 * @returns {object[]} the matching nodes, in depth-first visiting order
 */
function find(ast, filterFn) {
  const matches = [];
  walk(ast, "", node => {
    if (!filterFn(node)) {
      return true; // no match here: keep descending into the children
    }
    matches.push(node);
    return false; // matched: do not look for further matches inside this node
  });
  return matches;
}
39+
40+
/**
 * Return the part of `text` covered by the given span.
 *
 * A span is a `{start: LC, end: LC}` object, where each LC is a `{line, column}`
 * sub-object. As implemented here:
 *  - `line` is 1-based (line 1 contributes no offset);
 *  - `column` is added unshifted, i.e. it is a 0-based offset into the line;
 *  - the `end` position is exclusive, as it is passed straight to `substring`.
 * NOTE(review): this presumably matches the spans produced by the Rust parser the
 * JSON comes from - confirm if the data source changes.
 *
 * @param {string} text - the full source text, with "\n" line separators
 * @param {{start: {line: number, column: number}, end: {line: number, column: number}}} span
 * @returns {string} the text between the two positions
 */
function src(text, span) {
  const lineLengths = text.split("\n").map(line => line.length);

  /** Convert a {line, column} position into an index into `text`. */
  function pos2idx(pos) {
    let idx = 0;
    for (let i = 0; i < pos.line - 1; ++i) {
      idx += lineLengths[i] + 1; // +1 for the "\n" removed by split()
    }
    //console.log(`pos2idx(${pos.line}, ${pos.column}) = ${idx + pos.column}`)
    return idx + pos.column;
  }

  return text.substring(pos2idx(span.start), pos2idx(span.end));
}
57+
58+
// let spans = find(json, (node) => node._type == "Field").map(node => node.span);
59+
// console.log(src(srcText, spans[0]));
60+
61+
/**
 * A predicate to filter declarations preceded by the set of #[derive]s used for AST structs/enums.
 *
 * @param {object} structOrEnum - a node with an `attrs` array whose items each have a `span`
 * @returns {boolean} true if the source text of any attribute is exactly
 *     `#[derive(Debug, Clone, PartialEq, Eq, Hash)]`
 */
function filterAST(structOrEnum) {
  // The attribute is compared as raw source text, so spacing and the order of
  // the derived traits must match exactly.
  const isAST = structOrEnum.attrs.some(
    attr => src(srcText, attr.span) === "#[derive(Debug, Clone, PartialEq, Eq, Hash)]"
  );
  //console.log(structOrEnum.ident.to_string, isAST)
  return isAST;
}
68+
69+
70+
// Top-most struct and enum declarations that carry the AST derive set.
const structs = find(json, n => n._type === "ItemStruct").filter(filterAST);
const enums = find(json, n => n._type === "ItemEnum").filter(filterAST);

// One record per enum variant, labelled `Enum::Variant` in the struct_name column.
const enumVariants = enums.flatMap(enum_ => {
  const enum_name = enum_.ident.to_string;
  return find(enum_, n => n._type === "Variant").map(variant => ({
    struct_name: `${enum_name}::${variant.ident.to_string}`,
    enum_name: enum_name,
    variant_name: variant.ident.to_string,
    node: variant
  }));
});

// Structs come first (with empty enum/variant columns), followed by the enum variants.
const structsAndEnumVariants = structs.map(struct => ({
  struct_name: struct.ident.to_string,
  enum_name: "", variant_name: "",
  node: struct
})).concat(enumVariants);

// One row per field of every struct / enum variant.
const fields = structsAndEnumVariants.flatMap(item => {
  return find(item, n => n._type === "Field").map(field => ({
    struct_name: item.struct_name,
    enum_name: item.enum_name, variant_name: item.variant_name,
    field_name: field.ident ? field.ident.to_string : "unnamed", // e.g. (Bar, Baz)
    field_type: src(srcText, field.ty.span)
  }));
});

// Column order of the TSV written to stdout: a header line, then one line per field.
const FIELDS = ["struct_name", "enum_name", "variant_name", "field_name", "field_type"];
console.log(FIELDS.join("\t"));
console.log(fields.map(row => FIELDS.map(col => row[col]).join("\t")).join("\n"));
101+
// console.log(fields);
102+
103+
//console.log(src(srcText, {start:{line:1135, column:12}, end: {line:1135, column:17}}));

0 commit comments

Comments
 (0)