Skip to content

Commit fbdeccf

Browse files
committed
Initial DSL implementation with grammar, semantic analysis, and codegen
1 parent c41cf41 commit fbdeccf

File tree

132 files changed

+8265
-607
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

132 files changed

+8265
-607
lines changed

Cargo.lock

Lines changed: 1912 additions & 13 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[workspace]
2-
members = ["optd-core"]
2+
members = [ "optd-core", "optd-dsl"]
33
resolver = "2"

docs/src/architecture/glossary.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ See the following sections for more information.
126126

127127
A logical expression is a version of a [Relational Expression].
128128

129-
TODO(connor) Add more details.
129+
TODO(connor): Add more details.
130130

131131
Examples of logical expressions include Logical Scan, Logical Join, or Logical Sort expressions
132132
(which can just be shorthanded to Scan, Join, or Sort).
@@ -135,7 +135,7 @@ Examples of logical expressions include Logical Scan, Logical Join, or Logical S
135135

136136
A physical expression is a version of a [Relational Expression].
137137

138-
TODO(connor) Add more details.
138+
TODO(connor): Add more details.
139139

140140
Examples of physical expressions include Table Scan, Index Scan, Hash Join, or Sort Merge Join.
141141

optd-core/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,14 @@ version = "0.1.0"
44
edition = "2021"
55

66
[dependencies]
7+
sqlx = { version = "0.8", features = [ "sqlite", "runtime-tokio", "migrate" ] }
78
trait-variant = "0.1.2"
89

910
# Pin more recent versions for `-Zminimal-versions`.
1011
proc-macro2 = "1.0.60" # For a missing feature (https://github.com/rust-lang/rust/issues/113152).
12+
anyhow = "1.0.95"
13+
tokio = { version = "1.43.0", features = ["full"] }
14+
serde = { version = "1.0", features = ["derive"] }
15+
serde_json = { version = "1", features = ["raw_value"] }
16+
dotenvy = "0.15"
17+
async-recursion = "1.1.1"

optd-core/src/cascades/expressions.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//! Types for logical and physical expressions in the optimizer.
2+
3+
use crate::operators::relational::physical::PhysicalOperator;
4+
use crate::operators::scalar::ScalarOperator;
5+
use crate::{operators::relational::logical::LogicalOperator, values::OptdValue};
6+
use serde::Deserialize;
7+
8+
use super::groups::{RelationalGroupId, ScalarGroupId};
9+
10+
/// A logical expression in the memo table.
///
/// This is a [`LogicalOperator`] instantiated with [`OptdValue`], [`RelationalGroupId`], and
/// [`ScalarGroupId`] as its type arguments.
11+
pub type LogicalExpression = LogicalOperator<OptdValue, RelationalGroupId, ScalarGroupId>;
12+
13+
/// A physical expression in the memo table.
///
/// This is a [`PhysicalOperator`] instantiated with [`OptdValue`], [`RelationalGroupId`], and
/// [`ScalarGroupId`] as its type arguments.
14+
pub type PhysicalExpression = PhysicalOperator<OptdValue, RelationalGroupId, ScalarGroupId>;
15+
16+
/// A scalar expression in the memo table.
///
/// This is a [`ScalarOperator`] instantiated with [`OptdValue`] and [`ScalarGroupId`] as its
/// type arguments.
17+
pub type ScalarExpression = ScalarOperator<OptdValue, ScalarGroupId>;
18+
19+
/// A unique identifier for a logical expression in the memo table.
20+
#[repr(transparent)]
21+
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type, Deserialize)]
22+
#[sqlx(transparent)]
23+
pub struct LogicalExpressionId(pub i64);
24+
25+
/// A unique identifier for a physical expression in the memo table.
26+
#[repr(transparent)]
27+
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type, Deserialize)]
28+
#[sqlx(transparent)]
29+
pub struct PhysicalExpressionId(pub i64);
30+
31+
/// A unique identifier for a scalar expression in the memo table.
32+
#[repr(transparent)]
33+
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type, Deserialize)]
34+
#[sqlx(transparent)]
35+
pub struct ScalarExpressionId(pub i64);

optd-core/src/cascades/groups.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
use serde::Deserialize;
2+
3+
/// A unique identifier for a group of relational expressions in the memo table.
4+
#[repr(transparent)]
5+
#[derive(
6+
Debug,
7+
Clone,
8+
Copy,
9+
PartialEq,
10+
Eq,
11+
PartialOrd,
12+
Ord,
13+
Hash,
14+
sqlx::Type,
15+
serde::Serialize,
16+
Deserialize,
17+
)]
18+
#[sqlx(transparent)]
19+
pub struct RelationalGroupId(pub i64);
20+
21+
/// A unique identifier for a group of scalar expressions in the memo table.
22+
#[repr(transparent)]
23+
#[derive(
24+
Debug,
25+
Clone,
26+
Copy,
27+
PartialEq,
28+
Eq,
29+
PartialOrd,
30+
Ord,
31+
Hash,
32+
sqlx::Type,
33+
serde::Serialize,
34+
Deserialize,
35+
)]
36+
#[sqlx(transparent)]
37+
pub struct ScalarGroupId(pub i64);
38+
39+
/// The exploration status of a group or a logical expression in the memo table.
40+
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)]
41+
#[repr(i32)]
42+
pub enum ExplorationStatus {
43+
/// The group or the logical expression has not been explored.
44+
Unexplored,
45+
/// The group or the logical expression is currently being explored.
46+
Exploring,
47+
/// The group or the logical expression has been explored.
48+
Explored,
49+
}

optd-core/src/cascades/memo.rs

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
//! Memo table interface for query optimization.
2+
//!
3+
//! The memo table is a core data structure that stores expressions and their logical equivalences
4+
//! during query optimization. It serves two main purposes:
5+
//!
6+
//! - Avoiding redundant optimization by memoizing already explored expressions
7+
//! - Grouping logically equivalent expressions together to enable rule-based optimization
8+
//!
9+
10+
use std::sync::Arc;
11+
12+
use super::{
13+
expressions::{LogicalExpression, LogicalExpressionId, ScalarExpression, ScalarExpressionId},
14+
groups::{RelationalGroupId, ScalarGroupId},
15+
};
16+
use anyhow::Result;
17+
18+
/// Asynchronous interface to the memo table.
///
/// Every method is `async` and returns an `anyhow::Result`. Implementors must be
/// `Send + Sync + 'static`.
// NOTE(review): `trait_variant::make(Send)` is presumably applied so that the futures returned
// by the async methods are `Send` — confirm against the trait-variant documentation.
#[trait_variant::make(Send)]
19+
pub trait Memoize: Send + Sync + 'static {
20+
/// Gets all logical expressions in the relational group `group_id`, each paired with its id.
21+
async fn get_all_logical_exprs_in_group(
22+
&self,
23+
group_id: RelationalGroupId,
24+
) -> Result<Vec<(LogicalExpressionId, Arc<LogicalExpression>)>>;
25+
26+
/// Adds a logical expression to the existing group `group_id`.
27+
/// If adding the expression caused a merge, returns the group id of the new group.
// NOTE(review): the id returned when no merge happens is not documented here — presumably
// `group_id` itself; confirm against the implementations.
28+
async fn add_logical_expr_to_group(
29+
&self,
30+
logical_expr: &LogicalExpression,
31+
group_id: RelationalGroupId,
32+
) -> Result<RelationalGroupId>;
33+
34+
/// Adds a logical expression to the memo table.
35+
/// Returns the id of the group that already contains it, or of a newly created group otherwise.
36+
async fn add_logical_expr(&self, logical_expr: &LogicalExpression)
37+
-> Result<RelationalGroupId>;
38+
39+
/// Gets all scalar expressions in the scalar group `group_id`, each paired with its id.
40+
async fn get_all_scalar_exprs_in_group(
41+
&self,
42+
group_id: ScalarGroupId,
43+
) -> Result<Vec<(ScalarExpressionId, Arc<ScalarExpression>)>>;
44+
45+
/// Adds a scalar expression to the existing group `group_id`.
46+
/// If adding the expression caused a merge, returns the group id of the new group.
// NOTE(review): the id returned when no merge happens is not documented here — presumably
// `group_id` itself; confirm against the implementations.
47+
async fn add_scalar_expr_to_group(
48+
&self,
49+
scalar_expr: &ScalarExpression,
50+
group_id: ScalarGroupId,
51+
) -> Result<ScalarGroupId>;
52+
53+
/// Adds a scalar expression to the memo table.
54+
/// Returns the id of the group that already contains it, or of a newly created group otherwise.
55+
async fn add_scalar_expr(&self, scalar_expr: &ScalarExpression) -> Result<ScalarGroupId>;
56+
57+
/// Merges two relational groups and returns the new group id.
// NOTE(review): the parameter names suggest `from` is merged into `to` — confirm.
58+
async fn merge_relation_group(
59+
&self,
60+
from: RelationalGroupId,
61+
to: RelationalGroupId,
62+
) -> Result<RelationalGroupId>;
63+
64+
/// Merges two scalar groups and returns the new group id.
// NOTE(review): the parameter names suggest `from` is merged into `to` — confirm.
65+
async fn merge_scalar_group(
66+
&self,
67+
from: ScalarGroupId,
68+
to: ScalarGroupId,
69+
) -> Result<ScalarGroupId>;
70+
}

optd-core/src/cascades/mod.rs

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
use std::sync::Arc;
2+
3+
use async_recursion::async_recursion;
4+
use expressions::{LogicalExpression, ScalarExpression};
5+
use groups::{RelationalGroupId, ScalarGroupId};
6+
use memo::Memoize;
7+
8+
use crate::{
9+
operators::{
10+
relational::logical::{filter::Filter, join::Join, scan::Scan, LogicalOperator},
11+
scalar::{add::Add, equal::Equal, ScalarOperator},
12+
},
13+
plans::{logical::PartialLogicalPlan, scalar::PartialScalarPlan},
14+
};
15+
16+
pub mod expressions;
17+
pub mod groups;
18+
pub mod memo;
19+
20+
#[async_recursion]
21+
pub async fn ingest_partial_logical_plan(
22+
memo: &impl Memoize,
23+
partial_logical_plan: &PartialLogicalPlan,
24+
) -> anyhow::Result<RelationalGroupId> {
25+
match partial_logical_plan {
26+
PartialLogicalPlan::PartialMaterialized { operator } => {
27+
let mut children_relations = Vec::new();
28+
for child in operator.children_relations().iter() {
29+
children_relations.push(ingest_partial_logical_plan(memo, child).await?);
30+
}
31+
32+
let mut children_scalars = Vec::new();
33+
for child in operator.children_scalars().iter() {
34+
children_scalars.push(ingest_partial_scalar_plan(memo, child).await?);
35+
}
36+
37+
memo.add_logical_expr(&operator.into_expr(&children_relations, &children_scalars))
38+
.await
39+
}
40+
41+
PartialLogicalPlan::UnMaterialized(group_id) => Ok(*group_id),
42+
}
43+
}
44+
45+
#[async_recursion]
46+
pub async fn ingest_partial_scalar_plan(
47+
memo: &impl Memoize,
48+
partial_scalar_plan: &PartialScalarPlan,
49+
) -> anyhow::Result<ScalarGroupId> {
50+
match partial_scalar_plan {
51+
PartialScalarPlan::PartialMaterialized { operator } => {
52+
let mut children = Vec::new();
53+
for child in operator.children_scalars().iter() {
54+
children.push(ingest_partial_scalar_plan(memo, child).await?);
55+
}
56+
57+
memo.add_scalar_expr(&operator.into_expr(&children)).await
58+
}
59+
60+
PartialScalarPlan::UnMaterialized(group_id) => {
61+
return Ok(*group_id);
62+
}
63+
}
64+
}
65+
66+
#[async_recursion]
67+
async fn match_any_partial_logical_plan(
68+
memo: &impl Memoize,
69+
group: RelationalGroupId,
70+
) -> anyhow::Result<Arc<PartialLogicalPlan>> {
71+
let logical_exprs = memo.get_all_logical_exprs_in_group(group).await?;
72+
let last_logical_expr = logical_exprs.last().unwrap().1.clone();
73+
74+
match last_logical_expr.as_ref() {
75+
LogicalExpression::Scan(scan) => {
76+
let predicate = match_any_partial_scalar_plan(memo, scan.predicate).await?;
77+
Ok(Arc::new(PartialLogicalPlan::PartialMaterialized {
78+
operator: LogicalOperator::Scan(Scan {
79+
predicate,
80+
table_name: scan.table_name.clone(),
81+
}),
82+
}))
83+
}
84+
LogicalExpression::Filter(filter) => {
85+
let child = match_any_partial_logical_plan(memo, filter.child).await?;
86+
let predicate = match_any_partial_scalar_plan(memo, filter.predicate).await?;
87+
Ok(Arc::new(PartialLogicalPlan::PartialMaterialized {
88+
operator: LogicalOperator::Filter(Filter { child, predicate }),
89+
}))
90+
}
91+
LogicalExpression::Join(join) => {
92+
let left = match_any_partial_logical_plan(memo, join.left).await?;
93+
let right = match_any_partial_logical_plan(memo, join.right).await?;
94+
let condition = match_any_partial_scalar_plan(memo, join.condition).await?;
95+
Ok(Arc::new(PartialLogicalPlan::PartialMaterialized {
96+
operator: LogicalOperator::Join(Join {
97+
left,
98+
right,
99+
condition,
100+
join_type: join.join_type.clone(),
101+
}),
102+
}))
103+
}
104+
}
105+
}
106+
107+
#[async_recursion]
108+
async fn match_any_partial_scalar_plan(
109+
memo: &impl Memoize,
110+
group: ScalarGroupId,
111+
) -> anyhow::Result<Arc<PartialScalarPlan>> {
112+
let scalar_exprs = memo.get_all_scalar_exprs_in_group(group).await?;
113+
let last_scalar_expr = scalar_exprs.last().unwrap().1.clone();
114+
match last_scalar_expr.as_ref() {
115+
ScalarExpression::Constant(constant) => {
116+
Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
117+
operator: ScalarOperator::Constant(constant.clone()),
118+
}))
119+
}
120+
ScalarExpression::ColumnRef(column_ref) => {
121+
Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
122+
operator: ScalarOperator::ColumnRef(column_ref.clone()),
123+
}))
124+
}
125+
ScalarExpression::Add(add) => {
126+
let left = match_any_partial_scalar_plan(memo, add.left).await?;
127+
let right = match_any_partial_scalar_plan(memo, add.right).await?;
128+
Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
129+
operator: ScalarOperator::Add(Add { left, right }),
130+
}))
131+
}
132+
ScalarExpression::Equal(equal) => {
133+
let left = match_any_partial_scalar_plan(memo, equal.left).await?;
134+
let right = match_any_partial_scalar_plan(memo, equal.right).await?;
135+
Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
136+
operator: ScalarOperator::Equal(Equal { left, right }),
137+
}))
138+
}
139+
}
140+
}
141+
142+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{storage::memo::SqliteMemo, test_utils::*};
    use anyhow::Ok;

    /// Ingesting the same plan twice must land in the same group, and reading that group back
    /// must reproduce the ingested plan.
    #[tokio::test]
    async fn test_ingest_partial_logical_plan() -> anyhow::Result<()> {
        let memo = SqliteMemo::new_in_memory().await?;

        // select * from t1, t2 where t1.id = t2.id and t2.name = 'Memo' and t2.v1 = 1 + 1
        let t1_scan = scan("t1", boolean(true));
        let t2_scan = scan("t2", equal(column_ref(1), add(int64(1), int64(1))));
        let join_condition = equal(column_ref(1), column_ref(2));
        let joined = join("inner", t1_scan, t2_scan, join_condition);
        let partial_logical_plan = filter(joined, equal(column_ref(2), string("Memo")));

        // Re-ingesting an identical plan must not create a second group.
        let first_group_id = ingest_partial_logical_plan(&memo, &partial_logical_plan).await?;
        let second_group_id = ingest_partial_logical_plan(&memo, &partial_logical_plan).await?;
        assert_eq!(first_group_id, second_group_id);

        // Each group holds exactly one expression, so the rebuilt plan equals the original.
        let rebuilt_plan: Arc<PartialLogicalPlan> =
            match_any_partial_logical_plan(&memo, first_group_id).await?;
        assert_eq!(rebuilt_plan, partial_logical_plan);
        Ok(())
    }
}

0 commit comments

Comments
 (0)