Skip to content

Commit

Permalink
Merge branch 'main' into yuchen/initial-storage
Browse files Browse the repository at this point in the history
  • Loading branch information
yliang412 authored Feb 1, 2025
2 parents e30b2c9 + 3962965 commit c5ec6cb
Show file tree
Hide file tree
Showing 23 changed files with 188 additions and 58 deletions.
4 changes: 2 additions & 2 deletions docs/src/architecture/glossary.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ See the following sections for more information.

A logical expression is a version of a [Relational Expression].

TODO(connor) Add more details.
TODO(connor): Add more details.

Examples of logical expressions include Logical Scan, Logical Join, or Logical Sort expressions
(which can just be shorthanded to Scan, Join, or Sort).
Expand All @@ -135,7 +135,7 @@ Examples of logical expressions include Logical Scan, Logical Join, or Logical S

A physical expression is a version of a [Relational Expression].

TODO(connor) Add more details.
TODO(connor): Add more details.

Examples of physical expressions include Table Scan, Index Scan, Hash Join, or Sort Merge Join.

Expand Down
12 changes: 5 additions & 7 deletions optd-core/src/expression.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
//! Types for logical and physical expressions in the optimizer.
use crate::memo::GroupId;
use crate::memo::{GroupId, ScalarGroupId};
use crate::operator::relational::logical::LogicalOperator;
use crate::operator::relational::physical::PhysicalOperator;

/// A logical expression in the memo table.
///
/// References children using [`GroupId`]s for expression sharing
/// and memoization.
pub type LogicalExpression = LogicalOperator<GroupId, GroupId>;
/// References children using [`GroupId`]s for expression sharing and memoization.
pub type LogicalExpression = LogicalOperator<GroupId, ScalarGroupId>;

/// A physical expression in the memo table.
///
/// Like [`LogicalExpression`] but with specific implementation
/// strategies.
pub type PhysicalExpression = PhysicalOperator<GroupId, GroupId>;
/// Like [`LogicalExpression`] but with specific implementation strategies.
pub type PhysicalExpression = PhysicalOperator<GroupId, ScalarGroupId>;
8 changes: 8 additions & 0 deletions optd-core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
//! TODO Add docs. We will likely want to add a `#![doc = include_str!("../README.md")]` here.
#![warn(missing_docs)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::missing_errors_doc)]
#![warn(clippy::missing_panics_doc)]
#![warn(clippy::missing_safety_doc)]

pub mod expression;
pub mod memo;
pub mod operator;
Expand Down
7 changes: 7 additions & 0 deletions optd-core/src/operator/relational/logical/filter.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
//! A logical filter.
use crate::operator::relational::RelationChildren;

/// Logical filter operator that selects rows matching a condition.
///
/// Takes input relation (`Relation`) and filters rows using a boolean predicate (`Scalar`).
#[derive(Clone)]
pub struct Filter<Relation, Scalar> {
/// The input relation.
pub child: Relation,
/// The filter expression denoting the predicate condition for this filter operation.
///
/// For example, a filter predicate could be `column_a > 42`, or it could be something like
/// `column_b < 100 AND column_c > 1000`.
pub predicate: Scalar,
}

Expand Down
8 changes: 8 additions & 0 deletions optd-core/src/operator/relational/logical/join.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! A logical join.
use crate::operator::relational::RelationChildren;

/// Logical join operator that combines rows from two relations.
Expand All @@ -6,9 +8,15 @@ use crate::operator::relational::RelationChildren;
/// (`Scalar`).
#[derive(Clone)]
pub struct Join<Relation, Scalar> {
/// TODO(alexis) Mocked for now.
pub join_type: String,
/// The left input relation.
pub left: Relation,
/// The right input relation.
pub right: Relation,
/// The join expression denoting the join condition that links the two input relations.
///
/// For example, a join operation could have a condition on `t1.id = t2.id` (an equijoin).
pub condition: Scalar,
}

Expand Down
1 change: 1 addition & 0 deletions optd-core/src/operator/relational/logical/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use super::RelationChildren;
/// [`LogicalPlan`]: crate::plan::logical_plan::LogicalPlan
/// [`PartialLogicalPlan`]: crate::plan::partial_logical_plan::PartialLogicalPlan
/// [`LogicalExpression`]: crate::expression::LogicalExpression
#[allow(missing_docs)]
#[derive(Clone)]
pub enum LogicalOperator<Relation, Scalar> {
Scan(Scan<Relation, Scalar>),
Expand Down
4 changes: 4 additions & 0 deletions optd-core/src/operator/relational/logical/project.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! A logical projection.
use crate::operator::relational::RelationChildren;

/// Logical project operator that specifies output columns.
Expand All @@ -6,7 +8,9 @@ use crate::operator::relational::RelationChildren;
/// (`Scalar`).
#[derive(Clone)]
pub struct Project<Relation, Scalar> {
/// The input relation.
pub child: Relation,
/// TODO(everyone): What exactly is going on here?
pub fields: Vec<Scalar>,
}

Expand Down
9 changes: 8 additions & 1 deletion optd-core/src/operator/relational/logical/scan.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! A logical scan.
use std::marker::PhantomData;

use crate::operator::relational::RelationChildren;
Expand All @@ -8,7 +10,12 @@ use crate::operator::relational::RelationChildren;
/// (`Scalar`).
#[derive(Clone)]
pub struct Scan<Relation, Scalar> {
pub table_name: String, // TODO(alexis): Mocked for now.
/// TODO(alexis) Mocked for now.
pub table_name: String,
/// An optional filter expression for predicate pushdown into scan operators.
///
/// For example, a `Filter(Scan(A), column_a < 42)` can be converted into a predicate pushdown
/// `Scan(A, column < 42)` to prevent having to materialize many tuples.
pub predicate: Option<Scalar>,
_phantom: PhantomData<Relation>,
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::operator::relational::RelationChildren;
/// (`Scalar`) that evaluates to true/false. Only rows where predicate is true
/// are emitted.
#[derive(Clone)]
pub struct Filter<Relation, Scalar> {
pub struct PhysicalFilter<Relation, Scalar> {
pub child: Relation,
pub predicate: Scalar,
}
Expand Down
11 changes: 9 additions & 2 deletions optd-core/src/operator/relational/physical/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
//! Type definitions of physical operators in optd.
// TODO(connor):
// The module structure here is somewhat questionable, as it has multiple physical operators that
// should really only have 1 implementor (filter and project).
// For now, we can hold off on documenting stuff here until that is stabilized.
#![allow(missing_docs)]

pub mod filter;
pub mod join;
pub mod project;
pub mod scan;

use filter::filter::Filter;
use filter::filter::PhysicalFilter;
use join::{hash_join::HashJoin, merge_join::MergeJoin, nested_loop_join::NestedLoopJoin};
use project::project::Project;
use scan::table_scan::TableScan;
Expand All @@ -24,10 +30,11 @@ use super::RelationChildren;
///
/// [`PhysicalPlan`]: crate::plan::physical_plan::PhysicalPlan
/// [`PhysicalExpression`]: crate::expression::PhysicalExpression
#[allow(missing_docs)]
#[derive(Clone)]
pub enum PhysicalOperator<Relation, Scalar> {
TableScan(TableScan<Relation, Scalar>),
Filter(Filter<Relation, Scalar>),
Filter(PhysicalFilter<Relation, Scalar>),
Project(Project<Relation, Scalar>),
HashJoin(HashJoin<Relation, Scalar>),
NestedLoopJoin(NestedLoopJoin<Relation, Scalar>),
Expand Down
8 changes: 6 additions & 2 deletions optd-core/src/operator/scalar/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
//! Type definitions for scalar operators.
// For now, we can hold off on documenting stuff here until that is stabilized.
#![allow(missing_docs)]

pub mod add;
pub mod column_ref;
pub mod constants;
Expand All @@ -20,9 +24,9 @@ use constants::Constant;
/// [`PartialLogicalPlan`]: crate::plan::partial_logical_plan::PartialLogicalPlan
#[derive(Clone)]
pub enum ScalarOperator<Scalar> {
Add(Add<Scalar>),
ColumnRef(ColumnRef),
Constant(Constant),
ColumnRef(ColumnRef),
Add(Add<Scalar>),
}

/// Trait for getting the children scalars of a scalar operator.
Expand Down
6 changes: 6 additions & 0 deletions optd-core/src/plan/logical_plan.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! This module contains the [`LogicalPlan`] type, which is the representation of a logical query
//! plan from SQL.
//!
//! See the documentation for [`LogicalPlan`] for more information.
use super::scalar_plan::ScalarPlan;
use crate::operator::relational::logical::LogicalOperator;
Expand All @@ -15,5 +17,9 @@ use std::sync::Arc;
/// TODO(connor): add more docs.
#[derive(Clone)]
pub struct LogicalPlan {
/// Represents the current logical operator that is the root of the current subplan.
///
/// Note that the children of the operator are other plans, which means that this data structure
/// is an in-memory DAG (directed acyclic graph) of logical operators.
pub node: Arc<LogicalOperator<LogicalPlan, ScalarPlan>>,
}
35 changes: 26 additions & 9 deletions optd-core/src/plan/partial_logical_plan.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
use crate::memo::GroupId;
//! This module contains the [`PartialLogicalPlan`] type, which is the representation of a partially
//! materialized logical query plan that is a mix of materialized logical operators and
//! unmaterialized group ID references to memo table groups of expressions.
//!
//! See the documentation for [`PartialLogicalPlan`] for more information.
use crate::memo::{GroupId, ScalarGroupId};
use crate::operator::relational::logical::LogicalOperator;
use crate::operator::scalar::ScalarOperator;
use std::sync::Arc;

/// A partially materialized logical query plan represented as a DAG (directed acyclic graph).
///
/// While a [`LogicalPlan`] contains fully materialized operator nodes, a `PartialLogicalPlan`
/// While a [`LogicalPlan`] contains fully materialized operator nodes, a [`PartialLogicalPlan`]
/// can contain both materialized nodes and references to unmaterialized memo groups. This enables
/// efficient plan exploration and transformation during query optimization.
///
Expand All @@ -24,27 +30,38 @@ use std::sync::Arc;
/// [`LogicalPlan`]: crate::plan::logical_plan::LogicalPlan
#[derive(Clone)]
pub struct PartialLogicalPlan {
/// Represents the current logical operator that is the root of the current partially
/// materialized subplan.
///
/// Note that the children of the operator are either a [`Relation`] or a [`Scalar`], both of
/// which are defined in this module. See their documentation for more information.
pub node: Arc<LogicalOperator<Relation, Scalar>>,
}

/// A link to a relational node in a [`PartialLogicalPlan`].
///
/// Can be either:
/// - A materialized logical operator node
/// - A reference to an unmaterialized memo group
/// This link (which denotes what kind of relational children the operators of a
/// [`PartialLogicalPlan`] can have) can be either:
/// - A materialized logical operator node.
/// - A reference (identifier) to an unmaterialized memo group.
#[derive(Clone)]
pub enum Relation {
/// A materialized logical operator node.
Operator(Arc<LogicalOperator<Relation, Scalar>>),
/// A reference (identifier) to an unmaterialized memo group.
GroupId(GroupId),
}

/// A link to a scalar node in a [`PartialLogicalPlan`].
///
/// Can be either:
/// - A materialized scalar operator node
/// - A reference to an unmaterialized memo group
/// This link (which denotes what kind of scalar children the operators of a [`PartialLogicalPlan`]
/// can have) can be either:
/// - A materialized scalar operator node.
/// - A reference to an unmaterialized memo group.
#[derive(Clone)]
pub enum Scalar {
/// A materialized scalar operator node.
Operator(Arc<ScalarOperator<Scalar>>),
GroupId(GroupId),
/// A reference to an unmaterialized memo group.
ScalarGroupId(ScalarGroupId),
}
6 changes: 6 additions & 0 deletions optd-core/src/plan/physical_plan.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! This module contains the [`PhysicalPlan`] type, which is the representation of a physical
//! execution plan that can be sent to a query execution engine.
//!
//! See the documentation for [`PhysicalPlan`] for more information.
use super::scalar_plan::ScalarPlan;
use crate::operator::relational::physical::PhysicalOperator;
Expand All @@ -15,5 +17,9 @@ use std::sync::Arc;
/// TODO(connor): add more docs.
#[derive(Clone)]
pub struct PhysicalPlan {
/// Represents the current physical operator that is the root of the current subplan.
///
/// Note that the children of the operator are other plans, which means that this data structure
/// is an in-memory DAG (directed acyclic graph) of physical operators.
pub node: Arc<PhysicalOperator<PhysicalPlan, ScalarPlan>>,
}
6 changes: 5 additions & 1 deletion optd-core/src/plan/scalar_plan.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
use std::sync::Arc;
//! TODO(everyone): Figure out what exactly a `ScalarPlan` is (tree? DAG? always materialized?)
use crate::operator::scalar::ScalarOperator;
use std::sync::Arc;

/// A representation of a scalar query plan DAG (directed acyclic graph).
#[derive(Clone)]
pub struct ScalarPlan {
/// Represents the current scalar operator that is the root of the current scalar subtree.
///
/// TODO(connor): Figure out if scalar plans can be a DAG
pub node: Arc<ScalarOperator<ScalarPlan>>,
}
35 changes: 24 additions & 11 deletions optd-core/src/rules/implementation/hash_join.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
//! The rule for implementing `Join` as a `HashJoin`.
//!
//! See [`HashJoinRule`] for more information.
use super::*;
use crate::operator::relational::{
logical::LogicalOperator,
logical::{join::Join, LogicalOperator},
physical::{join::hash_join::HashJoin, PhysicalOperator},
};

/// Implementation rule that converts a logical join into a hash join physical operator
/// A unit / marker struct for implementing `HashJoin`.
///
/// This implementation rule converts a logical `Join` into a physical `HashJoin` operator.
pub struct HashJoinRule;

// TODO: rule may fail, need to check join condition
// https://github.com/cmu-db/optd/issues/15
impl ImplementationRule for HashJoinRule {
fn check_and_apply(&self, expr: LogicalExpression) -> Option<PhysicalExpression> {
if let LogicalOperator::Join(join) = expr {
return Some(PhysicalOperator::HashJoin(HashJoin {
join_type: join.join_type,
probe_side: join.left,
build_side: join.right,
condition: join.condition,
}));
}
None
let LogicalOperator::Join(Join {
join_type,
left,
right,
condition,
}) = expr
else {
return None;
};

Some(PhysicalOperator::HashJoin(HashJoin {
join_type,
probe_side: left,
build_side: right,
condition,
}))
}
}
4 changes: 3 additions & 1 deletion optd-core/src/rules/implementation/mod.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
//! This module contains the implementation rule trait / API, as well as the rules that implement
//! said trait.
//!
//! TODO(connor) Add more docs.
//! TODO(connor): Add more docs.
use crate::expression::{LogicalExpression, PhysicalExpression};

/// The interface for implementation rules, which help convert logical plans into physical
/// (executable) query plans.
#[trait_variant::make(Send)]
#[allow(dead_code)]
pub trait ImplementationRule {
Expand Down
Loading

0 comments on commit c5ec6cb

Please sign in to comment.