diff --git a/docs/src/architecture/glossary.md b/docs/src/architecture/glossary.md index 024dec2..9a5f855 100644 --- a/docs/src/architecture/glossary.md +++ b/docs/src/architecture/glossary.md @@ -126,7 +126,7 @@ See the following sections for more information. A logical expression is a version of a [Relational Expression]. -TODO(connor) Add more details. +TODO(connor): Add more details. Examples of logical expressions include Logical Scan, Logical Join, or Logical Sort expressions (which can just be shorthanded to Scan, Join, or Sort). @@ -135,7 +135,7 @@ Examples of logical expressions include Logical Scan, Logical Join, or Logical S A physical expression is a version of a [Relational Expression]. -TODO(connor) Add more details. +TODO(connor): Add more details. Examples of physical expressions include Table Scan, Index Scan, Hash Join, or Sort Merge Join. diff --git a/optd-core/src/expression.rs b/optd-core/src/expression.rs index bdd89c0..808cd7c 100644 --- a/optd-core/src/expression.rs +++ b/optd-core/src/expression.rs @@ -1,17 +1,15 @@ //! Types for logical and physical expressions in the optimizer. -use crate::memo::GroupId; +use crate::memo::{GroupId, ScalarGroupId}; use crate::operator::relational::logical::LogicalOperator; use crate::operator::relational::physical::PhysicalOperator; /// A logical expression in the memo table. /// -/// References children using [`GroupId`]s for expression sharing -/// and memoization. -pub type LogicalExpression = LogicalOperator; +/// References children using [`GroupId`]s for expression sharing and memoization. +pub type LogicalExpression = LogicalOperator; /// A physical expression in the memo table. /// -/// Like [`LogicalExpression`] but with specific implementation -/// strategies. -pub type PhysicalExpression = PhysicalOperator; +/// Like [`LogicalExpression`] but with specific implementation strategies. 
+pub type PhysicalExpression = PhysicalOperator; diff --git a/optd-core/src/lib.rs b/optd-core/src/lib.rs index cdfd561..9ab7f2c 100644 --- a/optd-core/src/lib.rs +++ b/optd-core/src/lib.rs @@ -1,3 +1,11 @@ +//! TODO Add docs. We will likely want to add a `#![doc = include_str!("../README.md")]` here. + +#![warn(missing_docs)] +#![warn(clippy::missing_docs_in_private_items)] +#![warn(clippy::missing_errors_doc)] +#![warn(clippy::missing_panics_doc)] +#![warn(clippy::missing_safety_doc)] + pub mod expression; pub mod memo; pub mod operator; diff --git a/optd-core/src/operator/relational/logical/filter.rs b/optd-core/src/operator/relational/logical/filter.rs index 09ffc95..f2146d8 100644 --- a/optd-core/src/operator/relational/logical/filter.rs +++ b/optd-core/src/operator/relational/logical/filter.rs @@ -1,8 +1,15 @@ +//! A logical filter. + /// Logical filter operator that selects rows matching a condition. /// /// Takes input relation (`Relation`) and filters rows using a boolean predicate (`Scalar`). #[derive(Clone)] pub struct Filter { + /// The input relation. pub child: Relation, + /// The filter expression denoting the predicate condition for this filter operation. + /// + /// For example, a filter predicate could be `column_a > 42`, or it could be something like + /// `column_b < 100 AND column_c > 1000`. pub predicate: Scalar, } diff --git a/optd-core/src/operator/relational/logical/join.rs b/optd-core/src/operator/relational/logical/join.rs index c881648..75d919a 100644 --- a/optd-core/src/operator/relational/logical/join.rs +++ b/optd-core/src/operator/relational/logical/join.rs @@ -1,11 +1,19 @@ +//! A logical join. + /// Logical join operator that combines rows from two relations. /// /// Takes left and right relations (`Relation`) and joins their rows using a join condition /// (`Scalar`). #[derive(Clone)] pub struct Join { + /// TODO(alexis) Mocked for now. pub join_type: String, + /// The left input relation. 
pub left: Relation, + /// The right input relation. pub right: Relation, + /// The join expression denoting the join condition that links the two input relations. + /// + /// For example, a join operation could have a condition on `t1.id = t2.id` (an equijoin). pub condition: Scalar, } diff --git a/optd-core/src/operator/relational/logical/mod.rs b/optd-core/src/operator/relational/logical/mod.rs index d4db1a0..0821b38 100644 --- a/optd-core/src/operator/relational/logical/mod.rs +++ b/optd-core/src/operator/relational/logical/mod.rs @@ -22,6 +22,7 @@ use scan::Scan; /// [`LogicalPlan`]: crate::plan::logical_plan::LogicalPlan /// [`PartialLogicalPlan`]: crate::plan::partial_logical_plan::PartialLogicalPlan /// [`LogicalExpression`]: crate::expression::LogicalExpression +#[allow(missing_docs)] #[derive(Clone)] pub enum LogicalOperator { Scan(Scan), diff --git a/optd-core/src/operator/relational/logical/project.rs b/optd-core/src/operator/relational/logical/project.rs index 8c8e0dd..656e5f5 100644 --- a/optd-core/src/operator/relational/logical/project.rs +++ b/optd-core/src/operator/relational/logical/project.rs @@ -1,9 +1,13 @@ +//! A logical projection. + /// Logical project operator that specifies output columns. /// /// Takes input relation (`Relation`) and defines output columns/expressions /// (`Scalar`). #[derive(Clone)] pub struct Project { + /// The input relation. pub child: Relation, + /// TODO(everyone): What exactly is going on here? pub fields: Vec, } diff --git a/optd-core/src/operator/relational/logical/scan.rs b/optd-core/src/operator/relational/logical/scan.rs index f7ca18c..39dc625 100644 --- a/optd-core/src/operator/relational/logical/scan.rs +++ b/optd-core/src/operator/relational/logical/scan.rs @@ -1,9 +1,16 @@ +//! A logical scan. + /// Logical scan operator that reads from a base table. /// /// Reads from table (`String`) and optionally filters rows using a pushdown predicate /// (`Scalar`). 
#[derive(Clone)] pub struct Scan { - pub table_name: String, // TODO(alexis): Mocked for now. + /// TODO(alexis): Mocked for now. + pub table_name: String, + /// An optional filter expression for predicate pushdown into scan operators. + /// + /// For example, a `Filter(Scan(A), column_a < 42)` can be converted into a predicate pushdown + /// `Scan(A, column_a < 42)` to prevent having to materialize many tuples. pub predicate: Option, } diff --git a/optd-core/src/operator/relational/physical/filter/filter.rs b/optd-core/src/operator/relational/physical/filter/filter.rs index 8061ef6..9af1df6 100644 --- a/optd-core/src/operator/relational/physical/filter/filter.rs +++ b/optd-core/src/operator/relational/physical/filter/filter.rs @@ -4,7 +4,7 @@ /// (`Scalar`) that evaluates to true/false. Only rows where predicate is true /// are emitted. #[derive(Clone)] -pub struct Filter { +pub struct PhysicalFilter { pub child: Relation, pub predicate: Scalar, } diff --git a/optd-core/src/operator/relational/physical/mod.rs b/optd-core/src/operator/relational/physical/mod.rs index 5f6e582..9704168 100644 --- a/optd-core/src/operator/relational/physical/mod.rs +++ b/optd-core/src/operator/relational/physical/mod.rs @@ -1,11 +1,17 @@ //! Type definitions of physical operators in optd. +// TODO(connor): +// The module structure here is somewhat questionable, as it has multiple physical operators that +// should really only have 1 implementor (filter and project). +// For now, we can hold off on documenting stuff here until that is stabilized. 
+#![allow(missing_docs)] + pub mod filter; pub mod join; pub mod project; pub mod scan; -use filter::filter::Filter; +use filter::filter::PhysicalFilter; use join::{hash_join::HashJoin, merge_join::MergeJoin, nested_loop_join::NestedLoopJoin}; use project::project::Project; use scan::table_scan::TableScan; @@ -22,10 +28,11 @@ use scan::table_scan::TableScan; /// /// [`PhysicalPlan`]: crate::plan::physical_plan::PhysicalPlan /// [`PhysicalExpression`]: crate::expression::PhysicalExpression +#[allow(missing_docs)] #[derive(Clone)] pub enum PhysicalOperator { TableScan(TableScan), - Filter(Filter), + Filter(PhysicalFilter), Project(Project), HashJoin(HashJoin), NestedLoopJoin(NestedLoopJoin), diff --git a/optd-core/src/operator/scalar/mod.rs b/optd-core/src/operator/scalar/mod.rs index 4d8ca58..edb4b16 100644 --- a/optd-core/src/operator/scalar/mod.rs +++ b/optd-core/src/operator/scalar/mod.rs @@ -1,4 +1,8 @@ //! Type definitions for scalar operators. + +// For now, we can hold off on documenting stuff here until that is stabilized. +#![allow(missing_docs)] + pub mod add; pub mod column_ref; pub mod constants; @@ -20,7 +24,7 @@ use constants::Constant; /// [`PartialLogicalPlan`]: crate::plan::partial_logical_plan::PartialLogicalPlan #[derive(Clone)] pub enum ScalarOperator { - Add(Add), - ColumnRef(ColumnRef), Constant(Constant), + ColumnRef(ColumnRef), + Add(Add), } diff --git a/optd-core/src/plan/logical_plan.rs b/optd-core/src/plan/logical_plan.rs index 4ae6420..2a5aff8 100644 --- a/optd-core/src/plan/logical_plan.rs +++ b/optd-core/src/plan/logical_plan.rs @@ -1,5 +1,7 @@ //! This module contains the [`LogicalPlan`] type, which is the representation of a logical query //! plan from SQL. +//! +//! See the documentation for [`LogicalPlan`] for more information. use super::scalar_plan::ScalarPlan; use crate::operator::relational::logical::LogicalOperator; @@ -15,5 +17,9 @@ use std::sync::Arc; /// TODO(connor): add more docs. 
#[derive(Clone)] pub struct LogicalPlan { + /// Represents the current logical operator that is the root of the current subplan. + /// + /// Note that the children of the operator are other plans, which means that this data structure + /// is an in-memory DAG (directed acyclic graph) of logical operators. pub node: Arc>, } diff --git a/optd-core/src/plan/partial_logical_plan.rs b/optd-core/src/plan/partial_logical_plan.rs index d1ab57a..dbda1bd 100644 --- a/optd-core/src/plan/partial_logical_plan.rs +++ b/optd-core/src/plan/partial_logical_plan.rs @@ -1,11 +1,17 @@ -use crate::memo::GroupId; +//! This module contains the [`PartialLogicalPlan`] type, which is the representation of a partially +//! materialized logical query plan that is a mix of materialized logical operators and +//! unmaterialized group ID references to memo table groups of expressions. +//! +//! See the documentation for [`PartialLogicalPlan`] for more information. + +use crate::memo::{GroupId, ScalarGroupId}; use crate::operator::relational::logical::LogicalOperator; use crate::operator::scalar::ScalarOperator; use std::sync::Arc; /// A partially materialized logical query plan represented as a DAG (directed acyclic graph). /// -/// While a [`LogicalPlan`] contains fully materialized operator nodes, a `PartialLogicalPlan` +/// While a [`LogicalPlan`] contains fully materialized operator nodes, a [`PartialLogicalPlan`] /// can contain both materialized nodes and references to unmaterialized memo groups. This enables /// efficient plan exploration and transformation during query optimization. /// @@ -24,27 +30,38 @@ use std::sync::Arc; /// [`LogicalPlan`]: crate::plan::logical_plan::LogicalPlan #[derive(Clone)] pub struct PartialLogicalPlan { + /// Represents the current logical operator that is the root of the current partially + /// materialized subplan. + /// + /// Note that the children of the operator are either a [`Relation`] or a [`Scalar`], both of + /// which are defined in this module. 
See their documentation for more information. pub node: Arc>, } /// A link to a relational node in a [`PartialLogicalPlan`]. /// -/// Can be either: -/// - A materialized logical operator node -/// - A reference to an unmaterialized memo group +/// This link (which denotes what kind of relational children the operators of a +/// [`PartialLogicalPlan`] can have) can be either: +/// - A materialized logical operator node. +/// - A reference (identifier) to an unmaterialized memo group. #[derive(Clone)] pub enum Relation { + /// A materialized logical operator node. Operator(Arc>), + /// A reference (identifier) to an unmaterialized memo group. GroupId(GroupId), } /// A link to a scalar node in a [`PartialLogicalPlan`]. /// -/// Can be either: -/// - A materialized scalar operator node -/// - A reference to an unmaterialized memo group +/// This link (which denotes what kind of scalar children the operators of a [`PartialLogicalPlan`] +/// can have) can be either: +/// - A materialized scalar operator node. +/// - A reference to an unmaterialized memo group. #[derive(Clone)] pub enum Scalar { + /// A materialized scalar operator node. Operator(Arc>), - GroupId(GroupId), + /// A reference to an unmaterialized memo group. + ScalarGroupId(ScalarGroupId), } diff --git a/optd-core/src/plan/physical_plan.rs b/optd-core/src/plan/physical_plan.rs index 2d16954..bcb73f5 100644 --- a/optd-core/src/plan/physical_plan.rs +++ b/optd-core/src/plan/physical_plan.rs @@ -1,5 +1,7 @@ //! This module contains the [`PhysicalPlan`] type, which is the representation of a physical //! execution plan that can be sent to a query execution engine. +//! +//! See the documentation for [`PhysicalPlan`] for more information. use super::scalar_plan::ScalarPlan; use crate::operator::relational::physical::PhysicalOperator; @@ -15,5 +17,9 @@ use std::sync::Arc; /// TODO(connor): add more docs. 
#[derive(Clone)] pub struct PhysicalPlan { + /// Represents the current physical operator that is the root of the current subplan. + /// + /// Note that the children of the operator are other plans, which means that this data structure + /// is an in-memory DAG (directed acyclic graph) of physical operators. pub node: Arc>, } diff --git a/optd-core/src/plan/scalar_plan.rs b/optd-core/src/plan/scalar_plan.rs index c4cdc5b..f65c790 100644 --- a/optd-core/src/plan/scalar_plan.rs +++ b/optd-core/src/plan/scalar_plan.rs @@ -1,9 +1,13 @@ -use std::sync::Arc; +//! TODO(everyone): Figure out what exactly a `ScalarPlan` is (tree? DAG? always materialized?) use crate::operator::scalar::ScalarOperator; +use std::sync::Arc; /// A representation of a scalar query plan DAG (directed acyclic graph). #[derive(Clone)] pub struct ScalarPlan { + /// Represents the current scalar operator that is the root of the current scalar subtree. + /// + /// TODO(connor): Figure out if scalar plans can be a DAG pub node: Arc>, } diff --git a/optd-core/src/rules/implementation/hash_join.rs b/optd-core/src/rules/implementation/hash_join.rs index fb747fc..9738be4 100644 --- a/optd-core/src/rules/implementation/hash_join.rs +++ b/optd-core/src/rules/implementation/hash_join.rs @@ -1,24 +1,37 @@ +//! The rule for implementing `Join` as a `HashJoin`. +//! +//! See [`HashJoinRule`] for more information. + use super::*; use crate::operator::relational::{ - logical::LogicalOperator, + logical::{join::Join, LogicalOperator}, physical::{join::hash_join::HashJoin, PhysicalOperator}, }; -/// Implementation rule that converts a logical join into a hash join physical operator +/// A unit / marker struct for implementing `HashJoin`. +/// +/// This implementation rule converts a logical `Join` into a physical `HashJoin` operator. 
pub struct HashJoinRule; // TODO: rule may fail, need to check join condition // https://github.com/cmu-db/optd/issues/15 impl ImplementationRule for HashJoinRule { fn check_and_apply(&self, expr: LogicalExpression) -> Option { - if let LogicalOperator::Join(join) = expr { - return Some(PhysicalOperator::HashJoin(HashJoin { - join_type: join.join_type, - probe_side: join.left, - build_side: join.right, - condition: join.condition, - })); - } - None + let LogicalOperator::Join(Join { + join_type, + left, + right, + condition, + }) = expr + else { + return None; + }; + + Some(PhysicalOperator::HashJoin(HashJoin { + join_type, + probe_side: left, + build_side: right, + condition, + })) } } diff --git a/optd-core/src/rules/implementation/mod.rs b/optd-core/src/rules/implementation/mod.rs index 08da6f0..9e461a1 100644 --- a/optd-core/src/rules/implementation/mod.rs +++ b/optd-core/src/rules/implementation/mod.rs @@ -1,10 +1,12 @@ //! This module contains the implementation rule trait / API, as well as the rules that implement //! said trait. //! -//! TODO(connor) Add more docs. +//! TODO(connor): Add more docs. use crate::expression::{LogicalExpression, PhysicalExpression}; +/// The interface for implementation rules, which help convert logical plans into physical +/// (executable) query plans. #[trait_variant::make(Send)] #[allow(dead_code)] pub trait ImplementationRule { diff --git a/optd-core/src/rules/implementation/physical_filter.rs b/optd-core/src/rules/implementation/physical_filter.rs index 8c06072..a169e24 100644 --- a/optd-core/src/rules/implementation/physical_filter.rs +++ b/optd-core/src/rules/implementation/physical_filter.rs @@ -1,21 +1,27 @@ +//! The rule for implementing a logical `Filter` as a physical `Filter`. +//! +//! See [`PhysicalFilterRule`] for more information. 
+ use super::*; use crate::operator::relational::{ - logical::LogicalOperator, - physical::{filter::filter::Filter, PhysicalOperator}, + logical::{filter::Filter as LogicalFilter, LogicalOperator}, + physical::{filter::filter::PhysicalFilter, PhysicalOperator}, }; -/// Implementation rule that converts a logical filter into a filter physical operator. +/// A unit / marker struct for implementing `PhysicalFilter`. +/// +/// This implementation rule converts a logical `Filter` into a `PhysicalFilter` operator. pub struct PhysicalFilterRule; impl ImplementationRule for PhysicalFilterRule { fn check_and_apply(&self, expr: LogicalExpression) -> Option { - if let LogicalOperator::Filter(filter) = expr { - return Some(PhysicalOperator::Filter(Filter { - child: filter.child, - predicate: filter.predicate, - })); - } + let LogicalOperator::Filter(LogicalFilter { child, predicate }) = expr else { + return None; + }; - None + Some(PhysicalOperator::Filter(PhysicalFilter { + child, + predicate, + })) } } diff --git a/optd-core/src/rules/implementation/table_scan.rs b/optd-core/src/rules/implementation/table_scan.rs index 375d9ae..22382c2 100644 --- a/optd-core/src/rules/implementation/table_scan.rs +++ b/optd-core/src/rules/implementation/table_scan.rs @@ -1,22 +1,32 @@ +//! The rule for implementing `Scan` as a `TableScan`. +//! +//! See [`TableScanRule`] for more information. + use crate::operator::relational::{ - logical::LogicalOperator, + logical::{scan::Scan, LogicalOperator}, physical::{scan::table_scan::TableScan, PhysicalOperator}, }; use super::*; -// Implementation rule that converts a logical scan into a table scan physical operator. +/// A unit / marker struct for implementing `TableScan`. +/// +/// This implementation rule converts a logical `Scan` into a physical `TableScan` operator. 
pub struct TableScanRule; impl ImplementationRule for TableScanRule { fn check_and_apply(&self, expr: LogicalExpression) -> Option { - if let LogicalOperator::Scan(scan) = expr { - return Some(PhysicalOperator::TableScan(TableScan { - table_name: scan.table_name, - predicate: scan.predicate, - })); - } + let LogicalOperator::Scan(Scan { + table_name, + predicate, + }) = expr + else { + return None; + }; - None + Some(PhysicalOperator::TableScan(TableScan { + table_name, + predicate, + })) } } diff --git a/optd-core/src/rules/mod.rs b/optd-core/src/rules/mod.rs index edc3d0b..b7e1002 100644 --- a/optd-core/src/rules/mod.rs +++ b/optd-core/src/rules/mod.rs @@ -1,2 +1,5 @@ +//! This module contains all rules that the optimizer has available, including both transformation +//! and implementation rules. + mod implementation; mod transformation; diff --git a/optd-core/src/rules/transformation/join_associativity.rs b/optd-core/src/rules/transformation/join_associativity.rs index 7c42c89..2e48be0 100644 --- a/optd-core/src/rules/transformation/join_associativity.rs +++ b/optd-core/src/rules/transformation/join_associativity.rs @@ -1,5 +1,13 @@ +//! The rule for join associativity. +//! +//! See [`JoinAssociativityRule`] for more information. + use super::*; +/// A unit / marker struct for join associativity. +/// +/// Since joining is an associative operation, we can convert a `Join(Join(A, B), C)` into a +/// `Join(A, Join(B, C))`. pub struct JoinAssociativityRule; impl TransformationRule for JoinAssociativityRule { diff --git a/optd-core/src/rules/transformation/join_commutativity.rs b/optd-core/src/rules/transformation/join_commutativity.rs index 0ef1e63..959d6db 100644 --- a/optd-core/src/rules/transformation/join_commutativity.rs +++ b/optd-core/src/rules/transformation/join_commutativity.rs @@ -1,5 +1,12 @@ +//! The rule for join commutativity. +//! +//! See [`JoinCommutativityRule`] for more information. 
+ use super::*; +/// A unit / marker struct for join commutativity. +/// +/// Since joining is a commutative operation, we can convert a `Join(A, B)` into a `Join(B, A)`. pub struct JoinCommutativityRule; impl TransformationRule for JoinCommutativityRule { diff --git a/optd-core/src/rules/transformation/mod.rs b/optd-core/src/rules/transformation/mod.rs index 2b32ab7..3152e94 100644 --- a/optd-core/src/rules/transformation/mod.rs +++ b/optd-core/src/rules/transformation/mod.rs @@ -1,12 +1,14 @@ //! This module contains the transformation rule trait / API, as well as the rules that implement //! said trait. //! -//! TODO(connor) Add more docs. +//! TODO(everyone): Add more docs. use crate::{ expression::LogicalExpression, memo::Memo, plan::partial_logical_plan::PartialLogicalPlan, }; +/// The interface for transformation rules, which help enumerate logically equivalent plans during +/// the optimization search. #[trait_variant::make(Send)] #[allow(dead_code)] pub trait TransformationRule { @@ -31,6 +33,8 @@ pub trait TransformationRule { /// /// These changes can create new logical or scalar expressions. However, note that /// transformation rules will _not_ create new physical expressions. + /// + /// TODO(everyone): Figure out what the return type should really be. fn apply(&self, expr: PartialLogicalPlan) -> PartialLogicalPlan; }