From edc8d2a1afd419393af44f576d185cee4001c47f Mon Sep 17 00:00:00 2001 From: Yuchen Liang Date: Mon, 20 Jan 2025 14:20:34 -0500 Subject: [PATCH] add new subgroup table; more docs Signed-off-by: Yuchen Liang --- optd/Cargo.toml | 2 +- .../up.sql | 2 +- .../down.sql | 1 - .../up.sql | 18 ---- .../down.sql | 1 + .../up.sql | 9 ++ .../down.sql | 1 + .../up.sql | 10 ++ .../down.sql | 1 + .../up.sql | 12 +++ optd/src/storage/models.rs | 100 ++++++++++++++---- optd/src/storage/schema.rs | 40 +++++-- 12 files changed, 146 insertions(+), 51 deletions(-) delete mode 100644 optd/migrations/2025-01-19-054757_create_rel_group_winners/down.sql delete mode 100644 optd/migrations/2025-01-19-054757_create_rel_group_winners/up.sql create mode 100644 optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql create mode 100644 optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql create mode 100644 optd/migrations/2025-01-20-150957_create_rel_subgroups/down.sql create mode 100644 optd/migrations/2025-01-20-150957_create_rel_subgroups/up.sql create mode 100644 optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql create mode 100644 optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql diff --git a/optd/Cargo.toml b/optd/Cargo.toml index f3d2edc..ab55380 100644 --- a/optd/Cargo.toml +++ b/optd/Cargo.toml @@ -6,4 +6,4 @@ edition = "2021" [dependencies] diesel.workspace = true chrono.workspace = true -anyhow = "1.0.95" +anyhow.workspace = true diff --git a/optd/migrations/2025-01-19-054756_create_physical_exprs/up.sql b/optd/migrations/2025-01-19-054756_create_physical_exprs/up.sql index 2285ff7..3c6ad3a 100644 --- a/optd/migrations/2025-01-19-054756_create_physical_exprs/up.sql +++ b/optd/migrations/2025-01-19-054756_create_physical_exprs/up.sql @@ -6,7 +6,7 @@ CREATE TABLE physical_exprs ( -- The type descriptor of the physical expression. typ_desc BIGINT NOT NULL, -- The group this physical expression belongs to. 
- group_id BIGINT NOT NULL, -- groups.id + group_id BIGINT NOT NULL, -- The physical property dervied based on the properties of the children nodes. derived_phys_prop_id BIGINT NOT NULL, -- The cost associated with the physical expression. diff --git a/optd/migrations/2025-01-19-054757_create_rel_group_winners/down.sql b/optd/migrations/2025-01-19-054757_create_rel_group_winners/down.sql deleted file mode 100644 index 08d3e4f..0000000 --- a/optd/migrations/2025-01-19-054757_create_rel_group_winners/down.sql +++ /dev/null @@ -1 +0,0 @@ -DROP TABLE rel_group_winners; diff --git a/optd/migrations/2025-01-19-054757_create_rel_group_winners/up.sql b/optd/migrations/2025-01-19-054757_create_rel_group_winners/up.sql deleted file mode 100644 index ec5cae0..0000000 --- a/optd/migrations/2025-01-19-054757_create_rel_group_winners/up.sql +++ /dev/null @@ -1,18 +0,0 @@ --- The winners table records the winner of a group with some required physical property. -CREATE TABLE rel_group_winners ( - -- The group we are interested in. - group_id BIGINT NOT NULL, - -- Identified for the required physical property. - required_phys_prop_id BIGINT NOT NULL, - -- The winner of the group with `group_id` and required physical property. 
- physical_expr_id BIGINT NOT NULL, - PRIMARY KEY (group_id, required_phys_prop_id), - FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE, - FOREIGN KEY (required_phys_prop_id) REFERENCES physical_props(id) ON DELETE CASCADE ON UPDATE CASCADE, - FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE -); - --- Could also do a query to compute the winner: --- SELECT MIN(cost), [all other fields] --- FROM physical_exprs --- WHERE group_id = and satisfies(derived_phys_prop_id, required_phys_prop_id); diff --git a/optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql b/optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql new file mode 100644 index 0000000..2752bec --- /dev/null +++ b/optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_subgroup_winners; diff --git a/optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql b/optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql new file mode 100644 index 0000000..ba39afe --- /dev/null +++ b/optd/migrations/2025-01-19-054757_create_rel_subgroup_winners/up.sql @@ -0,0 +1,9 @@ +-- The winners table records the winner of a group with some required physical property. +CREATE TABLE rel_subgroup_winners ( + -- The subgroup id of the winner, i.e. the winner of the group with `group_id` and some required physical property. + subgroup_id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The physical expression id of the winner. 
+ physical_expr_id BIGINT NOT NULL, + FOREIGN KEY (subgroup_id) REFERENCES rel_subgroups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd/migrations/2025-01-20-150957_create_rel_subgroups/down.sql b/optd/migrations/2025-01-20-150957_create_rel_subgroups/down.sql new file mode 100644 index 0000000..0b5a382 --- /dev/null +++ b/optd/migrations/2025-01-20-150957_create_rel_subgroups/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_subgroups; diff --git a/optd/migrations/2025-01-20-150957_create_rel_subgroups/up.sql b/optd/migrations/2025-01-20-150957_create_rel_subgroups/up.sql new file mode 100644 index 0000000..65a6f87 --- /dev/null +++ b/optd/migrations/2025-01-20-150957_create_rel_subgroups/up.sql @@ -0,0 +1,10 @@ +-- The relational subgroups table specifies the subgroups of a group with some required physical property. +CREATE TABLE rel_subgroups ( + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The group the subgroup belongs to. + group_id BIGINT NOT NULL, + -- The required physical property of the subgroup.
+ required_phys_prop_id BIGINT NOT NULL, + FOREIGN KEY (group_id) REFERENCES rel_groups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (required_phys_prop_id) REFERENCES physical_props(id) ON DELETE CASCADE ON UPDATE CASCADE +); \ No newline at end of file diff --git a/optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql b/optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql new file mode 100644 index 0000000..850f7dd --- /dev/null +++ b/optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/down.sql @@ -0,0 +1 @@ +DROP TABLE rel_subgroup_physical_exprs; diff --git a/optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql b/optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql new file mode 100644 index 0000000..95ea041 --- /dev/null +++ b/optd/migrations/2025-01-20-153830_create_rel_subgroup_physical_exprs/up.sql @@ -0,0 +1,12 @@ +-- The relational subgroup expressions table specifies the physical expressions of a subgroup. +-- It is a m:n junction table since a subgroup can have multiple physical expressions, +-- and a physical expression can belong to multiple subgroups. +CREATE TABLE rel_subgroup_physical_exprs ( + -- The subgroup the physical expression belongs to. + subgroup_id BIGINT NOT NULL, + -- The physical expression id. 
+ physical_expr_id BIGINT NOT NULL, + PRIMARY KEY (subgroup_id, physical_expr_id), + FOREIGN KEY (subgroup_id) REFERENCES rel_subgroups(id) ON DELETE CASCADE ON UPDATE CASCADE, + FOREIGN KEY (physical_expr_id) REFERENCES physical_exprs(id) ON DELETE CASCADE ON UPDATE CASCADE +); diff --git a/optd/src/storage/models.rs b/optd/src/storage/models.rs index 78f6494..9ba2137 100644 --- a/optd/src/storage/models.rs +++ b/optd/src/storage/models.rs @@ -8,7 +8,9 @@ use diesel::{ sql_types::{BigInt, Integer}, }; -#[derive(Queryable, Selectable)] +/// A relational group contains one or more equivalent logical expressions +/// and zero or more physical expressions. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::rel_groups)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct RelGroup { @@ -22,20 +24,38 @@ pub struct RelGroup { pub rep_id: Option, } -#[derive(Queryable, Selectable)] -#[diesel(table_name = super::schema::rel_group_winners)] +/// A relational subgroup contains a subset of physical expressions in a relational group that +/// can satisfy the same required physical properties. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::rel_subgroups)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] -pub struct RelGroupWinner { - /// The group we are interested in. +pub struct RelSubGroup { + pub id: RelSubGroupId, + /// The group the subgroup belongs to. pub group_id: RelGroupId, - /// The required physical property. + /// The required physical property of the subgroup. pub required_phys_prop_id: PhysicalPropId, - /// The winner of the group with `group_id` and required physical property. +} + +/// A subgroup winner is a physical expression that is the winner of a group with a required physical property.
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] +#[diesel(table_name = super::schema::rel_subgroup_winners)] +#[diesel(belongs_to(RelSubGroup))] +#[diesel(belongs_to(PhysicalExpr))] +#[diesel(primary_key(subgroup_id))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct RelSubgroupWinner { + /// The subgroup id of the winner, i.e. the winner of the group with `group_id` and some required physical property. + pub subgroup_id: RelSubGroupId, + /// The physical expression id of the winner. pub physical_expr_id: PhysicalExprId, } -#[derive(Queryable, Selectable)] +/// A logical expression is a relational expression that consists of a tree of logical operators. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::logical_exprs)] +#[diesel(belongs_to(RelGroup))] +#[diesel(belongs_to(LogicalTypDesc))] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct LogicalExpr { /// The logical expression identifier. @@ -48,8 +68,10 @@ pub struct LogicalExpr { pub created_at: chrono::NaiveDateTime, } -#[derive(Queryable, Selectable)] +/// Logical properties are shared by a relational group. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::logical_props)] +#[diesel(belongs_to(RelGroup))] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct LogicalProp { /// The logical property identifier. @@ -60,7 +82,8 @@ pub struct LogicalProp { pub card_est: i64, } -#[derive(Queryable, Selectable)] +/// Descriptor for a logical relational operator type.
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::logical_typ_descs)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct LogicalTypDesc { @@ -70,8 +93,11 @@ pub struct LogicalTypDesc { pub name: String, } -#[derive(Queryable, Selectable)] +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::physical_exprs)] +#[diesel(belongs_to(RelGroup))] +#[diesel(belongs_to(PhysicalTypDesc))] +#[diesel(belongs_to(PhysicalProp))] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct PhysicalExpr { /// The physical expression id. @@ -88,7 +114,27 @@ pub struct PhysicalExpr { pub created_at: chrono::NaiveDateTime, } -#[derive(Queryable, Selectable)] +// TODO(yuchen): Do we need a junction table for (logical_expr, required_phys_prop) <=> subgroup? TBD. +/// A relational subgroup expression entry specifies if a physical expression belongs to a subgroup. +/// It is a m:n relationship since a subgroup can have multiple physical expressions, +/// and a physical expression can belong to multiple subgroups. +#[derive(Queryable, Selectable, Identifiable, Associations)] +#[diesel(table_name = super::schema::rel_subgroup_physical_exprs)] +#[diesel(primary_key(subgroup_id, physical_expr_id))] +#[diesel(belongs_to(RelSubGroup, foreign_key = subgroup_id))] +#[diesel(belongs_to(PhysicalExpr))] +#[diesel(check_for_backend(diesel::sqlite::Sqlite))] +pub struct RelSubGroupPhysicalExpr { + /// The subgroup the physical expression belongs to. + pub subgroup_id: RelSubGroupId, + /// The physical expression id. + pub physical_expr_id: PhysicalExprId, +} + +/// A physical property is a characteristic of an expression that impacts its layout, +/// presentation, or location, but not its logical content. +/// They could be either required by a subgroup or derived on a physical expression.
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::physical_props)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct PhysicalProp { @@ -98,7 +144,8 @@ pub struct PhysicalProp { pub payload: Vec, } -#[derive(Queryable, Selectable)] +/// Descriptor for a physical relational operator type. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::physical_typ_descs)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct PhysicalTypDesc { @@ -108,8 +155,12 @@ pub struct PhysicalTypDesc { pub name: String, } -#[derive(Queryable, Selectable)] +// TODO: ideally you want scalar to mimic the relational expressions. We don't have a definition of a physical scalar expression yet. +/// A scalar expression consists of a tree of scalar operators. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::scalar_exprs)] +#[diesel(belongs_to(ScalarGroup))] +#[diesel(belongs_to(ScalarTyeDesc))] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct ScalarExpr { /// The scalar expression id. @@ -118,29 +169,37 @@ pub struct ScalarExpr { pub typ_desc: ScalarTypDescId, /// The scalar group that this scalar expression belongs to. pub group_id: ScalarGroupId, + /// The time at which this scalar expression was created. pub created_at: chrono::NaiveDateTime, + /// The cost associated with this scalar expression. None if the cost has not been computed. pub cost: Option, } -#[derive(Queryable, Selectable)] +/// A scalar group contains one or more equivalent scalar expressions. 
+#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::scalar_groups)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct ScalarGroup { pub id: ScalarGroupId, pub status: i32, pub created_at: chrono::NaiveDateTime, - pub rep_id: Option, + pub rep_id: Option, } -#[derive(Queryable, Selectable)] +/// A scalar group winner is a scalar expression with the lowest cost in a scalar group. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::scalar_group_winners)] +#[diesel(primary_key(group_id))] +#[diesel(belongs_to(ScalarGroup))] +#[diesel(belongs_to(ScalarExpr))] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct ScalarGroupWinner { pub group_id: ScalarGroupId, pub scalar_expr_id: ScalarExprId, } -#[derive(Queryable, Selectable)] +/// A scalar property is a property shared by a scalar group. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::scalar_props)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct ScalarProp { @@ -150,7 +209,8 @@ pub struct ScalarProp { pub payload: Vec, } -#[derive(Queryable, Selectable)] +/// Descriptor for a scalar type. +#[derive(Queryable, Selectable, Identifiable, AsChangeset)] #[diesel(table_name = super::schema::scalar_typ_descs)] #[diesel(check_for_backend(diesel::sqlite::Sqlite))] pub struct ScalarTyeDesc { @@ -161,6 +221,7 @@ pub struct ScalarTyeDesc { } /// Defines a new ID type with the given name, inner type, and SQL type. +/// Also deriving some common traits for the new type. #[macro_export] macro_rules! impl_diesel_new_type_from_to_sql { ($type_name:ident, $inner_type:ty, $sql_type:ty) => { @@ -206,6 +267,7 @@ macro_rules! 
impl_diesel_new_type_from_to_sql { } impl_diesel_new_type_from_to_sql!(RelGroupId, i64, BigInt); +impl_diesel_new_type_from_to_sql!(RelSubGroupId, i64, BigInt); impl_diesel_new_type_from_to_sql!(LogicalExprId, i64, BigInt); impl_diesel_new_type_from_to_sql!(PhysicalExprId, i64, BigInt); impl_diesel_new_type_from_to_sql!(LogicalPropId, i64, BigInt); diff --git a/optd/src/storage/schema.rs b/optd/src/storage/schema.rs index 3e2b7f2..0e861a2 100644 --- a/optd/src/storage/schema.rs +++ b/optd/src/storage/schema.rs @@ -50,19 +50,33 @@ diesel::table! { } diesel::table! { - rel_group_winners (group_id, required_phys_prop_id) { - group_id -> BigInt, - required_phys_prop_id -> BigInt, + rel_groups (id) { + id -> BigInt, + status -> Integer, + created_at -> Timestamp, + rep_id -> Nullable, + } +} + +diesel::table! { + rel_subgroup_physical_exprs (subgroup_id, physical_expr_id) { + subgroup_id -> BigInt, physical_expr_id -> BigInt, } } diesel::table! { - rel_groups (id) { + rel_subgroup_winners (subgroup_id) { + subgroup_id -> BigInt, + physical_expr_id -> BigInt, + } +} + +diesel::table! 
{ + rel_subgroups (id) { id -> BigInt, - status -> Integer, - created_at -> Timestamp, - rep_id -> Nullable, + group_id -> BigInt, + required_phys_prop_id -> BigInt, } } @@ -112,9 +126,11 @@ diesel::joinable!(logical_props -> rel_groups (group_id)); diesel::joinable!(physical_exprs -> physical_props (derived_phys_prop_id)); diesel::joinable!(physical_exprs -> physical_typ_descs (typ_desc)); diesel::joinable!(physical_exprs -> rel_groups (group_id)); -diesel::joinable!(rel_group_winners -> physical_exprs (physical_expr_id)); -diesel::joinable!(rel_group_winners -> physical_props (required_phys_prop_id)); -diesel::joinable!(rel_group_winners -> rel_groups (group_id)); +diesel::joinable!(rel_subgroup_physical_exprs -> physical_exprs (physical_expr_id)); +diesel::joinable!(rel_subgroup_physical_exprs -> rel_subgroups (subgroup_id)); +diesel::joinable!(rel_subgroup_winners -> physical_exprs (physical_expr_id)); +diesel::joinable!(rel_subgroups -> physical_props (required_phys_prop_id)); +diesel::joinable!(rel_subgroups -> rel_groups (group_id)); diesel::joinable!(scalar_exprs -> scalar_groups (group_id)); diesel::joinable!(scalar_exprs -> scalar_typ_descs (typ_desc)); diesel::joinable!(scalar_group_winners -> scalar_exprs (scalar_expr_id)); @@ -128,8 +144,10 @@ diesel::allow_tables_to_appear_in_same_query!( physical_exprs, physical_props, physical_typ_descs, - rel_group_winners, rel_groups, + rel_subgroup_physical_exprs, + rel_subgroup_winners, + rel_subgroups, scalar_exprs, scalar_group_winners, scalar_groups,