From fc61df0a11d9ecb69ff5161eafc3794620424ccb Mon Sep 17 00:00:00 2001
From: Yuchen Liang <70461588+yliang412@users.noreply.github.com>
Date: Fri, 14 Feb 2025 01:11:06 -0500
Subject: [PATCH] feat: full e2e pipeline (#26)

## Problem

With the initial representation and storage added in #4 and #22, we now want to support the full pipeline: parsing SQL, optimizing the plan using optd, and executing the query in DataFusion.

## Summary of changes

- Integrate all of @SarveshOO7's good work in https://github.com/cmu-db/optd/pull/10.
- Add one mock physical implementation rule + operator for each logical operator.
- Refactor scalar operator storage and reduce code bloat.
- Add physical storage tables and memo API.
- Bump MSRV to 1.81.0 to be compatible with datafusion 45.0.0: https://github.com/apache/datafusion/pull/14330

---------

Signed-off-by: Yuchen Liang
Co-authored-by: SarveshOO7
---
 .github/codecov.yml | 2 +
 .github/workflows/check.yml | 4 +-
 .github/workflows/test.yml | 80 +-
 .gitignore | 1 +
 ...8f491a37dca665205cba7d69d0c9a657a16d9.json | 20 -
 Cargo.lock | 4702 +++++++++++++++--
 Cargo.toml | 20 +-
 optd-core/Cargo.toml | 18 +-
 optd-core/src/cascades/goal.rs | 37 +
 optd-core/src/cascades/memo.rs | 27 +-
 optd-core/src/cascades/mod.rs | 472 +-
 optd-core/src/cascades/properties/mod.rs | 15 +
 .../src/operators/relational/logical/mod.rs | 18 +
 .../operators/relational/logical/project.rs | 33 +
 .../relational/physical/filter/filter.rs | 4 +-
 .../src/operators/relational/physical/mod.rs | 20 +-
 .../operators/relational/physical/project.rs | 33 +
 .../relational/physical/scan/table_scan.rs | 2 +-
 optd-core/src/operators/scalar/add.rs | 25 -
 optd-core/src/operators/scalar/binary_op.rs | 43 +
 optd-core/src/operators/scalar/constants.rs | 16 +-
 optd-core/src/operators/scalar/equal.rs | 25 -
 optd-core/src/operators/scalar/logic_op.rs | 33 +
 optd-core/src/operators/scalar/mod.rs | 73 +-
 optd-core/src/operators/scalar/unary_op.rs | 29 +
 optd-core/src/plans/logical.rs | 2 +-
 optd-core/src/plans/mod.rs | 1 +
 optd-core/src/plans/physical.rs | 50 +
 optd-core/src/plans/scalar.rs | 2 +-
 optd-core/src/storage/memo.rs | 278 +-
 ...0134520_create_logical_properties.down.sql | 1 +
 ...130134520_create_logical_properties.up.sql | 1 +
 ...134522_create_physical_properties.down.sql | 1 +
 ...30134522_create_physical_properties.up.sql | 1 +
 ...40570_create_relation_group_goals.down.sql | 1 +
 ...0140570_create_relation_group_goals.up.sql | 24 +
 ...44848_create_physical_expressions.down.sql | 1 +
 ...0144848_create_physical_expressions.up.sql | 19 +
 ...70430_create_scalar_operator_adds.down.sql | 1 -
 ...3170430_create_scalar_operator_adds.up.sql | 29 -
 ...create_scalar_operator_binary_ops.down.sql | 1 +
 ...0_create_scalar_operator_binary_ops.up.sql | 33 +
 ...454_create_scalar_operator_equals.down.sql | 1 -
 ...70454_create_scalar_operator_equals.up.sql | 29 -
 ..._create_scalar_operator_logic_ops.down.sql | 1 +
 ...43_create_scalar_operator_logic_ops.up.sql | 33 +
 ..._create_scalar_operator_unary_ops.down.sql | 1 +
 ...44_create_scalar_operator_unary_ops.up.sql | 29 +
 ...ate_physical_operator_table_scans.down.sql | 1 +
 ...reate_physical_operator_table_scans.up.sql | 33 +
 ...hysical_operator_physical_filters.down.sql | 1 +
 ..._physical_operator_physical_filters.up.sql | 37 +
 ...ysical_operator_physical_projects.down.sql | 1 +
 ...physical_operator_physical_projects.up.sql | 44 +
 ...ysical_operator_nested_loop_joins.down.sql | 1 +
 ...physical_operator_nested_loop_joins.up.sql | 43 +
 optd-core/src/test_utils.rs | 97 +-
 optd-core/src/values/mod.rs | 23 +
 optd-datafusion-cli/Cargo.toml | 89 +
 optd-datafusion-cli/Dockerfile | 38 +
 optd-datafusion-cli/README.md | 48 +
 .../examples/cli-session-context.rs | 92 +
 optd-datafusion-cli/src/catalog.rs | 365 ++
 optd-datafusion-cli/src/cli_context.rs | 92 +
 optd-datafusion-cli/src/command.rs | 222 +
 optd-datafusion-cli/src/exec.rs | 625 +++
 optd-datafusion-cli/src/functions.rs | 457 ++
 optd-datafusion-cli/src/helper.rs | 378 ++
 optd-datafusion-cli/src/highlighter.rs | 127 +
 optd-datafusion-cli/src/lib.rs | 31 +
 optd-datafusion-cli/src/main.rs | 438 ++
 optd-datafusion-cli/src/object_storage.rs | 632 +++
 optd-datafusion-cli/src/pool_type.rs | 48 +
 optd-datafusion-cli/src/print_format.rs | 691 +++
 optd-datafusion-cli/src/print_options.rs | 170 +
 optd-datafusion-cli/tests/cli_integration.rs | 57 +
 optd-datafusion-cli/tests/data/sql.txt | 1 +
 optd-datafusion/Cargo.toml | 18 +
 optd-datafusion/sql/test_filter.sql | 24 +
 optd-datafusion/sql/test_join.sql | 24 +
 optd-datafusion/sql/test_scan.sql | 24 +
 optd-datafusion/src/converter/from_optd.rs | 192 +
 optd-datafusion/src/converter/into_optd.rs | 184 +
 optd-datafusion/src/converter/mod.rs | 32 +
 optd-datafusion/src/lib.rs | 134 +
 optd-datafusion/src/planner.rs | 217 +
 optd-dsl/src/gen/operator.rs | 151 -
 87 files changed, 11169 insertions(+), 1005 deletions(-)
 delete mode 100644 .sqlx/query-a80ed7123532bfaf7e8360623ea8f491a37dca665205cba7d69d0c9a657a16d9.json
 create mode 100644 optd-core/src/cascades/goal.rs
 create mode 100644 optd-core/src/cascades/properties/mod.rs
 create mode 100644 optd-core/src/operators/relational/logical/project.rs
 create mode 100644 optd-core/src/operators/relational/physical/project.rs
 delete mode 100644 optd-core/src/operators/scalar/add.rs
 create mode 100644 optd-core/src/operators/scalar/binary_op.rs
 delete mode 100644 optd-core/src/operators/scalar/equal.rs
 create mode 100644 optd-core/src/operators/scalar/logic_op.rs
 create mode 100644 optd-core/src/operators/scalar/unary_op.rs
 create mode 100644 optd-core/src/plans/physical.rs
 create mode 100644 optd-core/src/storage/migrations/20250130134520_create_logical_properties.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250130134520_create_logical_properties.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250130134522_create_physical_properties.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250130134522_create_physical_properties.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250130144848_create_physical_expressions.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250130144848_create_physical_expressions.up.sql
 delete mode 100644 optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.down.sql
 delete mode 100644 optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.up.sql
 delete mode 100644 optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.down.sql
 delete mode 100644 optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.up.sql
 create mode 100644 optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.down.sql
 create mode 100644 optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.up.sql
 create mode 100644 optd-datafusion-cli/Cargo.toml
 create mode 100644 optd-datafusion-cli/Dockerfile
 create mode 100644 optd-datafusion-cli/README.md
 create mode 100644 optd-datafusion-cli/examples/cli-session-context.rs
 create mode 100644 optd-datafusion-cli/src/catalog.rs
 create mode 100644 optd-datafusion-cli/src/cli_context.rs
 create mode 100644 optd-datafusion-cli/src/command.rs
 create mode 100644 optd-datafusion-cli/src/exec.rs
 create mode 100644 optd-datafusion-cli/src/functions.rs
 create mode 100644 optd-datafusion-cli/src/helper.rs
 create mode 100644 optd-datafusion-cli/src/highlighter.rs
 create mode 100644 optd-datafusion-cli/src/lib.rs
 create mode 100644 optd-datafusion-cli/src/main.rs
 create mode 100644 optd-datafusion-cli/src/object_storage.rs
 create mode 100644 optd-datafusion-cli/src/pool_type.rs
 create mode 100644 optd-datafusion-cli/src/print_format.rs
 create mode 100644 optd-datafusion-cli/src/print_options.rs
 create mode 100644 optd-datafusion-cli/tests/cli_integration.rs
 create mode 100644 optd-datafusion-cli/tests/data/sql.txt
 create mode 100644 optd-datafusion/Cargo.toml
 create mode 100644 optd-datafusion/sql/test_filter.sql
 create mode 100644 optd-datafusion/sql/test_join.sql
 create mode 100644 optd-datafusion/sql/test_scan.sql
 create mode 100644 optd-datafusion/src/converter/from_optd.rs
 create mode 100644 optd-datafusion/src/converter/into_optd.rs
 create mode 100644 optd-datafusion/src/converter/mod.rs
 create mode 100644 optd-datafusion/src/lib.rs
 create mode 100644 optd-datafusion/src/planner.rs
 delete mode 100644 optd-dsl/src/gen/operator.rs
diff --git a/.github/codecov.yml b/.github/codecov.yml index 33e3dd1..e42f4ce 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -16,6 +16,8 @@ coverage: # Test files aren't important for coverage ignore: - "tests" + # Vendored datafusion CLI.
+ - "optd-datafusion-cli" # Make comments less noisy comment: diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 5d8d970..4424bc1 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -99,7 +99,9 @@ jobs: # https://docs.github.com/en/actions/learn-github-actions/contexts#context-availability strategy: matrix: - msrv: ["1.78.0"] # `Cargo.lock` version 4 + # `Cargo.lock` version 4 requires 1.78.0 + # datafusion 45.0.0 requires 1.81.0 + msrv: ["1.81.0"] name: ubuntu / ${{ matrix.msrv }} steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 42a9f33..8fca6ac 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -45,45 +45,47 @@ jobs: # https://github.com/rust-lang/cargo/issues/6669 - name: cargo test --doc run: cargo test --locked --all-features --doc - minimal: - # This action chooses the oldest version of the dependencies permitted by Cargo.toml to ensure - # that this crate is compatible with the minimal version that this crate and its dependencies - # require. This will pickup issues where this create relies on functionality that was introduced - # later than the actual version specified (e.g., when we choose just a major version, but a - # method was added after this version). - # - # This particular check can be difficult to get to succeed as often transitive dependencies may - # be incorrectly specified (e.g., a dependency specifies 1.0 but really requires 1.1.5). There - # is an alternative flag available -Zdirect-minimal-versions that uses the minimal versions for - # direct dependencies of this crate, while selecting the maximal versions for the transitive - # dependencies. Alternatively, you can add a line in your Cargo.toml to artificially increase - # the minimal dependency, which you do with e.g.: - # ```toml - # # for minimal-versions - # [target.'cfg(any())'.dependencies] - # openssl = { version = "0.10.55", optional = true } # needed to allow foo to build with -Zminimal-versions - # ``` - # The optional = true is necessary in case that dependency isn't otherwise transitively required - # by your library, and the target bit is so that this dependency edge never actually affects - # Cargo build order. See also - # https://github.com/jonhoo/fantoccini/blob/fde336472b712bc7ebf5b4e772023a7ba71b2262/Cargo.toml#L47-L49. - # This action is run on ubuntu with the stable toolchain, as it is not expected to fail - runs-on: ubuntu-latest - name: ubuntu / stable / minimal-versions - steps: - - uses: actions/checkout@v4 - with: - submodules: true - - name: Install stable - uses: dtolnay/rust-toolchain@stable - - name: Install nightly for -Zminimal-versions - uses: dtolnay/rust-toolchain@nightly - - name: rustup default stable - run: rustup default stable - - name: cargo update -Zminimal-versions - run: cargo +nightly update -Zminimal-versions - - name: cargo test - run: cargo test --locked --all-features --all-targets + # TODO: Experienced issue with twox-hash-1.6.3, re-enable once fixed. + # Failed workflow: https://github.com/cmu-db/optd/actions/runs/13323412234/job/37211945474?pr=26 + # minimal: + # # This action chooses the oldest version of the dependencies permitted by Cargo.toml to ensure + # # that this crate is compatible with the minimal version that this crate and its dependencies + # # require. 
This will pickup issues where this create relies on functionality that was introduced + # # later than the actual version specified (e.g., when we choose just a major version, but a + # # method was added after this version). + # # + # # This particular check can be difficult to get to succeed as often transitive dependencies may + # # be incorrectly specified (e.g., a dependency specifies 1.0 but really requires 1.1.5). There + # # is an alternative flag available -Zdirect-minimal-versions that uses the minimal versions for + # # direct dependencies of this crate, while selecting the maximal versions for the transitive + # # dependencies. Alternatively, you can add a line in your Cargo.toml to artificially increase + # # the minimal dependency, which you do with e.g.: + # # ```toml + # # # for minimal-versions + # # [target.'cfg(any())'.dependencies] + # # openssl = { version = "0.10.55", optional = true } # needed to allow foo to build with -Zminimal-versions + # # ``` + # # The optional = true is necessary in case that dependency isn't otherwise transitively required + # # by your library, and the target bit is so that this dependency edge never actually affects + # # Cargo build order. See also + # # https://github.com/jonhoo/fantoccini/blob/fde336472b712bc7ebf5b4e772023a7ba71b2262/Cargo.toml#L47-L49. + # # This action is run on ubuntu with the stable toolchain, as it is not expected to fail + # runs-on: ubuntu-latest + # name: ubuntu / stable / minimal-versions + # steps: + # - uses: actions/checkout@v4 + # with: + # submodules: true + # - name: Install stable + # uses: dtolnay/rust-toolchain@stable + # - name: Install nightly for -Zminimal-versions + # uses: dtolnay/rust-toolchain@nightly + # - name: rustup default stable + # run: rustup default stable + # - name: cargo update -Zminimal-versions + # run: cargo +nightly update -Zminimal-versions + # - name: cargo test + # run: cargo test --locked --all-features --all-targets os-check: # run cargo test on mac and windows runs-on: ${{ matrix.os }} diff --git a/.gitignore b/.gitignore index d2d7727..41e1412 100644 --- a/.gitignore +++ b/.gitignore @@ -12,3 +12,4 @@ target/ *.db *.db-shm *.db-wal +.history \ No newline at end of file diff --git a/.sqlx/query-a80ed7123532bfaf7e8360623ea8f491a37dca665205cba7d69d0c9a657a16d9.json b/.sqlx/query-a80ed7123532bfaf7e8360623ea8f491a37dca665205cba7d69d0c9a657a16d9.json deleted file mode 100644 index 10a6f5c..0000000 --- a/.sqlx/query-a80ed7123532bfaf7e8360623ea8f491a37dca665205cba7d69d0c9a657a16d9.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "db_name": "SQLite", - "query": "UPDATE id_sequences SET current_value = current_value + 1 where id = 0 RETURNING current_value", - "describe": { - "columns": [ - { - "name": "current_value", - "ordinal": 0, - "type_info": "Integer" - } - ], - "parameters": { - "Right": 0 - }, - "nullable": [ - false - ] - }, - "hash": "a80ed7123532bfaf7e8360623ea8f491a37dca665205cba7d69d0c9a657a16d9" -} diff --git a/Cargo.lock b/Cargo.lock index c7d57fc..15bf822 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -18,1042 +18,3993 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] -name = "allocator-api2" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - -[[package]] -name = "anyhow" -version = "1.0.95" +name = "adler32" +version = "1.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] -name = "async-recursion" -version = "1.1.1" +name = "ahash" +version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ - "proc-macro2", - "quote", - "syn", + "cfg-if", + "const-random", + "getrandom 0.2.15", + "once_cell", + "version_check", + "zerocopy", ] [[package]] -name = "atoi" -version = "2.0.0" +name = "aho-corasick" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ - "num-traits", + "memchr", ] [[package]] -name = "autocfg" -version = "1.4.0" +name = "alloc-no-stdlib" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" [[package]] -name = "backtrace" -version = "0.3.74" +name = "alloc-stdlib" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" dependencies = [ - "addr2line", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", - "windows-targets 0.52.6", + "alloc-no-stdlib", ] [[package]] -name = "base64" -version = "0.22.1" +name = "allocator-api2" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] -name = "base64ct" -version = "1.6.0" +name = "android-tzdata" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" [[package]] -name = "bitflags" -version = "2.8.0" +name = "android_system_properties" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "serde", + "libc", ] [[package]] -name = "block-buffer" -version = "0.10.4" +name = "anstream" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ - "generic-array", + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", ] [[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - -[[package]] -name = "bytes" -version 
= "1.10.0" +name = "anstyle" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] -name = "cc" -version = "1.2.11" +name = "anstyle-parse" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ - "shlex", + "utf8parse", ] [[package]] -name = "cfg-if" -version = "1.0.0" +name = "anstyle-query" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys 0.59.0", +] [[package]] -name = "concurrent-queue" -version = "2.5.0" +name = "anstyle-wincon" +version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ - "crossbeam-utils", + "anstyle", + "once_cell", + "windows-sys 0.59.0", ] [[package]] -name = "const-oid" -version = "0.9.6" +name = "anyhow" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] -name = "cpufeatures" -version = "0.2.17" +name = "apache-avro" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +checksum = "1aef82843a0ec9f8b19567445ad2421ceeb1d711514384bdd3d49fe37102ee13" dependencies = [ - "libc", + "bigdecimal", + "bzip2 0.4.4", + "crc32fast", + "digest", + "libflate", + "log", + "num-bigint", + "quad-rand", + "rand", + "regex-lite", + "serde", + "serde_bytes", + "serde_json", + "snap", + "strum", + "strum_macros", + "thiserror 1.0.69", + "typed-builder", + "uuid", + "xz2", + "zstd", ] [[package]] -name = "crc" -version = "3.2.1" +name = "arrayref" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" -dependencies = [ - "crc-catalog", -] +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" [[package]] -name = "crc-catalog" -version = "2.4.0" +name = "arrayvec" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] -name = "crossbeam-queue" -version = "0.3.12" +name = "arrow" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc" dependencies = [ - "crossbeam-utils", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + 
"arrow-select", + "arrow-string", ] [[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crypto-common" -version = "0.1.6" +name = "arrow-arith" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248" dependencies = [ - "generic-array", - "typenum", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "num", ] [[package]] -name = "der" -version = "0.7.9" +name = "arrow-array" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" +checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223" dependencies = [ - "const-oid", - "pem-rfc7468", - "zeroize", + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown 0.15.2", + "num", ] [[package]] -name = "digest" -version = "0.10.7" +name = "arrow-buffer" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89" dependencies = [ - "block-buffer", - "const-oid", - "crypto-common", - "subtle", + "bytes", + "half", + "num", ] [[package]] -name = "displaydoc" -version = "0.2.5" +name = "arrow-cast" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870" dependencies = [ - "proc-macro2", - "quote", - "syn", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", ] [[package]] -name = "dotenvy" -version = "0.15.7" +name = "arrow-csv" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" +checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6" +dependencies = [ + "arrow-array", + "arrow-cast", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "regex", +] [[package]] -name = "either" -version = "1.13.0" +name = "arrow-data" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754" dependencies = [ - "serde", + "arrow-buffer", + "arrow-schema", + "half", + "num", ] [[package]] -name = "equivalent" -version = "1.0.1" +name = "arrow-ipc" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "flatbuffers", + "lz4_flex", +] [[package]] -name = "errno" -version = "0.3.10" +name = "arrow-json" 
+version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c" dependencies = [ - "libc", - "windows-sys 0.59.0", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", ] [[package]] -name = "etcetera" -version = "0.8.0" +name = "arrow-ord" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da" dependencies = [ - "cfg-if", - "home", - "windows-sys 0.48.0", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", ] [[package]] -name = "event-listener" -version = "5.4.0" +name = "arrow-row" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c" dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", ] [[package]] -name = "fastrand" -version = "2.3.0" +name = "arrow-schema" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6" [[package]] -name = "flume" -version = "0.11.1" +name = "arrow-select" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807" dependencies = [ - "futures-core", - "futures-sink", - "spin", + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", ] [[package]] -name = "foldhash" -version = "0.1.4" +name = "arrow-string" +version = "54.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" +checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "memchr", + "num", + "regex", + "regex-syntax", +] [[package]] -name = "form_urlencoded" -version = "1.2.1" +name = "assert_cmd" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ - "percent-encoding", + "anstyle", + "bstr", + "doc-comment", + "libc", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", ] [[package]] -name = "futures-channel" -version = "0.3.31" +name = "async-compression" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ + "bzip2 0.4.4", + "flate2", "futures-core", - 
"futures-sink", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", ] [[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = "futures-executor" -version = "0.3.31" +name = "async-recursion" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ - "futures-core", - "futures-task", - "futures-util", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "futures-intrusive" -version = "0.5.0" +name = "async-trait" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ - "futures-core", - "lock_api", - "parking_lot", + "proc-macro2", + "quote", + "syn", ] [[package]] -name = "futures-io" -version = "0.3.31" +name = "atoi" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" +dependencies = [ + "num-traits", +] [[package]] -name = "futures-sink" -version = "0.3.31" +name = "atomic-waker" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] -name = "futures-task" -version = "0.3.31" +name = "autocfg" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] -name = "futures-util" -version = "0.3.31" +name = "aws-config" +version = "1.5.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "50236e4d60fe8458de90a71c0922c761e41755adf091b1b03de1cef537179915" dependencies = [ - "futures-core", - "futures-io", - "futures-sink", - "futures-task", - "memchr", + "aws-credential-types", + "aws-runtime", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "hex", + "http 0.2.12", + "ring", + "time", + "tokio", + "tracing", + "url", + "zeroize", +] + +[[package]] +name = "aws-credential-types" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da" +dependencies = [ + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "zeroize", +] + +[[package]] +name = "aws-runtime" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76dd04d39cc12844c0994f2c9c5a6f5184c22e9188ec1ff723de41910a21dcad" +dependencies = [ + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + 
"aws-smithy-http", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "fastrand", + "http 0.2.12", + "http-body 0.4.6", + "once_cell", + "percent-encoding", "pin-project-lite", - "pin-utils", - "slab", + "tracing", + "uuid", ] [[package]] -name = "generic-array" -version = "0.14.7" +name = "aws-sdk-sso" +version = "1.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "00a35fc7e74f5be45839eb753568535c074a592185dd0a2d406685018d581c43" dependencies = [ - "typenum", - "version_check", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", ] [[package]] -name = "getrandom" -version = "0.2.15" +name = "aws-sdk-ssooidc" +version = "1.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "f8fa655b4f313124ce272cbc38c5fef13793c832279cec750103e5e6b71a54b8" dependencies = [ - "cfg-if", - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-types", + "bytes", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", ] [[package]] -name = "getrandom" -version = "0.3.1" +name = "aws-sdk-sts" +version = "1.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "dc1cfe5e16b90421ea031f4c6348b534ef442e76f6bf4a1b2b592c12cc2c6af9" dependencies = [ - "cfg-if", - "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "aws-credential-types", + "aws-runtime", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "http 0.2.12", + "once_cell", + "regex-lite", + "tracing", ] [[package]] -name = "gimli" -version = "0.31.1" +name = "aws-sigv4" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" +checksum = "9bfe75fad52793ce6dec0dc3d4b1f388f038b5eb866c8d4d7f3a8e21b5ea5051" +dependencies = [ + "aws-credential-types", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "form_urlencoded", + "hex", + "hmac", + "http 0.2.12", + "http 1.2.0", + "once_cell", + "percent-encoding", + "sha2", + "time", + "tracing", +] [[package]] -name = "hashbrown" -version = "0.15.2" +name = "aws-smithy-async" +version = "1.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +checksum = "fa59d1327d8b5053c54bf2eaae63bf629ba9e904434d0835a28ed3c0ed0a614e" dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", + "futures-util", + "pin-project-lite", + "tokio", ] [[package]] -name = "hashlink" -version = "0.10.0" +name = "aws-smithy-http" +version = "0.60.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc" dependencies = [ - "hashbrown", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http-body 0.4.6", + "once_cell", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", ] [[package]] -name = "heck" -version = "0.5.0" +name = "aws-smithy-json" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +checksum = "623a51127f24c30776c8b374295f2df78d92517386f77ba30773f15a30ce1422" +dependencies = [ + "aws-smithy-types", +] [[package]] -name = "hex" -version = "0.4.3" +name = "aws-smithy-query" +version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" +checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +dependencies = [ + "aws-smithy-types", + "urlencoding", +] [[package]] -name = "hkdf" -version = "0.12.4" +name = "aws-smithy-runtime" +version = "1.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +checksum = "d526a12d9ed61fadefda24abe2e682892ba288c2018bcb38b1b4c111d13f6d92" dependencies = [ - "hmac", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "fastrand", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "http-body 1.0.1", + "httparse", + "hyper 0.14.32", + "hyper-rustls 0.24.2", + "once_cell", + "pin-project-lite", + "pin-utils", + "rustls 0.21.12", + "tokio", + "tracing", ] [[package]] -name = "hmac" -version = "0.12.1" +name = "aws-smithy-runtime-api" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" dependencies = [ - "digest", + "aws-smithy-async", + "aws-smithy-types", + "bytes", + "http 0.2.12", + "http 1.2.0", + "pin-project-lite", + "tokio", + "tracing", + "zeroize", ] [[package]] -name = "home" -version = "0.5.11" +name = "aws-smithy-types" +version = "1.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "c7b8a53819e42f10d0821f56da995e1470b199686a1809168db6ca485665f042" dependencies = [ - "windows-sys 0.59.0", + "base64-simd", + "bytes", + "bytes-utils", + "futures-core", + "http 0.2.12", + "http 1.2.0", + "http-body 0.4.6", + "http-body 1.0.1", + "http-body-util", + "itoa", + "num-integer", + "pin-project-lite", + "pin-utils", + "ryu", + "serde", + "time", + "tokio", + "tokio-util", ] [[package]] -name = "icu_collections" -version = "1.5.0" +name = "aws-smithy-xml" +version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc" dependencies = [ - "displaydoc", - "yoke", - "zerofrom", - "zerovec", + "xmlparser", ] [[package]] -name = "icu_locid" -version = "1.5.0" +name = "aws-types" +version = "1.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +checksum = "dfbd0a668309ec1f66c0f6bda4840dd6d4796ae26d699ebc266d7cc95c6d040f" dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", + "aws-credential-types", + "aws-smithy-async", + "aws-smithy-runtime-api", + "aws-smithy-types", + "rustc_version", + "tracing", ] [[package]] -name = "icu_locid_transform" -version = "1.5.0" +name = "backtrace" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ - "displaydoc", - "icu_locid", - "icu_locid_transform_data", - "icu_provider", - "tinystr", - "zerovec", + "addr2line", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", + "windows-targets 0.52.6", ] [[package]] -name = "icu_locid_transform_data" -version = "1.5.0" +name = "base64" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] -name = "icu_normalizer" -version = "1.5.0" +name = "base64" +version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + +[[package]] +name = "base64-simd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339abbe78e73178762e23bea9dfd08e697eb3f3301cd4be981c0f78ba5859195" dependencies = [ - "displaydoc", - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "write16", - "zerovec", + "outref", + "vsimd", ] [[package]] -name = "icu_normalizer_data" -version = "1.5.0" +name = "base64ct" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] -name = "icu_properties" -version = "1.5.1" +name = "bigdecimal" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +checksum = "7f31f3af01c5c65a07985c804d3366560e6fa7883d640a122819b14ec327482c" dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid_transform", - "icu_properties_data", - "icu_provider", - "tinystr", - "zerovec", + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", + "serde", ] [[package]] -name = "icu_properties_data" -version = "1.5.0" +name = "bitflags" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] -name = "icu_provider" -version = "1.5.0" +name = "bitflags" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" dependencies = [ - "displaydoc", - "icu_locid", - 
"icu_provider_macros", - "stable_deref_trait", - "tinystr", - "writeable", - "yoke", - "zerofrom", - "zerovec", + "serde", ] [[package]] -name = "icu_provider_macros" -version = "1.5.0" +name = "blake2" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" dependencies = [ - "proc-macro2", - "quote", - "syn", + "digest", ] [[package]] -name = "idna" -version = "1.0.3" +name = "blake3" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", ] [[package]] -name = "idna_adapter" -version = "1.2.0" +name = "block-buffer" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" dependencies = [ - "icu_normalizer", - "icu_properties", + "generic-array", ] [[package]] -name = "indexmap" +name = "brotli" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74fa05ad7d803d413eb8380983b092cbbaf9a85f151b871360e7b00cd7060b37" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "1.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531a9155a481e2ee699d4f98f43c0ca4ff8ee1bfd55c31e9e98fb29d2b176fe0" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" + +[[package]] +name = "bytes-utils" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dafe3a8757b027e2be6e4e5601ed563c55989fcf1546e933c66c8eb3a058d35" +dependencies = [ + "bytes", + "either", +] + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = 
"0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7777341816418c02e033934a09f20dc0ccaf65a5201ef8a450ae0105a573fda" +dependencies = [ + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + +[[package]] +name = "chrono" +version = "0.4.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "serde", + "windows-targets 0.52.6", +] + +[[package]] +name = "chrono-tz" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c6ac4f2c0bf0f44e9161aec9675e1050aa4a530663c4a9e37e108fa948bca9f" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" +dependencies = [ + "parse-zoneinfo", + "phf_codegen", +] + +[[package]] +name = "clap" +version = "4.5.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acebd8ad879283633b343856142139f2da2317c96b05b4dd6181c61e2480184" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ba32cbda51c7e1dfd49acc1457ba1a7dec5b64fe360e828acb13ca8dc9c2f9" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "clipboard-win" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15efe7a882b08f34e38556b14f2fb3daa98769d06c7f0c1b076dfd0d983bc892" +dependencies = [ + "error-code", +] + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "comfy-table" +version = "7.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +dependencies = [ + "unicode-segmentation", + "unicode-width", +] + +[[package]] +name = "concurrent-queue" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.15", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" + +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + +[[package]] +name = "crc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "crunchy" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" 
+ +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "ctor" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a2785755761f3ddc1492979ce1e48d2c00d09311c39e4466429188f3dd6501" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "datafusion" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a" +dependencies = [ + "apache-avro", + "arrow", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2 0.5.0", + "chrono", + "datafusion-catalog", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-functions-nested", + "datafusion-functions-table", + "datafusion-functions-window", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-optimizer", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "itertools 0.14.0", + "log", + "num-traits", + "object_store", + "parking_lot", + "parquet", + "rand", + "regex", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-catalog" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1" +dependencies = [ + "arrow", + "async-trait", + "dashmap", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-plan", + "datafusion-sql", + "futures", + "itertools 0.14.0", + "log", + "parking_lot", + "sqlparser", +] + +[[package]] +name = "datafusion-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602" +dependencies = [ + "ahash", + "apache-avro", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ipc", + "arrow-schema", + "base64 0.22.1", + "half", + "hashbrown 0.14.5", + "indexmap", + "libc", + "log", + "object_store", + "parquet", + "paste", + "recursive", + "sqlparser", + 
"tokio", + "web-time", +] + +[[package]] +name = "datafusion-common-runtime" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48" +dependencies = [ + "log", + "tokio", +] + +[[package]] +name = "datafusion-doc" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66" + +[[package]] +name = "datafusion-execution" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27" +dependencies = [ + "arrow", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-functions-window-common", + "datafusion-physical-expr-common", + "indexmap", + "paste", + "recursive", + "serde_json", + "sqlparser", +] + +[[package]] +name = "datafusion-expr-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885" +dependencies = [ + "arrow", + "datafusion-common", + "itertools 0.14.0", + "paste", +] + +[[package]] +name = "datafusion-functions" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b" +dependencies = [ + "arrow", + "arrow-buffer", + "base64 0.22.1", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-macros", + "hashbrown 0.14.5", + "hex", + "itertools 0.14.0", + "log", + "md-5", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-functions-aggregate" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6" +dependencies = [ + "ahash", + "arrow", + "arrow-buffer", + "arrow-schema", + "datafusion-common", + "datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "half", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-aggregate-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "datafusion-expr-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-functions-nested" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e" +dependencies = [ + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "datafusion-common", + 
"datafusion-doc", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions", + "datafusion-functions-aggregate", + "datafusion-macros", + "datafusion-physical-expr-common", + "itertools 0.14.0", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-table" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36" +dependencies = [ + "arrow", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-plan", + "parking_lot", + "paste", +] + +[[package]] +name = "datafusion-functions-window" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5" +dependencies = [ + "datafusion-common", + "datafusion-doc", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-macros", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "log", + "paste", +] + +[[package]] +name = "datafusion-functions-window-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4" +dependencies = [ + "datafusion-common", + "datafusion-physical-expr-common", +] + +[[package]] +name = "datafusion-macros" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af" +dependencies = [ + "datafusion-expr", + "quote", + "syn", +] + +[[package]] +name = "datafusion-optimizer" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54" +dependencies = [ + "arrow", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "indexmap", + "itertools 0.14.0", + "log", + "recursive", + "regex", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr-common", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "paste", + "petgraph", +] + +[[package]] +name = "datafusion-physical-expr-common" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435" +dependencies = [ + "ahash", + "arrow", + "arrow-buffer", + "datafusion-common", + "datafusion-expr-common", + "hashbrown 0.14.5", + "itertools 0.14.0", +] + +[[package]] +name = "datafusion-physical-optimizer" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-expr-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "futures", + "itertools 0.14.0", + "log", + "recursive", + "url", +] + 
+[[package]] +name = "datafusion-physical-plan" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-common-runtime", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-window-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "futures", + "half", + "hashbrown 0.14.5", + "indexmap", + "itertools 0.14.0", + "log", + "parking_lot", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "datafusion-sql" +version = "45.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43" +dependencies = [ + "arrow", + "arrow-array", + "arrow-schema", + "bigdecimal", + "datafusion-common", + "datafusion-expr", + "indexmap", + "log", + "recursive", + "regex", + "sqlparser", +] + +[[package]] +name = "der" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0" +dependencies = [ + "const-oid", + "pem-rfc7468", + "zeroize", +] + +[[package]] +name = "deranged" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" +dependencies = [ + "powerfmt", +] + +[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "const-oid", + "crypto-common", + "subtle", +] + +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.59.0", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "dotenvy" +version = "0.15.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +dependencies = [ + "serde", +] + +[[package]] +name = "endian-type" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "env_filter" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "humantime", + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "error-code" +version = "3.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d9305ccc6942a704f4335694ecd3de2ea531b114ac2d51f5f843750787a92f" + +[[package]] +name = "etcetera" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "136d1b5283a1ab77bd9257427ffd09d8667ced0570b6f938942bc7568ed5b943" +dependencies = [ + "cfg-if", + "home", + "windows-sys 0.48.0", +] + +[[package]] +name = "event-listener" +version = "5.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3492acde4c3fc54c845eaab3eed8bd00c7a7d881f78bfc801e43a93dec1331ae" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + +[[package]] +name = "fd-lock" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + +[[package]] +name = "flatbuffers" +version = "24.12.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "float-cmp" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b09cf3155332e944990140d967ff5eceb70df778b34f77d8075db46e4704e6d8" +dependencies = [ + "num-traits", +] + +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "futures-core", + "futures-sink", + "spin", +] + 
+[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foldhash" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" + +[[package]] +name = "form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "js-sys", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "wasm-bindgen", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" + +[[package]] +name = "h2" +version = "0.3.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "h2" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.2.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "half" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "hashbrown" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "hashlink" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" +dependencies = [ + "hashbrown 0.15.2", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + 
"hmac", +] + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "home" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.2.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.2.0", + "http-body 1.0.1", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" + +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.26", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "h2 0.4.7", + "http 1.2.0", + "http-body 1.0.1", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 
0.21.12", + "rustls-native-certs 0.6.3", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" +dependencies = [ + "futures-util", + "http 1.2.0", + "hyper 1.6.0", + "hyper-util", + "rustls 0.23.23", + "rustls-native-certs 0.8.1", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.1", + "tower-service", +] + +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.2.0", + "http-body 1.0.1", + "hyper 1.6.0", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", +] + +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + 
"tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + +[[package]] +name = "indexmap" version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" +dependencies = [ + "equivalent", + "hashbrown 0.15.2", +] + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin", +] + +[[package]] +name = "lexical-core" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.169" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" + +[[package]] +name = "libflate" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d" +dependencies = [ + "core2", + "hashbrown 0.14.5", + "rle-decode-fast", +] + +[[package]] +name = "libm" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.8.0", + "libc", +] + +[[package]] +name = "libsqlite3-sys" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +dependencies = [ + "cc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + +[[package]] +name = "litemap" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" + +[[package]] +name = "lz4_flex" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75761162ae2b0e580d7e7c390558127e5f01b4194debd6221fd8c207fc80e3f5" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "mimalloc" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "mime" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" + +[[package]] +name = "miniz_oxide" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +dependencies = [ + "adler2", +] + +[[package]] +name = "mio" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +dependencies = [ + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.52.0", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.8.0", + "cfg-if", + "cfg_aliases", + "libc", +] + +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", + "serde", +] + +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand", + "smallvec", + "zeroize", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-conv" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "object" +version = "0.36.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +dependencies = [ + "memchr", +] + +[[package]] +name = "object_store" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cfccb68961a56facde1163f9319e0d15743352344e7808a11795fb99698dcaf" +dependencies = [ + "async-trait", + "base64 0.22.1", + "bytes", + "chrono", + "futures", + "humantime", + "hyper 1.6.0", + "itertools 0.13.0", + "md-5", + "parking_lot", + "percent-encoding", + "quick-xml", + "rand", + "reqwest", + "ring", + "rustls-pemfile 2.2.0", + "serde", + "serde_json", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.20.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + +[[package]] +name = "optd-core" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-recursion", + "dotenvy", + "pest", + "pest_derive", + "proc-macro2", + "serde", + "serde_json", + "sqlx", + "tokio", + "trait-variant", +] + +[[package]] +name = "optd-datafusion" +version = "0.1.0" +dependencies = [ + "anyhow", + "async-recursion", + "async-trait", + "datafusion", + "futures", + "itertools 0.14.0", + "optd-core", + "proc-macro2", + "tokio", + "trait-variant", +] + +[[package]] +name = "optd-datafusion-cli" +version = "45.0.0" +dependencies = [ + "arrow", + "assert_cmd", + "async-trait", + "aws-config", + "aws-credential-types", + "aws-sdk-sso", + "aws-sdk-ssooidc", + "aws-sdk-sts", + "clap", + "ctor", + "datafusion", + "datafusion-catalog", + "dirs", + "env_logger", + "futures", + "home", + "mimalloc", + "object_store", + "optd-datafusion", + "parking_lot", + "parquet", + "predicates", + "regex", + "rstest", + "rustyline", + "tokio", + "url", +] + +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "outref" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" + +[[package]] +name = "parking" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + +[[package]] +name = "parquet" +version = "54.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64 0.22.1", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.15.2", + "lz4_flex", + "num", + "num-bigint", + "object_store", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", + "zstd-sys", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24" +dependencies = [ + "regex", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pem-rfc7468" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +dependencies = [ + "base64ct", +] + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "pest" +version = "2.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" +dependencies = [ + "memchr", + "thiserror 2.0.11", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + +[[package]] +name = "petgraph" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + +[[package]] +name = "pkcs8" +version = "0.10.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +dependencies = [ + "der", + "spki", +] + +[[package]] +name = "pkg-config" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" + +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "predicates" +version = "3.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5d19ee57562043d37e82899fade9a22ebab7be9cef5026b07fda9cdd4293573" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "727e462b119fe9c93fd0eb1429a5f7647394014cf3c04ab2c0350eeb09095ffa" + +[[package]] +name = "predicates-tree" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72dd2d6d381dfb73a193c7fca536518d7caee39fc8503f74e7dc0be0531b425c" +dependencies = [ + "predicates-core", + "termtree", +] + +[[package]] +name = "proc-macro-crate" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ - "equivalent", - "hashbrown", + "toml_edit", ] [[package]] -name = "itoa" -version = "1.0.14" +name = "proc-macro2" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +dependencies = [ + "unicode-ident", +] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "psm" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" dependencies = [ - "spin", + "cc", ] [[package]] -name = "libc" -version = "0.2.169" +name = "quad-rand" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "5a651516ddc9168ebd67b24afd085a718be02f8858fe406591b013d101ce2f40" [[package]] -name = "libm" -version = "0.2.11" +name = "quick-xml" +version = "0.37.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +checksum = "165859e9e55f79d67b96c5d96f4e88b6f2695a1972849c15a6a3f5c59fc2c003" +dependencies = [ + "memchr", + "serde", +] [[package]] -name = "libsqlite3-sys" -version = "0.30.1" +name = "quinn" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" +checksum = "62e96808277ec6f97351a2380e6c25114bc9e67037775464979f3037c92d05ef" 
dependencies = [ - "cc", - "pkg-config", - "vcpkg", + "bytes", + "pin-project-lite", + "quinn-proto", + "quinn-udp", + "rustc-hash", + "rustls 0.23.23", + "socket2", + "thiserror 2.0.11", + "tokio", + "tracing", ] [[package]] -name = "linux-raw-sys" -version = "0.4.15" +name = "quinn-proto" +version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "a2fe5ef3495d7d2e377ff17b1a8ce2ee2ec2a18cde8b6ad6619d65d0701c135d" +dependencies = [ + "bytes", + "getrandom 0.2.15", + "rand", + "ring", + "rustc-hash", + "rustls 0.23.23", + "rustls-pki-types", + "slab", + "thiserror 2.0.11", + "tinyvec", + "tracing", + "web-time", +] [[package]] -name = "litemap" -version = "0.7.4" +name = "quinn-udp" +version = "0.5.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" +checksum = "1c40286217b4ba3a71d644d752e6a0b71f13f1b6a2c5311acfcbe0c2418ed904" +dependencies = [ + "cfg_aliases", + "libc", + "once_cell", + "socket2", + "tracing", + "windows-sys 0.59.0", +] [[package]] -name = "lock_api" -version = "0.4.12" +name = "quote" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ - "autocfg", - "scopeguard", + "proc-macro2", ] [[package]] -name = "log" -version = "0.4.25" +name = "radix_trie" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] [[package]] -name = "md-5" -version = "0.10.6" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "cfg-if", - "digest", + "libc", + "rand_chacha", + "rand_core", ] [[package]] -name = "memchr" -version = "2.7.4" +name = "rand_chacha" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] [[package]] -name = "miniz_oxide" -version = "0.8.3" +name = "rand_core" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "adler2", + "getrandom 0.2.15", ] [[package]] -name = "mio" -version = "1.0.3" +name = "recursive" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" dependencies = [ - "libc", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "recursive-proc-macro-impl", + "stacker", ] [[package]] -name = "num-bigint-dig" -version = "0.8.4" +name = 
"recursive-proc-macro-impl" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ - "byteorder", - "lazy_static", - "libm", - "num-integer", - "num-iter", - "num-traits", - "rand", - "smallvec", - "zeroize", + "quote", + "syn", ] [[package]] -name = "num-integer" -version = "0.1.46" +name = "redox_syscall" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "num-traits", + "bitflags 2.8.0", ] [[package]] -name = "num-iter" -version = "0.1.45" +name = "redox_users" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" dependencies = [ - "autocfg", - "num-integer", - "num-traits", + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.11", ] [[package]] -name = "num-traits" -version = "0.2.19" +name = "regex" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ - "autocfg", - "libm", + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", ] [[package]] -name = "object" -version = "0.36.7" +name = "regex-automata" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ + "aho-corasick", "memchr", + "regex-syntax", ] [[package]] -name = "once_cell" -version = "1.20.2" +name = "regex-lite" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "53a49587ad06b26609c52e423de037e7f57f20d53535d66e08c695f347df952a" [[package]] -name = "optd-core" -version = "0.1.0" +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "relative-path" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2" + +[[package]] +name = "reqwest" +version = "0.12.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ - "anyhow", - "async-recursion", - "dotenvy", - "pest", - "pest_derive", - "proc-macro2", + "base64 0.22.1", + "bytes", + "futures-core", + "futures-util", + "h2 0.4.7", + "http 1.2.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.6.0", + "hyper-rustls 0.27.5", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "quinn", + "rustls 0.23.23", + "rustls-native-certs 0.8.1", + "rustls-pemfile 2.2.0", + "rustls-pki-types", "serde", "serde_json", - "sqlx", + 
"serde_urlencoded", + "sync_wrapper", "tokio", - "trait-variant", + "tokio-rustls 0.26.1", + "tokio-util", + "tower", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-streams", + "web-sys", + "windows-registry", ] [[package]] -name = "parking" -version = "2.2.1" +name = "ring" +version = "0.17.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" +checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "spin", + "untrusted", + "windows-sys 0.52.0", +] [[package]] -name = "parking_lot" -version = "0.12.3" +name = "rle-decode-fast" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + +[[package]] +name = "rsa" +version = "0.9.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47c75d7c5c6b673e58bf54d8544a9f432e3a925b0e80f7cd3602ab5c50c55519" dependencies = [ - "lock_api", - "parking_lot_core", + "const-oid", + "digest", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core", + "signature", + "spki", + "subtle", + "zeroize", ] [[package]] -name = "parking_lot_core" -version = "0.9.10" +name = "rstest" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +checksum = "03e905296805ab93e13c1ec3a03f4b6c4f35e9498a3d5fa96dc626d22c03cd89" dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.52.6", + "futures-timer", + "futures-util", + "rstest_macros", + "rustc_version", ] [[package]] -name = "pem-rfc7468" -version = "0.7.0" +name = "rstest_macros" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88b39c9bfcfc231068454382784bb460aae594343fb030d46e9f50a645418412" +checksum = "ef0053bbffce09062bee4bcc499b0fbe7a57b879f1efe088d6d8d4c7adcdef9b" dependencies = [ - "base64ct", + "cfg-if", + "glob", + "proc-macro-crate", + "proc-macro2", + "quote", + "regex", + "relative-path", + "rustc_version", + "syn", + "unicode-ident", ] [[package]] -name = "percent-encoding" -version = "2.3.1" +name = "rustc-demangle" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] -name = "pest" -version = "2.7.15" +name = "rustc-hash" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" dependencies = [ - "memchr", - "thiserror", - "ucd-trie", + "semver", ] [[package]] -name = "pest_derive" -version = "2.7.15" +name = "rustix" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"816518421cfc6887a0d62bf441b6ffb4536fcc926395a69e1a85852d4363f57e" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "pest", - "pest_generator", + "bitflags 2.8.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", ] [[package]] -name = "pest_generator" -version = "2.7.15" +name = "rustls" +version = "0.21.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1396fd3a870fc7838768d171b4616d5c91f6cc25e377b673d714567d99377b" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ - "pest", - "pest_meta", - "proc-macro2", - "quote", - "syn", + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", ] [[package]] -name = "pest_meta" -version = "2.7.15" +name = "rustls" +version = "0.23.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1e58089ea25d717bfd31fb534e4f3afcc2cc569c70de3e239778991ea3b7dea" +checksum = "47796c98c480fce5406ef69d1c76378375492c3b0a0de587be0c1d9feb12f395" dependencies = [ "once_cell", - "pest", - "sha2", + "ring", + "rustls-pki-types", + "rustls-webpki 0.102.8", + "subtle", + "zeroize", ] [[package]] -name = "pin-project-lite" -version = "0.2.16" +name = "rustls-native-certs" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] [[package]] -name = "pin-utils" -version = "0.1.0" +name = "rustls-native-certs" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +dependencies = [ + "openssl-probe", + "rustls-pki-types", + "schannel", + "security-framework 3.2.0", +] [[package]] -name = "pkcs1" -version = "0.7.5" +name = "rustls-pemfile" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "der", - "pkcs8", - "spki", + "base64 0.21.7", ] [[package]] -name = "pkcs8" -version = "0.10.2" +name = "rustls-pemfile" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" dependencies = [ - "der", - "spki", + "rustls-pki-types", ] [[package]] -name = "pkg-config" -version = "0.3.31" +name = "rustls-pki-types" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" +dependencies = [ + "web-time", +] [[package]] -name = "ppv-lite86" -version = "0.2.20" +name = "rustls-webpki" +version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "zerocopy", + "ring", + "untrusted", ] 
[[package]] -name = "proc-macro2" -version = "1.0.93" +name = "rustls-webpki" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ - "unicode-ident", + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] -name = "quote" -version = "1.0.38" +name = "rustversion" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" -dependencies = [ - "proc-macro2", -] +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] -name = "rand" -version = "0.8.5" +name = "rustyline" +version = "15.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "2ee1e066dc922e513bda599c6ccb5f3bb2b0ea5870a579448f2622993f0a9a2f" dependencies = [ + "bitflags 2.8.0", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", "libc", - "rand_chacha", - "rand_core", + "log", + "memchr", + "nix", + "radix_trie", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "windows-sys 0.59.0", ] [[package]] -name = "rand_chacha" -version = "0.3.1" +name = "ryu" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ - "ppv-lite86", - "rand_core", + "winapi-util", ] [[package]] -name = "rand_core" -version = "0.6.4" +name = "schannel" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ - "getrandom 0.2.15", + "windows-sys 0.59.0", ] [[package]] -name = "redox_syscall" -version = "0.5.8" +name = "scopeguard" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "bitflags", + "ring", + "untrusted", ] [[package]] -name = "rsa" -version = "0.9.7" +name = "security-framework" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47c75d7c5c6b673e58bf54d8544a9f432e3a925b0e80f7cd3602ab5c50c55519" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "const-oid", - "digest", - "num-bigint-dig", - "num-integer", - "num-traits", - "pkcs1", - "pkcs8", - "rand_core", - "signature", - "spki", - "subtle", - "zeroize", + "bitflags 2.8.0", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", ] [[package]] -name = "rustc-demangle" -version = "0.1.24" +name = "security-framework" 
+version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" +dependencies = [ + "bitflags 2.8.0", + "core-foundation 0.10.0", + "core-foundation-sys", + "libc", + "security-framework-sys", +] [[package]] -name = "rustix" -version = "0.38.44" +name = "security-framework-sys" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" dependencies = [ - "bitflags", - "errno", + "core-foundation-sys", "libc", - "linux-raw-sys", - "windows-sys 0.59.0", ] [[package]] -name = "ryu" -version = "1.0.19" +name = "semver" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" [[package]] -name = "scopeguard" -version = "1.2.0" +name = "seq-macro" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" @@ -1064,6 +4015,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.217" @@ -1146,6 +4106,18 @@ dependencies = [ "rand_core", ] +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.9" @@ -1164,6 +4136,33 @@ dependencies = [ "serde", ] +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + [[package]] name = "socket2" version = "0.5.8" @@ -1193,6 +4192,27 @@ dependencies = [ "der", ] +[[package]] +name = "sqlparser" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sqlx" version = "0.8.3" @@ -1221,7 +4241,7 @@ dependencies = [ "futures-intrusive", "futures-io", "futures-util", - "hashbrown", + "hashbrown 0.15.2", "hashlink", "indexmap", "log", @@ -1232,7 +4252,7 @@ dependencies = [ "serde_json", "sha2", "smallvec", - "thiserror", + "thiserror 2.0.11", "tokio", "tokio-stream", "tracing", @@ -1285,8 +4305,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4560278f0e00ce64938540546f59f590d60beee33fffbd3b9cd47851e5fff233" dependencies = [ "atoi", - "base64", - "bitflags", + "base64 0.22.1", + "bitflags 2.8.0", "byteorder", "bytes", "crc", @@ -1315,7 +4335,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 2.0.11", "tracing", "whoami", ] @@ -1327,8 +4347,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5b98a57f363ed6764d5b3a12bfedf62f07aa16e1856a7ddc2a0bb190a959613" dependencies = [ "atoi", - "base64", - "bitflags", + "base64 0.22.1", + "bitflags 2.8.0", "byteorder", "crc", "dotenvy", @@ -1352,7 +4372,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 2.0.11", "tracing", "whoami", ] @@ -1386,6 +4406,25 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d08feb8f695b465baed819b03c128dc23f57a694510ab1f06c77f763975685e" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stringprep" version = "0.1.5" @@ -1397,6 +4436,31 @@ dependencies = [ "unicode-properties", ] +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "strum" +version = "0.26.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" + +[[package]] +name = "strum_macros" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "subtle" version = "2.6.1" @@ -1414,6 +4478,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.1" @@ -1440,23 +4513,99 @@ dependencies = [ ] [[package]] -name = "thiserror" -version = "2.0.11" +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + +[[package]] +name = "thiserror" +version = "1.0.69" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl 2.0.11", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "time" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ - "thiserror-impl", + "deranged", + "num-conv", + "powerfmt", + "serde", + "time-core", + "time-macros", ] [[package]] -name = "thiserror-impl" -version = "2.0.11" +name = "time-core" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ - "proc-macro2", - "quote", - "syn", + "num-conv", + "time-core", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", ] [[package]] @@ -1513,6 +4662,26 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" +dependencies = [ + "rustls 0.23.23", + "tokio", +] + [[package]] name = "tokio-stream" version = "0.1.17" @@ -1524,6 +4693,63 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.22.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + +[[package]] +name = "tower-service" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" + [[package]] name = "tracing" version = "0.1.41" @@ -1567,6 +4793,42 @@ dependencies = [ "syn", ] +[[package]] +name = "try-lock" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typed-builder" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06fbd5b8de54c5f7c91f6fe4cebb949be2125d7758e630bb58b1d831dbce600" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9534daa9fd3ed0bd911d462a37f172228077e7abf18c18a5f67199d959205f8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "typenum" version = "1.17.0" @@ -1606,6 +4868,24 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.4" @@ -1617,6 +4897,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf16_iter" version = "1.0.5" @@ -1629,6 +4915,22 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "uuid" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0" +dependencies = [ + "getrandom 0.3.1", + "serde", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -1641,6 +4943,40 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vsimd" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1662,6 +4998,110 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" +[[package]] +name = "wasm-bindgen" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +dependencies = [ + "bumpalo", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.100" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "wasm-streams" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "web-sys" +version = "0.3.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "whoami" version = "1.5.2" @@ -1672,6 +5112,54 @@ dependencies = [ "wasite", ] +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-registry" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" +dependencies = [ + "windows-result", + "windows-strings", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -1820,13 +5308,22 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "winnow" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59690dea168f2198d1a3b0cac23b8063efcd11012f10ae4698f284808c8ef603" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen-rt" version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" dependencies = [ - "bitflags", + "bitflags 2.8.0", ] [[package]] @@ -1841,6 +5338,21 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xmlparser" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4" + +[[package]] +name = "xz2" +version = "0.1.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + [[package]] name = "yoke" version = "0.7.5" @@ -1934,3 +5446,31 @@ dependencies = [ "quote", "syn", ] + +[[package]] +name = "zstd" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.13+zstd.1.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 0315d49..d5c32a8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,21 @@ [workspace] -members = ["optd-core"] +members = ["optd-core", "optd-datafusion", "optd-datafusion-cli"] resolver = "2" + + +[workspace.package] +version = "0.1.0" +edition = "2021" +rust-version = "1.81.0" +repository = "https://github.com/cmu-db/optd" + + + +[workspace.dependencies] +anyhow = "1.0" +trait-variant = "0.1.2" +async-recursion = "1.1.1" +tokio = { version = "1.43.0", features = ["full"] } +datafusion = "45.0.0" +# Pin more recent versions for `-Zminimal-versions`. +proc-macro2 = "1.0.60" # For a missing feature (https://github.com/rust-lang/rust/issues/113152). \ No newline at end of file diff --git a/optd-core/Cargo.toml b/optd-core/Cargo.toml index 94efe51..a3a5381 100644 --- a/optd-core/Cargo.toml +++ b/optd-core/Cargo.toml @@ -1,19 +1,19 @@ [package] name = "optd-core" -version = "0.1.0" -edition = "2021" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +repository.workspace = true [dependencies] +tokio.workspace = true +anyhow.workspace = true +async-recursion.workspace = true +trait-variant.workspace = true +proc-macro2.workspace = true sqlx = { version = "0.8", features = [ "sqlite", "runtime-tokio", "migrate" ] } -trait-variant = "0.1.2" - -# Pin more recent versions for `-Zminimal-versions`. -proc-macro2 = "1.0.60" # For a missing feature (https://github.com/rust-lang/rust/issues/113152). -anyhow = "1.0.95" -tokio = { version = "1.43.0", features = ["full"] } serde = { version = "1.0", features = ["derive"] } serde_json = { version = "1", features = ["raw_value"] } dotenvy = "0.15" -async-recursion = "1.1.1" pest = "2.7.15" pest_derive = "2.7.15" diff --git a/optd-core/src/cascades/goal.rs b/optd-core/src/cascades/goal.rs new file mode 100644 index 0000000..fb1c6f8 --- /dev/null +++ b/optd-core/src/cascades/goal.rs @@ -0,0 +1,37 @@ +use serde::Deserialize; + +/// A unique identifier for a goal in the memo table. +#[repr(transparent)] +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + sqlx::Type, + serde::Serialize, + Deserialize, +)] +#[sqlx(transparent)] +pub struct GoalId(pub i64); + +/// The optimization status of a group or a physical expression with a goal in the memo table. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, sqlx::Type)] +#[repr(i32)] +pub enum OptimizationStatus { + /// The group or the physical expression has not been explored. 
+    Unoptimized,
+    /// The group or the physical expression is currently being explored.
+    Pending,
+    /// The group or the physical expression has been explored.
+    Optimized,
+}
+
+#[derive(Debug, Clone, PartialEq, sqlx::FromRow)]
+pub struct Goal {
+    pub id: GoalId,
+    pub optimization_status: OptimizationStatus,
+}
diff --git a/optd-core/src/cascades/memo.rs b/optd-core/src/cascades/memo.rs
index 2f6b710..4d1652f 100644
--- a/optd-core/src/cascades/memo.rs
+++ b/optd-core/src/cascades/memo.rs
@@ -10,13 +10,25 @@ use std::sync::Arc;
 
 use super::{
-    expressions::{LogicalExpression, LogicalExpressionId, ScalarExpression, ScalarExpressionId},
+    expressions::{
+        LogicalExpression, LogicalExpressionId, PhysicalExpression, PhysicalExpressionId,
+        ScalarExpression, ScalarExpressionId,
+    },
+    goal::Goal,
     groups::{RelationalGroupId, ScalarGroupId},
+    properties::PhysicalProperty,
 };
 use anyhow::Result;
 
 #[trait_variant::make(Send)]
 pub trait Memoize: Send + Sync + 'static {
+    /// Creates or gets an optimization goal for a group with some required physical properties.
+    async fn create_or_get_relation_group_goal(
+        &self,
+        group_id: RelationalGroupId,
+        required_physical_props: Vec<PhysicalProperty>,
+    ) -> Result<Goal>;
+
     /// Gets all logical expressions in a group.
     async fn get_all_logical_exprs_in_group(
         &self,
@@ -67,4 +79,17 @@ pub trait Memoize: Send + Sync + 'static {
         from: ScalarGroupId,
         to: ScalarGroupId,
     ) -> Result<ScalarGroupId>;
+
+    /// Gets all physical expressions in a group.
+    async fn get_all_physical_exprs_in_group(
+        &self,
+        group_id: RelationalGroupId,
+    ) -> Result<Vec<(PhysicalExpressionId, Arc<PhysicalExpression>)>>;
+
+    /// Adds a physical expression to an existing group in the memo table.
+    async fn add_physical_expr_to_group(
+        &self,
+        physical_expr: &PhysicalExpression,
+        group_id: RelationalGroupId,
+    ) -> Result<RelationalGroupId>;
 }
diff --git a/optd-core/src/cascades/mod.rs b/optd-core/src/cascades/mod.rs
index 72ed19a..529dbad 100644
--- a/optd-core/src/cascades/mod.rs
+++ b/optd-core/src/cascades/mod.rs
@@ -1,22 +1,34 @@
+pub mod expressions;
+pub mod goal;
+pub mod groups;
+pub mod memo;
+pub mod properties;
+
 use std::sync::Arc;
 
 use async_recursion::async_recursion;
-use expressions::{LogicalExpression, ScalarExpression};
+use expressions::{LogicalExpression, PhysicalExpression, ScalarExpression};
 use groups::{RelationalGroupId, ScalarGroupId};
 use memo::Memoize;
 
 use crate::{
     operators::{
-        relational::logical::{filter::Filter, join::Join, scan::Scan, LogicalOperator},
-        scalar::{add::Add, equal::Equal, ScalarOperator},
+        relational::{
+            logical::{filter::Filter, join::Join, project::Project, scan::Scan, LogicalOperator},
+            physical::{
+                self, filter::filter::PhysicalFilter, join::nested_loop_join::NestedLoopJoin,
+                project::PhysicalProject, scan::table_scan::TableScan,
+            },
+        },
+        scalar::{binary_op::BinaryOp, logic_op::LogicOp, unary_op::UnaryOp, ScalarOperator},
+    },
+    plans::{
+        logical::{LogicalPlan, PartialLogicalPlan},
+        physical::{PartialPhysicalPlan, PhysicalPlan},
+        scalar::{PartialScalarPlan, ScalarPlan},
     },
-    plans::{logical::PartialLogicalPlan, scalar::PartialScalarPlan},
 };
 
-pub mod expressions;
-pub mod groups;
-pub mod memo;
-
 #[async_recursion]
 pub async fn ingest_partial_logical_plan(
     memo: &impl Memoize,
@@ -42,6 +54,29 @@ pub async fn ingest_partial_logical_plan(
     }
 }
 
+#[async_recursion]
+pub async fn ingest_full_logical_plan(
+    memo: &impl Memoize,
+    logical_plan: &LogicalPlan,
+) -> anyhow::Result<RelationalGroupId> {
+    let mut children_relations = Vec::new();
+    for child in logical_plan.operator.children_relations().iter() {
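+        // Recursively ingest every child relation first, so that each child
+        // already maps to a memo group id before this operator's expression is built.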
+        children_relations.push(ingest_full_logical_plan(memo, child).await?);
+    }
+
+    let mut children_scalars = Vec::new();
+    for child in logical_plan.operator.children_scalars().iter() {
+        children_scalars.push(ingest_full_scalar_plan(memo, child).await?);
+    }
+
+    memo.add_logical_expr(
+        &logical_plan
+            .operator
+            .into_expr(&children_relations, &children_scalars),
+    )
+    .await
+}
+
 #[async_recursion]
 pub async fn ingest_partial_scalar_plan(
     memo: &impl Memoize,
@@ -64,7 +99,97 @@
 }
 
 #[async_recursion]
-async fn match_any_partial_logical_plan(
+pub async fn ingest_full_scalar_plan(
+    memo: &impl Memoize,
+    scalar_plan: &ScalarPlan,
+) -> anyhow::Result<ScalarGroupId> {
+    let mut children = Vec::new();
+    for child in scalar_plan.operator.children_scalars().iter() {
+        children.push(ingest_full_scalar_plan(memo, child).await?);
+    }
+
+    memo.add_scalar_expr(&scalar_plan.operator.into_expr(&children))
+        .await
+}
+
+async fn mock_optimize_scalar_group(
+    _memo: &impl Memoize,
+    _group: ScalarGroupId,
+) -> anyhow::Result<()> {
+    Ok(())
+}
+
+#[async_recursion]
+pub async fn mock_optimize_relation_group(
+    memo: &impl Memoize,
+    group_id: RelationalGroupId,
+) -> anyhow::Result<()> {
+    let logical_exprs = memo.get_all_logical_exprs_in_group(group_id).await?;
+    // The mock optimizer only implements the most recently added logical expression.
+    let last_logical_expr = logical_exprs.last().unwrap().1.clone();
+
+    mock_optimize_relation_expr(memo, group_id, &last_logical_expr).await?;
+
+    Ok(())
+}
+
+#[async_recursion]
+async fn mock_optimize_relation_expr(
+    memo: &impl Memoize,
+    group_id: RelationalGroupId,
+    logical_expr: &LogicalExpression,
+) -> anyhow::Result<()> {
+    match logical_expr {
+        LogicalExpression::Scan(scan) => {
+            let physical_expr = PhysicalExpression::TableScan(TableScan {
+                table_name: scan.table_name.clone(),
+                predicate: scan.predicate,
+            });
+            memo.add_physical_expr_to_group(&physical_expr, group_id)
+                .await?;
+            mock_optimize_scalar_group(memo, scan.predicate).await?;
+        }
+        LogicalExpression::Filter(filter) => {
+            let physical_expr = PhysicalExpression::Filter(PhysicalFilter {
+                child: filter.child,
+                predicate: filter.predicate,
+            });
+            memo.add_physical_expr_to_group(&physical_expr, group_id)
+                .await?;
+            mock_optimize_scalar_group(memo, filter.predicate).await?;
+            mock_optimize_relation_group(memo, filter.child).await?;
+        }
+        LogicalExpression::Join(join) => {
+            let physical_expr = PhysicalExpression::NestedLoopJoin(NestedLoopJoin {
+                join_type: join.join_type.clone(),
+                outer: join.left,
+                inner: join.right,
+                condition: join.condition,
+            });
+            memo.add_physical_expr_to_group(&physical_expr, group_id)
+                .await?;
+            mock_optimize_scalar_group(memo, join.condition).await?;
+            mock_optimize_relation_group(memo, join.left).await?;
+            mock_optimize_relation_group(memo, join.right).await?;
+        }
+        LogicalExpression::Project(project) => {
+            let physical_expr = PhysicalExpression::Project(PhysicalProject {
+                child: project.child,
+                fields: project.fields.clone(),
+            });
+            memo.add_physical_expr_to_group(&physical_expr, group_id)
+                .await?;
+            mock_optimize_relation_group(memo, project.child).await?;
+            for field in project.fields.iter() {
+                mock_optimize_scalar_group(memo, *field).await?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[async_recursion]
+pub async fn match_any_partial_logical_plan(
     memo: &impl Memoize,
     group: RelationalGroupId,
 ) -> anyhow::Result<Arc<PartialLogicalPlan>> {
@@ -101,6 +226,113 @@ async fn match_any_partial_logical_plan(
                 }),
             }))
         }
+        LogicalExpression::Project(project) => {
+            let child = match_any_partial_logical_plan(memo, project.child).await?;
+            let mut fields = Vec::with_capacity(project.fields.len());
+
+            for field in project.fields.iter() {
+                fields.push(match_any_partial_scalar_plan(memo, *field).await?);
+            }
+
+            Ok(Arc::new(PartialLogicalPlan::PartialMaterialized {
+                operator: LogicalOperator::Project(Project { child, fields }),
+            }))
+        }
+    }
+}
+
+#[async_recursion]
+async fn match_any_partial_physical_plan(
+    memo: &impl Memoize,
+    group: RelationalGroupId,
+) -> anyhow::Result<Arc<PartialPhysicalPlan>> {
+    let physical_exprs = memo.get_all_physical_exprs_in_group(group).await?;
+    let last_physical_expr = physical_exprs.last().unwrap().1.clone();
+    match last_physical_expr.as_ref() {
+        PhysicalExpression::TableScan(table_scan) => {
+            Ok(Arc::new(PartialPhysicalPlan::PartialMaterialized {
+                operator: physical::PhysicalOperator::TableScan(TableScan {
+                    table_name: table_scan.table_name.clone(),
+                    predicate: match_any_partial_scalar_plan(memo, table_scan.predicate).await?,
+                }),
+            }))
+        }
+        PhysicalExpression::Filter(filter) => {
+            Ok(Arc::new(PartialPhysicalPlan::PartialMaterialized {
+                operator: physical::PhysicalOperator::Filter(PhysicalFilter {
+                    child: match_any_partial_physical_plan(memo, filter.child).await?,
+                    predicate: match_any_partial_scalar_plan(memo, filter.predicate).await?,
+                }),
+            }))
+        }
+        PhysicalExpression::NestedLoopJoin(nested_loop_join) => {
+            Ok(Arc::new(PartialPhysicalPlan::PartialMaterialized {
+                operator: physical::PhysicalOperator::NestedLoopJoin(NestedLoopJoin {
+                    join_type: nested_loop_join.join_type.clone(),
+                    outer: match_any_partial_physical_plan(memo, nested_loop_join.outer).await?,
+                    inner: match_any_partial_physical_plan(memo, nested_loop_join.inner).await?,
+                    condition: match_any_partial_scalar_plan(memo, nested_loop_join.condition)
+                        .await?,
+                }),
+            }))
+        }
+        PhysicalExpression::Project(project) => {
+            let mut fields = Vec::with_capacity(project.fields.len());
+            for field in project.fields.iter() {
+                fields.push(match_any_partial_scalar_plan(memo, *field).await?);
+            }
+            Ok(Arc::new(PartialPhysicalPlan::PartialMaterialized {
+                operator: physical::PhysicalOperator::Project(PhysicalProject {
+                    child: match_any_partial_physical_plan(memo, project.child).await?,
+                    fields,
+                }),
+            }))
+        }
+        _ => unimplemented!(),
+    }
+}
+
+#[async_recursion]
+pub async fn match_any_physical_plan(
+    memo: &impl Memoize,
+    group: RelationalGroupId,
+) -> anyhow::Result<Arc<PhysicalPlan>> {
+    let physical_exprs = memo.get_all_physical_exprs_in_group(group).await?;
+    let last_physical_expr = physical_exprs.last().unwrap().1.clone();
+    match last_physical_expr.as_ref() {
+        PhysicalExpression::TableScan(table_scan) => Ok(Arc::new(PhysicalPlan {
+            operator: physical::PhysicalOperator::TableScan(TableScan {
+                table_name: table_scan.table_name.clone(),
+                predicate: match_any_scalar_plan(memo, table_scan.predicate).await?,
+            }),
+        })),
+        PhysicalExpression::Filter(filter) => Ok(Arc::new(PhysicalPlan {
+            operator: physical::PhysicalOperator::Filter(PhysicalFilter {
+                child: match_any_physical_plan(memo, filter.child).await?,
+                predicate: match_any_scalar_plan(memo, filter.predicate).await?,
+            }),
+        })),
+        PhysicalExpression::NestedLoopJoin(nested_loop_join) => Ok(Arc::new(PhysicalPlan {
+            operator: physical::PhysicalOperator::NestedLoopJoin(NestedLoopJoin {
+                join_type: nested_loop_join.join_type.clone(),
+                outer: match_any_physical_plan(memo, nested_loop_join.outer).await?,
+                inner: match_any_physical_plan(memo, nested_loop_join.inner).await?,
+                condition: match_any_scalar_plan(memo, nested_loop_join.condition).await?,
+            }),
+        })),
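+        // Project: materialize the child plan and each projected scalar expression.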
+        PhysicalExpression::Project(project) => {
+            let mut fields = Vec::with_capacity(project.fields.len());
+            for field in project.fields.iter() {
+                fields.push(match_any_scalar_plan(memo, *field).await?);
+            }
+            Ok(Arc::new(PhysicalPlan {
+                operator: physical::PhysicalOperator::Project(PhysicalProject {
+                    child: match_any_physical_plan(memo, project.child).await?,
+                    fields,
+                }),
+            }))
+        }
+        _ => unimplemented!(),
     }
 }
 
@@ -122,18 +354,73 @@ async fn match_any_partial_scalar_plan(
                 operator: ScalarOperator::ColumnRef(column_ref.clone()),
             }))
         }
-        ScalarExpression::Add(add) => {
-            let left = match_any_partial_scalar_plan(memo, add.left).await?;
-            let right = match_any_partial_scalar_plan(memo, add.right).await?;
+        ScalarExpression::BinaryOp(binary_op) => {
+            let left = match_any_partial_scalar_plan(memo, binary_op.left).await?;
+            let right = match_any_partial_scalar_plan(memo, binary_op.right).await?;
+            Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
+                operator: ScalarOperator::BinaryOp(BinaryOp::new(
+                    binary_op.kind.clone(),
+                    left,
+                    right,
+                )),
+            }))
+        }
+        ScalarExpression::UnaryOp(unary_op) => {
+            let child = match_any_partial_scalar_plan(memo, unary_op.child).await?;
             Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
-                operator: ScalarOperator::Add(Add { left, right }),
+                operator: ScalarOperator::UnaryOp(UnaryOp::new(unary_op.kind.clone(), child)),
             }))
         }
-        ScalarExpression::Equal(equal) => {
-            let left = match_any_partial_scalar_plan(memo, equal.left).await?;
-            let right = match_any_partial_scalar_plan(memo, equal.right).await?;
+        ScalarExpression::LogicOp(logic) => {
+            let mut children = Vec::with_capacity(logic.children.len());
+            for child in logic.children.iter() {
+                children.push(match_any_partial_scalar_plan(memo, *child).await?);
+            }
             Ok(Arc::new(PartialScalarPlan::PartialMaterialized {
-                operator: ScalarOperator::Equal(Equal { left, right }),
+                operator: ScalarOperator::LogicOp(LogicOp::new(logic.kind.clone(), children)),
+            }))
+        }
+    }
+}
+
+#[async_recursion]
+async fn match_any_scalar_plan(
+    memo: &impl Memoize,
+    group: ScalarGroupId,
+) -> anyhow::Result<Arc<ScalarPlan>> {
+    let scalar_exprs = memo.get_all_scalar_exprs_in_group(group).await?;
+    let last_scalar_expr = scalar_exprs.last().unwrap().1.clone();
+    match last_scalar_expr.as_ref() {
+        ScalarExpression::Constant(constant) => Ok(Arc::new(ScalarPlan {
+            operator: ScalarOperator::Constant(constant.clone()),
+        })),
+        ScalarExpression::ColumnRef(column_ref) => Ok(Arc::new(ScalarPlan {
+            operator: ScalarOperator::ColumnRef(column_ref.clone()),
+        })),
+        ScalarExpression::BinaryOp(binary_op) => {
+            let left = match_any_scalar_plan(memo, binary_op.left).await?;
+            let right = match_any_scalar_plan(memo, binary_op.right).await?;
+            Ok(Arc::new(ScalarPlan {
+                operator: ScalarOperator::BinaryOp(BinaryOp::new(
+                    binary_op.kind.clone(),
+                    left,
+                    right,
+                )),
+            }))
+        }
+        ScalarExpression::UnaryOp(unary_op) => {
+            let child = match_any_scalar_plan(memo, unary_op.child).await?;
+            Ok(Arc::new(ScalarPlan {
+                operator: ScalarOperator::UnaryOp(UnaryOp::new(unary_op.kind.clone(), child)),
+            }))
+        }
+        ScalarExpression::LogicOp(logic_op) => {
+            let mut children = Vec::with_capacity(logic_op.children.len());
+            for child in logic_op.children.iter() {
+                children.push(match_any_scalar_plan(memo, *child).await?);
+            }
+            Ok(Arc::new(ScalarPlan {
+                operator: ScalarOperator::LogicOp(LogicOp::new(logic_op.kind.clone(), children)),
             }))
         }
     }
@@ -169,4 +456,155 @@ mod tests {
         assert_eq!(result, partial_logical_plan);
         Ok(())
     }
+
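+    // The tests below exercise the full pipeline added in this patch: ingest a
+    // logical plan into the memo, run the mock physical optimizer, and match
+    // the resulting (partial) physical plan back out of the memo.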
+    #[tokio::test]
+    async fn test_ingest_projection() -> anyhow::Result<()> {
+        let memo = SqliteMemo::new_in_memory().await?;
+
+        // select 1, t1.#1 from t1;
+        let logical_plan = project(scan("t1", boolean(true)), vec![int64(1), column_ref(1)]);
+        let group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+        let dup_group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+        assert_eq!(group_id, dup_group_id);
+
+        let result = match_any_partial_logical_plan(&memo, group_id).await?;
+        assert_eq!(result, logical_plan);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_ingest_and() -> anyhow::Result<()> {
+        let memo = SqliteMemo::new_in_memory().await?;
+
+        // select * from t1 where true and t1.#2 = 'Memo';
+        let logical_plan = filter(
+            scan("t1", boolean(true)),
+            and(vec![boolean(true), equal(column_ref(2), string("Memo"))]),
+        );
+
+        let group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+        let dup_group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+        assert_eq!(group_id, dup_group_id);
+
+        let result = match_any_partial_logical_plan(&memo, group_id).await?;
+        assert_eq!(result, logical_plan);
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_scan_e2e() -> anyhow::Result<()> {
+        let memo = SqliteMemo::new_in_memory().await?;
+
+        // select * from t1;
+        let logical_plan = scan("t1", boolean(true));
+        let group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+
+        let result = match_any_partial_logical_plan(&memo, group_id).await?;
+        assert_eq!(result, logical_plan);
+
+        mock_optimize_relation_group(&memo, group_id).await?;
+        let physical_plan = match_any_partial_physical_plan(&memo, group_id).await?;
+
+        assert_eq!(physical_plan, table_scan("t1", boolean(true)));
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_filter_e2e() -> anyhow::Result<()> {
+        let memo = SqliteMemo::new_in_memory().await?;
+
+        // select * from t1 where t1.#0 = 1 and true (the scan itself carries predicate true or false);
+        let logical_plan = filter(
+            scan("t1", or(vec![boolean(true), boolean(false)])),
+            and(vec![equal(column_ref(0), int64(1)), boolean(true)]),
+        );
+        let group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+
+        let result = match_any_partial_logical_plan(&memo, group_id).await?;
+        assert_eq!(result, logical_plan);
+
+        mock_optimize_relation_group(&memo, group_id).await?;
+        let physical_plan = match_any_partial_physical_plan(&memo, group_id).await?;
+
+        assert_eq!(
+            physical_plan,
+            physical_filter(
+                table_scan("t1", or(vec![boolean(true), boolean(false)])),
+                and(vec![equal(column_ref(0), int64(1)), boolean(true)])
+            )
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_join_e2e() -> anyhow::Result<()> {
+        let memo = SqliteMemo::new_in_memory().await?;
+
+        // select * from t1 a inner join t1 b on a.#0 = b.#0 (both scans carry predicate NOT false);
+        let scan_t1 = scan("t1", not(boolean(false)));
+        let logical_plan = join(
+            "inner",
+            scan_t1.clone(),
+            scan_t1,
+            equal(column_ref(0), column_ref(0)),
+        );
+        let group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+
+        let result = match_any_partial_logical_plan(&memo, group_id).await?;
+        assert_eq!(result, logical_plan);
+
+        mock_optimize_relation_group(&memo, group_id).await?;
+        let physical_plan = match_any_partial_physical_plan(&memo, group_id).await?;
+
+        let table_scan_t1 = table_scan("t1", not(boolean(false)));
+        assert_eq!(
+            physical_plan,
+            nested_loop_join(
+                "inner",
+                table_scan_t1.clone(),
+                table_scan_t1,
+                equal(column_ref(0), column_ref(0)),
+            )
+        );
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn test_project_e2e() -> anyhow::Result<()> {
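+        // Round-trip a projection with nested arithmetic through
+        // ingest -> mock optimize -> match.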
+        let memo = SqliteMemo::new_in_memory().await?;
+
+        // select t1.#0, (t1.#1 + 1) - (-3) from t1;
+        let logical_plan = project(
+            scan("t1", boolean(true)),
+            vec![
+                column_ref(0),
+                minus(add(column_ref(1), int64(1)), neg(int64(3))),
+            ],
+        );
+        let group_id = ingest_partial_logical_plan(&memo, &logical_plan).await?;
+
+        let result = match_any_partial_logical_plan(&memo, group_id).await?;
+        assert_eq!(result, logical_plan);
+
+        mock_optimize_relation_group(&memo, group_id).await?;
+        let physical_plan = match_any_partial_physical_plan(&memo, group_id).await?;
+
+        assert_eq!(
+            physical_plan,
+            physical_project(
+                table_scan("t1", boolean(true)),
+                vec![
+                    column_ref(0),
+                    minus(add(column_ref(1), int64(1)), neg(int64(3))),
+                ],
+            )
+        );
+
+        Ok(())
+    }
 }
diff --git a/optd-core/src/cascades/properties/mod.rs b/optd-core/src/cascades/properties/mod.rs
new file mode 100644
index 0000000..8878cf5
--- /dev/null
+++ b/optd-core/src/cascades/properties/mod.rs
@@ -0,0 +1,15 @@
+use serde::{Deserialize, Serialize};
+
+use crate::values::OptdValue;
+
+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
+pub enum PhysicalProperty {
+    Sorted(SortProperty),
+}
+
+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
+pub struct SortProperty {
+    /// Each entry is a (column index, direction) pair,
+    /// e.g. vec![(0, Asc), (1, Desc)].
+    pub sort_orders: Vec<(OptdValue, OptdValue)>,
+}
diff --git a/optd-core/src/operators/relational/logical/mod.rs b/optd-core/src/operators/relational/logical/mod.rs
index 42e6c7f..77cdfc3 100644
--- a/optd-core/src/operators/relational/logical/mod.rs
+++ b/optd-core/src/operators/relational/logical/mod.rs
@@ -6,10 +6,12 @@
 pub mod filter;
 pub mod join;
+pub mod project;
 pub mod scan;
 
 use filter::Filter;
 use join::Join;
+use project::Project;
 use scan::Scan;
 use serde::Deserialize;
 
@@ -40,6 +42,8 @@ pub enum LogicalOperator<Value, Relation, Scalar> {
     Filter(Filter<Relation, Scalar>),
     /// Join operator
     Join(Join<Value, Relation, Scalar>),
+    /// Project operator
+    Project(Project<Relation, Scalar>),
 }
 
 /// The kind of logical operator.
@@ -54,6 +58,8 @@ pub enum LogicalOperatorKind {
     Filter,
     /// Represents a join operation
     Join,
+    /// Represents a projection operation
+    Project,
 }
 
 impl<Value, Relation, Scalar> LogicalOperator<Value, Relation, Scalar>
@@ -70,6 +76,7 @@ where
             LogicalOperator::Scan(_) => LogicalOperatorKind::Scan,
             LogicalOperator::Filter(_) => LogicalOperatorKind::Filter,
             LogicalOperator::Join(_) => LogicalOperatorKind::Join,
+            LogicalOperator::Project(_) => LogicalOperatorKind::Project,
         }
     }
 
@@ -84,6 +91,7 @@ where
             LogicalOperator::Scan(scan) => vec![scan.table_name.clone()],
             LogicalOperator::Filter(_) => vec![],
             LogicalOperator::Join(join) => vec![join.join_type.clone()],
+            LogicalOperator::Project(_) => vec![],
         }
     }
 
@@ -98,6 +106,7 @@ where
             LogicalOperator::Scan(_) => vec![],
             LogicalOperator::Filter(filter) => vec![filter.child.clone()],
             LogicalOperator::Join(join) => vec![join.left.clone(), join.right.clone()],
+            LogicalOperator::Project(project) => vec![project.child.clone()],
         }
     }
 
@@ -112,6 +121,7 @@ where
             LogicalOperator::Scan(scan) => vec![scan.predicate.clone()],
             LogicalOperator::Filter(filter) => vec![filter.predicate.clone()],
             LogicalOperator::Join(join) => vec![join.condition.clone()],
+            LogicalOperator::Project(project) => project.fields.clone(),
         }
     }
 
@@ -164,6 +174,14 @@ where
                     join_type: join.join_type.clone(),
                 })
             }
+            LogicalOperator::Project(_) => {
+                assert_eq!(rel_size, 1, "Project: wrong number of relations");
+                // We cannot assume anything about the number of scalar children.
+                LogicalExpression::Project(Project {
+                    child: children_relations[0],
+                    fields: children_scalars.to_vec(),
+                })
+            }
         }
     }
 }
diff --git a/optd-core/src/operators/relational/logical/project.rs b/optd-core/src/operators/relational/logical/project.rs
new file mode 100644
index 0000000..8f5506e
--- /dev/null
+++ b/optd-core/src/operators/relational/logical/project.rs
@@ -0,0 +1,33 @@
+//! A logical projection.
+
+use serde::Deserialize;
+
+use crate::values::OptdValue;
+
+use super::LogicalOperator;
+
+/// Logical project operator that specifies output columns.
+///
+/// Takes input relation (`Relation`) and defines output columns/expressions
+/// (`Scalar`).
+#[derive(Debug, Clone, PartialEq, Deserialize)]
+pub struct Project<Relation, Scalar> {
+    /// The input relation.
+    pub child: Relation,
+    /// The output expressions.
+    pub fields: Vec<Scalar>,
+}
+
+impl<Relation, Scalar> Project<Relation, Scalar> {
+    /// Create a new project operator.
+    pub fn new(child: Relation, fields: Vec<Scalar>) -> Self {
+        Self { child, fields }
+    }
+}
+
+/// Creates a project logical operator.
+pub fn project<Relation, Scalar>(
+    child: Relation,
+    fields: Vec<Scalar>,
+) -> LogicalOperator<OptdValue, Relation, Scalar> {
+    LogicalOperator::Project(Project::new(child, fields))
+}
diff --git a/optd-core/src/operators/relational/physical/filter/filter.rs b/optd-core/src/operators/relational/physical/filter/filter.rs
index 2deb22e..9b3a63d 100644
--- a/optd-core/src/operators/relational/physical/filter/filter.rs
+++ b/optd-core/src/operators/relational/physical/filter/filter.rs
@@ -20,8 +20,8 @@ impl<Relation, Scalar> PhysicalFilter<Relation, Scalar> {
     }
 }
 
-/// Creates a filter physical operator.
-pub fn filter<Relation, Scalar>(
+/// Creates a physical filter operator.
+pub fn physical_filter<Relation, Scalar>(
     child: Relation,
     predicate: Scalar,
 ) -> PhysicalOperator<OptdValue, Relation, Scalar> {
diff --git a/optd-core/src/operators/relational/physical/mod.rs b/optd-core/src/operators/relational/physical/mod.rs
index 21d3b46..2416ae3 100644
--- a/optd-core/src/operators/relational/physical/mod.rs
+++ b/optd-core/src/operators/relational/physical/mod.rs
@@ -5,6 +5,7 @@
 pub mod filter;
 pub mod join;
+pub mod project;
 pub mod scan;
 
 use crate::{
@@ -16,7 +17,9 @@ use crate::{
 };
 use filter::filter::PhysicalFilter;
 use join::{hash_join::HashJoin, merge_join::MergeJoin, nested_loop_join::NestedLoopJoin};
+use project::PhysicalProject;
 use scan::table_scan::TableScan;
+use serde::Deserialize;
 
 /// Each variant of `PhysicalOperator` represents a specific kind of physical operator.
 ///
@@ -29,7 +32,7 @@ use scan::table_scan::TableScan;
 /// - Pattern matching: Using physical operators for matching rule patterns
 /// - Partially materialized plans: Using physical operators during optimization
 /// - Fully materialized plans: Using physical operators in physical execution
-#[derive(Clone)]
+#[derive(Debug, Clone, PartialEq, Deserialize)]
 pub enum PhysicalOperator<Value, Relation, Scalar> {
     /// Table scan operator
     TableScan(TableScan<Value, Scalar>),
     /// Filter operator
     Filter(PhysicalFilter<Relation, Scalar>),
     /// Hash join operator
     HashJoin(HashJoin<Value, Relation, Scalar>),
     /// Nested-loop join operator
     NestedLoopJoin(NestedLoopJoin<Value, Relation, Scalar>),
     /// Sort-merge join operator
     SortMergeJoin(MergeJoin<Value, Relation, Scalar>),
+    /// Project operator
+    Project(PhysicalProject<Relation, Scalar>),
 }
 
 /// The kind of physical operator.
@@ -59,6 +63,8 @@ pub enum PhysicalOperatorKind { NestedLoopJoin, /// Represents a sort-merge join operation SortMergeJoin, + /// Represents a project operation + Project, } impl PhysicalOperator @@ -74,6 +80,7 @@ where PhysicalOperator::HashJoin(_) => PhysicalOperatorKind::HashJoin, PhysicalOperator::NestedLoopJoin(_) => PhysicalOperatorKind::NestedLoopJoin, PhysicalOperator::SortMergeJoin(_) => PhysicalOperatorKind::SortMergeJoin, + PhysicalOperator::Project(_) => PhysicalOperatorKind::Project, } } @@ -89,6 +96,7 @@ where PhysicalOperator::SortMergeJoin(join) => { vec![join.left_sorted.clone(), join.right_sorted.clone()] } + PhysicalOperator::Project(project) => vec![project.child.clone()], } } @@ -100,6 +108,7 @@ where PhysicalOperator::HashJoin(join) => vec![join.condition.clone()], PhysicalOperator::NestedLoopJoin(join) => vec![join.condition.clone()], PhysicalOperator::SortMergeJoin(join) => vec![join.condition.clone()], + PhysicalOperator::Project(project) => project.fields.clone(), } } @@ -164,6 +173,15 @@ where condition: children_scalars[0], }) } + PhysicalOperator::Project(_) => { + assert_eq!(rel_size, 1, "Project: wrong number of relations"); + // We cannot make assumptions about the number of scalar children. + + PhysicalExpression::Project(PhysicalProject { + child: children_relations[0], + fields: children_scalars.to_vec(), + }) + } } } } diff --git a/optd-core/src/operators/relational/physical/project.rs b/optd-core/src/operators/relational/physical/project.rs new file mode 100644 index 0000000..f823e3d --- /dev/null +++ b/optd-core/src/operators/relational/physical/project.rs @@ -0,0 +1,34 @@ +//! A physical projection. + +use serde::Deserialize; + +use crate::values::OptdValue; + +use super::PhysicalOperator; + +/// Physical project operator that specifies output columns. +/// +/// Takes an input relation (`Relation`) and defines the output columns/expressions +/// (`Scalar`). +#[derive(Debug, Clone, PartialEq, Deserialize)] +pub struct PhysicalProject<Relation, Scalar> { + /// The input relation. + pub child: Relation, + /// The projection expressions. + pub fields: Vec<Scalar>, +} + +impl<Relation, Scalar> PhysicalProject<Relation, Scalar> { + /// Create a new physical project operator. + pub fn new(child: Relation, fields: Vec<Scalar>) -> Self { + Self { child, fields } + } +} + +/// Creates a physical project operator. +pub fn physical_project<Relation, Scalar>( + child: Relation, + fields: Vec<Scalar>, +) -> PhysicalOperator<OptdValue, Relation, Scalar> { + PhysicalOperator::Project(PhysicalProject::new(child, fields)) +} diff --git a/optd-core/src/operators/relational/physical/scan/table_scan.rs b/optd-core/src/operators/relational/physical/scan/table_scan.rs index b1de502..f636ccf 100644 --- a/optd-core/src/operators/relational/physical/scan/table_scan.rs +++ b/optd-core/src/operators/relational/physical/scan/table_scan.rs @@ -4,7 +4,7 @@ use crate::{operators::relational::physical::PhysicalOperator, values::OptdValue use serde::Deserialize; /// A physical operator that scans rows from a table. -#[derive(Clone, Debug, Deserialize)] +#[derive(Debug, Clone, PartialEq, Deserialize)] pub struct TableScan { /// The name of the table to scan. pub table_name: Value, diff --git a/optd-core/src/operators/scalar/add.rs b/optd-core/src/operators/scalar/add.rs deleted file mode 100644 index 67f7f1d..0000000 --- a/optd-core/src/operators/scalar/add.rs +++ /dev/null @@ -1,25 +0,0 @@ -//! A scalar addition operator. - -use crate::{operators::scalar::ScalarOperator, values::OptdValue}; -use serde::Deserialize; - -/// A scalar operator that adds two values. -#[derive(Debug, Clone, PartialEq, Deserialize)] -pub struct Add { - /// The left operand.
- pub left: Scalar, - /// The right operand. - pub right: Scalar, -} - -impl Add { - /// Create a new addition operator. - pub fn new(left: Scalar, right: Scalar) -> Self { - Self { left, right } - } -} - -/// Creates an addition scalar operator. -pub fn add(left: Scalar, right: Scalar) -> ScalarOperator { - ScalarOperator::Add(Add::new(left, right)) -} diff --git a/optd-core/src/operators/scalar/binary_op.rs b/optd-core/src/operators/scalar/binary_op.rs new file mode 100644 index 0000000..4951f28 --- /dev/null +++ b/optd-core/src/operators/scalar/binary_op.rs @@ -0,0 +1,44 @@ +//! A scalar binary operator. +use crate::{operators::scalar::ScalarOperator, values::OptdValue}; +use serde::Deserialize; + +/// A scalar operator that performs a binary operation on two values. +#[derive(Debug, Clone, PartialEq, Deserialize)] +pub struct BinaryOp<Value, Scalar> { + /// The kind of operator. + pub kind: Value, + /// The left operand. + pub left: Scalar, + /// The right operand. + pub right: Scalar, +} + +impl<Value, Scalar> BinaryOp<Value, Scalar> { + /// Create a new binary operator. + pub fn new(kind: Value, left: Scalar, right: Scalar) -> Self { + Self { kind, left, right } + } +} + +/// Creates an addition scalar operator. +pub fn add<Scalar>(left: Scalar, right: Scalar) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::BinaryOp(BinaryOp::new(OptdValue::String("add".into()), left, right)) +} + +/// Creates a subtraction scalar operator. +pub fn minus<Scalar>(left: Scalar, right: Scalar) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::BinaryOp(BinaryOp::new( + OptdValue::String("minus".into()), + left, + right, + )) +} + +/// Creates an equality scalar operator. +pub fn equal<Scalar>(left: Scalar, right: Scalar) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::BinaryOp(BinaryOp::new( + OptdValue::String("equal".into()), + left, + right, + )) +} diff --git a/optd-core/src/operators/scalar/constants.rs b/optd-core/src/operators/scalar/constants.rs index 0445b82..2143729 100644 --- a/optd-core/src/operators/scalar/constants.rs +++ b/optd-core/src/operators/scalar/constants.rs @@ -17,7 +17,17 @@ impl Constant { } } -/// Creates a constant scalar operator. -pub fn constant(value: OptdValue) -> ScalarOperator { - ScalarOperator::Constant(Constant::new(value)) +/// Creates a boolean constant scalar operator. +pub fn boolean(value: bool) -> ScalarOperator { + ScalarOperator::Constant(Constant::new(OptdValue::Bool(value))) +} + +/// Creates an `int64` constant scalar operator. +pub fn int64(value: i64) -> ScalarOperator { + ScalarOperator::Constant(Constant::new(OptdValue::Int64(value))) +} + +/// Creates a string constant scalar operator. +pub fn string(value: &str) -> ScalarOperator { + ScalarOperator::Constant(Constant::new(OptdValue::String(value.into()))) } diff --git a/optd-core/src/operators/scalar/equal.rs b/optd-core/src/operators/scalar/equal.rs deleted file mode 100644 index 04e2cb8..0000000 --- a/optd-core/src/operators/scalar/equal.rs +++ /dev/null @@ -1,25 +0,0 @@ -//! A scalar equality operator. - -use crate::{operators::scalar::ScalarOperator, values::OptdValue}; -use serde::Deserialize; - -/// A scalar operator that compares two values for equality. -#[derive(Debug, Clone, PartialEq, Deserialize)] -pub struct Equal { - /// The left operand. - pub left: Scalar, - /// The right operand. - pub right: Scalar, -} - -impl Equal { - /// Create a new equality operator. - pub fn new(left: Scalar, right: Scalar) -> Self { - Self { left, right } - } -} - -/// Creates an equality scalar operator.
-pub fn equal(left: Scalar, right: Scalar) -> ScalarOperator { - ScalarOperator::Equal(Equal::new(left, right)) -} diff --git a/optd-core/src/operators/scalar/logic_op.rs b/optd-core/src/operators/scalar/logic_op.rs new file mode 100644 index 0000000..473f57e --- /dev/null +++ b/optd-core/src/operators/scalar/logic_op.rs @@ -0,0 +1,33 @@ +//! A scalar logic operator. + +use serde::Deserialize; + +use crate::values::OptdValue; + +use super::ScalarOperator; + +/// A scalar operator that applies a logic operator (e.g. AND, OR) to its operands. +#[derive(Debug, Clone, PartialEq, Deserialize)] +pub struct LogicOp<Value, Scalar> { + /// The kind of logic operator. + pub kind: Value, + /// The operands to the logic operator. + pub children: Vec<Scalar>, +} + +impl<Value, Scalar> LogicOp<Value, Scalar> { + /// Create a new logic scalar operator. + pub fn new(kind: Value, children: Vec<Scalar>) -> Self { + Self { kind, children } + } +} + +/// Creates an `and` logic scalar operator. +pub fn and<Scalar>(children: Vec<Scalar>) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::LogicOp(LogicOp::new(OptdValue::String("and".into()), children)) +} + +/// Creates an `or` logic scalar operator. +pub fn or<Scalar>(children: Vec<Scalar>) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::LogicOp(LogicOp::new(OptdValue::String("or".into()), children)) +} diff --git a/optd-core/src/operators/scalar/mod.rs b/optd-core/src/operators/scalar/mod.rs index b27dd38..7735d2f 100644 --- a/optd-core/src/operators/scalar/mod.rs +++ b/optd-core/src/operators/scalar/mod.rs @@ -4,20 +4,22 @@ //! Scalar operators represent expressions and computations that operate on individual values //! rather than relations. -pub mod add; +pub mod binary_op; pub mod column_ref; pub mod constants; -pub mod equal; +pub mod logic_op; +pub mod unary_op; use crate::{ cascades::{expressions::ScalarExpression, groups::ScalarGroupId}, values::OptdValue, }; -use add::Add; +use binary_op::BinaryOp; use column_ref::ColumnRef; use constants::Constant; -use equal::Equal; +use logic_op::LogicOp; use serde::Deserialize; +use unary_op::UnaryOp; /// Each variant of `ScalarOperator` represents a specific kind of scalar operator. /// @@ -35,10 +37,12 @@ pub enum ScalarOperator { Constant(Constant), /// Column reference operator ColumnRef(ColumnRef), - /// Addition operator - Add(Add), - /// Equality comparison operator - Equal(Equal), + /// Binary operator (e.g., +, -, *, /, ==, >, etc.) + BinaryOp(BinaryOp), + /// Unary operator (e.g., NOT, -) + UnaryOp(UnaryOp), + /// Logic operator (e.g., AND, OR) + LogicOp(LogicOp), } /// The kind of scalar operator. @@ -51,10 +55,12 @@ pub enum ScalarOperatorKind { Constant, /// Represents a column reference ColumnRef, - /// Represents an addition operation - Add, - /// Represents an equality comparison - Equal, + /// Represents a binary operation (e.g., +, -, *, /, ==, >, etc.)
+ Binary, + /// Represents a unary operation (e.g., NOT, -) + Unary, + /// Represents a logic operation (e.g., AND, OR) + Logic, } impl ScalarOperator @@ -66,8 +72,9 @@ where match self { ScalarOperator::Constant(_) => ScalarOperatorKind::Constant, ScalarOperator::ColumnRef(_) => ScalarOperatorKind::ColumnRef, - ScalarOperator::Add(_) => ScalarOperatorKind::Add, - ScalarOperator::Equal(_) => ScalarOperatorKind::Equal, + ScalarOperator::BinaryOp(_) => ScalarOperatorKind::Binary, + ScalarOperator::UnaryOp(_) => ScalarOperatorKind::Unary, + ScalarOperator::LogicOp(_) => ScalarOperatorKind::Logic, } } @@ -76,8 +83,11 @@ where match self { ScalarOperator::Constant(constant) => vec![constant.value.clone()], ScalarOperator::ColumnRef(column_ref) => vec![column_ref.column_index.clone()], - ScalarOperator::Add(_) => vec![], - ScalarOperator::Equal(_) => vec![], + ScalarOperator::BinaryOp(binary_op) => { + vec![binary_op.kind.clone()] + } + ScalarOperator::UnaryOp(unary_op) => vec![unary_op.kind.clone()], + ScalarOperator::LogicOp(logic_op) => vec![logic_op.kind.clone()], } } @@ -86,8 +96,11 @@ where match self { ScalarOperator::Constant(_) => vec![], ScalarOperator::ColumnRef(_) => vec![], - ScalarOperator::Add(add) => vec![add.left.clone(), add.right.clone()], - ScalarOperator::Equal(equal) => vec![equal.left.clone(), equal.right.clone()], + ScalarOperator::BinaryOp(binary_op) => { + vec![binary_op.left.clone(), binary_op.right.clone()] + } + ScalarOperator::UnaryOp(unary_op) => vec![unary_op.child.clone()], + ScalarOperator::LogicOp(logic_op) => logic_op.children.clone(), } } @@ -108,18 +121,26 @@ where column_index: column_ref.column_index.clone(), }) } - ScalarOperator::Add(_) => { - assert_eq!(scalar_size, 2, "Add: expected 2 children"); - ScalarExpression::Add(Add { + ScalarOperator::BinaryOp(binary) => { + assert_eq!(scalar_size, 2, "Binary: expected two children"); + ScalarExpression::BinaryOp(BinaryOp { + kind: binary.kind.clone(), left: children_scalars[0], right: children_scalars[1], }) } - ScalarOperator::Equal(_) => { - assert_eq!(scalar_size, 2, "Equal: expected 2 children"); - ScalarExpression::Equal(Equal { - left: children_scalars[0], - right: children_scalars[1], + ScalarOperator::UnaryOp(unary) => { + assert_eq!(scalar_size, 1, "Unary: expected one child"); + ScalarExpression::UnaryOp(UnaryOp { + kind: unary.kind.clone(), + child: children_scalars[0], + }) + } + ScalarOperator::LogicOp(logic) => { + assert!(scalar_size > 0, "Logic: expected at least one child"); + ScalarExpression::LogicOp(LogicOp { + kind: logic.kind.clone(), + children: children_scalars.to_vec(), }) } } diff --git a/optd-core/src/operators/scalar/unary_op.rs b/optd-core/src/operators/scalar/unary_op.rs new file mode 100644 index 0000000..c0164ee --- /dev/null +++ b/optd-core/src/operators/scalar/unary_op.rs @@ -0,0 +1,29 @@ +//! A scalar unary operator. +use crate::{operators::scalar::ScalarOperator, values::OptdValue}; +use serde::Deserialize; + +/// A scalar operator that performs a unary operation on its child. +#[derive(Debug, Clone, PartialEq, Deserialize)] +pub struct UnaryOp<Value, Scalar> { + /// The kind of operator. + pub kind: Value, + /// The child operand. + pub child: Scalar, +} + +impl<Value, Scalar> UnaryOp<Value, Scalar> { + /// Create a new unary operator. + pub fn new(kind: Value, child: Scalar) -> Self { + Self { kind, child } + } +} + +/// Creates a not unary scalar operator (e.g. `NOT true`).
+pub fn not<Scalar>(child: Scalar) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::UnaryOp(UnaryOp::new(OptdValue::String("not".into()), child)) +} + +/// Creates a negation unary scalar operator (e.g. `-1`). +pub fn neg<Scalar>(child: Scalar) -> ScalarOperator<OptdValue, Scalar> { + ScalarOperator::UnaryOp(UnaryOp::new(OptdValue::String("neg".into()), child)) +} diff --git a/optd-core/src/plans/logical.rs b/optd-core/src/plans/logical.rs index c4a41fc..47bdcbe 100644 --- a/optd-core/src/plans/logical.rs +++ b/optd-core/src/plans/logical.rs @@ -26,7 +26,7 @@ use std::sync::Arc; /// plan representation after optimization is complete. #[derive(Clone, Debug, PartialEq)] pub struct LogicalPlan { - operator: LogicalOperator<OptdValue, Arc<LogicalPlan>, Arc<ScalarPlan>>, + pub operator: LogicalOperator<OptdValue, Arc<LogicalPlan>, Arc<ScalarPlan>>, } /// A logical plan with varying levels of materialization. diff --git a/optd-core/src/plans/mod.rs b/optd-core/src/plans/mod.rs index adfa709..240a02a 100644 --- a/optd-core/src/plans/mod.rs +++ b/optd-core/src/plans/mod.rs @@ -6,6 +6,7 @@ use crate::values::OptdExpr; pub mod logical; +pub mod physical; pub mod scalar; /// Expression type for constructing partial plans. diff --git a/optd-core/src/plans/physical.rs b/optd-core/src/plans/physical.rs new file mode 100644 index 0000000..e41976f --- /dev/null +++ b/optd-core/src/plans/physical.rs @@ -0,0 +1,50 @@ +//! Physical plan representations for the OPTD optimizer. +//! +//! Provides three levels of plan materialization: +//! 1. Full materialization (PhysicalPlan) +//! 2. Partial materialization (PartialPhysicalPlan) +//! 3. Group references (RelationalGroupId). +//! +//! This allows the optimizer to work with plans at different stages +//! of materialization during the optimization process. + +use crate::{ + cascades::groups::RelationalGroupId, operators::relational::physical::PhysicalOperator, + values::OptdValue, +}; + +use super::{ + scalar::{PartialScalarPlan, ScalarPlan}, + PartialPlanExpr, +}; +use std::sync::Arc; + +/// A fully materialized physical query plan. +/// +/// Contains a complete tree of physical operators where all children +/// (both physical and scalar) are fully materialized. Used for final +/// plan representation after optimization is complete. +#[derive(Clone, Debug, PartialEq)] +pub struct PhysicalPlan { + pub operator: PhysicalOperator<OptdValue, Arc<PhysicalPlan>, Arc<ScalarPlan>>, +} + +/// A physical plan with varying levels of materialization. +/// +/// During optimization, plans can be in two states: +/// - Partially materialized: Single materialized operator with group references +/// - Unmaterialized: Pure group reference +#[derive(Clone, Debug, PartialEq)] +pub enum PartialPhysicalPlan { + /// Single materialized operator with potentially unmaterialized children + PartialMaterialized { + operator: PhysicalOperator<OptdValue, Arc<PartialPhysicalPlan>, Arc<PartialScalarPlan>>, + }, + + /// Reference to an optimization group containing equivalent plans + UnMaterialized(RelationalGroupId), +} + +/// Type alias for expressions that construct physical plans. +/// See PartialPlanExpr for the available expression constructs. +pub type PartialPhysicalPlanExpr = PartialPlanExpr<PartialPhysicalPlan>; diff --git a/optd-core/src/plans/scalar.rs b/optd-core/src/plans/scalar.rs index dbbed3c..0eb4484 100644 --- a/optd-core/src/plans/scalar.rs +++ b/optd-core/src/plans/scalar.rs @@ -22,7 +22,7 @@ use std::sync::Arc; /// after optimization is complete. #[derive(Clone, Debug, PartialEq)] pub struct ScalarPlan { - operator: ScalarOperator<OptdValue, Arc<ScalarPlan>>, + pub operator: ScalarOperator<OptdValue, Arc<ScalarPlan>>, } /// A scalar expression with varying levels of materialization.
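A sketch of how downstream code can tell the two materialization states of `PartialPhysicalPlan` apart; the `describe` helper is hypothetical, and it assumes `RelationalGroupId` implements `Debug` (the cascades types elsewhere in this patch derive common traits):

```rust
use optd_core::plans::physical::PartialPhysicalPlan;

// Hypothetical helper: summarize the state of a partial physical plan.
fn describe(plan: &PartialPhysicalPlan) -> String {
    match plan {
        // A materialized root operator; its children may still be
        // unmaterialized group references.
        PartialPhysicalPlan::PartialMaterialized { .. } => "materialized operator".to_string(),
        // A pure reference into an optimizer group; nothing is materialized yet.
        PartialPhysicalPlan::UnMaterialized(group_id) => format!("group reference {:?}", group_id),
    }
}
```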
diff --git a/optd-core/src/storage/memo.rs b/optd-core/src/storage/memo.rs index cbbd3c9..1a90d6a 100644 --- a/optd-core/src/storage/memo.rs +++ b/optd-core/src/storage/memo.rs @@ -9,20 +9,29 @@ use sqlx::{ SqliteConnection, SqlitePool, }; -use crate::cascades::{ - expressions::*, - groups::{ExplorationStatus, RelationalGroupId, ScalarGroupId}, - memo::Memoize, +use crate::{cascades::goal::Goal, operators::scalar::ScalarOperatorKind}; +use crate::{ + cascades::properties::PhysicalProperty, operators::relational::logical::LogicalOperatorKind, +}; +use crate::{ + cascades::{ + expressions::*, + goal::OptimizationStatus, + groups::{ExplorationStatus, RelationalGroupId, ScalarGroupId}, + memo::Memoize, + }, + operators::relational::physical::PhysicalOperatorKind, }; -use crate::operators::relational::logical::LogicalOperatorKind; -use crate::operators::scalar::ScalarOperatorKind; /// A storage manager that manages connections to the database. +#[derive(Debug)] pub struct SqliteMemo { /// An async connection pool to the SQLite database. db: SqlitePool, /// SQL query string to get all logical expressions in a group. get_all_logical_exprs_in_group_query: String, + /// SQL query string to get all physical expressions in a group. + get_all_physical_exprs_in_group_query: String, /// SQL query string to get all scalar expressions in a group. get_all_scalar_exprs_in_group_query: String, } @@ -48,6 +57,7 @@ impl SqliteMemo { let memo = Self { db: SqlitePool::connect_with(options).await?, get_all_logical_exprs_in_group_query: get_all_logical_exprs_in_group_query().into(), + get_all_physical_exprs_in_group_query: get_all_physical_exprs_in_group_query().into(), get_all_scalar_exprs_in_group_query: get_all_scalar_exprs_in_group_query().into(), }; memo.migrate().await?; @@ -71,6 +81,22 @@ impl SqliteMemo { } impl Memoize for SqliteMemo { + async fn create_or_get_relation_group_goal( + &self, + group_id: RelationalGroupId, + required_physical_props: Vec<PhysicalProperty>, + ) -> Result<Goal> { + let mut txn = self.begin().await?; + let goal = sqlx::query_as( + "INSERT INTO relation_group_goals (relation_group_id, required_physical_properties, optimization_status) VALUES ($1, $2, $3) ON CONFLICT DO UPDATE SET relation_group_id = relation_group_id RETURNING id, optimization_status", + ).bind(group_id) + .bind(serde_json::to_value(&required_physical_props)?) + .bind(OptimizationStatus::Unoptimized) + .fetch_one(&mut *txn) + .await?; + Ok(goal) + } + async fn get_all_logical_exprs_in_group( &self, group_id: RelationalGroupId, @@ -184,6 +210,40 @@ impl Memoize for SqliteMemo { txn.commit().await?; Ok(to) } + + async fn get_all_physical_exprs_in_group( + &self, + group_id: RelationalGroupId, + ) -> Result<Vec<(PhysicalExpressionId, PhysicalExpression)>> { + #[derive(sqlx::FromRow)] + struct PhysicalExprRecord { + physical_expression_id: PhysicalExpressionId, + data: sqlx::types::Json<PhysicalExpression>, + } + + let mut txn = self.begin().await?; + let representative_group_id = self.get_representative_group_id(&mut txn, group_id).await?; + let physical_exprs: Vec<PhysicalExprRecord> = + sqlx::query_as(&self.get_all_physical_exprs_in_group_query) + .bind(representative_group_id) + .fetch_all(&mut *txn) + .await?; + + txn.commit().await?; + Ok(physical_exprs + .into_iter() + .map(|record| (record.physical_expression_id, record.data.0)) + .collect()) + } + + async fn add_physical_expr_to_group( + &self, + physical_expr: &PhysicalExpression, + group_id: RelationalGroupId, + ) -> Result<RelationalGroupId> { + self.add_physical_expr_to_group_inner(physical_expr, group_id) + .await + } } // Helper functions for implementing the `Memoize` trait.
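Taken together, the new `Memoize` methods give the optimizer a physical-side workflow mirroring the logical one. A minimal sketch of the intended call sequence (assuming an async context, the trait in scope, and a `physical_expr`/`group_id` produced by earlier optimization steps; imports abbreviated):

```rust
use optd_core::cascades::memo::Memoize;

// Sketch: goal creation, implementation insertion, and read-back.
async fn demo(
    memo: &SqliteMemo,
    group_id: RelationalGroupId,
    physical_expr: &PhysicalExpression,
) -> anyhow::Result<()> {
    // Register (or fetch) the optimization goal for this group; an empty
    // property vector means "no required physical properties".
    let _goal = memo
        .create_or_get_relation_group_goal(group_id, vec![])
        .await?;
    // Record one physical implementation in the group...
    let _ = memo.add_physical_expr_to_group(physical_expr, group_id).await?;
    // ...then enumerate every physical expression the group now holds.
    let exprs = memo.get_all_physical_exprs_in_group(group_id).await?;
    assert!(!exprs.is_empty());
    Ok(())
}
```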
@@ -306,37 +366,55 @@ impl SqliteMemo { .fetch_one(&mut *txn) .await? } - ScalarExpression::Add(add) => { + ScalarExpression::BinaryOp(binary_op) => { + Self::insert_into_scalar_expressions( + &mut txn, + scalar_expr_id, + group_id, + ScalarOperatorKind::Binary, + ) + .await?; + + sqlx::query_scalar("INSERT INTO scalar_binary_ops (scalar_expression_id, group_id, kind, left_group_id, right_group_id) VALUES ($1, $2, $3, $4, $5) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + .bind(scalar_expr_id) + .bind(group_id) + .bind(serde_json::to_string(&binary_op.kind)?) + .bind(binary_op.left) + .bind(binary_op.right) + .fetch_one(&mut *txn) + .await? + } + ScalarExpression::UnaryOp(unary_op) => { Self::insert_into_scalar_expressions( &mut txn, scalar_expr_id, group_id, - ScalarOperatorKind::Add, + ScalarOperatorKind::Unary, ) .await?; - sqlx::query_scalar("INSERT INTO scalar_adds (scalar_expression_id, group_id, left_group_id, right_group_id) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + sqlx::query_scalar("INSERT INTO scalar_unary_ops (scalar_expression_id, group_id, kind, child_group_id) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") .bind(scalar_expr_id) .bind(group_id) - .bind(add.left) - .bind(add.right) + .bind(serde_json::to_string(&unary_op.kind)?) + .bind(unary_op.child) .fetch_one(&mut *txn) .await? } - ScalarExpression::Equal(equal) => { + ScalarExpression::LogicOp(logic) => { Self::insert_into_scalar_expressions( &mut txn, scalar_expr_id, group_id, - ScalarOperatorKind::Equal, + ScalarOperatorKind::Logic, ) .await?; - sqlx::query_scalar("INSERT INTO scalar_equals (scalar_expression_id, group_id, left_group_id, right_group_id) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + sqlx::query_scalar("INSERT INTO scalar_logic_ops (scalar_expression_id, group_id, kind, children_group_ids) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") .bind(scalar_expr_id) .bind(group_id) - .bind(equal.left) - .bind(equal.right) + .bind(serde_json::to_string(&logic.kind)?) + .bind(serde_json::to_value(&logic.children)?) .fetch_one(&mut *txn) .await? } @@ -471,6 +549,23 @@ impl SqliteMemo { .fetch_one(&mut *txn) .await? } + LogicalExpression::Project(project) => { + Self::insert_into_logical_expressions( + &mut txn, + logical_expr_id, + group_id, + LogicalOperatorKind::Project, + ) + .await?; + + sqlx::query_scalar("INSERT INTO projects (logical_expression_id, group_id, child_group_id, fields_group_ids) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + .bind(logical_expr_id) + .bind(group_id) + .bind(project.child) + .bind(serde_json::to_value(&project.fields)?) + .fetch_one(&mut *txn) + .await?
+ } }; if inserted_group_id == group_id { @@ -518,6 +613,111 @@ impl SqliteMemo { .await?; Ok(()) } + + async fn add_physical_expr_to_group_inner( + &self, + physical_expr: &PhysicalExpression, + group_id: RelationalGroupId, + ) -> anyhow::Result { + let mut txn = self.begin().await?; + let group_id = self.get_representative_group_id(&mut txn, group_id).await?; + let physical_expr_id = txn.new_physical_expression_id().await?; + + let inserted_group_id: RelationalGroupId = match physical_expr { + PhysicalExpression::TableScan(scan) => { + Self::insert_into_physical_expressions( + &mut txn, + physical_expr_id, + group_id, + PhysicalOperatorKind::TableScan, + ) + .await?; + + sqlx::query_scalar("INSERT INTO table_scans (physical_expression_id, group_id, table_name, predicate_group_id) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + .bind(physical_expr_id) + .bind(group_id) + .bind(serde_json::to_string(&scan.table_name)?) + .bind(scan.predicate) + .fetch_one(&mut *txn) + .await? + } + PhysicalExpression::Filter(filter) => { + Self::insert_into_physical_expressions( + &mut txn, + physical_expr_id, + group_id, + PhysicalOperatorKind::Filter, + ) + .await?; + + sqlx::query_scalar("INSERT INTO physical_filters (physical_expression_id, group_id, child_group_id, predicate_group_id) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + .bind(physical_expr_id) + .bind(group_id) + .bind(filter.child) + .bind(filter.predicate) + .fetch_one(&mut *txn) + .await? + } + PhysicalExpression::NestedLoopJoin(join) => { + Self::insert_into_physical_expressions( + &mut txn, + physical_expr_id, + group_id, + PhysicalOperatorKind::NestedLoopJoin, + ) + .await?; + + sqlx::query_scalar("INSERT INTO nested_loop_joins (physical_expression_id, group_id, join_type, outer_group_id, inner_group_id, condition_group_id) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + .bind(physical_expr_id) + .bind(group_id) + .bind(serde_json::to_string(&join.join_type)?) + .bind(join.outer) + .bind(join.inner) + .bind(join.condition) + .fetch_one(&mut *txn) + .await? + } + PhysicalExpression::Project(project) => { + Self::insert_into_physical_expressions( + &mut txn, + physical_expr_id, + group_id, + PhysicalOperatorKind::Project, + ) + .await?; + + sqlx::query_scalar("INSERT INTO physical_projects (physical_expression_id, group_id, child_group_id, fields_group_ids) VALUES ($1, $2, $3, $4) ON CONFLICT DO UPDATE SET group_id = group_id RETURNING group_id") + .bind(physical_expr_id) + .bind(group_id) + .bind(project.child) + .bind(serde_json::to_value(&project.fields)?) + .fetch_one(&mut *txn) + .await? + } + _ => unimplemented!(), + }; + txn.commit().await?; + + Ok(inserted_group_id) + } + + /// Inserts an entry into the `physical_expressions` table. + async fn insert_into_physical_expressions( + txn: &mut SqliteConnection, + physical_expr_id: PhysicalExpressionId, + group_id: RelationalGroupId, + operator_kind: PhysicalOperatorKind, + ) -> anyhow::Result<()> { + sqlx::query( + "INSERT INTO physical_expressions (id, group_id, operator_kind) VALUES ($1, $2, $3)", + ) + .bind(physical_expr_id) + .bind(group_id) + .bind(operator_kind) + .execute(&mut *txn) + .await?; + Ok(()) + } } /// The SQL query to get all logical expressions in a group. 
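The `get_all_*_exprs_in_group` queries below rely on SQLite's `json_object` to rebuild each row as an externally tagged enum value, which `sqlx::types::Json<PhysicalExpression>` then hands to serde. Roughly, a physical `Project` row comes back shaped like this (a sketch; group ids shown as bare integers for illustration):

```rust
// Sketch of the per-row JSON the query assembles; the derived
// `Deserialize` impls on the operator structs consume this shape.
let row = serde_json::json!({
    "Project": {
        "child": 1,          // relational group id of the input
        "fields": [2, 3]     // scalar group ids of the projected expressions
    }
});
```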
@@ -529,7 +729,21 @@ const fn get_all_logical_exprs_in_group_query() -> &'static str { " UNION ALL ", "SELECT logical_expression_id, json_object('Filter', json_object('child', child_group_id, 'predicate', predicate_group_id)) as data FROM filters WHERE group_id = $1", " UNION ALL ", - "SELECT logical_expression_id, json_object('Join', json_object('join_type', json(join_type), 'left', left_group_id, 'right', right_group_id, 'condition', condition_group_id)) as data FROM joins WHERE group_id = $1" + "SELECT logical_expression_id, json_object('Join', json_object('join_type', json(join_type), 'left', left_group_id, 'right', right_group_id, 'condition', condition_group_id)) as data FROM joins WHERE group_id = $1", + " UNION ALL ", + "SELECT logical_expression_id, json_object('Project', json_object('child', child_group_id, 'fields', json(fields_group_ids))) as data FROM projects WHERE group_id = $1" + ) +} + +const fn get_all_physical_exprs_in_group_query() -> &'static str { + concat!( + "SELECT physical_expression_id, json_object('TableScan', json_object('table_name', json(table_name), 'predicate', predicate_group_id)) as data FROM table_scans WHERE group_id = $1", + " UNION ALL ", + "SELECT physical_expression_id, json_object('Filter', json_object('child', child_group_id, 'predicate', predicate_group_id)) as data FROM physical_filters WHERE group_id = $1", + " UNION ALL ", + "SELECT physical_expression_id, json_object('NestedLoopJoin', json_object('join_type', json(join_type), 'outer', outer_group_id, 'inner', inner_group_id, 'condition', condition_group_id)) as data FROM nested_loop_joins WHERE group_id = $1", + " UNION ALL ", + "SELECT physical_expression_id, json_object('Project', json_object('child', child_group_id, 'fields', json(fields_group_ids))) as data FROM physical_projects WHERE group_id = $1" ) } @@ -542,9 +756,11 @@ const fn get_all_scalar_exprs_in_group_query() -> &'static str { " UNION ALL ", "SELECT scalar_expression_id, json_object('ColumnRef', json_object('column_index', json(column_index))) as data FROM scalar_column_refs WHERE group_id = $1", " UNION ALL ", - "SELECT scalar_expression_id, json_object('Add', json_object('left', left_group_id, 'right', right_group_id)) as data FROM scalar_adds WHERE group_id = $1", + "SELECT scalar_expression_id, json_object('BinaryOp', json_object('kind', json(kind), 'left', left_group_id, 'right', right_group_id)) as data FROM scalar_binary_ops WHERE group_id = $1", + " UNION ALL ", + "SELECT scalar_expression_id, json_object('LogicOp', json_object('kind', json(kind), 'children', json(children_group_ids))) as data FROM scalar_logic_ops WHERE group_id = $1", " UNION ALL ", - "SELECT scalar_expression_id, json_object('Equal', json_object('left', left_group_id, 'right', right_group_id)) as data FROM scalar_equals WHERE group_id = $1" + "SELECT scalar_expression_id, json_object('UnaryOp', json_object('kind', json(kind), 'child', child_group_id)) as data FROM scalar_unary_ops WHERE group_id = $1", ) } @@ -562,46 +778,40 @@ mod tests { let true_predicate = ScalarExpression::Constant(constants::Constant::new(OptdValue::Bool(true))); let true_predicate_group = memo.add_scalar_expr(&true_predicate).await?; - let scan1 = Arc::new(LogicalExpression::Scan(scan::Scan::new( - "t1", - true_predicate_group, - ))); + let scan1 = Arc::new(scan::scan("t1", true_predicate_group)); let scan1_group = memo.add_logical_expr(&scan1).await?; let dup_scan1_group = memo.add_logical_expr(&scan1).await?; assert_eq!(scan1_group, dup_scan1_group); - let scan2 = 
Arc::new(LogicalExpression::Scan(scan::Scan::new( - "t2", - true_predicate_group, - ))); + let scan2 = Arc::new(scan::scan("t2", true_predicate_group)); let scan2_group = memo.add_logical_expr(&scan2).await?; let dup_scan2_group = memo.add_logical_expr(&scan2).await?; assert_eq!(scan2_group, dup_scan2_group); - let t1v1 = ScalarExpression::ColumnRef(column_ref::ColumnRef::new(1)); + let t1v1 = column_ref::column_ref(1); let t1v1_group_id = memo.add_scalar_expr(&t1v1).await?; - let t2v2 = ScalarExpression::ColumnRef(column_ref::ColumnRef::new(2)); + let t2v2 = column_ref::column_ref(2); let t2v2_group_id = memo.add_scalar_expr(&t2v2).await?; - let join_cond = ScalarExpression::Equal(equal::Equal::new(t1v1_group_id, t2v2_group_id)); + let join_cond = binary_op::equal(t1v1_group_id, t2v2_group_id); let join_cond_group_id = memo.add_scalar_expr(&join_cond).await?; - let join = Arc::new(LogicalExpression::Join(join::Join::new( + let join = Arc::new(join::join( "inner", scan1_group, scan2_group, join_cond_group_id, - ))); + )); let join_group = memo.add_logical_expr(&join).await?; let dup_join_group = memo.add_logical_expr(&join).await?; assert_eq!(join_group, dup_join_group); - let join_alt = Arc::new(LogicalExpression::Join(join::Join::new( + let join_alt = Arc::new(join::join( "inner", scan2_group, scan1_group, join_cond_group_id, - ))); + )); let join_alt_group = memo .add_logical_expr_to_group(&join_alt, join_group) .await?; diff --git a/optd-core/src/storage/migrations/20250130134520_create_logical_properties.down.sql b/optd-core/src/storage/migrations/20250130134520_create_logical_properties.down.sql new file mode 100644 index 0000000..d2f607c --- /dev/null +++ b/optd-core/src/storage/migrations/20250130134520_create_logical_properties.down.sql @@ -0,0 +1 @@ +-- Add down migration script here diff --git a/optd-core/src/storage/migrations/20250130134520_create_logical_properties.up.sql b/optd-core/src/storage/migrations/20250130134520_create_logical_properties.up.sql new file mode 100644 index 0000000..0da0a53 --- /dev/null +++ b/optd-core/src/storage/migrations/20250130134520_create_logical_properties.up.sql @@ -0,0 +1 @@ +-- Add up migration script here diff --git a/optd-core/src/storage/migrations/20250130134522_create_physical_properties.down.sql b/optd-core/src/storage/migrations/20250130134522_create_physical_properties.down.sql new file mode 100644 index 0000000..d2f607c --- /dev/null +++ b/optd-core/src/storage/migrations/20250130134522_create_physical_properties.down.sql @@ -0,0 +1 @@ +-- Add down migration script here diff --git a/optd-core/src/storage/migrations/20250130134522_create_physical_properties.up.sql b/optd-core/src/storage/migrations/20250130134522_create_physical_properties.up.sql new file mode 100644 index 0000000..bd968b4 --- /dev/null +++ b/optd-core/src/storage/migrations/20250130134522_create_physical_properties.up.sql @@ -0,0 +1 @@ +-- Add up migration script here \ No newline at end of file diff --git a/optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.down.sql b/optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.down.sql new file mode 100644 index 0000000..7a7a821 --- /dev/null +++ b/optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.down.sql @@ -0,0 +1 @@ +DROP TABLE relation_group_goals; diff --git a/optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.up.sql b/optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.up.sql new file mode 
100644 index 0000000..c190831 --- /dev/null +++ b/optd-core/src/storage/migrations/20250130140570_create_relation_group_goals.up.sql @@ -0,0 +1,24 @@ +CREATE TABLE relation_group_goals ( + -- The unique identifier of the relation group optimization goal. + id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + -- The relation group id that this goal is associated with. + relation_group_id BIGINT NOT NULL, + -- The required physical properties of the group. + required_physical_properties JSON, + -- The optimization status of the goal. + -- It can be one of the following values: Unoptimized, Pending, Optimized. + optimization_status TEXT, + + -- The physical expression id of the winner. + -- TODO(yuchen): handle property enforcement. + winner_physical_expression_id BIGINT, + -- The time at which the goal is created. + created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + + FOREIGN KEY (relation_group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (winner_physical_expression_id) REFERENCES physical_expressions (id) +); + + +CREATE UNIQUE INDEX relation_group_goals_unique_fields ON relation_group_goals (relation_group_id, required_physical_properties); diff --git a/optd-core/src/storage/migrations/20250130144848_create_physical_expressions.down.sql b/optd-core/src/storage/migrations/20250130144848_create_physical_expressions.down.sql new file mode 100644 index 0000000..e69643d --- /dev/null +++ b/optd-core/src/storage/migrations/20250130144848_create_physical_expressions.down.sql @@ -0,0 +1 @@ +DROP TABLE physical_expressions; diff --git a/optd-core/src/storage/migrations/20250130144848_create_physical_expressions.up.sql b/optd-core/src/storage/migrations/20250130144848_create_physical_expressions.up.sql new file mode 100644 index 0000000..67c0ff7 --- /dev/null +++ b/optd-core/src/storage/migrations/20250130144848_create_physical_expressions.up.sql @@ -0,0 +1,19 @@ +CREATE TABLE physical_expressions ( + -- A unique identifier for a physical expression in the optimizer. + id INTEGER NOT NULL PRIMARY KEY, + -- The representative group that a physical expression belongs to. + group_id BIGINT NOT NULL ON CONFLICT REPLACE, + -- The kind of the physical operator. + operator_kind TEXT NOT NULL, + -- The time at which the physical expression is created. + created_at TIMESTAMP DEFAULT (CURRENT_TIMESTAMP) NOT NULL, + -- When group merging happens, the group id of the physical expression is also updated.
+ FOREIGN KEY (group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE +); + +CREATE TRIGGER update_physical_expressions_relation_group_ids +AFTER UPDATE OF representative_group_id ON relation_groups +BEGIN + UPDATE OR REPLACE physical_expressions SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; +END; diff --git a/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.down.sql b/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.down.sql deleted file mode 100644 index 573e50e..0000000 --- a/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.down.sql +++ /dev/null @@ -1 +0,0 @@ -DROP TABLE scalar_adds; diff --git a/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.up.sql b/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.up.sql deleted file mode 100644 index 213327d..0000000 --- a/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_adds.up.sql +++ /dev/null @@ -1,29 +0,0 @@ --- A scalar operator that adds two scalar expressions and returns their sum. -CREATE TABLE scalar_adds ( - -- The scalar expression id that this operator associated with. - scalar_expression_id INTEGER NOT NULL PRIMARY KEY, - -- The group id of the expression. - group_id BIGINT NOT NULL, - -- The group id of left operand of the addition. - left_group_id BIGINT NOT NULL, - -- The group id of right operand of the addition. - right_group_id BIGINT NOT NULL, - - FOREIGN KEY (scalar_expression_id) REFERENCES scalar_expressions (id) - ON UPDATE CASCADE ON DELETE CASCADE, - FOREIGN KEY (group_id) REFERENCES scalar_groups (id) - ON UPDATE CASCADE ON DELETE CASCADE, - FOREIGN KEY (left_group_id) REFERENCES scalar_groups (id) - ON UPDATE CASCADE ON DELETE CASCADE - FOREIGN KEY (right_group_id) REFERENCES scalar_groups (id) - ON UPDATE CASCADE ON DELETE CASCADE -); - --- Unique index on add's data fields. -CREATE UNIQUE INDEX scalar_adds_data_fields ON scalar_adds (left_group_id, right_group_id); - -CREATE TRIGGER update_scalar_adds_scalar_group_ids -AFTER UPDATE OF representative_group_id ON scalar_groups -BEGIN - UPDATE OR REPLACE scalar_adds SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; -END; diff --git a/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.down.sql b/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.down.sql new file mode 100644 index 0000000..bf1c541 --- /dev/null +++ b/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_binary_ops; diff --git a/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.up.sql b/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.up.sql new file mode 100644 index 0000000..7b29f81 --- /dev/null +++ b/optd-core/src/storage/migrations/20250203170430_create_scalar_operator_binary_ops.up.sql @@ -0,0 +1,33 @@ +-- A scalar operator that performs a binary operation on two scalar expressions. +CREATE TABLE scalar_binary_ops ( + -- The scalar expression id that this operator is associated with. + scalar_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the expression. + group_id BIGINT NOT NULL, + -- The kind of binary operation (e.g. +, -, *, /, ==, >). + kind JSON NOT NULL, + -- The group id of the left operand of the binary operation. + left_group_id BIGINT NOT NULL, + -- The group id of the right operand of the binary operation. + right_group_id BIGINT NOT NULL, + + FOREIGN KEY (scalar_expression_id) REFERENCES scalar_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (left_group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (right_group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on binary operation's data fields. +CREATE UNIQUE INDEX scalar_binary_ops_data_fields ON scalar_binary_ops (kind, left_group_id, right_group_id); + +CREATE TRIGGER update_scalar_binary_ops_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE OR REPLACE scalar_binary_ops SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; + UPDATE OR REPLACE scalar_binary_ops SET left_group_id = NEW.representative_group_id WHERE left_group_id = OLD.representative_group_id; + UPDATE OR REPLACE scalar_binary_ops SET right_group_id = NEW.representative_group_id WHERE right_group_id = OLD.representative_group_id; +END; diff --git a/optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.down.sql b/optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.down.sql deleted file mode 100644 index cfec6fc..0000000 --- a/optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.down.sql +++ /dev/null @@ -1 +0,0 @@ -DROP TABLE scalar_equals; diff --git a/optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.up.sql b/optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.up.sql deleted file mode 100644 index 1148a3d..0000000 --- a/optd-core/src/storage/migrations/20250203170454_create_scalar_operator_equals.up.sql +++ /dev/null @@ -1,29 +0,0 @@ --- A scalar operator that checks if two scalar expressions of the same type are equal. -CREATE TABLE scalar_equals ( - -- The scalar expression id that this operator is associated with. - scalar_expression_id INTEGER NOT NULL PRIMARY KEY, - -- The group id of the expression. - group_id BIGINT NOT NULL, - -- The group id of left operand of the equality. - left_group_id BIGINT NOT NULL, - -- The group id of right operand of the equality. - right_group_id BIGINT NOT NULL, - - FOREIGN KEY (scalar_expression_id) REFERENCES scalar_expressions (id) - ON UPDATE CASCADE ON DELETE CASCADE, - FOREIGN KEY (group_id) REFERENCES scalar_groups (id) - ON UPDATE CASCADE ON DELETE CASCADE, - FOREIGN KEY (left_group_id) REFERENCES scalar_groups (id) - ON UPDATE CASCADE ON DELETE CASCADE - FOREIGN KEY (right_group_id) REFERENCES scalar_groups (id) - ON UPDATE CASCADE ON DELETE CASCADE -); - --- Unique index on equal's data fields.
-CREATE UNIQUE INDEX scalar_equals_data_fields ON scalar_equals (left_group_id, right_group_id); - -CREATE TRIGGER update_scalar_equals_scalar_group_ids -AFTER UPDATE OF representative_group_id ON scalar_groups -BEGIN - UPDATE OR REPLACE scalar_equals SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; -END; diff --git a/optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.down.sql b/optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.down.sql new file mode 100644 index 0000000..2688d24 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_logic_ops; diff --git a/optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.up.sql b/optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.up.sql new file mode 100644 index 0000000..52bc7c3 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211035043_create_scalar_operator_logic_ops.up.sql @@ -0,0 +1,34 @@ +-- A scalar operator that performs a logic operation on its operands. +CREATE TABLE scalar_logic_ops ( + -- The scalar expression id that this operator is associated with. + scalar_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the expression. + group_id BIGINT NOT NULL, + -- The kind of logic operation (e.g. AND, OR). + kind JSON NOT NULL, + -- The group id of the children operands. + children_group_ids JSON NOT NULL, + + FOREIGN KEY (scalar_expression_id) REFERENCES scalar_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE + -- (Not enforced) + -- FOREIGN KEY json_each(children_group_ids) REFERENCES scalar_groups (id) + -- ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on logic operation's data fields. +CREATE UNIQUE INDEX scalar_logic_ops_data_fields ON scalar_logic_ops (kind, children_group_ids); + +CREATE TRIGGER update_scalar_logic_ops_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE OR REPLACE scalar_logic_ops SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; + UPDATE scalar_logic_ops SET children_group_ids = ( + SELECT json_group_array( + CASE + WHEN value = OLD.representative_group_id THEN NEW.representative_group_id + ELSE value + END + ) FROM json_each(children_group_ids) + ); +END; diff --git a/optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.down.sql b/optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.down.sql new file mode 100644 index 0000000..dd39237 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.down.sql @@ -0,0 +1 @@ +DROP TABLE scalar_unary_ops; diff --git a/optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.up.sql b/optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.up.sql new file mode 100644 index 0000000..05bd200 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211035044_create_scalar_operator_unary_ops.up.sql @@ -0,0 +1,29 @@ +-- A scalar operator that performs a unary operation on its child operand. +CREATE TABLE scalar_unary_ops ( + -- The scalar expression id that this operator is associated with. + scalar_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the expression. + group_id BIGINT NOT NULL, + -- The kind of unary operation (e.g. NOT, -). + kind JSON NOT NULL, + -- The group id of the child operand of the operation.
+ child_group_id BIGINT NOT NULL, + + FOREIGN KEY (scalar_expression_id) REFERENCES scalar_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (child_group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on unary operation's data fields. +CREATE UNIQUE INDEX scalar_unary_ops_data_fields ON scalar_unary_ops (kind, child_group_id); + +CREATE TRIGGER update_scalar_unary_ops_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE OR REPLACE scalar_unary_ops SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; + UPDATE OR REPLACE scalar_unary_ops SET child_group_id = NEW.representative_group_id WHERE child_group_id = OLD.representative_group_id; +END; + diff --git a/optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.down.sql b/optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.down.sql new file mode 100644 index 0000000..f657e52 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.down.sql @@ -0,0 +1 @@ +DROP TABLE table_scans; diff --git a/optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.up.sql b/optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.up.sql new file mode 100644 index 0000000..de63e20 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211044327_create_physical_operator_table_scans.up.sql @@ -0,0 +1,33 @@ +-- A physical table scan operator that scans rows from a table. +CREATE TABLE table_scans ( + -- The physical expression id that this scan is associated with. + physical_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the scan. + group_id BIGINT NOT NULL, + -- The name of the table. + -- TODO(yuchen): change this to a table id. + table_name JSON NOT NULL, + -- An optional filter expression for predicate pushdown into scan operators. + predicate_group_id BIGINT NOT NULL, + FOREIGN KEY (physical_expression_id) REFERENCES physical_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (predicate_group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on table scan's data fields.
+CREATE UNIQUE INDEX table_scans_data_fields ON table_scans (table_name, predicate_group_id); + +CREATE TRIGGER update_table_scans_relation_group_ids +AFTER UPDATE OF representative_group_id ON relation_groups +BEGIN + UPDATE OR REPLACE table_scans SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; +END; + +CREATE TRIGGER update_table_scans_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE OR REPLACE table_scans SET predicate_group_id = NEW.representative_group_id WHERE predicate_group_id = OLD.representative_group_id; +END; diff --git a/optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.down.sql b/optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.down.sql new file mode 100644 index 0000000..2ba7c2b --- /dev/null +++ b/optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.down.sql @@ -0,0 +1 @@ +DROP TABLE physical_filters; diff --git a/optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.up.sql b/optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.up.sql new file mode 100644 index 0000000..3af95f0 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211075710_create_physical_operator_physical_filters.up.sql @@ -0,0 +1,37 @@ +-- A physical filter operator that selects rows matching a condition. +CREATE TABLE physical_filters ( + -- The physical expression id that this filter is associated with. + physical_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the filter. + group_id BIGINT NOT NULL, + -- The input relation. + child_group_id BIGINT NOT NULL, + -- The predicate applied to the child relation: e.g. `column_a > 5`. + predicate_group_id BIGINT NOT NULL, + + FOREIGN KEY (physical_expression_id) REFERENCES physical_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (child_group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (predicate_group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on filter's data fields.
+CREATE UNIQUE INDEX physical_filters_data_fields ON physical_filters (child_group_id, predicate_group_id); + +CREATE TRIGGER update_physical_filters_relation_group_ids +AFTER UPDATE OF representative_group_id ON relation_groups +BEGIN + UPDATE OR REPLACE physical_filters SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; + UPDATE OR REPLACE physical_filters SET child_group_id = NEW.representative_group_id WHERE child_group_id = OLD.representative_group_id; +END; + + +CREATE TRIGGER update_physical_filters_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE OR REPLACE physical_filters SET predicate_group_id = NEW.representative_group_id WHERE predicate_group_id = OLD.representative_group_id; +END; diff --git a/optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.down.sql b/optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.down.sql new file mode 100644 index 0000000..7ae6fe2 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.down.sql @@ -0,0 +1 @@ +DROP TABLE physical_projects; diff --git a/optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.up.sql b/optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.up.sql new file mode 100644 index 0000000..2d63ccd --- /dev/null +++ b/optd-core/src/storage/migrations/20250211075929_create_physical_operator_physical_projects.up.sql @@ -0,0 +1,43 @@ +-- A physical project operator takes in a relation and outputs a relation with tuples that +-- contain only specified attributes. +CREATE TABLE physical_projects ( + -- The physical expression id that this project is associated with. + physical_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the project. + group_id BIGINT NOT NULL, + -- The input relation. + child_group_id BIGINT NOT NULL, + -- The projection list. A vector of scalar group ids. + fields_group_ids JSON NOT NULL, + + FOREIGN KEY (physical_expression_id) REFERENCES physical_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (child_group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE + -- (Not enforced) + -- FOREIGN KEY json_each(fields_group_ids) REFERENCES scalar_groups (id) + -- ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on project's data fields.
+CREATE UNIQUE INDEX physical_projects_data_fields ON physical_projects (child_group_id, fields_group_ids); + +CREATE TRIGGER update_physical_projects_relation_group_ids +AFTER UPDATE OF representative_group_id ON relation_groups +BEGIN + UPDATE OR REPLACE physical_projects SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; + UPDATE OR REPLACE physical_projects SET child_group_id = NEW.representative_group_id WHERE child_group_id = OLD.representative_group_id; +END; + +CREATE TRIGGER update_physical_projects_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE physical_projects SET fields_group_ids = ( + SELECT json_group_array( + CASE + WHEN value = OLD.representative_group_id THEN NEW.representative_group_id + ELSE value + END + ) FROM json_each(fields_group_ids) + ); +END; diff --git a/optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.down.sql b/optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.down.sql new file mode 100644 index 0000000..0b065f2 --- /dev/null +++ b/optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.down.sql @@ -0,0 +1 @@ +DROP TABLE nested_loop_joins; diff --git a/optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.up.sql b/optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.up.sql new file mode 100644 index 0000000..385b0fb --- /dev/null +++ b/optd-core/src/storage/migrations/20250211080237_create_physical_operator_nested_loop_joins.up.sql @@ -0,0 +1,43 @@ +-- A physical join operator combines rows from two relations. +CREATE TABLE nested_loop_joins ( + -- The physical expression id that this join is associated with. + physical_expression_id INTEGER NOT NULL PRIMARY KEY, + -- The group id of the join. + group_id BIGINT NOT NULL, + -- The type of the join. + join_type JSON NOT NULL, + -- The outer input relation. + outer_group_id BIGINT NOT NULL, + -- The inner input relation. + inner_group_id BIGINT NOT NULL, + -- The join condition. e.g. `outer_column_a = inner_column_b`. + condition_group_id BIGINT NOT NULL, + + FOREIGN KEY (physical_expression_id) REFERENCES physical_expressions (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (outer_group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (inner_group_id) REFERENCES relation_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE, + FOREIGN KEY (condition_group_id) REFERENCES scalar_groups (id) + ON UPDATE CASCADE ON DELETE CASCADE +); + +-- Unique index on join's data fields.
+CREATE UNIQUE INDEX nested_loop_joins_data_fields ON nested_loop_joins (join_type, outer_group_id, inner_group_id, condition_group_id); + +CREATE TRIGGER update_nested_loop_joins_relation_group_ids +AFTER UPDATE OF representative_group_id ON relation_groups +BEGIN + UPDATE OR REPLACE nested_loop_joins SET group_id = NEW.representative_group_id WHERE group_id = OLD.representative_group_id; + UPDATE OR REPLACE nested_loop_joins SET outer_group_id = NEW.representative_group_id WHERE outer_group_id = OLD.representative_group_id; + UPDATE OR REPLACE nested_loop_joins SET inner_group_id = NEW.representative_group_id WHERE inner_group_id = OLD.representative_group_id; +END; + + +CREATE TRIGGER update_nested_loop_joins_scalar_group_ids +AFTER UPDATE OF representative_group_id ON scalar_groups +BEGIN + UPDATE OR REPLACE nested_loop_joins SET condition_group_id = NEW.representative_group_id WHERE condition_group_id = OLD.representative_group_id; +END; diff --git a/optd-core/src/test_utils.rs b/optd-core/src/test_utils.rs index 9ef0f8f..a696752 100644 --- a/optd-core/src/test_utils.rs +++ b/optd-core/src/test_utils.rs @@ -2,12 +2,18 @@ use std::sync::Arc; use crate::{ operators::{ - relational::logical::{filter::Filter, join::Join, scan::Scan, LogicalOperator}, - scalar::{ - add::Add, column_ref::ColumnRef, constants::Constant, equal::Equal, ScalarOperator, + relational::{ + logical::{filter::Filter, join::Join, project::Project, scan::Scan, LogicalOperator}, + physical::{ + filter::filter::PhysicalFilter, join::nested_loop_join::NestedLoopJoin, + project::PhysicalProject, scan::table_scan::TableScan, PhysicalOperator, + }, }, + scalar::{column_ref::ColumnRef, constants::Constant, ScalarOperator}, + }, + plans::{ + logical::PartialLogicalPlan, physical::PartialPhysicalPlan, scalar::PartialScalarPlan, }, - plans::{logical::PartialLogicalPlan, scalar::PartialScalarPlan}, values::OptdValue, }; @@ -37,7 +43,16 @@ pub fn column_ref(column_index: i64) -> Arc { pub fn add(left: Arc, right: Arc) -> Arc { Arc::new(PartialScalarPlan::PartialMaterialized { - operator: ScalarOperator::Add(Add::new(left, right)), + operator: crate::operators::scalar::binary_op::add(left, right), + }) +} + +pub fn minus( + left: Arc, + right: Arc, +) -> Arc { + Arc::new(PartialScalarPlan::PartialMaterialized { + operator: crate::operators::scalar::binary_op::minus(left, right), }) } @@ -46,7 +61,31 @@ pub fn equal( right: Arc, ) -> Arc { Arc::new(PartialScalarPlan::PartialMaterialized { - operator: ScalarOperator::Equal(Equal::new(left, right)), + operator: crate::operators::scalar::binary_op::equal(left, right), + }) +} + +pub fn neg(child: Arc) -> Arc { + Arc::new(PartialScalarPlan::PartialMaterialized { + operator: crate::operators::scalar::unary_op::neg(child), + }) +} + +pub fn not(child: Arc) -> Arc { + Arc::new(PartialScalarPlan::PartialMaterialized { + operator: crate::operators::scalar::unary_op::not(child), + }) +} + +pub fn and(children: Vec>) -> Arc { + Arc::new(PartialScalarPlan::PartialMaterialized { + operator: crate::operators::scalar::logic_op::and(children), + }) +} + +pub fn or(children: Vec>) -> Arc { + Arc::new(PartialScalarPlan::PartialMaterialized { + operator: crate::operators::scalar::logic_op::or(children), }) } @@ -75,3 +114,49 @@ pub fn join( operator: LogicalOperator::Join(Join::new(join_type, left, right, condition)), }) } + +pub fn project( + child: Arc, + fields: Vec>, +) -> Arc { + Arc::new(PartialLogicalPlan::PartialMaterialized { + operator: LogicalOperator::Project(Project::new(child, fields)), 
+
+pub fn table_scan(table_name: &str, predicate: Arc<PartialScalarPlan>) -> Arc<PartialPhysicalPlan> {
+    Arc::new(PartialPhysicalPlan::PartialMaterialized {
+        operator: PhysicalOperator::TableScan(TableScan::new(table_name, predicate)),
+    })
+}
+
+pub fn physical_filter(
+    child: Arc<PartialPhysicalPlan>,
+    predicate: Arc<PartialScalarPlan>,
+) -> Arc<PartialPhysicalPlan> {
+    Arc::new(PartialPhysicalPlan::PartialMaterialized {
+        operator: PhysicalOperator::Filter(PhysicalFilter::new(child, predicate)),
+    })
+}
+
+pub fn nested_loop_join(
+    join_type: &str,
+    outer: Arc<PartialPhysicalPlan>,
+    inner: Arc<PartialPhysicalPlan>,
+    condition: Arc<PartialScalarPlan>,
+) -> Arc<PartialPhysicalPlan> {
+    Arc::new(PartialPhysicalPlan::PartialMaterialized {
+        operator: PhysicalOperator::NestedLoopJoin(NestedLoopJoin::new(
+            join_type, outer, inner, condition,
+        )),
+    })
+}
+
+pub fn physical_project(
+    child: Arc<PartialPhysicalPlan>,
+    fields: Vec<Arc<PartialScalarPlan>>,
+) -> Arc<PartialPhysicalPlan> {
+    Arc::new(PartialPhysicalPlan::PartialMaterialized {
+        operator: PhysicalOperator::Project(PhysicalProject::new(child, fields)),
+    })
+}
diff --git a/optd-core/src/values/mod.rs b/optd-core/src/values/mod.rs
index 556ea0c..6bfc007 100644
--- a/optd-core/src/values/mod.rs
+++ b/optd-core/src/values/mod.rs
@@ -21,6 +21,29 @@ pub enum OptdValue {
     // Complex Types: TODO(alexis). Enums, Optionals, Arrays, etc.
 }
 
+impl OptdValue {
+    pub fn as_str(&self) -> Option<&str> {
+        match self {
+            OptdValue::String(s) => Some(s),
+            _ => None,
+        }
+    }
+
+    pub fn as_bool(&self) -> Option<bool> {
+        match self {
+            OptdValue::Bool(b) => Some(*b),
+            _ => None,
+        }
+    }
+
+    pub fn as_i64(&self) -> Option<i64> {
+        match self {
+            OptdValue::Int64(i) => Some(*i),
+            _ => None,
+        }
+    }
+}
+
 /// Expressions that can be evaluated on OptdValues.
 ///
 /// This enum defines all possible expressions in the OPTD-DSL, including:
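The typed accessors above let callers unwrap an `OptdValue` without panicking: a mismatched variant simply yields `None`. A small usage sketch (assuming the enum is reachable at `optd_core::values::OptdValue`, per the file path):

```rust
use optd_core::values::OptdValue;

let v = OptdValue::Int64(42);
assert_eq!(v.as_i64(), Some(42)); // matching variant
assert_eq!(v.as_bool(), None); // mismatch is None, not a panic
assert_eq!(OptdValue::String("t1".into()).as_str(), Some("t1"));
```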
diff --git a/optd-datafusion-cli/Cargo.toml b/optd-datafusion-cli/Cargo.toml
new file mode 100644
index 0000000..fe67c83
--- /dev/null
+++ b/optd-datafusion-cli/Cargo.toml
@@ -0,0 +1,89 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "optd-datafusion-cli"
+description = "Command Line Client for DataFusion query engine."
+version = "45.0.0"
+authors = ["Apache DataFusion <dev@datafusion.apache.org>"]
+edition = "2021"
+keywords = ["arrow", "datafusion", "query", "sql"]
+license = "Apache-2.0"
+homepage = "https://datafusion.apache.org"
+repository = "https://github.com/apache/datafusion"
+rust-version = "1.81.0"
+readme = "README.md"
+
+[dependencies]
+arrow = { version = "54.1.0" }
+async-trait = "0.1.0"
+aws-config = "1.5.0"
+aws-credential-types = "1.2.0"
+aws-sdk-sso = "1.57.0"
+aws-sdk-ssooidc = "1.57.0"
+aws-sdk-sts = "1.57.0"
+clap = { version = "4.5.27", features = ["derive", "cargo"] }
+datafusion = { workspace = true, features = [
+    "avro",
+    "crypto_expressions",
+    "datetime_expressions",
+    "encoding_expressions",
+    "parquet",
+    "recursive_protection",
+    "regex_expressions",
+    "unicode_expressions",
+    "compression",
+] }
+datafusion-catalog = { version = "45.0.0" }
+optd-datafusion = { path = "../optd-datafusion" }
+dirs = "6.0.0"
+env_logger = "0.11"
+futures = "0.3"
+# pin as home 0.5.11 has MSRV 1.81. Can remove this once we bump MSRV to 1.81
+home = "=0.5.11"
+mimalloc = { version = "0.1", default-features = false }
+object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] }
+parking_lot = { version = "0.12" }
+parquet = { version = "54.1.0", default-features = false }
+regex = "1.8"
+rustyline = "15.0"
+tokio = { version = "1.24", features = [
+    "macros",
+    "rt",
+    "rt-multi-thread",
+    "sync",
+    "parking_lot",
+    "signal",
+] }
+url = "2.5.4"
+
+[dev-dependencies]
+assert_cmd = "2.0"
+ctor = "0.2.9"
+predicates = "3.0"
+rstest = "0.24"
+
+# [profile.ci]
+# inherits = "dev"
+# incremental = false
+
+# # ci turns off debug info, etc for dependencies to allow for smaller binaries making caching more effective
+# [profile.ci.package."*"]
+# debug = false
+# debug-assertions = false
+# strip = "debuginfo"
+# incremental = false
diff --git a/optd-datafusion-cli/Dockerfile b/optd-datafusion-cli/Dockerfile
new file mode 100644
index 0000000..f73b76b
--- /dev/null
+++ b/optd-datafusion-cli/Dockerfile
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+FROM rust:bookworm AS builder
+
+COPY . /usr/src/datafusion
+COPY ./datafusion /usr/src/datafusion/datafusion
+COPY ./datafusion-cli /usr/src/datafusion/datafusion-cli
+
+WORKDIR /usr/src/datafusion/datafusion-cli
+
+RUN rustup component add rustfmt
+
+RUN cargo build --release
+
+FROM debian:bookworm-slim
+
+COPY --from=builder /usr/src/datafusion/datafusion-cli/target/release/datafusion-cli /usr/local/bin
+
+RUN mkdir /data
+
+ENTRYPOINT ["datafusion-cli"]
+
+CMD ["--data-path", "/data"]
diff --git a/optd-datafusion-cli/README.md b/optd-datafusion-cli/README.md
new file mode 100644
index 0000000..ce09c3b
--- /dev/null
+++ b/optd-datafusion-cli/README.md
@@ -0,0 +1,48 @@
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+# DataFusion Command-line Interface
+
+[DataFusion](https://datafusion.apache.org/) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format.
+
+DataFusion CLI (`datafusion-cli`) is a small command line utility that runs SQL queries using the DataFusion engine.
+
+# Frequently Asked Questions
+
+## Where can I find more information?
+
+See the [`datafusion-cli` documentation](https://datafusion.apache.org/user-guide/cli/index.html) for further information.
+
+## How do I make my IDE work with `datafusion-cli`?
+
+Open the `datafusion/datafusion-cli` project as its own top-level project in
+your IDE (rather than opening the `datafusion` repository root).
+
+The reason `datafusion-cli` is not part of the main workspace in the
+[`datafusion Cargo.toml`] file is that `datafusion-cli` is a binary and has a
+checked-in `Cargo.lock` file to ensure reproducible builds.
+
+However, the `datafusion` crate and its sub-crates are intended for use as
+libraries and thus do not have a `Cargo.lock` file checked in, as described
+in the [main README] file.
+
+[`datafusion cargo.toml`]: https://github.com/apache/datafusion/blob/main/Cargo.toml
+[main readme]: ../README.md
diff --git a/optd-datafusion-cli/examples/cli-session-context.rs b/optd-datafusion-cli/examples/cli-session-context.rs
new file mode 100644
index 0000000..080a589
--- /dev/null
+++ b/optd-datafusion-cli/examples/cli-session-context.rs
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Shows an example of a custom session context that unions the input plan with itself.
+//! To run this example, use `cargo run --example cli-session-context` from within the `optd-datafusion-cli` directory.
+
+use std::sync::Arc;
+
+use datafusion::{
+    dataframe::DataFrame,
+    error::DataFusionError,
+    execution::{context::SessionState, TaskContext},
+    logical_expr::{LogicalPlan, LogicalPlanBuilder},
+    prelude::SessionContext,
+};
+use object_store::ObjectStore;
+use optd_datafusion_cli::{
+    cli_context::CliSessionContext, exec::exec_from_repl, print_options::PrintOptions,
+};
+
+/// This is a toy example of a custom session context that unions the input plan with itself.
+struct MyUnionerContext {
+    ctx: SessionContext,
+}
+
+impl Default for MyUnionerContext {
+    fn default() -> Self {
+        Self {
+            ctx: SessionContext::new(),
+        }
+    }
+}
+
+#[async_trait::async_trait]
+impl CliSessionContext for MyUnionerContext {
+    fn task_ctx(&self) -> Arc<TaskContext> {
+        self.ctx.task_ctx()
+    }
+
+    fn session_state(&self) -> SessionState {
+        self.ctx.state()
+    }
+
+    fn register_object_store(
+        &self,
+        url: &url::Url,
+        object_store: Arc<dyn ObjectStore>,
+    ) -> Option<Arc<dyn ObjectStore>> {
+        self.ctx.register_object_store(url, object_store)
+    }
+
+    fn register_table_options_extension_from_scheme(&self, _scheme: &str) {
+        unimplemented!()
+    }
+
+    async fn execute_logical_plan(&self, plan: LogicalPlan) -> Result<DataFrame, DataFusionError> {
+        let new_plan = LogicalPlanBuilder::from(plan.clone())
+            .union(plan.clone())?
+            .build()?;
+
+        self.ctx.execute_logical_plan(new_plan).await
+    }
+}
+
+#[tokio::main]
+/// Runs the example.
+pub async fn main() {
+    let my_ctx = MyUnionerContext::default();
+
+    let mut print_options = PrintOptions {
+        format: optd_datafusion_cli::print_format::PrintFormat::Automatic,
+        quiet: false,
+        maxrows: optd_datafusion_cli::print_options::MaxRows::Unlimited,
+        color: true,
+    };
+
+    exec_from_repl(&my_ctx, &mut print_options).await.unwrap();
+}
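The example above drives the custom context through the interactive REPL; the same trait also works with the non-interactive entry points. A sketch using `exec_from_commands` from `exec.rs` later in this patch (the print-option values here are illustrative placeholders):

```rust
// Run one statement through the custom context without starting a REPL.
let ctx = MyUnionerContext::default();
let print_options = PrintOptions {
    format: optd_datafusion_cli::print_format::PrintFormat::Table,
    quiet: true,
    maxrows: optd_datafusion_cli::print_options::MaxRows::Unlimited,
    color: false,
};
exec_from_commands(&ctx, vec!["SELECT 1;".to_string()], &print_options)
    .await
    .unwrap();
```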
diff --git a/optd-datafusion-cli/src/catalog.rs b/optd-datafusion-cli/src/catalog.rs
new file mode 100644
index 0000000..3755571
--- /dev/null
+++ b/optd-datafusion-cli/src/catalog.rs
@@ -0,0 +1,365 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::{Arc, Weak};
+
+use crate::object_storage::{get_object_store, AwsOptions, GcpOptions};
+
+use datafusion::catalog::{CatalogProvider, CatalogProviderList, SchemaProvider};
+
+use datafusion::common::plan_datafusion_err;
+use datafusion::datasource::listing::ListingTableUrl;
+use datafusion::datasource::TableProvider;
+use datafusion::error::Result;
+use datafusion::execution::context::SessionState;
+use datafusion::execution::session_state::SessionStateBuilder;
+
+use async_trait::async_trait;
+use dirs::home_dir;
+use parking_lot::RwLock;
+
+/// Wraps another catalog, automatically registering the required object stores for the file locations
+#[derive(Debug)]
+pub struct DynamicObjectStoreCatalog {
+    inner: Arc<dyn CatalogProviderList>,
+    state: Weak<RwLock<SessionState>>,
+}
+
+impl DynamicObjectStoreCatalog {
+    pub fn new(inner: Arc<dyn CatalogProviderList>, state: Weak<RwLock<SessionState>>) -> Self {
+        Self { inner, state }
+    }
+}
+
+impl CatalogProviderList for DynamicObjectStoreCatalog {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn register_catalog(
+        &self,
+        name: String,
+        catalog: Arc<dyn CatalogProvider>,
+    ) -> Option<Arc<dyn CatalogProvider>> {
+        self.inner.register_catalog(name, catalog)
+    }
+
+    fn catalog_names(&self) -> Vec<String> {
+        self.inner.catalog_names()
+    }
+
+    fn catalog(&self, name: &str) -> Option<Arc<dyn CatalogProvider>> {
+        let state = self.state.clone();
+        self.inner
+            .catalog(name)
+            .map(|catalog| Arc::new(DynamicObjectStoreCatalogProvider::new(catalog, state)) as _)
+    }
+}
+
+/// Wraps another catalog provider
+#[derive(Debug)]
+struct DynamicObjectStoreCatalogProvider {
+    inner: Arc<dyn CatalogProvider>,
+    state: Weak<RwLock<SessionState>>,
+}
+
+impl DynamicObjectStoreCatalogProvider {
+    pub fn new(inner: Arc<dyn CatalogProvider>, state: Weak<RwLock<SessionState>>) -> Self {
+        Self { inner, state }
+    }
+}
+
+impl CatalogProvider for DynamicObjectStoreCatalogProvider {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema_names(&self) -> Vec<String> {
+        self.inner.schema_names()
+    }
+
+    fn schema(&self, name: &str) -> Option<Arc<dyn SchemaProvider>> {
+        let state = self.state.clone();
+        self.inner
+            .schema(name)
+            .map(|schema| Arc::new(DynamicObjectStoreSchemaProvider::new(schema, state)) as _)
+    }
+
+    fn register_schema(
+        &self,
+        name: &str,
+        schema: Arc<dyn SchemaProvider>,
+    ) -> Result<Option<Arc<dyn SchemaProvider>>> {
+        self.inner.register_schema(name, schema)
+    }
+}
+
+/// Wraps another schema provider. [DynamicObjectStoreSchemaProvider] is responsible for registering the required
+/// object stores for the file locations.
+#[derive(Debug)]
+struct DynamicObjectStoreSchemaProvider {
+    inner: Arc<dyn SchemaProvider>,
+    state: Weak<RwLock<SessionState>>,
+}
+
+impl DynamicObjectStoreSchemaProvider {
+    pub fn new(inner: Arc<dyn SchemaProvider>, state: Weak<RwLock<SessionState>>) -> Self {
+        Self { inner, state }
+    }
+}
+
+#[async_trait]
+impl SchemaProvider for DynamicObjectStoreSchemaProvider {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn table_names(&self) -> Vec<String> {
+        self.inner.table_names()
+    }
+
+    fn register_table(
+        &self,
+        name: String,
+        table: Arc<dyn TableProvider>,
+    ) -> Result<Option<Arc<dyn TableProvider>>> {
+        self.inner.register_table(name, table)
+    }
+
+    async fn table(&self, name: &str) -> Result<Option<Arc<dyn TableProvider>>> {
+        let inner_table = self.inner.table(name).await;
+        if inner_table.is_ok() {
+            if let Some(inner_table) = inner_table? {
+                return Ok(Some(inner_table));
+            }
+        }
+
+        // if the inner schema provider didn't have a table by
+        // that name, try to treat it as a listing table
+        let mut state = self
+            .state
+            .upgrade()
+            .ok_or_else(|| plan_datafusion_err!("locking error"))?
+ .read() + .clone(); + let mut builder = SessionStateBuilder::from(state.clone()); + let optimized_name = substitute_tilde(name.to_owned()); + let table_url = ListingTableUrl::parse(optimized_name.as_str())?; + let scheme = table_url.scheme(); + let url = table_url.as_ref(); + + // If the store is already registered for this URL then `get_store` + // will return `Ok` which means we don't need to register it again. However, + // if `get_store` returns an `Err` then it means the corresponding store is + // not registered yet and we need to register it + match state.runtime_env().object_store_registry.get_store(url) { + Ok(_) => { /*Nothing to do here, store for this URL is already registered*/ } + Err(_) => { + // Register the store for this URL. Here we don't have access + // to any command options so the only choice is to use an empty collection + match scheme { + "s3" | "oss" | "cos" => { + if let Some(table_options) = builder.table_options() { + table_options.extensions.insert(AwsOptions::default()) + } + } + "gs" | "gcs" => { + if let Some(table_options) = builder.table_options() { + table_options.extensions.insert(GcpOptions::default()) + } + } + _ => {} + }; + state = builder.build(); + let store = get_object_store( + &state, + table_url.scheme(), + url, + &state.default_table_options(), + ) + .await?; + state.runtime_env().register_object_store(url, store); + } + } + self.inner.table(name).await + } + + fn deregister_table(&self, name: &str) -> Result>> { + self.inner.deregister_table(name) + } + + fn table_exist(&self, name: &str) -> bool { + self.inner.table_exist(name) + } +} + +pub fn substitute_tilde(cur: String) -> String { + if let Some(usr_dir_path) = home_dir() { + if let Some(usr_dir) = usr_dir_path.to_str() { + if cur.starts_with('~') && !usr_dir.is_empty() { + return cur.replacen('~', usr_dir, 1); + } + } + } + cur +} +#[cfg(test)] +mod tests { + + use super::*; + + use datafusion::catalog::SchemaProvider; + use datafusion::prelude::SessionContext; + + fn setup_context() -> (SessionContext, Arc) { + let ctx = SessionContext::new(); + ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new( + ctx.state().catalog_list().clone(), + ctx.state_weak_ref(), + ))); + + let provider = &DynamicObjectStoreCatalog::new( + ctx.state().catalog_list().clone(), + ctx.state_weak_ref(), + ) as &dyn CatalogProviderList; + let catalog = provider + .catalog(provider.catalog_names().first().unwrap()) + .unwrap(); + let schema = catalog + .schema(catalog.schema_names().first().unwrap()) + .unwrap(); + (ctx, schema) + } + + #[tokio::test] + async fn query_http_location_test() -> Result<()> { + // This is a unit test so not expecting a connection or a file to be + // available + let domain = "example.com"; + let location = format!("http://{domain}/file.parquet"); + + let (ctx, schema) = setup_context(); + + // That's a non registered table so expecting None here + let table = schema.table(&location).await?; + assert!(table.is_none()); + + // It should still create an object store for the location in the SessionState + let store = ctx + .runtime_env() + .object_store(ListingTableUrl::parse(location)?)?; + + assert_eq!(format!("{store}"), "HttpStore"); + + // The store must be configured for this domain + let expected_domain = format!("Domain(\"{domain}\")"); + assert!(format!("{store:?}").contains(&expected_domain)); + + Ok(()) + } + + #[tokio::test] + async fn query_s3_location_test() -> Result<()> { + let bucket = "examples3bucket"; + let location = 
format!("s3://{bucket}/file.parquet"); + + let (ctx, schema) = setup_context(); + + let table = schema.table(&location).await?; + assert!(table.is_none()); + + let store = ctx + .runtime_env() + .object_store(ListingTableUrl::parse(location)?)?; + assert_eq!(format!("{store}"), format!("AmazonS3({bucket})")); + + // The store must be configured for this domain + let expected_bucket = format!("bucket: \"{bucket}\""); + assert!(format!("{store:?}").contains(&expected_bucket)); + + Ok(()) + } + + #[tokio::test] + async fn query_gs_location_test() -> Result<()> { + let bucket = "examplegsbucket"; + let location = format!("gs://{bucket}/file.parquet"); + + let (ctx, schema) = setup_context(); + + let table = schema.table(&location).await?; + assert!(table.is_none()); + + let store = ctx + .runtime_env() + .object_store(ListingTableUrl::parse(location)?)?; + assert_eq!(format!("{store}"), format!("GoogleCloudStorage({bucket})")); + + // The store must be configured for this domain + let expected_bucket = format!("bucket_name_encoded: \"{bucket}\""); + assert!(format!("{store:?}").contains(&expected_bucket)); + + Ok(()) + } + + #[tokio::test] + async fn query_invalid_location_test() { + let location = "ts://file.parquet"; + let (_ctx, schema) = setup_context(); + + assert!(schema.table(location).await.is_err()); + } + + #[cfg(not(target_os = "windows"))] + #[test] + fn test_substitute_tilde() { + use std::env; + use std::path::MAIN_SEPARATOR; + let original_home = home_dir(); + let test_home_path = if cfg!(windows) { + "C:\\Users\\user" + } else { + "/home/user" + }; + env::set_var( + if cfg!(windows) { "USERPROFILE" } else { "HOME" }, + test_home_path, + ); + let input = "~/Code/datafusion/benchmarks/data/tpch_sf1/part/part-0.parquet"; + let expected = format!( + "{}{}Code{}datafusion{}benchmarks{}data{}tpch_sf1{}part{}part-0.parquet", + test_home_path, + MAIN_SEPARATOR, + MAIN_SEPARATOR, + MAIN_SEPARATOR, + MAIN_SEPARATOR, + MAIN_SEPARATOR, + MAIN_SEPARATOR, + MAIN_SEPARATOR + ); + let actual = substitute_tilde(input.to_string()); + assert_eq!(actual, expected); + match original_home { + Some(home_path) => env::set_var( + if cfg!(windows) { "USERPROFILE" } else { "HOME" }, + home_path.to_str().unwrap(), + ), + None => env::remove_var(if cfg!(windows) { "USERPROFILE" } else { "HOME" }), + } + } +} diff --git a/optd-datafusion-cli/src/cli_context.rs b/optd-datafusion-cli/src/cli_context.rs new file mode 100644 index 0000000..d3c705e --- /dev/null +++ b/optd-datafusion-cli/src/cli_context.rs @@ -0,0 +1,92 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
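+
+//! Defines [`CliSessionContext`], the abstraction that lets the CLI's REPL and
+//! execution helpers run against session contexts other than DataFusion's
+//! default one (see `examples/cli-session-context.rs` for a custom implementation).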
+ +use std::sync::Arc; + +use datafusion::{ + dataframe::DataFrame, + error::DataFusionError, + execution::{context::SessionState, TaskContext}, + logical_expr::LogicalPlan, + prelude::SessionContext, +}; +use object_store::ObjectStore; + +use crate::object_storage::{AwsOptions, GcpOptions}; + +#[async_trait::async_trait] +/// The CLI session context trait provides a way to have a session context that can be used with datafusion's CLI code. +pub trait CliSessionContext { + /// Get an atomic reference counted task context. + fn task_ctx(&self) -> Arc; + + /// Get the session state. + fn session_state(&self) -> SessionState; + + /// Register an object store with the session context. + fn register_object_store( + &self, + url: &url::Url, + object_store: Arc, + ) -> Option>; + + /// Register table options extension from scheme. + fn register_table_options_extension_from_scheme(&self, scheme: &str); + + /// Execute a logical plan and return a DataFrame. + async fn execute_logical_plan(&self, plan: LogicalPlan) -> Result; +} + +#[async_trait::async_trait] +impl CliSessionContext for SessionContext { + fn task_ctx(&self) -> Arc { + self.task_ctx() + } + + fn session_state(&self) -> SessionState { + self.state() + } + + fn register_object_store( + &self, + url: &url::Url, + object_store: Arc, + ) -> Option> { + self.register_object_store(url, object_store) + } + + fn register_table_options_extension_from_scheme(&self, scheme: &str) { + match scheme { + // For Amazon S3 or Alibaba Cloud OSS + "s3" | "oss" | "cos" => { + // Register AWS specific table options in the session context: + self.register_table_options_extension(AwsOptions::default()) + } + // For Google Cloud Storage + "gs" | "gcs" => { + // Register GCP specific table options in the session context: + self.register_table_options_extension(GcpOptions::default()) + } + // For unsupported schemes, do nothing: + _ => {} + } + } + + async fn execute_logical_plan(&self, plan: LogicalPlan) -> Result { + self.execute_logical_plan(plan).await + } +} diff --git a/optd-datafusion-cli/src/command.rs b/optd-datafusion-cli/src/command.rs new file mode 100644 index 0000000..54942b9 --- /dev/null +++ b/optd-datafusion-cli/src/command.rs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Command within CLI + +use crate::cli_context::CliSessionContext; +use crate::exec::{exec_and_print, exec_from_lines}; +use crate::functions::{display_all_functions, Function}; +use crate::print_format::PrintFormat; +use crate::print_options::PrintOptions; +use clap::ValueEnum; +use datafusion::arrow::array::{ArrayRef, StringArray}; +use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::common::exec_err; +use datafusion::common::instant::Instant; +use datafusion::error::{DataFusionError, Result}; +use std::fs::File; +use std::io::BufReader; +use std::str::FromStr; +use std::sync::Arc; + +/// Command +#[derive(Debug)] +pub enum Command { + Quit, + Help, + ListTables, + DescribeTableStmt(String), + ListFunctions, + Include(Option), + SearchFunctions(String), + QuietMode(Option), + OutputFormat(Option), +} + +pub enum OutputFormat { + ChangeFormat(String), +} + +impl Command { + pub async fn execute( + &self, + ctx: &dyn CliSessionContext, + print_options: &mut PrintOptions, + ) -> Result<()> { + match self { + Self::Help => { + let now = Instant::now(); + let command_batch = all_commands_info(); + print_options.print_batches(command_batch.schema(), &[command_batch], now) + } + Self::ListTables => exec_and_print(ctx, print_options, "SHOW TABLES".into()).await, + Self::DescribeTableStmt(name) => { + exec_and_print(ctx, print_options, format!("SHOW COLUMNS FROM {}", name)).await + } + Self::Include(filename) => { + if let Some(filename) = filename { + let file = File::open(filename).map_err(|e| { + DataFusionError::Execution(format!("Error opening {:?} {}", filename, e)) + })?; + exec_from_lines(ctx, &mut BufReader::new(file), print_options).await?; + Ok(()) + } else { + exec_err!("Required filename argument is missing") + } + } + Self::QuietMode(quiet) => { + if let Some(quiet) = quiet { + print_options.quiet = *quiet; + println!( + "Quiet mode set to {}", + if print_options.quiet { "true" } else { "false" } + ); + } else { + println!( + "Quiet mode is {}", + if print_options.quiet { "true" } else { "false" } + ); + } + Ok(()) + } + Self::Quit => exec_err!("Unexpected quit, this should be handled outside"), + Self::ListFunctions => display_all_functions(), + Self::SearchFunctions(function) => { + if let Ok(func) = function.parse::() { + let details = func.function_details()?; + println!("{}", details); + Ok(()) + } else { + exec_err!("{function} is not a supported function") + } + } + Self::OutputFormat(_) => { + exec_err!("Unexpected change output format, this should be handled outside") + } + } + } + + fn get_name_and_description(&self) -> (&'static str, &'static str) { + match self { + Self::Quit => ("\\q", "quit datafusion-cli"), + Self::ListTables => ("\\d", "list tables"), + Self::DescribeTableStmt(_) => ("\\d name", "describe table"), + Self::Help => ("\\?", "help"), + Self::Include(_) => ("\\i filename", "reads input from the specified filename"), + Self::ListFunctions => ("\\h", "function list"), + Self::SearchFunctions(_) => ("\\h function", "search function"), + Self::QuietMode(_) => ("\\quiet (true|false)?", "print or set quiet mode"), + Self::OutputFormat(_) => ("\\pset [NAME [VALUE]]", "set table output option\n(format)"), + } + } +} + +const ALL_COMMANDS: [Command; 9] = [ + Command::ListTables, + Command::DescribeTableStmt(String::new()), + Command::Quit, + Command::Help, + Command::Include(Some(String::new())), + Command::ListFunctions, + Command::SearchFunctions(String::new()), + 
Command::QuietMode(None), + Command::OutputFormat(None), +]; + +fn all_commands_info() -> RecordBatch { + let schema = Arc::new(Schema::new(vec![ + Field::new("Command", DataType::Utf8, false), + Field::new("Description", DataType::Utf8, false), + ])); + let (names, description): (Vec<&str>, Vec<&str>) = ALL_COMMANDS + .into_iter() + .map(|c| c.get_name_and_description()) + .unzip(); + RecordBatch::try_new( + schema, + [names, description] + .into_iter() + .map(|i| Arc::new(StringArray::from(i)) as ArrayRef) + .collect::>(), + ) + .expect("This should not fail") +} + +impl FromStr for Command { + type Err = (); + + fn from_str(s: &str) -> Result { + let (c, arg) = if let Some((a, b)) = s.split_once(' ') { + (a, Some(b)) + } else { + (s, None) + }; + Ok(match (c, arg) { + ("q", None) => Self::Quit, + ("d", None) => Self::ListTables, + ("d", Some(name)) => Self::DescribeTableStmt(name.into()), + ("?", None) => Self::Help, + ("h", None) => Self::ListFunctions, + ("h", Some(function)) => Self::SearchFunctions(function.into()), + ("i", None) => Self::Include(None), + ("i", Some(filename)) => Self::Include(Some(filename.to_owned())), + ("quiet", Some("true" | "t" | "yes" | "y" | "on")) => Self::QuietMode(Some(true)), + ("quiet", Some("false" | "f" | "no" | "n" | "off")) => Self::QuietMode(Some(false)), + ("quiet", None) => Self::QuietMode(None), + ("pset", Some(subcommand)) => Self::OutputFormat(Some(subcommand.to_string())), + ("pset", None) => Self::OutputFormat(None), + _ => return Err(()), + }) + } +} + +impl FromStr for OutputFormat { + type Err = (); + + fn from_str(s: &str) -> Result { + let (c, arg) = if let Some((a, b)) = s.split_once(' ') { + (a, Some(b)) + } else { + (s, None) + }; + Ok(match (c, arg) { + ("format", Some(format)) => Self::ChangeFormat(format.to_string()), + _ => return Err(()), + }) + } +} + +impl OutputFormat { + pub async fn execute(&self, print_options: &mut PrintOptions) -> Result<()> { + match self { + Self::ChangeFormat(format) => { + if let Ok(format) = format.parse::() { + print_options.format = format; + println!("Output format is {:?}.", print_options.format); + Ok(()) + } else { + exec_err!( + "{:?} is not a valid format type [possible values: {:?}]", + format, + PrintFormat::value_variants() + ) + } + } + } + } +} diff --git a/optd-datafusion-cli/src/exec.rs b/optd-datafusion-cli/src/exec.rs new file mode 100644 index 0000000..c724ba2 --- /dev/null +++ b/optd-datafusion-cli/src/exec.rs @@ -0,0 +1,625 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Execution functions + +use std::collections::HashMap; +use std::fs::File; +use std::io::prelude::*; +use std::io::BufReader; + +use crate::cli_context::CliSessionContext; +use crate::helper::split_from_semicolon; +use crate::print_format::PrintFormat; +use crate::{ + command::{Command, OutputFormat}, + helper::{unescape_input, CliHelper}, + object_storage::get_object_store, + print_options::{MaxRows, PrintOptions}, +}; + +use datafusion::common::instant::Instant; +use datafusion::common::{plan_datafusion_err, plan_err}; +use datafusion::config::ConfigFileType; +use datafusion::datasource::listing::ListingTableUrl; +use datafusion::error::{DataFusionError, Result}; +use datafusion::logical_expr::{DdlStatement, LogicalPlan}; +use datafusion::physical_plan::execution_plan::EmissionType; +use datafusion::physical_plan::{collect, execute_stream, ExecutionPlanProperties}; +use datafusion::sql::parser::{DFParser, Statement}; +use datafusion::sql::sqlparser::dialect::dialect_from_str; + +use datafusion::sql::sqlparser; +use rustyline::error::ReadlineError; +use rustyline::Editor; +use tokio::signal; + +/// run and execute SQL statements and commands, against a context with the given print options +pub async fn exec_from_commands( + ctx: &dyn CliSessionContext, + commands: Vec, + print_options: &PrintOptions, +) -> Result<()> { + for sql in commands { + exec_and_print(ctx, print_options, sql).await?; + } + + Ok(()) +} + +/// run and execute SQL statements and commands from a file, against a context with the given print options +pub async fn exec_from_lines( + ctx: &dyn CliSessionContext, + reader: &mut BufReader, + print_options: &PrintOptions, +) -> Result<()> { + let mut query = "".to_owned(); + + for line in reader.lines() { + match line { + Ok(line) if line.starts_with("#!") => { + continue; + } + Ok(line) if line.starts_with("--") => { + continue; + } + Ok(line) => { + let line = line.trim_end(); + query.push_str(line); + if line.ends_with(';') { + match exec_and_print(ctx, print_options, query).await { + Ok(_) => {} + Err(err) => eprintln!("{err}"), + } + query = "".to_string(); + } else { + query.push('\n'); + } + } + _ => { + break; + } + } + } + + // run the left over query if the last statement doesn't contain ‘;’ + // ignore if it only consists of '\n' + if query.contains(|c| c != '\n') { + exec_and_print(ctx, print_options, query).await?; + } + + Ok(()) +} + +pub async fn exec_from_files( + ctx: &dyn CliSessionContext, + files: Vec, + print_options: &PrintOptions, +) -> Result<()> { + let files = files + .into_iter() + .map(|file_path| File::open(file_path).unwrap()) + .collect::>(); + + for file in files { + let mut reader = BufReader::new(file); + exec_from_lines(ctx, &mut reader, print_options).await?; + } + + Ok(()) +} + +/// run and execute SQL statements and commands against a context with the given print options +pub async fn exec_from_repl( + ctx: &dyn CliSessionContext, + print_options: &mut PrintOptions, +) -> rustyline::Result<()> { + let mut rl = Editor::new()?; + rl.set_helper(Some(CliHelper::new( + &ctx.task_ctx().session_config().options().sql_parser.dialect, + print_options.color, + ))); + rl.load_history(".history").ok(); + + loop { + match rl.readline("> ") { + Ok(line) if line.starts_with('\\') => { + rl.add_history_entry(line.trim_end())?; + let command = line.split_whitespace().collect::>().join(" "); + if let Ok(cmd) = &command[1..].parse::() { + match cmd { + Command::Quit => break, + Command::OutputFormat(subcommand) => { + if let Some(subcommand) = subcommand { 
+ if let Ok(command) = subcommand.parse::() { + if let Err(e) = command.execute(print_options).await { + eprintln!("{e}") + } + } else { + eprintln!("'\\{}' is not a valid command", &line[1..]); + } + } else { + println!("Output format is {:?}.", print_options.format); + } + } + _ => { + if let Err(e) = cmd.execute(ctx, print_options).await { + eprintln!("{e}") + } + } + } + } else { + eprintln!("'\\{}' is not a valid command", &line[1..]); + } + } + Ok(line) => { + let lines = split_from_semicolon(line); + for line in lines { + rl.add_history_entry(line.trim_end())?; + tokio::select! { + res = exec_and_print(ctx, print_options, line) => match res { + Ok(_) => {} + Err(err) => eprintln!("{err}"), + }, + _ = signal::ctrl_c() => { + println!("^C"); + continue + }, + } + // dialect might have changed + rl.helper_mut() + .unwrap() + .set_dialect(&ctx.task_ctx().session_config().options().sql_parser.dialect); + } + } + Err(ReadlineError::Interrupted) => { + println!("^C"); + continue; + } + Err(ReadlineError::Eof) => { + println!("\\q"); + break; + } + Err(err) => { + eprintln!("Unknown error happened {:?}", err); + break; + } + } + } + + rl.save_history(".history") +} + +pub(super) async fn exec_and_print( + ctx: &dyn CliSessionContext, + print_options: &PrintOptions, + sql: String, +) -> Result<()> { + let now = Instant::now(); + let sql = unescape_input(&sql)?; + let task_ctx = ctx.task_ctx(); + let dialect = &task_ctx.session_config().options().sql_parser.dialect; + let dialect = dialect_from_str(dialect).ok_or_else(|| { + plan_datafusion_err!( + "Unsupported SQL dialect: {dialect}. Available dialects: \ + Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \ + MsSQL, ClickHouse, BigQuery, Ansi." + ) + })?; + + let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?; + for statement in statements { + let adjusted = AdjustedPrintOptions::new(print_options.clone()).with_statement(&statement); + + let plan = create_plan(ctx, statement).await?; + let adjusted = adjusted.with_plan(&plan); + + let df = ctx.execute_logical_plan(plan).await?; + let physical_plan = df.create_physical_plan().await?; + + if physical_plan.boundedness().is_unbounded() { + if physical_plan.pipeline_behavior() == EmissionType::Final { + return plan_err!( + "The given query can generate a valid result only once \ + the source finishes, but the source is unbounded" + ); + } + // As the input stream comes, we can generate results. + // However, memory safety is not guaranteed. + let stream = execute_stream(physical_plan, task_ctx.clone())?; + print_options.print_stream(stream, now).await?; + } else { + // Bounded stream; collected results are printed after all input consumed. + let schema = physical_plan.schema(); + let results = collect(physical_plan, task_ctx.clone()).await?; + adjusted.into_inner().print_batches(schema, &results, now)?; + } + } + + Ok(()) +} + +/// Track adjustments to the print options based on the plan / statement being executed +#[derive(Debug)] +struct AdjustedPrintOptions { + inner: PrintOptions, +} + +impl AdjustedPrintOptions { + fn new(inner: PrintOptions) -> Self { + Self { inner } + } + /// Adjust print options based on any statement specific requirements + fn with_statement(mut self, statement: &Statement) -> Self { + if let Statement::Statement(sql_stmt) = statement { + // SHOW / SHOW ALL + if let sqlparser::ast::Statement::ShowVariable { .. 
} = sql_stmt.as_ref() { + self.inner.maxrows = MaxRows::Unlimited + } + } + self + } + + /// Adjust print options based on any plan specific requirements + fn with_plan(mut self, plan: &LogicalPlan) -> Self { + // For plans like `Explain` ignore `MaxRows` option and always display + // all rows + if matches!( + plan, + LogicalPlan::Explain(_) | LogicalPlan::DescribeTable(_) | LogicalPlan::Analyze(_) + ) { + self.inner.maxrows = MaxRows::Unlimited; + } + self + } + + /// Finalize and return the inner `PrintOptions` + fn into_inner(mut self) -> PrintOptions { + if self.inner.format == PrintFormat::Automatic { + self.inner.format = PrintFormat::Table; + } + + self.inner + } +} + +fn config_file_type_from_str(ext: &str) -> Option { + match ext.to_lowercase().as_str() { + "csv" => Some(ConfigFileType::CSV), + "json" => Some(ConfigFileType::JSON), + "parquet" => Some(ConfigFileType::PARQUET), + _ => None, + } +} + +async fn create_plan( + ctx: &dyn CliSessionContext, + statement: Statement, +) -> Result { + let mut plan = ctx.session_state().statement_to_plan(statement).await?; + + // Note that cmd is a mutable reference so that create_external_table function can remove all + // datafusion-cli specific options before passing through to datafusion. Otherwise, datafusion + // will raise Configuration errors. + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan { + // To support custom formats, treat error as None + let format = config_file_type_from_str(&cmd.file_type); + register_object_store_and_config_extensions(ctx, &cmd.location, &cmd.options, format) + .await?; + } + + if let LogicalPlan::Copy(copy_to) = &mut plan { + let format = config_file_type_from_str(©_to.file_type.get_ext()); + + register_object_store_and_config_extensions( + ctx, + ©_to.output_url, + ©_to.options, + format, + ) + .await?; + } + Ok(plan) +} + +/// Asynchronously registers an object store and its configuration extensions +/// to the session context. +/// +/// This function dynamically registers a cloud object store based on the given +/// location and options. It first parses the location to determine the scheme +/// and constructs the URL accordingly. Depending on the scheme, it also registers +/// relevant options. The function then alters the default table options with the +/// given custom options. Finally, it retrieves and registers the object store +/// in the session context. +/// +/// # Parameters +/// +/// * `ctx`: A reference to the `SessionContext` for registering the object store. +/// * `location`: A string reference representing the location of the object store. +/// * `options`: A reference to a hash map containing configuration options for +/// the object store. +/// +/// # Returns +/// +/// A `Result<()>` which is an Ok value indicating successful registration, or +/// an error upon failure. +/// +/// # Errors +/// +/// This function can return an error if the location parsing fails, options +/// alteration fails, or if the object store cannot be retrieved and registered +/// successfully. 
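+///
+/// # Example (illustrative sketch only)
+///
+/// ```ignore
+/// // Register an S3 location with explicit options before planning a
+/// // statement that reads from it; the option values here are placeholders.
+/// let mut options = HashMap::new();
+/// options.insert("aws.region".to_string(), "us-east-1".to_string());
+/// register_object_store_and_config_extensions(ctx, &location, &options, None).await?;
+/// ```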
+pub(crate) async fn register_object_store_and_config_extensions( + ctx: &dyn CliSessionContext, + location: &String, + options: &HashMap, + format: Option, +) -> Result<()> { + // Parse the location URL to extract the scheme and other components + let table_path = ListingTableUrl::parse(location)?; + + // Extract the scheme (e.g., "s3", "gcs") from the parsed URL + let scheme = table_path.scheme(); + + // Obtain a reference to the URL + let url = table_path.as_ref(); + + // Register the options based on the scheme extracted from the location + ctx.register_table_options_extension_from_scheme(scheme); + + // Clone and modify the default table options based on the provided options + let mut table_options = ctx.session_state().default_table_options(); + if let Some(format) = format { + table_options.set_config_format(format); + } + table_options.alter_with_string_hash_map(options)?; + + // Retrieve the appropriate object store based on the scheme, URL, and modified table options + let store = get_object_store(&ctx.session_state(), scheme, url, &table_options).await?; + + // Register the retrieved object store in the session context's runtime environment + ctx.register_object_store(url, store); + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + use datafusion::common::plan_err; + + use datafusion::prelude::SessionContext; + use url::Url; + + async fn create_external_table_test(location: &str, sql: &str) -> Result<()> { + let ctx = SessionContext::new(); + let plan = ctx.state().create_logical_plan(sql).await?; + + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &plan { + let format = config_file_type_from_str(&cmd.file_type); + register_object_store_and_config_extensions(&ctx, &cmd.location, &cmd.options, format) + .await?; + } else { + return plan_err!("LogicalPlan is not a CreateExternalTable"); + } + + // Ensure the URL is supported by the object store + ctx.runtime_env() + .object_store(ListingTableUrl::parse(location)?)?; + + Ok(()) + } + + async fn copy_to_table_test(location: &str, sql: &str) -> Result<()> { + let ctx = SessionContext::new(); + // AWS CONFIG register. + + let plan = ctx.state().create_logical_plan(sql).await?; + + if let LogicalPlan::Copy(cmd) = &plan { + let format = config_file_type_from_str(&cmd.file_type.get_ext()); + register_object_store_and_config_extensions( + &ctx, + &cmd.output_url, + &cmd.options, + format, + ) + .await?; + } else { + return plan_err!("LogicalPlan is not a CreateExternalTable"); + } + + // Ensure the URL is supported by the object store + ctx.runtime_env() + .object_store(ListingTableUrl::parse(location)?)?; + + Ok(()) + } + + #[tokio::test] + async fn create_object_store_table_http() -> Result<()> { + // Should be OK + let location = "http://example.com/file.parquet"; + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET LOCATION '{location}'"); + create_external_table_test(location, &sql).await?; + + Ok(()) + } + #[tokio::test] + async fn copy_to_external_object_store_test() -> Result<()> { + let locations = vec![ + "s3://bucket/path/file.parquet", + "oss://bucket/path/file.parquet", + "cos://bucket/path/file.parquet", + "gcs://bucket/path/file.parquet", + ]; + let ctx = SessionContext::new(); + let task_ctx = ctx.task_ctx(); + let dialect = &task_ctx.session_config().options().sql_parser.dialect; + let dialect = dialect_from_str(dialect).ok_or_else(|| { + plan_datafusion_err!( + "Unsupported SQL dialect: {dialect}. 
Available dialects: \ + Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, \ + MsSQL, ClickHouse, BigQuery, Ansi." + ) + })?; + for location in locations { + let sql = format!("copy (values (1,2)) to '{}' STORED AS PARQUET;", location); + let statements = DFParser::parse_sql_with_dialect(&sql, dialect.as_ref())?; + for statement in statements { + //Should not fail + let mut plan = create_plan(&ctx, statement).await?; + if let LogicalPlan::Copy(copy_to) = &mut plan { + assert_eq!(copy_to.output_url, location); + assert_eq!(copy_to.file_type.get_ext(), "parquet".to_string()); + ctx.runtime_env() + .object_store_registry + .get_store(&Url::parse(©_to.output_url).unwrap())?; + } else { + return plan_err!("LogicalPlan is not a CopyTo"); + } + } + } + Ok(()) + } + + #[tokio::test] + async fn copy_to_object_store_table_s3() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let location = "s3://bucket/path/file.parquet"; + + // Missing region, use object_store defaults + let sql = format!("COPY (values (1,2)) TO '{location}' STORED AS PARQUET + OPTIONS ('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}')"); + copy_to_table_test(location, &sql).await?; + + Ok(()) + } + + #[tokio::test] + async fn create_object_store_table_s3() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let region = "fake_us-east-2"; + let session_token = "fake_session_token"; + let location = "s3://bucket/path/file.parquet"; + + // Missing region, use object_store defaults + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}') LOCATION '{location}'"); + create_external_table_test(location, &sql).await?; + + // Should be OK + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.region' '{region}', 'aws.session_token' '{session_token}') LOCATION '{location}'"); + create_external_table_test(location, &sql).await?; + + Ok(()) + } + + #[tokio::test] + async fn create_object_store_table_oss() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let endpoint = "fake_endpoint"; + let location = "oss://bucket/path/file.parquet"; + + // Should be OK + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'"); + create_external_table_test(location, &sql).await?; + + Ok(()) + } + + #[tokio::test] + async fn create_object_store_table_cos() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let endpoint = "fake_endpoint"; + let location = "cos://bucket/path/file.parquet"; + + // Should be OK + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET + OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.cos.endpoint' '{endpoint}') LOCATION '{location}'"); + create_external_table_test(location, &sql).await?; + + Ok(()) + } + + #[tokio::test] + async fn create_object_store_table_gcs() -> Result<()> { + let service_account_path = "fake_service_account_path"; + let service_account_key = + "{\"private_key\": 
\"fake_private_key.pem\",\"client_email\":\"fake_client_email\", \"private_key_id\":\"id\"}"; + let application_credentials_path = "fake_application_credentials_path"; + let location = "gcs://bucket/path/file.parquet"; + + // for service_account_path + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET + OPTIONS('gcp.service_account_path' '{service_account_path}') LOCATION '{location}'" + ); + let err = create_external_table_test(location, &sql) + .await + .unwrap_err(); + assert!(err.to_string().contains("os error 2")); + + // for service_account_key + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_key' '{service_account_key}') LOCATION '{location}'"); + let err = create_external_table_test(location, &sql) + .await + .unwrap_err() + .to_string(); + assert!(err.contains("No RSA key found in pem file"), "{err}"); + + // for application_credentials_path + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET + OPTIONS('gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'"); + let err = create_external_table_test(location, &sql) + .await + .unwrap_err(); + assert!(err.to_string().contains("os error 2")); + + Ok(()) + } + + #[tokio::test] + async fn create_external_table_local_file() -> Result<()> { + let location = "path/to/file.parquet"; + + // Ensure that local files are also registered + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET LOCATION '{location}'"); + create_external_table_test(location, &sql).await.unwrap(); + + Ok(()) + } + + #[tokio::test] + async fn create_external_table_format_option() -> Result<()> { + let location = "path/to/file.cvs"; + + // Test with format options + let sql = + format!("CREATE EXTERNAL TABLE test STORED AS CSV LOCATION '{location}' OPTIONS('format.has_header' 'true')"); + create_external_table_test(location, &sql).await.unwrap(); + + Ok(()) + } +} diff --git a/optd-datafusion-cli/src/functions.rs b/optd-datafusion-cli/src/functions.rs new file mode 100644 index 0000000..6bb3cee --- /dev/null +++ b/optd-datafusion-cli/src/functions.rs @@ -0,0 +1,457 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Functions that are query-able and searchable via the `\h` command +use arrow::array::{Int64Array, StringArray}; +use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; +use arrow::record_batch::RecordBatch; +use arrow::util::pretty::pretty_format_batches; +use async_trait::async_trait; + +use datafusion::catalog::Session; +use datafusion::common::{plan_err, Column}; +use datafusion::datasource::TableProvider; +use datafusion::error::Result; +use datafusion::logical_expr::Expr; +use datafusion::physical_plan::memory::MemoryExec; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::scalar::ScalarValue; +use datafusion_catalog::TableFunctionImpl; +use parquet::basic::ConvertedType; +use parquet::data_type::{ByteArray, FixedLenByteArray}; +use parquet::file::reader::FileReader; +use parquet::file::serialized_reader::SerializedFileReader; +use parquet::file::statistics::Statistics; +use std::fmt; +use std::fs::File; +use std::str::FromStr; +use std::sync::Arc; + +#[derive(Debug)] +pub enum Function { + Select, + Explain, + Show, + CreateTable, + CreateTableAs, + Insert, + DropTable, +} + +const ALL_FUNCTIONS: [Function; 7] = [ + Function::CreateTable, + Function::CreateTableAs, + Function::DropTable, + Function::Explain, + Function::Insert, + Function::Select, + Function::Show, +]; + +impl Function { + pub fn function_details(&self) -> Result<&str> { + let details = match self { + Function::Select => { + r#" +Command: SELECT +Description: retrieve rows from a table or view +Syntax: +SELECT [ ALL | DISTINCT [ ON ( expression [, ...] ) ] ] + [ * | expression [ [ AS ] output_name ] [, ...] ] + [ FROM from_item [, ...] ] + [ WHERE condition ] + [ GROUP BY [ ALL | DISTINCT ] grouping_element [, ...] ] + [ HAVING condition ] + [ WINDOW window_name AS ( window_definition ) [, ...] ] + [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select ] + [ ORDER BY expression [ ASC | DESC | USING operator ] [ NULLS { FIRST | LAST } ] [, ...] ] + [ LIMIT { count | ALL } ] + [ OFFSET start [ ROW | ROWS ] ] + +where from_item can be one of: + + [ ONLY ] table_name [ * ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] + [ TABLESAMPLE sampling_method ( argument [, ...] ) [ REPEATABLE ( seed ) ] ] + [ LATERAL ] ( select ) [ AS ] alias [ ( column_alias [, ...] ) ] + with_query_name [ [ AS ] alias [ ( column_alias [, ...] ) ] ] + [ LATERAL ] function_name ( [ argument [, ...] ] ) + [ WITH ORDINALITY ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] + [ LATERAL ] function_name ( [ argument [, ...] ] ) [ AS ] alias ( column_definition [, ...] ) + [ LATERAL ] function_name ( [ argument [, ...] ] ) AS ( column_definition [, ...] ) + [ LATERAL ] ROWS FROM( function_name ( [ argument [, ...] ] ) [ AS ( column_definition [, ...] ) ] [, ...] ) + [ WITH ORDINALITY ] [ [ AS ] alias [ ( column_alias [, ...] ) ] ] + from_item [ NATURAL ] join_type from_item [ ON join_condition | USING ( join_column [, ...] ) [ AS join_using_alias ] ] + +and grouping_element can be one of: + + ( ) + expression + ( expression [, ...] ) + +and with_query is: + + with_query_name [ ( column_name [, ...] 
) ] AS [ [ NOT ] MATERIALIZED ] ( select | values | insert | update | delete ) + +TABLE [ ONLY ] table_name [ * ]"# + } + Function::Explain => { + r#" +Command: EXPLAIN +Description: show the execution plan of a statement +Syntax: +EXPLAIN [ ANALYZE ] statement +"# + } + Function::Show => { + r#" +Command: SHOW +Description: show the value of a run-time parameter +Syntax: +SHOW name +"# + } + Function::CreateTable => { + r#" +Command: CREATE TABLE +Description: define a new table +Syntax: +CREATE [ EXTERNAL ] TABLE table_name ( [ + { column_name data_type } + [, ... ] +] ) +"# + } + Function::CreateTableAs => { + r#" +Command: CREATE TABLE AS +Description: define a new table from the results of a query +Syntax: +CREATE TABLE table_name + [ (column_name [, ...] ) ] + AS query + [ WITH [ NO ] DATA ] +"# + } + Function::Insert => { + r#" +Command: INSERT +Description: create new rows in a table +Syntax: +INSERT INTO table_name [ ( column_name [, ...] ) ] + { VALUES ( { expression } [, ...] ) [, ...] } +"# + } + Function::DropTable => { + r#" +Command: DROP TABLE +Description: remove a table +Syntax: +DROP TABLE [ IF EXISTS ] name [, ...] +"# + } + }; + Ok(details) + } +} + +impl FromStr for Function { + type Err = (); + + fn from_str(s: &str) -> Result { + Ok(match s.trim().to_uppercase().as_str() { + "SELECT" => Self::Select, + "EXPLAIN" => Self::Explain, + "SHOW" => Self::Show, + "CREATE TABLE" => Self::CreateTable, + "CREATE TABLE AS" => Self::CreateTableAs, + "INSERT" => Self::Insert, + "DROP TABLE" => Self::DropTable, + _ => return Err(()), + }) + } +} + +impl fmt::Display for Function { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Function::Select => write!(f, "SELECT"), + Function::Explain => write!(f, "EXPLAIN"), + Function::Show => write!(f, "SHOW"), + Function::CreateTable => write!(f, "CREATE TABLE"), + Function::CreateTableAs => write!(f, "CREATE TABLE AS"), + Function::Insert => write!(f, "INSERT"), + Function::DropTable => write!(f, "DROP TABLE"), + } + } +} + +pub fn display_all_functions() -> Result<()> { + println!("Available help:"); + let array = StringArray::from( + ALL_FUNCTIONS + .iter() + .map(|f| format!("{}", f)) + .collect::>(), + ); + let schema = Schema::new(vec![Field::new("Function", DataType::Utf8, false)]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(array)])?; + println!("{}", pretty_format_batches(&[batch]).unwrap()); + Ok(()) +} + +/// PARQUET_META table function +#[derive(Debug)] +struct ParquetMetadataTable { + schema: SchemaRef, + batch: RecordBatch, +} + +#[async_trait] +impl TableProvider for ParquetMetadataTable { + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn schema(&self) -> arrow::datatypes::SchemaRef { + self.schema.clone() + } + + fn table_type(&self) -> datafusion::logical_expr::TableType { + datafusion::logical_expr::TableType::Base + } + + async fn scan( + &self, + _state: &dyn Session, + projection: Option<&Vec>, + _filters: &[Expr], + _limit: Option, + ) -> Result> { + Ok(Arc::new(MemoryExec::try_new( + &[vec![self.batch.clone()]], + TableProvider::schema(self), + projection.cloned(), + )?)) + } +} + +fn convert_parquet_statistics( + value: &Statistics, + converted_type: ConvertedType, +) -> (Option, Option) { + match (value, converted_type) { + (Statistics::Boolean(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Int32(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), 
+ ), + (Statistics::Int64(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Int96(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Float(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::Double(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::ByteArray(val), ConvertedType::UTF8) => ( + byte_array_to_string(val.min_opt()), + byte_array_to_string(val.max_opt()), + ), + (Statistics::ByteArray(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + (Statistics::FixedLenByteArray(val), ConvertedType::UTF8) => ( + fixed_len_byte_array_to_string(val.min_opt()), + fixed_len_byte_array_to_string(val.max_opt()), + ), + (Statistics::FixedLenByteArray(val), _) => ( + val.min_opt().map(|v| v.to_string()), + val.max_opt().map(|v| v.to_string()), + ), + } +} + +/// Convert to a string if it has utf8 encoding, otherwise print bytes directly +fn byte_array_to_string(val: Option<&ByteArray>) -> Option { + val.map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }) +} + +/// Convert to a string if it has utf8 encoding, otherwise print bytes directly +fn fixed_len_byte_array_to_string(val: Option<&FixedLenByteArray>) -> Option { + val.map(|v| { + v.as_utf8() + .map(|s| s.to_string()) + .unwrap_or_else(|_e| v.to_string()) + }) +} + +#[derive(Debug)] +pub struct ParquetMetadataFunc {} + +impl TableFunctionImpl for ParquetMetadataFunc { + fn call(&self, exprs: &[Expr]) -> Result> { + let filename = match exprs.first() { + Some(Expr::Literal(ScalarValue::Utf8(Some(s)))) => s, // single quote: parquet_metadata('x.parquet') + Some(Expr::Column(Column { name, .. 
})) => name, // double quote: parquet_metadata("x.parquet") + _ => { + return plan_err!("parquet_metadata requires string argument as its input"); + } + }; + + let file = File::open(filename.clone())?; + let reader = SerializedFileReader::new(file)?; + let metadata = reader.metadata(); + + let schema = Arc::new(Schema::new(vec![ + Field::new("filename", DataType::Utf8, true), + Field::new("row_group_id", DataType::Int64, true), + Field::new("row_group_num_rows", DataType::Int64, true), + Field::new("row_group_num_columns", DataType::Int64, true), + Field::new("row_group_bytes", DataType::Int64, true), + Field::new("column_id", DataType::Int64, true), + Field::new("file_offset", DataType::Int64, true), + Field::new("num_values", DataType::Int64, true), + Field::new("path_in_schema", DataType::Utf8, true), + Field::new("type", DataType::Utf8, true), + Field::new("stats_min", DataType::Utf8, true), + Field::new("stats_max", DataType::Utf8, true), + Field::new("stats_null_count", DataType::Int64, true), + Field::new("stats_distinct_count", DataType::Int64, true), + Field::new("stats_min_value", DataType::Utf8, true), + Field::new("stats_max_value", DataType::Utf8, true), + Field::new("compression", DataType::Utf8, true), + Field::new("encodings", DataType::Utf8, true), + Field::new("index_page_offset", DataType::Int64, true), + Field::new("dictionary_page_offset", DataType::Int64, true), + Field::new("data_page_offset", DataType::Int64, true), + Field::new("total_compressed_size", DataType::Int64, true), + Field::new("total_uncompressed_size", DataType::Int64, true), + ])); + + // construct record batch from metadata + let mut filename_arr = vec![]; + let mut row_group_id_arr = vec![]; + let mut row_group_num_rows_arr = vec![]; + let mut row_group_num_columns_arr = vec![]; + let mut row_group_bytes_arr = vec![]; + let mut column_id_arr = vec![]; + let mut file_offset_arr = vec![]; + let mut num_values_arr = vec![]; + let mut path_in_schema_arr = vec![]; + let mut type_arr = vec![]; + let mut stats_min_arr = vec![]; + let mut stats_max_arr = vec![]; + let mut stats_null_count_arr = vec![]; + let mut stats_distinct_count_arr = vec![]; + let mut stats_min_value_arr = vec![]; + let mut stats_max_value_arr = vec![]; + let mut compression_arr = vec![]; + let mut encodings_arr = vec![]; + let mut index_page_offset_arr = vec![]; + let mut dictionary_page_offset_arr = vec![]; + let mut data_page_offset_arr = vec![]; + let mut total_compressed_size_arr = vec![]; + let mut total_uncompressed_size_arr = vec![]; + for (rg_idx, row_group) in metadata.row_groups().iter().enumerate() { + for (col_idx, column) in row_group.columns().iter().enumerate() { + filename_arr.push(filename.clone()); + row_group_id_arr.push(rg_idx as i64); + row_group_num_rows_arr.push(row_group.num_rows()); + row_group_num_columns_arr.push(row_group.num_columns() as i64); + row_group_bytes_arr.push(row_group.total_byte_size()); + column_id_arr.push(col_idx as i64); + file_offset_arr.push(column.file_offset()); + num_values_arr.push(column.num_values()); + path_in_schema_arr.push(column.column_path().to_string()); + type_arr.push(column.column_type().to_string()); + let converted_type = column.column_descr().converted_type(); + + if let Some(s) = column.statistics() { + let (min_val, max_val) = convert_parquet_statistics(s, converted_type); + stats_min_arr.push(min_val.clone()); + stats_max_arr.push(max_val.clone()); + stats_null_count_arr.push(s.null_count_opt().map(|c| c as i64)); + 
stats_distinct_count_arr.push(s.distinct_count_opt().map(|c| c as i64)); + stats_min_value_arr.push(min_val); + stats_max_value_arr.push(max_val); + } else { + stats_min_arr.push(None); + stats_max_arr.push(None); + stats_null_count_arr.push(None); + stats_distinct_count_arr.push(None); + stats_min_value_arr.push(None); + stats_max_value_arr.push(None); + }; + compression_arr.push(format!("{:?}", column.compression())); + encodings_arr.push(format!("{:?}", column.encodings())); + index_page_offset_arr.push(column.index_page_offset()); + dictionary_page_offset_arr.push(column.dictionary_page_offset()); + data_page_offset_arr.push(column.data_page_offset()); + total_compressed_size_arr.push(column.compressed_size()); + total_uncompressed_size_arr.push(column.uncompressed_size()); + } + } + + let rb = RecordBatch::try_new( + schema.clone(), + vec![ + Arc::new(StringArray::from(filename_arr)), + Arc::new(Int64Array::from(row_group_id_arr)), + Arc::new(Int64Array::from(row_group_num_rows_arr)), + Arc::new(Int64Array::from(row_group_num_columns_arr)), + Arc::new(Int64Array::from(row_group_bytes_arr)), + Arc::new(Int64Array::from(column_id_arr)), + Arc::new(Int64Array::from(file_offset_arr)), + Arc::new(Int64Array::from(num_values_arr)), + Arc::new(StringArray::from(path_in_schema_arr)), + Arc::new(StringArray::from(type_arr)), + Arc::new(StringArray::from(stats_min_arr)), + Arc::new(StringArray::from(stats_max_arr)), + Arc::new(Int64Array::from(stats_null_count_arr)), + Arc::new(Int64Array::from(stats_distinct_count_arr)), + Arc::new(StringArray::from(stats_min_value_arr)), + Arc::new(StringArray::from(stats_max_value_arr)), + Arc::new(StringArray::from(compression_arr)), + Arc::new(StringArray::from(encodings_arr)), + Arc::new(Int64Array::from(index_page_offset_arr)), + Arc::new(Int64Array::from(dictionary_page_offset_arr)), + Arc::new(Int64Array::from(data_page_offset_arr)), + Arc::new(Int64Array::from(total_compressed_size_arr)), + Arc::new(Int64Array::from(total_uncompressed_size_arr)), + ], + )?; + + let parquet_metadata = ParquetMetadataTable { schema, batch: rb }; + Ok(Arc::new(parquet_metadata)) + } +} diff --git a/optd-datafusion-cli/src/helper.rs b/optd-datafusion-cli/src/helper.rs new file mode 100644 index 0000000..a5542ee --- /dev/null +++ b/optd-datafusion-cli/src/helper.rs @@ -0,0 +1,378 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Helper that helps with interactive editing, including multi-line parsing and validation, +//! and auto-completion for file name during creating external table. 
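+//!
+//! A minimal sketch of how this helper could be attached to a `rustyline`
+//! editor (the actual REPL wiring lives in `exec.rs`; names follow the
+//! `rustyline` API and error handling is elided):
+//!
+//! ```no_run
+//! use rustyline::history::DefaultHistory;
+//! use rustyline::Editor;
+//! # use optd_datafusion_cli::helper::CliHelper;
+//!
+//! let mut rl = Editor::<CliHelper, DefaultHistory>::new().unwrap();
+//! rl.set_helper(Some(CliHelper::new("generic", true)));
+//! // Input is validated on Enter: it stays `Incomplete` (multi-line)
+//! // until a trailing ';' or a leading '\' command is seen.
+//! let line = rl.readline("> ").unwrap();
+//! ```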
+
+use std::borrow::Cow;
+
+use crate::highlighter::{NoSyntaxHighlighter, SyntaxHighlighter};
+
+use datafusion::common::sql_datafusion_err;
+use datafusion::error::DataFusionError;
+use datafusion::sql::parser::{DFParser, Statement};
+use datafusion::sql::sqlparser::dialect::dialect_from_str;
+use datafusion::sql::sqlparser::parser::ParserError;
+
+use rustyline::completion::{Completer, FilenameCompleter, Pair};
+use rustyline::error::ReadlineError;
+use rustyline::highlight::{CmdKind, Highlighter};
+use rustyline::hint::Hinter;
+use rustyline::validate::{ValidationContext, ValidationResult, Validator};
+use rustyline::{Context, Helper, Result};
+
+pub struct CliHelper {
+    completer: FilenameCompleter,
+    dialect: String,
+    highlighter: Box<dyn Highlighter>,
+}
+
+impl CliHelper {
+    pub fn new(dialect: &str, color: bool) -> Self {
+        let highlighter: Box<dyn Highlighter> = if !color {
+            Box::new(NoSyntaxHighlighter {})
+        } else {
+            Box::new(SyntaxHighlighter::new(dialect))
+        };
+        Self {
+            completer: FilenameCompleter::new(),
+            dialect: dialect.into(),
+            highlighter,
+        }
+    }
+
+    pub fn set_dialect(&mut self, dialect: &str) {
+        if dialect != self.dialect {
+            self.dialect = dialect.to_string();
+        }
+    }
+
+    fn validate_input(&self, input: &str) -> Result<ValidationResult> {
+        if let Some(sql) = input.strip_suffix(';') {
+            let sql = match unescape_input(sql) {
+                Ok(sql) => sql,
+                Err(err) => {
+                    return Ok(ValidationResult::Invalid(Some(format!(
+                        " 🤔 Invalid statement: {err}",
+                    ))))
+                }
+            };
+
+            let dialect = match dialect_from_str(&self.dialect) {
+                Some(dialect) => dialect,
+                None => {
+                    return Ok(ValidationResult::Invalid(Some(format!(
+                        " 🤔 Invalid dialect: {}",
+                        self.dialect
+                    ))))
+                }
+            };
+            let lines = split_from_semicolon(sql);
+            for line in lines {
+                match DFParser::parse_sql_with_dialect(&line, dialect.as_ref()) {
+                    Ok(statements) if statements.is_empty() => {
+                        return Ok(ValidationResult::Invalid(Some(
+                            " 🤔 You entered an empty statement".to_string(),
+                        )));
+                    }
+                    Ok(_statements) => {}
+                    Err(err) => {
+                        return Ok(ValidationResult::Invalid(Some(format!(
+                            " 🤔 Invalid statement: {err}",
+                        ))));
+                    }
+                }
+            }
+            Ok(ValidationResult::Valid(None))
+        } else if input.starts_with('\\') {
+            // command
+            Ok(ValidationResult::Valid(None))
+        } else {
+            Ok(ValidationResult::Incomplete)
+        }
+    }
+}
+
+impl Default for CliHelper {
+    fn default() -> Self {
+        Self::new("generic", false)
+    }
+}
+
+impl Highlighter for CliHelper {
+    fn highlight<'l>(&self, line: &'l str, pos: usize) -> Cow<'l, str> {
+        self.highlighter.highlight(line, pos)
+    }
+
+    fn highlight_char(&self, line: &str, pos: usize, kind: CmdKind) -> bool {
+        self.highlighter.highlight_char(line, pos, kind)
+    }
+}
+
+impl Hinter for CliHelper {
+    type Hint = String;
+}
+
+/// returns true if the current position is after the open quote for
+/// creating an external table.
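+///
+/// The trick, sketched: take the text up to the cursor, append a closing
+/// quote, and check whether it now parses as a complete CREATE EXTERNAL
+/// TABLE statement. (Hypothetical input; `ignore`d since this is private.)
+///
+/// ```ignore
+/// let line = "CREATE EXTERNAL TABLE t STORED AS CSV LOCATION '/tmp/pa";
+/// assert!(is_open_quote_for_location(line, line.len()));
+/// ```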
+fn is_open_quote_for_location(line: &str, pos: usize) -> bool {
+    let mut sql = line[..pos].to_string();
+    sql.push('\'');
+    if let Ok(stmts) = DFParser::parse_sql(&sql) {
+        if let Some(Statement::CreateExternalTable(_)) = stmts.back() {
+            return true;
+        }
+    }
+    false
+}
+
+impl Completer for CliHelper {
+    type Candidate = Pair;
+
+    fn complete(
+        &self,
+        line: &str,
+        pos: usize,
+        ctx: &Context<'_>,
+    ) -> std::result::Result<(usize, Vec<Pair>), ReadlineError> {
+        if is_open_quote_for_location(line, pos) {
+            self.completer.complete(line, pos, ctx)
+        } else {
+            Ok((0, Vec::with_capacity(0)))
+        }
+    }
+}
+
+impl Validator for CliHelper {
+    fn validate(&self, ctx: &mut ValidationContext<'_>) -> Result<ValidationResult> {
+        let input = ctx.input().trim_end();
+        self.validate_input(input)
+    }
+}
+
+impl Helper for CliHelper {}
+
+/// Unescape input string from readline.
+///
+/// The data read from stdio will be escaped, so we need to unescape the input before executing the input
+pub fn unescape_input(input: &str) -> datafusion::error::Result<String> {
+    let mut chars = input.chars();
+
+    let mut result = String::with_capacity(input.len());
+    while let Some(char) = chars.next() {
+        if char == '\\' {
+            if let Some(next_char) = chars.next() {
+                // https://static.rust-lang.org/doc/master/reference.html#literals
+                result.push(match next_char {
+                    '0' => '\0',
+                    'n' => '\n',
+                    'r' => '\r',
+                    't' => '\t',
+                    '\\' => '\\',
+                    _ => {
+                        return Err(sql_datafusion_err!(ParserError::TokenizerError(format!(
+                            "unsupported escape char: '\\{}'",
+                            next_char
+                        ))))
+                    }
+                });
+            }
+        } else {
+            result.push(char);
+        }
+    }
+
+    Ok(result)
+}
+
+/// Splits a string which consists of multiple queries.
+pub(crate) fn split_from_semicolon(sql: String) -> Vec<String> {
+    let mut commands = Vec::new();
+    let mut current_command = String::new();
+    let mut in_single_quote = false;
+    let mut in_double_quote = false;
+
+    for c in sql.chars() {
+        if c == '\'' && !in_double_quote {
+            in_single_quote = !in_single_quote;
+        } else if c == '"' && !in_single_quote {
+            in_double_quote = !in_double_quote;
+        }
+
+        if c == ';' && !in_single_quote && !in_double_quote {
+            if !current_command.trim().is_empty() {
+                commands.push(format!("{};", current_command.trim()));
+                current_command.clear();
+            }
+        } else {
+            current_command.push(c);
+        }
+    }
+
+    if !current_command.trim().is_empty() {
+        commands.push(format!("{};", current_command.trim()));
+    }
+
+    commands
+}
+
+#[cfg(test)]
+mod tests {
+    use std::io::{BufRead, Cursor};
+
+    use super::*;
+
+    fn readline_direct(
+        mut reader: impl BufRead,
+        validator: &CliHelper,
+    ) -> Result<ValidationResult> {
+        let mut input = String::new();
+
+        if reader.read_line(&mut input)?
== 0 { + return Err(ReadlineError::Eof); + } + + validator.validate_input(&input) + } + + #[test] + fn unescape_readline_input() -> Result<()> { + let validator = CliHelper::default(); + + // should be valid + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' ',');" + .as_bytes(), + ), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\0');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\n');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\r');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\t');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\\');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' ',,');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Valid(None))); + + // should be invalid + let result = readline_direct( + Cursor::new( + r"create external table test stored as csv location 'data.csv' options ('format.delimiter' '\u{07}');" + .as_bytes()), + &validator, + )?; + assert!(matches!(result, ValidationResult::Invalid(Some(_)))); + + Ok(()) + } + + #[test] + fn sql_dialect() -> Result<()> { + let mut validator = CliHelper::default(); + + // should be invalid in generic dialect + let result = readline_direct(Cursor::new(r"select 1 # 2;".as_bytes()), &validator)?; + assert!( + matches!(result, ValidationResult::Invalid(Some(e)) if e.contains("Invalid statement")) + ); + + // valid in postgresql dialect + validator.set_dialect("postgresql"); + let result = readline_direct(Cursor::new(r"select 1 # 2;".as_bytes()), &validator)?; + assert!(matches!(result, ValidationResult::Valid(None))); + + Ok(()) + } + + #[test] + fn test_split_from_semicolon() { + let sql = "SELECT 1; SELECT 2;"; + let expected = vec!["SELECT 1;", "SELECT 2;"]; + assert_eq!(split_from_semicolon(sql.to_string()), expected); + + let sql = r#"SELECT ";";"#; + let expected = vec![r#"SELECT ";";"#]; + assert_eq!(split_from_semicolon(sql.to_string()), expected); + + let sql = "SELECT ';';"; + let expected = vec!["SELECT ';';"]; + assert_eq!(split_from_semicolon(sql.to_string()), expected); + + let sql = r#"SELECT 1; SELECT 'value;value'; SELECT 1 as "text;text";"#; + let expected = vec![ + "SELECT 1;", + "SELECT 'value;value';", + r#"SELECT 1 as "text;text";"#, + ]; + assert_eq!(split_from_semicolon(sql.to_string()), expected); + + let sql = ""; + let expected: Vec = Vec::new(); + 
assert_eq!(split_from_semicolon(sql.to_string()), expected); + + let sql = "SELECT 1"; + let expected = vec!["SELECT 1;"]; + assert_eq!(split_from_semicolon(sql.to_string()), expected); + + let sql = "SELECT 1; "; + let expected = vec!["SELECT 1;"]; + assert_eq!(split_from_semicolon(sql.to_string()), expected); + } +} diff --git a/optd-datafusion-cli/src/highlighter.rs b/optd-datafusion-cli/src/highlighter.rs new file mode 100644 index 0000000..f3e13ed --- /dev/null +++ b/optd-datafusion-cli/src/highlighter.rs @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The syntax highlighter. + +use std::{ + borrow::Cow::{self, Borrowed}, + fmt::Display, +}; + +use datafusion::sql::sqlparser::{ + dialect::{dialect_from_str, Dialect, GenericDialect}, + keywords::Keyword, + tokenizer::{Token, Tokenizer}, +}; +use rustyline::highlight::{CmdKind, Highlighter}; + +/// The syntax highlighter. +#[derive(Debug)] +pub struct SyntaxHighlighter { + dialect: Box, +} + +impl SyntaxHighlighter { + pub fn new(dialect: &str) -> Self { + let dialect = dialect_from_str(dialect).unwrap_or(Box::new(GenericDialect {})); + Self { dialect } + } +} + +pub struct NoSyntaxHighlighter {} + +impl Highlighter for NoSyntaxHighlighter {} + +impl Highlighter for SyntaxHighlighter { + fn highlight<'l>(&self, line: &'l str, _: usize) -> Cow<'l, str> { + let mut out_line = String::new(); + + // `with_unescape(false)` since we want to rebuild the original string. + let mut tokenizer = Tokenizer::new(self.dialect.as_ref(), line).with_unescape(false); + let tokens = tokenizer.tokenize(); + match tokens { + Ok(tokens) => { + for token in tokens.iter() { + match token { + Token::Word(w) if w.keyword != Keyword::NoKeyword => { + out_line.push_str(&Color::red(token)); + } + Token::SingleQuotedString(_) => { + out_line.push_str(&Color::green(token)); + } + other => out_line.push_str(&format!("{other}")), + } + } + out_line.into() + } + Err(_) => Borrowed(line), + } + } + + fn highlight_char(&self, line: &str, _pos: usize, _cmd: CmdKind) -> bool { + !line.is_empty() + } +} + +/// Convenient utility to return strings with [ANSI color](https://gist.github.com/JBlond/2fea43a3049b38287e5e9cefc87b2124). 
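+///
+/// A sketch of the escape sequences used below (standard ANSI SGR codes):
+/// ```text
+/// \x1b[91m -> bright red    \x1b[92m -> bright green    \x1b[0m -> reset
+/// Color::red("SELECT") == "\x1b[91mSELECT\x1b[0m"
+/// ```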
+struct Color {} + +impl Color { + fn green(s: impl Display) -> String { + format!("\x1b[92m{s}\x1b[0m") + } + + fn red(s: impl Display) -> String { + format!("\x1b[91m{s}\x1b[0m") + } +} + +#[cfg(test)] +mod tests { + use super::SyntaxHighlighter; + use rustyline::highlight::Highlighter; + + #[test] + fn highlighter_valid() { + let s = "SElect col_a from tab_1;"; + let highlighter = SyntaxHighlighter::new("generic"); + let out = highlighter.highlight(s, s.len()); + assert_eq!( + "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1;", + out + ); + } + + #[test] + fn highlighter_valid_with_new_line() { + let s = "SElect col_a from tab_1\n WHERE col_b = 'なにか';"; + let highlighter = SyntaxHighlighter::new("generic"); + let out = highlighter.highlight(s, s.len()); + assert_eq!( + "\u{1b}[91mSElect\u{1b}[0m col_a \u{1b}[91mfrom\u{1b}[0m tab_1\n \u{1b}[91mWHERE\u{1b}[0m col_b = \u{1b}[92m'なにか'\u{1b}[0m;", + out + ); + } + + #[test] + fn highlighter_invalid() { + let s = "SElect col_a from tab_1 WHERE col_b = ';"; + let highlighter = SyntaxHighlighter::new("generic"); + let out = highlighter.highlight(s, s.len()); + assert_eq!("SElect col_a from tab_1 WHERE col_b = ';", out); + } +} diff --git a/optd-datafusion-cli/src/lib.rs b/optd-datafusion-cli/src/lib.rs new file mode 100644 index 0000000..fbfc924 --- /dev/null +++ b/optd-datafusion-cli/src/lib.rs @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![doc = include_str!("../README.md")] +pub const DATAFUSION_CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); + +pub mod catalog; +pub mod cli_context; +pub mod command; +pub mod exec; +pub mod functions; +pub mod helper; +pub mod highlighter; +pub mod object_storage; +pub mod pool_type; +pub mod print_format; +pub mod print_options; diff --git a/optd-datafusion-cli/src/main.rs b/optd-datafusion-cli/src/main.rs new file mode 100644 index 0000000..a14d773 --- /dev/null +++ b/optd-datafusion-cli/src/main.rs @@ -0,0 +1,438 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use std::collections::HashMap;
+use std::env;
+use std::path::Path;
+use std::process::ExitCode;
+use std::sync::{Arc, LazyLock};
+
+use datafusion::error::{DataFusionError, Result};
+use datafusion::execution::context::SessionConfig;
+use datafusion::execution::memory_pool::{FairSpillPool, GreedyMemoryPool, MemoryPool};
+use datafusion::execution::runtime_env::RuntimeEnvBuilder;
+use optd_datafusion_cli::catalog::DynamicObjectStoreCatalog;
+use optd_datafusion_cli::functions::ParquetMetadataFunc;
+use optd_datafusion_cli::{
+    exec,
+    pool_type::PoolType,
+    print_format::PrintFormat,
+    print_options::{MaxRows, PrintOptions},
+    DATAFUSION_CLI_VERSION,
+};
+
+use clap::Parser;
+use mimalloc::MiMalloc;
+
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
+#[derive(Debug, Parser, PartialEq)]
+#[clap(author, version, about, long_about = None)]
+struct Args {
+    #[clap(
+        short = 'p',
+        long,
+        help = "Path to your data, default to current directory",
+        value_parser(parse_valid_data_dir)
+    )]
+    data_path: Option<String>,
+
+    #[clap(
+        short = 'b',
+        long,
+        help = "The batch size of each query, or use DataFusion default",
+        value_parser(parse_batch_size)
+    )]
+    batch_size: Option<usize>,
+
+    #[clap(
+        short = 'c',
+        long,
+        num_args = 0..,
+        help = "Execute the given command string(s), then exit. Commands are expected to be non empty.",
+        value_parser(parse_command)
+    )]
+    command: Vec<String>,
+
+    #[clap(
+        short = 'm',
+        long,
+        help = "The memory pool limitation (e.g. '10g'), default to None (no limit)",
+        value_parser(extract_memory_pool_size)
+    )]
+    memory_limit: Option<usize>,
+
+    #[clap(
+        short,
+        long,
+        num_args = 0..,
+        help = "Execute commands from file(s), then exit",
+        value_parser(parse_valid_file)
+    )]
+    file: Vec<String>,
+
+    #[clap(
+        short = 'r',
+        long,
+        num_args = 0..,
+        help = "Run the provided files on startup instead of ~/.datafusionrc",
+        value_parser(parse_valid_file),
+        conflicts_with = "file"
+    )]
+    rc: Option<Vec<String>>,
+
+    #[clap(long, value_enum, default_value_t = PrintFormat::Automatic)]
+    format: PrintFormat,
+
+    #[clap(
+        short,
+        long,
+        help = "Reduce printing other than the results and work quietly"
+    )]
+    quiet: bool,
+
+    #[clap(
+        long,
+        help = "Specify the memory pool type 'greedy' or 'fair'",
+        default_value_t = PoolType::Greedy
+    )]
+    mem_pool_type: PoolType,
+
+    #[clap(
+        long,
+        help = "The max number of rows to display for 'Table' format\n[possible values: numbers(0/10/...), inf(no limit)]",
+        default_value = "40"
+    )]
+    maxrows: MaxRows,
+
+    #[clap(long, help = "Enables console syntax highlighting")]
+    color: bool,
+}
+
+#[tokio::main]
+/// Calls [`main_inner`], then handles printing errors and returning the correct exit code
+pub async fn main() -> ExitCode {
+    if let Err(e) = main_inner().await {
+        println!("Error: {e}");
+        return ExitCode::FAILURE;
+    }
+
+    ExitCode::SUCCESS
+}
+
+/// Main CLI entrypoint
+async fn main_inner() -> Result<()> {
+    env_logger::init();
+    let args = Args::parse();
+
+    if !args.quiet {
+        println!("DataFusion CLI v{}", DATAFUSION_CLI_VERSION);
+    }
+
+    if let Some(ref path) = args.data_path {
+        let p = Path::new(path);
+        env::set_current_dir(p).unwrap();
+    };
+
+    let mut session_config = SessionConfig::from_env()?.with_information_schema(true);
+
+    if let Some(batch_size) = args.batch_size {
+        session_config = session_config.with_batch_size(batch_size);
+    };
+
+    let mut rt_builder = RuntimeEnvBuilder::new();
+    // set memory pool size
+    if let Some(memory_limit) = args.memory_limit {
+        // set memory pool type
+        let pool: Arc<dyn MemoryPool> = match args.mem_pool_type {
+            PoolType::Fair => Arc::new(FairSpillPool::new(memory_limit)),
+            PoolType::Greedy => Arc::new(GreedyMemoryPool::new(memory_limit)),
+        };
+        rt_builder = rt_builder.with_memory_pool(pool)
+    }
+
+    let runtime_env = rt_builder.build_arc()?;
+
+    // enable dynamic file query
+    let ctx = optd_datafusion::create_df_context(Some(session_config), Some(runtime_env), None)
+        .await
+        .map_err(|e| DataFusionError::External(e.into()))?
+        .enable_url_table();
+    ctx.refresh_catalogs().await?;
+    // install dynamic catalog provider that can register required object stores
+    ctx.register_catalog_list(Arc::new(DynamicObjectStoreCatalog::new(
+        ctx.state().catalog_list().clone(),
+        ctx.state_weak_ref(),
+    )));
+    // register `parquet_metadata` table function to get metadata from parquet files
+    ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {}));
+
+    let mut print_options = PrintOptions {
+        format: args.format,
+        quiet: args.quiet,
+        maxrows: args.maxrows,
+        color: args.color,
+    };
+
+    let commands = args.command;
+    let files = args.file;
+    let rc = match args.rc {
+        Some(file) => file,
+        None => {
+            let mut files = Vec::new();
+            let home = dirs::home_dir();
+            if let Some(p) = home {
+                let home_rc = p.join(".datafusionrc");
+                if home_rc.exists() {
+                    files.push(home_rc.into_os_string().into_string().unwrap());
+                }
+            }
+            files
+        }
+    };
+
+    if commands.is_empty() && files.is_empty() {
+        if !rc.is_empty() {
+            exec::exec_from_files(&ctx, rc, &print_options).await?;
+        }
+        // TODO maybe we can have thiserror for cli but for now let's keep it simple
+        return exec::exec_from_repl(&ctx, &mut print_options)
+            .await
+            .map_err(|e| DataFusionError::External(Box::new(e)));
+    }
+
+    if !files.is_empty() {
+        exec::exec_from_files(&ctx, files, &print_options).await?;
+    }
+
+    if !commands.is_empty() {
+        exec::exec_from_commands(&ctx, commands, &print_options).await?;
+    }
+
+    Ok(())
+}
+
+fn parse_valid_file(dir: &str) -> Result<String, String> {
+    if Path::new(dir).is_file() {
+        Ok(dir.to_string())
+    } else {
+        Err(format!("Invalid file '{}'", dir))
+    }
+}
+
+fn parse_valid_data_dir(dir: &str) -> Result<String, String> {
+    if Path::new(dir).is_dir() {
+        Ok(dir.to_string())
+    } else {
+        Err(format!("Invalid data directory '{}'", dir))
+    }
+}
+
+fn parse_batch_size(size: &str) -> Result<usize, String> {
+    match size.parse::<usize>() {
+        Ok(size) if size > 0 => Ok(size),
+        _ => Err(format!("Invalid batch size '{}'", size)),
+    }
+}
+
+fn parse_command(command: &str) -> Result<String, String> {
+    if !command.is_empty() {
+        Ok(command.to_string())
+    } else {
+        Err("-c flag expects only non empty commands".to_string())
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+enum ByteUnit {
+    Byte,
+    KiB,
+    MiB,
+    GiB,
+    TiB,
+}
+
+impl ByteUnit {
+    fn multiplier(&self) -> u64 {
+        match self {
+            ByteUnit::Byte => 1,
+            ByteUnit::KiB => 1 << 10,
+            ByteUnit::MiB => 1 << 20,
+            ByteUnit::GiB => 1 << 30,
+            ByteUnit::TiB => 1 << 40,
+        }
+    }
+}
+
+fn extract_memory_pool_size(size: &str) -> Result<usize, String> {
+    static BYTE_SUFFIXES: LazyLock<HashMap<&'static str, ByteUnit>> = LazyLock::new(|| {
+        let mut m = HashMap::new();
+        m.insert("b", ByteUnit::Byte);
+        m.insert("k", ByteUnit::KiB);
+        m.insert("kb", ByteUnit::KiB);
+        m.insert("m", ByteUnit::MiB);
+        m.insert("mb", ByteUnit::MiB);
+        m.insert("g", ByteUnit::GiB);
+        m.insert("gb", ByteUnit::GiB);
+        m.insert("t", ByteUnit::TiB);
+        m.insert("tb", ByteUnit::TiB);
+        m
+    });
+
+    static SUFFIX_REGEX: LazyLock<regex::Regex> =
+        LazyLock::new(|| regex::Regex::new(r"^(-?[0-9]+)([a-z]+)?$").unwrap());
+
+    let lower = size.to_lowercase();
+    if let Some(caps) = SUFFIX_REGEX.captures(&lower) {
+        let num_str =
caps.get(1).unwrap().as_str(); + let num = num_str + .parse::() + .map_err(|_| format!("Invalid numeric value in memory pool size '{}'", size))?; + + let suffix = caps.get(2).map(|m| m.as_str()).unwrap_or("b"); + let unit = &BYTE_SUFFIXES + .get(suffix) + .ok_or_else(|| format!("Invalid memory pool size '{}'", size))?; + let memory_pool_size = usize::try_from(unit.multiplier()) + .ok() + .and_then(|multiplier| num.checked_mul(multiplier)) + .ok_or_else(|| format!("Memory pool size '{}' is too large", size))?; + + Ok(memory_pool_size) + } else { + Err(format!("Invalid memory pool size '{}'", size)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use datafusion::{assert_batches_eq, prelude::SessionContext}; + + fn assert_conversion(input: &str, expected: Result) { + let result = extract_memory_pool_size(input); + match expected { + Ok(v) => assert_eq!(result.unwrap(), v), + Err(e) => assert_eq!(result.unwrap_err(), e), + } + } + + #[test] + fn memory_pool_size() -> Result<(), String> { + // Test basic sizes without suffix, assumed to be bytes + assert_conversion("5", Ok(5)); + assert_conversion("100", Ok(100)); + + // Test various units + assert_conversion("5b", Ok(5)); + assert_conversion("4k", Ok(4 * 1024)); + assert_conversion("4kb", Ok(4 * 1024)); + assert_conversion("20m", Ok(20 * 1024 * 1024)); + assert_conversion("20mb", Ok(20 * 1024 * 1024)); + assert_conversion("2g", Ok(2 * 1024 * 1024 * 1024)); + assert_conversion("2gb", Ok(2 * 1024 * 1024 * 1024)); + assert_conversion("3t", Ok(3 * 1024 * 1024 * 1024 * 1024)); + assert_conversion("4tb", Ok(4 * 1024 * 1024 * 1024 * 1024)); + + // Test case insensitivity + assert_conversion("4K", Ok(4 * 1024)); + assert_conversion("4KB", Ok(4 * 1024)); + assert_conversion("20M", Ok(20 * 1024 * 1024)); + assert_conversion("20MB", Ok(20 * 1024 * 1024)); + assert_conversion("2G", Ok(2 * 1024 * 1024 * 1024)); + assert_conversion("2GB", Ok(2 * 1024 * 1024 * 1024)); + assert_conversion("2T", Ok(2 * 1024 * 1024 * 1024 * 1024)); + + // Test invalid input + assert_conversion( + "invalid", + Err("Invalid memory pool size 'invalid'".to_string()), + ); + assert_conversion("4kbx", Err("Invalid memory pool size '4kbx'".to_string())); + assert_conversion( + "-20mb", + Err("Invalid numeric value in memory pool size '-20mb'".to_string()), + ); + assert_conversion( + "-100", + Err("Invalid numeric value in memory pool size '-100'".to_string()), + ); + assert_conversion( + "12k12k", + Err("Invalid memory pool size '12k12k'".to_string()), + ); + + Ok(()) + } + + // #[tokio::test] + #[allow(dead_code)] + async fn test_parquet_metadata_works() -> Result<(), DataFusionError> { + let ctx = SessionContext::new(); + ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {})); + + // input with single quote + let sql = + "SELECT * FROM parquet_metadata('../datafusion/core/tests/data/fixed_size_list_array.parquet')"; + let df = ctx.sql(sql).await?; + let rbs = df.collect().await?; + + let excepted = [ + "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", + "| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | 
column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |", + "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", + "| ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | \"f0.list.item\" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 |", + "+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", + ]; + assert_batches_eq!(excepted, &rbs); + + // input with double quote + let sql = + "SELECT * FROM parquet_metadata(\"../datafusion/core/tests/data/fixed_size_list_array.parquet\")"; + let df = ctx.sql(sql).await?; + let rbs = df.collect().await?; + assert_batches_eq!(excepted, &rbs); + + Ok(()) + } + + // #[tokio::test] + #[allow(dead_code)] + async fn test_parquet_metadata_works_with_strings() -> Result<(), DataFusionError> { + let ctx = SessionContext::new(); + ctx.register_udtf("parquet_metadata", Arc::new(ParquetMetadataFunc {})); + + // input with string columns + let sql = + "SELECT * FROM parquet_metadata('../parquet-testing/data/data_index_bloom_encoding_stats.parquet')"; + let df = ctx.sql(sql).await?; + let rbs = df.collect().await?; + + let excepted = [ + +"+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", +"| filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size |", 
+"+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+", +"| ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | \"String\" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 |", +"+-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+" + ]; + assert_batches_eq!(excepted, &rbs); + + Ok(()) + } +} diff --git a/optd-datafusion-cli/src/object_storage.rs b/optd-datafusion-cli/src/object_storage.rs new file mode 100644 index 0000000..fc4b446 --- /dev/null +++ b/optd-datafusion-cli/src/object_storage.rs @@ -0,0 +1,632 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::any::Any; +use std::fmt::{Debug, Display}; +use std::sync::Arc; + +use datafusion::common::config::{ + ConfigEntry, ConfigExtension, ConfigField, ExtensionOptions, TableOptions, Visit, +}; +use datafusion::common::{config_err, exec_datafusion_err, exec_err}; +use datafusion::error::{DataFusionError, Result}; +use datafusion::execution::context::SessionState; + +use async_trait::async_trait; +use aws_config::BehaviorVersion; +use aws_credential_types::provider::ProvideCredentials; +use object_store::aws::{AmazonS3Builder, AwsCredential}; +use object_store::gcp::GoogleCloudStorageBuilder; +use object_store::http::HttpBuilder; +use object_store::{ClientOptions, CredentialProvider, ObjectStore}; +use url::Url; + +pub async fn get_s3_object_store_builder( + url: &Url, + aws_options: &AwsOptions, +) -> Result { + let AwsOptions { + access_key_id, + secret_access_key, + session_token, + region, + endpoint, + allow_http, + } = aws_options; + + let bucket_name = get_bucket_name(url)?; + let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket_name); + + if let (Some(access_key_id), Some(secret_access_key)) = (access_key_id, secret_access_key) { + builder = builder + .with_access_key_id(access_key_id) + .with_secret_access_key(secret_access_key); + + if let Some(session_token) = session_token { + builder = builder.with_token(session_token); + } + } else { + let config = aws_config::defaults(BehaviorVersion::latest()).load().await; + if let Some(region) = config.region() { + builder = builder.with_region(region.to_string()); + } + + let credentials = config + .credentials_provider() + .ok_or_else(|| { + DataFusionError::ObjectStore(object_store::Error::Generic { + store: "S3", + source: "Failed to get S3 credentials from the environment".into(), + }) + })? + .clone(); + + let credentials = Arc::new(S3CredentialProvider { credentials }); + builder = builder.with_credentials(credentials); + } + + if let Some(region) = region { + builder = builder.with_region(region); + } + + if let Some(endpoint) = endpoint { + // Make a nicer error if the user hasn't allowed http and the endpoint + // is http as the default message is "URL scheme is not allowed" + if let Ok(endpoint_url) = Url::try_from(endpoint.as_str()) { + if !matches!(allow_http, Some(true)) && endpoint_url.scheme() == "http" { + return config_err!( + "Invalid endpoint: {endpoint}. \ + HTTP is not allowed for S3 endpoints. 
\
+                     To allow HTTP, set 'aws.allow_http' to true"
+                );
+            }
+        }
+
+        builder = builder.with_endpoint(endpoint);
+    }
+
+    if let Some(allow_http) = allow_http {
+        builder = builder.with_allow_http(*allow_http);
+    }
+
+    Ok(builder)
+}
+
+#[derive(Debug)]
+struct S3CredentialProvider {
+    credentials: aws_credential_types::provider::SharedCredentialsProvider,
+}
+
+#[async_trait]
+impl CredentialProvider for S3CredentialProvider {
+    type Credential = AwsCredential;
+
+    async fn get_credential(&self) -> object_store::Result<Arc<Self::Credential>> {
+        let creds = self.credentials.provide_credentials().await.map_err(|e| {
+            object_store::Error::Generic {
+                store: "S3",
+                source: Box::new(e),
+            }
+        })?;
+        Ok(Arc::new(AwsCredential {
+            key_id: creds.access_key_id().to_string(),
+            secret_key: creds.secret_access_key().to_string(),
+            token: creds.session_token().map(ToString::to_string),
+        }))
+    }
+}
+
+pub fn get_oss_object_store_builder(
+    url: &Url,
+    aws_options: &AwsOptions,
+) -> Result<AmazonS3Builder> {
+    get_object_store_builder(url, aws_options, true)
+}
+
+pub fn get_cos_object_store_builder(
+    url: &Url,
+    aws_options: &AwsOptions,
+) -> Result<AmazonS3Builder> {
+    get_object_store_builder(url, aws_options, false)
+}
+
+fn get_object_store_builder(
+    url: &Url,
+    aws_options: &AwsOptions,
+    virtual_hosted_style_request: bool,
+) -> Result<AmazonS3Builder> {
+    let bucket_name = get_bucket_name(url)?;
+    let mut builder = AmazonS3Builder::from_env()
+        .with_virtual_hosted_style_request(virtual_hosted_style_request)
+        .with_bucket_name(bucket_name)
+        // oss/cos don't care about the "region" field
+        .with_region("do_not_care");
+
+    if let (Some(access_key_id), Some(secret_access_key)) =
+        (&aws_options.access_key_id, &aws_options.secret_access_key)
+    {
+        builder = builder
+            .with_access_key_id(access_key_id)
+            .with_secret_access_key(secret_access_key);
+    }
+
+    if let Some(endpoint) = &aws_options.endpoint {
+        builder = builder.with_endpoint(endpoint);
+    }
+
+    Ok(builder)
+}
+
+pub fn get_gcs_object_store_builder(
+    url: &Url,
+    gs_options: &GcpOptions,
+) -> Result<GoogleCloudStorageBuilder> {
+    let bucket_name = get_bucket_name(url)?;
+    let mut builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket_name);
+
+    if let Some(service_account_path) = &gs_options.service_account_path {
+        builder = builder.with_service_account_path(service_account_path);
+    }
+
+    if let Some(service_account_key) = &gs_options.service_account_key {
+        builder = builder.with_service_account_key(service_account_key);
+    }
+
+    if let Some(application_credentials_path) = &gs_options.application_credentials_path {
+        builder = builder.with_application_credentials(application_credentials_path);
+    }
+
+    Ok(builder)
+}
+
+fn get_bucket_name(url: &Url) -> Result<&str> {
+    url.host_str().ok_or_else(|| {
+        DataFusionError::Execution(format!(
+            "Not able to parse bucket name from url: {}",
+            url.as_str()
+        ))
+    })
+}
+
+/// This struct encapsulates AWS options one uses when setting up object storage.
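+///
+/// A minimal sketch of how these options are populated (the keys are the
+/// `aws.`-prefixed strings exercised by the tests at the bottom of this
+/// file; values are placeholders):
+///
+/// ```ignore
+/// let mut opts = AwsOptions::default();
+/// opts.set("aws.region", "us-east-2")?;
+/// opts.set("aws.endpoint", "http://localhost:9000")?;
+/// opts.set("aws.allow_http", "true")?;
+/// assert_eq!(opts.region.as_deref(), Some("us-east-2"));
+/// ```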
+#[derive(Default, Debug, Clone)]
+pub struct AwsOptions {
+    /// Access Key ID
+    pub access_key_id: Option<String>,
+    /// Secret Access Key
+    pub secret_access_key: Option<String>,
+    /// Session token
+    pub session_token: Option<String>,
+    /// AWS Region
+    pub region: Option<String>,
+    /// OSS or COS Endpoint
+    pub endpoint: Option<String>,
+    /// Allow HTTP (otherwise will always use https)
+    pub allow_http: Option<bool>,
+}
+
+impl ExtensionOptions for AwsOptions {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    fn cloned(&self) -> Box<dyn ExtensionOptions> {
+        Box::new(self.clone())
+    }
+
+    fn set(&mut self, key: &str, value: &str) -> Result<()> {
+        let (_key, aws_key) = key.split_once('.').unwrap_or((key, ""));
+        let (key, rem) = aws_key.split_once('.').unwrap_or((aws_key, ""));
+        match key {
+            "access_key_id" => {
+                self.access_key_id.set(rem, value)?;
+            }
+            "secret_access_key" => {
+                self.secret_access_key.set(rem, value)?;
+            }
+            "session_token" => {
+                self.session_token.set(rem, value)?;
+            }
+            "region" => {
+                self.region.set(rem, value)?;
+            }
+            "oss" | "cos" | "endpoint" => {
+                self.endpoint.set(rem, value)?;
+            }
+            "allow_http" => {
+                self.allow_http.set(rem, value)?;
+            }
+            _ => {
+                return config_err!("Config value \"{}\" not found on AwsOptions", rem);
+            }
+        }
+        Ok(())
+    }
+
+    fn entries(&self) -> Vec<ConfigEntry> {
+        struct Visitor(Vec<ConfigEntry>);
+
+        impl Visit for Visitor {
+            fn some<V: Display>(&mut self, key: &str, value: V, description: &'static str) {
+                self.0.push(ConfigEntry {
+                    key: key.to_string(),
+                    value: Some(value.to_string()),
+                    description,
+                })
+            }
+
+            fn none(&mut self, key: &str, description: &'static str) {
+                self.0.push(ConfigEntry {
+                    key: key.to_string(),
+                    value: None,
+                    description,
+                })
+            }
+        }
+
+        let mut v = Visitor(vec![]);
+        self.access_key_id.visit(&mut v, "access_key_id", "");
+        self.secret_access_key
+            .visit(&mut v, "secret_access_key", "");
+        self.session_token.visit(&mut v, "session_token", "");
+        self.region.visit(&mut v, "region", "");
+        self.endpoint.visit(&mut v, "endpoint", "");
+        self.allow_http.visit(&mut v, "allow_http", "");
+        v.0
+    }
+}
+
+impl ConfigExtension for AwsOptions {
+    const PREFIX: &'static str = "aws";
+}
+
+/// This struct encapsulates GCP options one uses when setting up object storage.
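+///
+/// As with [`AwsOptions`], fields are set through prefixed keys, e.g.
+/// (placeholder path; an `ignore`d sketch):
+///
+/// ```ignore
+/// let mut opts = GcpOptions::default();
+/// opts.set("gcp.service_account_path", "/secrets/gcp-sa.json")?;
+/// ```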
+#[derive(Debug, Clone, Default)]
+pub struct GcpOptions {
+    /// Service account path
+    pub service_account_path: Option<String>,
+    /// Service account key
+    pub service_account_key: Option<String>,
+    /// Application credentials path
+    pub application_credentials_path: Option<String>,
+}
+
+impl ExtensionOptions for GcpOptions {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn as_any_mut(&mut self) -> &mut dyn Any {
+        self
+    }
+
+    fn cloned(&self) -> Box<dyn ExtensionOptions> {
+        Box::new(self.clone())
+    }
+
+    fn set(&mut self, key: &str, value: &str) -> Result<()> {
+        let (_key, rem) = key.split_once('.').unwrap_or((key, ""));
+        match rem {
+            "service_account_path" => {
+                self.service_account_path.set(rem, value)?;
+            }
+            "service_account_key" => {
+                self.service_account_key.set(rem, value)?;
+            }
+            "application_credentials_path" => {
+                self.application_credentials_path.set(rem, value)?;
+            }
+            _ => {
+                return config_err!("Config value \"{}\" not found on GcpOptions", rem);
+            }
+        }
+        Ok(())
+    }
+
+    fn entries(&self) -> Vec<ConfigEntry> {
+        struct Visitor(Vec<ConfigEntry>);
+
+        impl Visit for Visitor {
+            fn some<V: Display>(&mut self, key: &str, value: V, description: &'static str) {
+                self.0.push(ConfigEntry {
+                    key: key.to_string(),
+                    value: Some(value.to_string()),
+                    description,
+                })
+            }
+
+            fn none(&mut self, key: &str, description: &'static str) {
+                self.0.push(ConfigEntry {
+                    key: key.to_string(),
+                    value: None,
+                    description,
+                })
+            }
+        }
+
+        let mut v = Visitor(vec![]);
+        self.service_account_path
+            .visit(&mut v, "service_account_path", "");
+        self.service_account_key
+            .visit(&mut v, "service_account_key", "");
+        self.application_credentials_path
+            .visit(&mut v, "application_credentials_path", "");
+        v.0
+    }
+}
+
+impl ConfigExtension for GcpOptions {
+    const PREFIX: &'static str = "gcp";
+}
+
+pub(crate) async fn get_object_store(
+    state: &SessionState,
+    scheme: &str,
+    url: &Url,
+    table_options: &TableOptions,
+) -> Result<Arc<dyn ObjectStore>, DataFusionError> {
+    let store: Arc<dyn ObjectStore> = match scheme {
+        "s3" => {
+            let Some(options) = table_options.extensions.get::<AwsOptions>() else {
+                return exec_err!("Given table options incompatible with the 's3' scheme");
+            };
+            let builder = get_s3_object_store_builder(url, options).await?;
+            Arc::new(builder.build()?)
+        }
+        "oss" => {
+            let Some(options) = table_options.extensions.get::<AwsOptions>() else {
+                return exec_err!("Given table options incompatible with the 'oss' scheme");
+            };
+            let builder = get_oss_object_store_builder(url, options)?;
+            Arc::new(builder.build()?)
+        }
+        "cos" => {
+            let Some(options) = table_options.extensions.get::<AwsOptions>() else {
+                return exec_err!("Given table options incompatible with the 'cos' scheme");
+            };
+            let builder = get_cos_object_store_builder(url, options)?;
+            Arc::new(builder.build()?)
+        }
+        "gs" | "gcs" => {
+            let Some(options) = table_options.extensions.get::<GcpOptions>() else {
+                return exec_err!("Given table options incompatible with the 'gs'/'gcs' scheme");
+            };
+            let builder = get_gcs_object_store_builder(url, options)?;
+            Arc::new(builder.build()?)
+        }
+        "http" | "https" => Arc::new(
+            HttpBuilder::new()
+                .with_client_options(ClientOptions::new().with_allow_http(true))
+                .with_url(url.origin().ascii_serialization())
+                .build()?,
+        ),
+        _ => {
+            // For other types, try to get from `object_store_registry`:
+            state
+                .runtime_env()
+                .object_store_registry
+                .get_store(url)
+                .map_err(|_| exec_datafusion_err!("Unsupported object store scheme: {}", scheme))?
+ } + }; + Ok(store) +} + +#[cfg(test)] +mod tests { + use crate::cli_context::CliSessionContext; + + use super::*; + + use datafusion::common::plan_err; + use datafusion::{ + datasource::listing::ListingTableUrl, + logical_expr::{DdlStatement, LogicalPlan}, + prelude::SessionContext, + }; + + use object_store::{aws::AmazonS3ConfigKey, gcp::GoogleConfigKey}; + + #[tokio::test] + async fn s3_object_store_builder() -> Result<()> { + // "fake" is uppercase to ensure the values are not lowercased when parsed + let access_key_id = "FAKE_access_key_id"; + let secret_access_key = "FAKE_secret_access_key"; + let region = "fake_us-east-2"; + let endpoint = "endpoint33"; + let session_token = "FAKE_session_token"; + let location = "s3://bucket/path/FAKE/file.parquet"; + + let table_url = ListingTableUrl::parse(location)?; + let scheme = table_url.scheme(); + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS\ + ('aws.access_key_id' '{access_key_id}', \ + 'aws.secret_access_key' '{secret_access_key}', \ + 'aws.region' '{region}', \ + 'aws.session_token' {session_token}, \ + 'aws.endpoint' '{endpoint}'\ + ) LOCATION '{location}'" + ); + + let ctx = SessionContext::new(); + let mut plan = ctx.state().create_logical_plan(&sql).await?; + + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { + ctx.register_table_options_extension_from_scheme(scheme); + let mut table_options = ctx.state().default_table_options(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let aws_options = table_options.extensions.get::().unwrap(); + let builder = get_s3_object_store_builder(table_url.as_ref(), aws_options).await?; + // get the actual configuration information, then assert_eq! + let config = [ + (AmazonS3ConfigKey::AccessKeyId, access_key_id), + (AmazonS3ConfigKey::SecretAccessKey, secret_access_key), + (AmazonS3ConfigKey::Region, region), + (AmazonS3ConfigKey::Endpoint, endpoint), + (AmazonS3ConfigKey::Token, session_token), + ]; + for (key, value) in config { + assert_eq!(value, builder.get_config_value(&key).unwrap()); + } + } else { + return plan_err!("LogicalPlan is not a CreateExternalTable"); + } + + Ok(()) + } + + #[tokio::test] + async fn s3_object_store_builder_allow_http_error() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let endpoint = "http://endpoint33"; + let location = "s3://bucket/path/file.parquet"; + + let table_url = ListingTableUrl::parse(location)?; + let scheme = table_url.scheme(); + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS\ + ('aws.access_key_id' '{access_key_id}', \ + 'aws.secret_access_key' '{secret_access_key}', \ + 'aws.endpoint' '{endpoint}'\ + ) LOCATION '{location}'" + ); + + let ctx = SessionContext::new(); + let mut plan = ctx.state().create_logical_plan(&sql).await?; + + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { + ctx.register_table_options_extension_from_scheme(scheme); + let mut table_options = ctx.state().default_table_options(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let aws_options = table_options.extensions.get::().unwrap(); + let err = get_s3_object_store_builder(table_url.as_ref(), aws_options) + .await + .unwrap_err(); + + assert_eq!(err.to_string(), "Invalid or Unsupported Configuration: Invalid endpoint: http://endpoint33. HTTP is not allowed for S3 endpoints. 
To allow HTTP, set 'aws.allow_http' to true"); + } else { + return plan_err!("LogicalPlan is not a CreateExternalTable"); + } + + // Now add `allow_http` to the options and check if it works + let sql = format!( + "CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS\ + ('aws.access_key_id' '{access_key_id}', \ + 'aws.secret_access_key' '{secret_access_key}', \ + 'aws.endpoint' '{endpoint}',\ + 'aws.allow_http' 'true'\ + ) LOCATION '{location}'" + ); + + let mut plan = ctx.state().create_logical_plan(&sql).await?; + + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { + ctx.register_table_options_extension_from_scheme(scheme); + let mut table_options = ctx.state().default_table_options(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let aws_options = table_options.extensions.get::().unwrap(); + // ensure this isn't an error + get_s3_object_store_builder(table_url.as_ref(), aws_options).await?; + } else { + return plan_err!("LogicalPlan is not a CreateExternalTable"); + } + + Ok(()) + } + + #[tokio::test] + async fn oss_object_store_builder() -> Result<()> { + let access_key_id = "fake_access_key_id"; + let secret_access_key = "fake_secret_access_key"; + let endpoint = "fake_endpoint"; + let location = "oss://bucket/path/file.parquet"; + + let table_url = ListingTableUrl::parse(location)?; + let scheme = table_url.scheme(); + let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('aws.access_key_id' '{access_key_id}', 'aws.secret_access_key' '{secret_access_key}', 'aws.oss.endpoint' '{endpoint}') LOCATION '{location}'"); + + let ctx = SessionContext::new(); + let mut plan = ctx.state().create_logical_plan(&sql).await?; + + if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan { + ctx.register_table_options_extension_from_scheme(scheme); + let mut table_options = ctx.state().default_table_options(); + table_options.alter_with_string_hash_map(&cmd.options)?; + let aws_options = table_options.extensions.get::().unwrap(); + let builder = get_oss_object_store_builder(table_url.as_ref(), aws_options)?; + // get the actual configuration information, then assert_eq! 
+            let config = [
+                (AmazonS3ConfigKey::AccessKeyId, access_key_id),
+                (AmazonS3ConfigKey::SecretAccessKey, secret_access_key),
+                (AmazonS3ConfigKey::Endpoint, endpoint),
+            ];
+            for (key, value) in config {
+                assert_eq!(value, builder.get_config_value(&key).unwrap());
+            }
+        } else {
+            return plan_err!("LogicalPlan is not a CreateExternalTable");
+        }
+
+        Ok(())
+    }
+
+    #[tokio::test]
+    async fn gcs_object_store_builder() -> Result<()> {
+        let service_account_path = "fake_service_account_path";
+        let service_account_key =
+            "{\"private_key\": \"fake_private_key.pem\",\"client_email\":\"fake_client_email\"}";
+        let application_credentials_path = "fake_application_credentials_path";
+        let location = "gcs://bucket/path/file.parquet";
+
+        let table_url = ListingTableUrl::parse(location)?;
+        let scheme = table_url.scheme();
+        let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET OPTIONS('gcp.service_account_path' '{service_account_path}', 'gcp.service_account_key' '{service_account_key}', 'gcp.application_credentials_path' '{application_credentials_path}') LOCATION '{location}'");
+
+        let ctx = SessionContext::new();
+        let mut plan = ctx.state().create_logical_plan(&sql).await?;
+
+        if let LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) = &mut plan {
+            ctx.register_table_options_extension_from_scheme(scheme);
+            let mut table_options = ctx.state().default_table_options();
+            table_options.alter_with_string_hash_map(&cmd.options)?;
+            let gcp_options = table_options.extensions.get::<GcpOptions>().unwrap();
+            let builder = get_gcs_object_store_builder(table_url.as_ref(), gcp_options)?;
+            // get the actual configuration information, then assert_eq!
+            let config = [
+                (GoogleConfigKey::ServiceAccount, service_account_path),
+                (GoogleConfigKey::ServiceAccountKey, service_account_key),
+                (
+                    GoogleConfigKey::ApplicationCredentials,
+                    application_credentials_path,
+                ),
+            ];
+            for (key, value) in config {
+                assert_eq!(value, builder.get_config_value(&key).unwrap());
+            }
+        } else {
+            return plan_err!("LogicalPlan is not a CreateExternalTable");
+        }
+
+        Ok(())
+    }
+}
diff --git a/optd-datafusion-cli/src/pool_type.rs b/optd-datafusion-cli/src/pool_type.rs
new file mode 100644
index 0000000..269790b
--- /dev/null
+++ b/optd-datafusion-cli/src/pool_type.rs
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
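For orientation, here is a minimal sketch of how the `PoolType` defined just below is meant to round-trip between flag strings and values. This is a hypothetical snippet, assuming the module is reachable as `optd_datafusion_cli::pool_type` (mirroring upstream datafusion-cli); the actual clap wiring lives in main.rs.

```rust
use optd_datafusion_cli::pool_type::PoolType;

fn main() {
    // FromStr accepts both the capitalized and lowercase spellings.
    let pool: PoolType = "greedy".parse().unwrap();
    assert_eq!(pool, PoolType::Greedy);

    // Display renders the lowercase form used in CLI help output.
    assert_eq!(pool.to_string(), "greedy");

    // Anything else is rejected with a descriptive error message.
    assert!("arena".parse::<PoolType>().is_err());
}
```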
+
+use std::{
+    fmt::{self, Display, Formatter},
+    str::FromStr,
+};
+
+#[derive(PartialEq, Debug, Clone)]
+pub enum PoolType {
+    Greedy,
+    Fair,
+}
+
+impl FromStr for PoolType {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Greedy" | "greedy" => Ok(PoolType::Greedy),
+            "Fair" | "fair" => Ok(PoolType::Fair),
+            _ => Err(format!("Invalid memory pool type '{}'", s)),
+        }
+    }
+}
+
+impl Display for PoolType {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        match self {
+            PoolType::Greedy => write!(f, "greedy"),
+            PoolType::Fair => write!(f, "fair"),
+        }
+    }
+}
diff --git a/optd-datafusion-cli/src/print_format.rs b/optd-datafusion-cli/src/print_format.rs
new file mode 100644
index 0000000..bd5c678
--- /dev/null
+++ b/optd-datafusion-cli/src/print_format.rs
@@ -0,0 +1,691 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Print format variants
+
+use std::str::FromStr;
+
+use crate::print_options::MaxRows;
+
+use arrow::csv::writer::WriterBuilder;
+use arrow::datatypes::SchemaRef;
+use arrow::json::{ArrayWriter, LineDelimitedWriter};
+use arrow::record_batch::RecordBatch;
+use arrow::util::pretty::pretty_format_batches_with_options;
+use datafusion::common::format::DEFAULT_CLI_FORMAT_OPTIONS;
+use datafusion::error::Result;
+
+/// Allow records to be printed in different formats
+#[derive(Debug, PartialEq, Eq, clap::ValueEnum, Clone, Copy)]
+pub enum PrintFormat {
+    Csv,
+    Tsv,
+    Table,
+    Json,
+    NdJson,
+    Automatic,
+}
+
+impl FromStr for PrintFormat {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        clap::ValueEnum::from_str(s, true)
+    }
+}
+
+macro_rules! batches_to_json {
+    ($WRITER: ident, $writer: expr, $batches: expr) => {{
+        {
+            if !$batches.is_empty() {
+                let mut json_writer = $WRITER::new(&mut *$writer);
+                for batch in $batches {
+                    json_writer.write(batch)?;
+                }
+                json_writer.finish()?;
+                json_finish!($WRITER, $writer);
+            }
+        }
+        Ok(()) as Result<()>
+    }};
+}
+
json_finish { + (ArrayWriter, $writer: expr) => {{ + writeln!($writer)?; + }}; + (LineDelimitedWriter, $writer: expr) => {{}}; +} + +fn print_batches_with_sep( + writer: &mut W, + batches: &[RecordBatch], + delimiter: u8, + with_header: bool, +) -> Result<()> { + let builder = WriterBuilder::new() + .with_header(with_header) + .with_delimiter(delimiter); + let mut csv_writer = builder.build(writer); + + for batch in batches { + csv_writer.write(batch)?; + } + + Ok(()) +} + +fn keep_only_maxrows(s: &str, maxrows: usize) -> String { + let lines: Vec = s.lines().map(String::from).collect(); + + assert!(lines.len() >= maxrows + 4); // 4 lines for top and bottom border + + let last_line = &lines[lines.len() - 1]; // bottom border line + + let spaces = last_line.len().saturating_sub(4); + let dotted_line = format!("| .{:( + writer: &mut W, + batches: &[RecordBatch], + maxrows: MaxRows, +) -> Result<()> { + match maxrows { + MaxRows::Limited(maxrows) => { + // Filter batches to meet the maxrows condition + let mut filtered_batches = Vec::new(); + let mut row_count: usize = 0; + let mut over_limit = false; + for batch in batches { + if row_count + batch.num_rows() > maxrows { + // If adding this batch exceeds maxrows, slice the batch + let limit = maxrows - row_count; + let sliced_batch = batch.slice(0, limit); + filtered_batches.push(sliced_batch); + over_limit = true; + break; + } else { + filtered_batches.push(batch.clone()); + row_count += batch.num_rows(); + } + } + + let formatted = + pretty_format_batches_with_options(&filtered_batches, &DEFAULT_CLI_FORMAT_OPTIONS)?; + if over_limit { + let mut formatted_str = format!("{}", formatted); + formatted_str = keep_only_maxrows(&formatted_str, maxrows); + writeln!(writer, "{}", formatted_str)?; + } else { + writeln!(writer, "{}", formatted)?; + } + } + MaxRows::Unlimited => { + let formatted = + pretty_format_batches_with_options(batches, &DEFAULT_CLI_FORMAT_OPTIONS)?; + writeln!(writer, "{}", formatted)?; + } + } + + Ok(()) +} + +impl PrintFormat { + /// Print the batches to a writer using the specified format + pub fn print_batches( + &self, + writer: &mut W, + schema: SchemaRef, + batches: &[RecordBatch], + maxrows: MaxRows, + with_header: bool, + ) -> Result<()> { + // filter out any empty batches + let batches: Vec<_> = batches + .iter() + .filter(|b| b.num_rows() > 0) + .cloned() + .collect(); + if batches.is_empty() { + return self.print_empty(writer, schema); + } + + match self { + Self::Csv | Self::Automatic => { + print_batches_with_sep(writer, &batches, b',', with_header) + } + Self::Tsv => print_batches_with_sep(writer, &batches, b'\t', with_header), + Self::Table => { + if maxrows == MaxRows::Limited(0) { + return Ok(()); + } + format_batches_with_maxrows(writer, &batches, maxrows) + } + Self::Json => batches_to_json!(ArrayWriter, writer, &batches), + Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, &batches), + } + } + + /// Print when the result batches contain no rows + fn print_empty(&self, writer: &mut W, schema: SchemaRef) -> Result<()> { + match self { + // Print column headers for Table format + Self::Table if !schema.fields().is_empty() => { + let empty_batch = RecordBatch::new_empty(schema); + let formatted = pretty_format_batches_with_options( + &[empty_batch], + &DEFAULT_CLI_FORMAT_OPTIONS, + )?; + writeln!(writer, "{}", formatted)?; + } + _ => {} + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + use arrow::array::Int32Array; + use arrow::datatypes::{DataType, Field, 
Schema}; + + #[test] + fn print_empty() { + for format in [ + PrintFormat::Csv, + PrintFormat::Tsv, + PrintFormat::Json, + PrintFormat::NdJson, + PrintFormat::Automatic, + ] { + // no output for empty batches, even with header set + PrintBatchesTest::new() + .with_format(format) + .with_schema(three_column_schema()) + .with_batches(vec![]) + .with_expected(&[""]) + .run(); + } + + // output column headers for empty batches when format is Table + #[rustfmt::skip] + let expected = &[ + "+---+---+---+", + "| a | b | c |", + "+---+---+---+", + "+---+---+---+", + ]; + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_schema(three_column_schema()) + .with_batches(vec![]) + .with_expected(expected) + .run(); + } + + #[test] + fn print_csv_no_header() { + #[rustfmt::skip] + let expected = &[ + "1,4,7", + "2,5,8", + "3,6,9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Csv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::No) + .with_expected(expected) + .run(); + } + + #[test] + fn print_csv_with_header() { + #[rustfmt::skip] + let expected = &[ + "a,b,c", + "1,4,7", + "2,5,8", + "3,6,9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Csv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + } + + #[test] + fn print_tsv_no_header() { + #[rustfmt::skip] + let expected = &[ + "1\t4\t7", + "2\t5\t8", + "3\t6\t9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Tsv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::No) + .with_expected(expected) + .run(); + } + + #[test] + fn print_tsv_with_header() { + #[rustfmt::skip] + let expected = &[ + "a\tb\tc", + "1\t4\t7", + "2\t5\t8", + "3\t6\t9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Tsv) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + } + + #[test] + fn print_table() { + let expected = &[ + "+---+---+---+", + "| a | b | c |", + "+---+---+---+", + "| 1 | 4 | 7 |", + "| 2 | 5 | 8 |", + "| 3 | 6 | 9 |", + "+---+---+---+", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Ignored) + .with_expected(expected) + .run(); + } + #[test] + fn print_json() { + let expected = &[r#"[{"a":1,"b":4,"c":7},{"a":2,"b":5,"c":8},{"a":3,"b":6,"c":9}]"#]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Json) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Ignored) + .with_expected(expected) + .run(); + } + + #[test] + fn print_ndjson() { + let expected = &[ + r#"{"a":1,"b":4,"c":7}"#, + r#"{"a":2,"b":5,"c":8}"#, + r#"{"a":3,"b":6,"c":9}"#, + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::NdJson) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Ignored) + .with_expected(expected) + .run(); + } + + #[test] + fn print_automatic_no_header() { + #[rustfmt::skip] + let expected = &[ + "1,4,7", + "2,5,8", + "3,6,9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Automatic) + .with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::No) + .with_expected(expected) + .run(); + } + #[test] + fn print_automatic_with_header() { + #[rustfmt::skip] + let expected = &[ + "a,b,c", + "1,4,7", + "2,5,8", + "3,6,9", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Automatic) + 
.with_batches(split_batch(three_column_batch())) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + } + + #[test] + fn print_maxrows_unlimited() { + #[rustfmt::skip] + let expected = &[ + "+---+", + "| a |", + "+---+", + "| 1 |", + "| 2 |", + "| 3 |", + "+---+", + ]; + + // should print out entire output with no truncation if unlimited or + // limit greater than number of batches or equal to the number of batches + for max_rows in [MaxRows::Unlimited, MaxRows::Limited(5), MaxRows::Limited(3)] { + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_schema(one_column_schema()) + .with_batches(vec![one_column_batch()]) + .with_maxrows(max_rows) + .with_expected(expected) + .run(); + } + } + + #[test] + fn print_maxrows_limited_one_batch() { + #[rustfmt::skip] + let expected = &[ + "+---+", + "| a |", + "+---+", + "| 1 |", + "| . |", + "| . |", + "| . |", + "+---+", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![one_column_batch()]) + .with_maxrows(MaxRows::Limited(1)) + .with_expected(expected) + .run(); + } + + #[test] + fn print_maxrows_limited_multi_batched() { + #[rustfmt::skip] + let expected = &[ + "+---+", + "| a |", + "+---+", + "| 1 |", + "| 2 |", + "| 3 |", + "| 1 |", + "| 2 |", + "| . |", + "| . |", + "| . |", + "+---+", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![ + one_column_batch(), + one_column_batch(), + one_column_batch(), + ]) + .with_maxrows(MaxRows::Limited(5)) + .with_expected(expected) + .run(); + } + + #[test] + fn test_print_batches_empty_batches() { + let batch = one_column_batch(); + let empty_batch = RecordBatch::new_empty(batch.schema()); + + #[rustfmt::skip] + let expected =&[ + "+---+", + "| a |", + "+---+", + "| 1 |", + "| 2 |", + "| 3 |", + "+---+", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![empty_batch.clone(), batch, empty_batch]) + .with_expected(expected) + .run(); + } + + #[test] + fn test_print_batches_empty_batch() { + let empty_batch = RecordBatch::new_empty(one_column_batch().schema()); + + // Print column headers for empty batch when format is Table + #[rustfmt::skip] + let expected =&[ + "+---+", + "| a |", + "+---+", + "+---+", + ]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_schema(one_column_schema()) + .with_batches(vec![empty_batch]) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + + // No output for empty batch when schema contains no columns + let empty_batch = RecordBatch::new_empty(Arc::new(Schema::empty())); + let expected = &[""]; + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_schema(Arc::new(Schema::empty())) + .with_batches(vec![empty_batch]) + .with_header(WithHeader::Yes) + .with_expected(expected) + .run(); + } + + #[derive(Debug)] + struct PrintBatchesTest { + format: PrintFormat, + schema: SchemaRef, + batches: Vec, + maxrows: MaxRows, + with_header: WithHeader, + expected: Vec<&'static str>, + } + + /// How to test with_header + #[derive(Debug, Clone)] + enum WithHeader { + Yes, + No, + /// output should be the same with or without header + Ignored, + } + + impl PrintBatchesTest { + fn new() -> Self { + Self { + format: PrintFormat::Table, + schema: Arc::new(Schema::empty()), + batches: vec![], + maxrows: MaxRows::Unlimited, + with_header: WithHeader::Ignored, + expected: vec![], + } + } + + /// set the format + fn with_format(mut self, format: PrintFormat) -> Self { + self.format = format; + 
self + } + + // set the schema + fn with_schema(mut self, schema: SchemaRef) -> Self { + self.schema = schema; + self + } + + /// set the batches to convert + fn with_batches(mut self, batches: Vec) -> Self { + self.batches = batches; + self + } + + /// set maxrows + fn with_maxrows(mut self, maxrows: MaxRows) -> Self { + self.maxrows = maxrows; + self + } + + /// set with_header + fn with_header(mut self, with_header: WithHeader) -> Self { + self.with_header = with_header; + self + } + + /// set expected output + fn with_expected(mut self, expected: &[&'static str]) -> Self { + self.expected = expected.to_vec(); + self + } + + /// run the test + fn run(self) { + let actual = self.output(); + let actual: Vec<_> = actual.trim_end().split('\n').collect(); + let expected = self.expected; + assert_eq!( + actual, expected, + "\n\nactual:\n{actual:#?}\n\nexpected:\n{expected:#?}" + ); + } + + /// formats batches using parameters and returns the resulting output + fn output(&self) -> String { + match self.with_header { + WithHeader::Yes => self.output_with_header(true), + WithHeader::No => self.output_with_header(false), + WithHeader::Ignored => { + let output = self.output_with_header(true); + // ensure the output is the same without header + let output_without_header = self.output_with_header(false); + assert_eq!( + output, output_without_header, + "Expected output to be the same with or without header" + ); + output + } + } + } + + fn output_with_header(&self, with_header: bool) -> String { + let mut buffer: Vec = vec![]; + self.format + .print_batches( + &mut buffer, + self.schema.clone(), + &self.batches, + self.maxrows, + with_header, + ) + .unwrap(); + String::from_utf8(buffer).unwrap() + } + } + + /// Return a schema with three columns + fn three_column_schema() -> SchemaRef { + Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int32, false), + Field::new("c", DataType::Int32, false), + ])) + } + + /// Return a batch with three columns and three rows + fn three_column_batch() -> RecordBatch { + RecordBatch::try_new( + three_column_schema(), + vec![ + Arc::new(Int32Array::from(vec![1, 2, 3])), + Arc::new(Int32Array::from(vec![4, 5, 6])), + Arc::new(Int32Array::from(vec![7, 8, 9])), + ], + ) + .unwrap() + } + + /// Return a schema with one column + fn one_column_schema() -> SchemaRef { + Arc::new(Schema::new(vec![Field::new("a", DataType::Int32, false)])) + } + + /// return a batch with one column and three rows + fn one_column_batch() -> RecordBatch { + RecordBatch::try_new( + one_column_schema(), + vec![Arc::new(Int32Array::from(vec![1, 2, 3]))], + ) + .unwrap() + } + + /// Slice the record batch into 2 batches + fn split_batch(batch: RecordBatch) -> Vec { + assert!(batch.num_rows() > 1); + let split = batch.num_rows() / 2; + vec![ + batch.slice(0, split), + batch.slice(split, batch.num_rows() - split), + ] + } +} diff --git a/optd-datafusion-cli/src/print_options.rs b/optd-datafusion-cli/src/print_options.rs new file mode 100644 index 0000000..9218d2b --- /dev/null +++ b/optd-datafusion-cli/src/print_options.rs @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fmt::{Display, Formatter};
+use std::io::Write;
+use std::pin::Pin;
+use std::str::FromStr;
+
+use crate::print_format::PrintFormat;
+
+use arrow::datatypes::SchemaRef;
+use arrow::record_batch::RecordBatch;
+use datafusion::common::instant::Instant;
+use datafusion::common::DataFusionError;
+use datafusion::error::Result;
+use datafusion::physical_plan::RecordBatchStream;
+
+use futures::StreamExt;
+
+#[derive(Debug, Clone, PartialEq, Copy)]
+pub enum MaxRows {
+    /// show all rows in the output
+    Unlimited,
+    /// Only show n rows
+    Limited(usize),
+}
+
+impl FromStr for MaxRows {
+    type Err = String;
+
+    fn from_str(maxrows: &str) -> Result<Self, Self::Err> {
+        if maxrows.to_lowercase() == "inf"
+            || maxrows.to_lowercase() == "infinite"
+            || maxrows.to_lowercase() == "none"
+        {
+            Ok(Self::Unlimited)
+        } else {
+            match maxrows.parse::<usize>() {
+                Ok(nrows) => Ok(Self::Limited(nrows)),
+                _ => Err(format!("Invalid maxrows {}. Valid inputs are natural numbers or \'none\', \'inf\', or \'infinite\' for no limit.", maxrows)),
+            }
+        }
+    }
+}
+
+impl Display for MaxRows {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Unlimited => write!(f, "unlimited"),
+            Self::Limited(max_rows) => write!(f, "at most {max_rows}"),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct PrintOptions {
+    pub format: PrintFormat,
+    pub quiet: bool,
+    pub maxrows: MaxRows,
+    pub color: bool,
+}
+
+// Returns the query execution details formatted
+fn get_execution_details_formatted(
+    row_count: usize,
+    maxrows: MaxRows,
+    query_start_time: Instant,
+) -> String {
+    let nrows_shown_msg = match maxrows {
+        MaxRows::Limited(nrows) if nrows < row_count => {
+            format!("(First {nrows} displayed. Use --maxrows to adjust)")
+        }
+        _ => String::new(),
+    };
+
+    format!(
+        "{} row(s) fetched. {}\nElapsed {:.3} seconds.\n",
+        row_count,
+        nrows_shown_msg,
+        query_start_time.elapsed().as_secs_f64()
+    )
+}
+
+impl PrintOptions {
+    /// Print the batches to stdout using the specified format
+    pub fn print_batches(
+        &self,
+        schema: SchemaRef,
+        batches: &[RecordBatch],
+        query_start_time: Instant,
+    ) -> Result<()> {
+        let stdout = std::io::stdout();
+        let mut writer = stdout.lock();
+
+        self.format
+            .print_batches(&mut writer, schema, batches, self.maxrows, true)?;
+
+        let row_count: usize = batches.iter().map(|b| b.num_rows()).sum();
+        let formatted_exec_details = get_execution_details_formatted(
+            row_count,
+            if self.format == PrintFormat::Table {
+                self.maxrows
+            } else {
+                MaxRows::Unlimited
+            },
+            query_start_time,
+        );
+
+        if !self.quiet {
+            writeln!(writer, "{formatted_exec_details}")?;
+        }
+
+        Ok(())
+    }
+
+    /// Print the stream to stdout using the specified format
+    pub async fn print_stream(
+        &self,
+        mut stream: Pin<Box<dyn RecordBatchStream>>,
+        query_start_time: Instant,
+    ) -> Result<()> {
+        if self.format == PrintFormat::Table {
+            return Err(DataFusionError::External(
+                "PrintFormat::Table is not implemented".to_string().into(),
+            ));
+        };
+
+        let stdout = std::io::stdout();
+        let mut writer = stdout.lock();
+
+        let mut row_count = 0_usize;
+        let mut with_header = true;
+
+        while let Some(maybe_batch) = stream.next().await {
+            let batch = maybe_batch?;
+            row_count += batch.num_rows();
+            self.format.print_batches(
+                &mut writer,
+                batch.schema(),
+                &[batch],
+                MaxRows::Unlimited,
+                with_header,
+            )?;
+            with_header = false;
+        }
+
+        let formatted_exec_details =
+            get_execution_details_formatted(row_count, MaxRows::Unlimited, query_start_time);
+
+        if !self.quiet {
+            writeln!(writer, "{formatted_exec_details}")?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/optd-datafusion-cli/tests/cli_integration.rs b/optd-datafusion-cli/tests/cli_integration.rs
new file mode 100644
index 0000000..43f3949
--- /dev/null
+++ b/optd-datafusion-cli/tests/cli_integration.rs
@@ -0,0 +1,57 @@
+#![allow(unused)]
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
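A quick illustration of the `MaxRows` semantics implemented above (a standalone sketch, assuming the module is exported as `optd_datafusion_cli::print_options`; `--maxrows` is the CLI flag that feeds this parser):

```rust
use optd_datafusion_cli::print_options::MaxRows;

fn main() {
    // "none", "inf", and "infinite" (any case) disable the row limit.
    assert_eq!("NONE".parse::<MaxRows>(), Ok(MaxRows::Unlimited));

    // Any natural number becomes a concrete row cap for Table output.
    assert_eq!("40".parse::<MaxRows>(), Ok(MaxRows::Limited(40)));

    // Everything else is rejected with a descriptive message.
    assert!("-1".parse::<MaxRows>().is_err());

    // Display feeds the "(First N displayed...)" execution footer.
    assert_eq!(MaxRows::Limited(10).to_string(), "at most 10");
}
```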
+ +use std::process::Command; + +use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; +use predicates::prelude::predicate; +use rstest::rstest; + +#[cfg(test)] +#[ctor::ctor] +fn init() { + // Enable RUST_LOG logging configuration for tests + let _ = env_logger::try_init(); +} + +// Disabled due to https://github.com/apache/datafusion/issues/10793 +// #[cfg(not(target_family = "windows"))] +// #[rstest] +// #[case::exec_from_commands( +// ["--command", "select 1", "--format", "json", "-q"], +// "[{\"Int64(1)\":1}]\n" +// )] +// #[case::exec_multiple_statements( +// ["--command", "select 1; select 2;", "--format", "json", "-q"], +// "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" +// )] +// #[case::exec_from_files( +// ["--file", "tests/data/sql.txt", "--format", "json", "-q"], +// "[{\"Int64(1)\":1}]\n" +// )] +// #[case::set_batch_size( +// ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], +// "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" +// )] +// #[test] +// fn cli_quick_test<'a>(#[case] args: impl IntoIterator, #[case] expected: &str) { +// let mut cmd = Command::cargo_bin("optd-datafusion-cli").unwrap(); +// cmd.args(args); +// cmd.assert().stdout(predicate::eq(expected)); +// } diff --git a/optd-datafusion-cli/tests/data/sql.txt b/optd-datafusion-cli/tests/data/sql.txt new file mode 100644 index 0000000..9e13a3e --- /dev/null +++ b/optd-datafusion-cli/tests/data/sql.txt @@ -0,0 +1 @@ +select 1; \ No newline at end of file diff --git a/optd-datafusion/Cargo.toml b/optd-datafusion/Cargo.toml new file mode 100644 index 0000000..0055bc7 --- /dev/null +++ b/optd-datafusion/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "optd-datafusion" +version.workspace = true +edition.workspace = true +rust-version.workspace = true +repository.workspace = true + +[dependencies] +optd-core = { path = "../optd-core" } +anyhow.workspace = true +tokio.workspace = true +trait-variant.workspace = true +async-recursion.workspace = true +proc-macro2.workspace = true +async-trait = "0.1.85" +datafusion.workspace = true +futures = "0.3.31" +itertools = "0.14.0" diff --git a/optd-datafusion/sql/test_filter.sql b/optd-datafusion/sql/test_filter.sql new file mode 100644 index 0000000..47bd7d1 --- /dev/null +++ b/optd-datafusion/sql/test_filter.sql @@ -0,0 +1,24 @@ +CREATE TABLE employees ( + id BIGINT, + name TEXT, + department_id BIGINT +); + +CREATE TABLE departments ( + id BIGINT, + department_name TEXT +); + +INSERT INTO employees VALUES + (1, 'Alice', 1), + (2, 'Bob', 2), + (3, 'Charlie', 1); + +INSERT INTO departments VALUES + (1, 'Engineering'), + (2, 'Marketing'); + + +explain SELECT * FROM employees WHERE id = 2 + 1 - 1 and name = 'Bob'; + +SELECT * FROM employees WHERE id = 2 + 1 - 1 and name = 'Bob'; diff --git a/optd-datafusion/sql/test_join.sql b/optd-datafusion/sql/test_join.sql new file mode 100644 index 0000000..d908067 --- /dev/null +++ b/optd-datafusion/sql/test_join.sql @@ -0,0 +1,24 @@ +CREATE TABLE employees ( + id INTEGER, + name TEXT, + department_id INTEGER +); + +CREATE TABLE departments ( + id INTEGER, + department_name TEXT +); + +INSERT INTO employees VALUES + (1, 'Alice', 1), + (2, 'Bob', 2), + (3, 'Charlie', 1); + +INSERT INTO departments VALUES + (1, 'Engineering'), + (2, 'Marketing'); + + +explain SELECT * FROM employees INNER JOIN departments ON employees.department_id = departments.id where (NOT (employees.name = 'Bob' AND departments.department_name = 'Engineering')) AND (NOT (employees.name = 'Bob' AND 
departments.department_name = 'Engineering'));
+
+SELECT * FROM employees INNER JOIN departments ON employees.department_id = departments.id where (NOT (employees.name = 'Bob' AND departments.department_name = 'Engineering')) AND (NOT (employees.name = 'Bob' AND departments.department_name = 'Engineering'));
\ No newline at end of file
diff --git a/optd-datafusion/sql/test_scan.sql b/optd-datafusion/sql/test_scan.sql
new file mode 100644
index 0000000..44533b2
--- /dev/null
+++ b/optd-datafusion/sql/test_scan.sql
@@ -0,0 +1,24 @@
+CREATE TABLE employees (
+    id INTEGER,
+    name TEXT,
+    department_id INTEGER
+);
+
+CREATE TABLE departments (
+    id INTEGER,
+    department_name TEXT
+);
+
+INSERT INTO employees VALUES
+    (1, 'Alice', 1),
+    (2, 'Bob', 2),
+    (3, 'Charlie', 1);
+
+INSERT INTO departments VALUES
+    (1, 'Engineering'),
+    (2, 'Marketing');
+
+
+explain SELECT * FROM employees;
+
+SELECT * FROM employees;
diff --git a/optd-datafusion/src/converter/from_optd.rs b/optd-datafusion/src/converter/from_optd.rs
new file mode 100644
index 0000000..7fadf29
--- /dev/null
+++ b/optd-datafusion/src/converter/from_optd.rs
@@ -0,0 +1,192 @@
+use std::{collections::HashMap, str::FromStr, sync::Arc};
+
+use anyhow::bail;
+use async_recursion::async_recursion;
+use datafusion::{
+    arrow::datatypes::{Schema, SchemaRef},
+    common::JoinType,
+    datasource::source_as_provider,
+    logical_expr::Operator,
+    physical_plan::{
+        expressions::{BinaryExpr, Column, Literal, NegativeExpr, NotExpr},
+        joins::utils::{ColumnIndex, JoinFilter},
+        projection::ProjectionExec,
+        ExecutionPlan, PhysicalExpr,
+    },
+    scalar::ScalarValue,
+};
+use optd_core::{
+    operators::{relational::physical::PhysicalOperator, scalar::ScalarOperator},
+    plans::{physical::PhysicalPlan, scalar::ScalarPlan},
+    values::OptdValue,
+};
+
+use super::OptdDFContext;
+
+impl OptdDFContext<'_> {
+    #[async_recursion]
+    pub async fn conv_optd_to_df_relational(
+        &self,
+        optimized_plan: &PhysicalPlan,
+    ) -> anyhow::Result<Arc<dyn ExecutionPlan>> {
+        match &optimized_plan.operator {
+            PhysicalOperator::TableScan(table_scan) => {
+                let source = self
+                    .tables
+                    .get(table_scan.table_name.as_str().unwrap())
+                    .ok_or_else(|| anyhow::anyhow!("Table not found"))?;
+                let provider = source_as_provider(source)?;
+
+                // TODO(yuchen): support filters inside table scan.
+                let filters = vec![];
+                let plan = provider
+                    .scan(self.session_state, None, &filters, None)
+                    .await?;
+                Ok(plan)
+            }
+            PhysicalOperator::Filter(filter) => {
+                let input_exec = self.conv_optd_to_df_relational(&filter.child).await?;
+                let physical_expr =
+                    Self::conv_optd_to_df_scalar(&filter.predicate, &input_exec.schema())?;
+                Ok(
+                    Arc::new(datafusion::physical_plan::filter::FilterExec::try_new(
+                        physical_expr,
+                        input_exec,
+                    )?) as Arc<dyn ExecutionPlan>,
+                )
+            }
+            PhysicalOperator::Project(project) => {
+                let input_exec = self.conv_optd_to_df_relational(&project.child).await?;
+                let physical_exprs = project
+                    .fields
+                    .iter()
+                    .cloned()
+                    .filter_map(|field| {
+                        Self::conv_optd_to_df_scalar(&field, &input_exec.schema()).ok()
+                    })
+                    .enumerate()
+                    .map(|(idx, expr)| (expr, format!("col{}", idx)))
+                    .collect::<Vec<(Arc<dyn PhysicalExpr>, String)>>();
+
+                Ok(
+                    Arc::new(ProjectionExec::try_new(physical_exprs, input_exec)?)
+                        as Arc<dyn ExecutionPlan>,
+                )
+            }
+            PhysicalOperator::NestedLoopJoin(join) => {
+                let left_exec = self.conv_optd_to_df_relational(&join.outer).await?;
+                let right_exec = self.conv_optd_to_df_relational(&join.inner).await?;
+                let filter_schema = {
+                    let fields = left_exec
+                        .schema()
+                        .fields()
+                        .into_iter()
+                        .chain(right_exec.schema().fields().into_iter())
+                        .cloned()
+                        .collect::<Vec<_>>();
+                    Schema::new_with_metadata(fields, HashMap::new())
+                };
+
+                let physical_expr = Self::conv_optd_to_df_scalar(
+                    &join.condition,
+                    &Arc::new(filter_schema.clone()),
+                )?;
+
+                let join_type = JoinType::from_str(join.join_type.as_str().unwrap())?;
+
+                let mut column_idxs = vec![];
+                for i in 0..left_exec.schema().fields().len() {
+                    column_idxs.push(ColumnIndex {
+                        index: i,
+                        side: datafusion::common::JoinSide::Left,
+                    });
+                }
+                for i in 0..right_exec.schema().fields().len() {
+                    column_idxs.push(ColumnIndex {
+                        index: i,
+                        side: datafusion::common::JoinSide::Right,
+                    });
+                }
+
+                Ok(Arc::new(
+                    datafusion::physical_plan::joins::NestedLoopJoinExec::try_new(
+                        left_exec,
+                        right_exec,
+                        Some(JoinFilter::new(
+                            physical_expr,
+                            column_idxs,
+                            Arc::new(filter_schema),
+                        )),
+                        &join_type,
+                        None,
+                    )?,
+                ) as Arc<dyn ExecutionPlan>)
+            }
+            PhysicalOperator::HashJoin(_hash_join) => todo!(),
+            PhysicalOperator::SortMergeJoin(_merge_join) => todo!(),
+        }
+    }
+
+    pub fn conv_optd_to_df_scalar(
+        pred: &ScalarPlan,
+        context: &SchemaRef,
+    ) -> anyhow::Result<Arc<dyn PhysicalExpr>> {
+        match &pred.operator {
+            ScalarOperator::ColumnRef(column_ref) => {
+                let idx = column_ref.column_index.as_i64().unwrap() as usize;
+                Ok(Arc::new(
+                    // DataFusion checks that a column expression's name matches the schema,
+                    // so we have to supply the name inferred by DataFusion instead of using
+                    // our own logical properties.
+                    Column::new(context.fields()[idx].name(), idx),
+                ))
+            }
+            ScalarOperator::Constant(constant) => {
+                let value = match &constant.value {
+                    OptdValue::Int64(value) => ScalarValue::Int64(Some(*value)),
+                    OptdValue::String(value) => ScalarValue::Utf8(Some(value.clone())),
+                    OptdValue::Bool(value) => ScalarValue::Boolean(Some(*value)),
+                };
+                Ok(Arc::new(Literal::new(value)))
+            }
+            ScalarOperator::BinaryOp(binary_op) => {
+                let left = Self::conv_optd_to_df_scalar(&binary_op.left, context)?;
+                let right = Self::conv_optd_to_df_scalar(&binary_op.right, context)?;
+                // TODO(yuchen): really need the enums!
+                let op = match binary_op.kind.as_str().unwrap() {
+                    "add" => Operator::Plus,
+                    "minus" => Operator::Minus,
+                    "equal" => Operator::Eq,
+                    s => panic!("Unsupported binary operator: {}", s),
+                };
+                Ok(Arc::new(BinaryExpr::new(left, op, right)) as Arc<dyn PhysicalExpr>)
+            }
+            ScalarOperator::UnaryOp(unary_op) => {
+                let child = Self::conv_optd_to_df_scalar(&unary_op.child, context)?;
+                // TODO(yuchen): really need the enums!
+                match unary_op.kind.as_str().unwrap() {
+                    "not" => Ok(Arc::new(NotExpr::new(child)) as Arc<dyn PhysicalExpr>),
+                    "neg" => Ok(Arc::new(NegativeExpr::new(child)) as Arc<dyn PhysicalExpr>),
+                    s => bail!("Unsupported unary operator: {}", s),
+                }
+            }
+            ScalarOperator::LogicOp(logic_op) => {
+                let op = match logic_op.kind.as_str().unwrap() {
+                    "and" => Operator::And,
+                    "or" => Operator::Or,
+                    s => bail!("Unsupported logic operator: {}", s),
+                };
+                let mut children = logic_op.children.iter();
+                let first_child = Self::conv_optd_to_df_scalar(
+                    children
+                        .next()
+                        .expect("LogicOp should have at least one child"),
+                    context,
+                )?;
+                children.try_fold(first_child, |acc, expr| {
+                    let expr = Self::conv_optd_to_df_scalar(expr, context)?;
+                    Ok(Arc::new(BinaryExpr::new(acc, op, expr)) as Arc<dyn PhysicalExpr>)
+                })
+            }
+        }
+    }
+}
diff --git a/optd-datafusion/src/converter/into_optd.rs b/optd-datafusion/src/converter/into_optd.rs
new file mode 100644
index 0000000..32b64f1
--- /dev/null
+++ b/optd-datafusion/src/converter/into_optd.rs
@@ -0,0 +1,184 @@
+use std::sync::Arc;
+
+use anyhow::bail;
+use datafusion::{
+    common::DFSchema,
+    logical_expr::{utils::conjunction, LogicalPlan as DFLogicalPlan, Operator},
+    prelude::Expr,
+};
+use optd_core::{
+    operators::{
+        relational::logical::{
+            filter::Filter, join::Join, project::Project, scan::Scan, LogicalOperator,
+        },
+        scalar::{
+            binary_op,
+            column_ref::ColumnRef,
+            constants::{self, Constant},
+            logic_op, unary_op, ScalarOperator,
+        },
+    },
+    plans::{logical::LogicalPlan, scalar::ScalarPlan},
+    values::OptdValue,
+};
+
+use super::OptdDFContext;
+
+impl OptdDFContext<'_> {
+    /// The col_offset is an offset added to the column index for all column references.
+    /// It is useful for joins.
+    pub fn conv_df_to_optd_scalar(
+        df_expr: &Expr,
+        context: &DFSchema,
+        col_offset: usize,
+    ) -> anyhow::Result<Arc<ScalarPlan>> {
+        let operator = match df_expr {
+            Expr::Column(column) => ScalarOperator::ColumnRef(ColumnRef {
+                column_index: OptdValue::Int64(
+                    (context.index_of_column(column).unwrap() + col_offset) as i64,
+                ),
+            }),
+            Expr::Literal(scalar_value) => match scalar_value {
+                datafusion::scalar::ScalarValue::Boolean(val) => {
+                    ScalarOperator::Constant(Constant {
+                        value: OptdValue::Bool((*val).unwrap()),
+                    })
+                }
+                datafusion::scalar::ScalarValue::Int64(val) => {
+                    ScalarOperator::Constant(Constant::new(OptdValue::Int64((*val).unwrap())))
+                }
+                datafusion::scalar::ScalarValue::Utf8(val) => {
+                    ScalarOperator::Constant(Constant::new(OptdValue::String(val.clone().unwrap())))
+                }
+                _ => panic!("optd only supports a limited number of literals"),
+            },
+            Expr::BinaryExpr(binary_expr) => {
+                let left = Self::conv_df_to_optd_scalar(&binary_expr.left, context, col_offset)?;
+                let right = Self::conv_df_to_optd_scalar(&binary_expr.right, context, col_offset)?;
+                match binary_expr.op {
+                    Operator::Plus => binary_op::add(left, right),
+                    Operator::Minus => binary_op::minus(left, right),
+                    Operator::Eq => binary_op::equal(left, right),
+                    // TODO(yuchen): flatten logic operations as an optimization.
+                    Operator::And => logic_op::and(vec![left, right]),
+                    Operator::Or => logic_op::or(vec![left, right]),
+                    _ => todo!(),
+                }
+            }
+            Expr::Not(expr) => unary_op::not(Self::conv_df_to_optd_scalar(
+                expr.as_ref(),
+                context,
+                col_offset,
+            )?),
+            Expr::Cast(cast) => {
+                return Self::conv_df_to_optd_scalar(&cast.expr, context, col_offset);
+            }
+            _ => panic!(
+                "optd does not support this scalar expression: {:#?}",
+                df_expr
+            ),
+        };
+
+        Ok(Arc::new(ScalarPlan { operator }))
+    }
+
+    fn flatten_scalar_as_conjunction(
+        join_cond: Vec<Arc<ScalarPlan>>,
+        idx: usize,
+    ) -> Arc<ScalarPlan> {
+        if idx == join_cond.len() - 1 {
+            join_cond[idx].clone()
+        } else {
+            Arc::new(ScalarPlan {
+                operator: logic_op::and(vec![
+                    join_cond[idx].clone(),
+                    Self::flatten_scalar_as_conjunction(join_cond.clone(), idx + 1),
+                ]),
+            })
+        }
+    }
+
+    pub fn conv_df_to_optd_relational(
+        &mut self,
+        df_logical_plan: &DFLogicalPlan,
+    ) -> anyhow::Result<Arc<LogicalPlan>> {
+        let operator = match df_logical_plan {
+            DFLogicalPlan::Filter(df_filter) => LogicalOperator::Filter(Filter {
+                child: self.conv_df_to_optd_relational(&df_filter.input)?,
+                predicate: Self::conv_df_to_optd_scalar(
+                    &df_filter.predicate,
+                    df_filter.input.schema(),
+                    0,
+                )?,
+            }),
+            DFLogicalPlan::Join(join) => {
+                let mut join_cond = Vec::new();
+                for (left, right) in &join.on {
+                    let left = Self::conv_df_to_optd_scalar(left, join.left.schema(), 0)?;
+                    let offset = join.left.schema().fields().len();
+                    let right = Self::conv_df_to_optd_scalar(right, join.right.schema(), offset)?;
+                    join_cond.push(Arc::new(ScalarPlan {
+                        operator: binary_op::equal(left, right),
+                    }));
+                }
+                if let Some(filter) = &join.filter {
+                    let filter =
+                        Self::conv_df_to_optd_scalar(filter, df_logical_plan.schema().as_ref(), 0)?;
+                    join_cond.push(filter);
+                }
+                if join_cond.is_empty() {
+                    join_cond.push(Arc::new(ScalarPlan {
+                        operator: constants::boolean(true),
+                    }));
+                }
+
+                LogicalOperator::Join(Join::new(
+                    &join.join_type.to_string(),
+                    self.conv_df_to_optd_relational(&join.left)?,
+                    self.conv_df_to_optd_relational(&join.right)?,
+                    Self::flatten_scalar_as_conjunction(join_cond, 0),
+                ))
+            }
+            DFLogicalPlan::TableScan(table_scan) => {
+                let table_name = table_scan.table_name.to_quoted_string();
+
+                let combine_filters = conjunction(table_scan.filters.to_vec());
+                let scan = LogicalOperator::Scan(Scan::new(
+                    &table_scan.table_name.to_quoted_string(),
+                    match combine_filters {
+                        Some(df_expr) => {
+                            let schema = DFSchema::try_from(table_scan.source.schema()).unwrap();
+                            Self::conv_df_to_optd_scalar(&df_expr, &schema, 0)?
+                        }
+                        None => Arc::new(ScalarPlan {
+                            operator: ScalarOperator::Constant(Constant {
+                                value: OptdValue::Bool(true),
+                            }),
+                        }),
+                    },
+                ));
+
+                self.tables.insert(table_name, table_scan.source.clone());
+
+                scan
+            }
+            DFLogicalPlan::Projection(projection) => {
+                let input = self.conv_df_to_optd_relational(projection.input.as_ref())?;
+                let mut exprs = Vec::new();
+                for expr in &projection.expr {
+                    exprs.push(Self::conv_df_to_optd_scalar(
+                        expr,
+                        projection.input.schema(),
+                        0,
+                    )?);
+                }
+
+                LogicalOperator::Project(Project {
+                    child: input,
+                    fields: exprs,
+                })
+            }
+            logical_plan => bail!("optd does not support this operator {:?}", logical_plan),
+        };
+        Ok(Arc::new(LogicalPlan { operator }))
+    }
+}
diff --git a/optd-datafusion/src/converter/mod.rs b/optd-datafusion/src/converter/mod.rs
new file mode 100644
index 0000000..d3b3e6c
--- /dev/null
+++ b/optd-datafusion/src/converter/mod.rs
@@ -0,0 +1,32 @@
+use std::{collections::HashMap, sync::Arc};
+
+use datafusion::{execution::SessionState, logical_expr::TableSource};
+
+pub mod from_optd;
+pub mod into_optd;
+
+/// A context for converting between optd and DataFusion plans.
+/// The map is used to look up table sources when converting TableScan operators from optd to
+/// DataFusion.
+pub struct OptdDFContext<'a> {
+    /// Maps table names to table sources.
+    pub tables: HashMap<String, Arc<dyn TableSource>>,
+    pub session_state: &'a SessionState,
+}
+
+impl OptdDFContext<'_> {
+    /// Creates a new `OptdDFContext` with the provided session state.
+    ///
+    /// # Arguments
+    ///
+    /// * `session_state` - A reference to the `SessionState` used for conversions.
+    ///
+    /// # Returns
+    ///
+    /// An `OptdDFContext` containing an empty table map and the provided session state.
+    pub fn new(session_state: &SessionState) -> OptdDFContext {
+        OptdDFContext {
+            tables: HashMap::new(),
+            session_state,
+        }
+    }
+}
diff --git a/optd-datafusion/src/lib.rs b/optd-datafusion/src/lib.rs
new file mode 100644
index 0000000..7193224
--- /dev/null
+++ b/optd-datafusion/src/lib.rs
@@ -0,0 +1,134 @@
+// Copyright (c) 2023-2024 CMU Database Group
+//
+// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
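To make the converter's role concrete, here is a minimal sketch of the intended round trip through `OptdDFContext` (a hypothetical helper, not part of the patch; `session_state` and `df_plan` are assumed to come from the caller, as they do in planner.rs):

```rust
use optd_datafusion::converter::OptdDFContext;

/// Hypothetical helper showing the intended conversion round trip.
async fn roundtrip_sketch(
    session_state: &datafusion::execution::SessionState,
    df_plan: &datafusion::logical_expr::LogicalPlan,
) -> anyhow::Result<()> {
    // DataFusion -> optd: this also records every scanned table in `ctx.tables`,
    // which from_optd.rs later consults when lowering TableScan operators.
    let mut ctx = OptdDFContext::new(session_state);
    let optd_logical = ctx.conv_df_to_optd_relational(df_plan)?;

    // ...hand `optd_logical` to the optimizer (see planner.rs below), then
    // lower the chosen physical plan back with:
    //     ctx.conv_optd_to_df_relational(&optd_physical).await?
    let _ = optd_logical;
    Ok(())
}
```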
+
+#![allow(clippy::new_without_default)]
+use std::sync::Arc;
+
+use datafusion::catalog::{CatalogProviderList, MemoryCatalogProviderList};
+use datafusion::common::Result;
+use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};
+use datafusion::execution::SessionStateBuilder;
+use datafusion::prelude::{SessionConfig, SessionContext};
+use planner::OptdOptimizer;
+use planner::OptdQueryPlanner;
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::util::pretty;
+use datafusion::physical_plan::ExecutionPlanProperties;
+use datafusion::physical_plan::Partitioning;
+use futures::StreamExt;
+use std::time::SystemTime;
+pub mod converter;
+pub mod planner;
+
+pub async fn run_queries(queries: String) -> Result<()> {
+    // Create a SessionContext with the optd query planner installed.
+    let session_config = SessionConfig::from_env()?.with_information_schema(true);
+
+    let ctx = crate::create_df_context(Some(session_config.clone()), None, None)
+        .await
+        .unwrap();
+
+    // Create a DataFrame for each input query
+    for query in queries.split(';') {
+        if query.trim().is_empty() {
+            continue;
+        }
+        let sql = ctx.sql(query).await?;
+        // Run our execution engine on the physical plan
+        let df_physical_plan = sql.clone().create_physical_plan().await?;
+        let plan = df_physical_plan.clone();
+        let mut print_results: Vec<RecordBatch> = vec![];
+        let now = SystemTime::now();
+
+        // DataFusion execution nodes output multiple streams that are partitioned by the
+        // following patterns, so just join them all into one stream
+        let partitions = match plan.output_partitioning() {
+            Partitioning::RoundRobinBatch(c) => *c,
+            Partitioning::Hash(_, h) => *h,
+            Partitioning::UnknownPartitioning(p) => *p,
+        };
+
+        // Execute every partition and collect all non-empty result batches
+        for i in 0..partitions {
+            let batch_stream = plan.execute(i, Default::default()).unwrap();
+
+            let results: Vec<_> = batch_stream.collect().await;
+            for batch in results {
+                let batch = batch.unwrap();
+                if batch.num_rows() == 0 {
+                    continue;
+                }
+                print_results.push(batch);
+            }
+        }
+
+        match now.elapsed() {
+            Ok(elapsed) => {
+                println!("Datafusion time in milliseconds: {}", elapsed.as_millis());
+            }
+            Err(e) => {
+                println!("Error: {e:?}");
+            }
+        }
+
+        print_results.into_iter().for_each(|batch| {
+            let pretty_results = pretty::pretty_format_batches(&[batch]).unwrap().to_string();
+            println!("{}", pretty_results);
+        });
+    }
+    Ok(())
+}
+
+/// Utility function to create a session context for datafusion + optd.
+pub async fn create_df_context(
+    session_config: Option<SessionConfig>,
+    runtime_env: Option<Arc<RuntimeEnv>>,
+    catalog: Option<Arc<dyn CatalogProviderList>>,
+) -> anyhow::Result<SessionContext> {
+    let mut session_config = match session_config {
+        Some(config) => config,
+        None => SessionConfig::from_env()?.with_information_schema(true),
+    };
+
+    // Disable Datafusion's heuristic rule based query optimizer
+    session_config.options_mut().optimizer.max_passes = 0;
+
+    let runtime_env = match runtime_env {
+        Some(runtime_env) => runtime_env,
+        None => Arc::new(RuntimeEnvBuilder::new().build()?),
+    };
+
+    let catalog = match catalog {
+        Some(catalog) => catalog,
+        None => Arc::new(MemoryCatalogProviderList::new()),
+    };
+
+    let mut builder = SessionStateBuilder::new()
+        .with_config(session_config)
+        .with_runtime_env(runtime_env)
+        .with_catalog_list(catalog.clone())
+        .with_default_features();
+
+    let optimizer = OptdOptimizer::new_in_memory().await?;
+    let planner = Arc::new(OptdQueryPlanner::new(optimizer));
+    // clean up optimizer rules so that we can plug in our own optimizer
+    builder = builder.with_optimizer_rules(vec![]);
+    builder = builder.with_physical_optimizer_rules(vec![]);
+
+    // use optd-bridge query planner
+    builder = builder.with_query_planner(planner);
+
+    let state = builder.build();
+    let ctx = SessionContext::new_with_state(state).enable_url_table();
+    ctx.refresh_catalogs().await?;
+    Ok(ctx)
+}
diff --git a/optd-datafusion/src/planner.rs b/optd-datafusion/src/planner.rs
new file mode 100644
index 0000000..79cbd29
--- /dev/null
+++ b/optd-datafusion/src/planner.rs
@@ -0,0 +1,217 @@
+use std::sync::Arc;
+
+use anyhow::Ok;
+use async_trait::async_trait;
+use datafusion::{
+    execution::{context::QueryPlanner, SessionState},
+    logical_expr::{
+        Explain, LogicalPlan as DFLogicalPlan, PlanType as DFPlanType, ToStringifiedPlan,
+    },
+    physical_plan::{displayable, explain::ExplainExec, ExecutionPlan},
+    physical_planner::{DefaultPhysicalPlanner, PhysicalPlanner},
+};
+use optd_core::{
+    plans::{logical::LogicalPlan, physical::PhysicalPlan},
+    storage::memo::SqliteMemo,
+};
+
+use crate::converter::OptdDFContext;
+
+/// A mock optimizer for testing purposes.
+#[derive(Debug)]
+pub struct OptdOptimizer {
+    memo: SqliteMemo,
+}
+
+impl OptdOptimizer {
+    pub async fn new_in_memory() -> anyhow::Result<Self> {
+        Ok(Self {
+            memo: SqliteMemo::new_in_memory().await?,
+        })
+    }
+
+    /// A mock optimization function for testing purposes.
+    ///
+    /// This function takes a logical plan, and for each node in the logical plan, it will
+    /// recursively traverse the node and its children and replace the node with a physical
+    /// operator. The physical operator is chosen based on the type of the logical operator.
+    /// For example, if the logical operator is a scan, the physical operator will be a
+    /// TableScan, if the logical operator is a filter, the physical operator will be a
+    /// Filter, and so on.
+    ///
+    /// The physical operators are chosen in a way that they mirror the structure of the
+    /// logical plan, but they are not actually optimized in any way. This is useful for
+    /// testing purposes, as it allows us to test the structure of the physical plan without
+    /// having to worry about the actual optimization process.
+    ///
+    /// The function returns a PhysicalPlan, which is a struct that contains the root node of
+    /// the physical plan.
+    ///
+    /// # Arguments
+    /// * `logical_plan` - The logical plan to optimize.
+    ///
+    /// # Returns
+    /// * `PhysicalPlan` - The optimized physical plan.
+    pub async fn mock_optimize(
+        &self,
+        logical_plan: &LogicalPlan,
+    ) -> anyhow::Result<Arc<PhysicalPlan>> {
+        let root_group_id =
+            optd_core::cascades::ingest_full_logical_plan(&self.memo, logical_plan).await?;
+        optd_core::cascades::mock_optimize_relation_group(&self.memo, root_group_id).await?;
+
+        let optimized_plan =
+            optd_core::cascades::match_any_physical_plan(&self.memo, root_group_id).await?;
+
+        Ok(optimized_plan)
+    }
+}
+
+/// A struct that implements the `QueryPlanner` trait for the `OptdQueryPlanner`.
+/// This trait is used to create a physical plan for a given logical plan.
+/// The physical plan is created by converting the logical plan to an optd logical plan,
+/// and then running the optd optimizer on the logical plan and then converting it back.
+/// This is the entry point for optd.
+#[derive(Debug)]
+pub struct OptdQueryPlanner {
+    pub optimizer: Arc<OptdOptimizer>,
+}
+
+impl OptdQueryPlanner {
+    /// Creates a new instance of `OptdQueryPlanner` with the given optimizer.
+    ///
+    /// The optimizer is cloned and stored in an `Arc` so that it can be safely shared
+    /// across threads.
+    ///
+    /// # Arguments
+    /// * `optimizer` - The optimizer to use for creating the physical plan.
+    ///
+    /// # Returns
+    /// * `OptdQueryPlanner` - A new instance of `OptdQueryPlanner` with the given optimizer.
+    pub fn new(optimizer: OptdOptimizer) -> Self {
+        Self {
+            optimizer: Arc::new(optimizer),
+        }
+    }
+
+    /// This function is the entry point for the physical planner. It will attempt
+    /// to optimize the logical plan using the optd optimizer. If the logical plan
+    /// is a DML/DDL operation, it will fall back to the datafusion planner.
+    ///
+    /// The steps of this function are the following:
+    ///
+    /// 1. Check if the logical plan is a DML/DDL operation. If it is, fall back
+    ///    to the datafusion planner.
+    /// 2. Convert the logical plan to an optd logical plan.
+    /// 3. Run the optd optimizer on the logical plan.
+    /// 4. Convert the physical plan to a physical plan that can be executed by
+    ///    datafusion.
+    ///
+    /// # Arguments
+    /// * `logical_plan` - The logical plan in Datafusion's type system to optimize.
+    /// * `session_state` - The session state to use for creating the physical plan.
+    ///
+    /// # Returns
+    /// * `anyhow::Result<Arc<dyn ExecutionPlan>>` - The physical plan that can be executed by
+    ///   datafusion.
+    async fn create_physical_plan_inner(
+        &self,
+        logical_plan: &DFLogicalPlan,
+        session_state: &SessionState,
+    ) -> anyhow::Result<Arc<dyn ExecutionPlan>> {
+        // Fallback to the datafusion planner for DML/DDL operations. optd cannot handle this.
+        if let DFLogicalPlan::Dml(_) | DFLogicalPlan::Ddl(_) | DFLogicalPlan::EmptyRelation(_) =
+            logical_plan
+        {
+            let planner = DefaultPhysicalPlanner::default();
+            return Ok(planner
+                .create_physical_plan(logical_plan, session_state)
+                .await?);
+        }
+
+        let (logical_plan, _verbose, mut explains) = match logical_plan {
+            DFLogicalPlan::Explain(Explain { plan, verbose, .. }) => {
+                (plan.as_ref(), *verbose, Some(Vec::new()))
+            }
+            _ => (logical_plan, false, None),
+        };
+
+        if let Some(explains) = &mut explains {
+            explains.push(
+                logical_plan.to_stringified(DFPlanType::OptimizedLogicalPlan {
+                    optimizer_name: "datafusion".to_string(),
+                }),
+            );
+        }
+
+        let mut converter = OptdDFContext::new(session_state);
+        // convert the logical plan to optd
+        let logical_plan = converter.conv_df_to_optd_relational(logical_plan)?;
+        // run the optd optimizer
+        let optd_optimized_physical_plan = self.optimizer.mock_optimize(&logical_plan).await?;
+        // convert the optimized optd physical plan back into a DataFusion plan
+        let physical_plan = converter
+            .conv_optd_to_df_relational(&optd_optimized_physical_plan)
+            .await
+            .map_err(|e| anyhow::anyhow!(e))?;
+
+        if let Some(explains) = &mut explains {
+            explains.push(
+                displayable(&*physical_plan).to_stringified(false, DFPlanType::FinalPhysicalPlan),
+            );
+        }
+
+        if let Some(explains) = explains {
+            Ok(Arc::new(ExplainExec::new(
+                DFLogicalPlan::explain_schema(),
+                explains,
+                true,
+            )))
+        } else {
+            Ok(physical_plan)
+        }
+    }
+}
+
+// making it `async_trait` only because datafusion requires it.
+#[async_trait]
+impl QueryPlanner for OptdQueryPlanner {
+    /// This function is the entry point for the physical planner. It calls the inner function
+    /// `create_physical_plan_inner` to optimize the logical plan using the optd optimizer. If the
+    /// logical plan is a DML/DDL operation, it will fall back to the datafusion planner.
+    ///
+    /// The steps of this function are the following:
+    ///
+    /// 1. Check if the logical plan is a DML/DDL operation. If it is, fall back
+    ///    to the datafusion planner.
+    /// 2. Convert the logical plan to an optd logical plan.
+    /// 3. Run the optd optimizer on the logical plan.
+    /// 4. Convert the physical plan to a physical plan that can be executed by
+    ///    datafusion.
+    ///
+    /// # Arguments
+    /// * `datafusion_logical_plan` - The logical plan in Datafusion's type system to optimize.
+    /// * `session_state` - The session state to use for creating the physical plan.
+    ///
+    /// # Returns
+    /// * `datafusion::common::Result<Arc<dyn ExecutionPlan>>` - The physical plan that can be
+    ///   executed by datafusion.
+    ///
+    /// Also see [`OptdQueryPlanner::create_physical_plan_inner`]
+    async fn create_physical_plan(
+        &self,
+        datafusion_logical_plan: &DFLogicalPlan,
+        session_state: &SessionState,
+    ) -> datafusion::common::Result<Arc<dyn ExecutionPlan>> {
+        self.create_physical_plan_inner(datafusion_logical_plan, session_state)
+            .await
+            .map_err(|x| {
+                datafusion::error::DataFusionError::Execution(format!(
+                    "Failed to create physical plan: {:?}",
+                    x
+                ))
+            })
+    }
+}
diff --git a/optd-dsl/src/gen/operator.rs b/optd-dsl/src/gen/operator.rs
deleted file mode 100644
index 7789475..0000000
--- a/optd-dsl/src/gen/operator.rs
+++ /dev/null
@@ -1,151 +0,0 @@
-use crate::ast::{Field, LogicalOp, Operator, OperatorKind, ScalarOp, Type};
-use proc_macro2::{Ident, TokenStream};
-use quote::{format_ident, quote};
-use syn::parse_quote;
-
-/// Converts an AST type to its corresponding Rust type representation
-fn type_to_tokens(ty: &Type, is_param: bool) -> TokenStream {
-    match ty {
-        Type::String => {
-            if is_param {
-                quote! { &str }
-            } else {
-                quote! { String }
-            }
-        }
-        Type::Bool => quote! { bool },
-        Type::Int64 => quote! { i64 },
-        Type::Float64 => quote! { f64 },
-        Type::Operator(OperatorKind::Logical) => quote! { Relation },
-        Type::Operator(OperatorKind::Scalar) => quote! { Scalar },
-        Type::Array(inner) => {
-            let inner_type = type_to_tokens(inner, false);
-            quote!
{ Vec<#inner_type> } - } - _ => panic!("Unexpected type: {:?}", ty), - } -} - -/// Helper struct to hold field information for code generation -struct FieldInfo { - name: Ident, - ty: Type, -} - -impl FieldInfo { - fn new(field: &Field) -> Self { - Self { - name: format_ident!("{}", field.name), - ty: field.ty.clone(), - } - } - - fn struct_field(&self) -> TokenStream { - let name = &self.name; - let ty = type_to_tokens(&self.ty, false); - quote! { - pub #name: #ty - } - } - - fn ctor_param(&self) -> TokenStream { - let name = &self.name; - let ty = type_to_tokens(&self.ty, true); - quote! { #name: #ty } - } - - fn ctor_init(&self) -> TokenStream { - let name = &self.name; - match &self.ty { - Type::String => quote! { #name: #name.into() }, - _ => quote! { #name }, - } - } -} - -fn generate_code(operators: &[Operator]) -> proc_macro2::TokenStream { - let mut generated_code = proc_macro2::TokenStream::new(); - - for operator in operators { - let operator_code = match operator { - Operator::Logical(op) => generate_logical_operator(op), - Operator::Scalar(op) => generate_scalar_operator(op), - }; - generated_code.extend(operator_code); - } - - generated_code -} - -fn generate_logical_operator(operator: &LogicalOp) -> TokenStream { - let name = format_ident!("{}", &operator.name); - let fields: Vec = operator.fields.iter().map(FieldInfo::new).collect(); - let struct_fields: Vec<_> = fields.iter().map(|f| f.struct_field()).collect(); - let ctor_params: Vec<_> = fields.iter().map(|f| f.ctor_param()).collect(); - let ctor_inits: Vec<_> = fields.iter().map(|f| f.ctor_init()).collect(); - let field_names: Vec<_> = fields.iter().map(|f| &f.name).collect(); - let fn_name = format_ident!("{}", operator.name.to_lowercase()); - - quote! { - use super::LogicalOperator; - use crate::values::OptdValue; - use serde::Deserialize; - - #[derive(Debug, Clone, PartialEq, Deserialize)] - pub struct #name { - #(#struct_fields,)* - } - - impl #name { - pub fn new(#(#ctor_params,)*) -> Self { - Self { - #(#ctor_inits,)* - } - } - } - - pub fn #fn_name( - #(#ctor_params,)* - ) -> LogicalOperator { - LogicalOperator::#name(#name::new(#(#field_names,)*)) - } - } -} - -fn generate_scalar_operator(_operator: &ScalarOp) -> proc_macro2::TokenStream { - unimplemented!() -} - -#[test] -fn test_generate_logical_operator() { - use crate::ast::{Field, LogicalOp, OperatorKind, Type}; - use std::collections::HashMap; - - // Test with both Logical and Scalar operator types - let filter_op = LogicalOp { - name: "Filter".to_string(), - fields: vec![ - Field { - name: "child".to_string(), - ty: Type::Operator(OperatorKind::Logical), - }, - Field { - name: "predicate".to_string(), - ty: Type::Operator(OperatorKind::Scalar), - }, - ], - derived_props: HashMap::new(), - }; - - let generated = generate_logical_operator(&filter_op); - let syntax_tree: syn::File = parse_quote! { - #generated - }; - let formatted = prettyplease::unparse(&syntax_tree); - println!("Generated code:\n{}", formatted); - - // Basic validation - let code = formatted.to_string(); - assert!(code.contains("pub child: Relation")); - assert!(code.contains("pub predicate: Scalar")); -}
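
For readers trying the patch locally, the new pipeline can be driven end to end with a few lines. A minimal sketch (hypothetical binary, assuming a tokio runtime with the `macros` feature and `optd-datafusion` as a dependency; the SQL path is one of the test scripts added by this patch):

```rust
use std::fs;

#[tokio::main]
async fn main() -> datafusion::common::Result<()> {
    // Each `;`-separated statement is parsed by DataFusion, optimized through
    // optd's memo via the custom query planner, and then executed.
    let queries = fs::read_to_string("optd-datafusion/sql/test_scan.sql")
        .expect("test script added by this patch");
    optd_datafusion::run_queries(queries).await
}
```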