
Commit 1ed00e1

Merge branch 'main' into feature/array_remove

2 parents 866423b + 3f0d442

36 files changed: +627, -295 lines

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 12 additions & 20 deletions
@@ -272,18 +272,19 @@ object CometConf extends ShimCometConf {
       .booleanConf
       .createWithDefault(false)

-  val COMET_EXEC_SHUFFLE_COMPRESSION_CODEC: ConfigEntry[String] = conf(
-    s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.codec")
-    .doc(
-      "The codec of Comet native shuffle used to compress shuffle data. Only zstd is supported. " +
-        "Compression can be disabled by setting spark.shuffle.compress=false.")
-    .stringConf
-    .checkValues(Set("zstd"))
-    .createWithDefault("zstd")
+  val COMET_EXEC_SHUFFLE_COMPRESSION_CODEC: ConfigEntry[String] =
+    conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.codec")
+      .doc(
+        "The codec of Comet native shuffle used to compress shuffle data. lz4, zstd, and " +
+          "snappy are supported. Compression can be disabled by setting " +
+          "spark.shuffle.compress=false.")
+      .stringConf
+      .checkValues(Set("zstd", "lz4", "snappy"))
+      .createWithDefault("lz4")

-  val COMET_EXEC_SHUFFLE_COMPRESSION_LEVEL: ConfigEntry[Int] =
-    conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.level")
-      .doc("The compression level to use when compression shuffle files.")
+  val COMET_EXEC_SHUFFLE_COMPRESSION_ZSTD_LEVEL: ConfigEntry[Int] =
+    conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.zstd.level")
+      .doc("The compression level to use when compressing shuffle files with zstd.")
       .intConf
       .createWithDefault(1)

@@ -452,15 +453,6 @@ object CometConf extends ShimCometConf {
       .intConf
       .createWithDefault(8192)

-  val COMET_EXEC_MEMORY_FRACTION: ConfigEntry[Double] = conf("spark.comet.exec.memoryFraction")
-    .doc(
-      "The fraction of memory from Comet memory overhead that the native memory " +
-        "manager can use for execution. The purpose of this config is to set aside memory for " +
-        "untracked data structures, as well as imprecise size estimation during memory " +
-        "acquisition.")
-    .doubleConf
-    .createWithDefault(0.7)
-
   val COMET_PARQUET_ENABLE_DIRECT_BUFFER: ConfigEntry[Boolean] =
     conf("spark.comet.parquet.enable.directBuffer")
       .doc("Whether to use Java direct byte buffer when reading Parquet.")
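The new codec entry only accepts values in `{zstd, lz4, snappy}` and falls back to `lz4` when unset. The `.checkValues`/`.createWithDefault` behavior can be sketched as follows; `resolve_codec` is a hypothetical stand-in for illustration, not Comet's actual `ConfigEntry` machinery:

```rust
use std::collections::HashSet;

/// Sketch of checkValues(Set("zstd", "lz4", "snappy")) + createWithDefault("lz4"):
/// returns the configured codec when it is in the allowed set, the default when
/// no value was configured, and an error for an unsupported codec.
fn resolve_codec(configured: Option<&str>) -> Result<String, String> {
    let allowed: HashSet<&str> = ["zstd", "lz4", "snappy"].into_iter().collect();
    match configured {
        None => Ok("lz4".to_string()), // createWithDefault("lz4")
        Some(c) if allowed.contains(c) => Ok(c.to_string()),
        Some(c) => Err(format!("invalid codec: {c}")),
    }
}
```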

docs/source/user-guide/configs.md

Lines changed: 2 additions & 3 deletions
@@ -47,12 +47,11 @@ Comet provides the following configuration settings.
 | spark.comet.exec.globalLimit.enabled | Whether to enable globalLimit by default. | true |
 | spark.comet.exec.hashJoin.enabled | Whether to enable hashJoin by default. | true |
 | spark.comet.exec.localLimit.enabled | Whether to enable localLimit by default. | true |
-| spark.comet.exec.memoryFraction | The fraction of memory from Comet memory overhead that the native memory manager can use for execution. The purpose of this config is to set aside memory for untracked data structures, as well as imprecise size estimation during memory acquisition. | 0.7 |
 | spark.comet.exec.memoryPool | The type of memory pool to be used for Comet native execution. Available memory pool types are 'greedy', 'fair_spill', 'greedy_task_shared', 'fair_spill_task_shared', 'greedy_global' and 'fair_spill_global', By default, this config is 'greedy_task_shared'. | greedy_task_shared |
 | spark.comet.exec.project.enabled | Whether to enable project by default. | true |
 | spark.comet.exec.replaceSortMergeJoin | Experimental feature to force Spark to replace SortMergeJoin with ShuffledHashJoin for improved performance. This feature is not stable yet. For more information, refer to the Comet Tuning Guide (https://datafusion.apache.org/comet/user-guide/tuning.html). | false |
-| spark.comet.exec.shuffle.compression.codec | The codec of Comet native shuffle used to compress shuffle data. Only zstd is supported. Compression can be disabled by setting spark.shuffle.compress=false. | zstd |
-| spark.comet.exec.shuffle.compression.level | The compression level to use when compression shuffle files. | 1 |
+| spark.comet.exec.shuffle.compression.codec | The codec of Comet native shuffle used to compress shuffle data. lz4, zstd, and snappy are supported. Compression can be disabled by setting spark.shuffle.compress=false. | lz4 |
+| spark.comet.exec.shuffle.compression.zstd.level | The compression level to use when compressing shuffle files with zstd. | 1 |
 | spark.comet.exec.shuffle.enabled | Whether to enable Comet native shuffle. Note that this requires setting 'spark.shuffle.manager' to 'org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager'. 'spark.shuffle.manager' must be set before starting the Spark application and cannot be changed during the application. | true |
 | spark.comet.exec.sort.enabled | Whether to enable sort by default. | true |
 | spark.comet.exec.sortMergeJoin.enabled | Whether to enable sortMergeJoin by default. | true |

docs/source/user-guide/tuning.md

Lines changed: 51 additions & 6 deletions
@@ -23,11 +23,52 @@ Comet provides some tuning options to help you get the best performance from you

 ## Memory Tuning

-Comet shares an off-heap memory pool between Spark and Comet. This requires setting `spark.memory.offHeap.enabled=true`.
-If this setting is not enabled, Comet will not accelerate queries and will fall back to Spark.
+### Unified Memory Management with Off-Heap Memory
+
+The recommended way to share memory between Spark and Comet is to set `spark.memory.offHeap.enabled=true`. This allows
+Comet to share an off-heap memory pool with Spark. The size of the pool is specified by `spark.memory.offHeap.size`. For more details about Spark's off-heap memory mode, please refer to the Spark documentation: https://spark.apache.org/docs/latest/configuration.html.
+
+### Dedicated Comet Memory Pools
+
+Spark uses on-heap memory mode by default, i.e., the `spark.memory.offHeap.enabled` setting is not enabled. When Spark is in on-heap memory mode, Comet will use its own dedicated memory pools that
+are not shared with Spark. This requires additional configuration settings to specify the size and type of
+memory pool to use.
+
+The size of the pool can be set explicitly with `spark.comet.memoryOverhead`. If this setting is not specified, then
+the memory overhead will be calculated by multiplying the executor memory by `spark.comet.memory.overhead.factor`
+(defaults to `0.2`).
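The sizing fallback described in the added documentation above can be sketched as a small function. This is an illustration of the documented rule only; the function name and signature are hypothetical:

```rust
/// Sketch of the dedicated-pool sizing rule: use the explicit
/// spark.comet.memoryOverhead value when set, otherwise executor memory
/// multiplied by spark.comet.memory.overhead.factor (default 0.2).
fn comet_memory_overhead_bytes(
    explicit_overhead: Option<u64>,
    executor_memory: u64,
    overhead_factor: f64, // spark.comet.memory.overhead.factor, defaults to 0.2
) -> u64 {
    match explicit_overhead {
        Some(bytes) => bytes,
        None => (executor_memory as f64 * overhead_factor) as u64,
    }
}
```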
+
+The type of pool can be specified with `spark.comet.exec.memoryPool`. The default setting is `greedy_task_shared`.
+
+The valid pool types are:
+
+- `greedy`
+- `greedy_global`
+- `greedy_task_shared`
+- `fair_spill`
+- `fair_spill_global`
+- `fair_spill_task_shared`
+
+Pool types ending with `_global` use a single global memory pool shared by all tasks on the same executor.
+
+Pool types ending with `_task_shared` share a single memory pool across all attempts for a single task.
+
+The other pool types create a dedicated pool per native query plan, using a fraction of the available pool size based on the number of cores
+and the cores per task.
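The exact per-plan split is not spelled out in this diff, so the formula below is an assumption: one plausible reading of "a fraction of the available pool size based on the number of cores and cores per task" is an even division across the number of concurrent tasks an executor can run:

```rust
/// Illustrative sketch only; the real formula is not shown in this commit.
/// Divides the available pool evenly across the executor's concurrent task
/// slots (num_cores / cores_per_task).
fn per_plan_pool_size(available: u64, num_cores: u64, cores_per_task: u64) -> u64 {
    let concurrent_tasks = (num_cores / cores_per_task).max(1);
    available / concurrent_tasks
}
```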
58+
59+
The `greedy*` pool types use DataFusion's [GreedyMemoryPool], which implements a greedy first-come first-serve limit. This
60+
pool works well for queries that do not need to spill or have a single spillable operator.
61+
62+
The `fair_spill*` pool types use DataFusion's [FairSpillPool], which prevents spillable reservations from using more
63+
than an even fraction of the available memory sans any unspillable reservations
64+
(i.e. `(pool_size - unspillable_memory) / num_spillable_reservations)`). This pool works best when you know beforehand
65+
the query has multiple spillable operators that will likely all need to spill. Sometimes it will cause spills even
66+
when there was sufficient memory (reserved for other operators) to avoid doing so. Unspillable memory is allocated in
67+
a first-come, first-serve fashion
68+
69+
[GreedyMemoryPool]: https://docs.rs/datafusion/latest/datafusion/execution/memory_pool/struct.GreedyMemoryPool.html
70+
[FairSpillPool]: https://docs.rs/datafusion/latest/datafusion/execution/memory_pool/struct.FairSpillPool.html
2871

29-
Each executor will have a single memory pool which will be shared by all native plans being executed within that
30-
process, and by Spark itself. The size of the pool is specified by `spark.memory.offHeap.size`.
3172

3273
### Determining How Much Memory to Allocate
3374

@@ -106,15 +147,19 @@ then any shuffle operations that cannot be supported in this mode will fall back

 ### Shuffle Compression

 By default, Spark compresses shuffle files using LZ4 compression. Comet overrides this behavior with ZSTD compression.
-Compression can be disabled by setting `spark.shuffle.compress=false`, which may result in faster shuffle times in
+Compression can be disabled by setting `spark.shuffle.compress=false`, which may result in faster shuffle times in
 certain environments, such as single-node setups with fast NVMe drives, at the expense of increased disk space usage.

 ## Explain Plan
+
 ### Extended Explain
+
 With Spark 4.0.0 and newer, Comet can provide extended explain plan information in the Spark UI. Currently this lists
 reasons why Comet may not have been enabled for specific operations.
 To enable this, in the Spark configuration, set the following:
+
 ```shell
 -c spark.sql.extendedExplainProviders=org.apache.comet.ExtendedExplainInfo
 ```
-This will add a section to the detailed plan displayed in the Spark SQL UI page.
+
+This will add a section to the detailed plan displayed in the Spark SQL UI page.
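Returning to the memory-pool documentation added earlier in this file, the FairSpillPool bound it quotes, `(pool_size - unspillable_memory) / num_spillable_reservations`, can be sketched directly. This is a restatement of the documented formula, not DataFusion's implementation:

```rust
/// Fair share available to each spillable reservation under a FairSpillPool,
/// per the formula quoted in the tuning guide: the pool minus unspillable
/// memory, split evenly across the spillable reservations.
fn fair_spill_share(pool_size: u64, unspillable: u64, num_spillable: u64) -> u64 {
    assert!(num_spillable > 0, "needs at least one spillable reservation");
    // saturating_sub: unspillable memory may already exceed the pool
    pool_size.saturating_sub(unspillable) / num_spillable
}
```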

native/Cargo.lock

Lines changed: 25 additions & 23 deletions
Some generated files are not rendered by default.

native/core/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ serde = { version = "1", features = ["derive"] }
5252
lazy_static = "1.4.0"
5353
prost = "0.12.1"
5454
jni = "0.21"
55+
snap = "1.1"
56+
# we disable default features in lz4_flex to force the use of the faster unsafe encoding and decoding implementation
57+
lz4_flex = { version = "0.11.3", default-features = false }
5558
zstd = "0.11"
5659
rand = { workspace = true}
5760
num = { workspace = true }

native/core/benches/row_columnar.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use arrow::datatypes::DataType as ArrowDataType;
1919
use comet::execution::shuffle::row::{
2020
process_sorted_row_partition, SparkUnsafeObject, SparkUnsafeRow,
2121
};
22+
use comet::execution::shuffle::CompressionCodec;
2223
use criterion::{criterion_group, criterion_main, Criterion};
2324
use tempfile::Builder;
2425

@@ -77,6 +78,7 @@ fn benchmark(c: &mut Criterion) {
7778
false,
7879
0,
7980
None,
81+
&CompressionCodec::Zstd(1),
8082
)
8183
.unwrap();
8284
});
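The benchmark now passes `&CompressionCodec::Zstd(1)` into `process_sorted_row_partition`. The `CompressionCodec` type itself is not shown in this diff; a plausible stdlib-only reconstruction of its shape is below (the variant names beyond `Zstd` and the `from_config` helper are assumptions, informed by the codec set in `CometConf` and the `zstd`, `lz4_flex`, and `snap` dependencies added in `Cargo.toml`):

```rust
/// Hypothetical sketch of the codec selector used by the shuffle writer; the
/// real type lives in comet::execution::shuffle and dispatches to the zstd,
/// lz4_flex, and snap crates.
#[derive(Debug, Clone, PartialEq)]
enum CompressionCodec {
    /// zstd with a compression level; the benchmark above uses Zstd(1)
    Zstd(i32),
    Lz4,
    Snappy,
}

impl CompressionCodec {
    /// Map a spark.comet.exec.shuffle.compression.codec value to a codec,
    /// applying the level from ...shuffle.compression.zstd.level for zstd.
    fn from_config(name: &str, zstd_level: i32) -> Option<CompressionCodec> {
        match name {
            "zstd" => Some(CompressionCodec::Zstd(zstd_level)),
            "lz4" => Some(CompressionCodec::Lz4),
            "snappy" => Some(CompressionCodec::Snappy),
            _ => None,
        }
    }
}
```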
