diff --git a/CHANGES.md b/CHANGES.md
index ea3620903..b6c144548 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -3,7 +3,8 @@
## Next
* Issue #1290: Stopped using metadata for optimized count path
* Issue #1317: Improving OpenLineage 1.24.0+ compatibility
-* PR #1311 : Improve read session expired error message
+* PR #1311: Improve read session expired error message
+* PR #1320: Default `temporaryGcsBucket` to the value of `fs.gs.system.bucket` if it exists, eliminating the need to set it on Dataproc clusters.
## 0.41.0 - 2024-09-05
diff --git a/README-template.md b/README-template.md
index a4ec1bd30..e84eaefbc 100644
--- a/README-template.md
+++ b/README-template.md
@@ -573,7 +573,8 @@ word-break:break-word
The GCS bucket that temporarily holds the data before it is loaded to
BigQuery. Required unless set in the Spark configuration
(spark.conf.set(...) ).
- Not supported by the `DIRECT` write method.
+ Defaults to the value of `fs.gs.system.bucket` if it exists (for example, on Google Cloud Dataproc clusters), starting from version 0.42.0.
+ Supported only by the `INDIRECT` write method.
|
Write |
@@ -583,7 +584,7 @@ word-break:break-word
The GCS bucket that holds the data before it is loaded to
BigQuery. If informed, the data won't be deleted after write data
into BigQuery.
- Not supported by the `DIRECT` write method.
+ Supported only by the `INDIRECT` write method.
|
Write |
diff --git a/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java b/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java
index fd1543d0f..e61fc298a 100644
--- a/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java
+++ b/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkBigQueryConfig.java
@@ -171,6 +171,8 @@ public static WriteMethod from(@Nullable String writeMethod) {
public static final String BIG_NUMERIC_DEFAULT_PRECISION = "bigNumericDefaultPrecision";
public static final String BIG_NUMERIC_DEFAULT_SCALE = "bigNumericDefaultScale";
+ private static final String DATAPROC_SYSTEM_BUCKET_CONFIGURATION = "fs.gs.system.bucket";
+
TableId tableId;
// as the config needs to be Serializable, internally it uses
// com.google.common.base.Optional but externally it uses the regular java.util.Optional
@@ -398,7 +400,10 @@ public static SparkBigQueryConfig from(
.orNull();
config.defaultParallelism = defaultParallelism;
config.temporaryGcsBucket =
- stripPrefix(getAnyOption(globalOptions, options, "temporaryGcsBucket"));
+ stripPrefix(getAnyOption(globalOptions, options, "temporaryGcsBucket"))
+ .or(
+ com.google.common.base.Optional.fromNullable(
+ hadoopConfiguration.get(DATAPROC_SYSTEM_BUCKET_CONFIGURATION)));
config.persistentGcsBucket =
stripPrefix(getAnyOption(globalOptions, options, "persistentGcsBucket"));
config.persistentGcsPath = getOption(options, "persistentGcsPath");
diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java
index 9dfbce529..c50f26fd2 100644
--- a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java
+++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkBigQueryConfigTest.java
@@ -1182,4 +1182,22 @@ public void testEnableListInferenceWithDefaultIntermediateFormat() {
assertThat(config.getIntermediateFormat())
.isEqualTo(SparkBigQueryConfig.IntermediateFormat.PARQUET_LIST_INFERENCE_ENABLED);
}
+
+ @Test
+ public void testSystemBucketAsDefaultTemporaryGcsBucket() {
+ Configuration hadoopConfiguration = new Configuration();
+ hadoopConfiguration.set("fs.gs.system.bucket", "foo");
+ SparkBigQueryConfig config =
+ SparkBigQueryConfig.from(
+ asDataSourceOptionsMap(defaultOptions),
+ emptyMap, // allConf
+ hadoopConfiguration,
+ emptyMap, // customDefaults
+ 1,
+ new SQLConf(),
+ sparkVersion,
+ /* schema */ Optional.empty(),
+ /* tableIsMandatory */ true);
+ assertThat(config.getTemporaryGcsBucket()).hasValue("foo");
+ }
}