Skip to content

Commit 17ecc65

Browse files
authored
fix: to_gbq may swap data columns when replacing a table (#2532)
Fixes #2502
1 parent b952428 commit 17ecc65

File tree

2 files changed

+15
-8
lines changed

2 files changed

+15
-8
lines changed

bigframes/session/bq_caching_executor.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def _export_gbq(
247247
)
248248
sql = compiled.sql
249249

250-
if (existing_table is not None) and _if_schema_match(
250+
if (existing_table is not None) and _is_schema_match(
251251
existing_table.schema, array_value.schema
252252
):
253253
# b/409086472: Uses DML for table appends and replacements to avoid
@@ -690,16 +690,16 @@ def _result_schema(
690690
)
691691

692692

693-
def _if_schema_match(
694-
table_schema: Tuple[bigquery.SchemaField, ...], schema: schemata.ArraySchema
693+
def _is_schema_match(
694+
table_schema: Tuple[bigquery.SchemaField, ...],
695+
schema: schemata.ArraySchema,
695696
) -> bool:
696697
if len(table_schema) != len(schema.items):
697698
return False
698-
for field in table_schema:
699-
if field.name not in schema.names:
699+
for field, schema_item in zip(table_schema, schema.items):
700+
if field.name != schema_item.column:
700701
return False
701-
if bigframes.dtypes.convert_schema_field(field)[1] != schema.get_type(
702-
field.name
703-
):
702+
_, field_dtype = bigframes.dtypes.convert_schema_field(field)
703+
if field_dtype != schema_item.dtype:
704704
return False
705705
return True

tests/system/small/test_dataframe_io.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,13 @@ def test_to_gbq_if_exists_is_replace(scalars_dfs, dataset_id):
631631
assert len(gcs_df) == len(scalars_pandas_df)
632632
pd.testing.assert_index_equal(gcs_df.columns, scalars_pandas_df.columns)
633633

634+
# When replacing a table with the same schema but a different column order
635+
reordered_df = scalars_df[scalars_df.columns[::-1]]
636+
reordered_df.to_gbq(destination_table, if_exists="replace")
637+
gcs_df = pandas_gbq.read_gbq(destination_table, index_col="rowindex")
638+
assert len(gcs_df) == len(scalars_pandas_df)
639+
pd.testing.assert_index_equal(gcs_df.columns, reordered_df.columns)
640+
634641
# When replacing a table with a different schema
635642
partitial_scalars_df = scalars_df.drop(columns=["string_col"])
636643
partitial_scalars_df.to_gbq(destination_table, if_exists="replace")

0 commit comments

Comments
 (0)