Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 85 additions & 3 deletions pandas_gbq/gbq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@
from datetime import datetime
import logging
import re
import typing
import warnings

import pandas
from pandas_gbq.contexts import Context # noqa - backward compatible export
from pandas_gbq.contexts import context
from pandas_gbq.exceptions import ( # noqa - backward compatible export
Expand Down Expand Up @@ -380,6 +382,14 @@ def to_gbq(
progress_bar=True,
credentials=None,
api_method: str = "default",
clustering_columns: typing.Union[
pandas.core.indexes.base.Index, typing.Iterable[typing.Hashable]
] = (),
time_partitioning_column: typing.Optional[str] = None,
time_partitioning_type: typing.Optional[str] = "DAY",
time_partitioning_expiration_ms: typing.Optional[int] = None,
range_partitioning_column: typing.Optional[str] = None,
range_partitioning_range: typing.Optional[dict] = None,
verbose=None,
private_key=None,
auth_redirect_uri=None,
Expand All @@ -406,6 +416,20 @@ def to_gbq(
destination_table : str
Name of table to be written, in the form ``dataset.tablename`` or
``project.dataset.tablename``.
clustering_columns : typing.Union[
pandas.core.indexes.base.Index, typing.Iterable[typing.Hashable]
] = (),
Specifies the columns for clustering in the BigQuery table.
time_partitioning_column : str, optional
Specifies the column for time-based partitioning in the BigQuery table.
time_partitioning_type : str, default 'DAY'
Specifies the type of time-based partitioning.
time_partitioning_expiration_ms : int, optional
Specifies the milliseconds for time-based partitioning expiration.
range_partitioning_column : str, optional
Specifies the column for range-based partitioning in the BigQuery table.
range_partitioning_range : dict, optional
Specifies the range for range-based partitioning.
project_id : str, optional
Google Cloud Platform project ID. Optional when available from
the environment.
Expand Down Expand Up @@ -610,7 +634,16 @@ def to_gbq(
location=location,
credentials=connector.credentials,
)
table_connector.create(table_id, table_schema)
table_connector.create(
table_id,
table_schema,
clustering_columns=clustering_columns,
time_partitioning_column=time_partitioning_column,
time_partitioning_type=time_partitioning_type,
time_partitioning_expiration_ms=time_partitioning_expiration_ms,
range_partitioning_column=range_partitioning_column,
range_partitioning_range=range_partitioning_range,
)
else:
if if_exists == "append":
# Convert original schema (the schema that already exists) to pandas-gbq API format
Expand Down Expand Up @@ -731,7 +764,17 @@ def exists(self, table_id):
except self.http_error as ex:
self.process_http_error(ex)

def create(self, table_id, schema):
def create(
self,
table_id,
schema,
clustering_columns=None,
time_partitioning_column=None,
time_partitioning_type="DAY",
time_partitioning_expiration_ms=None,
range_partitioning_column=None,
range_partitioning_range=None,
):
"""Create a table in Google BigQuery given a table and schema

Parameters
Expand All @@ -741,8 +784,27 @@ def create(self, table_id, schema):
schema : str
Use the generate_bq_schema to generate your table schema from a
dataframe.
clustering_columns : list, optional
List of columns to cluster the table on.
time_partitioning_column : str, optional
Column to partition the table on.
time_partitioning_type : str, default 'DAY'
Type of time partitioning.
time_partitioning_expiration_ms : int, optional
Expiration time for the partitioning.
range_partitioning_column : str, optional
Column to partition the table on.
range_partitioning_range : dict, optional
Range for the partitioning.
"""
from google.cloud.bigquery import DatasetReference, Table, TableReference
from google.cloud.bigquery import (
DatasetReference,
Table,
TableReference,
TimePartitioning,
RangePartitioning,
PartitionRange,
)

if self.exists(table_id):
raise TableCreationError("Table {0} already exists".format(table_id))
Expand All @@ -762,6 +824,26 @@ def create(self, table_id, schema):
table = Table(table_ref)
table.schema = pandas_gbq.schema.to_google_cloud_bigquery(schema)

if clustering_columns:
table.clustering_fields = list(clustering_columns)

if time_partitioning_column:
table.time_partitioning = TimePartitioning(
type_=time_partitioning_type,
field=time_partitioning_column,
expiration_ms=time_partitioning_expiration_ms,
)

if range_partitioning_column and range_partitioning_range:
table.range_partitioning = RangePartitioning(
field=range_partitioning_column,
range_=PartitionRange(
start=range_partitioning_range["start"],
end=range_partitioning_range["end"],
interval=range_partitioning_range["interval"],
),
)

try:
self.client.create_table(table)
except self.http_error as ex:
Expand Down
Loading