Skip to content

Commit

Permalink
feat(bigquery): add EXPORT DATA statement support (#4688)
Browse files Browse the repository at this point in the history
* feat(bigquery): add EXPORT DATA statement support

* feat(bigquery): improve EXPORT DATA implementation with named function

* refactor(bigquery): improve EXPORT DATA implementation

* feat(bigquery): make code compatible with Python 3.7

* feat(bigquery): add test with connection

* fix(bigquery): improve WITH CONNECTION parsing logic
  • Loading branch information
ArnoldHueteG authored Feb 3, 2025
1 parent ade8b82 commit 9ea15c7
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 0 deletions.
59 changes: 59 additions & 0 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,7 @@ class Tokenizer(tokens.Tokenizer):
"DECLARE": TokenType.COMMAND,
"ELSEIF": TokenType.COMMAND,
"EXCEPTION": TokenType.COMMAND,
"EXPORT": TokenType.EXPORT,
"FLOAT64": TokenType.DOUBLE,
"FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
"MODEL": TokenType.MODEL,
Expand All @@ -490,6 +491,11 @@ class Parser(parser.Parser):
LOG_DEFAULTS_TO_LN = True
SUPPORTS_IMPLICIT_UNNEST = True

# Extends the base parser's ID_VAR_TOKENS set with TokenType.EXPORT.
# NOTE(review): presumably this keeps `export` usable as an identifier now
# that the tokenizer maps "EXPORT" to a dedicated token type — confirm.
ID_VAR_TOKENS = {
*parser.Parser.ID_VAR_TOKENS,
TokenType.EXPORT,
}

FUNCTIONS = {
**parser.Parser.FUNCTIONS,
"CONTAINS_SUBSTR": _build_contains_substring,
Expand Down Expand Up @@ -596,6 +602,7 @@ class Parser(parser.Parser):
TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
TokenType.END: lambda self: self._parse_as_command(self._prev),
TokenType.FOR: lambda self: self._parse_for_in(),
TokenType.EXPORT: lambda self: self._parse_export_data(),
}

BRACKET_OFFSETS = {
Expand Down Expand Up @@ -829,6 +836,50 @@ def _parse_features_at_time(self) -> exp.FeaturesAtTime:

return expr

def _parse_export_data(self) -> exp.Export:
    """Parse the remainder of an ``EXPORT DATA`` statement into ``exp.Export``.

    Registered as the handler for ``TokenType.EXPORT``. Tokens are consumed
    in this order:

    1. the word ``DATA``;
    2. an optional ``WITH CONNECTION`` clause holding a dot-separated name;
    3. ``OPTIONS`` followed by a (loosely matched) parenthesized property list;
    4. an optional ``AS``;
    5. the query, obtained from ``self._parse_statement()``.

    Every violation is reported through ``self.raise_error`` and parsing
    then simply continues with the next step; no branch returns early.

    Returns:
        An ``exp.Export`` whose ``this`` is the parsed query,
        ``with_connection`` is an ``exp.Identifier`` (or ``None``) and
        ``options`` is whatever ``self._parse_properties()`` produced
        (or ``None`` when ``OPTIONS`` was absent).
    """
    # Reference: https://cloud.google.com/bigquery/docs/reference/standard-sql/export-statements
    saw_data = self._match_text_seq("DATA")
    if not saw_data:
        self.raise_error("Expected 'DATA' after 'EXPORT'")

    export_args = {"with_connection": None, "options": None}

    if self._match_text_seq("WITH", "CONNECTION"):
        # Collect `a.b.c` style names: a var, then repeatedly DOT + var.
        name_parts = []
        segment = self._parse_var()
        while segment:
            name_parts.append(segment.name)
            if not self._match(TokenType.DOT):
                break
            segment = self._parse_var()

        if not name_parts:
            self.raise_error("Expected connection name after WITH CONNECTION")

        # The dotted path is stored as one identifier, not as nested parts.
        export_args["with_connection"] = exp.Identifier(this=".".join(name_parts))

    if not self._match_text_seq("OPTIONS"):
        self.raise_error("Expected 'OPTIONS' after 'EXPORT DATA'")
    else:
        # Both parentheses are matched leniently: a missing one is not reported.
        self._match(TokenType.L_PAREN)
        export_args["options"] = self._parse_properties()
        self._match(TokenType.R_PAREN)

    # `AS` is accepted but not required before the query.
    self._match_text_seq("AS")

    # NOTE(review): only exp.Select passes the check below, so other query
    # shapes (e.g. set operations) would presumably be reported as errors —
    # confirm this restriction is intended.
    statement = self._parse_statement()
    if not isinstance(statement, exp.Select):
        self.raise_error("Expected SELECT statement in EXPORT DATA")

    return self.expression(exp.Export, this=statement, **export_args)

class Generator(generator.Generator):
INTERVAL_ALLOWS_PLURAL_FORM = False
JOIN_HINTS = False
Expand Down Expand Up @@ -1237,3 +1288,11 @@ def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) ->
return f"{self.sql(expression, 'to')}{self.sql(this)}"

return super().cast_sql(expression, safe_prefix=safe_prefix)

def export_sql(self, expression: exp.Export) -> str:
    """Generate the SQL text for an ``exp.Export`` node.

    The result is ``EXPORT DATA`` followed, in order, by the optional
    ``WITH CONNECTION <name>`` clause, the optional rendered options and
    the rendered query, each separated by a single space. Clauses whose
    rendered SQL is empty are omitted.

    NOTE(review): no ``AS`` keyword is emitted before the query, even though
    the parser accepts one and BigQuery's documented grammar appears to
    place ``AS`` between the options and the query — confirm whether it
    should be generated.
    """
    query_sql = self.sql(expression, "this")
    segments = ["EXPORT DATA"]

    connection_sql = self.sql(expression, "with_connection")
    if connection_sql:
        segments.append(f"WITH CONNECTION {connection_sql}")

    options_sql = self.sql(expression, "options")
    if options_sql:
        segments.append(options_sql)

    # The query is always appended, even when empty, so the separator
    # after the preceding segment is always present.
    segments.append(query_sql)
    return " ".join(segments)
8 changes: 8 additions & 0 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8650,3 +8650,11 @@ def null() -> Null:
Boolean,
Null,
)


class Export(Expression):
    """Expression node for an ``EXPORT DATA`` statement.

    Args (see ``arg_types``):
        this: the query being exported (required).
        with_connection: optional connection name.
        options: optional export options.
    """

    arg_types = {"this": True, "with_connection": False, "options": False}
1 change: 1 addition & 0 deletions sqlglot/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,7 @@ class TokenType(AutoName):
SOURCE = auto()
ANALYZE = auto()
NAMESPACE = auto()
EXPORT = auto()


_ALL_TOKEN_TYPES = list(TokenType)
Expand Down
7 changes: 7 additions & 0 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -1661,6 +1661,13 @@ def test_bigquery(self):
)
self.validate_identity("SELECT * FROM ML.FEATURES_AT_TIME((SELECT 1), num_rows => 1)")

self.validate_identity(
"EXPORT DATA OPTIONS (URI='gs://path*.csv.gz', FORMAT='CSV') SELECT * FROM all_rows"
)
self.validate_identity(
"EXPORT DATA WITH CONNECTION myproject.us.myconnection OPTIONS (URI='gs://path*.csv.gz', FORMAT='CSV') SELECT * FROM all_rows"
)

def test_errors(self):
with self.assertRaises(TokenError):
transpile("'\\'", read="bigquery")
Expand Down

0 comments on commit 9ea15c7

Please sign in to comment.