Skip to content

Commit

Permalink
Update BigQuery extraction script
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-pmelendez committed Nov 22, 2024
1 parent 57a880a commit ca0c077
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 26 deletions.
17 changes: 15 additions & 2 deletions BigQuery/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ This repository offers a collection of straightforward scripts designed to facil

## Version

Release 2023-02-28
Release 2024-11-19

## Usage

Expand Down Expand Up @@ -35,7 +35,20 @@ The following are the steps to execute the DDL Code Generation. They can be exec
- Finally, run `create_ddls.sh` to extract the DDLs from BigQuery
- After a successful run, remove region information from the top line of `create_ddls.sh`.

3. Run `create_ddls.sh --version` to check the current version of the extraction scripts.
### Arguments

```--version```

Check the current version of the extraction scripts.

```--help```

Display the help screen.

```-s "schema1, schema2 [, ...]```

The parameter to limit to an in-list of schemas using the following structure schema1 [, ...].


### DDL Files
These files will contain the definitions of the objects specified by the file name.
Expand Down
89 changes: 65 additions & 24 deletions BigQuery/bin/create_ddls.sh
Original file line number Diff line number Diff line change
@@ -1,27 +1,49 @@
#!/bin/bash

#This version should match the README.md version. Please update this version on every change request.
VERSION="Release 2024-02-28"
VERSION="Release 2024-11-19"

export versionParam=$1

if [ "$versionParam" = "--version" ]; then
export inputParam=$1
if [ "$inputParam" = "--version" ]; then
echo "You are using the $VERSION of the extraction scripts"
exit 1
fi

if [ "$inputParam" = "--help" ]; then
echo " --help Display this help screen."
echo " --version Display version information."
echo " -s Optional parameter to limit to an in-list of schemas"
echo " using the following structure "schema1 [, ...]"\""
exit 1
fi


while getopts s: flag
do
case "${flag}" in
s) SCHEMA=${OPTARG};;
esac
done

REGION='us'

echo " "
echo " +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+ +-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+"
echo " |B|i|g|Q|u|e|r|y| |E|x|p|o|r|t| |b|y| |S|n|o|w|f|l|a|k|e|"
echo " +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+ +-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+"
echo " Version 1.0"
echo " Version $VERSION"
echo " +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+ +-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+"
echo " "

echo " Before execute this tool, please read the following link "
echo " https://github.com/Snowflake-Labs/SC.DDLExportScripts/blob/main/BigQuery/README.md"
echo " This tool is exclusively to execute using Google Cloud Console"
echo " "

echo "Extracting DDLs from region $REGION"
if [ "$SCHEMA" != "" ]; then
echo "Schemas to filter: $SCHEMA"
fi

echo "Creating Output Folder..."

mkdir -p Output
Expand All @@ -36,12 +58,19 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Schema.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
if [ "$SCHEMA" != "" ]; then
SCHEMA_CLAUSE=' INNER JOIN UNNEST(SPLIT('\'$SCHEMA\'', '\',\'')) AS SCHEMAS
ON TRIM(SCHEMAS)=schema_name'
else
SCHEMA_CLAUSE=""
fi

./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-schema> '\''||catalog_name||'\''.'\''||schema_name||'\'' </sc-schema> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
FROM
`region-'$REGION'`.INFORMATION_SCHEMA.SCHEMATA
`region-'$REGION'`.INFORMATION_SCHEMA.SCHEMATA '$SCHEMA_CLAUSE'
'

# ----------------- EXTRACT TABLES ---------------------------------------------------------------------------------------------------
Expand All @@ -50,28 +79,34 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Tables.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
if [ "$SCHEMA" != "" ]; then
SCHEMA_CLAUSE=' INNER JOIN UNNEST(SPLIT('\'$SCHEMA\'', '\',\'')) AS SCHEMAS
ON TRIM(SCHEMAS)=table_schema'
else
SCHEMA_CLAUSE=""
fi

./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-'\''||lower(table_type)||'\''> '\''||table_catalog||'\''.'\''||table_schema||'\''.'\''||table_name||'\'' </sc-'\''||lower(table_type)||'\''> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
FROM
`region-'$REGION'`.INFORMATION_SCHEMA.TABLES
`region-'$REGION'`.INFORMATION_SCHEMA.TABLES '$SCHEMA_CLAUSE'
WHERE
table_type = '\''BASE TABLE'\''
'

# ----------------- EXTRACT EXTERNAL TABLES ---------------------------------------------------------------------------------------------------

exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_External_Tables.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-'\''||lower(table_type)||'\''> '\''||table_catalog||'\''.'\''||table_schema||'\''.'\''||table_name||'\'' </sc-'\''||lower(table_type)||'\''> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
FROM
`region-'$REGION'`.INFORMATION_SCHEMA.TABLES
`region-'$REGION'`.INFORMATION_SCHEMA.TABLES '$SCHEMA_CLAUSE'
WHERE
table_type = '\''EXTERNAL TABLE'\''
'
Expand All @@ -82,12 +117,12 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Views.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-'\''||lower(table_type)||'\''> '\''||table_catalog||'\''.'\''||table_schema||'\''.'\''||table_name||'\'' </sc-'\''||lower(table_type)||'\''> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
FROM
`region-'$REGION'`.INFORMATION_SCHEMA.TABLES
`region-'$REGION'`.INFORMATION_SCHEMA.TABLES '$SCHEMA_CLAUSE'
WHERE
table_type = '\''VIEW'\''
'
Expand All @@ -98,12 +133,19 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Functions.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
if [ "$SCHEMA" != "" ]; then
SCHEMA_CLAUSE=' INNER JOIN UNNEST(SPLIT('\'$SCHEMA\'', '\',\'')) AS SCHEMAS
ON TRIM(SCHEMAS)=specific_schema'
else
SCHEMA_CLAUSE=""
fi

./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-'\''||lower(routine_type)||'\''> '\''||specific_catalog||'\''.'\''||specific_schema||'\''.'\''||specific_name||'\'' </sc-'\''||lower(routine_type)||'\''> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
FROM
`region-'$REGION'`.INFORMATION_SCHEMA.ROUTINES
`region-'$REGION'`.INFORMATION_SCHEMA.ROUTINES '$SCHEMA_CLAUSE'
WHERE
routine_type = '\''FUNCTION'\''
'
Expand All @@ -114,12 +156,12 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Procedures.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-'\''||lower(routine_type)||'\''> '\''||specific_catalog||'\''.'\''||specific_schema||'\''.'\''||specific_name||'\'' </sc-'\''||lower(routine_type)||'\''> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
FROM
`region-'$REGION'`.INFORMATION_SCHEMA.ROUTINES
`region-'$REGION'`.INFORMATION_SCHEMA.ROUTINES '$SCHEMA_CLAUSE'
WHERE
routine_type = '\''PROCEDURE'\''
'
Expand All @@ -130,7 +172,7 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Reservations.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-reservation> '\''||project_id||'\''.'\''||reservation_name||'\'' </sc-reservation> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
Expand All @@ -144,7 +186,7 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Capacity_commitments.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-capacity_commitments> '\''||project_id||'\''.'\''||capacity_commitment_id||'\'' </sc-capacity_commitments> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
Expand All @@ -158,7 +200,7 @@ exec 3>&1 4>&2
trap 'exec 2>&4 1>&3' 0 1 2 3
exec 1>Output/DDL/DDL_Assignments.sql 2>&1

./google-cloud-sdk/bin/bq query --use_legacy_sql=false \
./google-cloud-sdk/bin/bq query --use_legacy_sql=false --max_rows=50000 \
'
SELECT
'\''/* <sc-assigments> '\''||project_id||'\''.'\''||reservation_name||'\'' </sc-assigments> */'\''||'\''\n\n'\''||ddl||'\''\n\n'\'' DDLs
Expand All @@ -174,5 +216,4 @@ sed -i ':a;N;$!ba;s/\n+/\n--+/g' *.sql
sed -i ':a;N;$!ba;s/+\n|/+\n--|/g' *.sql
sed -i ':a;N;$!ba;s/|\n|/\n/g' *.sql
sed -i ':a;N;$!ba;s/|\n/\n/g' *.sql
sed -i ':a;N;$!ba;s/Waiting/--Waiting/g' *.sql

sed -i ':a;N;$!ba;s/Waiting/--Waiting/g' *.sql

0 comments on commit ca0c077

Please sign in to comment.