Skip to content

Ft/bigquery module #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions modules/gcp/bigquery/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
locals {

  # Flatten the nested dataset -> table map into a list of objects so each
  # table can be addressed individually via for_each in
  # google_bigquery_table.bq_tables.
  # The "{}" default makes a dataset without a "tables" key valid instead of
  # failing the plan with a lookup error.
  bq_tables = flatten([
    for dataset_key, values in var.bq_datasets : [
      for table_key, table_values in lookup(values, "tables", {}) : {
        dataset_id          = dataset_key
        table_id            = table_key
        deletion_protection = try(table_values.deletion_protection, false)
        friendly_name       = try(table_values.friendly_name, table_key)
        description         = try(table_values.description, null)
        source_uris         = table_values.source_uris
      }
    ]
  ])

  # One element per (dataset, role) pair, consumed by
  # google_bigquery_dataset_iam_binding.bq_access. The "{}" default makes a
  # dataset without an "access" key valid.
  bq_datasets_access_policy = flatten([
    for dataset_key, values in var.bq_datasets : [
      for role_key, members in lookup(values, "access", {}) : {
        dataset_id = dataset_key
        role       = role_key
        members    = members
      }
    ]
  ])

  labels = var.labels

  # NOTE(review): main.tf reads var.postgres_password directly; this local is
  # presumably for other .tf files in the module — confirm before removing.
  postgres_password = var.postgres_password

  # Empty placeholders; not referenced in main.tf. Kept in case other files
  # in this module use them — verify before deleting.
  stored_procedures                     = {}
  cloudsql_scheduled_postgres_transfers = {}
}


# Enable the BigQuery Data Transfer API required by
# google_bigquery_data_transfer_config resources.
resource "google_project_service" "bigquerydatatransfer" {
  project = var.project
  service = "bigquerydatatransfer.googleapis.com"

  # Do not disable the project-wide API when this module is destroyed:
  # other resources in the project may still depend on it.
  disable_on_destroy = false
}

# Enable the core BigQuery API required by datasets and tables below.
resource "google_project_service" "bigquery" {
  project = var.project
  service = "bigquery.googleapis.com"

  # Do not disable the project-wide API when this module is destroyed:
  # other resources in the project may still depend on it.
  disable_on_destroy = false
}

# One dataset per key of var.bq_datasets. Each value must provide
# dataset_description and force_destroy.
resource "google_bigquery_dataset" "bq_datasets" {
  # Ensure the BigQuery API is enabled before creating datasets; without this
  # a fresh project can race API activation and fail the first apply.
  depends_on = [google_project_service.bigquery]

  for_each              = var.bq_datasets
  dataset_id            = each.key
  location              = "US" # NOTE(review): hard-coded region — parameterize if multi-region support is needed
  project               = var.project
  max_time_travel_hours = 168 # 7 days, the BigQuery maximum

  # default_table_expiration_ms = 3600000 # 1 hr
  friendly_name              = each.key
  description                = each.value.dataset_description
  labels                     = local.labels
  delete_contents_on_destroy = each.value.force_destroy

}

# Authoritative role bindings per dataset: one binding resource for each
# (dataset, role) pair produced in local.bq_datasets_access_policy.
resource "google_bigquery_dataset_iam_binding" "bq_access" {
  depends_on = [google_bigquery_dataset.bq_datasets]

  # Keys look like "<dataset_id>.<role>" to stay unique across datasets.
  for_each = {
    for policy in local.bq_datasets_access_policy :
    "${policy.dataset_id}.${policy.role}" => policy
  }

  dataset_id = each.value.dataset_id
  role       = "roles/${each.value.role}"
  members    = each.value.members
}


# Scheduled PostgreSQL -> BigQuery transfers, one per entry in
# var.cloudsql_scheduled_postgres_transfers (keyed by the config's "name").
resource "google_bigquery_data_transfer_config" "cloudsql_postgres_transfer" {
  for_each = {
    for dt_config in var.cloudsql_scheduled_postgres_transfers : dt_config.name => dt_config
  }

  display_name           = each.key
  project                = var.project
  location               = "US" # NOTE(review): hard-coded region — must match the destination dataset's location
  data_source_id         = "postgresql"
  schedule               = each.value.schedule
  destination_dataset_id = google_bigquery_dataset.bq_datasets[each.value.destination_dataset].dataset_id
  params = {
    "assets" : jsonencode(each.value.source_table_names)
    "connector.authentication.username" : each.value.username
    "connector.authentication.password" : var.postgres_password
    "connector.database" : each.value.database
    "connector.endpoint.host" : each.value.host
    # Default to the standard PostgreSQL port but allow per-transfer override.
    "connector.endpoint.port" : try(each.value.port, 5432)
    "connector.encryptionMode" : each.value.encryption_mode
    # NOTE(review): a null value inside a map literal is rejected by
    # Terraform; "" is the safe "unset" sentinel here.
    "connector.networkAttachment" : try(each.value.network_attachment, "")
    "connector.schema" : each.value.schema

  }
  service_account_name = var.service_account_email

  # The dataset must exist and the Data Transfer API must be enabled before
  # the transfer config can be created.
  depends_on = [
    google_bigquery_dataset.bq_datasets,
    google_project_service.bigquerydatatransfer,
  ]
}

# External (Parquet-backed) tables, one per entry in local.bq_tables.
resource "google_bigquery_table" "bq_tables" {
  depends_on = [google_bigquery_dataset.bq_datasets]

  project = var.project

  # Key by "<dataset>.<table>" — keying by table_id alone silently collapses
  # two tables with the same name in different datasets into one resource.
  # NOTE(review): this changes resource addresses; existing state needs
  # `terraform state mv` (or moved blocks) before applying.
  for_each = {
    for table in local.bq_tables : "${table.dataset_id}.${table.table_id}" => table
  }

  table_id            = each.value.table_id
  dataset_id          = each.value.dataset_id
  deletion_protection = each.value.deletion_protection
  friendly_name       = each.value.friendly_name
  description         = try(each.value.description, null)

  external_data_configuration {
    autodetect    = true # Parquet files used
    source_format = "PARQUET"
    source_uris   = each.value.source_uris
  }

  labels = local.labels
}
30 changes: 30 additions & 0 deletions modules/gcp/bigquery/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
variable "project" {
  description = "The project ID to host the cluster in (required)"
  type        = string
}

variable "service_account_email" {
  description = "The email of the custom service account."
  type        = string
}

variable "bq_datasets" {
  type        = map(any)
  description = "BQ Datasets to create. Each value may carry dataset_description, force_destroy, and optional 'tables' and 'access' maps."
}


variable "cloudsql_scheduled_postgres_transfers" {
  type        = map(any)
  description = "Schedule PG transfers to BigQuery"
  # Safe default so the module can be used without any transfers configured.
  default = {}
}

variable "postgres_password" {
  type        = string
  description = "Postgres Password, set by export TF_VAR_postgres_password=secret_password"
  sensitive   = true
}

variable "labels" {
  type        = map(any)
  description = "Labels applied to every dataset and table created by this module."
  default     = {}
}
9 changes: 9 additions & 0 deletions modules/gcp/bigquery/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Version constraints for this module: Terraform 1.3+ and the hashicorp/google
# provider 6.2 or newer.
terraform {
  required_version = ">=1.3.0"
  required_providers {
    google = {
      source  = "hashicorp/google"
      version = ">=6.2.0"
    }
  }
}
1 change: 1 addition & 0 deletions modules/gcp/bigquery/vpc/.tflint.hcl
Loading