Skip to content

Commit 4841279

Browse files
committed
fix: Split persist docs into column and table comments
The previous version of this macro blended the `MODIFY COMMENT` command for the table comment with the `COMMENT COLUMN` commands for the column comments, inside the same `ALTER TABLE`. This combined syntax **does not work** with `ON CLUSTER`. (This was thought to be a clickhouse bug, but that bug is fixed and the behavior remains.) The revised macro now sends a separate `ALTER TABLE` for the table comment `MODIFY COMMENT`, and a separate one for all the `COMMENT COLUMN` commands. I've split it up into 2 helper macros that accomplish each task for maintainability. This also works on non-cluster setups so there is no need to further complicate it by combining table and column comment alterations into a single ALTER. `clickhouse__persist_docs` now runs the relation (table comment) helper macro if `persist_relations_docs()` is set and descriptions exist for the relation and the column comment helper macro if `persist_column_docs()` and columns are defined in the model. Additionally, the formatting was cleaned up for multiple column comments so that comments for a different column are not appearing on the same line. Just makes it easier to read in clickhouse/dbt logs. Example: It produced comments like: ```sql alter table <table> [ON CLUSTER] modify comment $dbt_comment_literal_block$<table comment> $dbt_comment_literal_block$, comment column `col1_name` $dbt_comment_literal_block$<column comment> $dbt_comment_lieteral_block$, comment column `col2_name` ... ``` A bit messy, but works on a single node. Although `ON CLUSTER` is correctly added, this syntax is incorrect and ONLY produces table and column comments for the initiator node. Now it does: ```sql alter table <table> [ON CLUSTER] modify comment $dbt_comment_literal_block$ <table comment> $dbt_comment_literal_block$ alter table <table> [ON CLUSTER] comment column `col1_name` $dbt_comment_literal_block$<column multi linecomment>$dbt_comment_literal_block$, comment column `col2_name` $dbt_comment$ <column comment>$dbt_comment$, ... ```
1 parent e4d5aac commit 4841279

File tree

2 files changed

+32
-112
lines changed

2 files changed

+32
-112
lines changed

dbt/include/clickhouse/macros/persist_docs.sql

Lines changed: 32 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,29 +10,45 @@
1010
alter table {{ relation }} {{ on_cluster_clause(relation) }} modify comment '{{ comment }}'
1111
{% endmacro %}
1212

13-
{% macro clickhouse__persist_docs(relation, model, for_relation, for_columns) %}
14-
{%- set alter_comments = [] %}
15-
13+
{% macro clickhouse__persist_docs(relation, model, for_relation, for_columns) -%}
14+
{# Persist table comment if enabled and description provided #}
1615
{%- if for_relation and config.persist_relation_docs() and model.description -%}
17-
{% set escaped_comment = clickhouse_escape_comment(model.description) %}
18-
{% do alter_comments.append("modify comment {comment}".format(comment=escaped_comment)) %}
16+
{{ _persist_table_comment(relation, model.description) }}
1917
{%- endif -%}
2018

19+
{#- Persist column comments if enabled and columns defined -#}
2120
{%- if for_columns and config.persist_column_docs() and model.columns -%}
22-
{% set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list %}
23-
{% for column_name in model.columns if (column_name in existing_columns) %}
24-
{%- set comment = model.columns[column_name]['description'] -%}
25-
{%- if comment %}
26-
{% set escaped_comment = clickhouse_escape_comment(comment) %}
27-
{% do alter_comments.append("comment column `{column_name}` {comment}".format(column_name=column_name, comment=escaped_comment)) %}
28-
{%- endif %}
29-
{%- endfor -%}
21+
{{ _persist_column_comments(relation, model.columns) }}
3022
{%- endif -%}
23+
{%- endmacro %}
24+
25+
{#- Helper macro: persist the table comment for a ClickHouse relation. -#}
26+
{% macro _persist_table_comment(relation, description) -%}
27+
{#- Escape the description to be safe for ClickHouse #}
28+
{%- set escaped_comment = clickhouse_escape_comment(description) -%}
29+
{#- Build and run the ALTER TABLE ... MODIFY COMMENT statement -#}
30+
{%- set sql = "modify comment {comment}".format(comment=escaped_comment) -%}
31+
{{ run_query(one_alter_relation(relation, sql)) }}
32+
{%- endmacro %}
3133

32-
{%- if alter_comments | length > 0 -%}
33-
{% do run_query(one_alter_relation(relation, alter_comments|join(', '))) %}
34+
{#- Helper macro: persist comments for multiple columns on a ClickHouse table.
35+
relation: target table relation
36+
columns: dict mapping column names to metadata (including 'description') -#}
37+
{% macro _persist_column_comments(relation, columns) -%}
38+
{#- Gather existing columns in the relation to avoid altering non-existent ones -#}
39+
{%- set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list -%}
40+
{#- Collect ALTER statements for each column with a description -#}
41+
{%- set alterations = [] -%}
42+
{%- for column_name, info in columns.items() if info.description and column_name in existing_columns -%}
43+
{%- set escaped_comment = clickhouse_escape_comment(info.description) -%}
44+
{%- do alterations.append("\ncomment column `{column_name}` {comment}".format(column_name=column_name, comment=escaped_comment)) -%}
45+
{%- endfor -%}
46+
{#- Execute a single ALTER TABLE statement for all column comments -#}
47+
{%- if alterations -%}
48+
{{ run_query(one_alter_relation(relation, alterations | join(", "))) }}
3449
{%- endif -%}
35-
{% endmacro %}
50+
{%- endmacro %}
51+
3652

3753
{#
3854
By using dollar-quoting like this, users can embed anything they want into their comments

tests/integration/adapter/clickhouse/test_clickhouse_comments.py

Lines changed: 0 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -106,99 +106,3 @@ def test_comment(self, project, model_name):
106106
assert column_comment.startswith("XXX")
107107

108108
assert column_node['metadata']['comment'].startswith("YYY")
109-
110-
# Ensure comment is propoagated to all replicas on cluster
111-
#cluster = project.test_config['cluster']
112-
113-
# local_relation = relation_from_name(project.adapter, model_name)
114-
# if cluster:
115-
# ensure_column_comments_consistent_across_replicas(project, cluster, local_relation)
116-
# ensure_table_comment_on_cluster(project, cluster, local_relation)
117-
118-
119-
120-
# def ensure_table_comment_on_cluster(project, cluster, local_relation):
121-
# """Ensure all replicas have same comment for given relation"""
122-
# # Returns 'ok' if exactly one distinct comment exists across all replicas for this table; otherwise 'mismatch'.
123-
# sql = f"""
124-
# SELECT
125-
# if(COUNT(DISTINCT comment) = 1, 'ok', 'mismatch') AS status
126-
# FROM clusterAllReplicas('{cluster}', system.tables)
127-
# WHERE database = currentDatabase()
128-
# AND name = '{local_relation.identifier}'
129-
# """
130-
# result = project.run_sql(sql, fetch="one")
131-
# assert result[0] == "ok"
132-
133-
# sql = f"""
134-
# SELECT
135-
# hostname(),
136-
# comment
137-
# FROM clusterAllReplicas('{cluster}', system.tables)
138-
# WHERE `table` = '{local_relation.identifier}'
139-
# """
140-
141-
# result = project.run_sql(sql, fetch="all")
142-
143-
# for _table_row in result:
144-
# assert _table_row[-1].startswith("YYY")
145-
146-
# def ensure_column_comments_consistent_across_replicas(project, cluster, local_relation):
147-
# # This query groups by column name and checks that each has exactly one distinct comment across replicas.
148-
# check_sql = f"""
149-
# SELECT
150-
# name AS column_name,
151-
# COUNT(DISTINCT comment) AS distinct_comment_count,
152-
# groupArray((hostName(), comment)) AS per_replica_comments
153-
# FROM clusterAllReplicas('{cluster}', system.columns)
154-
# WHERE database = currentDatabase()
155-
# AND table = '{local_relation.identifier}'
156-
# GROUP BY column_name
157-
# """
158-
# rows = project.run_sql(check_sql, fetch="all")
159-
160-
# mismatches = [r for r in rows if r[1] != 1]
161-
# if mismatches:
162-
# print("Column comment mismatches:", mismatches)
163-
164-
# assert not mismatches
165-
166-
# sql = f"""
167-
# SELECT
168-
# name,
169-
# groupArray(hostname()) as hosts,
170-
# groupUniqArray(comment) as comments,
171-
# length(comments) as num_comments
172-
# FROM clusterAllReplicas('{cluster}', system.columns)
173-
# WHERE table = '{local_relation.identifier}'
174-
# GROUP BY name
175-
# """
176-
177-
# result = project.run_sql(sql, fetch="all")
178-
179-
# print("WOW"*100)
180-
# print("\n\n")
181-
# print(result)
182-
# print("WOW"*100)
183-
184-
# for _col in result:
185-
# assert _col[-1] == 1
186-
187-
# assert result == []
188-
189-
190-
# sql = f"""
191-
# SELECT
192-
# name,
193-
# count(hostname())
194-
# FROM clusterAllReplicas('{cluster}', system.columns)
195-
# WHERE table = '{local_relation.identifier}'
196-
# GROUP BY name
197-
# """
198-
# result = project.run_sql(sql, fetch="all")
199-
# assert result[-1] == NUM_CLUSTER_NODES
200-
201-
# assert result == []
202-
203-
204-

0 commit comments

Comments
 (0)