Skip to content

Commit 0561928

Browse files
committed
ignore non-key dialect in MODEL block
1 parent d4a3acb commit 0561928

File tree

2 files changed

+135
-2
lines changed

2 files changed

+135
-2
lines changed

sqlmesh/core/dialect.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -803,8 +803,15 @@ def text_diff(
803803
return "\n".join(unified_diff(a_sql, b_sql))
804804

805805

806+
WS_OR_COMMENT = r"(?:\s|--[^\n]*\n|/\*.*?\*/)"
807+
HEADER = r"\b(?:model|audit)\b(?=\s*\()"
808+
KEY_BOUNDARY = r"(?:\(|,)" # key is preceded by either '(' or ','
809+
DIALECT_VALUE = r"(?:'(?P<d_quoted>[a-z_][a-z0-9_]*)?'|(?P<d_unquoted>[a-z_][a-z0-9_]*))" # value is single-quoted (maybe empty) or unquoted
810+
VALUE_BOUNDARY = r"(?=,|\))" # value is followed by comma or closing paren
811+
806812
DIALECT_PATTERN = re.compile(
807-
r"(model|audit).*?\(.*?dialect\s+'?([a-z]*)", re.IGNORECASE | re.DOTALL
813+
rf"{HEADER}.*?{KEY_BOUNDARY}{WS_OR_COMMENT}*dialect{WS_OR_COMMENT}+{DIALECT_VALUE}{WS_OR_COMMENT}*{VALUE_BOUNDARY}",
814+
re.IGNORECASE | re.DOTALL,
808815
)
809816

810817

@@ -895,7 +902,8 @@ def parse(
895902
A list of the parsed expressions: [Model, *Statements, Query, *Statements]
896903
"""
897904
match = match_dialect and DIALECT_PATTERN.search(sql[:MAX_MODEL_DEFINITION_SIZE])
898-
dialect = Dialect.get_or_raise(match.group(2) if match else default_dialect)
905+
dialect_str = (match.group("d_quoted") or match.group("d_unquoted")) if match else None
906+
dialect = Dialect.get_or_raise(dialect_str or default_dialect)
899907

900908
tokens = dialect.tokenize(sql)
901909
chunks: t.List[t.Tuple[t.List[Token], ChunkType]] = [([], ChunkType.SQL)]

tests/core/test_model.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2727,6 +2727,131 @@ def test_parse(assert_exp_eq):
27272727
)
27282728

27292729

2730+
def test_dialect_pattern():
2731+
def make_test_sql(text: str) -> str:
2732+
return f"""
2733+
MODEL (
2734+
name test_model,
2735+
kind INCREMENTAL_BY_TIME_RANGE(
2736+
time_column ds
2737+
),
2738+
{text}
2739+
);
2740+
2741+
SELECT 1;
2742+
"""
2743+
2744+
def assert_match(test_sql: str, expected_value: t.Optional[str] = "duckdb"):
2745+
match = d.DIALECT_PATTERN.search(test_sql)
2746+
assert match
2747+
dialect_str = match.group("d_quoted") or match.group("d_unquoted")
2748+
assert dialect_str == expected_value
2749+
2750+
# single-quoted dialect
2751+
assert_match(
2752+
make_test_sql(
2753+
"""
2754+
dialect 'duckdb',
2755+
description 'there's a dialect foo in here too!'
2756+
"""
2757+
)
2758+
)
2759+
2760+
# bare dialect
2761+
assert_match(
2762+
make_test_sql(
2763+
"""
2764+
dialect duckdb,
2765+
description 'there's a dialect foo in here too!'
2766+
"""
2767+
)
2768+
)
2769+
2770+
# no dialect specified, "dialect" in description
2771+
test_sql = make_test_sql(
2772+
"""
2773+
description 'there's a dialect foo in here too!'
2774+
"""
2775+
)
2776+
2777+
matches = list(d.DIALECT_PATTERN.finditer(test_sql))
2778+
assert not matches
2779+
2780+
# line comment between properties
2781+
assert_match(
2782+
make_test_sql(
2783+
"""
2784+
tag my_tag, -- comment
2785+
dialect duckdb
2786+
"""
2787+
)
2788+
)
2789+
2790+
# block comment between properties
2791+
assert_match(
2792+
make_test_sql(
2793+
"""
2794+
tag my_tag, /* comment */
2795+
dialect duckdb
2796+
"""
2797+
)
2798+
)
2799+
2800+
# quoted empty dialect
2801+
assert_match(
2802+
make_test_sql(
2803+
"""
2804+
dialect '',
2805+
tag my_tag
2806+
"""
2807+
),
2808+
None,
2809+
)
2810+
2811+
# trailing comment after dialect value
2812+
assert_match(
2813+
make_test_sql(
2814+
"""
2815+
dialect duckdb -- trailing comment
2816+
"""
2817+
)
2818+
)
2819+
2820+
# dialect value isn't terminated by ',' or ')'
2821+
test_sql = make_test_sql(
2822+
"""
2823+
dialect duckdb -- trailing comment
2824+
tag my_tag
2825+
"""
2826+
)
2827+
2828+
matches = list(d.DIALECT_PATTERN.finditer(test_sql))
2829+
assert not matches
2830+
2831+
# dialect first
2832+
assert_match(
2833+
"""
2834+
MODEL(
2835+
dialect duckdb,
2836+
name my_name
2837+
);
2838+
"""
2839+
)
2840+
2841+
# full parse
2842+
sql = """
2843+
MODEL (
2844+
name test_model,
2845+
description 'this text mentions dialect foo but is not a property'
2846+
);
2847+
2848+
SELECT 1;
2849+
"""
2850+
expressions = d.parse(sql, default_dialect="duckdb")
2851+
model = load_sql_based_model(expressions)
2852+
assert model.dialect == ""
2853+
2854+
27302855
CONST = "bar"
27312856

27322857

0 commit comments

Comments
 (0)