Skip to content

Commit dd66efa

Browse files
author
Claude Code
committed
fix: address critical code review issues
- Fix SQL LIMIT portability by using database.apply_limit_to_sql() instead of hard-coded LIMIT syntax (supports SQL Server, Oracle, etc.) - Add virtual dataset and expression column handling to skip unsupported cases - Fix test mocking to include engine context manager and apply_limit_to_sql - Add test coverage for virtual datasets and expression columns Note: format_map integration will be added to superset/models/helpers.py after rebase
1 parent 6fb8e27 commit dd66efa

File tree

2 files changed

+71
-2
lines changed

2 files changed

+71
-2
lines changed

superset/datasets/datetime_format_detector.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,23 @@ def detect_column_format(
7070
)
7171
return None
7272

73+
# Skip expression columns - they don't have stored data to sample
74+
if column.expression:
75+
logger.debug(
76+
"Column %s is an expression column, skipping format detection",
77+
column.column_name,
78+
)
79+
return None
80+
81+
# Skip virtual datasets - they use SQL queries, not physical tables
82+
if dataset.is_virtual:
83+
logger.debug(
84+
"Dataset %s is virtual, skipping format detection for column %s",
85+
dataset.table_name,
86+
column.column_name,
87+
)
88+
return None
89+
7390
try:
7491
# Build SQL query using database's identifier quoting
7592
# Note: Column and table names come from internal metadata, not user input
@@ -97,10 +114,13 @@ def detect_column_format(
97114
# S608: false positive - using dialect's identifier preparer
98115
sql = ( # noqa: S608
99116
f"SELECT {column_name_quoted} FROM {full_table} " # noqa: S608
100-
f"WHERE {column_name_quoted} IS NOT NULL " # noqa: S608
101-
f"LIMIT {self.sample_size}"
117+
f"WHERE {column_name_quoted} IS NOT NULL" # noqa: S608
102118
)
103119

120+
# Apply database-specific LIMIT using apply_limit_to_sql
121+
# This handles different SQL dialects (LIMIT, TOP, FETCH FIRST, etc.)
122+
sql = database.apply_limit_to_sql(sql, limit=self.sample_size, force=True)
123+
104124
# Execute query and get results
105125
df = database.get_df(sql, dataset.schema)
106126

tests/unit_tests/datasets/test_datetime_format_detector.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,23 @@ def mock_dataset() -> MagicMock:
3131
dataset = MagicMock(spec=SqlaTable)
3232
dataset.table_name = "test_table"
3333
dataset.schema = "test_schema"
34+
dataset.is_virtual = False
35+
36+
# Mock the database engine and dialect for identifier quoting
37+
mock_engine = MagicMock()
38+
mock_dialect = MagicMock()
39+
mock_dialect.identifier_preparer.quote = lambda x: f'"{x}"'
40+
mock_engine.dialect = mock_dialect
41+
42+
# Mock the context manager returned by get_sqla_engine()
43+
dataset.database.get_sqla_engine.return_value.__enter__.return_value = mock_engine
44+
dataset.database.get_sqla_engine.return_value.__exit__.return_value = None
45+
46+
# Mock apply_limit_to_sql to return SQL with LIMIT
47+
dataset.database.apply_limit_to_sql = (
48+
lambda sql, limit, force: f"{sql} LIMIT {limit}"
49+
)
50+
3451
return dataset
3552

3653

@@ -41,6 +58,7 @@ def mock_column() -> MagicMock:
4158
column.column_name = "date_column"
4259
column.is_temporal = True
4360
column.datetime_format = None
61+
column.expression = None # Not an expression column
4462
return column
4563

4664

@@ -109,15 +127,18 @@ def test_detect_all_formats(mock_dataset: MagicMock) -> None:
109127
col1.column_name = "date1"
110128
col1.is_temporal = True
111129
col1.datetime_format = None
130+
col1.expression = None
112131

113132
col2 = MagicMock(spec=TableColumn)
114133
col2.column_name = "date2"
115134
col2.is_temporal = True
116135
col2.datetime_format = None
136+
col2.expression = None
117137

118138
col3 = MagicMock(spec=TableColumn)
119139
col3.column_name = "text_col"
120140
col3.is_temporal = False
141+
col3.expression = None
121142

122143
mock_dataset.columns = [col1, col2, col3]
123144

@@ -144,6 +165,7 @@ def test_detect_all_formats_skip_existing(mock_dataset: MagicMock) -> None:
144165
col1.column_name = "date1"
145166
col1.is_temporal = True
146167
col1.datetime_format = "%Y-%m-%d"
168+
col1.expression = None
147169

148170
mock_dataset.columns = [col1]
149171

@@ -161,6 +183,7 @@ def test_detect_all_formats_force_redetection(mock_dataset: MagicMock) -> None:
161183
col1.column_name = "date1"
162184
col1.is_temporal = True
163185
col1.datetime_format = "%Y-%m-%d"
186+
col1.expression = None
164187

165188
mock_dataset.columns = [col1]
166189
mock_dataset.table_name = "test_table"
@@ -174,3 +197,29 @@ def test_detect_all_formats_force_redetection(mock_dataset: MagicMock) -> None:
174197

175198
assert results["date1"] == "%m/%d/%Y"
176199
assert col1.datetime_format == "%m/%d/%Y"
200+
201+
202+
def test_detect_column_format_virtual_dataset(
203+
mock_dataset: MagicMock, mock_column: MagicMock
204+
) -> None:
205+
"""Test that virtual datasets are skipped."""
206+
mock_dataset.is_virtual = True
207+
208+
detector = DatetimeFormatDetector()
209+
detected_format = detector.detect_column_format(mock_dataset, mock_column)
210+
211+
assert detected_format is None
212+
mock_dataset.database.get_df.assert_not_called()
213+
214+
215+
def test_detect_column_format_expression_column(
216+
mock_dataset: MagicMock, mock_column: MagicMock
217+
) -> None:
218+
"""Test that expression columns are skipped."""
219+
mock_column.expression = "DATE_ADD(some_date, INTERVAL 1 DAY)"
220+
221+
detector = DatetimeFormatDetector()
222+
detected_format = detector.detect_column_format(mock_dataset, mock_column)
223+
224+
assert detected_format is None
225+
mock_dataset.database.get_df.assert_not_called()

0 commit comments

Comments
 (0)