Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit af8ee7a

Browse files
authored
Merge pull request #247 from datafold/issue245
Added validation for UUID columns (Also fixes issue #245)
2 parents 7bab3ac + f71aef1 commit af8ee7a

File tree

2 files changed

+44
-7
lines changed

2 files changed

+44
-7
lines changed

data_diff/diff_tables.py

Lines changed: 18 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414

1515
from .utils import safezip, run_as_daemon
1616
from .thread_utils import ThreadedYielder
17-
from .databases.database_types import IKey, NumericType, PrecisionType, StringType
17+
from .databases.database_types import IKey, NumericType, PrecisionType, StringType, ColType_UUID
1818
from .table_segment import TableSegment
1919
from .tracking import create_end_event_json, create_start_event_json, send_event_json, is_tracking_enabled
2020

@@ -209,6 +209,10 @@ def _validate_and_adjust_columns(self, table1, table2):
209209
table1._schema[c1] = col1.replace(precision=lowest.precision)
210210
table2._schema[c2] = col2.replace(precision=lowest.precision)
211211

212+
elif isinstance(col1, ColType_UUID):
213+
if not isinstance(col2, ColType_UUID):
214+
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
215+
212216
elif isinstance(col1, StringType):
213217
if not isinstance(col2, StringType):
214218
raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}")
@@ -222,7 +226,9 @@ def _validate_and_adjust_columns(self, table1, table2):
222226
"If encoding/formatting differs between databases, it may result in false positives."
223227
)
224228

225-
def _bisect_and_diff_tables(self, ti: ThreadedYielder, table1: TableSegment, table2: TableSegment, level=0, max_rows=None):
229+
def _bisect_and_diff_tables(
230+
self, ti: ThreadedYielder, table1: TableSegment, table2: TableSegment, level=0, max_rows=None
231+
):
226232
assert table1.is_bounded and table2.is_bounded
227233

228234
if max_rows is None:
@@ -259,7 +265,16 @@ def _bisect_and_diff_tables(self, ti: ThreadedYielder, table1: TableSegment, tab
259265
for i, (t1, t2) in enumerate(safezip(segmented1, segmented2)):
260266
ti.submit(self._diff_tables, ti, t1, t2, max_rows, level + 1, i + 1, len(segmented1), priority=level)
261267

262-
def _diff_tables(self, ti: ThreadedYielder, table1: TableSegment, table2: TableSegment, max_rows: int, level=0, segment_index=None, segment_count=None):
268+
def _diff_tables(
269+
self,
270+
ti: ThreadedYielder,
271+
table1: TableSegment,
272+
table2: TableSegment,
273+
max_rows: int,
274+
level=0,
275+
segment_index=None,
276+
segment_count=None,
277+
):
263278
logger.info(
264279
". " * level + f"Diffing segment {segment_index}/{segment_count}, "
265280
f"key-range: {table1.min_key}..{table2.max_key}, "

tests/test_postgresql.py

Lines changed: 26 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,21 +2,21 @@
22

33
from data_diff.databases.connect import connect
44
from data_diff import TableSegment, TableDiffer
5-
from .common import TEST_POSTGRESQL_CONN_STRING, random_table_suffix
5+
from .common import TEST_POSTGRESQL_CONN_STRING, TEST_MYSQL_CONN_STRING, random_table_suffix
66

77

8-
class TestWithConnection(unittest.TestCase):
8+
class TestUUID(unittest.TestCase):
99
def setUp(self) -> None:
1010
self.connection = connect(TEST_POSTGRESQL_CONN_STRING)
1111

12-
self.connection.query('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";', None)
13-
1412
table_suffix = random_table_suffix()
1513

1614
self.table_src = f"src{table_suffix}"
1715
self.table_dst = f"dst{table_suffix}"
1816

1917
def test_uuid(self):
18+
self.connection.query('CREATE EXTENSION IF NOT EXISTS "uuid-ossp";', None)
19+
2020
queries = [
2121
f"DROP TABLE IF EXISTS {self.table_src}",
2222
f"DROP TABLE IF EXISTS {self.table_dst}",
@@ -33,6 +33,7 @@ def test_uuid(self):
3333
]
3434

3535
queries.append(f"INSERT INTO {self.table_src}(comment) VALUES ('This one is different')")
36+
queries.append("COMMIT")
3637

3738
for query in queries:
3839
self.connection.query(query, None)
@@ -44,3 +45,24 @@ def test_uuid(self):
4445
diff = list(differ.diff_tables(a, b))
4546
uuid = diff[0][1][0]
4647
self.assertEqual(diff, [("-", (uuid, "This one is different"))])
48+
49+
# Compare with MySql
50+
mysql_conn = connect(TEST_MYSQL_CONN_STRING)
51+
52+
rows = self.connection.query(f"SELECT * FROM {self.table_src}", list)
53+
54+
mysql_conn.query(f"CREATE TABLE {self.table_dst} (id VARCHAR(128), comment VARCHAR(128))", None)
55+
mysql_conn.query(f"COMMIT", None)
56+
for uuid, comment in rows:
57+
mysql_conn.query(f"INSERT INTO {self.table_dst}(id, comment) VALUES ('{uuid}', '{comment}')", None)
58+
mysql_conn.query(f"COMMIT", None)
59+
60+
c = TableSegment(mysql_conn, (self.table_dst,), "id", "comment")
61+
diff = list(differ.diff_tables(a, c))
62+
assert not diff, diff
63+
diff = list(differ.diff_tables(c, a))
64+
assert not diff, diff
65+
66+
self.connection.query(f"DROP TABLE {self.table_src}", None)
67+
self.connection.query(f"DROP TABLE {self.table_dst}", None)
68+
mysql_conn.query(f"DROP TABLE {self.table_dst}", None)

0 commit comments

Comments
 (0)