Skip to content

Commit 85b32f1

Browse files
committed
feat(singlestoredb): add vector type support
1 parent 0f5957c commit 85b32f1

File tree

7 files changed

+56
-56
lines changed

7 files changed

+56
-56
lines changed

ibis/backends/singlestoredb/converter.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import datetime
44
import json
5+
from functools import partial
56

67
from ibis.formats.pandas import PandasData
78

@@ -359,27 +360,30 @@ def convert_SingleStoreDB_type(self, type_name):
359360

360361
# SingleStoreDB-specific mappings
361362
singlestore_specific = {
362-
"VECTOR": dt.binary,
363+
"VECTOR": partial(dt.Array, dt.float32), # Default to float32 array
363364
"BSON": dt.JSON,
364365
"GEOGRAPHY": dt.geometry,
365366
# Vector binary types
366-
"FLOAT32_VECTOR": dt.binary,
367-
"FLOAT64_VECTOR": dt.binary,
368-
"INT8_VECTOR": dt.binary,
369-
"INT16_VECTOR": dt.binary,
370-
"INT32_VECTOR": dt.binary,
371-
"INT64_VECTOR": dt.binary,
367+
"FLOAT32_VECTOR": partial(dt.Array, dt.float32),
368+
"FLOAT64_VECTOR": partial(dt.Array, dt.float64),
369+
"INT8_VECTOR": partial(dt.Array, dt.int8),
370+
"INT16_VECTOR": partial(dt.Array, dt.int16),
371+
"INT32_VECTOR": partial(dt.Array, dt.int32),
372+
"INT64_VECTOR": partial(dt.Array, dt.int64),
372373
# Vector JSON types
373-
"FLOAT32_VECTOR_JSON": dt.JSON,
374-
"FLOAT64_VECTOR_JSON": dt.JSON,
375-
"INT8_VECTOR_JSON": dt.JSON,
376-
"INT16_VECTOR_JSON": dt.JSON,
377-
"INT32_VECTOR_JSON": dt.JSON,
378-
"INT64_VECTOR_JSON": dt.JSON,
374+
"FLOAT32_VECTOR_JSON": partial(dt.Array, dt.float32),
375+
"FLOAT64_VECTOR_JSON": partial(dt.Array, dt.float64),
376+
"INT8_VECTOR_JSON": partial(dt.Array, dt.int8),
377+
"INT16_VECTOR_JSON": partial(dt.Array, dt.int16),
378+
"INT32_VECTOR_JSON": partial(dt.Array, dt.int32),
379+
"INT64_VECTOR_JSON": partial(dt.Array, dt.int64),
379380
}
380381

381382
ibis_type = singlestore_specific.get(normalized_name)
382383
if ibis_type is not None:
384+
# Handle partials (like VECTOR types)
385+
if hasattr(ibis_type, "func"):
386+
return ibis_type() # Call the partial function
383387
return ibis_type
384388

385389
# Default to string for unknown types

ibis/backends/singlestoredb/datatypes.py

Lines changed: 19 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -162,15 +162,6 @@ def _type_from_cursor_info(
162162
flags = _FieldFlags(flags)
163163
typename = _type_codes.get(type_code)
164164

165-
# Handle SingleStoreDB vector types that may not be in _type_codes
166-
if type_code in (3001, 3002, 3003, 3004, 3005, 3006): # Vector types
167-
# SingleStoreDB VECTOR types - map to Binary for now
168-
# Could be enhanced to Array[Float32] or other appropriate types in future
169-
return dt.Binary(nullable=True)
170-
elif type_code in (2001, 2002, 2003, 2004, 2005, 2006): # Vector JSON types
171-
# SingleStoreDB VECTOR_JSON types - map to JSON
172-
return dt.JSON(nullable=True)
173-
174165
if typename is None:
175166
raise NotImplementedError(
176167
f"SingleStoreDB type code {type_code:d} is not supported"
@@ -212,10 +203,6 @@ def _type_from_cursor_info(
212203
# making them indistinguishable from TINYINT. The DESCRIBE-based schema
213204
# detection (via to_ibis method) can properly distinguish these types.
214205
typ = dt.Boolean
215-
elif typename == "VECTOR":
216-
# SingleStoreDB VECTOR type - typically used for AI/ML workloads
217-
# For now, map to Binary; could be enhanced to Array[Float32] in future
218-
typ = dt.Binary
219206
elif flags.is_set:
220207
# Sets are limited to strings in SingleStoreDB
221208
typ = dt.Array(dt.string)
@@ -254,7 +241,12 @@ def _type_from_cursor_info(
254241
typ = dt.Geometry
255242
else:
256243
typ = _type_mapping[typename]
257-
if issubclass(typ, dt.SignedInteger) and flags.is_unsigned:
244+
# Only apply unsigned logic to actual type classes, not partials
245+
if (
246+
hasattr(typ, "__mro__")
247+
and issubclass(typ, dt.SignedInteger)
248+
and flags.is_unsigned
249+
):
258250
typ = getattr(dt, f"U{typ.__name__}")
259251

260252
# Projection columns are always nullable
@@ -304,20 +296,20 @@ def _decimal_length_to_precision(*, length: int, scale: int, is_unsigned: bool)
304296
# SingleStoreDB-specific types
305297
"BSON": dt.JSON,
306298
# Vector types for machine learning and AI workloads
307-
"VECTOR": dt.Binary, # General vector type
308-
"FLOAT32_VECTOR": dt.Binary,
309-
"FLOAT64_VECTOR": dt.Binary,
310-
"INT8_VECTOR": dt.Binary,
311-
"INT16_VECTOR": dt.Binary,
312-
"INT32_VECTOR": dt.Binary,
313-
"INT64_VECTOR": dt.Binary,
299+
"VECTOR": partial(dt.Array, dt.Float32), # General vector type
300+
"FLOAT32_VECTOR": partial(dt.Array, dt.Float32),
301+
"FLOAT64_VECTOR": partial(dt.Array, dt.Float64),
302+
"INT8_VECTOR": partial(dt.Array, dt.Int8),
303+
"INT16_VECTOR": partial(dt.Array, dt.Int16),
304+
"INT32_VECTOR": partial(dt.Array, dt.Int32),
305+
"INT64_VECTOR": partial(dt.Array, dt.Int64),
314306
# Vector JSON types (stored as JSON with vector semantics)
315-
"FLOAT32_VECTOR_JSON": dt.JSON,
316-
"FLOAT64_VECTOR_JSON": dt.JSON,
317-
"INT8_VECTOR_JSON": dt.JSON,
318-
"INT16_VECTOR_JSON": dt.JSON,
319-
"INT32_VECTOR_JSON": dt.JSON,
320-
"INT64_VECTOR_JSON": dt.JSON,
307+
"FLOAT32_VECTOR_JSON": partial(dt.Array, dt.Float32),
308+
"FLOAT64_VECTOR_JSON": partial(dt.Array, dt.Float64),
309+
"INT8_VECTOR_JSON": partial(dt.Array, dt.Int8),
310+
"INT16_VECTOR_JSON": partial(dt.Array, dt.Int16),
311+
"INT32_VECTOR_JSON": partial(dt.Array, dt.Int32),
312+
"INT64_VECTOR_JSON": partial(dt.Array, dt.Int64),
321313
# Extended types (SingleStoreDB-specific extensions)
322314
"GEOGRAPHY": dt.Geometry, # Enhanced geospatial support
323315
}

ibis/backends/singlestoredb/tests/test_datatypes.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_basic_type_mappings(self):
5454
# Collection types
5555
"SET": partial(dt.Array, dt.String),
5656
# SingleStoreDB-specific types
57-
"VECTOR": dt.Binary,
57+
"VECTOR": partial(dt.Array, dt.Float32),
5858
"GEOGRAPHY": dt.Geometry,
5959
}
6060

@@ -74,7 +74,10 @@ def test_singlestoredb_specific_types(self):
7474
"""Test SingleStoreDB-specific type extensions."""
7575
# Test VECTOR type
7676
assert "VECTOR" in _type_mapping
77-
assert _type_mapping["VECTOR"] == dt.Binary
77+
expected_vector_type = partial(dt.Array, dt.Float32)
78+
actual_vector_type = _type_mapping["VECTOR"]
79+
assert actual_vector_type.func == expected_vector_type.func
80+
assert actual_vector_type.args == expected_vector_type.args
7881

7982
# Test GEOGRAPHY type
8083
assert "GEOGRAPHY" in _type_mapping
@@ -147,8 +150,9 @@ def test_vector_type_handling(self):
147150
scale=0,
148151
multi_byte_maximum_length=1,
149152
)
150-
# Vector types are currently mapped to Binary
151-
assert isinstance(result, dt.Binary)
153+
# Vector types are mapped to Array[Float32]
154+
assert isinstance(result, dt.Array)
155+
assert isinstance(result.value_type, dt.Float32)
152156

153157
# Test FLOAT64_VECTOR type too
154158
result2 = _type_from_cursor_info(
@@ -158,7 +162,8 @@ def test_vector_type_handling(self):
158162
scale=0,
159163
multi_byte_maximum_length=1,
160164
)
161-
assert isinstance(result2, dt.Binary)
165+
assert isinstance(result2, dt.Array)
166+
assert isinstance(result2.value_type, dt.Float64)
162167

163168
def test_timestamp_with_timezone(self):
164169
"""Test TIMESTAMP type includes UTC timezone by default."""
@@ -472,12 +477,16 @@ def test_convert_singlestoredb_type_method(self):
472477
assert converter.convert_SingleStoreDB_type("GEOMETRY") == dt.geometry
473478

474479
# Test SingleStoreDB-specific types
475-
assert converter.convert_SingleStoreDB_type("VECTOR") == dt.binary
480+
vector_result = converter.convert_SingleStoreDB_type("VECTOR")
481+
assert isinstance(vector_result, dt.Array)
482+
assert isinstance(vector_result.value_type, dt.Float32)
476483
assert converter.convert_SingleStoreDB_type("GEOGRAPHY") == dt.geometry
477484

478485
# Test case insensitivity
479486
assert converter.convert_SingleStoreDB_type("varchar") == dt.string
480-
assert converter.convert_SingleStoreDB_type("Vector") == dt.binary
487+
vector_result_case = converter.convert_SingleStoreDB_type("Vector")
488+
assert isinstance(vector_result_case, dt.Array)
489+
assert isinstance(vector_result_case.value_type, dt.Float32)
481490

482491
# Test unknown type defaults to string
483492
assert converter.convert_SingleStoreDB_type("UNKNOWN_TYPE") == dt.string

ibis/backends/tests/snapshots/test_sql/test_mixed_qualified_and_unqualified_predicates/singlestoredb/out.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ FROM (
66
`t1`.`x`,
77
`t1`.`y`,
88
AVG(`t1`.`x`) OVER (
9-
ORDER BY CASE WHEN NULL IS NULL THEN 1 ELSE 0 END, NULL ASC
9+
ORDER BY NULL ASC NULLS LAST
1010
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
1111
) AS _w
1212
FROM (
1313
SELECT
1414
`t0`.`x`,
1515
SUM(`t0`.`x`) OVER (
16-
ORDER BY CASE WHEN NULL IS NULL THEN 1 ELSE 0 END, NULL ASC
16+
ORDER BY NULL ASC NULLS LAST
1717
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
1818
) AS `y`
1919
FROM `t` AS `t0`

ibis/backends/tests/snapshots/test_sql/test_order_by_no_deference_literals/singlestoredb/out.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ SELECT
44
'foo' AS `s`
55
FROM `test` AS `t0`
66
ORDER BY
7-
CASE WHEN `t0`.`a` IS NULL THEN 1 ELSE 0 END, `t0`.`a` ASC
7+
`t0`.`a` ASC NULLS LAST
ibis/backends/tests/snapshots/test_sql/test_rewrite_context/singlestoredb/out.sql
(NOTE: file path was missing in this capture — inferred from the NTILE(2) OVER (ORDER BY RAND()) snapshot content; confirm against the repository)

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
SELECT
2-
NTILE(2) OVER (ORDER BY RAND() ASC) - 1 AS `new_col`
2+
NTILE(2) OVER (ORDER BY RAND() ASC NULLS LAST) - 1 AS `new_col`
33
FROM `test` AS `t0`
44
LIMIT 10

ibis/backends/tests/test_numeric.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,11 +1374,6 @@ def test_clip(backend, alltypes, df, ibis_func, pandas_func):
13741374
raises=PyDruidProgrammingError,
13751375
reason="SQL query requires 'MIN' operator that is not supported.",
13761376
)
1377-
@pytest.mark.notyet(
1378-
["singlestoredb"],
1379-
raises=SingleStoreDBOperationalError,
1380-
reason="Complex nested SQL exceeds SingleStoreDB stack size causing stack overflow",
1381-
)
13821377
def test_histogram(con, alltypes):
13831378
n = 10
13841379
hist = con.execute(alltypes.int_col.histogram(nbins=n).name("hist"))

0 commit comments

Comments
 (0)