Skip to content

Commit a8dc4ea

Browse files
committed
Types: Improve support for FLOAT type, converging to FLOAT vs. DOUBLE
1 parent 7b12867 commit a8dc4ea

File tree

3 files changed

+66
-1
lines changed

3 files changed

+66
-1
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ optional-dependencies.release = [
110110
optional-dependencies.test = [
111111
"cratedb-toolkit[testing]",
112112
"dask[dataframe]",
113-
"pandas<2.3",
113+
"pandas[test]<2.3",
114114
"pueblo>=0.0.7",
115115
"pytest<9",
116116
"pytest-cov<7",

src/sqlalchemy_cratedb/compiler.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,27 @@ def visit_TIMESTAMP(self, type_, **kw):
257257
def visit_BLOB(self, type_, **kw):
258258
return "STRING"
259259

260+
def visit_FLOAT(self, type_, **kw):
    """
    Render the DDL type name for SQLAlchemy's ``Float`` type.

    The result depends on ``type_.precision``:

    - no precision (``None`` or ``0``), or a precision <= 24: ``FLOAT``
    - any precision above 24: ``DOUBLE``

    Background, from `sqlalchemy.sql.sqltypes.Float`:

    When a :paramref:`.Float.precision` is not provided in a
    :class:`_types.Float` type, some backends may compile this type as
    an 8 bytes / 64 bit float datatype. To use a 4 bytes / 32 bit float
    datatype, a precision <= 24 can usually be provided, or the
    :class:`_types.REAL` type can be used.
    This is known to be the case in the PostgreSQL and MSSQL dialects,
    which render the type as ``FLOAT``, which in both of them is an
    alias of ``DOUBLE PRECISION``. Other third party dialects may have
    similar behavior.

    :param type_: The type object being compiled; only its ``precision``
        attribute is read.
    :param kw: Additional compiler keyword arguments; unused here.
    :return: ``"FLOAT"`` or ``"DOUBLE"``.
    """
    precision = type_.precision
    # A falsy precision (None or 0) means "no precision requested" and is
    # treated the same as a small precision: both render as FLOAT.
    if precision and precision > 24:
        return "DOUBLE"
    return "FLOAT"
280+
260281

261282
class CrateCompiler(compiler.SQLCompiler):
262283
def visit_getitem_binary(self, binary, operator, **kw):

tests/test_support_pandas.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import re
22
import sys
33

4+
import pandas as pd
45
import pytest
6+
from pandas._testing import assert_equal
57
from pueblo.testing.pandas import makeTimeDataFrame
68
from sqlalchemy.exc import ProgrammingError
79

@@ -15,6 +17,18 @@
1517
df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")
1618
df["time"] = df.index
1719

20+
# Fixture for `test_float_double`: two columns of floating point numbers that
# are written to the database and read back, then compared for exact equality.
# Several `col_2` values carry 16-17 significant digits, so they can only
# survive the round trip unchanged when stored with 64-bit (DOUBLE) precision.
float_double_data = {
21+
"col_1": [19556.88, 629414.27, 51570.0, 2933.52, 20338.98],
22+
"col_2": [
23+
15379.920000000002,
24+
1107140.42,
25+
8081.999999999999,
26+
1570.0300000000002,
27+
29468.539999999997,
28+
],
29+
}
30+
float_double_df = pd.DataFrame.from_dict(float_double_data)
31+
1832

1933
@pytest.mark.skipif(
2034
sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier"
@@ -113,3 +127,33 @@ def test_table_kwargs_unknown(cratedb_service):
113127
"passed to [ALTER | CREATE] TABLE statement]"
114128
)
115129
)
130+
131+
132+
@pytest.mark.skipif(
    sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier"
)
@pytest.mark.skipif(
    SA_VERSION < SA_2_0, reason="Feature not supported on SQLAlchemy 1.4 and earlier"
)
def test_float_double(cratedb_service):
    """
    Validate I/O with floating point numbers, specifically DOUBLE types.

    Motto: Do not lose precision when DOUBLE is required.

    Writes `float_double_df` to the database with `DataFrame.to_sql`, reads
    the table back with `pandas.read_sql_table`, and asserts that the data
    frame read back is exactly equal to the one written.
    """
    tablename = "pandas_double"
    engine = cratedb_service.database.engine
    float_double_df.to_sql(
        tablename,
        engine,
        if_exists="replace",
        index=False,
    )
    # Refresh the table before reading, so the rows just written are
    # visible to the subsequent query.
    cratedb_service.database.run_sql(f"REFRESH TABLE {tablename}")
    df_load = pd.read_sql_table(tablename, engine)

    # Sort both frames the same way, so the comparison does not rely on
    # the order in which the database returns the rows.
    before = float_double_df.sort_values(by="col_1", ignore_index=True)
    after = df_load.sort_values(by="col_1", ignore_index=True)

    # Show 12 decimal places in a potential assertion failure report.
    # Scope the display option with a context manager, so it is restored
    # afterwards instead of leaking into other tests of the same session.
    with pd.option_context("display.float_format", "{:.12f}".format):
        assert_equal(before, after, check_exact=True)

0 commit comments

Comments
 (0)