Skip to content

Commit 09c55fc

Browse files
committed
Types: Improve support for FLOAT type, converging to FLOAT vs. DOUBLE
1 parent 7b12867 commit 09c55fc

File tree

3 files changed

+62
-1
lines changed

3 files changed

+62
-1
lines changed

pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ optional-dependencies.release = [
110110
optional-dependencies.test = [
111111
"cratedb-toolkit[testing]",
112112
"dask[dataframe]",
113-
"pandas<2.3",
113+
"pandas[test]<2.3",
114114
"pueblo>=0.0.7",
115115
"pytest<9",
116116
"pytest-cov<7",

src/sqlalchemy_cratedb/compiler.py

+21
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,27 @@ def visit_TIMESTAMP(self, type_, **kw):
257257
def visit_BLOB(self, type_, **kw):
    """
    Render the ``BLOB`` type as ``STRING``.

    Neither ``type_`` nor the extra keyword arguments influence the
    result; they are accepted to keep the ``visit_*`` call signature.
    """
    return "STRING"
259259

260+
def visit_FLOAT(self, type_, **kw):
    """
    Render SQLAlchemy's ``Float`` type as either ``FLOAT`` or ``DOUBLE``.

    The outcome depends solely on ``type_.precision``:

    - unset (``None``), zero, or at most 24: ``FLOAT``
    - anything greater than 24: ``DOUBLE``

    The threshold follows the note on `sqlalchemy.sql.sqltypes.Float`:
    a precision <= 24 is the usual way to ask for a 4 bytes / 32 bit
    float, while backends such as PostgreSQL and MSSQL treat a plain
    ``FLOAT`` as an alias of ``DOUBLE PRECISION`` (8 bytes / 64 bit).

    Extra keyword arguments are accepted for signature compatibility
    and are not used.
    """
    precision = type_.precision
    # A missing precision and a small one lead to the same type, so both
    # cases share a single guard.
    if not precision or precision <= 24:
        return "FLOAT"
    return "DOUBLE"
280+
260281

261282
class CrateCompiler(compiler.SQLCompiler):
262283
def visit_getitem_binary(self, binary, operator, **kw):

tests/test_support_pandas.py

+40
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import re
22
import sys
33

4+
import pandas as pd
45
import pytest
6+
import sqlalchemy as sa
7+
from pandas._testing import assert_equal
58
from pueblo.testing.pandas import makeTimeDataFrame
69
from sqlalchemy.exc import ProgrammingError
710

@@ -15,6 +18,18 @@
1518
df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")
1619
df["time"] = df.index
1720

21+
# Fixture data for the FLOAT vs. DOUBLE round-trip test. Several ``col_2``
# values are spelled out with 17 significant digits, so any narrowing of the
# stored type shows up as a mismatch under an exact comparison.
float_double_data = {
    "col_1": [
        19556.88,
        629414.27,
        51570.0,
        2933.52,
        20338.98,
    ],
    "col_2": [
        15379.920000000002,
        1107140.42,
        8081.999999999999,
        1570.0300000000002,
        29468.539999999997,
    ],
}
float_double_df = pd.DataFrame(float_double_data)
32+
1833

1934
@pytest.mark.skipif(
2035
sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier"
@@ -113,3 +128,28 @@ def test_table_kwargs_unknown(cratedb_service):
113128
"passed to [ALTER | CREATE] TABLE statement]"
114129
)
115130
)
131+
132+
133+
def test_float_double(cratedb_service):
    """
    Validate I/O with floating point numbers, specifically DOUBLE types.

    Motto: Do not lose precision when DOUBLE is required.

    The module-level ``float_double_df`` frame is written to the
    ``pandas_double`` table, read back, and compared bit-for-bit against
    the original after sorting both frames by ``col_1``.

    :param cratedb_service: fixture exposing ``database.engine``, the
        SQLAlchemy engine used for the round trip.
    """
    tablename = "pandas_double"
    engine = cratedb_service.database.engine
    with engine.connect() as conn:
        float_double_df.to_sql(
            tablename,
            conn,
            if_exists="replace",
            index=False,
        )
        # NOTE(review): presumably required so the rows just written are
        # visible to the read below -- confirm against the CrateDB docs.
        conn.execute(sa.text(f"REFRESH TABLE {tablename}"))
        df_load = pd.read_sql_table(tablename, conn)

    # Row order of the loaded frame is not relied upon; sort both sides.
    before = float_double_df.sort_values(by="col_1", ignore_index=True)
    after = df_load.sort_values(by="col_1", ignore_index=True)

    # Widen the float display only while comparing, so a failure message
    # shows enough digits without leaking the option into other tests.
    with pd.option_context("display.float_format", "{:.12f}".format):
        pd.testing.assert_frame_equal(before, after, check_exact=True)

0 commit comments

Comments
 (0)