Types: Improve support for FLOAT type, converging to FLOAT vs. DOUBLE

amotl · amotl · commit 09c55fc7104a · 2025-03-30T05:43:33.000+02:00
diff --git a/pyproject.toml b/pyproject.toml
@@ -110,7 +110,7 @@ optional-dependencies.release = [
 optional-dependencies.test = [
   "cratedb-toolkit[testing]",
   "dask[dataframe]",
-  "pandas<2.3",
+  "pandas[test]<2.3",
   "pueblo>=0.0.7",
   "pytest<9",
   "pytest-cov<7",
diff --git a/src/sqlalchemy_cratedb/compiler.py b/src/sqlalchemy_cratedb/compiler.py
@@ -257,6 +257,27 @@ def visit_TIMESTAMP(self, type_, **kw):
     def visit_BLOB(self, type_, **kw):
         return "STRING"
 
+    def visit_FLOAT(self, type_, **kw):
+        """
+        Render the DDL type name for `sqlalchemy.sql.sqltypes.Float`.
+
+        The `precision` of the type selects between the two names
+        returned here:
+
+        - No precision given, or a precision up to and including 24:
+          ``FLOAT``.
+        - Any larger precision: ``DOUBLE``.
+
+        The threshold of 24 follows the convention described in the
+        SQLAlchemy documentation for `Float.precision`, where a
+        precision <= 24 usually requests a 4 bytes / 32 bit float
+        datatype.
+        """
+        precision = type_.precision
+        if not precision or precision <= 24:
+            return "FLOAT"
+        return "DOUBLE"
+
 
 class CrateCompiler(compiler.SQLCompiler):
     def visit_getitem_binary(self, binary, operator, **kw):
diff --git a/tests/test_support_pandas.py b/tests/test_support_pandas.py
@@ -1,7 +1,10 @@
 import re
 import sys
 
+import pandas as pd
 import pytest
+import sqlalchemy as sa
+from pandas._testing import assert_equal
 from pueblo.testing.pandas import makeTimeDataFrame
 from sqlalchemy.exc import ProgrammingError
 
@@ -15,6 +18,18 @@
 df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")
 df["time"] = df.index
 
+float_double_data = {  # Fixture values for the FLOAT vs. DOUBLE round-trip test.
+    "col_1": [19556.88, 629414.27, 51570.0, 2933.52, 20338.98],
+    "col_2": [  # Literals here carry up to 17 significant digits.
+        15379.920000000002,
+        1107140.42,
+        8081.999999999999,
+        1570.0300000000002,
+        29468.539999999997,
+    ],
+}
+float_double_df = pd.DataFrame.from_dict(float_double_data)  # Used by `test_float_double`.
+
 
 @pytest.mark.skipif(
     sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier"
@@ -113,3 +128,28 @@ def test_table_kwargs_unknown(cratedb_service):
                 "passed to [ALTER | CREATE] TABLE statement]"
             )
         )
+
+
+def test_float_double(cratedb_service):
+    """
+    Validate I/O with floating point numbers, specifically DOUBLE types.
+
+    Motto: Do not lose precision when DOUBLE is required.
+
+    Writes `float_double_df` using `to_sql`, reads it back using
+    `read_sql_table`, and requires both frames to be exactly equal.
+    """
+    tablename = "pandas_double"
+    engine = cratedb_service.database.engine
+    with engine.connect() as conn:
+        float_double_df.to_sql(tablename, conn, if_exists="replace", index=False)
+        conn.execute(sa.text(f"REFRESH TABLE {tablename}"))
+        df_load = pd.read_sql_table(tablename, conn)
+
+    # Row order of the loaded frame is not relied upon: sort both sides.
+    before = float_double_df.sort_values(by="col_1", ignore_index=True)
+    after = df_load.sort_values(by="col_1", ignore_index=True)
+
+    # Scope the display option to the comparison, so it does not leak.
+    with pd.option_context("display.float_format", "{:.12f}".format):
+        assert_equal(before, after, check_exact=True)