Skip to content

Types: Add support for BINARY columns and improve support for FLOATs #12

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ optional-dependencies.release = [
optional-dependencies.test = [
"cratedb-toolkit[testing]",
"dask[dataframe]",
"pandas<2.3",
"pandas[test]<2.3",
"pueblo>=0.0.7",
"pytest<9",
"pytest-cov<7",
Expand Down
24 changes: 24 additions & 0 deletions src/sqlalchemy_cratedb/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,30 @@
"""
return "TIMESTAMP %s" % ((type_.timezone and "WITH" or "WITHOUT") + " TIME ZONE",)

def visit_BLOB(self, type_, **kw):
    """Emit ``STRING`` as the column type name for ``BLOB`` types."""
    ddl_name = "STRING"
    return ddl_name

Check warning on line 258 in src/sqlalchemy_cratedb/compiler.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/compiler.py#L258

Added line #L258 was not covered by tests

def visit_FLOAT(self, type_, **kw):
    """
    Pick the column type name for a ``Float`` from its precision.

    Returns ``FLOAT`` when no precision is set (``None`` or ``0``) or when
    the precision is at most 24, and ``DOUBLE`` for any larger precision.

    The threshold mirrors the note on `sqlalchemy.sql.sqltypes.Float`:
    a precision <= 24 is the usual way to ask for a 4 bytes / 32 bit float
    datatype, while larger precisions call for an 8 bytes / 64 bit one.
    """
    precision = type_.precision
    # Falsy precision is tested first so that ``None`` is never compared.
    fits_single = not precision or precision <= 24
    return "FLOAT" if fits_single else "DOUBLE"


class CrateCompiler(compiler.SQLCompiler):
def visit_getitem_binary(self, binary, operator, **kw):
Expand Down
2 changes: 2 additions & 0 deletions src/sqlalchemy_cratedb/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
)
from .sa_version import SA_1_4, SA_2_0, SA_VERSION
from .type import FloatVector, ObjectArray, ObjectType
from .type.binary import LargeBinary

TYPES_MAP = {
"boolean": sqltypes.Boolean,
Expand Down Expand Up @@ -158,6 +159,7 @@ def process(value):
sqltypes.Date: Date,
sqltypes.DateTime: DateTime,
sqltypes.TIMESTAMP: DateTime,
sqltypes.LargeBinary: LargeBinary,
}


Expand Down
2 changes: 2 additions & 0 deletions src/sqlalchemy_cratedb/type/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .array import ObjectArray
from .binary import LargeBinary
from .geo import Geopoint, Geoshape
from .object import ObjectType
from .vector import FloatVector, knn_match

__all__ = [
Geopoint,
Geoshape,
LargeBinary,
ObjectArray,
ObjectType,
FloatVector,
Expand Down
44 changes: 44 additions & 0 deletions src/sqlalchemy_cratedb/type/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import base64

import sqlalchemy as sa


class LargeBinary(sa.String):
    """A type for large binary byte data.

    The :class:`.LargeBinary` type corresponds to a large and/or unlengthed
    binary type for the target platform, such as BLOB on MySQL and BYTEA for
    PostgreSQL. It also handles the necessary conversions for the DBAPI.

    This variant derives from ``sa.String``: its bind processor Base64-encodes
    values to text, and its result processor Base64-decodes them again.

    """

    # Visit name under which this type is dispatched to the type compiler.
    __visit_name__ = "large_binary"

def bind_processor(self, dialect):
    """
    Build the converter applied to bound parameter values.

    Returns ``None`` (no conversion) when the dialect has no DBAPI module.
    Otherwise returns a callable that maps ``None`` to ``None`` and any
    other value to its Base64 encoding as ``str``.
    """
    if dialect.dbapi is None:
        return None

    # TODO: DBAPIBinary = dialect.dbapi.Binary

    def to_base64(value):
        # TODO: return DBAPIBinary(value)
        if value is None:
            return None
        encoded = base64.b64encode(value)
        return encoded.decode()

    return to_base64

Check warning on line 30 in src/sqlalchemy_cratedb/type/binary.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/type/binary.py#L30

Added line #L30 was not covered by tests
Comment on lines +17 to +30
Copy link
Member Author

@amotl amotl Dec 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self, or others who want to pick this up:

Review that detail about returning a DBAPIBinary, or not.


# Python 3 has native bytes() type
# both sqlite3 and pg8000 seem to return it,
# psycopg2 as of 2.5 returns 'memoryview'
def result_processor(self, dialect, coltype):
    """
    Build the converter applied to values fetched from the database.

    Returns ``None`` (no conversion) when the dialect reports
    ``returns_native_bytes``. Otherwise returns a callable that passes
    ``None`` through and Base64-decodes any other value into ``bytes``.

    :param dialect: The dialect in use; ``returns_native_bytes`` is read
        from it when present.
    :param coltype: Column type from the cursor description (unused here).
    """
    # Read the flag defensively: a dialect object that does not define
    # `returns_native_bytes` would otherwise raise AttributeError here.
    # NOTE(review): presumably relevant for SQLAlchemy 1.4 dialects -- confirm.
    # NOTE(review): values are always stored Base64-encoded by the bind
    # processor, so it is worth confirming whether this short-circuit should
    # exist at all.
    if getattr(dialect, "returns_native_bytes", False):
        return None

    def process(value):
        if value is not None:
            return base64.b64decode(value)
        return value

    return process

Check warning on line 44 in src/sqlalchemy_cratedb/type/binary.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/type/binary.py#L44

Added line #L44 was not covered by tests
Comment on lines +32 to +44
Copy link
Member Author

@amotl amotl Dec 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self, or others who want to pick this up:

Review that detail about dialect.returns_native_bytes: Should it be handled differently, because, well, base64.b64decode actually returns native bytes already?

44 changes: 44 additions & 0 deletions tests/test_support_pandas.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import re
import sys

import pandas as pd
import pytest
from pandas._testing import assert_equal
from pueblo.testing.pandas import makeTimeDataFrame
from sqlalchemy.exc import ProgrammingError

Expand All @@ -15,6 +17,18 @@
df = makeTimeDataFrame(nper=INSERT_RECORDS, freq="S")
df["time"] = df.index

# Fixture data for `test_float_double`: two float columns, where `col_2`
# carries values written with up to 17 significant digits, so an exact
# roundtrip comparison notices any loss of precision.
float_double_data = {
"col_1": [19556.88, 629414.27, 51570.0, 2933.52, 20338.98],
"col_2": [
15379.920000000002,
1107140.42,
8081.999999999999,
1570.0300000000002,
29468.539999999997,
],
}
# DataFrame built from the dictionary above; written to the database by the test.
float_double_df = pd.DataFrame.from_dict(float_double_data)


@pytest.mark.skipif(
sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier"
Expand Down Expand Up @@ -113,3 +127,33 @@ def test_table_kwargs_unknown(cratedb_service):
"passed to [ALTER | CREATE] TABLE statement]"
)
)


@pytest.mark.skipif(
    sys.version_info < (3, 8), reason="Feature not supported on Python 3.7 and earlier"
)
@pytest.mark.skipif(
    SA_VERSION < SA_2_0, reason="Feature not supported on SQLAlchemy 1.4 and earlier"
)
def test_float_double(cratedb_service):
    """
    Validate I/O with floating point numbers, specifically DOUBLE types.

    Motto: Do not lose precision when DOUBLE is required.

    Writes `float_double_df` with `to_sql`, reads the table back with
    `read_sql_table`, and requires both frames to be exactly equal.
    """
    tablename = "pandas_double"
    engine = cratedb_service.database.engine
    float_double_df.to_sql(
        tablename,
        engine,
        if_exists="replace",
        index=False,
    )
    cratedb_service.database.run_sql(f"REFRESH TABLE {tablename}")
    df_load = pd.read_sql_table(tablename, engine)

    # Sort both frames the same way, so the comparison does not depend on
    # the order in which rows are returned.
    before = float_double_df.sort_values(by="col_1", ignore_index=True)
    after = df_load.sort_values(by="col_1", ignore_index=True)

    # Show more decimals in a failure report, but only for this comparison:
    # assigning `pd.options.display.float_format` directly would change the
    # process-wide pandas setting and leak into every test that runs later.
    with pd.option_context("display.float_format", "{:.12f}".format):
        assert_equal(before, after, check_exact=True)