Skip to content

Preview: Bundle a few improvements #215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Changelog

## Unreleased
- Added canonical [PostgreSQL client parameter `sslmode`], implementing
`sslmode=require` to connect to SSL-enabled CrateDB instances without
verifying the server certificate.

[PostgreSQL client parameter `sslmode`]: https://www.postgresql.org/docs/current/libpq-ssl.html#LIBPQ-SSL-PROTECTION

## 2025/01/30 0.41.0
- Dependencies: Updated to `crate-2.0.0`, which uses `orjson` for JSON marshalling
Expand Down
4 changes: 2 additions & 2 deletions docs/inspection-reflection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ Create a SQLAlchemy table object:
Reflect column data types from the table metadata:

>>> table.columns.get('name')
Column('name', String(), table=<characters>)
Column('name', VARCHAR(), table=<characters>)

>>> table.primary_key
PrimaryKeyConstraint(Column('id', String(), table=<characters>, primary_key=True...
PrimaryKeyConstraint(Column('id', VARCHAR(), table=<characters>, primary_key=True...


CrateDialect
Expand Down
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ requires = [

[project]
name = "sqlalchemy-cratedb"
#dynamic = [
# "version",
#]
version = "0.42.0.dev2"
description = "SQLAlchemy dialect for CrateDB."
readme = "README.md"
keywords = [
Expand Down Expand Up @@ -77,9 +81,6 @@ classifiers = [
"Topic :: Text Processing",
"Topic :: Utilities",
]
dynamic = [
"version",
]
dependencies = [
"backports.zoneinfo<1; python_version<'3.9'",
"crate>=2,<3",
Expand Down Expand Up @@ -110,7 +111,7 @@ optional-dependencies.release = [
optional-dependencies.test = [
"cratedb-toolkit[testing]",
"dask[dataframe]",
"pandas<2.3",
"pandas[test]<2.3",
"pueblo>=0.0.7",
"pytest<9",
"pytest-cov<7",
Expand Down
41 changes: 41 additions & 0 deletions src/sqlalchemy_cratedb/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,17 @@
)
return

def visit_create_index(self, create, **kw) -> str:
    """
    Handle `CREATE INDEX`, which CrateDB does not support.

    A warning is emitted, and the no-op statement `SELECT 1` is returned
    in place of any index DDL.
    """
    message = (
        "CrateDB does not support `CREATE INDEX` statements, "
        "they will be omitted when generating DDL statements."
    )
    warnings.warn(message, stacklevel=2)
    return "SELECT 1"

Check warning on line 212 in src/sqlalchemy_cratedb/compiler.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/compiler.py#L212

Added line #L212 was not covered by tests


class CrateTypeCompiler(compiler.GenericTypeCompiler):
def visit_string(self, type_, **kw):
Expand Down Expand Up @@ -254,6 +265,36 @@
"""
return "TIMESTAMP %s" % ((type_.timezone and "WITH" or "WITHOUT") + " TIME ZONE",)

def visit_BLOB(self, type_, **kw):
    """Render the `BLOB` type as `STRING`."""
    rendered = "STRING"
    return rendered

Check warning on line 269 in src/sqlalchemy_cratedb/compiler.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/compiler.py#L269

Added line #L269 was not covered by tests

def visit_FLOAT(self, type_, **kw):
    """
    Render a float type as `FLOAT` or `DOUBLE`, depending on its precision.

    Without a precision, or with a precision of 24 or less, the type is
    rendered as `FLOAT`. Any larger precision is rendered as `DOUBLE`.

    Background, from the documentation of `sqlalchemy.sql.sqltypes.Float`:
    When no precision is provided, some backends may compile the type as
    an 8 bytes / 64 bit float datatype. To use a 4 bytes / 32 bit float
    datatype, a precision <= 24 can usually be provided, or the `REAL`
    type can be used.
    """
    precision = type_.precision
    # A missing or zero precision is falsy and falls through to `FLOAT`.
    if precision and precision > 24:
        return "DOUBLE"
    return "FLOAT"

def visit_JSON(self, type_, **kw):
    """Render the `JSON` type as `OBJECT`."""
    rendered = "OBJECT"
    return rendered

Check warning on line 293 in src/sqlalchemy_cratedb/compiler.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/compiler.py#L293

Added line #L293 was not covered by tests

def visit_JSONB(self, type_, **kw):
    """Render the `JSONB` type as `OBJECT`."""
    rendered = "OBJECT"
    return rendered

Check warning on line 296 in src/sqlalchemy_cratedb/compiler.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/compiler.py#L296

Added line #L296 was not covered by tests


class CrateCompiler(compiler.SQLCompiler):
def visit_getitem_binary(self, binary, operator, **kw):
Expand Down
82 changes: 55 additions & 27 deletions src/sqlalchemy_cratedb/dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,46 +34,60 @@
)
from .sa_version import SA_1_4, SA_2_0, SA_VERSION
from .type import FloatVector, ObjectArray, ObjectType
from .type.binary import LargeBinary

# For SQLAlchemy >= 1.1.
TYPES_MAP = {
"boolean": sqltypes.Boolean,
"short": sqltypes.SmallInteger,
"smallint": sqltypes.SmallInteger,
"timestamp": sqltypes.TIMESTAMP(timezone=False),
"boolean": sqltypes.BOOLEAN,
"short": sqltypes.SMALLINT,
"smallint": sqltypes.SMALLINT,
"timestamp": sqltypes.TIMESTAMP,
"timestamp with time zone": sqltypes.TIMESTAMP(timezone=True),
"timestamp without time zone": sqltypes.TIMESTAMP(timezone=False),
"object": ObjectType,
"integer": sqltypes.Integer,
"long": sqltypes.NUMERIC,
"bigint": sqltypes.NUMERIC,
"object_array": ObjectArray, # TODO: Can this also be improved to use `sqltypes.ARRAY`?
"integer": sqltypes.INTEGER,
"long": sqltypes.BIGINT,
"bigint": sqltypes.BIGINT,
"float": sqltypes.FLOAT,
"double": sqltypes.DECIMAL,
"double precision": sqltypes.DECIMAL,
"object_array": ObjectArray,
"float": sqltypes.Float,
"real": sqltypes.Float,
"string": sqltypes.String,
"text": sqltypes.String,
"real": sqltypes.REAL,
"string": sqltypes.VARCHAR,
"text": sqltypes.VARCHAR,
"float_vector": FloatVector,
}

# Needed for SQLAlchemy >= 1.1.
# TODO: Dissolve.
# For SQLAlchemy >= 1.4.
try:
from sqlalchemy.types import ARRAY

TYPES_MAP["integer_array"] = ARRAY(sqltypes.Integer)
TYPES_MAP["boolean_array"] = ARRAY(sqltypes.Boolean)
TYPES_MAP["short_array"] = ARRAY(sqltypes.SmallInteger)
TYPES_MAP["smallint_array"] = ARRAY(sqltypes.SmallInteger)
TYPES_MAP["integer_array"] = ARRAY(sqltypes.INTEGER)
TYPES_MAP["boolean_array"] = ARRAY(sqltypes.BOOLEAN)
TYPES_MAP["short_array"] = ARRAY(sqltypes.SMALLINT)
TYPES_MAP["smallint_array"] = ARRAY(sqltypes.SMALLINT)
TYPES_MAP["timestamp_array"] = ARRAY(sqltypes.TIMESTAMP)
TYPES_MAP["timestamp_array"] = ARRAY(sqltypes.TIMESTAMP(timezone=False))
TYPES_MAP["timestamp with time zone_array"] = ARRAY(sqltypes.TIMESTAMP(timezone=True))
TYPES_MAP["long_array"] = ARRAY(sqltypes.NUMERIC)
TYPES_MAP["bigint_array"] = ARRAY(sqltypes.NUMERIC)
TYPES_MAP["double_array"] = ARRAY(sqltypes.DECIMAL)
TYPES_MAP["double precision_array"] = ARRAY(sqltypes.DECIMAL)
TYPES_MAP["float_array"] = ARRAY(sqltypes.Float)
TYPES_MAP["real_array"] = ARRAY(sqltypes.Float)
TYPES_MAP["string_array"] = ARRAY(sqltypes.String)
TYPES_MAP["text_array"] = ARRAY(sqltypes.String)
TYPES_MAP["long_array"] = ARRAY(sqltypes.BIGINT)
TYPES_MAP["bigint_array"] = ARRAY(sqltypes.BIGINT)
TYPES_MAP["float_array"] = ARRAY(sqltypes.FLOAT)
TYPES_MAP["real_array"] = ARRAY(sqltypes.REAL)
TYPES_MAP["string_array"] = ARRAY(sqltypes.VARCHAR)
TYPES_MAP["text_array"] = ARRAY(sqltypes.VARCHAR)
except Exception: # noqa: S110
pass

Check warning on line 79 in src/sqlalchemy_cratedb/dialect.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/dialect.py#L78-L79

Added lines #L78 - L79 were not covered by tests

# For SQLAlchemy >= 2.0.
try:
from sqlalchemy.types import DOUBLE, DOUBLE_PRECISION

TYPES_MAP["real"] = DOUBLE
TYPES_MAP["real_array"] = ARRAY(DOUBLE)
TYPES_MAP["double"] = DOUBLE
TYPES_MAP["double_array"] = ARRAY(DOUBLE)
TYPES_MAP["double precision"] = DOUBLE_PRECISION
TYPES_MAP["double precision_array"] = ARRAY(DOUBLE_PRECISION)
except Exception: # noqa: S110
pass

Expand Down Expand Up @@ -158,6 +172,7 @@
sqltypes.Date: Date,
sqltypes.DateTime: DateTime,
sqltypes.TIMESTAMP: DateTime,
sqltypes.LargeBinary: LargeBinary,
}


Expand Down Expand Up @@ -206,6 +221,15 @@
# start with _. Adding it here causes sqlalchemy to quote such columns.
self.identifier_preparer.illegal_initial_characters.add("_")

def get_isolation_level_values(self, dbapi_conn):
    """Return the accepted isolation level names, which is always an empty tuple."""
    return tuple()

Check warning on line 225 in src/sqlalchemy_cratedb/dialect.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/dialect.py#L225

Added line #L225 was not covered by tests

def set_isolation_level(self, dbapi_connection, level):
    """Accept an isolation level request and do nothing with it."""
    return None

Check warning on line 228 in src/sqlalchemy_cratedb/dialect.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/dialect.py#L228

Added line #L228 was not covered by tests

def get_isolation_level(self, dbapi_connection):
    """Report the isolation level, which is always the string `NONE`."""
    level = "NONE"
    return level

Check warning on line 231 in src/sqlalchemy_cratedb/dialect.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/dialect.py#L231

Added line #L231 was not covered by tests

def initialize(self, connection):
# get lowest server version
self.server_version_info = self._get_server_version_info(connection)
Expand All @@ -228,8 +252,12 @@
servers = to_list(server)
if servers:
use_ssl = asbool(kwargs.pop("ssl", False))
if use_ssl:
# TODO: Switch to the canonical default `sslmode=prefer` later.
sslmode = kwargs.pop("sslmode", "disable")
if use_ssl or sslmode in ["allow", "prefer", "require", "verify-ca", "verify-full"]:
servers = ["https://" + server for server in servers]
if sslmode == "require":
kwargs["verify_ssl_cert"] = False

Check warning on line 260 in src/sqlalchemy_cratedb/dialect.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/dialect.py#L260

Added line #L260 was not covered by tests
return self.dbapi.connect(servers=servers, **kwargs)
return self.dbapi.connect(**kwargs)

Expand Down
2 changes: 2 additions & 0 deletions src/sqlalchemy_cratedb/type/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .array import ObjectArray
from .binary import LargeBinary
from .geo import Geopoint, Geoshape
from .object import ObjectType
from .vector import FloatVector, knn_match

__all__ = [
Geopoint,
Geoshape,
LargeBinary,
ObjectArray,
ObjectType,
FloatVector,
Expand Down
5 changes: 5 additions & 0 deletions src/sqlalchemy_cratedb/type/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@
self.operator = operator


# TODO: Should this be inherited from PostgreSQL's
# `ARRAY`, in order to improve type checking?
class _ObjectArray(sqltypes.UserDefinedType):
cache_ok = True

Expand Down Expand Up @@ -139,5 +141,8 @@
def get_col_spec(self, **kws):
return "ARRAY(OBJECT)"

def as_generic(self, **kwargs):
    """
    Return SQLAlchemy's generic `ARRAY` type as the generic counterpart.

    Keyword arguments are accepted and ignored.
    """
    # NOTE(review): this returns the `ARRAY` class itself, not an
    # instance -- confirm that callers accept a type class here.
    generic_type = sqltypes.ARRAY
    return generic_type

Check warning on line 145 in src/sqlalchemy_cratedb/type/array.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/type/array.py#L145

Added line #L145 was not covered by tests


ObjectArray = MutableList.as_mutable(_ObjectArray)
44 changes: 44 additions & 0 deletions src/sqlalchemy_cratedb/type/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import base64

import sqlalchemy as sa


class LargeBinary(sa.String):
    """A type for binary byte data, stored as Base64-encoded text.

    The type derives from :class:`sqlalchemy.String`. Values bound as
    ``bytes`` are Base64-encoded into a string before being handed to the
    driver, and fetched values are Base64-decoded back into ``bytes``.
    """

    __visit_name__ = "large_binary"

    def bind_processor(self, dialect):
        """Return a converter from ``bytes`` to Base64 text.

        Returns ``None``, i.e. no conversion, when the dialect has no DBAPI.
        """
        if dialect.dbapi is None:
            return None

        # TODO: DBAPIBinary = dialect.dbapi.Binary

        def process(value):
            if value is None:
                return None
            # TODO: return DBAPIBinary(value)
            return base64.b64encode(value).decode()

        return process

    def result_processor(self, dialect, coltype):
        """Return a converter from Base64 text back to ``bytes``.

        Returns ``None``, i.e. no conversion, when the dialect declares
        that its driver returns native bytes.
        """
        # `returns_native_bytes` is not defined on dialects of older
        # SQLAlchemy releases, so treat a missing attribute as `False`
        # instead of raising `AttributeError`.
        if getattr(dialect, "returns_native_bytes", False):
            return None

        def process(value):
            if value is not None:
                return base64.b64decode(value)
            return value

        return process

Check warning on line 44 in src/sqlalchemy_cratedb/type/binary.py

View check run for this annotation

Codecov / codecov/patch

src/sqlalchemy_cratedb/type/binary.py#L44

Added line #L44 was not covered by tests
14 changes: 10 additions & 4 deletions tests/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

from crate.client import connect

from sqlalchemy_cratedb.sa_version import SA_2_0, SA_VERSION
from sqlalchemy_cratedb.sa_version import SA_1_4, SA_2_0, SA_VERSION
from tests.settings import crate_host

log = logging.getLogger()
Expand Down Expand Up @@ -179,16 +179,22 @@ def create_test_suite():
"docs/crud.rst",
"docs/working-with-types.rst",
"docs/advanced-querying.rst",
"docs/inspection-reflection.rst",
]

# Don't run DataFrame integration tests on SQLAlchemy 1.3 and Python 3.7.
skip_dataframe = SA_VERSION < SA_2_0 or sys.version_info < (3, 8) or sys.version_info >= (3, 13)
# Don't run DataFrame integration tests on SQLAlchemy 1.4 and earlier, or Python 3.7.
skip_dataframe = SA_VERSION < SA_2_0 or sys.version_info < (3, 8)
if not skip_dataframe:
sqlalchemy_integration_tests += [
"docs/dataframe.rst",
]

# Don't run reflection integration tests on SQLAlchemy 1.3 and earlier when running on Python 3.10 or 3.11.
skip_reflection = SA_VERSION < SA_1_4 and (3, 10) <= sys.version_info < (3, 12)
if not skip_reflection:
sqlalchemy_integration_tests += [
"docs/inspection-reflection.rst",
]

s = doctest.DocFileSuite(
*sqlalchemy_integration_tests,
module_relative=False,
Expand Down
4 changes: 4 additions & 0 deletions tests/test_schema.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from unittest import skipIf

import sqlalchemy as sa

from sqlalchemy_cratedb.sa_version import SA_1_4, SA_VERSION
from tests.conftest import TESTDRIVE_DATA_SCHEMA


@skipIf(SA_VERSION < SA_1_4, "Does not work correctly on SQLAlchemy 1.3")
def test_correct_schema(cratedb_service):
"""
Tests that the correct schema is being picked up.
Expand Down
Loading