Skip to content

SQLAlchemy: Fix SQL statement caching for CrateDB's OBJECT type #559

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ Unreleased

- Allow handling datetime values tagged with time zone info when inserting or updating.

- SQLAlchemy: Fix SQL statement caching for CrateDB's ``OBJECT`` type.


2023/04/18 0.31.1
=================
Expand Down
9 changes: 8 additions & 1 deletion src/crate/client/sqlalchemy/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,10 @@
from .dialect_test import SqlAlchemyDialectTest
from .function_test import SqlAlchemyFunctionTest
from .warnings_test import SqlAlchemyWarningsTest
from .query_caching import SqlAlchemyQueryCompilationCaching


def test_suite():
def test_suite_unit():
tests = TestSuite()
tests.addTest(makeSuite(SqlAlchemyConnectionTest))
tests.addTest(makeSuite(SqlAlchemyDictTypeTest))
Expand All @@ -42,3 +43,9 @@ def test_suite():
tests.addTest(makeSuite(SqlAlchemyArrayTypeTest))
tests.addTest(makeSuite(SqlAlchemyWarningsTest))
return tests


def test_suite_integration():
tests = TestSuite()
tests.addTest(makeSuite(SqlAlchemyQueryCompilationCaching))
return tests
117 changes: 117 additions & 0 deletions src/crate/client/sqlalchemy/tests/query_caching.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# -*- coding: utf-8; -*-
#
# Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
# license agreements. See the NOTICE file distributed with this work for
# additional information regarding copyright ownership. Crate licenses
# this file to you under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. You may
# obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
#
# However, if you have executed another commercial license agreement
# with Crate these terms will supersede the license and you may use the
# software solely pursuant to the terms of the relevant commercial agreement.

from __future__ import absolute_import
from unittest import TestCase, skipIf

import sqlalchemy as sa
from sqlalchemy.orm import Session
from sqlalchemy.sql.operators import eq

from crate.client.sqlalchemy import SA_VERSION, SA_1_4
from crate.testing.settings import crate_host

try:
from sqlalchemy.orm import declarative_base
except ImportError:
from sqlalchemy.ext.declarative import declarative_base

from crate.client.sqlalchemy.types import Object, ObjectArray


class SqlAlchemyQueryCompilationCaching(TestCase):

def setUp(self):
self.engine = sa.create_engine(f"crate://{crate_host}")
self.metadata = sa.MetaData(schema="testdrive")
self.session = Session(bind=self.engine)
self.Character = self.setup_entity()

def setup_entity(self):
"""
Define ORM entity.
"""
Base = declarative_base(metadata=self.metadata)

class Character(Base):
__tablename__ = 'characters'
name = sa.Column(sa.String, primary_key=True)
age = sa.Column(sa.Integer)
data = sa.Column(Object)
data_list = sa.Column(ObjectArray)

return Character

def setup_data(self):
"""
Insert two records into the `characters` table.
"""
self.metadata.drop_all(self.engine)
self.metadata.create_all(self.engine)

Character = self.Character
char1 = Character(name='Trillian', data={'x': 1}, data_list=[{'foo': 1, 'bar': 10}])
char2 = Character(name='Slartibartfast', data={'y': 2}, data_list=[{'bar': 2}])
self.session.add(char1)
self.session.add(char2)
self.session.commit()
self.session.execute(sa.text("REFRESH TABLE testdrive.characters;"))

@skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'")
def test_object_multiple_select(self):
"""
The SQLAlchemy implementation of CrateDB's `OBJECT` type offers indexed
access to the instance's content in form of a dictionary. Thus, it must
not use `cache_ok = True` on its implementation, i.e. this part of the
compiled SQL clause must not be cached.

This test verifies that two subsequent `SELECT` statements are translated
well, and don't trip on incorrect SQL compiled statement caching.
"""
self.setup_data()
Character = self.Character

selectable = sa.select(Character).where(Character.data['x'] == 1)
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"x": 1}, result)

selectable = sa.select(Character).where(Character.data['y'] == 2)
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"y": 2}, result)
Copy link
Member

@seut seut Jul 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this test then failing without the followup commit to change cache_ok to False? Such both commits should be squashed to avoid having failing tests inside our commit history. (e.g. using git bisect to identify some broken case would raise false positive failures)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right. Commits have been squashed now. Thanks.


@skipIf(SA_VERSION < SA_1_4, "On SA13, the 'ResultProxy' object has no attribute 'scalar_one'")
def test_objectarray_multiple_select(self):
"""
The SQLAlchemy implementation of CrateDB's `ARRAY` type in form of the
`ObjectArray`, does *not* offer indexed access to the instance's content.
Thus, using `cache_ok = True` on that type should be sane, and not mess
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but now we disable cache for both cases, no? and it's not configurable by the user?

Copy link
Member Author

@amotl amotl Jul 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Caching of compiled statements is only turned off for OBJECT types, implemented on behalf of _Craty. See #559 (review).

It is not configurable by the user, unless she would be monkeypatching it. It is a good thing, because otherwise, it will return wrong results.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies if I'm still confused, but Character is still a custom type, I don't get when this cache is used and when not.

Copy link
Member Author

@amotl amotl Jul 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, and thanks for asking. Maybe this short introduction to SQLAlchemy's SQL compilation caching helps to understand the problem scope.

SQLAlchemy as of version 1.4 includes a SQL compilation caching facility which will allow Core and ORM SQL constructs to cache their stringified form, along with other structural information used to fetch results from the statement, allowing the relatively expensive string compilation process to be skipped when another structurally equivalent construct is next used. This system relies upon functionality that is implemented for all SQL constructs, including objects such as Column, select(), and TypeEngine objects, to produce a cache key which fully represents their state to the degree that it affects the SQL compilation process.

-- https://docs.sqlalchemy.org/en/20/faq/performance.html ff.

Copy link
Member Author

@amotl amotl Jul 5, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, accessing the OBJECT implementation _Craty using dict-like indexing breaks that machinery.

sa.select(Character).where(Character.data['x'] == 1)
sa.select(Character).where(Character.data['y'] == 2)

Second statement uses cached SQL statement from first one, and things go south. As far as I've investigated, it is only happening on this occasion, and not with other data types which do not offer indexed access.

up SQLAlchemy's SQL compiled statement caching.
"""
self.setup_data()
Character = self.Character

selectable = sa.select(Character).where(Character.data_list['foo'].any(1, operator=eq))
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"x": 1}, result)

selectable = sa.select(Character).where(Character.data_list['bar'].any(2, operator=eq))
result = self.session.execute(selectable).scalar_one().data
self.assertEqual({"y": 2}, result)
2 changes: 1 addition & 1 deletion src/crate/client/sqlalchemy/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def __eq__(self, other):


class _Craty(sqltypes.UserDefinedType):
cache_ok = True
cache_ok = False
Comment on lines 134 to +135
Copy link
Member Author

@amotl amotl Jul 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The effective change is this one. The other code is for testing only.

-- https://docs.sqlalchemy.org/en/20/core/type_api.html#sqlalchemy.types.ExternalType.cache_ok

Copy link
Member Author

@amotl amotl Jul 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order to eventually re-enable SQL statement caching in this context, it may be advisable to look at SQLAlchemy's implementation for PostgreSQL's JSONB type, and its contains() method, or at the corresponding implementation for MSSQL's JSON type and the Comparator.as_json() accessor it is using for that purpose.

-- https://stackoverflow.com/a/39467149

Copy link
Member Author

@amotl amotl Jul 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I played a bit with the _Craty implementation for OBJECT types within the crate.client.sqlalchemy.types module, trying to make this Python statement based on Comparator.as_integer() work, which I believe would be correct in that sense, and where I would hope the SQLAlchemy machinery would handle it well, without getting the statement caching wrong.

selectable = sa.select(Character).where(Character.data['x'].as_integer() == 1)

However, I got different exceptions, so I think it will need further investigations to make this work. In that manner, I suggest to postpone it to a later iteration, and release the bugfix without further ado.

Copy link
Member Author

@amotl amotl Jul 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GH-560 will track what has been suggested above.


class Comparator(sqltypes.TypeEngine.Comparator):

Expand Down
6 changes: 4 additions & 2 deletions src/crate/client/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
TestCrateJsonEncoder,
TestDefaultSchemaHeader,
)
from .sqlalchemy.tests import test_suite as sqlalchemy_test_suite
from .sqlalchemy.tests import test_suite_unit as sqlalchemy_test_suite_unit
from .sqlalchemy.tests import test_suite_integration as sqlalchemy_test_suite_integration

log = logging.getLogger('crate.testing.layer')
ch = logging.StreamHandler()
Expand Down Expand Up @@ -344,7 +345,7 @@ def test_suite():
suite.addTest(unittest.makeSuite(TestUsernameSentAsHeader))
suite.addTest(unittest.makeSuite(TestCrateJsonEncoder))
suite.addTest(unittest.makeSuite(TestDefaultSchemaHeader))
suite.addTest(sqlalchemy_test_suite())
suite.addTest(sqlalchemy_test_suite_unit())
suite.addTest(doctest.DocTestSuite('crate.client.connection'))
suite.addTest(doctest.DocTestSuite('crate.client.http'))

Expand Down Expand Up @@ -394,6 +395,7 @@ def test_suite():
encoding='utf-8'
)
s.layer = ensure_cratedb_layer()
s.addTest(sqlalchemy_test_suite_integration())
suite.addTest(s)

return suite