
Commit 644ef91

Get ready for test automation
formatting
Make sql client change backwards compatible, fix broken tests
fix the integration workflow and linting
fix broken tests
fix path
lint + install missing sqlalchemy library
skip cache for one run
fix integration so that it doesn't uninstall sqlalchemy

Signed-off-by: Ben Cassell <[email protected]>
1 parent 1b469c0 commit 644ef91

20 files changed: +916 −716 lines

.github/workflows/code-quality-checks.yml

+2-2
@@ -1,5 +1,5 @@
 name: Code Quality Checks
-on:
+on:
   push:
     branches:
       - main
@@ -157,7 +157,7 @@ jobs:
       - name: Install library
         run: poetry install --no-interaction
       #----------------------------------------------
-      # black the code
+      # mypy the code
       #----------------------------------------------
       - name: Mypy
         run: poetry run mypy --install-types --non-interactive src

.github/workflows/integration.yml

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
name: Integration Tests
2+
on:
3+
push:
4+
paths-ignore:
5+
- "**.MD"
6+
- "**.md"
7+
8+
jobs:
9+
run-e2e-tests:
10+
runs-on: ubuntu-latest
11+
environment: azure-prod
12+
env:
13+
DATABRICKS_SERVER_HOSTNAME: ${{ secrets.DATABRICKS_HOST }}
14+
DATABRICKS_HTTP_PATH: ${{ secrets.TEST_PECO_WAREHOUSE_HTTP_PATH }}
15+
DATABRICKS_TOKEN: ${{ secrets.DATABRICKS_TOKEN }}
16+
DATABRICKS_CATALOG: peco
17+
DATABRICKS_USER: ${{ secrets.TEST_PECO_SP_ID }}
18+
steps:
19+
#----------------------------------------------
20+
# check-out repo and set-up python
21+
#----------------------------------------------
22+
- name: Check out repository
23+
uses: actions/checkout@v3
24+
- name: Set up python
25+
id: setup-python
26+
uses: actions/setup-python@v4
27+
with:
28+
python-version: "3.10"
29+
#----------------------------------------------
30+
# ----- install & configure poetry -----
31+
#----------------------------------------------
32+
- name: Install Poetry
33+
uses: snok/install-poetry@v1
34+
with:
35+
virtualenvs-create: true
36+
virtualenvs-in-project: true
37+
installer-parallel: true
38+
39+
#----------------------------------------------
40+
# load cached venv if cache exists
41+
#----------------------------------------------
42+
- name: Load cached venv
43+
id: cached-poetry-dependencies
44+
uses: actions/cache@v2
45+
with:
46+
path: .venv
47+
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }}
48+
#----------------------------------------------
49+
# install dependencies if cache does not exist
50+
#----------------------------------------------
51+
- name: Install dependencies
52+
run: poetry install --no-interaction --all-extras
53+
#----------------------------------------------
54+
# run test suite
55+
#----------------------------------------------
56+
- name: Run e2e tests
57+
run: poetry run python -m pytest tests/e2e
58+
- name: Run SQL Alchemy tests
59+
run: poetry run python -m pytest src/databricks/sqlalchemy/test_local
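Note: the workflow wires the five DATABRICKS_* variables from repository secrets. When running the same suites locally, a quick pre-flight check along these lines (a hypothetical helper, not part of this commit) avoids confusing mid-run failures:

import os
import sys

# Mirrors the env block of the integration workflow above.
REQUIRED = [
    "DATABRICKS_SERVER_HOSTNAME",
    "DATABRICKS_HTTP_PATH",
    "DATABRICKS_TOKEN",
    "DATABRICKS_CATALOG",
    "DATABRICKS_USER",
]

missing = [name for name in REQUIRED if not os.getenv(name)]
if missing:
    sys.exit(f"Missing environment variables: {', '.join(missing)}")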

conftest.py

+44
@@ -0,0 +1,44 @@
+import os
+import pytest
+
+
+@pytest.fixture(scope="session")
+def host():
+    return os.getenv("DATABRICKS_SERVER_HOSTNAME")
+
+
+@pytest.fixture(scope="session")
+def http_path():
+    return os.getenv("DATABRICKS_HTTP_PATH")
+
+
+@pytest.fixture(scope="session")
+def access_token():
+    return os.getenv("DATABRICKS_TOKEN")
+
+
+@pytest.fixture(scope="session")
+def ingestion_user():
+    return os.getenv("DATABRICKS_USER")
+
+
+@pytest.fixture(scope="session")
+def catalog():
+    return os.getenv("DATABRICKS_CATALOG")
+
+
+@pytest.fixture(scope="session")
+def schema():
+    return os.getenv("DATABRICKS_SCHEMA", "default")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def connection_details(host, http_path, access_token, ingestion_user, catalog, schema):
+    return {
+        "host": host,
+        "http_path": http_path,
+        "access_token": access_token,
+        "ingestion_user": ingestion_user,
+        "catalog": catalog,
+        "schema": schema,
+    }
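Note: these session-scoped fixtures replace the ad-hoc os.environ lookups the e2e tests used previously; a test consumes them by naming the fixture as a parameter. A minimal hypothetical consumer, assuming the driver's public databricks.sql.connect entry point:

from databricks import sql


def test_connectivity(connection_details):
    # `connection_details` is the session fixture defined above.
    with sql.connect(
        server_hostname=connection_details["host"],
        http_path=connection_details["http_path"],
        access_token=connection_details["access_token"],
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute("SELECT 1")
            assert cursor.fetchone()[0] == 1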

pyproject.toml

+1
@@ -58,6 +58,7 @@ exclude = ['ttypes\.py$', 'TCLIService\.py$']
 exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/'

 [tool.pytest.ini_options]
+markers = {"reviewed" = "Test case has been reviewed by Databricks"}
 minversion = "6.0"
 log_cli = "false"
 log_cli_level = "INFO"
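Note: registering the marker here keeps pytest from warning about (or, under --strict-markers, rejecting) the `reviewed` tag. Usage is standard pytest; a hypothetical example:

import pytest


@pytest.mark.reviewed  # registered in pyproject.toml above
def test_round_trip():
    assert 1 + 1 == 2

# Select only the reviewed subset:
#   poetry run pytest -m reviewed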

src/databricks/sql/client.py

+6-3
@@ -605,12 +605,15 @@ def _handle_staging_operation(
                 "Local file operations are restricted to paths within the configured staging_allowed_local_path"
             )

-        # TODO: Experiment with DBR sending real headers.
-        # The specification says headers will be in JSON format but the current null value is actually an empty list []
+        # May be real headers, or could be json string
+        headers = (
+            json.loads(row.headers) if isinstance(row.headers, str) else row.headers
+        )
+
         handler_args = {
             "presigned_url": row.presignedUrl,
             "local_file": abs_localFile,
-            "headers": json.loads(row.headers or "{}"),
+            "headers": dict(headers) or {},
         }

         logger.debug(
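Note: the backwards-compatible behavior is easier to see in isolation. A sketch of the normalization, slightly more defensive than the committed expression, assuming (as the diff does) that `row.headers` may arrive as a JSON string, a list/dict of header pairs, or empty:

import json


def normalize_headers(raw):
    # Older DBR responses send headers as a JSON string (with [] as the
    # de facto null); newer ones may send real header structures.
    if isinstance(raw, str):
        raw = json.loads(raw)  # e.g. '{"x-amz-acl": "private"}' or '[]'
    return dict(raw or {})  # [] and None both collapse to {}

normalize_headers("[]"), normalize_headers(None), and normalize_headers([("a", "b")]) return {}, {}, and {"a": "b"} respectively.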

src/databricks/sqlalchemy/pytest.ini

-3
This file was deleted.

src/databricks/sqlalchemy/test_local/e2e/test_basic.py

+26-27
@@ -1,6 +1,5 @@
 import datetime
 import decimal
-import os
 from typing import Tuple, Union, List
 from unittest import skipIf

@@ -19,7 +18,7 @@
 from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column
 from sqlalchemy.schema import DropColumnComment, SetColumnComment
-from sqlalchemy.types import BOOLEAN, DECIMAL, Date, DateTime, Integer, String
+from sqlalchemy.types import BOOLEAN, DECIMAL, Date, Integer, String

 try:
     from sqlalchemy.orm import declarative_base
@@ -49,12 +48,12 @@ def version_agnostic_select(object_to_select, *args, **kwargs):
     return select(object_to_select, *args, **kwargs)


-def version_agnostic_connect_arguments(catalog=None, schema=None) -> Tuple[str, dict]:
-    HOST = os.environ.get("host")
-    HTTP_PATH = os.environ.get("http_path")
-    ACCESS_TOKEN = os.environ.get("access_token")
-    CATALOG = catalog or os.environ.get("catalog")
-    SCHEMA = schema or os.environ.get("schema")
+def version_agnostic_connect_arguments(connection_details) -> Tuple[str, dict]:
+    HOST = connection_details["host"]
+    HTTP_PATH = connection_details["http_path"]
+    ACCESS_TOKEN = connection_details["access_token"]
+    CATALOG = connection_details["catalog"]
+    SCHEMA = connection_details["schema"]

     ua_connect_args = {"_user_agent_entry": USER_AGENT_TOKEN}

@@ -77,8 +76,8 @@ def version_agnostic_connect_arguments(catalog=None, schema=None) -> Tuple[str,


 @pytest.fixture
-def db_engine() -> Engine:
-    conn_string, connect_args = version_agnostic_connect_arguments()
+def db_engine(connection_details) -> Engine:
+    conn_string, connect_args = version_agnostic_connect_arguments(connection_details)
     return create_engine(conn_string, connect_args=connect_args)


@@ -92,10 +91,11 @@ def run_query(db_engine: Engine, query: Union[str, Text]):


 @pytest.fixture
-def samples_engine() -> Engine:
-    conn_string, connect_args = version_agnostic_connect_arguments(
-        catalog="samples", schema="nyctaxi"
-    )
+def samples_engine(connection_details) -> Engine:
+    details = connection_details.copy()
+    details["catalog"] = "samples"
+    details["schema"] = "nyctaxi"
+    conn_string, connect_args = version_agnostic_connect_arguments(details)
     return create_engine(conn_string, connect_args=connect_args)

@@ -141,7 +141,7 @@ def test_connect_args(db_engine):
 def test_pandas_upload(db_engine, metadata_obj):
     import pandas as pd

-    SCHEMA = os.environ.get("schema")
+    SCHEMA = "default"
     try:
         df = pd.read_excel(
             "src/databricks/sqlalchemy/test_local/e2e/demo_data/MOCK_DATA.xlsx"
@@ -409,7 +409,9 @@ def test_get_table_names_smoke_test(samples_engine: Engine):
         _names is not None, "get_table_names did not succeed"


-def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine):
+def test_has_table_across_schemas(
+    db_engine: Engine, samples_engine: Engine, catalog: str, schema: str
+):
     """For this test to pass these conditions must be met:
     - Table samples.nyctaxi.trips must exist
     - Table samples.tpch.customer must exist
@@ -426,9 +428,6 @@ def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine):
         )

     # 3) Check for a table within a different catalog
-    other_catalog = os.environ.get("catalog")
-    other_schema = os.environ.get("schema")
-
     # Create a table in a different catalog
     with db_engine.connect() as conn:
         conn.execute(text("CREATE TABLE test_has_table (numbers_are_cool INT);"))
@@ -442,8 +441,8 @@ def test_has_table_across_schemas(db_engine: Engine, samples_engine: Engine):
         assert samples_engine.dialect.has_table(
             connection=conn,
             table_name="test_has_table",
-            schema=other_schema,
-            catalog=other_catalog,
+            schema=schema,
+            catalog=catalog,
         )
     finally:
         conn.execute(text("DROP TABLE test_has_table;"))
@@ -503,12 +502,12 @@ def test_get_columns(db_engine, sample_table: str):

 class TestCommentReflection:
     @pytest.fixture(scope="class")
-    def engine(self):
-        HOST = os.environ.get("host")
-        HTTP_PATH = os.environ.get("http_path")
-        ACCESS_TOKEN = os.environ.get("access_token")
-        CATALOG = os.environ.get("catalog")
-        SCHEMA = os.environ.get("schema")
+    def engine(self, connection_details: dict):
+        HOST = connection_details["host"]
+        HTTP_PATH = connection_details["http_path"]
+        ACCESS_TOKEN = connection_details["access_token"]
+        CATALOG = connection_details["catalog"]
+        SCHEMA = connection_details["schema"]

         connection_string = f"databricks://token:{ACCESS_TOKEN}@{HOST}?http_path={HTTP_PATH}&catalog={CATALOG}&schema={SCHEMA}"
         connect_args = {"_user_agent_entry": USER_AGENT_TOKEN}
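Note: every engine in this file now funnels through the `connection_details` fixture. For reference, the URL the fixtures build has the shape visible in the `engine` fixture above; a standalone sketch:

def build_connection_string(d: dict) -> str:
    # d is the `connection_details` dict from the root conftest.py.
    return (
        f"databricks://token:{d['access_token']}@{d['host']}"
        f"?http_path={d['http_path']}&catalog={d['catalog']}&schema={d['schema']}"
    )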

src/databricks/sqlalchemy/test_local/test_parsing.py

+4-4
@@ -64,16 +64,16 @@ def test_extract_3l_namespace_from_bad_constraint_string():
         extract_three_level_identifier_from_constraint_string(input)


-@pytest.mark.parametrize("schema", [None, "some_schema"])
-def test_build_fk_dict(schema):
+@pytest.mark.parametrize("tschema", [None, "some_schema"])
+def test_build_fk_dict(tschema):
     fk_constraint_string = "FOREIGN KEY (`parent_user_id`) REFERENCES `main`.`some_schema`.`users` (`user_id`)"

-    result = build_fk_dict("some_fk_name", fk_constraint_string, schema_name=schema)
+    result = build_fk_dict("some_fk_name", fk_constraint_string, schema_name=tschema)

     assert result == {
         "name": "some_fk_name",
         "constrained_columns": ["parent_user_id"],
-        "referred_schema": schema,
+        "referred_schema": tschema,
         "referred_table": "users",
         "referred_columns": ["user_id"],
     }
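Note: the rename is presumably needed because the new root conftest.py defines a session-scoped `schema` fixture, so a parametrize argument named `schema` would shadow it. A hypothetical minimal illustration of the two names coexisting:

import pytest


@pytest.fixture(scope="session")
def schema():
    return "default"


@pytest.mark.parametrize("tschema", [None, "some_schema"])
def test_no_collision(schema, tschema):
    # Fixture and parameter are now available side by side.
    assert schema == "default"
    assert tschema in (None, "some_schema")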

test.env.example

+6-6
@@ -1,11 +1,11 @@
 # Authentication details for running e2e tests
-host=""
-http_path=""
-access_token=""
+DATABRICKS_SERVER_HOSTNAME=
+DATABRICKS_HTTP_PATH=
+DATABRICKS_TOKEN=

 # Only required to run the PySQLStagingIngestionTestSuite
-staging_ingestion_user=""
+DATABRICKS_USER=

 # Only required to run SQLAlchemy tests
-catalog=""
-schema=""
+DATABRICKS_CATALOG=
+DATABRICKS_SCHEMA=
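Note: the conftest fixtures read these names via os.getenv, so the file's contents must reach the environment before pytest runs. One hypothetical way, assuming the python-dotenv package (not a dependency this commit adds):

from dotenv import load_dotenv

load_dotenv("test.env")  # populates the DATABRICKS_* variables for the fixtures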
