Skip to content

Commit 4b6d3d8

Browse files
committed
fix(mssql): fix temporary table creation and implement cache
MSSQL has two kinds of temporary tables. 1. Local temporary tables, created in `tempdb.dbo`, prefixed with a single `#` and suffixed with ~~112 underscores and a number. 2. Global temporary tables, created in `tempdb.dbo`, and prefixed with a double `#`. The suffixing on local temporary tables is to avoid name collisions between separate user sessions trying to create the same temporary table name. Otherwise, the two types of temporary tables are functionally equivalent. Users need specific access to look at temporary tables that they didn't create, and all temp tables are cleaned up after the session closes and all stored procedures referencing those tables have run. Because of the slightly wacky semantics of local temporary tables, this PR makes all temp tables created by the MSSQL backend "global". This makes the names much more predictable. A user creating a temporary table with the name "hithere", will find that there is no table called "hithere" in the output of `con.list_tables()`. Instead, they will find a table called "##hithere" if they run `con.list_tables(database=("tempdb", "dbo"))`. The returned table reference from the `create_table` call works the same as any other table reference, and you can create persistent tables from temporary tables. The only major hiccough is the perpending of the `##` on the table name, but that is the behavior of MSSQL. I've also added in support for passing in explicit `catalog` and `database` to `create_table`, which was missing (I believe this was an oversight, since MSSQL definitely supports this).
1 parent 370a4de commit 4b6d3d8

File tree

3 files changed

+93
-14
lines changed

3 files changed

+93
-14
lines changed

ibis/backends/mssql/__init__.py

+70-12
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def do_connect(
133133
def get_schema(
134134
self, name: str, *, catalog: str | None = None, database: str | None = None
135135
) -> sch.Schema:
136+
if name.startswith("ibis_cache_"):
137+
catalog, database = ("tempdb", "dbo")
138+
name = "##" + name
136139
conditions = [sg.column("table_name").eq(sge.convert(name))]
137140

138141
if database is not None:
@@ -481,20 +484,56 @@ def create_table(
481484
temp: bool = False,
482485
overwrite: bool = False,
483486
) -> ir.Table:
487+
"""Create a new table.
488+
489+
Parameters
490+
----------
491+
name
492+
Name of the new table.
493+
obj
494+
An Ibis table expression or pandas table that will be used to
495+
extract the schema and the data of the new table. If not provided,
496+
`schema` must be given.
497+
schema
498+
The schema for the new table. Only one of `schema` or `obj` can be
499+
provided.
500+
database
501+
Name of the database where the table will be created, if not the
502+
default.
503+
504+
To specify a location in a separate catalog, you can pass in the
505+
catalog and database as a string `"catalog.database"`, or as a tuple of
506+
strings `("catalog", "database")`.
507+
temp
508+
Whether a table is temporary or not.
509+
All created temp tables are "Global Temporary Tables". They will be
510+
created in "tempdb.dbo" and will be prefixed with "##".
511+
overwrite
512+
Whether to clobber existing data.
513+
`overwrite` and `temp` cannot be used together with MSSQL.
514+
515+
Returns
516+
-------
517+
Table
518+
The table that was created.
519+
520+
"""
484521
if obj is None and schema is None:
485522
raise ValueError("Either `obj` or `schema` must be specified")
486523

487-
if database is not None and database != self.current_database:
488-
raise com.UnsupportedOperationError(
489-
"Creating tables in other databases is not supported by Postgres"
524+
if temp and overwrite:
525+
raise ValueError(
526+
"MSSQL doesn't support overwriting temp tables, create a new temp table instead."
490527
)
491-
else:
492-
database = None
528+
529+
table_loc = self._to_sqlglot_table(database)
530+
catalog, db = self._to_catalog_db_tuple(table_loc)
493531

494532
properties = []
495533

496534
if temp:
497535
properties.append(sge.TemporaryProperty())
536+
catalog, db = None, None
498537

499538
temp_memtable_view = None
500539
if obj is not None:
@@ -528,8 +567,10 @@ def create_table(
528567
else:
529568
temp_name = name
530569

531-
table = sg.table(temp_name, catalog=database, quoted=self.compiler.quoted)
532-
raw_table = sg.table(temp_name, catalog=database, quoted=False)
570+
table = sg.table(
571+
"#" * temp + temp_name, catalog=catalog, db=db, quoted=self.compiler.quoted
572+
)
573+
raw_table = sg.table(temp_name, catalog=catalog, db=db, quoted=False)
533574
target = sge.Schema(this=table, expressions=column_defs)
534575

535576
create_stmt = sge.Create(
@@ -538,11 +579,22 @@ def create_table(
538579
properties=sge.Properties(expressions=properties),
539580
)
540581

541-
this = sg.table(name, catalog=database, quoted=self.compiler.quoted)
542-
raw_this = sg.table(name, catalog=database, quoted=False)
582+
this = sg.table(name, catalog=catalog, db=db, quoted=self.compiler.quoted)
583+
raw_this = sg.table(name, catalog=catalog, db=db, quoted=False)
543584
with self._safe_raw_sql(create_stmt) as cur:
544585
if query is not None:
545-
insert_stmt = sge.Insert(this=table, expression=query).sql(self.dialect)
586+
# You can specify that a table is temporary for the sqlglot `Create` but not
587+
# for the subsequent `Insert`, so we need to shove a `#` in
588+
# front of the table identifier.
589+
_table = sg.table(
590+
"##" * temp + temp_name,
591+
catalog=catalog,
592+
db=db,
593+
quoted=self.compiler.quoted,
594+
)
595+
insert_stmt = sge.Insert(this=_table, expression=query).sql(
596+
self.dialect
597+
)
546598
cur.execute(insert_stmt)
547599

548600
if overwrite:
@@ -558,11 +610,17 @@ def create_table(
558610
# for in-memory reads
559611
if temp_memtable_view is not None:
560612
self.drop_table(temp_memtable_view)
561-
return self.table(name, database=database)
613+
return self.table(
614+
"##" * temp + name,
615+
database=("tempdb" * temp or catalog, "dbo" * temp or db),
616+
)
562617

563618
# preserve the input schema if it was provided
564619
return ops.DatabaseTable(
565-
name, schema=schema, source=self, namespace=ops.Namespace(database=database)
620+
name,
621+
schema=schema,
622+
source=self,
623+
namespace=ops.Namespace(catalog=catalog, database=db),
566624
).to_expr()
567625

568626
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:

ibis/backends/mssql/tests/test_client.py

+18
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,21 @@ def test_list_tables_schema_warning_refactor(con):
134134

135135
assert con.list_tables(database="msdb.dbo", like="restore") == restore_tables
136136
assert con.list_tables(database=("msdb", "dbo"), like="restore") == restore_tables
137+
138+
139+
def test_create_temp_table_from_obj(con):
140+
obj = {"team": ["john", "joe"]}
141+
142+
t = con.create_table("team", obj, temp=True)
143+
144+
t2 = con.table("##team", database="tempdb.dbo")
145+
146+
assert t.to_pyarrow().equals(t2.to_pyarrow())
147+
148+
persisted_from_temp = con.create_table("fuhreal", t2)
149+
150+
assert "fuhreal" in con.list_tables()
151+
152+
assert persisted_from_temp.to_pyarrow().equals(t2.to_pyarrow())
153+
154+
con.drop_table("fuhreal")

ibis/backends/tests/test_client.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,11 @@ def test_create_table(backend, con, temp_table, func, sch):
129129
["pyspark", "trino", "exasol", "risingwave"],
130130
reason="No support for temp tables",
131131
),
132-
pytest.mark.broken(["mssql"], reason="Incorrect temp table syntax"),
132+
pytest.mark.notyet(
133+
["mssql"],
134+
reason="Can't rename temp tables",
135+
raises=ValueError,
136+
),
133137
pytest.mark.broken(
134138
["bigquery"],
135139
reason="tables created with temp=True cause a 404 on retrieval",
@@ -1722,7 +1726,6 @@ def test_json_to_pyarrow(con):
17221726
assert result == expected
17231727

17241728

1725-
@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError)
17261729
@pytest.mark.notyet(
17271730
["risingwave", "exasol"],
17281731
raises=com.UnsupportedOperationError,

0 commit comments

Comments
 (0)