Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/adapter/src/catalog/consistency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -483,8 +483,16 @@ impl CatalogState {
});
}

// Make sure the items in the schema are consistent.
for (item_name, item_id) in &schema.items {
// Make sure the items in the schema are consistent. A schema
// holds items, types, and functions in separate maps, and the
// create_sql of all three must stay consistent, e.g. after a
// schema rename.
for (item_name, item_id) in schema
.items
.iter()
.chain(schema.types.iter())
.chain(schema.functions.iter())
{
let Some(entry) = self.entry_by_id.get(item_id) else {
item_inconsistencies.push(ItemInconsistency::NonExistentItem {
db_id: *db_id,
Expand Down
35 changes: 27 additions & 8 deletions src/adapter/src/catalog/transact.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2349,11 +2349,22 @@ impl Catalog {
let database = state.get_database(&database_id);
let database_name = &database.name;

let mut updates = Vec::new();
let mut updates: Vec<CatalogItemId> = Vec::new();
let mut items_to_update = BTreeMap::new();

let mut update_item = |id| {
if items_to_update.contains_key(id) {
// Dedup guard covering both persistent and temporary items. An
// item can be reached more than once: once as a member of the
// schema and again for each object in the schema it depends on.
// Persistent items are also tracked in `items_to_update`, but
// temporary items only land in `temporary_item_updates` (a Vec
// with no dedup), so a temp dependent referencing multiple
// objects in the renamed schema would otherwise be enqueued once
// per reference. That produces duplicate retraction/addition
// updates that consolidate to an invalid diff (e.g. -2) and
// panic catalog apply.
let mut seen: BTreeSet<CatalogItemId> = BTreeSet::new();

let mut update_item = |id: &CatalogItemId| {
if !seen.insert(*id) {
return Ok(());
}

Expand Down Expand Up @@ -2381,13 +2392,21 @@ impl Catalog {
temporary_item_updates.push((entry.clone().into(), StateDiff::Retraction));
temporary_item_updates.push((new_entry.into(), StateDiff::Addition));
}
updates.push(id);
updates.push(*id);

Ok::<_, AdapterError>(())
};

// Update all of the items in the schema.
for (_name, item_id) in &schema.items {
// Update all of the items in the schema. A schema holds items,
// types, and functions in separate maps, and any of them may be
// referenced by another object's create_sql via a schema-qualified
// name, so all three must be rewritten.
for (_name, item_id) in schema
.items
.iter()
.chain(schema.types.iter())
.chain(schema.functions.iter())
{
// Update the item itself.
update_item(item_id)?;

Expand Down Expand Up @@ -2435,7 +2454,7 @@ impl Catalog {
tx.update_schema(schema_id, new_schema.into())?;

for id in updates {
Self::log_update(state, id);
Self::log_update(state, &id);
}
}
Op::UpdateOwner { id, new_owner } => {
Expand Down
24 changes: 23 additions & 1 deletion src/sql/src/ast/transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::ast::{
AstInfo, CreateConnectionStatement, CreateIndexStatement, CreateMaterializedViewStatement,
CreateSecretStatement, CreateSinkStatement, CreateSourceStatement, CreateSubsourceStatement,
CreateTableStatement, CreateViewStatement, CreateWebhookSourceStatement, Expr, Ident, Query,
Raw, RawItemName, Statement, UnresolvedItemName, ViewDefinition,
Raw, RawDataType, RawItemName, Statement, UnresolvedItemName, ViewDefinition,
};
use crate::names::FullItemName;

Expand Down Expand Up @@ -119,6 +119,28 @@ impl<'a, 'ast> VisitMut<'ast, Raw> for CreateSqlRewriteSchema<'a> {
self.maybe_rewrite_idents(&mut unresolved_item_name.0);
}

fn visit_data_type_mut(&mut self, data_type: &'ast mut RawDataType) {
// The generated `visit_data_type_mut` is a no-op because `DataType` is an
// associated type the generic visitor treats opaquely, so we must descend
// by hand. A type is referenced by a schema-qualified name precisely in
// these data type positions (a cast, a column type, or a nested element
// type), so without this a schema rename leaves stale references in any
// dependent that uses one of the schema's types.
match data_type {
RawDataType::Array(element_type) | RawDataType::List(element_type) => {
self.visit_data_type_mut(element_type)
}
RawDataType::Map {
key_type,
value_type,
} => {
self.visit_data_type_mut(key_type);
self.visit_data_type_mut(value_type);
}
RawDataType::Other { name, .. } => self.visit_item_name_mut(name),
}
}

fn visit_item_name_mut(
&mut self,
item_name: &'ast mut <mz_sql_parser::ast::Raw as AstInfo>::ItemName,
Expand Down
98 changes: 97 additions & 1 deletion test/restart/mzcompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,12 +634,20 @@ def workflow_drop_materialize_database(c: Composition) -> None:
# Verify that materialize hasn't blown up
c.sql("SELECT 1")

# Restore for next tests
# Restore for next tests. The recreated database is owned by mz_system, so
# the materialize role must be re-granted the database-level privileges
# (notably CREATE, needed to create schemas) and the public schema
# privileges it holds by default.
c.sql(
"CREATE DATABASE materialize",
port=6877,
user="mz_system",
)
c.sql(
"GRANT ALL PRIVILEGES ON DATABASE materialize TO materialize",
port=6877,
user="mz_system",
)
c.sql(
"GRANT ALL PRIVILEGES ON SCHEMA materialize.public TO materialize",
port=6877,
Expand Down Expand Up @@ -985,6 +993,94 @@ def user_id_nums(c: Composition, name_prefix: str) -> list[int]:
c.sql("DROP TABLE idreuse_t1")


def workflow_rename_schema_types_functions(c: Composition) -> None:
"""Verify that ALTER SCHEMA RENAME updates references to a renamed schema's types.

A type is only ever referenced by a schema-qualified name in "data type"
position: a cast, a table column type, or a nested element type. Every kind
of dependent object (view, materialized view, table, another type) reaches
the type the same way, so all of them must have their create_sql rewritten
on rename.

Regression test for three related bugs:

1. transact.rs RenameSchema only iterated schema.items, missing schema.types
(and schema.functions). The renamed schema's own types kept stale
create_sql, which fails to re-parse on restart (the original panic).

2. transform.rs CreateSqlRewriteSchema never descended into data types, so
references to a renamed schema's types inside dependents' create_sql
(casts, column types, element types) were left pointing at the old name.

3. consistency.rs check_items() only iterated schema.items, so a type with
invalid create_sql after a rename was never flagged by the checker.

The persisted create_sql is only re-parsed on boot, so the corruption is
invisible until a restart, after which the stale references fail to resolve.
"""

c.up("materialized")

# Create a schema with a custom type, then exercise every object kind that
# can reference that type by a schema-qualified name.
c.sql("CREATE SCHEMA s1")
c.sql("CREATE TYPE s1.mytype AS LIST (ELEMENT TYPE = int4)")
# View: references the type in a cast.
c.sql("CREATE VIEW public.v_uses_type AS SELECT NULL::s1.mytype")
# Materialized view: same, but persisted as a separate object kind.
c.sql("CREATE MATERIALIZED VIEW public.mv_uses_type AS SELECT NULL::s1.mytype")
# Table: references the type as a column type.
c.sql("CREATE TABLE public.t_uses_type (a s1.mytype)")
# Type-in-type: an outer type in another schema whose element type is the
# renamed schema's type (nested data type position).
c.sql("CREATE TYPE public.outer_type AS LIST (ELEMENT TYPE = s1.mytype)")

# Sanity: everything works before rename.
assert c.sql_query("SELECT count(*) FROM public.v_uses_type")[0][0] == 1
assert c.sql_query("SELECT count(*) FROM public.mv_uses_type")[0][0] == 1
assert c.sql_query("SELECT count(*) FROM public.t_uses_type")[0][0] == 0

# Rename the schema.
c.sql("ALTER SCHEMA s1 RENAME TO s2")

# Restart Materialize. The persisted create_sql is re-parsed on boot, so any
# dependent whose create_sql still references the old schema name "s1" (which
# no longer exists) fails to resolve here.
c.kill("materialized")
c.up("materialized")

# After restart, every dependent must still be queryable.
assert c.sql_query("SELECT count(*) FROM public.v_uses_type")[0][0] == 1
assert c.sql_query("SELECT count(*) FROM public.mv_uses_type")[0][0] == 1
assert c.sql_query("SELECT count(*) FROM public.t_uses_type")[0][0] == 0

# Every object's create_sql must reference the new schema name, never the old
# one. This covers the type itself and each kind of dependent.
checks = [
("mz_types", "mytype"),
("mz_types", "outer_type"),
("mz_views", "v_uses_type"),
("mz_materialized_views", "mv_uses_type"),
("mz_tables", "t_uses_type"),
]
for catalog_table, name in checks:
result = c.sql_query(
f"SELECT create_sql FROM {catalog_table} WHERE name = '{name}'"
)
create_sql = result[0][0]
assert (
'"s2"' in create_sql and '"s1"' not in create_sql
), f"{name} create_sql still references old schema after rename: {create_sql}"

# Cleanup.
c.sql("DROP TABLE public.t_uses_type")
c.sql("DROP MATERIALIZED VIEW public.mv_uses_type")
c.sql("DROP VIEW public.v_uses_type")
c.sql("DROP TYPE public.outer_type")
c.sql("DROP TYPE s2.mytype")
c.sql("DROP SCHEMA s2")


def workflow_default(c: Composition) -> None:
def process(name: str) -> None:
if name == "default":
Expand Down
6 changes: 6 additions & 0 deletions test/sqllogictest/mzcompose.py
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,9 @@ def compileFastSltConfig() -> SltRunConfig:
"test/sqllogictest/regproc.slt",
"test/sqllogictest/regressions.slt",
"test/sqllogictest/regtype.slt",
# Selects from temporary objects, which cannot be wrapped in the
# persistent indexed views that --auto-index-selects creates.
"test/sqllogictest/rename.slt",
"test/sqllogictest/returning.slt",
"test/sqllogictest/replacement-materialized-views.slt",
"test/sqllogictest/role.slt",
Expand Down Expand Up @@ -980,6 +983,9 @@ def compileSlowSltConfig() -> SltRunConfig:
"test/sqllogictest/replacement-materialized-views.slt",
}
tests_no_auto_index_selects = {
# Selects from temporary objects, which cannot be wrapped in the
# persistent indexed views that --auto-index-selects creates.
"test/sqllogictest/rename.slt",
# pg_typeof contains public schema name in views
"test/sqllogictest/cast.slt",
"test/sqllogictest/map.slt",
Expand Down
137 changes: 137 additions & 0 deletions test/sqllogictest/rename.slt
Original file line number Diff line number Diff line change
Expand Up @@ -718,3 +718,140 @@ EXPLAIN PHYSICAL PLAN FOR MATERIALIZED VIEW s_rename_metainfo_renamed.mv

statement ok
EXPLAIN OPTIMIZED PLAN FOR INDEX s_rename_metainfo_renamed.idx_in_schema

# Regression test: ALTER SCHEMA ... RENAME TO ... must rewrite references to the
# schema's user-defined types. A type is referenced by a schema-qualified name
# only in "data type" position (a cast, a column type, a nested element type),
# which lives in a separate map (schema.types) from regular items and is reached
# through a different AST node than item references. Before the fix, the renamed
# schema's own types and every dependent that used one kept create_sql pointing
# at the old schema name, which then failed to re-parse on restart.
#
# NOTE: assertions here read the raw `create_sql` column, not `SHOW CREATE`.
# `SHOW CREATE` re-resolves names against the current catalog (by id), so it
# would print the new name even if the stored `create_sql` were never rewritten,
# masking the bug. The raw column is what gets re-parsed on boot.

statement ok
CREATE SCHEMA ty1

statement ok
CREATE TYPE ty1.mylist AS LIST (ELEMENT TYPE = int4)

# Every object kind that can reference a type by a schema-qualified name.

statement ok
CREATE VIEW ty_view AS SELECT NULL::ty1.mylist AS c

statement ok
CREATE MATERIALIZED VIEW ty_mv AS SELECT NULL::ty1.mylist AS c

statement ok
CREATE TABLE ty_table (a ty1.mylist)

statement ok
CREATE TYPE ty_outer AS LIST (ELEMENT TYPE = ty1.mylist)

statement ok
ALTER SCHEMA ty1 RENAME TO ty2

# Everything still resolves and is queryable after the rename.

query I
SELECT count(*) FROM ty_view
----
1

query I
SELECT count(*) FROM ty_mv
----
1

query I
SELECT count(*) FROM ty_table
----
0

# The type's own raw create_sql, and every dependent's, must reference the new
# schema name and never the old one. Each row appears only if its create_sql was
# correctly rewritten, so a missing row signals a stale reference.

query T rowsort
SELECT name FROM mz_types
WHERE name IN ('mylist', 'ty_outer')
AND create_sql LIKE '%ty2%' AND create_sql NOT LIKE '%ty1%'
UNION ALL
SELECT name FROM mz_views
WHERE name = 'ty_view'
AND create_sql LIKE '%ty2%' AND create_sql NOT LIKE '%ty1%'
UNION ALL
SELECT name FROM mz_materialized_views
WHERE name = 'ty_mv'
AND create_sql LIKE '%ty2%' AND create_sql NOT LIKE '%ty1%'
UNION ALL
SELECT name FROM mz_tables
WHERE name = 'ty_table'
AND create_sql LIKE '%ty2%' AND create_sql NOT LIKE '%ty1%'
----
mylist
ty_mv
ty_outer
ty_table
ty_view

# Regression test for the temp-object double-enqueue hazard in ALTER SCHEMA
# RENAME (SQL-401 / database-issues#9975). A temporary object is never a member
# of the renamed schema (temp objects live in mz_temp), so the rename reaches it
# only as a dependent, once for each object in the schema it references. The
# rename loop deduplicates persistent items via items_to_update, but temporary
# items are appended straight to temporary_item_updates with no dedup, so a temp
# object that references two objects in the renamed schema used to be enqueued
# twice: two identical retractions and two identical additions. Catalog apply
# consolidates updates by kind, so the duplicates collapse to a diff of +/-2 and
# trip the "diff other than -1 or 1" assertion, panicking the coordinator.

# Case 1: temp object referencing two types in the renamed schema.

statement ok
CREATE SCHEMA tydup1

statement ok
CREATE TYPE tydup1.tdl AS LIST (ELEMENT TYPE = int4)

statement ok
CREATE TYPE tydup1.tdr AS LIST (ELEMENT TYPE = int4)

statement ok
CREATE TEMP TABLE tmp_two_types (a tydup1.tdl, b tydup1.tdr)

statement ok
ALTER SCHEMA tydup1 RENAME TO tydup2

query I
SELECT count(*) FROM tmp_two_types
----
0

# Case 2: temp object referencing two persistent items in the renamed schema.
# This triggers the same panic without any type support, via a temp view's two
# table references, i.e. the original SQL-401 report.

statement ok
CREATE SCHEMA titem1

statement ok
CREATE TABLE titem1.a (x int)

statement ok
CREATE TABLE titem1.b (y int)

statement ok
CREATE TEMP VIEW tmp_two_tables AS SELECT x, y FROM titem1.a, titem1.b

statement ok
ALTER SCHEMA titem1 RENAME TO titem2

query I
SELECT count(*) FROM tmp_two_tables
----
0
Loading