Skip to content

Commit 2fc8dd7

Browse files
author
Sean Loiselle
authored
Merge pull request #27040 from sploiselle/v099-global-id-migration-fix
v.0.99: global id migration fix
2 parents af45975 + aaa43c1 commit 2fc8dd7

File tree

3 files changed

+225
-15
lines changed

3 files changed

+225
-15
lines changed

src/adapter/src/catalog/apply.rs

Lines changed: 155 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@
1010
//! Logic related to applying updates from a [`mz_catalog::durable::DurableCatalogState`] to a
1111
//! [`CatalogState`].
1212
13-
use std::collections::{BTreeMap, BTreeSet};
13+
use std::collections::{BTreeMap, BTreeSet, VecDeque};
1414
use std::fmt::Debug;
15+
use std::str::FromStr;
1516

1617
use mz_catalog::builtin::{Builtin, BUILTIN_LOG_LOOKUP, BUILTIN_LOOKUP};
1718
use mz_catalog::memory::error::{Error, ErrorKind};
@@ -32,9 +33,9 @@ use mz_sql::names::{
3233
ItemQualifiers, QualifiedItemName, QualifiedSchemaName, ResolvedDatabaseSpecifier, ResolvedIds,
3334
SchemaSpecifier,
3435
};
35-
use mz_sql::rbac;
3636
use mz_sql::session::user::MZ_SYSTEM_ROLE_ID;
3737
use mz_sql::session::vars::{VarError, VarInput};
38+
use mz_sql::{plan, rbac};
3839
use mz_sql_parser::ast::Expr;
3940
use mz_storage_types::sources::Timeline;
4041
use once_cell::sync::Lazy;
@@ -46,6 +47,8 @@ use crate::AdapterError;
4647

4748
enum ApplyUpdateError {
4849
Error(Error),
50+
AwaitingIdDependency((GlobalId, mz_catalog::durable::Item, Diff)),
51+
AwaitingNameDependency((String, mz_catalog::durable::Item, Diff)),
4952
}
5053

5154
impl CatalogState {
@@ -66,13 +69,126 @@ impl CatalogState {
6669
&mut self,
6770
updates: Vec<StateUpdate>,
6871
) -> Result<(), Error> {
69-
for StateUpdate { kind, diff } in updates {
72+
// TODO(v100): we can refactor this to assume that items are
73+
// topologically sorted by their `GlobalId`.
74+
let mut awaiting_id_dependencies: BTreeMap<GlobalId, Vec<_>> = BTreeMap::new();
75+
let mut awaiting_name_dependencies: BTreeMap<String, Vec<_>> = BTreeMap::new();
76+
let mut updates: VecDeque<_> = updates.into_iter().collect();
77+
while let Some(StateUpdate { kind, diff }) = updates.pop_front() {
7078
assert_eq!(
7179
diff, 1,
7280
"initial catalog updates should be consolidated: ({kind:?}, {diff:?})"
7381
);
74-
self.apply_update(kind, diff)
75-
.map_err(|ApplyUpdateError::Error(err)| err)?;
82+
match self.apply_update(kind, diff) {
83+
Ok(None) => {}
84+
Ok(Some(id)) => {
85+
// Enqueue any items waiting on this dependency.
86+
let mut resolved_dependent_items = Vec::new();
87+
if let Some(dependent_items) = awaiting_id_dependencies.remove(&id) {
88+
resolved_dependent_items.extend(dependent_items);
89+
}
90+
let entry = self.get_entry(&id);
91+
let full_name = self.resolve_full_name(entry.name(), None);
92+
if let Some(dependent_items) =
93+
awaiting_name_dependencies.remove(&full_name.to_string())
94+
{
95+
resolved_dependent_items.extend(dependent_items);
96+
}
97+
let resolved_dependent_items =
98+
resolved_dependent_items
99+
.into_iter()
100+
.map(|(item, diff)| StateUpdate {
101+
kind: StateUpdateKind::Item(item),
102+
diff,
103+
});
104+
updates.extend(resolved_dependent_items);
105+
}
106+
Err(ApplyUpdateError::Error(err)) => return Err(err),
107+
Err(ApplyUpdateError::AwaitingIdDependency((id, item, diff))) => {
108+
awaiting_id_dependencies
109+
.entry(id)
110+
.or_default()
111+
.push((item, diff));
112+
}
113+
Err(ApplyUpdateError::AwaitingNameDependency((name, item, diff))) => {
114+
awaiting_name_dependencies
115+
.entry(name)
116+
.or_default()
117+
.push((item, diff));
118+
}
119+
}
120+
}
121+
122+
// Error on any unsatisfied dependencies.
123+
if let Some((missing_dep, mut dependents)) = awaiting_id_dependencies.into_iter().next() {
124+
let (
125+
mz_catalog::durable::Item {
126+
id,
127+
oid: _,
128+
schema_id,
129+
name,
130+
create_sql: _,
131+
owner_id: _,
132+
privileges: _,
133+
},
134+
diff,
135+
) = dependents.remove(0);
136+
let schema = self.find_non_temp_schema(&schema_id);
137+
let name = QualifiedItemName {
138+
qualifiers: ItemQualifiers {
139+
database_spec: schema.database().clone(),
140+
schema_spec: schema.id().clone(),
141+
},
142+
item: name,
143+
};
144+
let name = self.resolve_full_name(&name, None);
145+
let action = if diff == 1 { "deserialize" } else { "remove" };
146+
147+
return Err(Error::new(ErrorKind::Corruption {
148+
detail: format!(
149+
"failed to {} item {} ({}): {}",
150+
action,
151+
id,
152+
name,
153+
plan::PlanError::InvalidId(missing_dep)
154+
),
155+
}));
156+
}
157+
158+
if let Some((missing_dep, mut dependents)) = awaiting_name_dependencies.into_iter().next() {
159+
let (
160+
mz_catalog::durable::Item {
161+
id,
162+
oid: _,
163+
schema_id,
164+
name,
165+
create_sql: _,
166+
owner_id: _,
167+
privileges: _,
168+
},
169+
diff,
170+
) = dependents.remove(0);
171+
let schema = self.find_non_temp_schema(&schema_id);
172+
let name = QualifiedItemName {
173+
qualifiers: ItemQualifiers {
174+
database_spec: schema.database().clone(),
175+
schema_spec: schema.id().clone(),
176+
},
177+
item: name,
178+
};
179+
let name = self.resolve_full_name(&name, None);
180+
let action = if diff == 1 { "deserialize" } else { "remove" };
181+
return Err(Error::new(ErrorKind::Corruption {
182+
detail: format!(
183+
"failed to {} item {} ({}): {}",
184+
action,
185+
id,
186+
name,
187+
Error {
188+
kind: ErrorKind::Sql(SqlCatalogError::UnknownItem(missing_dep))
189+
}
190+
),
191+
}));
76192
}
77193

78194
Ok(())
@@ -615,12 +731,18 @@ impl CatalogState {
615731
/// Applies an item update to `self`.
616732
///
617733
/// Returns a `GlobalId` on success, if the applied update added a new `GlobalID` to `self`.
734+
/// Returns a dependency on failure, if the update could not be applied due to a missing
735+
/// dependency.
618736
#[instrument(level = "debug")]
619737
fn apply_item_update(
620738
&mut self,
621739
item: mz_catalog::durable::Item,
622740
diff: Diff,
623741
) -> Result<Option<GlobalId>, ApplyUpdateError> {
742+
// If we knew beforehand that the items were being applied in dependency
743+
// order, then we could fully delegate to `self.insert_item(...)` and`self.drop_item(...)`.
744+
// However, we don't know that items are applied in dependency order, so we must handle the
745+
// case that the item is valid, but we haven't applied all of its dependencies yet.
624746
match diff {
625747
1 => {
626748
// TODO(benesch): a better way of detecting when a view has depended
@@ -641,6 +763,27 @@ impl CatalogState {
641763
},
642764
)));
643765
}
766+
// If we were missing a dependency, wait for it to be added.
767+
Err(AdapterError::PlanError(plan::PlanError::InvalidId(missing_dep))) => {
768+
return Err(ApplyUpdateError::AwaitingIdDependency((
769+
missing_dep,
770+
item,
771+
diff,
772+
)));
773+
}
774+
// If we were missing a dependency, wait for it to be added.
775+
Err(AdapterError::PlanError(plan::PlanError::Catalog(
776+
SqlCatalogError::UnknownItem(missing_dep),
777+
))) => {
778+
return match GlobalId::from_str(&missing_dep) {
779+
Ok(id) => Err(ApplyUpdateError::AwaitingIdDependency((id, item, diff))),
780+
Err(_) => Err(ApplyUpdateError::AwaitingNameDependency((
781+
missing_dep,
782+
item,
783+
diff,
784+
))),
785+
}
786+
}
644787
Err(e) => {
645788
let schema = self.find_non_temp_schema(&item.schema_id);
646789
let name = QualifiedItemName {
@@ -678,6 +821,13 @@ impl CatalogState {
678821
Ok(Some(item.id))
679822
}
680823
-1 => {
824+
let entry = self.get_entry(&item.id);
825+
if let Some(id) = entry.referenced_by().first() {
826+
return Err(ApplyUpdateError::AwaitingIdDependency((*id, item, diff)));
827+
}
828+
if let Some(id) = entry.used_by().first() {
829+
return Err(ApplyUpdateError::AwaitingIdDependency((*id, item, diff)));
830+
}
681831
self.drop_item(item.id);
682832
Ok(None)
683833
}

src/adapter/src/catalog/migrate.rs

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ where
8383
pub(crate) async fn migrate(
8484
state: &CatalogState,
8585
tx: &mut Transaction<'_>,
86-
_now: NowFn,
86+
now: NowFn,
8787
_boot_ts: Timestamp,
8888
_connection_context: &ConnectionContext,
8989
) -> Result<(), anyhow::Error> {
@@ -158,6 +158,7 @@ pub(crate) async fn migrate(
158158
// Each migration should be a function that takes `tx` and `conn_cat` as
159159
// input and stages arbitrary transformations to the catalog on `tx`.
160160
mysql_subsources_remove_unnecessary_db_name(tx, &conn_cat)?;
161+
fix_dependency_order_0_99_1(tx, &conn_cat, now)?;
161162

162163
info!(
163164
"migration from catalog version {:?} complete",
@@ -178,7 +179,7 @@ pub(crate) async fn migrate(
178179
/// `GlobalId`s they wish to reassign to `needs_new_id`.
179180
/// - Assumes all items in `needs_new_id` are present in `conn_catalog` and that
180181
/// their types do not change.
181-
fn _assign_new_user_global_ids(
182+
fn assign_new_user_global_ids(
182183
tx: &mut Transaction<'_>,
183184
conn_catalog: &ConnCatalog,
184185
now: NowFn,
@@ -260,7 +261,7 @@ fn _assign_new_user_global_ids(
260261

261262
let object_type = entry.item_type().into();
262263

263-
_add_to_audit_log(
264+
add_to_audit_log(
264265
tx,
265266
mz_audit_log::EventType::Create,
266267
object_type,
@@ -271,7 +272,7 @@ fn _assign_new_user_global_ids(
271272
occurred_at,
272273
)?;
273274

274-
_add_to_audit_log(
275+
add_to_audit_log(
275276
tx,
276277
mz_audit_log::EventType::Drop,
277278
object_type,
@@ -416,7 +417,7 @@ fn _assign_new_user_global_ids(
416417
// Please include the adapter team on any code reviews that add or edit
417418
// migrations.
418419

419-
fn _add_to_audit_log(
420+
fn add_to_audit_log(
420421
tx: &mut Transaction,
421422
event_type: mz_audit_log::EventType,
422423
object_type: mz_audit_log::ObjectType,
@@ -646,6 +647,65 @@ fn mysql_subsources_remove_unnecessary_db_name(
646647
Ok(())
647648
}
648649

650+
/// In v0.98.x, we inverted the dependency order of sources + subsources and
651+
/// migrated those items, as well as their dependencies, to new IDs.
652+
/// Unfortunately, because of a bug, this did not place items in the correct
653+
/// order and it was possible that some items depend on items with IDs less than
654+
/// its own.
655+
///
656+
/// This PR goes back and fixes any IDs with that broken relationship.
657+
fn fix_dependency_order_0_99_1(
658+
tx: &mut Transaction<'_>,
659+
conn_catalog: &ConnCatalog,
660+
now: NowFn,
661+
) -> Result<(), anyhow::Error> {
662+
// A vector in the _new_ dependency order. Because the `vec` doesn't have
663+
// any built-in de-duplication, we will need to deduplicate these values
664+
// elsewhere.
665+
let mut needs_new_id = vec![];
666+
667+
for item in conn_catalog.get_items() {
668+
for dependent_id in item.used_by() {
669+
// If an item has a dependent with an ID less than its own, that
670+
// dependent needs an ID that will be pushed forward ahead of this
671+
// ID.
672+
if *dependent_id < item.id() {
673+
needs_new_id.push(*dependent_id);
674+
}
675+
}
676+
}
677+
678+
let mut remaining_updates = std::collections::VecDeque::from_iter(needs_new_id.drain(..));
679+
680+
let state = conn_catalog.state();
681+
682+
// If this item's ID must be moved forward, so too must every item that
683+
// depends on it except for its primary source ID.
684+
while let Some(id) = remaining_updates.pop_front() {
685+
needs_new_id.push(id);
686+
// If we push this item forward, we must all push forward all IDs that
687+
// refer to this ID (i.e. we need to ensure that all of an item's
688+
// dependents have IDs greater than its own).
689+
remaining_updates.extend(state.get_entry(&id).used_by().iter().cloned());
690+
}
691+
692+
// Ensure that each ID is present only once and that it is in the greatest
693+
// position.
694+
let mut id_dedup = BTreeSet::new();
695+
needs_new_id = needs_new_id
696+
.into_iter()
697+
.rev()
698+
.filter(|id| id_dedup.insert(*id))
699+
.collect();
700+
701+
// Flip this back around in the right order.
702+
needs_new_id = needs_new_id.into_iter().rev().collect();
703+
704+
assign_new_user_global_ids(tx, conn_catalog, now, needs_new_id)?;
705+
706+
Ok(())
707+
}
708+
649709
// Durable migrations
650710

651711
/// Migrations that run only on the durable catalog before any data is loaded into memory.

src/adapter/src/coord.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1499,13 +1499,13 @@ impl Coordinator {
14991499
// beneficial.
15001500
!entry.id().is_user()
15011501
|| entry
1502-
.used_by()
1502+
.uses()
15031503
.iter()
1504-
.all(|dependent_id| *dependent_id > entry.id),
1505-
"user item dependencies should respect `GlobalId`'s PartialOrd \
1506-
but {:?} depends on {:?}",
1504+
.all(|dependency_id| *dependency_id < entry.id),
1505+
"entries should only use to items with lesser `GlobalId`s, but \
1506+
but {:?} uses {:?}",
15071507
entry.id,
1508-
entry.used_by()
1508+
entry.uses()
15091509
);
15101510

15111511
debug!(

0 commit comments

Comments
 (0)