-
Notifications
You must be signed in to change notification settings - Fork 41
Don't rollback update history after migration #2674
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -737,18 +737,82 @@ class UpdateHistory( | |
historyId: Long | ||
)(implicit tc: TraceContext): Future[Unit] = { | ||
val previousMigrationId = domainMigrationInfo.currentMigrationId - 1 | ||
domainMigrationInfo.acsRecordTime match { | ||
case Some(acsRecordTime) => | ||
domainMigrationInfo.migrationTimeInfo match { | ||
case Some(info) => | ||
for { | ||
_ <- deleteAcsSnapshotsAfter(historyId, previousMigrationId, acsRecordTime) | ||
_ <- deleteRolledBackUpdateHistory(historyId, previousMigrationId, acsRecordTime) | ||
_ <- | ||
if (info.synchronizerWasPaused) { | ||
for { | ||
_ <- verifyNoRolledBackAcsSnapshots( | ||
historyId, | ||
previousMigrationId, | ||
info.acsRecordTime, | ||
) | ||
_ <- verifyNoRolledBackData(historyId, previousMigrationId, info.acsRecordTime) | ||
} yield () | ||
} else { | ||
for { | ||
_ <- deleteAcsSnapshotsAfter(historyId, previousMigrationId, info.acsRecordTime) | ||
_ <- deleteRolledBackUpdateHistory( | ||
historyId, | ||
previousMigrationId, | ||
info.acsRecordTime, | ||
) | ||
} yield () | ||
} | ||
} yield () | ||
case _ => | ||
logger.debug("No previous domain migration, not checking or deleting updates") | ||
Future.unit | ||
} | ||
} | ||
|
||
private[this] def verifyNoRolledBackData( | ||
historyId: Long, // Not using the storeId from the state, as the state might not be updated yet | ||
migrationId: Long, | ||
recordTime: CantonTimestamp, | ||
)(implicit tc: TraceContext): Future[Unit] = { | ||
val action = DBIO | ||
.sequence( | ||
Seq( | ||
sql""" | ||
select count(*) from update_history_creates | ||
where history_id = $historyId and migration_id = $migrationId and record_time > $recordTime | ||
""".as[Long].head, | ||
sql""" | ||
select count(*) from update_history_exercises | ||
where history_id = $historyId and migration_id = $migrationId and record_time > $recordTime | ||
""".as[Long].head, | ||
sql""" | ||
select count(*) from update_history_transactions | ||
where history_id = $historyId and migration_id = $migrationId and record_time > $recordTime | ||
""".as[Long].head, | ||
sql""" | ||
select count(*) from update_history_assignments | ||
where history_id = $historyId and migration_id = $migrationId and record_time > $recordTime | ||
""".as[Long].head, | ||
sql""" | ||
select count(*) from update_history_unassignments | ||
where history_id = $historyId and migration_id = $migrationId and record_time > $recordTime | ||
""".as[Long].head, | ||
) | ||
) | ||
.map(rows => | ||
if (rows.sum > 0) { | ||
throw new IllegalStateException( | ||
s"Found $rows rows for $updateStreamParty where migration_id = $migrationId and record_time > $recordTime, " + | ||
"but the configuration says the domain was paused during the migration. " + | ||
"Check the domain migration configuration and the content of the update history database." | ||
) | ||
} else { | ||
logger.debug( | ||
s"No updates found for $updateStreamParty where migration_id = $migrationId and record_time > $recordTime" | ||
) | ||
} | ||
) | ||
storage.query(action, "verifyNoRolledBackData") | ||
} | ||
|
||
private[this] def deleteRolledBackUpdateHistory( | ||
historyId: Long, // Not using the storeId from the state, as the state might not be updated yet | ||
migrationId: Long, | ||
|
@@ -843,6 +907,37 @@ class UpdateHistory( | |
) | ||
} | ||
|
||
def verifyNoRolledBackAcsSnapshots( | ||
historyId: Long, | ||
migrationId: Long, | ||
recordTime: CantonTimestamp, | ||
)(implicit tc: TraceContext): Future[Unit] = { | ||
val action = sql""" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hm somehow I hit this in tests
But I don't understand why, I don't see a snapshot with a higher time 😕 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This makes no sense to me. @OriolMunoz-da I think I need a 🦆, am I missing something obvious here? I can't find any log that inserts this snapshot. We also are nowhere close to 09:00:00 (and this is a wallclock test not simtime). There also are no weird force calls (and even if they were they would not be at 09:00:00). Is there some codepath I'm missing that creates this snapshot here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. discussed with @OriolMunoz-da: this is a bug in the acs snapshot logic. He's looking into fixing it |
||
select snapshot_record_time from acs_snapshot | ||
where history_id = $historyId and migration_id = $migrationId and snapshot_record_time > $recordTime | ||
""" | ||
.as[CantonTimestamp] | ||
.map(times => | ||
if (times.length > 0) { | ||
throw new IllegalStateException( | ||
s"Found acs snapshots at $times for $updateStreamParty where migration_id = $migrationId and record_time > $recordTime, " + | ||
"but the configuration says the domain was paused during the migration. " + | ||
"Check the domain migration configuration and the content of the update history database" | ||
) | ||
} else { | ||
logger.debug( | ||
s"No updates found for $updateStreamParty where migration_id = $migrationId and record_time > $recordTime" | ||
) | ||
} | ||
) | ||
|
||
storage | ||
.query( | ||
action, | ||
"verifyNoRolledBackAcsSnapshots", | ||
) | ||
} | ||
|
||
/** Deletes all updates on the given domain with a record time before the given time. | ||
*/ | ||
def deleteUpdatesBefore( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,7 @@ import org.lfdecentralizedtrust.splice.environment.ledger.api.{ | |
TransactionTreeUpdate, | ||
TreeUpdateOrOffsetCheckpoint, | ||
} | ||
import org.lfdecentralizedtrust.splice.migration.MigrationTimeInfo | ||
import org.lfdecentralizedtrust.splice.util.DomainRecordTimeRange | ||
|
||
import java.time.Instant | ||
|
@@ -617,6 +618,83 @@ class UpdateHistoryTest extends UpdateHistoryTestBase { | |
} yield succeed | ||
} | ||
|
||
"tx rollbacks after migrations are handled correctly" in { | ||
val t0 = time(1) | ||
val t1 = time(2) | ||
val store1 = mkStore(party1, migration1, participant1) | ||
val store2TimeTooEarly = mkStore( | ||
party1, | ||
migration2, | ||
participant1, | ||
migrationTimeInfo = Some(MigrationTimeInfo(t0, synchronizerWasPaused = true)), | ||
) | ||
val store2TimeCorrect = mkStore( | ||
party1, | ||
migration2, | ||
participant1, | ||
migrationTimeInfo = Some(MigrationTimeInfo(t1, synchronizerWasPaused = true)), | ||
) | ||
for { | ||
_ <- initStore(store1) | ||
_ <- create(domain1, cid1, offset1, party1, store1, t0) | ||
_ <- create(domain1, cid2, offset2, party1, store1, t1) | ||
updates1 <- updates(store1) | ||
ex <- recoverToExceptionIf[IllegalStateException](initStore(store2TimeTooEarly)) | ||
_ = ex.getMessage should include("Found List(1, 0, 1, 0, 0) rows") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. log is a bit confusing but it's the same log format we have for the delete and I can't be bothered to fix that righ tnow. |
||
_ <- initStore(store2TimeCorrect) | ||
updates2 <- updates(store2TimeCorrect) | ||
} yield { | ||
checkUpdates( | ||
updates1, | ||
Seq( | ||
ExpectedCreate(cid1, domain1), | ||
ExpectedCreate(cid2, domain1), | ||
), | ||
) | ||
checkUpdates( | ||
updates2, | ||
Seq( | ||
ExpectedCreate(cid1, domain1), | ||
ExpectedCreate(cid2, domain1), | ||
), | ||
) | ||
} | ||
} | ||
|
||
"tx rollbacks after DR are handled correctly" in { | ||
val t0 = time(1) | ||
val t1 = time(2) | ||
val store1 = mkStore(party1, migration1, participant1) | ||
val store2TimeTooEarly = mkStore( | ||
party1, | ||
migration2, | ||
participant1, | ||
migrationTimeInfo = Some(MigrationTimeInfo(t0, synchronizerWasPaused = false)), | ||
) | ||
for { | ||
_ <- initStore(store1) | ||
_ <- create(domain1, cid1, offset1, party1, store1, t0) | ||
_ <- create(domain1, cid2, offset2, party1, store1, t1) | ||
updates1 <- updates(store1) | ||
_ <- initStore(store2TimeTooEarly) | ||
updates2 <- updates(store2TimeTooEarly) | ||
} yield { | ||
checkUpdates( | ||
updates1, | ||
Seq( | ||
ExpectedCreate(cid1, domain1), | ||
ExpectedCreate(cid2, domain1), | ||
), | ||
) | ||
checkUpdates( | ||
updates2, | ||
Seq( | ||
ExpectedCreate(cid1, domain1) | ||
), | ||
) | ||
} | ||
} | ||
|
||
} | ||
|
||
"getImportUpdates" should { | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the queries are identical to the
delete
variant so essentially the same query plan as wellThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is probably even faster, but I don't think it's worth optimizing for maximum speed here since AFAICT we're only running these queries on actual migration/DR procedures.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah also the counts are useful in logs imho. So I'd lean towards keeping it as it is.