-
Notifications
You must be signed in to change notification settings - Fork 884
Add gov proposal based migration trigger #3650
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a8983ae
1ab407e
6388061
358df23
e09fa19
1e6546d
91e1766
e74e608
3327b28
a5cf528
c20c5a8
76e5040
20390c1
63ba76a
453bb76
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ import ( | |
| "context" | ||
| "crypto/sha256" | ||
| "fmt" | ||
| "math" | ||
| "math/big" | ||
| "time" | ||
|
|
||
|
|
@@ -13,6 +14,7 @@ import ( | |
| otelmetrics "go.opentelemetry.io/otel/metric" | ||
|
|
||
| "github.com/sei-protocol/sei-chain/app/legacyabci" | ||
| "github.com/sei-protocol/sei-chain/app/migration" | ||
| "github.com/sei-protocol/sei-chain/sei-cosmos/tasks" | ||
| "github.com/sei-protocol/sei-chain/sei-cosmos/telemetry" | ||
| sdk "github.com/sei-protocol/sei-chain/sei-cosmos/types" | ||
|
|
@@ -53,12 +55,50 @@ func (app *App) BeginBlock( | |
| if app.HardForkManager.TargetHeightReached(ctx) { | ||
| app.HardForkManager.ExecuteForTargetHeight(ctx) | ||
| } | ||
| app.applyMigrationBatchSize(ctx) | ||
| legacyabci.BeginBlock(ctx, height, votes, byzantineValidators, app.BeginBlockKeepers) | ||
| return abci.ResponseBeginBlock{ | ||
| Events: sdk.MarkEventsToIndex(ctx.EventManager().ABCIEvents(), app.IndexEvents), | ||
| } | ||
| } | ||
|
|
||
| // applyMigrationBatchSize paces the SC store's background data migration at the network-agreed rate. | ||
| // The NumKeysToMigratePerBlock gov param is read from chain state so every node | ||
| // applies the same value each block; a per-node rate would diverge the | ||
| // AppHash. 0 (the default until a gov proposal raises it) leaves the migration | ||
| // paused; it is the sole source of the rate (there is no node-local fallback). | ||
| func (app *App) applyMigrationBatchSize(ctx sdk.Context) { | ||
| if app.rootStore == nil { | ||
| return | ||
| } | ||
| numKeys := migration.DefaultNumKeysToMigratePerBlock | ||
| if subspace, ok := app.ParamsKeeper.GetSubspace(migration.SubspaceName); ok { | ||
| // The migration subspace has no owning module to seed it in InitGenesis, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [suggestion] The lazy seed correctly handles a fresh/never-set param deterministically, but because this subspace has no module ExportGenesis (x/params only exports Fees/CosmosGas params), the value is lost across a |
||
| // so lazily persist the default the first time we see it unset. This is | ||
| // deterministic across nodes (every node runs BeginBlock identically) and | ||
| // makes the param visible to gov: ParameterChangeProposal submission only | ||
| // accepts a change when subspace.Has reports the key is already stored. | ||
| if !subspace.Has(ctx, migration.KeyNumKeysToMigratePerBlock) { | ||
| subspace.Set(ctx, migration.KeyNumKeysToMigratePerBlock, migration.DefaultNumKeysToMigratePerBlock) | ||
| } | ||
| subspace.GetIfExists(ctx, migration.KeyNumKeysToMigratePerBlock, &numKeys) | ||
| } | ||
| if numKeys > uint64(math.MaxInt64) { | ||
| numKeys = uint64(math.MaxInt64) | ||
| } | ||
| if err := app.rootStore.SetMigrationBatchSize(int(numKeys)); err != nil { | ||
| // Never panic on the migration-rate update: log and continue. AppHash | ||
| // verification is the safety net. If the rate/mode update fails on only | ||
| // some nodes, those nodes' AppHash diverges and the normal AppHash | ||
| // comparison halts them at the next block — no proactive panic needed. | ||
| // If it fails on every node, all stay in the same (old) mode with an | ||
| // identical AppHash, so the chain keeps moving and the level-triggered | ||
| // trigger re-fires on a later block. Panicking here would needlessly | ||
| // halt the whole chain in that all-fail case. | ||
| logger.Error("failed to set SC migration batch size; continuing", "err", err) | ||
| } | ||
| } | ||
|
|
||
| func (app *App) MidBlock(ctx sdk.Context, height int64) []abci.Event { | ||
| _, span := app.GetBaseApp().TracingInfo.StartWithContext("MidBlock", ctx.TraceSpanContext()) | ||
| defer span.End() | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| package app | ||
|
|
||
| import ( | ||
| "context" | ||
| "math" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/sei-protocol/sei-chain/app/migration" | ||
| abci "github.com/sei-protocol/sei-chain/sei-tendermint/abci/types" | ||
| tmproto "github.com/sei-protocol/sei-chain/sei-tendermint/proto/tendermint/types" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| // TestMigrationSubspaceRegistered verifies the generic "migration" params | ||
| // subspace is wired with its key table so governance can edit | ||
| // NumKeysToMigratePerBlock via a ParameterChangeProposal. | ||
| func TestMigrationSubspaceRegistered(t *testing.T) { | ||
| a := Setup(t, false, false, false) | ||
| subspace, ok := a.ParamsKeeper.GetSubspace(migration.SubspaceName) | ||
| require.True(t, ok, "migration subspace must be registered") | ||
| require.True(t, subspace.HasKeyTable(), "migration subspace must have a key table") | ||
|
|
||
| ctx := a.NewContext(false, tmproto.Header{Height: 1, ChainID: "sei-test", Time: time.Now()}) | ||
| subspace.Set(ctx, migration.KeyNumKeysToMigratePerBlock, uint64(123)) | ||
| var got uint64 | ||
| subspace.GetIfExists(ctx, migration.KeyNumKeysToMigratePerBlock, &got) | ||
| require.Equal(t, uint64(123), got) | ||
| } | ||
|
|
||
| // TestApplyMigrationBatchSize covers the BeginBlock push: the gov param is | ||
| // read from chain state and forwarded into the SC commit store. | ||
| func TestApplyMigrationBatchSize(t *testing.T) { | ||
| a := Setup(t, false, false, false) | ||
| ctx := a.NewContext(false, tmproto.Header{Height: 1, ChainID: "sei-test", Time: time.Now()}) | ||
|
|
||
| subspace, ok := a.ParamsKeeper.GetSubspace(migration.SubspaceName) | ||
| require.True(t, ok) | ||
|
|
||
| // Unset param: the store receives the default (0 = paused). | ||
| a.applyMigrationBatchSize(ctx) | ||
| got, ok := a.rootStore.GetMigrationBatchSize() | ||
| require.True(t, ok, "SC store should track a migration batch size") | ||
| require.Equal(t, 0, got) | ||
|
|
||
| // Governance raises the rate: BeginBlock forwards the new value. | ||
| subspace.Set(ctx, migration.KeyNumKeysToMigratePerBlock, uint64(500)) | ||
| a.applyMigrationBatchSize(ctx) | ||
| got, _ = a.rootStore.GetMigrationBatchSize() | ||
| require.Equal(t, 500, got) | ||
|
|
||
| // Out-of-int64-range values are clamped to MaxInt64 (defensive; gov | ||
| // validation only type-checks). | ||
| subspace.Set(ctx, migration.KeyNumKeysToMigratePerBlock, uint64(math.MaxUint64)) | ||
| a.applyMigrationBatchSize(ctx) | ||
| got, _ = a.rootStore.GetMigrationBatchSize() | ||
| require.Equal(t, math.MaxInt64, got) | ||
| } | ||
|
|
||
| // TestBeginBlockAppliesMigrationBatchSize exercises the full BeginBlock path | ||
| // (not the helper in isolation): it mimics a governance ParameterChangeProposal | ||
| // having set NumKeysToMigratePerBlock, then runs app.BeginBlock and asserts the | ||
| // new rate landed in the SC commit store. | ||
| func TestBeginBlockAppliesMigrationBatchSize(t *testing.T) { | ||
| a := Setup(t, false, false, false) | ||
| ctx := a.NewContext(false, tmproto.Header{Height: 2, ChainID: "sei-test", Time: time.Now()}) | ||
|
|
||
| // Sanity: nothing set yet, so the store is paused at 0. | ||
| before, ok := a.rootStore.GetMigrationBatchSize() | ||
| require.True(t, ok) | ||
| require.Equal(t, 0, before) | ||
|
|
||
| // Simulate the gov proposal landing in chain state. | ||
| subspace, ok := a.ParamsKeeper.GetSubspace(migration.SubspaceName) | ||
| require.True(t, ok) | ||
| subspace.Set(ctx, migration.KeyNumKeysToMigratePerBlock, uint64(321)) | ||
|
|
||
| // Run the real BeginBlock (checkHeight=false to skip height validation). | ||
| require.NotPanics(t, func() { | ||
| a.BeginBlock(ctx, 2, nil, nil, false) | ||
| }) | ||
|
|
||
| after, _ := a.rootStore.GetMigrationBatchSize() | ||
| require.Equal(t, 321, after, "BeginBlock should push the gov param into the SC store") | ||
| } | ||
|
|
||
| // TestMigrationBatchSizeTakesEffectNextBlock is the full end-to-end timing | ||
| // check: a governance proposal committed in block N (written into the block's | ||
| // deliver state, then Commit) only changes the SC store's migration rate when | ||
| // block N+1's BeginBlock runs and reads it from committed state. | ||
| func TestMigrationBatchSizeTakesEffectNextBlock(t *testing.T) { | ||
| a := Setup(t, false, false, false) | ||
| bg := context.Background() | ||
|
|
||
| // Block 1: BeginBlock runs first (param still unset), then the gov | ||
| // proposal lands by writing into this block's deliver state, then Commit | ||
| // persists it to the committed multistore. | ||
| _, err := a.FinalizeBlock(bg, &abci.RequestFinalizeBlock{ | ||
| Header: &tmproto.Header{ChainID: "sei-test", Height: 1, Time: time.Now()}, | ||
| }) | ||
| require.NoError(t, err) | ||
|
|
||
| subspace, ok := a.ParamsKeeper.GetSubspace(migration.SubspaceName) | ||
| require.True(t, ok) | ||
| subspace.Set(a.GetContextForDeliverTx([]byte{}), migration.KeyNumKeysToMigratePerBlock, uint64(640)) | ||
|
|
||
| _, err = a.Commit(bg) | ||
| require.NoError(t, err) | ||
|
|
||
| // The param was committed in block 1, but BeginBlock(1) ran before it | ||
| // existed, so the rate is still paused at this point. | ||
| got, ok := a.rootStore.GetMigrationBatchSize() | ||
| require.True(t, ok) | ||
| require.Equal(t, 0, got, "param committed in block 1 must not take effect within block 1") | ||
|
|
||
| // Block 2: BeginBlock reads the now-committed param and applies it. | ||
| _, err = a.FinalizeBlock(bg, &abci.RequestFinalizeBlock{ | ||
| Header: &tmproto.Header{ChainID: "sei-test", Height: 2, Time: time.Now().Add(time.Second)}, | ||
| }) | ||
| require.NoError(t, err) | ||
|
|
||
| got, _ = a.rootStore.GetMigrationBatchSize() | ||
| require.Equal(t, 640, got, "migration rate must take effect on the block after the param is committed") | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| // Package migration defines the module-agnostic governance parameters | ||
| // that control the state-commitment store's background data migration | ||
| // (currently memiavl->flatkv). | ||
| // | ||
| // These live outside any business module on purpose: the migration rate | ||
| // applies to whichever stores the SC router is migrating, so it is an | ||
| // app/storage-level concern rather than EVM-specific. The value is held in a | ||
| // dedicated x/params subspace and is editable via the standard | ||
| // ParameterChangeProposal gov flow. The app reads it once per block in | ||
| // BeginBlock and pushes it into the SC commit store. | ||
| package migration | ||
|
Comment on lines
+1
to
+11
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟡 Pre-existing nit (flagged by seidroid[bot] as a [suggestion]): The new Extended reasoning...What the bug isThe PR adds a generic func (k Keeper) ExportGenesis(ctx sdk.Context) *types.GenesisState {
feesParams := k.GetFeesParams(ctx)
cosmosGasParams := k.GetCosmosGasParams(ctx)
return types.NewGenesisState(feesParams, cosmosGasParams)
}It does not iterate over registered subspaces and emit their stored KV pairs. So nothing in the export pipeline ever serializes How the failure manifests — step by step
Why existing code doesn't prevent it
ImpactThis is not consensus-fatal: the resumed chain produces blocks correctly, just with the migration paused at the genesis boundary. Recovery is a single governance proposal to re-raise the param. seidroid[bot] itself labeled it How to fix itThree reasonable options, in increasing invasiveness:
🔬 also observed by seidroid |
||
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| paramtypes "github.com/sei-protocol/sei-chain/sei-cosmos/x/params/types" | ||
| ) | ||
|
|
||
| // SubspaceName is the x/params subspace that holds storage-migration controls. | ||
| const SubspaceName = "migration" | ||
|
|
||
| // KeyNumKeysToMigratePerBlock is the param key for the number of keys the | ||
| // in-flight SC migration advances per block. | ||
| var KeyNumKeysToMigratePerBlock = []byte("NumKeysToMigratePerBlock") | ||
|
|
||
| // DefaultNumKeysToMigratePerBlock leaves the migration paused. While it is 0 | ||
| // (the default until a gov proposal raises it) the SC store does no migration | ||
| // work; this param is the sole source of the per-block rate. | ||
| const DefaultNumKeysToMigratePerBlock uint64 = 0 | ||
|
|
||
| // ParamKeyTable returns the key table for the migration subspace. | ||
| func ParamKeyTable() paramtypes.KeyTable { | ||
| return paramtypes.NewKeyTable( | ||
| paramtypes.NewParamSetPair(KeyNumKeysToMigratePerBlock, new(uint64), validateNumKeysToMigratePerBlock), | ||
| ) | ||
| } | ||
|
|
||
| // validateNumKeysToMigratePerBlock only type-checks the value; any uint64 is a | ||
| // valid (consensus-deterministic) rate, with 0 meaning "paused". | ||
| func validateNumKeysToMigratePerBlock(i interface{}) error { | ||
| if _, ok := i.(uint64); !ok { | ||
| return fmt.Errorf("invalid parameter type: %T", i) | ||
| } | ||
| return nil | ||
| } | ||
|
Comment on lines
+38
to
+45
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 Bug: Extended reasoning...Bug
The chain from gov proposal to consensus halt
Step-by-step proofConcrete scenario with value
Because the gov param is consensus state, every validator reads the same value and panics identically on the same height. The chain halts. No recovery path exists short of a coordinated downgrade or a new gov proposal — and the chain cannot produce blocks to vote on either. Even non-overflowing values are dangerous: Why existing code does not prevent itThe defenses in place address adjacent concerns:
Reviewer historyseidroid[bot] in inline-comment 3483034959 explicitly suggested "bounding the param to a sane maximum (well under math.MaxInt) at validation time". yzang2019 replied in inline-comment 3483128268: "Tried that, looks like it would require a big refactory, will actually do a fallback here to 0 if its negative." The negative-clamp landed; the upper-bound suggestion did not. Pre-existing vs PR-introducedPR-introduced. The pre-PR migration rate was a node-local FixOne-line guard in const MaxNumKeysToMigratePerBlock = 1_000_000 // or another sane upper bound
func validateNumKeysToMigratePerBlock(i interface{}) error {
v, ok := i.(uint64)
if !ok {
return fmt.Errorf("invalid parameter type: %T", i)
}
if v > MaxNumKeysToMigratePerBlock {
return fmt.Errorf("NumKeysToMigratePerBlock must be <= %d, got %d", MaxNumKeysToMigratePerBlock, v)
}
return nil
}This rejects the proposal at submission, before it can reach chain state. Severity
🔬 also observed by 3483034959 |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| package migration | ||
|
|
||
| import ( | ||
| "testing" | ||
|
|
||
| paramtypes "github.com/sei-protocol/sei-chain/sei-cosmos/x/params/types" | ||
| "github.com/stretchr/testify/require" | ||
| ) | ||
|
|
||
| func TestDefaultLeavesMigrationPaused(t *testing.T) { | ||
| require.Equal(t, uint64(0), DefaultNumKeysToMigratePerBlock, | ||
| "default must be 0 so the migration stays paused until governance raises it") | ||
| } | ||
|
|
||
| func TestParamKeyTableRegistersKey(t *testing.T) { | ||
| table := ParamKeyTable() | ||
|
|
||
| // Re-registering the same key must panic with "duplicate parameter key", | ||
| // which proves NumKeysToMigratePerBlock is already in the returned table. | ||
| require.PanicsWithValue(t, "duplicate parameter key", func() { | ||
| table.RegisterType(paramtypes.NewParamSetPair( | ||
| KeyNumKeysToMigratePerBlock, new(uint64), validateNumKeysToMigratePerBlock)) | ||
| }) | ||
| } | ||
|
|
||
| func TestValidateNumKeysToMigratePerBlock(t *testing.T) { | ||
| // Any uint64 is valid, including 0 (paused) and large values. | ||
| for _, v := range []uint64{0, 1, 1024, 1 << 40} { | ||
| require.NoError(t, validateNumKeysToMigratePerBlock(v), "uint64 %d should be valid", v) | ||
| } | ||
|
|
||
| // Wrong types are rejected. | ||
| for _, v := range []interface{}{int(1), int64(1), "1", float64(1), nil} { | ||
| require.Error(t, validateNumKeysToMigratePerBlock(v), "value %v (%T) should be rejected", v, v) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[blocker]
applyMigrationBatchSizewrites to committed state in BeginBlock: when the param is unset it callssubspace.Set(...), adding a key to the x/params store and changing the AppHash at the first block the new binary runs. That is state-machine-breaking and would diverge nodes on a rolling/uncoordinated upgrade, which seems at odds with the PR'snon-app-hash-breakinglabel. Consider seeding the param via an upgrade handler / InitGenesis instead of lazily here, or confirm this ships at a coordinated upgrade height.