-
Notifications
You must be signed in to change notification settings - Fork 872
Helper files for the flatKV cache implementation #3072
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| package dbcache | ||
|
|
||
| import ( | ||
| "github.com/sei-protocol/sei-chain/sei-db/db_engine/types" | ||
| ) | ||
|
|
||
| // Reader reads the value of a single key from the backing store. | ||
| // | ||
| // A missing key is not an error: if the key does not exist, a Reader must return | ||
| // (nil, false, nil). Errors are reserved for actual failures (e.g. I/O errors). | ||
| type Reader func(key []byte) (value []byte, found bool, err error) | ||
|
|
||
| // Cache describes a read-through cache backed by a Reader. | ||
| // | ||
| // Warning: it is not safe to mutate byte slices (keys or values) passed to or received from the cache. | ||
| // A cache is not required to make defensive copies, and so these slices must be treated as immutable. | ||
| // | ||
| // Although several methods on this interface return errors, the conditions under which a cache | ||
| // is permitted to actually return an error are limited at the API level. A cache method | ||
| // may return an error under the following conditions: | ||
| // - malformed input (e.g. a nil key) | ||
| // - the Reader returns an error (for methods that accept a Reader) | ||
| // - the cache is shutting down | ||
| // - the cache's work pools are shutting down | ||
| type Cache interface { | ||
|
|
||
| // Get returns the value for the given key, or (nil, false, nil) if not found. | ||
| // On a cache miss the provided Reader is called to fetch from the backing store, | ||
| // and the result is loaded into the cache. | ||
| // | ||
| // It is not safe to mutate the key slice after calling this method, nor is it safe to mutate the value slice | ||
| // that is returned. | ||
| Get( | ||
| // Reads a value from the backing store on cache miss. | ||
| read Reader, | ||
| // The entry to fetch. | ||
| key []byte, | ||
| // If true, the LRU queue will be updated. If false, the LRU queue will not be updated. | ||
| // Passing false is useful when an operation is performed multiple times in close succession | ||
| // on the same key, since updating the LRU queue has non-zero overhead and brings little benefit then. | ||
| updateLru bool, | ||
| ) ([]byte, bool, error) | ||
|
|
||
| // BatchGet performs a batch read operation. Given a map of keys to read, it performs the reads and updates the | ||
| // map with the results. On cache misses the provided Reader is called to fetch from the backing store. | ||
| // | ||
| // It is not thread safe to read or mutate the map while this method is running. It is also not safe to mutate the | ||
| // key or value slices in the map after calling this method. | ||
| BatchGet(read Reader, keys map[string]types.BatchGetResult) error | ||
|
|
||
| // Set sets the value for the given key. | ||
| // | ||
| // It is not safe to mutate the key or value slices after calling this method. | ||
| Set(key []byte, value []byte) | ||
|
|
||
| // Delete deletes the value for the given key. | ||
| // | ||
| // It is not safe to mutate the key slice after calling this method. | ||
| Delete(key []byte) | ||
|
|
||
| // BatchSet applies the given updates to the cache. | ||
| // | ||
| // It is not safe to mutate the key or value slices in the CacheUpdate structs after calling this method. | ||
| BatchSet(updates []CacheUpdate) error | ||
| } | ||
|
|
||
| // CacheUpdate describes a single key-value mutation to apply to the cache: | ||
| // a set when Value is non-nil, or a delete when Value is nil. | ||
| type CacheUpdate struct { | ||
| // The key to update. | ||
| Key []byte | ||
| // The value to set. If nil, the key will be deleted. | ||
| Value []byte | ||
| } | ||
|
|
||
| // IsDelete returns true if the update is a delete operation, i.e. if Value is nil. | ||
| // An empty but non-nil Value is not a delete. | ||
| func (u *CacheUpdate) IsDelete() bool { | ||
| return u.Value == nil | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| package dbcache | ||
|
|
||
| import ( | ||
| "fmt" | ||
|
|
||
| "github.com/sei-protocol/sei-chain/sei-db/db_engine/types" | ||
| ) | ||
|
|
||
| // cachedBatch wraps a types.Batch and applies pending mutations to the cache | ||
| // after a successful commit. | ||
| type cachedBatch struct { | ||
| // inner is the wrapped batch that every operation is forwarded to. | ||
| inner types.Batch | ||
| // cache receives the pending mutations once inner has committed successfully. | ||
| cache Cache | ||
| // pending holds the mutations queued by Set/Delete that have not yet been applied | ||
| // to the cache. It is cleared by a successful Commit and by Reset. | ||
| pending []CacheUpdate | ||
| } | ||
|
|
||
| var _ types.Batch = (*cachedBatch)(nil) | ||
|
|
||
| // newCachedBatch returns a cachedBatch that forwards its operations to inner and | ||
| // applies the queued mutations to cache after a successful commit. | ||
| func newCachedBatch(inner types.Batch, cache Cache) *cachedBatch { | ||
| cb := new(cachedBatch) | ||
| cb.inner = inner | ||
| cb.cache = cache | ||
| return cb | ||
| } | ||
|
|
||
| // Set stages a write of value under key in the inner batch and, if the inner batch | ||
| // accepts it, queues the same mutation to be applied to the cache on Commit. | ||
| // | ||
| // The mutation is queued only after inner.Set succeeds, so a write rejected by the | ||
| // inner batch can never reach the cache on a later Commit. | ||
| // | ||
| // The key and value slices are retained without copying; it is not safe to mutate | ||
| // them after calling this method. | ||
| func (cb *cachedBatch) Set(key, value []byte) error { | ||
| if err := cb.inner.Set(key, value); err != nil { | ||
| return err | ||
| } | ||
| cb.pending = append(cb.pending, CacheUpdate{Key: key, Value: value}) | ||
| return nil | ||
| } | ||
|
|
||
| // Delete stages a deletion of key in the inner batch and, if the inner batch | ||
| // accepts it, queues the same deletion to be applied to the cache on Commit. | ||
| // | ||
| // The deletion is queued only after inner.Delete succeeds, so a deletion rejected by | ||
| // the inner batch can never reach the cache on a later Commit. | ||
| // | ||
| // The key slice is retained without copying; it is not safe to mutate it after | ||
| // calling this method. | ||
| func (cb *cachedBatch) Delete(key []byte) error { | ||
| if err := cb.inner.Delete(key); err != nil { | ||
| return err | ||
| } | ||
| cb.pending = append(cb.pending, CacheUpdate{Key: key, Value: nil}) | ||
| return nil | ||
| } | ||
|
|
||
| // Commit commits the inner batch and, if that succeeds, applies the pending mutations | ||
| // to the cache via BatchSet. | ||
| // | ||
| // If the inner commit fails, its error is returned as-is and the cache is not touched. | ||
| // If the cache update fails, a wrapped error is returned even though the inner batch has | ||
| // already been committed. The pending mutations are cleared only when both steps succeed. | ||
| func (cb *cachedBatch) Commit(opts types.WriteOptions) error { | ||
| if err := cb.inner.Commit(opts); err != nil { | ||
| return err | ||
| } | ||
| if err := cb.cache.BatchSet(cb.pending); err != nil { | ||
| // A cache write can only fail during a shutdown when the cache's context is cancelled, | ||
| // or when the cache's work pools have their contexts cancelled. Continuing to use the | ||
| // cache after shutdown is not permissible, and so this method must return an error. | ||
| return fmt.Errorf("failed to update cache after commit: %w", err) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if inner.Commit() succeeds but cache.BatchSet() fails, the method returns an error. Would that cause any confusion to the caller since the data is already persisted? Is cache update best-effort? If so maybe we just return nil but log the error?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This actually would cause a problem. If the old data is present in the cache, then when the caller attempts to read it might observe the old data instead of the new data. This is a violation of "read your writes" consistency, which will likely break deterministic transaction execution. There are exactly two conditions which can cause the cache to return an error, as currently implemented:
The purpose of this failure mode is to unblock threads during teardown (e.g. at the end of a unit test), and it's not really a recoverable state. I've added the following documentation: Additionally, I've added the following text to the cache's godoc: |
||
| } | ||
| cb.pending = nil | ||
| return nil | ||
| } | ||
|
|
||
| // Len returns the length reported by the inner batch; the pending cache updates | ||
| // are not consulted. | ||
| func (cb *cachedBatch) Len() int { | ||
| return cb.inner.Len() | ||
| } | ||
|
|
||
| // Reset resets the inner batch and discards every pending cache update, so that a | ||
| // later Commit does not apply mutations queued before the reset. | ||
| func (cb *cachedBatch) Reset() { | ||
| cb.inner.Reset() | ||
| cb.pending = nil | ||
| } | ||
|
|
||
| // Close closes the inner batch and returns its error. The cache is not touched. | ||
| func (cb *cachedBatch) Close() error { | ||
| return cb.inner.Close() | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,204 @@ | ||
| package dbcache | ||
|
|
||
| import ( | ||
| "errors" | ||
| "testing" | ||
|
|
||
| "github.com/stretchr/testify/require" | ||
|
|
||
| "github.com/sei-protocol/sei-chain/sei-db/db_engine/types" | ||
| ) | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // mock batch | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| // mockBatch is an in-memory batch used by the tests; it records the calls it receives. | ||
| type mockBatch struct { | ||
| // sets records every Set call, in call order. | ||
| sets []CacheUpdate | ||
| // deletes records the key of every Delete call, in call order. | ||
| deletes [][]byte | ||
| // committed is set by a successful Commit and cleared by Reset. | ||
| committed bool | ||
| // closed is set by Close. | ||
| closed bool | ||
| // resetCount counts the calls to Reset. | ||
| resetCount int | ||
| // commitErr, when non-nil, is returned by Commit instead of committing. | ||
| commitErr error | ||
| } | ||
|
|
||
| // Set records the key/value pair and always succeeds. | ||
| func (m *mockBatch) Set(key, value []byte) error { | ||
| recorded := CacheUpdate{Key: key, Value: value} | ||
| m.sets = append(m.sets, recorded) | ||
| return nil | ||
| } | ||
|
|
||
| // Delete records the deleted key and always succeeds. | ||
| func (m *mockBatch) Delete(key []byte) error { | ||
| m.deletes = append(m.deletes, key) | ||
| return nil | ||
| } | ||
|
|
||
| // Commit marks the batch as committed, unless commitErr is configured, in which case | ||
| // that error is returned and the committed flag is left unchanged. | ||
| func (m *mockBatch) Commit(opts types.WriteOptions) error { | ||
| if m.commitErr == nil { | ||
| m.committed = true | ||
| return nil | ||
| } | ||
| return m.commitErr | ||
| } | ||
|
|
||
| // Len returns the number of recorded Set calls plus the number of recorded Delete calls. | ||
| func (m *mockBatch) Len() int { | ||
| total := len(m.sets) | ||
| total += len(m.deletes) | ||
| return total | ||
| } | ||
|
|
||
| // Reset forgets the recorded sets and deletes, clears the committed flag and | ||
| // increments resetCount. | ||
| func (m *mockBatch) Reset() { | ||
| m.sets, m.deletes = nil, nil | ||
| m.committed = false | ||
| m.resetCount++ | ||
| } | ||
|
|
||
| // Close marks the batch as closed and always succeeds. | ||
| func (m *mockBatch) Close() error { | ||
| m.closed = true | ||
| return nil | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // mock cache | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| // mockCache is a map-backed cache used by the tests. It never calls the Reader | ||
| // passed to Get or BatchGet. | ||
| type mockCache struct { | ||
| // data holds the cached values, keyed by the string form of the key. | ||
| data map[string][]byte | ||
| // batchSetErr, when non-nil, is returned by BatchSet without applying any update. | ||
| batchSetErr error | ||
| } | ||
|
|
||
| // newMockCache returns an empty mockCache whose data map is ready for writes. | ||
| func newMockCache() *mockCache { | ||
| mc := new(mockCache) | ||
| mc.data = map[string][]byte{} | ||
| return mc | ||
| } | ||
|
|
||
| // Get looks the key up in the in-memory map only. The Reader and the LRU flag are | ||
| // ignored, and the returned error is always nil. | ||
| func (mc *mockCache) Get(_ Reader, key []byte, _ bool) ([]byte, bool, error) { | ||
| if value, found := mc.data[string(key)]; found { | ||
| return value, true, nil | ||
| } | ||
| return nil, false, nil | ||
| } | ||
|
|
||
| // BatchGet fills in the result for every requested key that is present in the | ||
| // in-memory map. Entries for missing keys are left as they were, the Reader is | ||
| // ignored, and the returned error is always nil. | ||
| func (mc *mockCache) BatchGet(_ Reader, keys map[string]types.BatchGetResult) error { | ||
| for requested := range keys { | ||
| if cached, hit := mc.data[requested]; hit { | ||
| keys[requested] = types.BatchGetResult{Value: cached} | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // Set stores value under key in the in-memory map, without copying the value slice. | ||
| func (mc *mockCache) Set(key, value []byte) { | ||
| mc.data[string(key)] = value | ||
| } | ||
|
|
||
| // Delete removes key from the in-memory map; deleting an absent key is a no-op. | ||
| func (mc *mockCache) Delete(key []byte) { | ||
| delete(mc.data, string(key)) | ||
| } | ||
|
|
||
| // BatchSet applies the updates to the in-memory map in order: delete updates remove | ||
| // the key, all others store the value. If batchSetErr is configured it is returned | ||
| // immediately and no update is applied. | ||
| func (mc *mockCache) BatchSet(updates []CacheUpdate) error { | ||
| if err := mc.batchSetErr; err != nil { | ||
| return err | ||
| } | ||
| for i := range updates { | ||
| update := &updates[i] | ||
| mapKey := string(update.Key) | ||
| if update.IsDelete() { | ||
| delete(mc.data, mapKey) | ||
| continue | ||
| } | ||
| mc.data[mapKey] = update.Value | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| // --------------------------------------------------------------------------- | ||
| // tests | ||
| // --------------------------------------------------------------------------- | ||
|
|
||
| // TestCachedBatchCommitUpdatesCacheOnSuccess checks that a successful commit marks the | ||
| // inner batch as committed and makes every staged value visible in the cache. | ||
| func TestCachedBatchCommitUpdatesCacheOnSuccess(t *testing.T) { | ||
| inner := &mockBatch{} | ||
| cache := newMockCache() | ||
| cb := newCachedBatch(inner, cache) | ||
| // Stage two writes and commit them. | ||
| staged := []CacheUpdate{ | ||
| {Key: []byte("a"), Value: []byte("1")}, | ||
| {Key: []byte("b"), Value: []byte("2")}, | ||
| } | ||
| for _, w := range staged { | ||
| require.NoError(t, cb.Set(w.Key, w.Value)) | ||
| } | ||
| require.NoError(t, cb.Commit(types.WriteOptions{})) | ||
| // The inner batch committed and both values reached the cache. | ||
| require.True(t, inner.committed) | ||
| for _, w := range staged { | ||
| got, ok := cache.data[string(w.Key)] | ||
| require.True(t, ok) | ||
| require.Equal(t, w.Value, got) | ||
| } | ||
| } | ||
|
|
||
| // TestCachedBatchCommitDoesNotUpdateCacheOnInnerFailure checks that a failed inner | ||
| // commit surfaces its error and leaves the cache untouched. | ||
| func TestCachedBatchCommitDoesNotUpdateCacheOnInnerFailure(t *testing.T) { | ||
| failing := &mockBatch{commitErr: errors.New("disk full")} | ||
| cache := newMockCache() | ||
| cb := newCachedBatch(failing, cache) | ||
| // Stage one write, then commit against the failing inner batch. | ||
| require.NoError(t, cb.Set([]byte("a"), []byte("1"))) | ||
| commitErr := cb.Commit(types.WriteOptions{}) | ||
| // The inner error is reported and nothing was written to the cache. | ||
| require.Error(t, commitErr) | ||
| require.Contains(t, commitErr.Error(), "disk full") | ||
| _, present := cache.data["a"] | ||
| require.False(t, present, "cache should not be updated when inner commit fails") | ||
| } | ||
|
|
||
| // TestCachedBatchCommitReturnsCacheError checks that a cache failure after a successful | ||
| // inner commit is still reported to the caller. | ||
| func TestCachedBatchCommitReturnsCacheError(t *testing.T) { | ||
| inner := &mockBatch{} | ||
| broken := newMockCache() | ||
| broken.batchSetErr = errors.New("cache broken") | ||
| cb := newCachedBatch(inner, broken) | ||
| // Stage one write, then commit with a cache that rejects BatchSet. | ||
| require.NoError(t, cb.Set([]byte("a"), []byte("1"))) | ||
| commitErr := cb.Commit(types.WriteOptions{}) | ||
| // The cache error is surfaced, and the inner batch was committed first. | ||
| require.Error(t, commitErr) | ||
| require.Contains(t, commitErr.Error(), "cache broken") | ||
| require.True(t, inner.committed, "inner batch should have committed") | ||
| } | ||
|
|
||
| // TestCachedBatchDeleteMarksKeyForRemoval checks that a committed Delete removes a | ||
| // previously cached key from the cache. | ||
| func TestCachedBatchDeleteMarksKeyForRemoval(t *testing.T) { | ||
| inner := &mockBatch{} | ||
| cache := newMockCache() | ||
| // Seed the cache so there is an entry to remove. | ||
| cache.Set([]byte("x"), []byte("old")) | ||
| cb := newCachedBatch(inner, cache) | ||
| // Delete the seeded key and commit. | ||
| require.NoError(t, cb.Delete([]byte("x"))) | ||
| require.NoError(t, cb.Commit(types.WriteOptions{})) | ||
| _, present := cache.data["x"] | ||
| require.False(t, present, "key should be deleted from cache") | ||
| } | ||
|
|
||
| // TestCachedBatchResetClearsPending checks that writes staged before Reset are not | ||
| // applied to the cache by a later Commit. | ||
| func TestCachedBatchResetClearsPending(t *testing.T) { | ||
| inner := &mockBatch{} | ||
| cache := newMockCache() | ||
| cb := newCachedBatch(inner, cache) | ||
| // Stage two writes, then throw them away. | ||
| for _, kv := range [][2]string{{"a", "1"}, {"b", "2"}} { | ||
| require.NoError(t, cb.Set([]byte(kv[0]), []byte(kv[1]))) | ||
| } | ||
| cb.Reset() | ||
| // Committing after the reset must not publish the discarded writes. | ||
| require.NoError(t, cb.Commit(types.WriteOptions{})) | ||
| require.Empty(t, cache.data, "cache should have no entries after reset + commit") | ||
| } | ||
|
|
||
| // TestCachedBatchLenDelegatesToInner checks that Len reflects the operations | ||
| // recorded by the inner batch. | ||
| func TestCachedBatchLenDelegatesToInner(t *testing.T) { | ||
| inner := &mockBatch{} | ||
| cache := newMockCache() | ||
| cb := newCachedBatch(inner, cache) | ||
| // A fresh batch is empty. | ||
| require.Equal(t, 0, cb.Len()) | ||
| // One set plus one delete gives a length of two. | ||
| setErr := cb.Set([]byte("a"), []byte("1")) | ||
| require.NoError(t, setErr) | ||
| deleteErr := cb.Delete([]byte("b")) | ||
| require.NoError(t, deleteErr) | ||
| require.Equal(t, 2, cb.Len()) | ||
| } | ||
|
|
||
| // TestCachedBatchCloseDelegatesToInner checks that closing the cached batch closes | ||
| // the inner batch. | ||
| func TestCachedBatchCloseDelegatesToInner(t *testing.T) { | ||
| inner := &mockBatch{} | ||
| cache := newMockCache() | ||
| cb := newCachedBatch(inner, cache) | ||
| closeErr := cb.Close() | ||
| require.NoError(t, closeErr) | ||
| require.True(t, inner.closed) | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Set and Delete append CacheUpdate{Key: key, Value: value} directly using the caller's slices. If the caller reuses or mutates those byte slices after calling Set/Delete but before Commit, won't the pending updates contain corrupted data?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I made the intentional choice to not make defensive copies in the caching layer since this is performance sensitive code running in the hot path. I'm open to discussion on this topic though.
What is your preference when it comes to code like this? Do you think it's better to make defensive copies in order to avoid code that can be a foot gun, or is it worth allowing a higher risk API if it lets us avoid lots of extra data copies?
Reading through the code, I don't think I've done a good job of documenting the requirement not to mutate byte slices after passing them into the cache. I've added that documentation in this branch, in case we decide to keep the zero-copy pattern.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As long as we add the comments and make it clear that it's not safe to mutate the slices, I think we are good here.