7 changes: 7 additions & 0 deletions Makefile
@@ -157,6 +157,13 @@ lint:
go mod tidy
go mod verify

# Run lint on the sei-db package. Much faster than running lint on the entire project.
# Makes life easier for the storage team when iterating on changes inside the sei-db package.
dblint:
go run github.com/golangci/golangci-lint/v2/cmd/golangci-lint@v2.8.0 run ./sei-db/...
go fmt ./sei-db/...
go vet ./sei-db/...

build:
go build $(BUILD_FLAGS) -o ./build/seid ./cmd/seid

78 changes: 78 additions & 0 deletions sei-db/db_engine/dbcache/cache.go
@@ -0,0 +1,78 @@
package dbcache

import (
"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

// Reader reads a single key from the backing store.
//
// If the key does not exist, Reader must return (nil, false, nil) rather than an error.
// Errors are reserved for actual failures (e.g. I/O errors).
type Reader func(key []byte) (value []byte, found bool, err error)

// Cache describes a read-through cache backed by a Reader.
//
// Warning: it is not safe to mutate byte slices (keys or values) passed to or received from the cache.
// A cache is not required to make defensive copies, and so these slices must be treated as immutable.
//
// Although several methods on this interface return errors, the conditions under which a cache
// is permitted to actually return an error are limited at the API level. A cache method
// may return an error under the following conditions:
// - malformed input (e.g. a nil key)
// - the Reader returns an error (for methods that accept a Reader)
// - the cache is shutting down
// - the cache's work pools are shutting down
type Cache interface {

// Get returns the value for the given key, or (nil, false, nil) if not found.
// On a cache miss the provided Reader is called to fetch from the backing store,
// and the result is loaded into the cache.
//
// It is not safe to mutate the key slice after calling this method, nor is it safe to mutate the value slice
// that is returned.
Get(
// Reads a value from the backing store on cache miss.
read Reader,
// The entry to fetch.
key []byte,
// If true, the LRU queue is updated; if false, it is not. Skipping the update is useful when an
// operation is performed multiple times in close succession on the same key, since updating the
// LRU each time adds overhead with little benefit.
updateLru bool,
) ([]byte, bool, error)

// BatchGet performs a batch read operation. Given a map of keys to read, it performs the reads and
// updates the map with the results. On cache misses the provided Reader is called to fetch from the
// backing store.
//
// It is not safe to read or mutate the map from another goroutine while this method is running, nor is it
// safe to mutate the key or value slices in the map after calling this method.
BatchGet(read Reader, keys map[string]types.BatchGetResult) error

// Set sets the value for the given key.
//
// It is not safe to mutate the key or value slices after calling this method.
Set(key []byte, value []byte)

// Delete deletes the value for the given key.
//
// It is not safe to mutate the key slice after calling this method.
Delete(key []byte)

// BatchSet applies the given updates to the cache.
//
// It is not safe to mutate the key or value slices in the CacheUpdate structs after calling this method.
BatchSet(updates []CacheUpdate) error
}

// CacheUpdate describes a single key-value mutation to apply to the cache.
type CacheUpdate struct {
// The key to update.
Key []byte
// The value to set. If nil, the key will be deleted.
Value []byte
}

// IsDelete returns true if the update is a delete operation.
func (u *CacheUpdate) IsDelete() bool {
return u.Value == nil
}
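
For reference, here is a minimal caller-side sketch of the read-through contract described by the godoc above. It is not part of this PR: the dbcache_example package, the kvStore interface, and readThroughExample are hypothetical, and only the Cache, Reader, and CacheUpdate definitions from this file are assumed.

package dbcache_example // hypothetical package; this sketch is not part of the PR

import (
	"github.com/sei-protocol/sei-chain/sei-db/db_engine/dbcache"
)

// kvStore is a hypothetical backing store used only for this sketch.
type kvStore interface {
	Get(key []byte) (value []byte, found bool, err error)
}

// readThroughExample sketches how a caller is expected to use the Cache interface:
// a Reader closure bridges to the backing store, Get performs the read-through,
// and BatchSet applies mutations where a nil Value means delete.
func readThroughExample(cache dbcache.Cache, store kvStore) error {
	// Reader contract: (nil, false, nil) for a missing key; errors only for real failures.
	read := func(key []byte) ([]byte, bool, error) {
		return store.Get(key)
	}

	// Read-through get. Pass updateLru=false when the same key was just accessed,
	// to skip the LRU bookkeeping overhead.
	if _, _, err := cache.Get(read, []byte("account/1"), true); err != nil {
		return err
	}

	// Key and value slices handed to the cache must not be mutated afterwards.
	updates := []dbcache.CacheUpdate{
		{Key: []byte("account/1"), Value: []byte("new-balance")},
		{Key: []byte("account/2"), Value: nil}, // nil Value marks a delete
	}
	return cache.BatchSet(updates)
}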
58 changes: 58 additions & 0 deletions sei-db/db_engine/dbcache/cached_batch.go
@@ -0,0 +1,58 @@
package dbcache

import (
"fmt"

"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

// cachedBatch wraps a types.Batch and applies pending mutations to the cache
// after a successful commit.
type cachedBatch struct {
inner types.Batch
cache Cache
pending []CacheUpdate
}

var _ types.Batch = (*cachedBatch)(nil)

func newCachedBatch(inner types.Batch, cache Cache) *cachedBatch {
return &cachedBatch{inner: inner, cache: cache}
}

func (cb *cachedBatch) Set(key, value []byte) error {
cb.pending = append(cb.pending, CacheUpdate{Key: key, Value: value})
return cb.inner.Set(key, value)
}

func (cb *cachedBatch) Delete(key []byte) error {
Comment on lines +23 to +28

Contributor

Set and Delete append CacheUpdate{Key: key, Value: value} directly using the caller's slices. If the caller reuses or mutates those byte slices after calling Set/Delete but before Commit, won't the pending updates contain corrupted data?

Contributor Author

I made the intentional choice not to make defensive copies in the caching layer, since this is performance-sensitive code running in the hot path. I'm open to discussion on this topic though.

What is your preference when it comes to code like this? Do you think it's better to make defensive copies in order to avoid code that can be a footgun, or is it worth allowing a higher-risk API if it lets us avoid lots of extra data copies?

Reading through the code, I don't think I've done a good job of documenting the requirement not to mutate byte slices after passing them into the cache. I've added that in this branch, in case we decide to keep the zero-copy pattern. (A caller-side sketch of what this means in practice appears after this file's diff.)

Contributor

As long as we add the comments and make it clear that it's not safe to mutate the slices, I think we are good here.

cb.pending = append(cb.pending, CacheUpdate{Key: key, Value: nil})
return cb.inner.Delete(key)
}

func (cb *cachedBatch) Commit(opts types.WriteOptions) error {
if err := cb.inner.Commit(opts); err != nil {
return err
}
if err := cb.cache.BatchSet(cb.pending); err != nil {
// A cache write can only fail during a shutdown when the cache's context is cancelled,
// or when the cache's work pools have their contexts cancelled. Continuing to use the
// cache after shutdown is not permissible, and so this method must return an error.
return fmt.Errorf("failed to update cache after commit: %w", err)
Contributor

If inner.Commit() succeeds but cache.BatchSet() fails, the method returns an error. Would that cause any confusion for the caller, since the data is already persisted? Is the cache update best-effort? If so, maybe we should just return nil but log the error?

Contributor Author

This actually would cause a problem. If the old data is present in the cache, a subsequent read might observe the old data instead of the new data. That is a violation of "read your writes" consistency, which would likely break deterministic transaction execution.

There are exactly two conditions that can cause the cache to return an error, as currently implemented:

  • The cache's context is cancelled
  • One of the work pools used by the cache has its context cancelled

The purpose of this failure mode is to unblock threads during teardown (e.g. at the end of a unit test), and it's not really a recoverable state.

I've added the following documentation:

	if err := cb.cache.BatchSet(cb.pending); err != nil {
		// A cache write can only fail during a shutdown when the cache's context is cancelled,
		// or when the cache's work pools have their contexts cancelled. Continuing to use the
		// cache after shutdown is not permissible, and so this method must return an error.
		return fmt.Errorf("failed to update cache after commit: %w", err)
	}

Additionally, I've added the following text to the cache's godoc:

// Cache describes a read-through cache backed by a Reader.
//
// Although several methods on this interface return errors, the conditions under which a cache
// is permitted to actually return an error are limited at the API level. A cache method
// may return an error under the following conditions:
// - malformed input (e.g. a nil key)
// - the Reader returns an error (for methods that accept a Reader)
// - the cache is shutting down
// - the cache's work pools are shutting down
type Cache interface {

}
cb.pending = nil
return nil
}

func (cb *cachedBatch) Len() int {
return cb.inner.Len()
}

func (cb *cachedBatch) Reset() {
cb.inner.Reset()
cb.pending = nil
}

func (cb *cachedBatch) Close() error {
return cb.inner.Close()
}
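
To make the zero-copy discussion above concrete, here is a minimal caller-side sketch. It is not part of this PR: the dbcache_example package, writeWithReusedBuffer, and its parameters are hypothetical, and only the types.Batch interface and types.WriteOptions used in this file are assumed. Whether this copy belongs in the caller or in the cache layer is exactly the trade-off discussed in the thread above.

package dbcache_example // hypothetical package; this sketch is not part of the PR

import (
	"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

// writeWithReusedBuffer sketches the caller-side consequence of the zero-copy design:
// because the batch (and, after Commit, the cache) retains the slices it is given,
// a caller that builds keys in a reused scratch buffer must copy before each Set.
func writeWithReusedBuffer(batch types.Batch, prefix []byte, ids [][]byte) error {
	scratch := make([]byte, 0, 64)
	for _, id := range ids {
		// Build the key in the reused scratch buffer.
		scratch = append(scratch[:0], prefix...)
		scratch = append(scratch, id...)

		// Copy before handing the key to the batch: the pending CacheUpdate keeps the
		// slice until Commit, so passing scratch directly would let the next loop
		// iteration corrupt the pending update.
		key := append([]byte(nil), scratch...)
		if err := batch.Set(key, []byte("v")); err != nil {
			return err
		}
	}
	// On success Commit also pushes the pending updates into the cache; an error from
	// the cache here means it (or one of its work pools) is shutting down.
	return batch.Commit(types.WriteOptions{})
}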
204 changes: 204 additions & 0 deletions sei-db/db_engine/dbcache/cached_batch_test.go
@@ -0,0 +1,204 @@
package dbcache

import (
"errors"
"testing"

"github.com/stretchr/testify/require"

"github.com/sei-protocol/sei-chain/sei-db/db_engine/types"
)

// ---------------------------------------------------------------------------
// mock batch
// ---------------------------------------------------------------------------

type mockBatch struct {
sets []CacheUpdate
deletes [][]byte
committed bool
closed bool
resetCount int
commitErr error
}

func (m *mockBatch) Set(key, value []byte) error {
m.sets = append(m.sets, CacheUpdate{Key: key, Value: value})
return nil
}

func (m *mockBatch) Delete(key []byte) error {
m.deletes = append(m.deletes, key)
return nil
}

func (m *mockBatch) Commit(opts types.WriteOptions) error {
if m.commitErr != nil {
return m.commitErr
}
m.committed = true
return nil
}

func (m *mockBatch) Len() int {
return len(m.sets) + len(m.deletes)
}

func (m *mockBatch) Reset() {
m.sets = nil
m.deletes = nil
m.committed = false
m.resetCount++
}

func (m *mockBatch) Close() error {
m.closed = true
return nil
}

// ---------------------------------------------------------------------------
// mock cache
// ---------------------------------------------------------------------------

type mockCache struct {
data map[string][]byte
batchSetErr error
}

func newMockCache() *mockCache {
return &mockCache{data: make(map[string][]byte)}
}

func (mc *mockCache) Get(_ Reader, key []byte, _ bool) ([]byte, bool, error) {
v, ok := mc.data[string(key)]
return v, ok, nil
}

func (mc *mockCache) BatchGet(_ Reader, keys map[string]types.BatchGetResult) error {
for k := range keys {
v, ok := mc.data[k]
if ok {
keys[k] = types.BatchGetResult{Value: v}
}
}
return nil
}

func (mc *mockCache) Set(key, value []byte) {
mc.data[string(key)] = value
}

func (mc *mockCache) Delete(key []byte) {
delete(mc.data, string(key))
}

func (mc *mockCache) BatchSet(updates []CacheUpdate) error {
if mc.batchSetErr != nil {
return mc.batchSetErr
}
for _, u := range updates {
if u.IsDelete() {
delete(mc.data, string(u.Key))
} else {
mc.data[string(u.Key)] = u.Value
}
}
return nil
}

// ---------------------------------------------------------------------------
// tests
// ---------------------------------------------------------------------------

func TestCachedBatchCommitUpdatesCacheOnSuccess(t *testing.T) {
inner := &mockBatch{}
cache := newMockCache()
cb := newCachedBatch(inner, cache)

require.NoError(t, cb.Set([]byte("a"), []byte("1")))
require.NoError(t, cb.Set([]byte("b"), []byte("2")))
require.NoError(t, cb.Commit(types.WriteOptions{}))

require.True(t, inner.committed)
v, ok := cache.data["a"]
require.True(t, ok)
require.Equal(t, []byte("1"), v)
v, ok = cache.data["b"]
require.True(t, ok)
require.Equal(t, []byte("2"), v)
}

func TestCachedBatchCommitDoesNotUpdateCacheOnInnerFailure(t *testing.T) {
inner := &mockBatch{commitErr: errors.New("disk full")}
cache := newMockCache()
cb := newCachedBatch(inner, cache)

require.NoError(t, cb.Set([]byte("a"), []byte("1")))
err := cb.Commit(types.WriteOptions{})

require.Error(t, err)
require.Contains(t, err.Error(), "disk full")
_, ok := cache.data["a"]
require.False(t, ok, "cache should not be updated when inner commit fails")
}

func TestCachedBatchCommitReturnsCacheError(t *testing.T) {
inner := &mockBatch{}
cache := newMockCache()
cache.batchSetErr = errors.New("cache broken")
cb := newCachedBatch(inner, cache)

require.NoError(t, cb.Set([]byte("a"), []byte("1")))
err := cb.Commit(types.WriteOptions{})

require.Error(t, err)
require.Contains(t, err.Error(), "cache broken")
require.True(t, inner.committed, "inner batch should have committed")
}

func TestCachedBatchDeleteMarksKeyForRemoval(t *testing.T) {
inner := &mockBatch{}
cache := newMockCache()
cache.Set([]byte("x"), []byte("old"))
cb := newCachedBatch(inner, cache)

require.NoError(t, cb.Delete([]byte("x")))
require.NoError(t, cb.Commit(types.WriteOptions{}))

_, ok := cache.data["x"]
require.False(t, ok, "key should be deleted from cache")
}

func TestCachedBatchResetClearsPending(t *testing.T) {
inner := &mockBatch{}
cache := newMockCache()
cb := newCachedBatch(inner, cache)

require.NoError(t, cb.Set([]byte("a"), []byte("1")))
require.NoError(t, cb.Set([]byte("b"), []byte("2")))
cb.Reset()

require.NoError(t, cb.Commit(types.WriteOptions{}))

require.Empty(t, cache.data, "cache should have no entries after reset + commit")
}

func TestCachedBatchLenDelegatesToInner(t *testing.T) {
inner := &mockBatch{}
cache := newMockCache()
cb := newCachedBatch(inner, cache)

require.Equal(t, 0, cb.Len())
require.NoError(t, cb.Set([]byte("a"), []byte("1")))
require.NoError(t, cb.Delete([]byte("b")))
require.Equal(t, 2, cb.Len())
}

func TestCachedBatchCloseDelegatesToInner(t *testing.T) {
inner := &mockBatch{}
cache := newMockCache()
cb := newCachedBatch(inner, cache)

require.NoError(t, cb.Close())
require.True(t, inner.closed)
}