Skip to content

Commit 30ec581

Browse files
ARR4Nalarso16
authored andcommitted
feat(ffi): Database.Close() guarantees proposals committed or freed (#1349)
Explicit freeing of proposals propagated through `libevm` (i.e. `geth`) plumbing has proven difficult when not being committed as they are simply dropped for the GC to collect. Furthermore, strict ordering of calls to `Proposal.Drop()` (or `Commit()`) before `Database.Close()` is required to avoid segfaults. This PR implements a fix for both issues: 1. All new `Proposal`s have a GC finalizer attached, which calls `Drop()`. This is safe because it is a no-op if called twice or after a call to `Commit()`. 2. The `Database` has a `sync.WaitGroup` introduced, which tracks all outstanding proposals. Calls to `Commit()` / `Drop()` decrement the group counter (only once per `Proposal`). 3. `Database.Close()` waits on the `WaitGroup` before freeing its own handle, avoiding segfaults. Assuming that all calls to `Database.Propose()` and `Proposal.Propose()` occur _before_ the call to `Database.Close()` then this is a correct usage of `sync.WaitGroup`'s documented requirement for ordering of calls to `Add()` and `Wait()`. An integration test demonstrates blocking and eventual return of `Database.Close()`, specifically due to the unreachability of un-dropped, un-committed `Proposal`s, resulting in their finalizers decrementing the `WaitGroup`. --------- Signed-off-by: Arran Schlosberg <[email protected]> Co-authored-by: Austin Larson <[email protected]>
1 parent f4342cf commit 30ec581

File tree

6 files changed

+159
-56
lines changed

6 files changed

+159
-56
lines changed

ffi/firewood.go

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@ import "C"
2727

2828
import (
2929
"bytes"
30+
"context"
3031
"errors"
3132
"fmt"
3233
"runtime"
34+
"sync"
35+
"time"
3336
)
3437

3538
// These constants are used to identify errors returned by the Firewood Rust FFI.
@@ -49,7 +52,8 @@ type Database struct {
4952
// handle is returned and accepted by cgo functions. It MUST be treated as
5053
// an opaque value without special meaning.
5154
// https://en.wikipedia.org/wiki/Blinkenlights
52-
handle *C.DatabaseHandle
55+
handle *C.DatabaseHandle
56+
proposals sync.WaitGroup
5357
}
5458

5559
// Config configures the opening of a [Database].
@@ -139,6 +143,9 @@ func (db *Database) Update(keys, vals [][]byte) ([]byte, error) {
139143
return getHashKeyFromHashResult(C.fwd_batch(db.handle, kvp))
140144
}
141145

146+
// Propose creates a new proposal with the given keys and values. The proposal
147+
// is not committed until [Proposal.Commit] is called. See [Database.Close] re
148+
// freeing proposals.
142149
func (db *Database) Propose(keys, vals [][]byte) (*Proposal, error) {
143150
if db.handle == nil {
144151
return nil, errDBClosed
@@ -151,8 +158,7 @@ func (db *Database) Propose(keys, vals [][]byte) (*Proposal, error) {
151158
if err != nil {
152159
return nil, err
153160
}
154-
155-
return getProposalFromProposalResult(C.fwd_propose_on_db(db.handle, kvp), db)
161+
return getProposalFromProposalResult(C.fwd_propose_on_db(db.handle, kvp), &db.proposals)
156162
}
157163

158164
// Get retrieves the value for the given key. It always returns a nil error.
@@ -245,19 +251,43 @@ func (db *Database) Revision(root []byte) (*Revision, error) {
245251
return rev, nil
246252
}
247253

254+
// defaultCloseTimeout is the duration by which the [context.Context] passed to
255+
// [Database.Close] is limited. A minute is arbitrary but well above what is
256+
// reasonably required, and is chosen simply to avoid permanently blocking.
257+
var defaultCloseTimeout = time.Minute
258+
248259
// Close releases the memory associated with the Database.
249260
//
250-
// This is not safe to call while there are any outstanding Proposals. All proposals
251-
// must be freed or committed before calling this.
261+
// This blocks until all outstanding Proposals are either unreachable or one of
262+
// [Proposal.Commit] or [Proposal.Drop] has been called on them. Unreachable
263+
// proposals will be automatically dropped before Close returns, unless an
264+
// alternate GC finalizer is set on them.
252265
//
253-
// This is safe to call if the pointer is nil, in which case it does nothing. The
254-
// pointer will be set to nil after freeing to prevent double free. However, it is
255-
// not safe to call this method concurrently from multiple goroutines.
256-
func (db *Database) Close() error {
266+
// This is safe to call if the handle pointer is nil, in which case it does
267+
// nothing. The pointer will be set to nil after freeing to prevent double free.
268+
// However, it is not safe to call this method concurrently from multiple
269+
// goroutines.
270+
func (db *Database) Close(ctx context.Context) error {
257271
if db.handle == nil {
258272
return nil
259273
}
260274

275+
go runtime.GC()
276+
277+
done := make(chan struct{})
278+
go func() {
279+
db.proposals.Wait()
280+
close(done)
281+
}()
282+
283+
ctx, cancel := context.WithTimeout(ctx, defaultCloseTimeout)
284+
defer cancel()
285+
select {
286+
case <-done:
287+
case <-ctx.Done():
288+
return fmt.Errorf("at least one reachable %T neither dropped nor committed", &Proposal{})
289+
}
290+
261291
if err := getErrorFromVoidResult(C.fwd_close_db(db.handle)); err != nil {
262292
return fmt.Errorf("unexpected error when closing database: %w", err)
263293
}

ffi/firewood_test.go

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package ffi
55

66
import (
77
"bytes"
8+
"context"
89
"encoding/hex"
910
"errors"
1011
"fmt"
@@ -59,14 +60,14 @@ var (
5960
expectedRoots map[string]string
6061
)
6162

62-
func inferHashingMode() (string, error) {
63+
func inferHashingMode(ctx context.Context) (string, error) {
6364
dbFile := filepath.Join(os.TempDir(), "test.db")
64-
db, closeDB, err := newDatabase(dbFile)
65+
db, err := newDatabase(dbFile)
6566
if err != nil {
6667
return "", err
6768
}
6869
defer func() {
69-
_ = closeDB()
70+
_ = db.Close(ctx)
7071
_ = os.Remove(dbFile)
7172
}()
7273

@@ -111,7 +112,7 @@ func TestMain(m *testing.M) {
111112
// Otherwise, infer the hash mode from an empty database.
112113
hashMode := os.Getenv("TEST_FIREWOOD_HASH_MODE")
113114
if hashMode == "" {
114-
inferredHashMode, err := inferHashingMode()
115+
inferredHashMode, err := inferHashingMode(context.Background())
115116
if err != nil {
116117
fmt.Fprintf(os.Stderr, "failed to infer hash mode %v\n", err)
117118
os.Exit(1)
@@ -133,15 +134,15 @@ func newTestDatabase(t *testing.T, configureFns ...func(*Config)) *Database {
133134
r := require.New(t)
134135

135136
dbFile := filepath.Join(t.TempDir(), "test.db")
136-
db, closeDB, err := newDatabase(dbFile, configureFns...)
137+
db, err := newDatabase(dbFile, configureFns...)
137138
r.NoError(err)
138139
t.Cleanup(func() {
139-
r.NoError(closeDB())
140+
r.NoError(db.Close(context.Background())) //nolint:usetesting // t.Context() will already be cancelled
140141
})
141142
return db
142143
}
143144

144-
func newDatabase(dbFile string, configureFns ...func(*Config)) (*Database, func() error, error) {
145+
func newDatabase(dbFile string, configureFns ...func(*Config)) (*Database, error) {
145146
conf := DefaultConfig()
146147
conf.Truncate = true // in tests, we use filepath.Join, which creates an empty file
147148
for _, fn := range configureFns {
@@ -150,9 +151,9 @@ func newDatabase(dbFile string, configureFns ...func(*Config)) (*Database, func(
150151

151152
f, err := New(dbFile, conf)
152153
if err != nil {
153-
return nil, nil, fmt.Errorf("failed to create new database at filepath %q: %w", dbFile, err)
154+
return nil, fmt.Errorf("failed to create new database at filepath %q: %w", dbFile, err)
154155
}
155-
return f, f.Close, nil
156+
return f, nil
156157
}
157158

158159
func TestUpdateSingleKV(t *testing.T) {
@@ -196,7 +197,7 @@ func TestTruncateDatabase(t *testing.T) {
196197
r.NoError(err)
197198

198199
// Close the database.
199-
r.NoError(db.Close())
200+
r.NoError(db.Close(t.Context()))
200201

201202
// Reopen the database with truncate enabled.
202203
db, err = New(dbFile, config)
@@ -210,16 +211,16 @@ func TestTruncateDatabase(t *testing.T) {
210211
r.NoError(err)
211212
r.Equal(expectedHash, hash, "Root hash mismatch after truncation")
212213

213-
r.NoError(db.Close())
214+
r.NoError(db.Close(t.Context()))
214215
}
215216

216217
func TestClosedDatabase(t *testing.T) {
217218
r := require.New(t)
218219
dbFile := filepath.Join(t.TempDir(), "test.db")
219-
db, _, err := newDatabase(dbFile)
220+
db, err := newDatabase(dbFile)
220221
r.NoError(err)
221222

222-
r.NoError(db.Close())
223+
r.NoError(db.Close(t.Context()))
223224

224225
_, err = db.Root()
225226
r.ErrorIs(err, errDBClosed)
@@ -231,7 +232,7 @@ func TestClosedDatabase(t *testing.T) {
231232
r.Empty(root)
232233
r.ErrorIs(err, errDBClosed)
233234

234-
r.NoError(db.Close())
235+
r.NoError(db.Close(t.Context()))
235236
}
236237

237238
func keyForTest(i int) []byte {
@@ -1171,6 +1172,63 @@ func TestGetFromRootParallel(t *testing.T) {
11711172
}
11721173
}
11731174

1175+
func TestProposalHandlesFreed(t *testing.T) {
1176+
t.Parallel()
1177+
1178+
db, err := newDatabase(filepath.Join(t.TempDir(), "test_GC_drops_proposal.db"))
1179+
require.NoError(t, err)
1180+
1181+
// These MUST NOT be committed nor dropped as they demonstrate that the GC
1182+
// finalizer does it for us.
1183+
p0, err := db.Propose(kvForTest(1))
1184+
require.NoErrorf(t, err, "%T.Propose(...)", db)
1185+
p1, err := p0.Propose(kvForTest(1))
1186+
require.NoErrorf(t, err, "%T.Propose(...)", p0)
1187+
1188+
// Demonstrates that explicit [Proposal.Commit] and [Proposal.Drop] calls
1189+
// are sufficient to unblock [Database.Close].
1190+
var keep []*Proposal
1191+
for name, free := range map[string](func(*Proposal) error){
1192+
"Commit": (*Proposal).Commit,
1193+
"Drop": (*Proposal).Drop,
1194+
} {
1195+
p, err := db.Propose(kvForTest(1))
1196+
require.NoErrorf(t, err, "%T.Propose(...)", db)
1197+
require.NoErrorf(t, free(p), "%T.%s()", p, name)
1198+
keep = append(keep, p)
1199+
}
1200+
1201+
done := make(chan struct{})
1202+
go func() {
1203+
require.NoErrorf(t, db.Close(t.Context()), "%T.Close()", db)
1204+
close(done)
1205+
}()
1206+
1207+
select {
1208+
case <-done:
1209+
t.Errorf("%T.Close() returned with undropped %T", db, p0) //nolint:forbidigo // Use of require is impossible without a hack like require.False(true)
1210+
case <-time.After(300 * time.Millisecond):
1211+
// TODO(arr4n) use `synctest` package when at Go 1.25
1212+
}
1213+
1214+
runtime.KeepAlive(p0)
1215+
runtime.KeepAlive(p1)
1216+
p0 = nil
1217+
p1 = nil //nolint:ineffassign // Makes the value unreachable, allowing the finalizer to call Drop()
1218+
1219+
// In practice there's no need to call [runtime.GC] if [Database.Close] is
1220+
// called after all proposals are unreachable, as it does it itself.
1221+
runtime.GC()
1222+
// Note that [Database.Close] waits for outstanding proposals, so this would
1223+
// block permanently if the unreachability of `p0` and `p1` didn't result in
1224+
// their [Proposal.Drop] methods being called.
1225+
<-done
1226+
1227+
for _, p := range keep {
1228+
runtime.KeepAlive(p)
1229+
}
1230+
}
1231+
11741232
type kvIter interface {
11751233
SetBatchSize(int)
11761234
Next() bool

ffi/kvbackend.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,6 @@ type kVBackend interface {
3838
// commits happen on a rolling basis.
3939
// Length of the root slice is guaranteed to be common.HashLength.
4040
Commit(root []byte) error
41-
42-
// Close closes the backend and releases all held resources.
43-
Close() error
4441
}
4542

4643
// Prefetch is a no-op since we don't need to prefetch for Firewood.

0 commit comments

Comments
 (0)