Skip to content

Commit bac68f5

Browse files
authored
feat(ffi): add keepalive struct to own waitgroup logic (#1437)
Expanding on Arran and Austin's work, I added the `databaseKeepAliveHandle` to manage the WaitGroup logic for keeping the database alive while various objects needed. In the future, the `RangeProof` object will also use this keep-alive handle, but only during certain stages of its lifecycle. Because of this, the disown method on the keep-alive handle is capable of being called on the nil receiver without causing a panic.
1 parent 8165c36 commit bac68f5

File tree

5 files changed

+308
-79
lines changed

5 files changed

+308
-79
lines changed

ffi/firewood.go

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,9 @@ const RootLength = C.sizeof_HashKey
3939
type Hash [RootLength]byte
4040

4141
var (
42-
EmptyRoot Hash
43-
errDBClosed = errors.New("firewood database already closed")
42+
EmptyRoot Hash
43+
errDBClosed = errors.New("firewood database already closed")
44+
ErrActiveKeepAliveHandles = errors.New("cannot close database with active keep-alive handles")
4445
)
4546

4647
// A Database is a handle to a Firewood database.
@@ -252,10 +253,11 @@ var defaultCloseTimeout = time.Minute
252253

253254
// Close releases the memory associated with the Database.
254255
//
255-
// This blocks until all outstanding Proposals are either unreachable or one of
256-
// [Proposal.Commit] or [Proposal.Drop] has been called on them. Unreachable
257-
// proposals will be automatically dropped before Close returns, unless an
258-
// alternate GC finalizer is set on them.
256+
// This blocks until all outstanding keep-alive handles are disowned. That is,
257+
// until all Revisions and Proposals created from this Database are either
258+
// unreachable or one of [Proposal.Commit], [Proposal.Drop], or [Revision.Drop]
259+
// has been called on them. Unreachable objects will be automatically dropped
260+
// before Close returns, unless an alternate GC finalizer is set on them.
259261
//
260262
// This is safe to call if the handle pointer is nil, in which case it does
261263
// nothing. The pointer will be set to nil after freeing to prevent double free.
@@ -279,7 +281,7 @@ func (db *Database) Close(ctx context.Context) error {
279281
select {
280282
case <-done:
281283
case <-ctx.Done():
282-
return fmt.Errorf("at least one reachable %T neither dropped nor committed", &Proposal{})
284+
return ErrActiveKeepAliveHandles
283285
}
284286

285287
if err := getErrorFromVoidResult(C.fwd_close_db(db.handle)); err != nil {

ffi/firewood_test.go

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1566,3 +1566,163 @@ func TestNilVsEmptyValue(t *testing.T) {
15661566
r.NotNil(got, "key4 should exist")
15671567
r.Empty(got, "key4 should have empty value")
15681568
}
1569+
1570+
// TestCloseWithCancelledContext verifies that Database.Close returns
1571+
// ErrActiveKeepAliveHandles when the context is cancelled before handles are dropped.
1572+
func TestCloseWithCancelledContext(t *testing.T) {
1573+
r := require.New(t)
1574+
dbFile := filepath.Join(t.TempDir(), "test.db")
1575+
db, err := newDatabase(dbFile)
1576+
r.NoError(err)
1577+
1578+
// Create a proposal to keep a handle active
1579+
keys, vals := kvForTest(1)
1580+
proposal, err := db.Propose(keys, vals)
1581+
r.NoError(err)
1582+
1583+
ctx, cancel := context.WithCancel(t.Context())
1584+
1585+
var wg sync.WaitGroup
1586+
wg.Add(1)
1587+
go func() {
1588+
defer wg.Done()
1589+
1590+
err = db.Close(ctx)
1591+
}()
1592+
1593+
cancel()
1594+
wg.Wait()
1595+
1596+
r.ErrorIs(err, ErrActiveKeepAliveHandles, "Close should return ErrActiveKeepAliveHandles when context is cancelled")
1597+
1598+
// Drop the proposal
1599+
r.NoError(proposal.Drop())
1600+
1601+
// Now Close should succeed
1602+
r.NoError(db.Close(t.Context()))
1603+
}
1604+
1605+
// TestCloseWithShortTimeout verifies that Database.Close returns
1606+
// ErrActiveKeepAliveHandles when the context times out before handles are dropped.
1607+
func TestCloseWithShortTimeout(t *testing.T) {
1608+
r := require.New(t)
1609+
dbFile := filepath.Join(t.TempDir(), "test.db")
1610+
db, err := newDatabase(dbFile)
1611+
r.NoError(err)
1612+
1613+
// Create a revision to keep a handle active
1614+
keys, vals := kvForTest(1)
1615+
root, err := db.Update(keys, vals)
1616+
r.NoError(err)
1617+
1618+
revision, err := db.Revision(root)
1619+
r.NoError(err)
1620+
1621+
// Create a context with a very short timeout (100ms)
1622+
ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond)
1623+
defer cancel()
1624+
1625+
// Record start time to verify timeout occurred quickly
1626+
start := time.Now()
1627+
1628+
// Close should return ErrActiveKeepAliveHandles after timeout
1629+
err = db.Close(ctx)
1630+
elapsed := time.Since(start)
1631+
1632+
r.ErrorIs(err, ErrActiveKeepAliveHandles, "Close should return ErrActiveKeepAliveHandles when context times out")
1633+
r.Less(elapsed, defaultCloseTimeout, "Close should timeout quickly, not wait for default timeout")
1634+
1635+
// Drop the revision
1636+
r.NoError(revision.Drop())
1637+
1638+
// Now Close should succeed
1639+
r.NoError(db.Close(t.Context()))
1640+
}
1641+
1642+
// TestCloseWithMultipleActiveHandles verifies that Database.Close returns
1643+
// ErrActiveKeepAliveHandles when multiple handles are active and context is cancelled.
1644+
func TestCloseWithMultipleActiveHandles(t *testing.T) {
1645+
r := require.New(t)
1646+
dbFile := filepath.Join(t.TempDir(), "test.db")
1647+
db, err := newDatabase(dbFile)
1648+
r.NoError(err)
1649+
1650+
// Create multiple proposals
1651+
keys1, vals1 := kvForTest(3)
1652+
proposal1, err := db.Propose(keys1[:1], vals1[:1])
1653+
r.NoError(err)
1654+
proposal2, err := db.Propose(keys1[1:2], vals1[1:2])
1655+
r.NoError(err)
1656+
proposal3, err := db.Propose(keys1[2:3], vals1[2:3])
1657+
r.NoError(err)
1658+
1659+
// Create multiple revisions
1660+
root1, err := db.Update([][]byte{keyForTest(10)}, [][]byte{valForTest(10)})
1661+
r.NoError(err)
1662+
root2, err := db.Update([][]byte{keyForTest(20)}, [][]byte{valForTest(20)})
1663+
r.NoError(err)
1664+
1665+
revision1, err := db.Revision(root1)
1666+
r.NoError(err)
1667+
revision2, err := db.Revision(root2)
1668+
r.NoError(err)
1669+
1670+
// Create a cancelled context
1671+
ctx, cancel := context.WithCancel(t.Context())
1672+
cancel()
1673+
1674+
// Close should return ErrActiveKeepAliveHandles
1675+
err = db.Close(ctx)
1676+
r.ErrorIs(err, ErrActiveKeepAliveHandles, "Close should return ErrActiveKeepAliveHandles with multiple active handles")
1677+
1678+
// Drop all handles
1679+
r.NoError(proposal1.Drop())
1680+
r.NoError(proposal2.Drop())
1681+
r.NoError(proposal3.Drop())
1682+
r.NoError(revision1.Drop())
1683+
r.NoError(revision2.Drop())
1684+
1685+
// Now Close should succeed
1686+
r.NoError(db.Close(t.Context()))
1687+
}
1688+
1689+
// TestCloseSucceedsWhenHandlesDroppedInTime verifies that Database.Close succeeds
1690+
// when all handles are dropped before the context timeout.
1691+
func TestCloseSucceedsWhenHandlesDroppedInTime(t *testing.T) {
1692+
r := require.New(t)
1693+
dbFile := filepath.Join(t.TempDir(), "test.db")
1694+
db, err := newDatabase(dbFile)
1695+
r.NoError(err)
1696+
1697+
// Create two active proposals
1698+
keys, vals := kvForTest(2)
1699+
proposal1, err := db.Propose(keys[:1], vals[:1])
1700+
r.NoError(err)
1701+
proposal2, err := db.Propose(keys[1:2], vals[1:2])
1702+
r.NoError(err)
1703+
1704+
// Create a context with a reasonable timeout
1705+
ctx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
1706+
defer cancel()
1707+
1708+
// Channel to receive Close result
1709+
closeDone := make(chan error, 1)
1710+
1711+
// Start Close in a goroutine
1712+
go func() {
1713+
closeDone <- db.Close(ctx)
1714+
}()
1715+
1716+
// Drop handles after a short delay (before timeout)
1717+
time.Sleep(100 * time.Millisecond)
1718+
r.NoError(proposal1.Drop())
1719+
r.NoError(proposal2.Drop())
1720+
1721+
// Close should succeed (not timeout)
1722+
select {
1723+
case err := <-closeDone:
1724+
r.NoError(err, "Close should succeed when handles are dropped before timeout")
1725+
case <-time.After(defaultCloseTimeout):
1726+
r.Fail("Close did not complete in time")
1727+
}
1728+
}

ffi/keepalive.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// Copyright (C) 2025, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE.md for licensing terms.
3+
4+
package ffi
5+
6+
import "sync"
7+
8+
// databaseKeepAliveHandle is added to types that hold a lease on the database
9+
// to ensure it is not closed while those types are still in use.
10+
//
11+
// This is necessary to prevent use-after-free bugs where a type holding a
12+
// reference to the database outlives the database itself. Even attempting to
13+
// free those objects after the database has been closed will lead to undefined
14+
// behavior, as a part of the underling Rust object will have already been freed.
15+
type databaseKeepAliveHandle struct {
16+
mu sync.Mutex
17+
// [Database.Close] blocks on this WaitGroup, which is set and incremented
18+
// by [newKeepAliveHandle], and decremented by
19+
// [databaseKeepAliveHandle.disown].
20+
outstandingHandles *sync.WaitGroup
21+
}
22+
23+
// init initializes the keep-alive handle to track a new outstanding handle.
24+
func (h *databaseKeepAliveHandle) init(wg *sync.WaitGroup) {
25+
// lock not necessary today, but will be necessary in the future for types
26+
// that initialize the handle at some point after construction (#1429).
27+
h.mu.Lock()
28+
defer h.mu.Unlock()
29+
30+
if h.outstandingHandles != nil {
31+
// setting the finalizer twice will also panic, so we're panicking
32+
// early to provide better context
33+
panic("keep-alive handle already initialized")
34+
}
35+
36+
h.outstandingHandles = wg
37+
h.outstandingHandles.Add(1)
38+
}
39+
40+
// disown indicates that the object owning this handle is no longer keeping the
41+
// database alive. If [attemptDisown] returns an error, disowning will only occur
42+
// if [disownEvenOnErr] is true.
43+
//
44+
// This method is safe to call multiple times; subsequent calls after the first
45+
// will continue to invoke [attemptDisown] but will not decrement the wait group
46+
// unless [databaseKeepAliveHandle.init] was called again in the meantime.
47+
func (h *databaseKeepAliveHandle) disown(disownEvenOnErr bool, attemptDisown func() error) error {
48+
h.mu.Lock()
49+
defer h.mu.Unlock()
50+
51+
err := attemptDisown()
52+
53+
if (err == nil || disownEvenOnErr) && h.outstandingHandles != nil {
54+
h.outstandingHandles.Done()
55+
// prevent calling `Done` multiple times if disown is called again, which
56+
// may happen when the finalizer runs after an explicit call to Drop or Commit.
57+
h.outstandingHandles = nil
58+
}
59+
60+
return err
61+
}

ffi/proposal.go

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ import (
2020

2121
var errDroppedProposal = errors.New("proposal already dropped")
2222

23+
// Proposal represents a set of proposed changes to be committed to the database.
24+
// Proposals are created via [Database.Propose] or [Proposal.Propose], and must be
25+
// either committed with [Proposal.Commit] or released with [Proposal.Drop].
26+
//
27+
// Proposals must be committed or dropped before the associated database is
28+
// closed. A finalizer is set on each Proposal to ensure that Drop is called
29+
// when the Proposal is garbage collected, but relying on finalizers is not
30+
// recommended. Failing to commit or drop a proposal before the database is
31+
// closed will cause it to block or fail.
2332
type Proposal struct {
2433
// handle is an opaque pointer to the proposal within Firewood. It should be
2534
// passed to the C FFI functions that operate on proposals
@@ -29,14 +38,14 @@ type Proposal struct {
2938
// Calls to `C.fwd_commit_proposal` and `C.fwd_free_proposal` will invalidate
3039
// this handle, so it should not be used after those calls.
3140
handle *C.ProposalHandle
32-
disown sync.Mutex
33-
// [Database.Close] blocks on this WaitGroup, which is incremented by
34-
// [getProposalFromProposalResult], and decremented by either
35-
// [Proposal.Commit] or [Proposal.Drop] (when the handle is disowned).
36-
outstandingHandles *sync.WaitGroup
3741

38-
// The proposal root hash.
42+
// root is the root hash of the proposal and the expected root hash after commit.
3943
root Hash
44+
45+
// keepAliveHandle is used to keep the database alive while this proposal is
46+
// in use. It is initialized when the proposal is created and disowned after
47+
// [Proposal.Commit] or [Proposal.Drop] is called.
48+
keepAliveHandle databaseKeepAliveHandle
4049
}
4150

4251
// Root retrieves the root hash of the proposal.
@@ -93,37 +102,25 @@ func (p *Proposal) Propose(keys, vals [][]byte) (*Proposal, error) {
93102
if err != nil {
94103
return nil, err
95104
}
96-
return getProposalFromProposalResult(C.fwd_propose_on_proposal(p.handle, kvp), p.outstandingHandles)
97-
}
98-
99-
// disownHandle is the common path of [Proposal.Commit] and [Proposal.Drop], the
100-
// `fn` argument defining the method-specific behaviour.
101-
func (p *Proposal) disownHandle(fn func(*C.ProposalHandle) error, disownEvenOnErr bool) error {
102-
p.disown.Lock()
103-
defer p.disown.Unlock()
104-
105-
if p.handle == nil {
106-
return errDroppedProposal
107-
}
108-
err := fn(p.handle)
109-
if disownEvenOnErr || err == nil {
110-
p.handle = nil
111-
p.outstandingHandles.Done()
112-
}
113-
return err
105+
return getProposalFromProposalResult(C.fwd_propose_on_proposal(p.handle, kvp), p.keepAliveHandle.outstandingHandles)
114106
}
115107

116108
// Commit commits the proposal and returns any errors.
117109
//
118110
// The proposal handle is no longer valid after this call, but the root
119-
// hash can still be retrieved using Root().
111+
// hash can still be retrieved using [Proposal.Root].
120112
func (p *Proposal) Commit() error {
121-
return p.disownHandle(commitProposal, true)
122-
}
113+
return p.keepAliveHandle.disown(true /* evenOnError */, func() error {
114+
if p.handle == nil {
115+
return errDroppedProposal
116+
}
117+
118+
_, err := getHashKeyFromHashResult(C.fwd_commit_proposal(p.handle))
123119

124-
func commitProposal(h *C.ProposalHandle) error {
125-
_, err := getHashKeyFromHashResult(C.fwd_commit_proposal(h))
126-
return err
120+
p.handle = nil
121+
122+
return err
123+
})
127124
}
128125

129126
// Drop releases the memory associated with the Proposal.
@@ -132,33 +129,34 @@ func commitProposal(h *C.ProposalHandle) error {
132129
//
133130
// The pointer will be set to nil after freeing to prevent double free.
134131
func (p *Proposal) Drop() error {
135-
if err := p.disownHandle(dropProposal, false); err != nil && err != errDroppedProposal {
136-
return err
137-
}
138-
return nil
139-
}
132+
return p.keepAliveHandle.disown(false /* evenOnError */, func() error {
133+
if p.handle == nil {
134+
return nil
135+
}
140136

141-
func dropProposal(h *C.ProposalHandle) error {
142-
if err := getErrorFromVoidResult(C.fwd_free_proposal(h)); err != nil {
143-
return fmt.Errorf("%w: %w", errFreeingValue, err)
144-
}
145-
return nil
137+
if err := getErrorFromVoidResult(C.fwd_free_proposal(p.handle)); err != nil {
138+
return fmt.Errorf("%w: %w", errFreeingValue, err)
139+
}
140+
141+
p.handle = nil
142+
143+
return nil
144+
})
146145
}
147146

148147
// getProposalFromProposalResult converts a C.ProposalResult to a Proposal or error.
149-
func getProposalFromProposalResult(result C.ProposalResult, outstandingHandles *sync.WaitGroup) (*Proposal, error) {
148+
func getProposalFromProposalResult(result C.ProposalResult, wg *sync.WaitGroup) (*Proposal, error) {
150149
switch result.tag {
151150
case C.ProposalResult_NullHandlePointer:
152151
return nil, errDBClosed
153152
case C.ProposalResult_Ok:
154153
body := (*C.ProposalResult_Ok_Body)(unsafe.Pointer(&result.anon0))
155154
hashKey := *(*Hash)(unsafe.Pointer(&body.root_hash._0))
156155
proposal := &Proposal{
157-
handle: body.handle,
158-
root: hashKey,
159-
outstandingHandles: outstandingHandles,
156+
handle: body.handle,
157+
root: hashKey,
160158
}
161-
outstandingHandles.Add(1)
159+
proposal.keepAliveHandle.init(wg)
162160
runtime.SetFinalizer(proposal, (*Proposal).Drop)
163161
return proposal, nil
164162
case C.ProposalResult_Err:

0 commit comments

Comments
 (0)