Skip to content

Commit 02bee6c

Browse files
snapshots: send manifest
1 parent 64ae55c commit 02bee6c

File tree

8 files changed

+118
-89
lines changed

8 files changed

+118
-89
lines changed

src/app/firedancer-dev/commands/backtest.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ backtest_topo( config_t * config ) {
302302

303303
/* Replay decoded manifest dcache topo obj */
304304
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
305-
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
305+
fd_pod_insertf_ulong( topo->props, (4 * 1UL << 30UL), "obj.%lu.data_sz", replay_manifest_dcache->id );
306306
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
307307

308308
fd_topob_tile_uses( topo, snapin_tile, funk_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );

src/app/firedancer-dev/commands/snapshot_load.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ snapshot_load_topo( config_t * config,
4646
manifest */
4747
fd_topob_wksp( topo, "replay_manif" );
4848
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
49-
fd_pod_insertf_ulong( topo->props, 1UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
49+
fd_pod_insertf_ulong( topo->props, (4 * 1UL << 30UL), "obj.%lu.data_sz", replay_manifest_dcache->id );
5050
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
5151

5252
/* read() tile */

src/app/firedancer/topology.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ fd_topo_initialize( config_t * config ) {
377377

378378
/* Replay decoded manifest dcache topo obj */
379379
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
380-
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
380+
fd_pod_insertf_ulong( topo->props, (4 * 1UL << 30UL), "obj.%lu.data_sz", replay_manifest_dcache->id );
381381
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
382382

383383
ushort parsed_tile_to_cpu[ FD_TILE_MAX ];

src/discof/replay/fd_replay_tile.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,15 @@ restore_slot_ctx( fd_replay_tile_ctx_t * ctx,
656656

657657
fd_solana_manifest_global_t * manifest_global
658658
= (fd_solana_manifest_global_t *)fd_chunk_to_laddr( fd_wksp_containing( ctx->manifest_dcache ), chunk );
659+
660+
/* If the bank already exists, that means that we have already
661+
restored the bank for this slot. */
662+
fd_bank_t * bank = fd_banks_get_bank( ctx->slot_ctx->banks, manifest_global->bank.slot );
663+
if( FD_UNLIKELY( !!bank ) ) {
664+
FD_LOG_NOTICE(( "The bank for slot %lu already exists, skipping slot context restore", manifest_global->bank.slot ));
665+
return;
666+
}
667+
659668
fd_exec_slot_ctx_t * recovered_slot_ctx = fd_exec_slot_ctx_recover( ctx->slot_ctx,
660669
manifest_global,
661670
ctx->runtime_spad );
@@ -985,6 +994,7 @@ on_snapshot_message( fd_replay_tile_ctx_t * ctx,
985994
}
986995
case FD_FULL_SNAPSHOT_MANIFEST_EXTERNAL:
987996
case FD_INCREMENTAL_SNAPSHOT_MANIFEST_EXTERNAL: {
997+
FD_LOG_NOTICE(( "Received decoded global snapshot manifest message" ));
988998
/* We may either receive a full snapshot manifest or an
989999
incremental snapshot manifest. Note that this external message
9901000
id is only used temporarily because replay cannot yet receive

src/discof/restore/fd_snapin_tile.c

Lines changed: 87 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,11 @@ struct fd_snapin_tile {
6060
/* State machine */
6161
int state;
6262

63+
/* Stem pointer
64+
TODO: a hack to store the stem pointer so that it can be used
65+
in snapshot parser callbacks. */
66+
fd_stem_context_t * stem;
67+
6368
/* Account insertion */
6469
fd_funk_t funk[1];
6570
fd_funk_txn_t * funk_txn;
@@ -82,11 +87,14 @@ struct fd_snapin_tile {
8287
/* A shared dcache object between snapin and replay that holds the
8388
decoded solana manifest.
8489
TODO: remove when replay can receive the snapshot manifest. */
85-
uchar * replay_manifest_dcache;
86-
ulong replay_manifest_dcache_obj_id;
87-
88-
/* TODO: remove when replay can receive the snapshot manifest. */
89-
ulong manifest_sz;
90+
struct {
91+
fd_wksp_t * wksp;
92+
uchar * dcache;
93+
ulong chunk0;
94+
ulong wmark;
95+
ulong chunk;
96+
ulong obj_id;
97+
} replay_manifest_dcache;
9098

9199
int shutdown;
92100

@@ -117,13 +125,56 @@ fd_snapin_accumulate_metrics( fd_snapin_tile_t * ctx ) {
117125
}
118126
}
119127

128+
static void
129+
send_manifest( fd_snapin_tile_t * ctx,
130+
ulong manifest_sz ) {
131+
ulong sig = 0UL;
132+
ulong external_sig = 0UL;
133+
if( ctx->state == FD_SNAPIN_STATE_LOADING_FULL ) {
134+
sig = FD_FULL_SNAPSHOT_MANIFEST;
135+
external_sig = FD_FULL_SNAPSHOT_MANIFEST_EXTERNAL;
136+
} else if( ctx->state == FD_SNAPIN_STATE_LOADING_INCREMENTAL ) {
137+
sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST;
138+
external_sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST_EXTERNAL;
139+
}
140+
141+
/* Send snapshot manifest message over snap_out link */
142+
fd_stem_publish( ctx->stem,
143+
0UL,
144+
sig,
145+
ctx->manifest_out.chunk,
146+
sizeof(fd_snapshot_manifest_t),
147+
0UL,
148+
0UL,
149+
0UL );
150+
ctx->manifest_out.chunk = fd_dcache_compact_next( ctx->manifest_out.chunk,
151+
sizeof(fd_snapshot_manifest_t),
152+
ctx->manifest_out.chunk0,
153+
ctx->manifest_out.wmark );
154+
155+
/* send manifest over replay manifest dcache */
156+
fd_stem_publish( ctx->stem,
157+
0UL,
158+
external_sig,
159+
ctx->replay_manifest_dcache.chunk,
160+
manifest_sz,
161+
0UL,
162+
ctx->replay_manifest_dcache.obj_id,
163+
0UL );
164+
ctx->replay_manifest_dcache.chunk = fd_dcache_compact_next( ctx->replay_manifest_dcache.chunk,
165+
manifest_sz,
166+
ctx->replay_manifest_dcache.chunk0,
167+
ctx->replay_manifest_dcache.wmark );
168+
FD_TEST( ctx->replay_manifest_dcache.chunk <= ctx->replay_manifest_dcache.wmark );
169+
}
170+
120171
/* Snapshot parser callbacks ******************************************/
121172

122173
static void
123-
save_manifest( fd_snapshot_parser_t * parser,
124-
void * _ctx,
125-
fd_solana_manifest_global_t * manifest,
126-
ulong manifest_sz ) {
174+
handle_manifest( fd_snapshot_parser_t * parser,
175+
void * _ctx,
176+
fd_solana_manifest_global_t * manifest,
177+
ulong manifest_sz ) {
127178
(void)parser;
128179
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
129180

@@ -135,10 +186,13 @@ save_manifest( fd_snapshot_parser_t * parser,
135186
FD_LOG_NOTICE(( "Snapshot manifest loaded for slot %lu", snapshot_manifest_mem->slot ));
136187

137188
/* Send decoded manifest to replay */
138-
fd_memcpy( ctx->replay_manifest_dcache,
189+
uchar * next_dcache_mem = fd_chunk_to_laddr( ctx->replay_manifest_dcache.wksp,
190+
ctx->replay_manifest_dcache.chunk );
191+
fd_memcpy( next_dcache_mem,
139192
manifest,
140193
manifest_sz );
141-
ctx->manifest_sz = manifest_sz;
194+
195+
send_manifest( ctx, manifest_sz );
142196
}
143197

144198
static int
@@ -195,10 +249,11 @@ snapshot_insert_account( fd_snapshot_parser_t * parser,
195249
}
196250

197251
static void
198-
snapshot_copy_acc_data( fd_snapshot_parser_t * parser FD_PARAM_UNUSED,
252+
snapshot_copy_acc_data( fd_snapshot_parser_t * parser,
199253
void * _ctx,
200254
uchar const * buf,
201255
ulong data_sz ) {
256+
(void)parser;
202257
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
203258

204259
if( ctx->acc_data ) {
@@ -208,8 +263,9 @@ snapshot_copy_acc_data( fd_snapshot_parser_t * parser FD_PARAM_UNUSED,
208263
}
209264

210265
static void
211-
snapshot_reset_acc_data( fd_snapshot_parser_t * parser FD_PARAM_UNUSED,
266+
snapshot_reset_acc_data( fd_snapshot_parser_t * parser,
212267
void * _ctx ) {
268+
(void)parser;
213269
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
214270
ctx->acc_data = NULL;
215271
}
@@ -246,7 +302,7 @@ unprivileged_init( fd_topo_t * topo,
246302

247303
fd_snapshot_parser_process_manifest_fn_t manifest_cb = NULL;
248304
if( 0==strcmp( topo->links[tile->out_link_id[ MANIFEST_OUT_IDX ]].name, "snap_out" ) ) {
249-
manifest_cb = save_manifest;
305+
manifest_cb = handle_manifest;
250306
}
251307

252308
ctx->shutdown = 0;
@@ -277,18 +333,23 @@ unprivileged_init( fd_topo_t * topo,
277333
ctx->metrics.incremental.accounts_processed = 0UL;
278334
ctx->metrics.num_accounts_inserted = 0UL;
279335

280-
/* join replay manifest dcache */
281-
ctx->replay_manifest_dcache = fd_topo_obj_laddr( topo, tile->snapin.manifest_dcache_obj_id );
282-
ctx->replay_manifest_dcache_obj_id = tile->snapin.manifest_dcache_obj_id;
283-
ctx->manifest_sz = 0UL;
284-
285336
/* set up the manifest message producer */
286337
fd_topo_link_t * writer_link = &topo->links[ tile->out_link_id[ MANIFEST_OUT_IDX ] ];
287338
ctx->manifest_out.wksp = topo->workspaces[ topo->objs[ writer_link->dcache_obj_id ].wksp_id ].wksp;
288339
ctx->manifest_out.chunk0 = fd_dcache_compact_chunk0( fd_wksp_containing( writer_link->dcache ), writer_link->dcache );
289340
ctx->manifest_out.wmark = fd_dcache_compact_wmark ( ctx->manifest_out.wksp, writer_link->dcache, writer_link->mtu );
290341
ctx->manifest_out.chunk = ctx->manifest_out.chunk0;
291342

343+
/* join replay manifest dcache */
344+
ctx->replay_manifest_dcache.dcache = fd_topo_obj_laddr( topo, tile->snapin.manifest_dcache_obj_id );
345+
ctx->replay_manifest_dcache.wksp = fd_wksp_containing( ctx->replay_manifest_dcache.dcache );
346+
ctx->replay_manifest_dcache.obj_id = tile->snapin.manifest_dcache_obj_id;
347+
ctx->replay_manifest_dcache.chunk0 = fd_dcache_compact_chunk0( ctx->replay_manifest_dcache.wksp, ctx->replay_manifest_dcache.dcache );
348+
ctx->replay_manifest_dcache.chunk = ctx->replay_manifest_dcache.chunk0;
349+
ctx->replay_manifest_dcache.wmark = fd_dcache_compact_wmark( ctx->replay_manifest_dcache.wksp,
350+
ctx->replay_manifest_dcache.dcache,
351+
writer_link->mtu);
352+
292353
/* set up in link */
293354
fd_topo_link_t const * in_link = &topo->links[ tile->in_link_id[ SNAPSHOT_IN_LINK_IDX ] ];
294355
fd_topo_wksp_t const * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ];
@@ -333,50 +394,6 @@ hard_reset_funk( fd_snapin_tile_t * ctx ) {
333394
/* TODO: Assert that hard reset suceeded */
334395
}
335396

336-
static void
337-
send_manifest( fd_snapin_tile_t * ctx,
338-
fd_stem_context_t * stem ) {
339-
/* Assumes the manifest is already mem copied into the snap_out
340-
dcache and the replay_manifest_dcache from the save_manifest
341-
callback. */
342-
FD_TEST( ctx->manifest_sz );
343-
344-
ulong sig = 0UL;
345-
ulong external_sig = 0UL;
346-
if( ctx->state == FD_SNAPIN_STATE_LOADING_FULL ) {
347-
sig = FD_FULL_SNAPSHOT_MANIFEST;
348-
external_sig = FD_FULL_SNAPSHOT_MANIFEST_EXTERNAL;
349-
} else if( ctx->state == FD_SNAPIN_STATE_LOADING_INCREMENTAL ) {
350-
sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST;
351-
external_sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST_EXTERNAL;
352-
}
353-
354-
/* Send snapshot manifest message over snap_out link */
355-
fd_stem_publish( stem,
356-
0UL,
357-
sig,
358-
ctx->manifest_out.chunk,
359-
sizeof(fd_snapshot_manifest_t),
360-
0UL,
361-
0UL,
362-
0UL );
363-
ctx->manifest_out.chunk = fd_dcache_compact_next( ctx->manifest_out.chunk,
364-
sizeof(fd_snapshot_manifest_t),
365-
ctx->manifest_out.chunk0,
366-
ctx->manifest_out.wmark );
367-
368-
/* send manifest over replay manifest dcache */
369-
ulong chunk = fd_dcache_compact_chunk0( fd_wksp_containing( ctx->replay_manifest_dcache ), ctx->replay_manifest_dcache );
370-
fd_stem_publish( stem,
371-
0UL,
372-
external_sig,
373-
chunk,
374-
ctx->manifest_sz,
375-
0UL,
376-
ctx->replay_manifest_dcache_obj_id,
377-
0UL );
378-
}
379-
380397
static void
381398
handle_control_frag( fd_snapin_tile_t * ctx,
382399
fd_stem_context_t * stem,
@@ -390,10 +407,6 @@ handle_control_frag( fd_snapin_tile_t * ctx,
390407
0 );
391408
}
392409

393-
/* Once the snapshot is fully loaded, we can send the manifest
394-
message over. */
395-
send_manifest( ctx, stem );
396-
397410
/* Notify consumers of manifest out that the snapshot is fully
398411
loaded. */
399412
fd_stem_publish( stem,
@@ -443,13 +456,16 @@ handle_control_frag( fd_snapin_tile_t * ctx,
443456
}
444457

445458
static void
446-
handle_data_frag( fd_snapin_tile_t * ctx,
447-
ulong chunk,
448-
ulong sz ) {
459+
handle_data_frag( fd_snapin_tile_t * ctx,
460+
ulong chunk,
461+
ulong sz,
462+
fd_stem_context_t * stem ) {
449463
FD_TEST( ctx->state==FD_SNAPIN_STATE_LOADING_FULL ||
450464
ctx->state==FD_SNAPIN_STATE_LOADING_INCREMENTAL );
451465
FD_TEST( chunk>=ctx->in.chunk0 && chunk<=ctx->in.wmark );
452466

467+
ctx->stem = stem;
468+
453469
if( FD_UNLIKELY( ctx->parser->flags & SNAP_FLAG_BLOCKED ||
454470
ctx->parser->flags & SNAP_FLAG_DONE ) ) {
455471
/* Don't consume the frag if blocked or done */
@@ -518,7 +534,7 @@ after_frag( fd_snapin_tile_t * ctx,
518534

519535
/* handle frag */
520536
if( FD_UNLIKELY( sz==0 ) ) handle_control_frag( ctx, stem, sig );
521-
else handle_data_frag( ctx, ctx->in._chunk, sz );
537+
else handle_data_frag( ctx, ctx->in._chunk, sz, stem );
522538
}
523539

524540
#define STEM_BURST 1UL

src/discof/restore/utils/fd_snapshot_parser.c

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -337,8 +337,8 @@ fd_snapshot_parser_hdr_read_is_complete( fd_snapshot_parser_t const * self ) {
337337

338338
static uchar const *
339339
fd_snapshot_parser_read_buffered( fd_snapshot_parser_t * self,
340-
uchar const * buf,
341-
ulong bufsz ) {
340+
uchar const * buf,
341+
ulong bufsz ) {
342342
/* Should not be called if read is complete */
343343
FD_TEST( self->buf_ctr < self->buf_sz );
344344

@@ -414,8 +414,8 @@ fd_snapshot_parser_restore_account_hdr( fd_snapshot_parser_t * self ) {
414414

415415
static uchar const *
416416
fd_snapshot_parser_read_account_hdr_chunk( fd_snapshot_parser_t * self,
417-
uchar const * buf,
418-
ulong bufsz ) {
417+
uchar const * buf,
418+
ulong bufsz ) {
419419
if( !self->accv_sz ) {
420420
/* Reached end of AppendVec */
421421
self->state = SNAP_STATE_IGNORE;
@@ -429,25 +429,18 @@ fd_snapshot_parser_read_account_hdr_chunk( fd_snapshot_parser_t * self,
429429
self->accv_sz -= hdr_read;
430430
bufsz -= hdr_read;
431431

432-
// ulong peek_sz = 0UL;
433432
if( FD_LIKELY( fd_snapshot_parser_hdr_read_is_complete( self ) ) ) {
434433
if( FD_UNLIKELY( 0!=fd_snapshot_parser_restore_account_hdr( self ) ) ) {
435434
return buf; /* parse error */
436435
}
437-
// peek_sz = fd_ulong_min( self->acc_rem, bufsz );
438436
}
439-
440-
// self->acc_rem -= peek_sz;
441-
// self->accv_sz -= peek_sz;
442-
// buf_next += peek_sz;
443-
444437
return buf_next;
445438
}
446439

447440
static uchar const *
448441
fd_snapshot_parser_read_account_chunk( fd_snapshot_parser_t * self,
449-
uchar const * buf,
450-
ulong bufsz ) {
442+
uchar const * buf,
443+
ulong bufsz ) {
451444

452445
ulong chunk_sz = fd_ulong_min( self->acc_rem, bufsz );
453446
if( FD_UNLIKELY( chunk_sz > self->accv_sz ) )

src/discof/restore/utils/fd_snapshot_parser.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ fd_snapshot_parser_reset( fd_snapshot_parser_t * self ) {
166166
}
167167

168168
static inline fd_snapshot_parser_t *
169-
fd_snapshot_parser_new( void * mem,
169+
fd_snapshot_parser_new( void * mem,
170170
fd_snapshot_parser_process_manifest_fn_t manifest_cb,
171171
fd_snapshot_process_acc_hdr_fn_t acc_hdr_cb,
172172
fd_snapshot_process_acc_data_fn_t acc_data_cb,

src/flamenco/runtime/context/fd_exec_slot_ctx.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,18 @@ fd_exec_slot_ctx_t *
168168
fd_exec_slot_ctx_recover( fd_exec_slot_ctx_t * slot_ctx,
169169
fd_solana_manifest_global_t const * manifest,
170170
fd_spad_t * runtime_spad ) {
171+
fd_bank_t * bank = fd_banks_get_bank( slot_ctx->banks, manifest->bank.slot );
172+
if( FD_UNLIKELY( !!bank ) ) {
173+
/* If the bank already exists, that means that we have already
174+
restored the bank for this slot. Clear the bank to restore the new
175+
manifest. */
176+
fd_banks_clear_bank( slot_ctx->banks, bank );
177+
} else {
178+
bank = fd_banks_clone_from_parent( slot_ctx->banks, manifest->bank.slot, 0UL );
179+
}
180+
181+
slot_ctx->bank = bank;
171182

172-
slot_ctx->bank = fd_banks_clone_from_parent( slot_ctx->banks, manifest->bank.slot, 0UL );
173183
if( FD_UNLIKELY( !slot_ctx->bank ) ) {
174184
FD_LOG_CRIT(( "fd_banks_clone_from_parent failed" ));
175185
}

0 commit comments

Comments
 (0)