Skip to content

Commit 1ab248c

Browse files
snapshots: send manifest
1 parent d2aec3e commit 1ab248c

File tree

7 files changed

+159
-117
lines changed

7 files changed

+159
-117
lines changed

src/app/firedancer-dev/commands/backtest.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ backtest_topo( config_t * config ) {
302302

303303
/* Replay decoded manifest dcache topo obj */
304304
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
305-
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
305+
fd_pod_insertf_ulong( topo->props, 4UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
306306
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
307307

308308
fd_topob_tile_uses( topo, snapin_tile, funk_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );

src/app/firedancer-dev/commands/snapshot_load.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ snapshot_load_topo( config_t * config,
4646
manifest */
4747
fd_topob_wksp( topo, "replay_manif" );
4848
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
49-
fd_pod_insertf_ulong( topo->props, 1UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
49+
fd_pod_insertf_ulong( topo->props, 4UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
5050
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
5151

5252
/* read() tile */

src/app/firedancer/topology.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ fd_topo_initialize( config_t * config ) {
376376

377377
/* Replay decoded manifest dcache topo obj */
378378
fd_topo_obj_t * replay_manifest_dcache = fd_topob_obj( topo, "dcache", "replay_manif" );
379-
fd_pod_insertf_ulong( topo->props, 2UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
379+
fd_pod_insertf_ulong( topo->props, 4UL << 30UL, "obj.%lu.data_sz", replay_manifest_dcache->id );
380380
fd_pod_insert_ulong( topo->props, "manifest_dcache", replay_manifest_dcache->id );
381381

382382
ushort parsed_tile_to_cpu[ FD_TILE_MAX ];

src/discof/replay/fd_replay_tile.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,15 @@ restore_slot_ctx( fd_replay_tile_ctx_t * ctx,
656656

657657
fd_solana_manifest_global_t * manifest_global
658658
= (fd_solana_manifest_global_t *)fd_chunk_to_laddr( fd_wksp_containing( ctx->manifest_dcache ), chunk );
659+
660+
/* If the bank already exists, that means that we have already
661+
restored the bank for this slot. */
662+
fd_bank_t * bank = fd_banks_get_bank( ctx->slot_ctx->banks, manifest_global->bank.slot );
663+
if( FD_UNLIKELY( !!bank ) ) {
664+
FD_LOG_NOTICE(( "The bank for slot %lu already exists, skipping slot context restore", manifest_global->bank.slot ));
665+
return;
666+
}
667+
659668
fd_exec_slot_ctx_t * recovered_slot_ctx = fd_exec_slot_ctx_recover( ctx->slot_ctx,
660669
manifest_global,
661670
ctx->runtime_spad );
@@ -998,6 +1007,7 @@ on_snapshot_message( fd_replay_tile_ctx_t * ctx,
9981007
}
9991008
case FD_FULL_SNAPSHOT_MANIFEST_EXTERNAL:
10001009
case FD_INCREMENTAL_SNAPSHOT_MANIFEST_EXTERNAL: {
1010+
FD_LOG_NOTICE(( "Received external snapshot manifest message" ));
10011011
/* We may either receive a full snapshot manifest or an
10021012
incremental snapshot manifest. Note that this external message
10031013
id is only used temporarily because replay cannot yet receive

src/discof/restore/fd_snapin_tile.c

Lines changed: 93 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,14 @@ struct fd_snapin_tile {
8282
/* A shared dcache object between snapin and replay that holds the
8383
decoded solana manifest.
8484
TODO: remove when replay can receive the snapshot manifest. */
85-
uchar * replay_manifest_dcache;
86-
ulong replay_manifest_dcache_obj_id;
87-
88-
/* TODO: remove when replay can receive the snapshot manifest. */
89-
ulong manifest_sz;
85+
struct {
86+
fd_wksp_t * wksp;
87+
uchar * dcache;
88+
ulong chunk0;
89+
ulong wmark;
90+
ulong chunk;
91+
ulong obj_id;
92+
} replay_manifest_dcache;
9093

9194
int shutdown;
9295

@@ -117,13 +120,58 @@ fd_snapin_accumulate_metrics( fd_snapin_tile_t * ctx ) {
117120
}
118121
}
119122

123+
static void
124+
send_manifest( fd_snapin_tile_t * ctx,
125+
fd_stem_context_t * stem,
126+
ulong manifest_sz ) {
127+
ulong sig = 0UL;
128+
ulong external_sig = 0UL;
129+
if( ctx->state == FD_SNAPIN_STATE_LOADING_FULL ) {
130+
sig = FD_FULL_SNAPSHOT_MANIFEST;
131+
external_sig = FD_FULL_SNAPSHOT_MANIFEST_EXTERNAL;
132+
} else if( ctx->state == FD_SNAPIN_STATE_LOADING_INCREMENTAL ) {
133+
sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST;
134+
external_sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST_EXTERNAL;
135+
}
136+
137+
/* Send snapshot manifest message over snap_out link */
138+
fd_stem_publish( stem,
139+
0UL,
140+
sig,
141+
ctx->manifest_out.chunk,
142+
sizeof(fd_snapshot_manifest_t),
143+
0UL,
144+
0UL,
145+
0UL );
146+
ctx->manifest_out.chunk = fd_dcache_compact_next( ctx->manifest_out.chunk,
147+
sizeof(fd_snapshot_manifest_t),
148+
ctx->manifest_out.chunk0,
149+
ctx->manifest_out.wmark );
150+
151+
/* Publish the external manifest frag; its chunk refers to the replay manifest dcache */
152+
fd_stem_publish( stem,
153+
0UL,
154+
external_sig,
155+
ctx->replay_manifest_dcache.chunk,
156+
manifest_sz,
157+
0UL,
158+
ctx->replay_manifest_dcache.obj_id,
159+
0UL );
160+
ctx->replay_manifest_dcache.chunk = fd_dcache_compact_next( ctx->replay_manifest_dcache.chunk,
161+
manifest_sz,
162+
ctx->replay_manifest_dcache.chunk0,
163+
ctx->replay_manifest_dcache.wmark );
164+
FD_TEST( ctx->replay_manifest_dcache.chunk <= ctx->replay_manifest_dcache.wmark );
165+
}
166+
120167
/* Snapshot parser callbacks ******************************************/
121168

122169
static void
123-
save_manifest( fd_snapshot_parser_t * parser,
124-
void * _ctx,
125-
fd_solana_manifest_global_t * manifest,
126-
ulong manifest_sz ) {
170+
handle_manifest( fd_snapshot_parser_t * parser,
171+
void * _ctx,
172+
fd_stem_context_t * stem,
173+
fd_solana_manifest_global_t * manifest,
174+
ulong manifest_sz ) {
127175
(void)parser;
128176
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
129177

@@ -135,10 +183,13 @@ save_manifest( fd_snapshot_parser_t * parser,
135183
FD_LOG_NOTICE(( "Snapshot manifest loaded for slot %lu", snapshot_manifest_mem->slot ));
136184

137185
/* Send decoded manifest to replay */
138-
fd_memcpy( ctx->replay_manifest_dcache,
186+
uchar * next_dcache_mem = fd_chunk_to_laddr( ctx->replay_manifest_dcache.wksp,
187+
ctx->replay_manifest_dcache.chunk );
188+
fd_memcpy( next_dcache_mem,
139189
manifest,
140190
manifest_sz );
141-
ctx->manifest_sz = manifest_sz;
191+
192+
send_manifest( ctx, stem, manifest_sz );
142193
}
143194

144195
static int
@@ -167,7 +218,9 @@ snapshot_is_duplicate_account( fd_snapshot_parser_t * parser,
167218
static void
168219
snapshot_insert_account( fd_snapshot_parser_t * parser,
169220
fd_solana_account_hdr_t const * hdr,
170-
void * _ctx ) {
221+
void * _ctx,
222+
fd_stem_context_t * stem ) {
223+
(void)stem;
171224
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
172225
fd_pubkey_t const * account_key = fd_type_pun_const( hdr->meta.pubkey );
173226

@@ -195,10 +248,13 @@ snapshot_insert_account( fd_snapshot_parser_t * parser,
195248
}
196249

197250
static void
198-
snapshot_copy_acc_data( fd_snapshot_parser_t * parser FD_PARAM_UNUSED,
251+
snapshot_copy_acc_data( fd_snapshot_parser_t * parser,
199252
void * _ctx,
253+
fd_stem_context_t * stem,
200254
uchar const * buf,
201255
ulong data_sz ) {
256+
(void)parser;
257+
(void)stem;
202258
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
203259

204260
if( ctx->acc_data ) {
@@ -208,8 +264,11 @@ snapshot_copy_acc_data( fd_snapshot_parser_t * parser FD_PARAM_UNUSED,
208264
}
209265

210266
static void
211-
snapshot_reset_acc_data( fd_snapshot_parser_t * parser FD_PARAM_UNUSED,
212-
void * _ctx ) {
267+
snapshot_reset_acc_data( fd_snapshot_parser_t * parser,
268+
void * _ctx,
269+
fd_stem_context_t * stem ) {
270+
(void)parser;
271+
(void)stem;
213272
fd_snapin_tile_t * ctx = fd_type_pun( _ctx );
214273
ctx->acc_data = NULL;
215274
}
@@ -228,6 +287,7 @@ scratch_footprint( fd_topo_tile_t const * tile ) {
228287
l = FD_LAYOUT_APPEND( l, alignof(fd_snapin_tile_t), sizeof(fd_snapin_tile_t) );
229288
l = FD_LAYOUT_APPEND( l, fd_snapshot_parser_align(), fd_snapshot_parser_footprint() );
230289
l = FD_LAYOUT_APPEND( l, fd_scratch_smem_align(), fd_scratch_smem_footprint( FD_SNAPIN_SCRATCH_MAX ) );
290+
l = FD_LAYOUT_APPEND( l, fd_scratch_fmem_align(), fd_scratch_fmem_footprint( FD_SNAPIN_SCRATCH_DEPTH ) );
231291
return FD_LAYOUT_FINI( l, alignof(fd_snapin_tile_t) );
232292
}
233293

@@ -245,7 +305,7 @@ unprivileged_init( fd_topo_t * topo,
245305

246306
fd_snapshot_parser_process_manifest_fn_t manifest_cb = NULL;
247307
if( 0==strcmp( topo->links[tile->out_link_id[ MANIFEST_OUT_IDX ]].name, "snap_out" ) ) {
248-
manifest_cb = save_manifest;
308+
manifest_cb = handle_manifest;
249309
}
250310

251311
ctx->shutdown = 0;
@@ -276,18 +336,23 @@ unprivileged_init( fd_topo_t * topo,
276336
ctx->metrics.incremental.accounts_processed = 0UL;
277337
ctx->metrics.num_accounts_inserted = 0UL;
278338

279-
/* join replay manifest dcache */
280-
ctx->replay_manifest_dcache = fd_topo_obj_laddr( topo, tile->snapin.manifest_dcache_obj_id );
281-
ctx->replay_manifest_dcache_obj_id = tile->snapin.manifest_dcache_obj_id;
282-
ctx->manifest_sz = 0UL;
283-
284339
/* set up the manifest message producer */
285340
fd_topo_link_t * writer_link = &topo->links[ tile->out_link_id[ MANIFEST_OUT_IDX ] ];
286341
ctx->manifest_out.wksp = topo->workspaces[ topo->objs[ writer_link->dcache_obj_id ].wksp_id ].wksp;
287342
ctx->manifest_out.chunk0 = fd_dcache_compact_chunk0( fd_wksp_containing( writer_link->dcache ), writer_link->dcache );
288343
ctx->manifest_out.wmark = fd_dcache_compact_wmark ( ctx->manifest_out.wksp, writer_link->dcache, writer_link->mtu );
289344
ctx->manifest_out.chunk = ctx->manifest_out.chunk0;
290345

346+
/* join replay manifest dcache */
347+
ctx->replay_manifest_dcache.dcache = fd_topo_obj_laddr( topo, tile->snapin.manifest_dcache_obj_id );
348+
ctx->replay_manifest_dcache.wksp = fd_wksp_containing( ctx->replay_manifest_dcache.dcache );
349+
ctx->replay_manifest_dcache.obj_id = tile->snapin.manifest_dcache_obj_id;
350+
ctx->replay_manifest_dcache.chunk0 = fd_dcache_compact_chunk0( ctx->replay_manifest_dcache.wksp, ctx->replay_manifest_dcache.dcache );
351+
ctx->replay_manifest_dcache.chunk = ctx->replay_manifest_dcache.chunk0;
352+
ctx->replay_manifest_dcache.wmark = fd_dcache_compact_wmark( ctx->replay_manifest_dcache.wksp,
353+
ctx->replay_manifest_dcache.dcache,
354+
writer_link->mtu);
355+
291356
/* set up in link */
292357
fd_topo_link_t const * in_link = &topo->links[ tile->in_link_id[ SNAPSHOT_IN_LINK_IDX ] ];
293358
fd_topo_wksp_t const * in_wksp = &topo->workspaces[ topo->objs[ in_link->dcache_obj_id ].wksp_id ];
@@ -332,50 +397,6 @@ hard_reset_funk( fd_snapin_tile_t * ctx ) {
332397
/* TODO: Assert that hard reset succeeded */
333398
}
334399

335-
static void
336-
send_manifest( fd_snapin_tile_t * ctx,
337-
fd_stem_context_t * stem ) {
338-
/* Assumes the manifest is already mem copied into the snap_out
339-
dcache and the replay_manifest_dcache from the save_manifest
340-
callback. */
341-
FD_TEST( ctx->manifest_sz );
342-
343-
ulong sig = 0UL;
344-
ulong external_sig = 0UL;
345-
if( ctx->state == FD_SNAPIN_STATE_LOADING_FULL ) {
346-
sig = FD_FULL_SNAPSHOT_MANIFEST;
347-
external_sig = FD_FULL_SNAPSHOT_MANIFEST_EXTERNAL;
348-
} else if( ctx->state == FD_SNAPIN_STATE_LOADING_INCREMENTAL ) {
349-
sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST;
350-
external_sig = FD_INCREMENTAL_SNAPSHOT_MANIFEST_EXTERNAL;
351-
}
352-
353-
/* Send snapshot manifest message over snap_out link */
354-
fd_stem_publish( stem,
355-
0UL,
356-
sig,
357-
ctx->manifest_out.chunk,
358-
sizeof(fd_snapshot_manifest_t),
359-
0UL,
360-
0UL,
361-
0UL );
362-
ctx->manifest_out.chunk = fd_dcache_compact_next( ctx->manifest_out.chunk,
363-
sizeof(fd_snapshot_manifest_t),
364-
ctx->manifest_out.chunk0,
365-
ctx->manifest_out.wmark );
366-
367-
/* send manifest over replay manifest dcache */
368-
ulong chunk = fd_dcache_compact_chunk0( fd_wksp_containing( ctx->replay_manifest_dcache ), ctx->replay_manifest_dcache );
369-
fd_stem_publish( stem,
370-
0UL,
371-
external_sig,
372-
chunk,
373-
ctx->manifest_sz,
374-
0UL,
375-
ctx->replay_manifest_dcache_obj_id,
376-
0UL );
377-
}
378-
379400
static void
380401
handle_control_frag( fd_snapin_tile_t * ctx,
381402
fd_stem_context_t * stem,
@@ -389,10 +410,6 @@ handle_control_frag( fd_snapin_tile_t * ctx,
389410
0 );
390411
}
391412

392-
/* Once the snapshot is fully loaded, we can send the manifest
393-
message over. */
394-
send_manifest( ctx, stem );
395-
396413
/* Notify consumers of manifest out that the snapshot is fully
397414
loaded. */
398415
fd_stem_publish( stem,
@@ -442,9 +459,10 @@ handle_control_frag( fd_snapin_tile_t * ctx,
442459
}
443460

444461
static void
445-
handle_data_frag( fd_snapin_tile_t * ctx,
446-
ulong chunk,
447-
ulong sz ) {
462+
handle_data_frag( fd_snapin_tile_t * ctx,
463+
ulong chunk,
464+
ulong sz,
465+
fd_stem_context_t * stem ) {
448466
FD_TEST( ctx->state==FD_SNAPIN_STATE_LOADING_FULL ||
449467
ctx->state==FD_SNAPIN_STATE_LOADING_INCREMENTAL );
450468
FD_TEST( chunk>=ctx->in.chunk0 && chunk<=ctx->in.wmark );
@@ -465,7 +483,8 @@ handle_data_frag( fd_snapin_tile_t * ctx,
465483
}
466484
cur = fd_snapshot_parser_process_chunk( ctx->parser,
467485
cur,
468-
(ulong)( chunk_end-cur ) );
486+
(ulong)( chunk_end-cur ),
487+
stem );
469488
if( FD_UNLIKELY( ctx->parser->flags ) ) {
470489
if( FD_UNLIKELY( ctx->parser->flags & SNAP_FLAG_FAILED ) ) {
471490
/* abort app if parser failed */
@@ -517,7 +536,7 @@ after_frag( fd_snapin_tile_t * ctx,
517536

518537
/* handle frag */
519538
if( FD_UNLIKELY( sz==0 ) ) handle_control_frag( ctx, stem, sig );
520-
else handle_data_frag( ctx, ctx->in._chunk, sz );
539+
else handle_data_frag( ctx, ctx->in._chunk, sz, stem );
521540
}
522541

523542
#define STEM_BURST 1UL

0 commit comments

Comments
 (0)