Skip to content

Commit e5e71b7

Browse files
committed
firedancer-dev: repair profiler light
1 parent 8403bb0 commit e5e71b7

File tree

10 files changed

+725
-23
lines changed

10 files changed

+725
-23
lines changed

src/app/firedancer-dev/Local.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ $(call add-objs,commands/dev,fd_firedancer_dev)
1414
$(call add-objs,commands/sim,fd_firedancer_dev)
1515
$(call add-objs,commands/backtest,fd_firedancer_dev)
1616
$(call add-objs,commands/snapshot_load,fd_firedancer_dev)
17+
$(call add-objs,commands/repair,fd_firedancer_dev)
1718

1819
$(call make-bin,firedancer-dev,main,fd_firedancer_dev fd_firedancer fddev_shared fdctl_shared fdctl_platform fd_discof fd_disco fd_choreo fd_flamenco fd_funk fd_quic fd_tls fd_reedsol fd_waltz fd_tango fd_ballet fd_util firedancer_version,$(SECP256K1_LIBS) $(ROCKSDB_LIBS) $(OPENSSL_LIBS))
1920

src/app/firedancer-dev/commands/repair.c

Lines changed: 428 additions & 0 deletions
Large diffs are not rendered by default.

src/app/firedancer-dev/main.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ extern action_t fd_action_gossip;
180180
extern action_t fd_action_sim;
181181
extern action_t fd_action_backtest;
182182
extern action_t fd_action_snapshot_load;
183+
extern action_t fd_action_repair;
183184

184185
action_t * ACTIONS[] = {
185186
&fd_action_run,
@@ -208,6 +209,7 @@ action_t * ACTIONS[] = {
208209
&fd_action_sim,
209210
&fd_action_backtest,
210211
&fd_action_snapshot_load,
212+
&fd_action_repair,
211213
NULL,
212214
};
213215

src/app/firedancer/topology.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -789,7 +789,7 @@ fd_topo_initialize( config_t * config ) {
789789
fd_topob_wksp( topo, "repair_scap" );
790790
fd_topob_wksp( topo, "replay_scap" );
791791

792-
fd_topob_tile( topo, "scap", "scap", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
792+
fd_topo_tile_t * scap_tile = fd_topob_tile( topo, "scap", "scap", "metric_in", tile_to_cpu[ topo->tile_cnt ], 0, 0 );
793793

794794
fd_topob_link( topo, "repair_scap", "repair_scap", 128UL, FD_SLICE_MAX_WITH_HEADERS, 1UL );
795795
fd_topob_link( topo, "replay_scap", "replay_scap", 128UL, sizeof(fd_hash_t)+sizeof(ulong), 1UL );
@@ -809,6 +809,9 @@ fd_topo_initialize( config_t * config ) {
809809

810810
fd_topob_tile_out( topo, "repair", 0UL, "repair_scap", 0UL );
811811
fd_topob_tile_out( topo, "replay", 0UL, "replay_scap", 0UL );
812+
813+
fd_topob_tile_uses( topo, scap_tile, root_slot_obj, FD_SHMEM_JOIN_MODE_READ_WRITE );
814+
/* No default fd_topob_tile_in connection to stake_out */
812815
}
813816

814817
fd_topob_wksp( topo, "replay_notif" );
@@ -1105,6 +1108,8 @@ fd_topo_configure_tile( fd_topo_tile_t * tile,
11051108
tile->shredcap.repair_intake_listen_port = config->tiles.repair.repair_intake_listen_port;
11061109
strncpy( tile->shredcap.folder_path, config->tiles.shredcap.folder_path, sizeof(tile->shredcap.folder_path) );
11071110
tile->shredcap.write_buffer_size = config->tiles.shredcap.write_buffer_size;
1111+
tile->shredcap.enable_publish_stake_weights = 0; /* this is not part of the config */
1112+
strncpy( tile->shredcap.manifest_path, "", PATH_MAX ); /* this is not part of the config */
11081113
} else {
11091114
return 0;
11101115
}

src/app/shared/fd_action.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ union fdctl_args {
6767
char name[ 13UL ];
6868
} flame;
6969

70+
struct {
71+
char manifest_path[ 256UL ];
72+
} repair;
73+
7074
struct {
7175
char affinity[ AFFINITY_SZ ];
7276
uint tpu_ip;

src/disco/topo/fd_topo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,8 @@ struct fd_topo_tile {
462462
char folder_path[ PATH_MAX ];
463463
ushort repair_intake_listen_port;
464464
ulong write_buffer_size; /* Size of the write buffer for the capture tile */
465+
int enable_publish_stake_weights;
466+
char manifest_path[ PATH_MAX ];
465467

466468
/* Set internally by the capture tile */
467469
int shreds_fd;

src/discof/repair/fd_repair_tile.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -842,6 +842,7 @@ after_frag( fd_repair_tile_ctx_t * ctx,
842842
fd_fseq_update( ctx->turbine_slot, fd_ulong_max( shred->slot, fd_fseq_query( ctx->turbine_slot ) ) );
843843
if( FD_UNLIKELY( shred->slot <= fd_forest_root_slot( ctx->forest ) ) ) return; /* shred too old */
844844

845+
/* TODO add automated caught-up test */
845846

846847
/* Insert the shred sig (shared by all shred members in the FEC set)
847848
into the map. */
@@ -1075,6 +1076,7 @@ during_housekeeping( fd_repair_tile_ctx_t * ctx ) {
10751076
return;
10761077
}
10771078
}
1079+
10781080
static void
10791081
privileged_init( fd_topo_t * topo,
10801082
fd_topo_tile_t * tile ) {
@@ -1196,7 +1198,6 @@ unprivileged_init( fd_topo_t * topo,
11961198

11971199
/* Scratch mem setup */
11981200

1199-
ctx->blockstore = &ctx->blockstore_ljoin;
12001201
ctx->repair = FD_SCRATCH_ALLOC_APPEND( l, fd_repair_align(), fd_repair_footprint() );
12011202
ctx->forest = FD_SCRATCH_ALLOC_APPEND( l, fd_forest_align(), fd_forest_footprint( tile->repair.slot_max ) );
12021203
ctx->fec_sigs = FD_SCRATCH_ALLOC_APPEND( l, fd_fec_sig_align(), fd_fec_sig_footprint( 20 ) );
@@ -1237,17 +1238,20 @@ unprivileged_init( fd_topo_t * topo,
12371238
}
12381239

12391240
/* Blockstore setup */
1241+
ctx->blockstore = NULL;
12401242
ulong blockstore_obj_id = fd_pod_queryf_ulong( topo->props, ULONG_MAX, "blockstore" );
1241-
FD_TEST( blockstore_obj_id!=ULONG_MAX );
1242-
ctx->blockstore_wksp = topo->workspaces[ topo->objs[ blockstore_obj_id ].wksp_id ].wksp;
1243-
1244-
if( ctx->blockstore_wksp==NULL ) {
1245-
FD_LOG_ERR(( "no blocktore workspace" ));
1243+
if( FD_UNLIKELY( blockstore_obj_id==ULONG_MAX ) ) {
1244+
FD_LOG_WARNING(( "no blockstore_obj_id" ));
1245+
} else {
1246+
ctx->blockstore_wksp = topo->workspaces[ topo->objs[ blockstore_obj_id ].wksp_id ].wksp;
1247+
if( FD_UNLIKELY( ctx->blockstore_wksp==NULL ) ) {
1248+
FD_LOG_WARNING(( "no blocktore workspace" ));
1249+
} else {
1250+
ctx->blockstore = fd_blockstore_join( &ctx->blockstore_ljoin, fd_topo_obj_laddr( topo, blockstore_obj_id ) );
1251+
FD_TEST( ctx->blockstore!=NULL );
1252+
}
12461253
}
12471254

1248-
ctx->blockstore = fd_blockstore_join( &ctx->blockstore_ljoin, fd_topo_obj_laddr( topo, blockstore_obj_id ) );
1249-
FD_TEST( ctx->blockstore!=NULL );
1250-
12511255
FD_LOG_NOTICE(( "repair starting" ));
12521256

12531257
/* Repair set up */

src/discof/replay/fd_exec.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,16 @@ static inline ulong
1414
generate_stake_weight_msg( fd_exec_slot_ctx_t * slot_ctx,
1515
fd_spad_t * runtime_spad,
1616
ulong epoch,
17+
fd_vote_accounts_global_t const * vote_accounts,
1718
ulong * stake_weight_msg_out ) {
1819
/* This function needs to be completely rewritten for SIMD-0180.
1920
For now it's a hack that sends old data (pre SIMD-0180) in the new format. */
2021

2122
fd_stake_weight_msg_t * stake_weight_msg = (fd_stake_weight_msg_t *)fd_type_pun( stake_weight_msg_out );
2223
fd_vote_stake_weight_t * stake_weights = stake_weight_msg->weights;
23-
fd_vote_accounts_global_t const * vote_accounts = fd_bank_epoch_stakes_locking_query( slot_ctx->bank );
2424
ulong staked_cnt = fd_stake_weights_by_node( vote_accounts,
2525
stake_weights,
2626
runtime_spad );
27-
fd_bank_epoch_stakes_end_locking_query( slot_ctx->bank );
28-
2927
fd_epoch_schedule_t const * epoch_schedule = fd_bank_epoch_schedule_query( slot_ctx->bank );
3028

3129
stake_weight_msg->epoch = epoch;

src/discof/replay/fd_replay_tile.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,7 @@ publish_stake_weights( fd_replay_tile_ctx_t * ctx,
300300
if( epoch_stakes_root!=NULL ) {
301301
ulong * stake_weights_msg = fd_chunk_to_laddr( ctx->stake_out->mem, ctx->stake_out->chunk );
302302
ulong epoch = fd_slot_to_leader_schedule_epoch( epoch_schedule, fd_bank_slot_get( slot_ctx->bank ) );
303-
ulong stake_weights_sz = generate_stake_weight_msg( slot_ctx, ctx->runtime_spad, epoch - 1, stake_weights_msg );
303+
ulong stake_weights_sz = generate_stake_weight_msg( slot_ctx, ctx->runtime_spad, epoch - 1, epoch_stakes, stake_weights_msg );
304304
ulong stake_weights_sig = 4UL;
305305
fd_stem_publish( stem, 0UL, stake_weights_sig, ctx->stake_out->chunk, stake_weights_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
306306
ctx->stake_out->chunk = fd_dcache_compact_next( ctx->stake_out->chunk, stake_weights_sz, ctx->stake_out->chunk0, ctx->stake_out->wmark );
@@ -315,7 +315,7 @@ publish_stake_weights( fd_replay_tile_ctx_t * ctx,
315315
if( next_epoch_stakes_root!=NULL ) {
316316
ulong * stake_weights_msg = fd_chunk_to_laddr( ctx->stake_out->mem, ctx->stake_out->chunk );
317317
ulong epoch = fd_slot_to_leader_schedule_epoch( epoch_schedule, fd_bank_slot_get( slot_ctx->bank ) ); /* epoch */
318-
ulong stake_weights_sz = generate_stake_weight_msg( slot_ctx, ctx->runtime_spad, epoch, stake_weights_msg );
318+
ulong stake_weights_sz = generate_stake_weight_msg( slot_ctx, ctx->runtime_spad, epoch, next_epoch_stakes, stake_weights_msg );
319319
ulong stake_weights_sig = 4UL;
320320
fd_stem_publish( stem, 0UL, stake_weights_sig, ctx->stake_out->chunk, stake_weights_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
321321
ctx->stake_out->chunk = fd_dcache_compact_next( ctx->stake_out->chunk, stake_weights_sz, ctx->stake_out->chunk0, ctx->stake_out->wmark );

0 commit comments

Comments
 (0)