Skip to content

Commit 1069a36

Browse files
committed
repair: request shreds of orphaned slots during repair orphan stage
* overrun & bw metrics
1 parent 9124ad9 commit 1069a36

File tree

15 files changed

+496
-233
lines changed

15 files changed

+496
-233
lines changed

book/api/metrics-generated.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@
701701
| <span class="metrics-name">repair_&#8203;sent_&#8203;pkt_&#8203;types</span><br/>{repair_&#8203;sent_&#8203;request_&#8203;types="<span class="metrics-enum">needed_&#8203;highest_&#8203;window</span>"} | counter | What types of client messages are we sending (Need Highest Window) |
702702
| <span class="metrics-name">repair_&#8203;sent_&#8203;pkt_&#8203;types</span><br/>{repair_&#8203;sent_&#8203;request_&#8203;types="<span class="metrics-enum">needed_&#8203;orphan</span>"} | counter | What types of client messages are we sending (Need Orphans) |
703703
| <span class="metrics-name">repair_&#8203;sent_&#8203;pkt_&#8203;types</span><br/>{repair_&#8203;sent_&#8203;request_&#8203;types="<span class="metrics-enum">pong</span>"} | counter | What types of client messages are we sending (Pong) |
704+
| <span class="metrics-name">repair_&#8203;send_&#8203;pkt_&#8203;rate</span> | gauge | Rate of network packets we are sending, in packets per second coarsely |
704705
| <span class="metrics-name">repair_&#8203;repaired_&#8203;slots</span> | counter | Until which slots have we fully repaired |
705706
| <span class="metrics-name">repair_&#8203;current_&#8203;slot</span> | counter | Our view of the current cluster slot, max slot received |
706707
| <span class="metrics-name">repair_&#8203;request_&#8203;peers</span> | counter | How many peers have we requested |

src/app/firedancer-dev/commands/repair.c

Lines changed: 80 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "../../../util/pod/fd_pod_format.h"
2121
#include "../../../waltz/resolv/fd_io_readline.h"
2222
#include "../../shared/commands/monitor/helper.h"
23+
#include "../../../disco/metrics/fd_metrics.h"
2324
#include "../../../discof/repair/fd_repair_tile.c"
2425

2526
#include "gossip.h"
@@ -131,6 +132,7 @@ repair_topo( config_t * config ) {
131132
fd_topob_wksp( topo, "snapin_manif" );
132133

133134
fd_topob_wksp( topo, "slot_fseqs" ); /* fseqs for marked slots eg. turbine slot */
135+
fd_topob_wksp( topo, "genesi_out" ); /* mock genesi_out for ipecho */
134136

135137
#define FOR(cnt) for( ulong i=0UL; i<cnt; i++ )
136138

@@ -160,6 +162,8 @@ repair_topo( config_t * config ) {
160162

161163
/**/ fd_topob_link( topo, "snapin_manif", "snapin_manif", 2UL, sizeof(fd_snapshot_manifest_t), 1UL );
162164

165+
/**/ fd_topob_link( topo, "genesi_out", "genesi_out", 2UL, 128, 1UL );
166+
163167
FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_repair", i, config->net.ingress_buffer_size );
164168
FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_quic", i, config->net.ingress_buffer_size );
165169
FOR(net_tile_cnt) fd_topos_net_rx_link( topo, "net_shred", i, config->net.ingress_buffer_size );
@@ -229,7 +233,6 @@ repair_topo( config_t * config ) {
229233
FOR(shred_tile_cnt) fd_topob_tile_out( topo, "shred", i, "shred_out", i );
230234
FOR(shred_tile_cnt) fd_topob_tile_out( topo, "shred", i, "shred_net", i );
231235
FOR(shred_tile_cnt) fd_topob_tile_in ( topo, "shred", i, "metric_in", "ipecho_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
232-
233236
FOR(shred_tile_cnt) fd_topob_tile_in( topo, "shred", i, "metric_in", "repair_shred", i, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
234237
235238
/**/ fd_topob_tile_out( topo, "repair", 0UL, "repair_net", 0UL );
@@ -260,6 +263,7 @@ repair_topo( config_t * config ) {
260263
FOR(sign_tile_cnt-1) fd_topob_tile_in ( topo, "repair", 0UL, "metric_in", "sign_repair", i, FD_TOPOB_UNRELIABLE, FD_TOPOB_POLLED );
261264

262265
/**/ fd_topob_tile_in ( topo, "gossip", 0UL, "metric_in", "sign_gossip", 0UL, FD_TOPOB_UNRELIABLE, FD_TOPOB_UNPOLLED );
266+
/**/ fd_topob_tile_in ( topo, "ipecho", 0UL, "metric_in", "genesi_out", 0UL, FD_TOPOB_RELIABLE, FD_TOPOB_POLLED );
263267

264268
if( 1 ) {
265269
fd_topob_wksp( topo, "scap" );
@@ -283,8 +287,8 @@ repair_topo( config_t * config ) {
283287
FD_TEST( fd_link_permit_no_producers( topo, "quic_net" ) == quic_tile_cnt );
284288
FD_TEST( fd_link_permit_no_producers( topo, "poh_shred" ) == 1UL );
285289
FD_TEST( fd_link_permit_no_producers( topo, "send_out" ) == 1UL );
286-
287-
FD_TEST( fd_link_permit_no_consumers( topo, "net_quic" ) == quic_tile_cnt );
290+
FD_TEST( fd_link_permit_no_producers( topo, "genesi_out" ) == 1UL );
291+
FD_TEST( fd_link_permit_no_consumers( topo, "net_quic" ) == net_tile_cnt );
288292

289293
config->tiles.send.send_src_port = 0; /* disable send */
290294

@@ -311,21 +315,24 @@ repair_cmd_args( int * pargc,
311315

312316
if( FD_UNLIKELY( !*pargc ) )
313317
FD_LOG_ERR(( "\n \
314-
usage: (1) repair --manifest-path <manifest_path> [--iptable-path <iptable_path>] \n \
315-
(2) repair --metrics [--iptable-path <iptable_path>] \n\n \
318+
usage: (1) repair --manifest-path <manifest_path> [--iptable-path <iptable_path>] [--sorted] \n \
319+
(2) repair --metrics [--iptable-path <iptable_path>] [--sorted] \n\n \
316320
(3) repair --tree \n\n \
317321
Passing --manifest-path starts up profiler mode, which runs a reduced topology that tests catchup and repair performance. \n \
318322
Passing --metrics prints recent slot completion times and response latencies during a live run. These modes are exclusive. \n \
319323
Passing --tree prints the tree of the repair process. \n \
324+
--sorted: optional flag to print the slots in sorted order. Default prints in completion order.\n \
320325
--iptable-path: optional path to iptable file to map IP addresses to locations." ));
321326

322327
char const * manifest_path = fd_env_strip_cmdline_cstr( pargc, pargv, "--manifest-path", NULL, NULL );
328+
int sorted = fd_env_strip_cmdline_contains( pargc, pargv, "--sorted" );
323329
if( fd_env_strip_cmdline_contains( pargc, pargv, "--metrics" ) ) {
324330
args->repair.metrics_only = 1;
325331
if( FD_UNLIKELY( manifest_path ) ) FD_LOG_ERR(( "metrics mode does not support --manifest-path" ));
326332
} else if( fd_env_strip_cmdline_contains( pargc, pargv, "--tree" ) ) {
327333
args->repair.forest_only = 1;
328334
if( FD_UNLIKELY( manifest_path ) ) FD_LOG_ERR(( "tree mode does not support --manifest-path" ));
335+
if( FD_UNLIKELY( sorted ) ) FD_LOG_ERR(( "tree mode does not support --sorted" ));
329336
} else {
330337
fd_cstr_fini( fd_cstr_append_cstr_safe( fd_cstr_init( args->repair.manifest_path ), manifest_path, sizeof(args->repair.manifest_path)-1UL ) );
331338
}
@@ -334,6 +341,9 @@ usage: (1) repair --manifest-path <manifest_path> [--iptable-path <iptable_path>
334341
if( FD_LIKELY( iptable_path ) ) {
335342
fd_cstr_fini( fd_cstr_append_cstr_safe( fd_cstr_init( args->repair.iptable_path ), iptable_path, sizeof(args->repair.iptable_path)-1UL ) );
336343
}
344+
if( FD_LIKELY( sorted ) ) {
345+
args->repair.sorted = 1;
346+
}
337347
}
338348

339349
static char *
@@ -430,12 +440,18 @@ print_histogram_buckets( volatile ulong * metrics,
430440
printf( " +---------------------+--------------------+---------------+\n" );
431441
}
432442

443+
static fd_slot_metrics_t temp_slots[ FD_CATCHUP_METRICS_MAX ];
444+
433445
static void
434-
print_catchup_slots( fd_wksp_t * repair_tile_wksp, ctx_t * repair_ctx, int verbose ) {
446+
print_catchup_slots( fd_wksp_t * repair_tile_wksp, ctx_t * repair_ctx, int verbose, int sorted ) {
435447
fd_repair_metrics_t * catchup = repair_ctx->slot_metrics;
436448
ulong catchup_gaddr = fd_wksp_gaddr_fast( repair_ctx->wksp, catchup );
437449
fd_repair_metrics_t * catchup_table = (fd_repair_metrics_t *)fd_wksp_laddr( repair_tile_wksp, catchup_gaddr );
438-
fd_repair_metrics_print( catchup_table, verbose );
450+
if( FD_LIKELY( sorted ) ) {
451+
fd_repair_metrics_print_sorted( catchup_table, verbose, temp_slots );
452+
} else {
453+
fd_repair_metrics_print( catchup_table, verbose );
454+
}
439455
}
440456

441457
static fd_location_info_t * location_table;
@@ -452,7 +468,7 @@ sort_peers_by_latency( fd_policy_peer_t * active_table, fd_peer_dlist_t * peers_
452468
if( FD_UNLIKELY( i >= FD_ACTIVE_KEY_MAX ) ) break;
453469
iter = fd_peer_dlist_iter_fwd_next( iter, peers_dlist, peers_arr );
454470
}
455-
FD_LOG_NOTICE(( "Fast peers cnt: %lu. Remainder is slow.", i ));
471+
ulong fast_cnt = i;
456472
iter = fd_peer_dlist_iter_fwd_init( peers_wlist, peers_arr );
457473
while( !fd_peer_dlist_iter_done( iter, peers_wlist, peers_arr ) ) {
458474
fd_peer_t * peer = fd_peer_dlist_iter_ele( iter, peers_wlist, peers_arr );
@@ -461,6 +477,7 @@ sort_peers_by_latency( fd_policy_peer_t * active_table, fd_peer_dlist_t * peers_
461477
if( FD_UNLIKELY( i >= FD_ACTIVE_KEY_MAX ) ) break;
462478
iter = fd_peer_dlist_iter_fwd_next( iter, peers_wlist, peers_arr );
463479
}
480+
FD_LOG_NOTICE(( "Fast peers cnt: %lu. Slow peers cnt: %lu.", fast_cnt, i - fast_cnt ));
464481

465482
ulong peer_cnt = i;
466483
for( uint i = 0; i < peer_cnt - 1; i++ ) {
@@ -511,7 +528,7 @@ print_peer_location_latency( fd_wksp_t * repair_tile_wksp, ctx_t * tile_ctx ) {
511528
char * geolocation = info ? info->location : "Unknown";
512529
double peer_bps = (double)(active->res_cnt * FD_SHRED_MIN_SZ) / ((double)(active->last_resp_ts - active->first_resp_ts) / 1e9);
513530
double req_bps = (double)active->req_cnt * 202 / ((double)(active->last_req_ts - active->first_req_ts) / 1e9);
514-
printf( "| %-46s | %-7lu | %-8.2f | %-8.2f | %-7.2f | %10.3fms | %s\n", FD_BASE58_ENC_32_ALLOCA( &active->key ), active->req_cnt, req_bps, peer_bps, (double)active->res_cnt / (double)active->req_cnt, ((double)active->total_lat / (double)active->res_cnt) / 1e6, geolocation );
531+
printf( "%-5u | %-46s | %-7lu | %-8.2f | %-8.2f | %-7.2f | %10.3fms | %s\n", i, FD_BASE58_ENC_32_ALLOCA( &active->key ), active->req_cnt, req_bps, peer_bps, (double)active->res_cnt / (double)active->req_cnt, ((double)active->total_lat / (double)active->res_cnt) / 1e6, geolocation );
515532
}
516533
}
517534
fflush( stdout );
@@ -549,18 +566,15 @@ repair_cmd_fn_metrics_mode( args_t * args,
549566
config_t * config ) {
550567
FD_LOG_NOTICE(( "Attempting to join with running firedancer-dev instance..." ));
551568

552-
fd_topo_t * topo = &config->topo;
553-
ulong wksp_id = fd_topo_find_wksp( topo, "repair" );
569+
fd_topo_t * topo = &config->topo;
570+
ulong wksp_id = fd_topo_find_wksp( topo, "repair" );
554571
if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair workspace not found" ));
555-
556572
fd_topo_wksp_t * repair_wksp = &topo->workspaces[ wksp_id ];
557-
558-
ulong tile_id = fd_topo_find_tile( topo, "repair", 0UL );
559-
if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair tile not found" ));
560-
561573
fd_topo_join_workspace( topo, repair_wksp, FD_SHMEM_JOIN_MODE_READ_ONLY );
562574

563575
/* Access the repair tile scratch memory where repair_tile_ctx is stored */
576+
ulong tile_id = fd_topo_find_tile( topo, "repair", 0UL );
577+
if( FD_UNLIKELY( tile_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair tile not found" ));
564578
fd_topo_tile_t * tile = &topo->tiles[ tile_id ];
565579
void * scratch = fd_topo_obj_laddr( &config->topo, tile->tile_obj_id );
566580
if( FD_UNLIKELY( !scratch ) ) FD_LOG_ERR(( "Failed to access repair tile scratch memory" ));
@@ -570,16 +584,8 @@ repair_cmd_fn_metrics_mode( args_t * args,
570584

571585
/* catchup cmd owned memory */
572586
location_table = fd_location_table_join( fd_location_table_new( location_table_mem ) );
573-
574587
read_iptable( args->repair.iptable_path, location_table );
575588

576-
if( FD_UNLIKELY( !args->repair.metrics_only ) ) {
577-
print_peer_location_latency( repair_wksp->wksp, repair_ctx );
578-
print_catchup_slots( repair_wksp->wksp, repair_ctx, 0 );
579-
printf( "\nCatchup tool completed successfully.\n" );
580-
return;
581-
}
582-
583589
// Add terminal setup here - same as monitor.c
584590
atexit( restore_terminal );
585591
if( FD_UNLIKELY( 0!=tcgetattr( STDIN_FILENO, &termios_backup ) ) ) {
@@ -603,7 +609,7 @@ repair_cmd_fn_metrics_mode( args_t * args,
603609
long now = fd_log_wallclock();
604610
if( FD_UNLIKELY( now - last_print > 1e9L ) ) {
605611
last_print = now;
606-
print_catchup_slots( repair_wksp->wksp, repair_ctx, catchup_verbose );
612+
print_catchup_slots( repair_wksp->wksp, repair_ctx, catchup_verbose, args->repair.sorted );
607613
printf( "catchup slots | Use 'i' to toggle extra slot information" TEXT_NEWLINE );
608614
fflush( stdout );
609615

@@ -692,6 +698,11 @@ repair_cmd_fn_profiler_mode( args_t * args,
692698
FD_TEST( repair_tile_idx!=ULONG_MAX );
693699
fd_topo_tile_t * repair_tile = &config->topo.tiles[ repair_tile_idx ];
694700

701+
ulong wksp_id = fd_topo_find_wksp( &config->topo, "repair" );
702+
if( FD_UNLIKELY( wksp_id==ULONG_MAX ) ) FD_LOG_ERR(( "repair workspace not found" ));
703+
fd_topo_wksp_t * repair_wksp = &config->topo.workspaces[ wksp_id ];
704+
fd_topo_join_workspace( &config->topo, repair_wksp, FD_SHMEM_JOIN_MODE_READ_WRITE );
705+
695706
ulong shred_tile_idx = fd_topo_find_tile( &config->topo, "shred", 0UL );
696707
FD_TEST( shred_tile_idx!=ULONG_MAX );
697708
fd_topo_tile_t * shred_tile = &config->topo.tiles[ shred_tile_idx ];
@@ -702,9 +713,27 @@ repair_cmd_fn_profiler_mode( args_t * args,
702713
volatile ulong * repair_metrics = fd_metrics_tile( repair_tile->metrics );
703714
FD_TEST( repair_metrics );
704715

716+
/* Collect all net tiles and their repair_net link metrics */
717+
ulong net_tile_cnt = config->layout.net_tile_count;
718+
volatile ulong ** repair_net_links = aligned_alloc( 8UL, net_tile_cnt * sizeof(volatile ulong*) );
719+
FD_TEST( repair_net_links );
720+
721+
for( ulong i = 0UL; i < net_tile_cnt; i++ ) {
722+
ulong tile_idx = fd_topo_find_tile( &config->topo, "net", i );
723+
if( FD_UNLIKELY( tile_idx == ULONG_MAX ) ) FD_LOG_ERR(( "net tile %lu not found", i ));
724+
fd_topo_tile_t * tile = &config->topo.tiles[ tile_idx ];
725+
726+
ulong repair_net_in_idx = fd_topo_find_tile_in_link( &config->topo, tile, "repair_net", 0UL );
727+
if( FD_UNLIKELY( repair_net_in_idx == ULONG_MAX ) ) {
728+
FD_LOG_ERR(( "repair_net link not found for net tile %lu", i ));
729+
}
730+
repair_net_links[i] = fd_metrics_link_in( tile->metrics, repair_net_in_idx );
731+
FD_TEST( repair_net_links[i] );
732+
}
733+
705734
FD_LOG_NOTICE(( "Repair profiler run" ));
706735

707-
ulong shred_out_link_idx = fd_topo_find_link( &config->topo, "shred_out", 0UL );
736+
ulong shred_out_link_idx = fd_topo_find_link( &config->topo, "shred_out", 0UL );
708737
FD_TEST( shred_out_link_idx!=ULONG_MAX );
709738
fd_topo_link_t * shred_out_link = &config->topo.links[ shred_out_link_idx ];
710739
FD_TEST( shred_out_link );
@@ -742,6 +771,18 @@ repair_cmd_fn_profiler_mode( args_t * args,
742771
printf( " | HighestWindow | %s |\n", fmt_count( buf2, repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_HIGHEST_WINDOW ) ] ) );
743772
printf( " | Index | %s |\n", fmt_count( buf2, repair_metrics[ MIDX( COUNTER, REPAIR, SENT_PKT_TYPES_NEEDED_WINDOW ) ] ) );
744773
printf( " +---------------+--------------+\n" );
774+
printf( " Send Pkt Rate: %s pps\n", fmt_count( buf2, repair_metrics[ MIDX( GAUGE, REPAIR, SEND_PKT_RATE ) ] ) );
775+
776+
/* Sum overrun across all net tiles connected to repair_net */
777+
ulong total_overrun = repair_net_links[0][ MIDX( COUNTER, LINK, OVERRUN_POLLING_FRAG_COUNT ) ]; /* coarse double counting prevention */
778+
ulong total_consumed = 0UL;
779+
for( ulong i = 0UL; i < net_tile_cnt; i++ ) {
780+
volatile ulong * ovar_net_metrics = repair_net_links[i];
781+
total_overrun += ovar_net_metrics[ MIDX( COUNTER, LINK, OVERRUN_READING_FRAG_COUNT ) ];
782+
total_consumed += ovar_net_metrics[ MIDX( COUNTER, LINK, CONSUMED_COUNT ) ];
783+
}
784+
printf( " Total overrun: %s\n", fmt_count( buf2, total_overrun ) );
785+
printf( " Net consumed: %s\n", fmt_count( buf2, total_consumed ) );
745786

746787
print_histogram_buckets( repair_metrics,
747788
MIDX( HISTOGRAM, REPAIR, RESPONSE_LATENCY ),
@@ -767,8 +808,20 @@ repair_cmd_fn_profiler_mode( args_t * args,
767808
fflush( stdout );
768809
last_print = now;
769810
}
811+
770812
if( FD_UNLIKELY( catchup_finished ) ) {
771-
repair_cmd_fn_metrics_mode( args, config );
813+
/* Access the repair tile scratch memory where repair_tile_ctx is stored */
814+
void * scratch = fd_topo_obj_laddr( &config->topo, repair_tile->tile_obj_id );
815+
if( FD_UNLIKELY( !scratch ) ) FD_LOG_ERR(( "Failed to access repair tile scratch memory" ));
816+
FD_SCRATCH_ALLOC_INIT( l, scratch );
817+
ctx_t * repair_ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
818+
819+
/* repair cmd owned memory */
820+
location_table = fd_location_table_join( fd_location_table_new( location_table_mem ) );
821+
read_iptable( args->repair.iptable_path, location_table );
822+
print_peer_location_latency( repair_wksp->wksp, repair_ctx );
823+
print_catchup_slots( repair_wksp->wksp, repair_ctx, 0, 0 );
824+
printf( "\nCatchup tool completed successfully.\n" );
772825
FD_LOG_ERR(("catchup finished. slot %lu", turbine_slot0));
773826
}
774827
}

src/app/shared/fd_action.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ union fdctl_args {
8181
char iptable_path[ 256UL ];
8282
int metrics_only;
8383
int forest_only;
84+
int sorted;
8485
} repair;
8586

8687
struct {

src/disco/metrics/generated/fd_metrics_repair.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ const fd_metrics_meta_t FD_METRICS_REPAIR[FD_METRICS_REPAIR_TOTAL] = {
77
DECLARE_METRIC_ENUM( REPAIR_SENT_PKT_TYPES, COUNTER, REPAIR_SENT_REQUEST_TYPES, NEEDED_HIGHEST_WINDOW ),
88
DECLARE_METRIC_ENUM( REPAIR_SENT_PKT_TYPES, COUNTER, REPAIR_SENT_REQUEST_TYPES, NEEDED_ORPHAN ),
99
DECLARE_METRIC_ENUM( REPAIR_SENT_PKT_TYPES, COUNTER, REPAIR_SENT_REQUEST_TYPES, PONG ),
10+
DECLARE_METRIC( REPAIR_SEND_PKT_RATE, GAUGE ),
1011
DECLARE_METRIC( REPAIR_REPAIRED_SLOTS, COUNTER ),
1112
DECLARE_METRIC( REPAIR_CURRENT_SLOT, COUNTER ),
1213
DECLARE_METRIC( REPAIR_REQUEST_PEERS, COUNTER ),

0 commit comments

Comments
 (0)