Skip to content

Commit e67af12

Browse files
committed
askrene: limit how many children we have.
Queue them before we query local channels, so they don't use stale information. Changelog-Added: Config: `askrene-max-threads` to control how many CPUs we use for routing (default 4). Signed-off-by: Rusty Russell <[email protected]>
1 parent 4825b4a commit e67af12

File tree

3 files changed

+73
-12
lines changed

3 files changed

+73
-12
lines changed

doc/lightningd-config.5.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,10 @@ command, so they invoices can also be paid onchain.
561561

562562
This option makes the `getroutes` call fail if it takes more than this many seconds. Setting it to zero is a fun way to ensure your node never makes payments.
563563

564+
* **askrene-max-threads**=*NUMBER* [plugin `askrene`, *dynamic*]
565+
566+
This option controls how many routes askrene will calculate at once: this is only useful on nodes which make multiple payments at once, and setting the number higher than your number of cores/CPUs will not help. The default is 4.
567+
564568
### Networking options
565569

566570
Note that for simple setups, the implicit *autolisten* option does the

plugins/askrene/askrene.c

Lines changed: 63 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ param_algorithm(struct command *cmd, const char *name, const char *buffer,
337337
}
338338

339339
struct getroutes_info {
340+
/* We keep this around in askrene->waiters if we're busy */
341+
struct list_node list;
340342
struct command *cmd;
341343
struct node_id source, dest;
342344
struct amount_msat amount, maxfee;
@@ -618,7 +620,6 @@ static struct command_result *do_getroutes(struct command *cmd,
618620
child->log_conn = io_new_conn(child, log_fd, child_log_init, child);
619621
child->cmd = cmd;
620622

621-
/* FIXME: limit parallelism! */
622623
list_add_tail(&askrene->children, &child->list);
623624
tal_add_destructor(child, destroy_router_child);
624625
return command_still_pending(cmd);
@@ -727,6 +728,49 @@ listpeerchannels_done(struct command *cmd,
727728
return do_getroutes(cmd, localmods, info);
728729
}
729730

731+
/* Mutual recursion */
732+
static struct command_result *begin_request(struct askrene *askrene,
733+
struct getroutes_info *info);
734+
735+
/* One is finished. Maybe wake up a waiter */
736+
static void destroy_live_command(struct command *cmd)
737+
{
738+
struct askrene *askrene = get_askrene(cmd->plugin);
739+
struct getroutes_info *info;
740+
741+
assert(askrene->num_live_requests > 0);
742+
askrene->num_live_requests--;
743+
744+
if (askrene->num_live_requests >= askrene->max_children)
745+
return;
746+
747+
info = list_pop(&askrene->waiters, struct getroutes_info, list);
748+
if (info)
749+
begin_request(askrene, info);
750+
}
751+
752+
static struct command_result *begin_request(struct askrene *askrene,
753+
struct getroutes_info *info)
754+
{
755+
askrene->num_live_requests++;
756+
757+
/* Wake any waiting ones when we're finished */
758+
tal_add_destructor(info->cmd, destroy_live_command);
759+
760+
if (have_layer(info->layers, "auto.localchans")) {
761+
struct out_req *req;
762+
763+
req = jsonrpc_request_start(info->cmd,
764+
"listpeerchannels",
765+
listpeerchannels_done,
766+
forward_error, info);
767+
return send_outreq(req);
768+
} else
769+
info->local_layer = NULL;
770+
771+
return do_getroutes(info->cmd, gossmap_localmods_new(info->cmd), info);
772+
}
773+
730774
static struct command_result *json_getroutes(struct command *cmd,
731775
const char *buffer,
732776
const jsmntok_t *params)
@@ -739,6 +783,7 @@ static struct command_result *json_getroutes(struct command *cmd,
739783
*/
740784
/* FIXME: Typo in spec for CLTV in description! But it breaks our spelling check, so we omit it above */
741785
const u32 maxdelay_allowed = 2016;
786+
struct askrene *askrene = get_askrene(cmd->plugin);
742787
const u32 default_maxparts = 100;
743788
struct getroutes_info *info = tal(cmd, struct getroutes_info);
744789
/* param functions require pointers */
@@ -796,18 +841,15 @@ static struct command_result *json_getroutes(struct command *cmd,
796841
info->additional_costs = new_htable(info, additional_cost_htable);
797842
info->maxparts = *maxparts;
798843

799-
if (have_layer(info->layers, "auto.localchans")) {
800-
struct out_req *req;
801-
802-
req = jsonrpc_request_start(cmd,
803-
"listpeerchannels",
804-
listpeerchannels_done,
805-
forward_error, info);
806-
return send_outreq(req);
807-
} else
808-
info->local_layer = NULL;
844+
if (askrene->num_live_requests >= askrene->max_children) {
845+
cmd_log(tmpctx, cmd, LOG_INFORM,
846+
"Too many running at once (%zu vs %u): waiting",
847+
askrene->num_live_requests, askrene->max_children);
848+
list_add_tail(&askrene->waiters, &info->list);
849+
return command_still_pending(cmd);
850+
}
809851

810-
return do_getroutes(cmd, gossmap_localmods_new(cmd), info);
852+
return begin_request(askrene, info);
811853
}
812854

813855
static struct command_result *json_askrene_reserve(struct command *cmd,
@@ -1323,6 +1365,8 @@ static const char *init(struct command *init_cmd,
13231365
askrene->plugin = plugin;
13241366
list_head_init(&askrene->layers);
13251367
list_head_init(&askrene->children);
1368+
list_head_init(&askrene->waiters);
1369+
askrene->num_live_requests = 0;
13261370
askrene->reserved = new_reserve_htable(askrene);
13271371
askrene->gossmap = gossmap_load(askrene, GOSSIP_STORE_FILENAME,
13281372
plugin_gossmap_logcb, plugin);
@@ -1350,6 +1394,7 @@ int main(int argc, char *argv[])
13501394

13511395
askrene = tal(NULL, struct askrene);
13521396
askrene->route_seconds = 10;
1397+
askrene->max_children = 4;
13531398
plugin_main(argv, init, take(askrene), PLUGIN_RESTARTABLE, true, NULL, commands, ARRAY_SIZE(commands),
13541399
NULL, 0, NULL, 0, NULL, 0,
13551400
plugin_option_dynamic("askrene-timeout",
@@ -1358,5 +1403,11 @@ int main(int argc, char *argv[])
13581403
" Defaults to 10 seconds",
13591404
u32_option, u32_jsonfmt,
13601405
&askrene->route_seconds),
1406+
plugin_option_dynamic("askrene-max-threads",
1407+
"int",
1408+
"How many routes to calculate at once."
1409+
" Defaults to 4",
1410+
u32_option, u32_jsonfmt,
1411+
&askrene->max_children),
13611412
NULL);
13621413
}

plugins/askrene/askrene.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,14 @@ struct askrene {
2828
struct command *layer_cmd;
2929
/* How long before we abort trying to find a route? */
3030
u32 route_seconds;
31+
/* Maximum number of routing children */
32+
u32 max_children;
33+
/* How many requests live now? */
34+
size_t num_live_requests;
3135
/* Routing children currently in flight. */
3236
struct list_head children;
37+
/* Ones waiting */
38+
struct list_head waiters;
3339
};
3440

3541
/* Useful plugin->askrene mapping */

0 commit comments

Comments
 (0)