Skip to content

Commit 001446e

Browse files
committed
lightningd: allow up to 100 "slow open" channels before forgetting them.
Michael of Boltz told me this long ago, that when fees spiked some of their clients' opens got stuck. After two weeks their node forgot them, and when fees came back down the opens still failed. Unfortunately, I can't see an issue for this! We can give some grace: we don't want to waste too many resources, but 100 channels is nothing. The test needs to be adjusted to have *two* slow open channels, now. Changelog-Changed: Protocol: we won't forget still-opening channels after 2016 blocks, unless there are more than 100. Signed-off-by: Rusty Russell <[email protected]>
1 parent ffce819 commit 001446e

File tree

3 files changed

+90
-47
lines changed

3 files changed

+90
-47
lines changed

lightningd/channel_control.c

+40-22
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "config.h"
2+
#include <ccan/asort/asort.h>
23
#include <ccan/cast/cast.h>
34
#include <ccan/mem/mem.h>
45
#include <ccan/tal/str/str.h>
@@ -1927,18 +1928,8 @@ is_fundee_should_forget(struct lightningd *ld,
19271928
struct channel *channel,
19281929
u32 block_height)
19291930
{
1930-
/* BOLT #2:
1931-
*
1932-
* A non-funding node (fundee):
1933-
* - SHOULD forget the channel if it does not see the
1934-
* correct funding transaction after a timeout of 2016 blocks.
1935-
*/
1936-
u32 max_funding_unconfirmed;
1937-
1938-
if (ld->developer)
1939-
max_funding_unconfirmed = ld->dev_max_funding_unconfirmed;
1940-
else
1941-
max_funding_unconfirmed = 2016;
1931+
/* 2016 by default */
1932+
u32 max_funding_unconfirmed = ld->dev_max_funding_unconfirmed;
19421933

19431934
/* Only applies if we are fundee. */
19441935
if (channel->opener == LOCAL)
@@ -1969,30 +1960,55 @@ is_fundee_should_forget(struct lightningd *ld,
19691960
return true;
19701961
}
19711962

1963+
/* We want in ascending order, so oldest channels first */
1964+
static int cmp_channel_start(struct channel *const *a, struct channel *const *b, void *unused)
1965+
{
1966+
if ((*a)->first_blocknum < (*b)->first_blocknum)
1967+
return -1;
1968+
else if ((*a)->first_blocknum > (*b)->first_blocknum)
1969+
return 1;
1970+
return 0;
1971+
}
1972+
19721973
/* Notify all channels of new blocks. */
19731974
void channel_notify_new_block(struct lightningd *ld,
19741975
u32 block_height)
19751976
{
19761977
struct peer *peer;
19771978
struct channel *channel;
1978-
struct channel **to_forget = tal_arr(NULL, struct channel *, 0);
1979+
struct channel **to_forget = tal_arr(tmpctx, struct channel *, 0);
19791980
size_t i;
19801981
struct peer_node_id_map_iter it;
19811982

1983+
/* BOLT #2:
1984+
*
1985+
* A non-funding node (fundee):
1986+
* - SHOULD forget the channel if it does not see the
1987+
* correct funding transaction after a timeout of 2016 blocks.
1988+
*/
1989+
1990+
/* But we give some latitude! Boltz reported that after a months-long
1991+
* fee spike, they had some failed opens when the tx finally got mined.
1992+
* They're individually cheap, keep the latest 100. */
1993+
size_t forgettable_channels_to_keep = 100;
1994+
1995+
/* For testing */
1996+
if (ld->dev_max_funding_unconfirmed != 2016)
1997+
forgettable_channels_to_keep = 1;
1998+
19821999
/* FIXME: keep separate block-aware channel structure instead? */
19832000
for (peer = peer_node_id_map_first(ld->peers, &it);
19842001
peer;
19852002
peer = peer_node_id_map_next(ld->peers, &it)) {
19862003
list_for_each(&peer->channels, channel, list) {
19872004
if (channel_state_uncommitted(channel->state))
19882005
continue;
1989-
if (is_fundee_should_forget(ld, channel, block_height)) {
2006+
if (is_fundee_should_forget(ld, channel, block_height))
19902007
tal_arr_expand(&to_forget, channel);
1991-
} else
1992-
/* Let channels know about new blocks,
1993-
* required for lease updates */
1994-
try_update_blockheight(ld, channel,
1995-
block_height);
2008+
2009+
/* Let channels know about new blocks,
2010+
* required for lease updates */
2011+
try_update_blockheight(ld, channel, block_height);
19962012
}
19972013
}
19982014

@@ -2004,7 +2020,11 @@ void channel_notify_new_block(struct lightningd *ld,
20042020
* just the freeing of the channel that occurs, but the
20052021
* potential destruction of the peer that invalidates
20062022
* memory the inner loop is accessing. */
2007-
for (i = 0; i < tal_count(to_forget); ++i) {
2023+
if (tal_count(to_forget) < forgettable_channels_to_keep)
2024+
return;
2025+
2026+
asort(to_forget, tal_count(to_forget), cmp_channel_start, NULL);
2027+
for (i = 0; i + forgettable_channels_to_keep < tal_count(to_forget); ++i) {
20082028
channel = to_forget[i];
20092029
/* Report it first. */
20102030
log_unusual(channel->log,
@@ -2020,8 +2040,6 @@ void channel_notify_new_block(struct lightningd *ld,
20202040
/* And forget it. COMPLETELY. */
20212041
delete_channel(channel, true);
20222042
}
2023-
2024-
tal_free(to_forget);
20252043
}
20262044

20272045
/* Since this could vanish while we're checking with bitcoind, we need to save

tests/test_connection.py

+33-19
Original file line numberDiff line numberDiff line change
@@ -2791,19 +2791,19 @@ def test_fundee_forget_funding_tx_unconfirmed(node_factory, bitcoind):
27912791
"""Test that fundee will forget the channel if
27922792
the funding tx has been unconfirmed for too long.
27932793
"""
2794-
# Keep this low (default is 2016), since everything
2794+
# Keep confirms low (default is 2016), since everything
27952795
# is much slower in VALGRIND mode and wait_for_log
27962796
# could time out before lightningd processes all the
2797-
# blocks.
2798-
blocks = 50
2799-
# opener
2800-
l1 = node_factory.get_node()
2801-
# peer
2802-
l2 = node_factory.get_node(options={"dev-max-funding-unconfirmed-blocks": blocks})
2803-
l1.rpc.connect(l2.info['id'], 'localhost', l2.port)
2797+
# blocks. This also reduces the number of "slow confirm" peers
2798+
# from 100, to 1.
2799+
blocks = 10
2800+
l1, l2, l3 = node_factory.line_graph(3, fundchannel=False,
2801+
opts={"dev-max-funding-unconfirmed-blocks": blocks})
28042802

2805-
# Give opener some funds.
2803+
# Give openers some funds.
28062804
l1.fundwallet(10**7)
2805+
l3.fundwallet(10**7)
2806+
sync_blockheight(bitcoind, [l1, l2, l3])
28072807

28082808
def mock_sendrawtransaction(r):
28092809
return {'id': r['id'], 'error': {'code': 100, 'message': 'sendrawtransaction disabled'}}
@@ -2813,23 +2813,37 @@ def mock_donothing(r):
28132813

28142814
# Prevent opener from broadcasting funding tx (any tx really).
28152815
l1.daemon.rpcproxy.mock_rpc('sendrawtransaction', mock_sendrawtransaction)
2816+
# (for EXPERIMENTAL_DUAL_FUND=1, have to prevent l2 doing it too!)
28162817
l2.daemon.rpcproxy.mock_rpc('sendrawtransaction', mock_donothing)
28172818

2818-
# Fund the channel.
2819-
# The process will complete, but opener will be unable
2820-
# to broadcast and confirm funding tx.
2819+
# l1 tries to open, fails.
28212820
with pytest.raises(RpcError, match=r'sendrawtransaction disabled'):
28222821
l1.rpc.fundchannel(l2.info['id'], 10**6)
28232822

2824-
# Generate blocks until unconfirmed.
2825-
bitcoind.generate_block(blocks)
2823+
# One block later, l3 tries, fails silently.
2824+
bitcoind.generate_block(1)
2825+
sync_blockheight(bitcoind, [l1, l2, l3])
2826+
2827+
l3.daemon.rpcproxy.mock_rpc('sendrawtransaction', mock_donothing)
2828+
l3.rpc.fundchannel(l2.info['id'], 10**6)
2829+
2830+
# After 10 blocks, l1's is due to be forgotten, but doesn't since we let 1 linger.
2831+
bitcoind.generate_block(blocks - 1)
2832+
assert not l2.daemon.is_in_log(r'Forgetting channel')
28262833

2827-
# fundee will forget channel!
2828-
# (Note that we let the last number be anything (hence the {}\d)
2829-
l2.daemon.wait_for_log(r'Forgetting channel: It has been {}\d blocks'.format(str(blocks)[:-1]))
2834+
if EXPERIMENTAL_DUAL_FUND:
2835+
state = 'DUALOPEND_AWAITING_LOCKIN'
2836+
else:
2837+
state = 'CHANNELD_AWAITING_LOCKIN'
2838+
assert [c['state'] for c in l2.rpc.listpeerchannels()['channels']] == [state] * 2
2839+
2840+
# But once l3 is also delayed, l1 gets kicked out.
2841+
bitcoind.generate_block(1)
28302842

2831-
# fundee will also forget, but not disconnect from peer.
2832-
wait_for(lambda: l2.rpc.listpeerchannels(l1.info['id'])['channels'] == [])
2843+
# fundee will forget oldest channel: the one with l1!
2844+
l2.daemon.wait_for_log(rf'Forgetting channel: It has been {blocks + 1} blocks')
2845+
assert [c['state'] for c in l2.rpc.listpeerchannels()['channels']] == [state]
2846+
assert l2.rpc.listpeerchannels(l1.info['id']) == {'channels': []}
28332847

28342848

28352849
@pytest.mark.openchannel('v2')

tests/test_opening.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import pytest
1111
import re
1212
import unittest
13+
import time
1314

1415

1516
def find_next_feerate(node, peer):
@@ -2697,8 +2698,8 @@ def test_zeroconf_forget(node_factory, bitcoind, dopay: bool):
26972698
"""
26982699
blocks = 50
26992700
plugin_path = Path(__file__).parent / "plugins" / "zeroconf-selective.py"
2700-
l1, l2 = node_factory.get_nodes(
2701-
2,
2701+
l1, l2, l3 = node_factory.get_nodes(
2702+
3,
27022703
opts=[
27032704
{},
27042705
{
@@ -2707,6 +2708,7 @@ def test_zeroconf_forget(node_factory, bitcoind, dopay: bool):
27072708
"zeroconf-mindepth": "0",
27082709
"dev-max-funding-unconfirmed-blocks": blocks,
27092710
},
2711+
{},
27102712
],
27112713
)
27122714

@@ -2732,11 +2734,17 @@ def censoring_sendrawtx(tx):
27322734
l1.rpc.pay(inv["bolt11"])
27332735
wait_for(lambda: only_one(l2.rpc.listpeerchannels()['channels'])['to_us_msat'] == 1)
27342736

2737+
# We need *another* channel to make it forget the first though!
2738+
l3.connect(l2)
2739+
l3.fundwallet(10**7)
2740+
l3.rpc.fundchannel(l2.info["id"], 10**6, mindepth=0)
2741+
bitcoind.generate_block(1)
2742+
27352743
# Now stop, in order to cause catchup and re-evaluate whether to forget the channel
27362744
l2.stop()
27372745

27382746
# Now we generate enough blocks to cause l2 to forget the channel.
2739-
bitcoind.generate_block(blocks) # > blocks
2747+
bitcoind.generate_block(blocks - 1) # > blocks
27402748
l2.start()
27412749

27422750
sync_blockheight(bitcoind, [l1, l2])
@@ -2746,13 +2754,16 @@ def censoring_sendrawtx(tx):
27462754
bitcoind.generate_block(1)
27472755
sync_blockheight(bitcoind, [l1, l2])
27482756

2757+
# This may take a moment!
2758+
time.sleep(5)
2759+
27492760
have_forgotten = l2.daemon.is_in_log(
2750-
r"Forgetting channel: It has been [0-9]+ blocks without the funding transaction"
2761+
r"UNUSUAL {}-chan#1: Forgetting channel: It has been 51 blocks without the funding transaction ".format(l1.info['id'])
27512762
)
27522763

27532764
if dopay:
27542765
assert not have_forgotten
2755-
assert len(l2.rpc.listpeerchannels()["channels"]) == 1
2766+
assert set([c['peer_id'] for c in l2.rpc.listpeerchannels()["channels"]]) == set([l1.info['id'], l3.info['id']])
27562767
else:
27572768
assert have_forgotten
2758-
assert l2.rpc.listpeerchannels() == {"channels": []}
2769+
assert [c['peer_id'] for c in l2.rpc.listpeerchannels()["channels"]] == [l3.info['id']]

0 commit comments

Comments
 (0)