Skip to content

Commit

Permalink
lightningd: increase fd limit if we can, to double number of starting…
Browse files Browse the repository at this point in the history
… channels.

1024 is a common limit, and people are starting to hit that many channels, so we should increase it: twice the number of channels seems reasonable, though we only do this at restart time.

Changelog-Changed: lightningd: we now try to increase the number of file descriptors, if it's less than twice the number of channels at startup (and log if we cannot!).
Signed-off-by: Rusty Russell <[email protected]>
  • Loading branch information
rustyrussell committed Apr 19, 2024
1 parent 69bb05e commit adbf9bc
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 59 deletions.
152 changes: 97 additions & 55 deletions lightningd/lightningd.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
ld->dev_any_channel_type = false;
ld->dev_allow_shutdown_destination_change = false;

/*~ We try to ensure enough fds for twice the number of channels
* we start with. We have a developer option to change that factor
* for testing. */
ld->fd_limit_multiplier = 2;

/*~ This is a CCAN list: an embedded double-linked list. It's not
* really typesafe, but relies on convention to access the contents.
* It's inspired by the closely-related Linux kernel list.h.
Expand Down Expand Up @@ -1049,6 +1054,90 @@ bool lightningd_deprecated_in_ok(struct lightningd *ld,
complain_deprecated, &depr_in);
}

/*~ We fork out new processes very very often; every channel gets its own
 * process, for example, and we have `hsmd` and `gossipd` and the plugins as
 * well.  We also keep around several file descriptors (`fd`s), including
 * ones used to talk to `hsmd`, which is a privileged process with access to
 * private keys and is therefore very sensitive.  Thus, a freshly forked
 * process must close every fd it is not supposed to have, ASAP.
 *
 * We do this using the `ccan/closefrom` module, which emulates the
 * `closefrom` syscall found on BSD and Solaris.  The emulation tries the
 * fastest facility available (`close_range` on Linux 5.9+, walking
 * `/proc/$PID/fd` on many OSs (which requires procfs to be mounted), the
 * real `closefrom` if it exists, etc.).  If none of those are available it
 * falls back to iterating over the theoretical range of possible fds.
 *
 * On some systems that range can be very high, up to `INT_MAX` in the worst
 * case, which makes the fallback loop very slow.  The module reports this
 * via `closefrom_may_be_slow()`, and offers `closefrom_limit()` to cap the
 * number of allowed fds *IF AND ONLY IF* `closefrom_may_be_slow()` is true;
 * on systems with a fast `closefrom`, `closefrom_limit()` does nothing.
 *
 * Previously we always imposed a limit of 1024 file descriptors (because we
 * used to always iterate up to the limit instead of using an OS facility,
 * as those were non-portable and needed per-OS code), until @whitslack went
 * and made >1000 channels and hit the 1024 limit.
 */

/* Try to raise the soft RLIMIT_NOFILE to num_channels times
 * ld->fd_limit_multiplier, capped at the hard limit.  Only ever raises the
 * limit; failures are logged and otherwise ignored. */
static void try_raise_fd_limit(struct lightningd *ld, size_t num_channels)
{
	const size_t size_max = (size_t)-1;
	struct rlimit nofile;
	size_t wanted;

	if (getrlimit(RLIMIT_NOFILE, &nofile) != 0) {
		log_broken(ld->log,
			   "Could not get file descriptor limit: %s",
			   strerror(errno));
		return;
	}

	/* Aim for twice as many fds as current channels (by default), for
	 * growth.  Saturate rather than wrap: a wrapped product could look
	 * smaller than the current limit and silently skip the increase. */
	if (ld->fd_limit_multiplier != 0
	    && num_channels > size_max / ld->fd_limit_multiplier)
		wanted = size_max;
	else
		wanted = num_channels * ld->fd_limit_multiplier;

	/* Already enough: never lower the limit here. */
	if (nofile.rlim_cur >= wanted)
		return;

	if (wanted > nofile.rlim_max) {
		/* NOTE: tests/test_misc.py (test_low_fd_limit) matches this
		 * message text verbatim, so the wording is left untouched. */
		log_unusual(ld->log,
			    "WARNING: we have %zu channels but can file descriptors limited to %zu!",
			    num_channels, (size_t)nofile.rlim_max);
		/* Best we can do is go up to the hard limit. */
		nofile.rlim_cur = nofile.rlim_max;
	} else {
		log_debug(ld->log,
			  "Increasing file descriptor limit to %zu (%zu channels, max is %zu)",
			  wanted,
			  num_channels,
			  (size_t)nofile.rlim_max);
		nofile.rlim_cur = wanted;
	}

	if (setrlimit(RLIMIT_NOFILE, &nofile) != 0) {
		log_broken(ld->log,
			   "Could not increase file limit to %zu: %s",
			   (size_t)nofile.rlim_cur,
			   strerror(errno));
	}
}

/* Set up file descriptor limits once the channels have been loaded:
 * first try to raise the soft limit to cover num_channels, then apply the
 * closefrom self-limit if closing fds would otherwise be slow.
 *
 * The two steps are independent: a failure to read or raise the rlimit must
 * not skip the closefrom handling (or its notice to the user). */
static void setup_fd_limit(struct lightningd *ld, size_t num_channels)
{
	try_raise_fd_limit(ld, num_channels);

	/*~ If `closefrom_may_be_slow`, we limit ourselves to 4096 file
	 * descriptors; tell the user about it as that limits the number
	 * of channels they can have.
	 * We do not really expect most users to ever reach that many,
	 * but: https://github.com/ElementsProject/lightning/issues/4868
	 */
	if (closefrom_may_be_slow()) {
		log_info(ld->log,
			 "We have self-limited number of open file "
			 "descriptors to 4096, but that will result in a "
			 "'Too many open files' error if you ever reach "
			 ">4000 channels. Please upgrade your OS kernel "
			 "(Linux 5.9+, FreeBSD 8.0+), or mount proc or "
			 "/dev/fd (if running in chroot) if you are "
			 "approaching that many channels.");
		closefrom_limit(4096);
	}
}

int main(int argc, char *argv[])
{
struct lightningd *ld;
Expand All @@ -1063,6 +1152,7 @@ int main(int argc, char *argv[])
int exit_code = 0;
char **orig_argv;
bool try_reexec;
size_t num_channels;

trace_span_start("lightningd/startup", argv);

Expand All @@ -1072,44 +1162,6 @@ int main(int argc, char *argv[])
/*~ This handles --dev-debug-self really early, which we otherwise ignore */
daemon_developer_mode(argv);

/*~ We fork out new processes very very often; every channel gets its
* own process, for example, and we have `hsmd` and `gossipd` and
* the plugins as well.
* Now, we also keep around several file descriptors (`fd`s), including
* file descriptors to communicate with `hsmd` which is a privileged
* process with access to private keys and is therefore very sensitive.
* Thus, we need to close all file descriptors other than what the
* forked-out new process should have ASAP.
*
* We do this by using the `ccan/closefrom` module, which implements
* an emulation for the `closefrom` syscall on BSD and Solaris.
* This emulation tries to use the fastest facility available on the
* system (`close_range` syscall on Linux 5.9+, snooping through
* `/proc/$PID/fd` on many OSs (but requires procps to be mounted),
* the actual `closefrom` call if available, etc.).
* As a fallback if none of those are available on the system, however,
* it just iterates over the theoretical range of possible file
* descriptors.
*
* On some systems, that theoretical range can be very high, up to
* `INT_MAX` in the worst case.
* If the `closefrom` emulation has to fall back to this loop, it
* can be very slow; fortunately, the emulation will also inform
* us of that via the `closefrom_may_be_slow` function, and also has
* `closefrom_limit` to limit the number of allowed file descriptors
* *IF AND ONLY IF* `closefrom_may_be_slow()` is true.
*
* On systems with a fast `closefrom` then `closefrom_limit` does
* nothing.
*
* Previously we always imposed a limit of 1024 file descriptors
* (because we used to always iterate up to limit instead of using
* some OS facility, because those were non-portable and needed
* code for each OS), until @whitslack went and made >1000 channels
* and hit the 1024 limit.
*/
closefrom_limit(4096);

/*~ This sets up SIGCHLD to make sigchld_rfd readable. */
sigchld_rfd = setup_sig_handlers();

Expand Down Expand Up @@ -1305,10 +1357,15 @@ int main(int argc, char *argv[])
/*~ Pull peers, channels and HTLCs from db. Needs to happen after the
* topology is initialized since some decisions rely on being able to
* know the blockheight. */
unconnected_htlcs_in = notleak(load_channels_from_wallet(ld));
unconnected_htlcs_in = notleak(load_channels_from_wallet(ld,
&num_channels));
db_commit_transaction(ld->wallet->db);

/*~ The gossip daemon looks after the routing gossip;
/*~ Now we have channels, try to ensure we have enough file descriptors
* to cover 2x that many. */
setup_fd_limit(ld, num_channels);

/*~ The gossip daemon looks after the routing gossip;
* channel_announcement, channel_update, node_announcement and gossip
* queries. It also hands us the latest channel_updates for our
* channels. */
Expand Down Expand Up @@ -1373,21 +1430,6 @@ int main(int argc, char *argv[])
ld->recover);
plugin_hook_call_recover(ld, NULL, payload);
}
/*~ If `closefrom_may_be_slow`, we limit ourselves to 4096 file
* descriptors; tell the user about it as that limits the number
* of channels they can have.
* We do not really expect most users to ever reach that many,
* but: https://github.com/ElementsProject/lightning/issues/4868
*/
if (closefrom_may_be_slow())
log_info(ld->log,
"We have self-limited number of open file "
"descriptors to 4096, but that will result in a "
"'Too many open files' error if you ever reach "
">4000 channels. Please upgrade your OS kernel "
"(Linux 5.9+, FreeBSD 8.0+), or mount proc or "
"/dev/fd (if running in chroot) if you are "
"approaching that many channels.");

/*~ If we have channels closing, make sure we re-xmit the last
* transaction, in case bitcoind lost it. */
Expand Down
3 changes: 3 additions & 0 deletions lightningd/lightningd.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ struct lightningd {
/* Contains the codex32 string used with --recover flag */
char *recover;

/* 2, unless overridden by --dev-fd-limit-multiplier */
u32 fd_limit_multiplier;

/* If we want to debug a subdaemon/plugin. */
char *dev_debug_subprocess;

Expand Down
4 changes: 4 additions & 0 deletions lightningd/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,10 @@ static void dev_register_opts(struct lightningd *ld)
opt_set_bool,
&ld->dev_allow_shutdown_destination_change,
"Allow destination override on close, even if risky");
clnopt_witharg("--dev-fd-limit-multiplier", OPT_DEV|OPT_SHOWINT,
opt_set_u32, opt_show_u32,
&ld->fd_limit_multiplier,
"Try to set fd limit to this many times by number of channels (default: 2)");
/* This is handled directly in daemon_developer_mode(), so we ignore it here */
clnopt_noarg("--dev-debug-self", OPT_DEV,
opt_ignore,
Expand Down
5 changes: 4 additions & 1 deletion lightningd/peer_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -2509,7 +2509,8 @@ void setup_peers(struct lightningd *ld)
}

/* Pull peers, channels and HTLCs from db, and wire them up. */
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld)
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld,
size_t *num_channels)
{
struct peer *peer;
struct htlc_in_map *unconnected_htlcs_in = tal(ld, struct htlc_in_map);
Expand All @@ -2519,6 +2520,7 @@ struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld)
if (!wallet_init_channels(ld->wallet))
fatal("Could not load channels from the database");

*num_channels = 0;
/* First we load the incoming htlcs */
for (peer = peer_node_id_map_first(ld->peers, &it);
peer;
Expand All @@ -2531,6 +2533,7 @@ struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld)
ld->htlcs_in)) {
fatal("could not load htlcs for channel");
}
(*num_channels)++;
}
}

Expand Down
9 changes: 7 additions & 2 deletions lightningd/peer_control.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,13 @@ struct amount_msat channel_amount_spendable(const struct channel *channel);
struct amount_msat channel_amount_receivable(const struct channel *channel);

/* Pull peers, channels and HTLCs from db, and wire them up.
* Returns any HTLCs we have to resubmit via htlcs_resubmit. */
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld);
* Returns any HTLCs we have to resubmit via htlcs_resubmit.
*
* As a side-effect, count total channels loaded into *num_channels.
*/
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld,
size_t *num_channels);


struct leak_detect;
void peer_dev_memleak(struct lightningd *ld, struct leak_detect *leaks);
Expand Down
3 changes: 2 additions & 1 deletion lightningd/test/run-find_my_abspath.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ void jsonrpc_stop_all(struct lightningd *ld UNNEEDED)
void jsonrpc_stop_listening(struct jsonrpc *jsonrpc UNNEEDED)
{ fprintf(stderr, "jsonrpc_stop_listening called!\n"); abort(); }
/* Generated stub for load_channels_from_wallet */
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld UNNEEDED)
struct htlc_in_map *load_channels_from_wallet(struct lightningd *ld UNNEEDED,
size_t *num_channels UNNEEDED)
{ fprintf(stderr, "load_channels_from_wallet called!\n"); abort(); }
/* Generated stub for log_ */
void log_(struct logger *logger UNNEEDED, enum log_level level UNNEEDED,
Expand Down
21 changes: 21 additions & 0 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os
import pytest
import re
import resource
import shutil
import signal
import socket
Expand Down Expand Up @@ -3982,3 +3983,23 @@ def test_set_feerate_offset(node_factory, bitcoind):

l1.daemon.wait_for_log(' to CLOSINGD_COMPLETE')
l2.daemon.wait_for_log(' to CLOSINGD_COMPLETE')


def test_low_fd_limit(node_factory, bitcoind):
    """Check lightningd's fd-limit handling at restart with one channel.

    l1 gets a multiplier one above the hard RLIMIT_NOFILE, so it must log
    the "limited" warning; l2 gets a multiplier equal to the hard limit, so
    it must log that it increased its limit to exactly that value.
    """
    soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)

    # We assume this, otherwise l2 cannot increase limits!
    assert soft_limit < hard_limit

    # l1 asks for too much, l2 asks for more than it has, but enough.
    l1_opts = {'dev-fd-limit-multiplier': hard_limit + 1, 'allow_warning': True}
    l2_opts = {'dev-fd-limit-multiplier': hard_limit}
    l1, l2 = node_factory.line_graph(2, opts=[l1_opts, l2_opts])

    # The fd check only happens at startup once channels exist, so restart.
    l1.restart()

    # l1 cannot get what it asked for: expect the "limited" warning.
    limited_re = 'UNUSUAL.*WARNING: we have 1 channels but can file descriptors limited to {}'.format(hard_limit)
    assert l1.daemon.is_in_log(limited_re)

    l2.restart()

    # l2's request fits under the hard limit: expect the increase message.
    increased_re = r'Increasing file descriptor limit to {} \(1 channels, max is {}'.format(hard_limit, hard_limit)
    assert l2.daemon.is_in_log(increased_re)

0 comments on commit adbf9bc

Please sign in to comment.