Skip to content

Commit

Permalink
lightningd: implement recover command.
Browse files Browse the repository at this point in the history
Changelog-Added: JSON-RPC: `recover` command to force an (unused) lightningd node to restart with the `--recover` flag.
Signed-off-by: Rusty Russell <[email protected]>
  • Loading branch information
rustyrussell committed Oct 23, 2023
1 parent 75f58e1 commit c9470c7
Show file tree
Hide file tree
Showing 6 changed files with 209 additions and 12 deletions.
3 changes: 3 additions & 0 deletions common/jsonrpc_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ enum jsonrpc_errcode {
RUNE_NOT_PERMITTED = 1502,
RUNE_BLACKLISTED = 1503,

/* Errors from recover command */
RECOVER_NODE_IN_USE = 1600,

/* Errors from wait* commands */
WAIT_TIMEOUT = 2000,
};
Expand Down
140 changes: 130 additions & 10 deletions lightningd/jsonrpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,28 @@
*/
/* eg: { "jsonrpc":"2.0", "method" : "dev-echo", "params" : [ "hello", "Arabella!" ], "id" : "1" } */
#include "config.h"
#include <ccan/array_size/array_size.h>
#include <ccan/asort/asort.h>
#include <ccan/err/err.h>
#include <ccan/io/io.h>
#include <ccan/json_escape/json_escape.h>
#include <ccan/json_out/json_out.h>
#include <ccan/tal/path/path.h>
#include <ccan/tal/str/str.h>
#include <common/codex32.h>
#include <common/configdir.h>
#include <common/json_command.h>
#include <common/json_filter.h>
#include <common/json_param.h>
#include <common/memleak.h>
#include <common/timeout.h>
#include <common/trace.h>
#include <db/common.h>
#include <db/exec.h>
#include <errno.h>
#include <fcntl.h>
#include <lightningd/jsonrpc.h>
#include <lightningd/options.h>
#include <lightningd/plugin_hook.h>
#include <sys/socket.h>
#include <sys/stat.h>
Expand Down Expand Up @@ -184,20 +190,15 @@ static const struct json_command help_command = {
};
AUTODATA(json_command, &help_command);

static struct command_result *json_stop(struct command *cmd,
const char *buffer,
const jsmntok_t *obj UNNEEDED,
const jsmntok_t *params)
/* We prepare a canned JSON response, for top level to write as reply
* immediately before we exit. */
static struct command_result *prepare_stop_conn(struct command *cmd,
const char *why)
{
struct json_out *jout;
const char *p;
size_t len;

if (!param(cmd, buffer, params, NULL))
return command_param_failed();

log_unusual(cmd->ld->log, "JSON-RPC shutdown");

/* With rpc_command_hook, jcon might have closed in the meantime! */
if (!cmd->jcon) {
/* Return us to toplevel lightningd.c */
Expand All @@ -215,7 +216,7 @@ static struct command_result *json_stop(struct command *cmd,
/* Copy input id token exactly */
memcpy(json_out_member_direct(jout, "id", strlen(cmd->id)),
cmd->id, strlen(cmd->id));
json_out_addstr(jout, "result", "Shutdown complete");
json_out_addstr(jout, "result", why);
json_out_end(jout, '}');
json_out_finished(jout);

Expand All @@ -230,6 +231,18 @@ static struct command_result *json_stop(struct command *cmd,
return command_still_pending(cmd);
}

/* Handler for the "stop" RPC.
 *
 * Takes no parameters.  On a valid request it logs the shutdown and hands
 * over to prepare_stop_conn(), which prepares the canned "Shutdown complete"
 * reply; on malformed parameters it returns command_param_failed(). */
static struct command_result *json_stop(struct command *cmd,
					const char *buffer,
					const jsmntok_t *obj UNNEEDED,
					const jsmntok_t *params)
{
	if (param(cmd, buffer, params, NULL)) {
		log_unusual(cmd->ld->log, "JSON-RPC shutdown");
		return prepare_stop_conn(cmd, "Shutdown complete");
	}

	return command_param_failed();
}

static const struct json_command stop_command = {
"stop",
"utility",
Expand All @@ -238,6 +251,113 @@ static const struct json_command stop_command = {
};
AUTODATA(json_command, &stop_command);

static bool have_channels(struct lightningd *ld)
{
struct peer_node_id_map_iter it;
struct peer *peer;

for (peer = peer_node_id_map_first(ld->peers, &it);
peer;
peer = peer_node_id_map_next(ld->peers, &it)) {
if (peer->uncommitted_channel)
return true;
if (!list_empty(&peer->channels))
return true;
}
return false;
}

/* Parameter callback for an HSM secret given as a string.
 *
 * Copies the token into *hsm_secret, then validates it with
 * hsm_secret_arg().  The decoded payload is only a by-product of that
 * validation and is never read here: the caller gets the original string.
 *
 * Returns NULL on success, or the command_fail_badparam() result carrying
 * hsm_secret_arg()'s error message. */
static struct command_result *param_codex32_or_hex(struct command *cmd,
						   const char *name,
						   const char *buffer,
						   const jsmntok_t *tok,
						   const char **hsm_secret)
{
	const u8 *decoded;
	char *problem;

	*hsm_secret = json_strdup(cmd, buffer, tok);

	problem = hsm_secret_arg(tmpctx, *hsm_secret, &decoded);
	if (!problem)
		return NULL;

	return command_fail_badparam(cmd, name, buffer, tok, problem);
}

/* --recover cannot proceed while these files are in place, so create the
 * directory `dir` and move each of them into it.
 *
 * The file names are relative, so they are resolved against the current
 * working directory.  Failing to create the directory or to move any one
 * file is fatal. */
static void move_prerecover_files(const char *dir)
{
	const char *files[] = {
		"lightningd.sqlite3",
		"emergency.recover",
		"hsm_secret",
	};
	size_t i = 0;

	if (mkdir(dir, 0770) != 0)
		fatal("Could not make %s: %s", dir, strerror(errno));

	while (i < ARRAY_SIZE(files)) {
		const char *src = files[i];
		const char *dest = path_join(tmpctx, dir, src);

		if (rename(src, dest) != 0)
			fatal("Could not move %s: %s", src, strerror(errno));
		i++;
	}
}

/* Handler for the "recover" RPC.
 *
 * Requires one parameter, "hsmsecret", validated by param_codex32_or_hex().
 * The request is refused, in this order, if:
 *   - the database is not sqlite3,
 *   - the "bip32_max_index" db variable is non-zero (addresses were issued),
 *   - any peer has a channel,
 *   - ld->recover is already set.
 *
 * In check-only mode it stops after those tests.  Otherwise it moves the
 * existing files into "lightning.pre-recover.<pid>", records the secret and
 * the re-exec request on ld, and prepares the final reply. */
static struct command_result *json_recover(struct command *cmd,
					   const char *buffer,
					   const jsmntok_t *obj UNNEEDED,
					   const jsmntok_t *params)
{
	struct lightningd *ld = cmd->ld;
	const char *hsm_secret, *backup_dir;

	if (!param_check(cmd, buffer, params,
			 p_req("hsmsecret", param_codex32_or_hex, &hsm_secret),
			 NULL))
		return command_param_failed();

	/* FIXME: How do we "move" the Postgres DB? */
	if (!streq(ld->wallet->db->config->name, "sqlite3"))
		return command_fail(cmd, LIGHTNINGD,
				    "Only sqlite3 supported for recover command");

	/* Check this is an empty node! */
	if (db_get_intvar(ld->wallet->db, "bip32_max_index", 0) != 0)
		return command_fail(cmd, RECOVER_NODE_IN_USE,
				    "Node has already issued bitcoin addresses!");

	if (have_channels(ld))
		return command_fail(cmd, RECOVER_NODE_IN_USE,
				    "Node has channels!");

	/* Don't try to add --recover to cmdline twice! */
	if (ld->recover != NULL)
		return command_fail(cmd, RECOVER_NODE_IN_USE,
				    "Already doing recover");

	/* Everything above is validation only: a check request ends here,
	 * before anything is touched on disk. */
	if (command_check_only(cmd))
		return command_check_done(cmd);

	backup_dir = tal_fmt(tmpctx, "lightning.pre-recover.%u", getpid());
	log_unusual(ld->log,
		    "JSON-RPC recovery command: moving existing files to %s",
		    backup_dir);

	move_prerecover_files(backup_dir);

	/* Top level will add --recover=... here.
	 * NOTE(review): that puts the secret on the re-exec'd command line,
	 * where it is presumably visible to anything that can list process
	 * arguments -- confirm this is acceptable. */
	ld->recover_secret = tal_steal(ld, hsm_secret);
	ld->try_reexec = true;
	return prepare_stop_conn(cmd, "Recovery restart in progress");
}

/* Command table entry for the "recover" RPC, declared the same way as
 * help_command and stop_command in this file.
 * NOTE(review): the initializer is positional; the fields look like name,
 * category, handler and description -- confirm against struct json_command. */
static const struct json_command recover_command = {
"recover",
"utility",
json_recover,
"Restart an unused lightning node with --recover"
};
/* Registered through AUTODATA like the other json_command entries. */
AUTODATA(json_command, &recover_command);

struct slowcmd {
struct command *cmd;
unsigned *msec;
Expand Down
12 changes: 10 additions & 2 deletions lightningd/lightningd.c
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ static struct lightningd *new_lightningd(const tal_t *ctx)
ld->autolisten = true;
ld->reconnect = true;
ld->try_reexec = false;
ld->recover_secret = NULL;
ld->db_upgrade_ok = NULL;

/* --experimental-upgrade-protocol */
Expand Down Expand Up @@ -1390,8 +1391,15 @@ int main(int argc, char *argv[])

/* Gather these before we free ld! */
try_reexec = ld->try_reexec;
if (try_reexec)
if (try_reexec) {
/* Maybe we reexec with --recover, due to recover command */
if (ld->recover_secret) {
tal_arr_insert(&orig_argv, argc,
tal_fmt(orig_argv, "--recover=%s",
ld->recover_secret));
}
tal_steal(NULL, orig_argv);
}

/* Free this last: other things may clean up timers. */
timers = tal_steal(NULL, ld->timers);
Expand All @@ -1418,7 +1426,7 @@ int main(int argc, char *argv[])
/* Close all filedescriptors except stdin/stdout/stderr */
closefrom(STDERR_FILENO + 1);
execv(orig_argv[0], orig_argv);
err(1, "Failed to re-exec ourselves after version change");
err(1, "Failed to re-exec ourselves after version change/recover");
}

/*~ Farewell. Next stop: hsmd/hsmd.c. */
Expand Down
2 changes: 2 additions & 0 deletions lightningd/lightningd.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ struct lightningd {

/* Should we re-exec ourselves instead of just exiting? */
bool try_reexec;
/* If set, we are to restart with --recover=... */
const char *recover_secret;

/* Array of (even) TLV types that we should allow. This is required
* since we otherwise would outright reject them. */
Expand Down
8 changes: 8 additions & 0 deletions lightningd/test/run-jsonrpc.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ void db_begin_transaction_(struct db *db UNNEEDED, const char *location UNNEEDED
/* Generated stub for db_commit_transaction */
void db_commit_transaction(struct db *db UNNEEDED)
{ fprintf(stderr, "db_commit_transaction called!\n"); abort(); }
/* Generated stub for db_get_intvar */
s64 db_get_intvar(struct db *db UNNEEDED, const char *varname UNNEEDED, s64 defval UNNEEDED)
{ fprintf(stderr, "db_get_intvar called!\n"); abort(); }
/* Generated stub for db_set_readonly */
void db_set_readonly(struct db *db UNNEEDED, bool readonly UNNEEDED)
{ fprintf(stderr, "db_set_readonly called!\n"); abort(); }
Expand Down Expand Up @@ -44,6 +47,11 @@ void fromwire_node_id(const u8 **cursor UNNEEDED, size_t *max UNNEEDED, struct n
/* Generated stub for get_feerate_floor */
u32 get_feerate_floor(const struct chain_topology *topo UNNEEDED)
{ fprintf(stderr, "get_feerate_floor called!\n"); abort(); }
/* Generated stub for hsm_secret_arg */
char *hsm_secret_arg(const tal_t *ctx UNNEEDED,
const char *arg UNNEEDED,
const u8 **hsm_secret UNNEEDED)
{ fprintf(stderr, "hsm_secret_arg called!\n"); abort(); }
/* Generated stub for htlc_resolution_feerate */
u32 htlc_resolution_feerate(struct chain_topology *topo UNNEEDED)
{ fprintf(stderr, "htlc_resolution_feerate called!\n"); abort(); }
Expand Down
56 changes: 56 additions & 0 deletions tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3611,3 +3611,59 @@ def test_setconfig(node_factory, bitcoind):
assert lines[1].startswith('# Inserted by setconfig ')
assert lines[2] == 'min-capacity-sat=400000'
assert len(lines) == 3


@unittest.skipIf(os.getenv('TEST_DB_PROVIDER', 'sqlite3') != 'sqlite3', "deletes database, which is assumed sqlite3")
def test_recover_command(node_factory, bitcoind):
    """Exercise the `recover` RPC with both codex32 and hex secrets.

    l1 is recovered into l2's identity (codex32), the refusal paths are
    checked (addresses already issued, recover already in progress), and
    after a plain restart l1 is recovered back to its own identity (hex).
    """
    l1, l2 = node_factory.get_nodes(2)

    l1oldid = l1.info['id']

    def read_hsm_secret(node):
        """Returns codex32 and hex"""
        secret_path = os.path.join(node.daemon.lightning_dir, TEST_NETWORK, "hsm_secret")
        raw_codex = subprocess.check_output(
            ["tools/hsmtool", "getcodexsecret", secret_path, "leet"]
        )
        with open(secret_path, "rb") as f:
            secret_bytes = f.read()
        return raw_codex.decode('utf-8').strip(), secret_bytes.hex()

    l1codex32, l1hex = read_hsm_secret(l1)
    l2codex32, l2hex = read_hsm_secret(l2)

    # Get the PID for later
    pidfile = os.path.join(l1.daemon.lightning_dir,
                           f"lightningd-{TEST_NETWORK}.pid")
    with open(pidfile, "r") as f:
        pid = f.read().strip()

    # A dry-run check must succeed before the real command does.
    assert l1.rpc.check('recover', hsmsecret=l2codex32) == {'command_to_check': 'recover'}
    l1.rpc.recover(hsmsecret=l2codex32)
    l1.daemon.wait_for_log("Server started with public key")
    # l1.info is cached on start, so won't reflect current reality!
    assert l1.rpc.getinfo()['id'] == l2.info['id']

    # Won't work if we issue an address...
    l2.rpc.newaddr()

    with pytest.raises(RpcError, match='Node has already issued bitcoin addresses'):
        l2.rpc.recover(hsmsecret=l1codex32)

    with pytest.raises(RpcError, match='Node has already issued bitcoin addresses'):
        l2.rpc.check('recover', hsmsecret=l1codex32)

    # Now try recovering using hex secret (remove old prerecover!)
    # The directory is named after the PID read before the first recover.
    prerecover_dir = os.path.join(l1.daemon.lightning_dir, TEST_NETWORK,
                                  f"lightning.pre-recover.{pid}")
    shutil.rmtree(prerecover_dir)

    # l1 already has --recover in cmdline: recovering again would add it
    # twice!
    with pytest.raises(RpcError, match='Already doing recover'):
        l1.rpc.check('recover', hsmsecret=l1hex)

    with pytest.raises(RpcError, match='Already doing recover'):
        l1.rpc.recover(hsmsecret=l1hex)

    l1.restart()
    assert l1.rpc.check('recover', hsmsecret=l1hex) == {'command_to_check': 'recover'}
    l1.rpc.recover(hsmsecret=l1hex)
    l1.daemon.wait_for_log("Server started with public key")
    assert l1.rpc.getinfo()['id'] == l1oldid

0 comments on commit c9470c7

Please sign in to comment.