Skip to content

Commit

Permalink
Test and fix recovery with snapshot without ledger (#6472)
Browse files Browse the repository at this point in the history
  • Loading branch information
achamayou authored Sep 11, 2024
1 parent c66cbb8 commit 47e6853
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 4 deletions.
10 changes: 9 additions & 1 deletion src/host/ledger.h
Original file line number Diff line number Diff line change
Expand Up @@ -1392,8 +1392,16 @@ namespace asynchost

LOG_DEBUG_FMT("Ledger truncate: {}/{}", idx, last_idx);

if (idx >= last_idx || idx < committed_idx)
// Conservative check to avoid truncating to future indices, or dropping
// committed entries. If the ledger is being initialised from a snapshot
// alone, the first truncation effectively sets the last index.
if (last_idx != 0 && (idx >= last_idx || idx < committed_idx))
{
LOG_DEBUG_FMT(
"Ignoring truncate to {} - last_idx: {}, committed_idx: {}",
idx,
last_idx,
committed_idx);
return;
}

Expand Down
37 changes: 34 additions & 3 deletions tests/recovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def get_and_verify_historical_receipt(network, ref_msg):

@reqs.description("Recover a service")
@reqs.recover(number_txs=2)
def test_recover_service(network, args, from_snapshot=True):
def test_recover_service(network, args, from_snapshot=True, no_ledger=False):
network.save_service_identity(args)
old_primary, _ = network.find_primary()

Expand All @@ -71,7 +71,11 @@ def test_recover_service(network, args, from_snapshot=True):

watcher.wait_for_recovery()

current_ledger_dir, committed_ledger_dirs = old_primary.get_ledger()
if no_ledger:
current_ledger_dir = None
committed_ledger_dirs = None
else:
current_ledger_dir, committed_ledger_dirs = old_primary.get_ledger()

with tempfile.NamedTemporaryFile(mode="w+") as node_data_tf:
start_node_data = {"this is a": "recovery node"}
Expand Down Expand Up @@ -116,7 +120,7 @@ def test_recover_service(network, args, from_snapshot=True):
args.initial_service_cert_validity_days
)

new_nodes = recovered_network.find_primary_and_any_backup()
new_nodes = recovered_network.get_joined_nodes()
for n in new_nodes:
with n.client() as c:
r = c.get("/node/service/previous_identity")
Expand Down Expand Up @@ -811,6 +815,26 @@ def run(args):
)


def run_recover_snapshot_alone(args):
    """
    Recover a service from a snapshot alone, without any ledger files from a previous service.

    A fresh network is created from ``args`` (``nodes``, ``binary_dir``,
    ``debug_nodes``, ``perf_nodes`` and ``pdb`` are read here), started and
    opened, and then ``test_recover_service`` is run against it with
    ``from_snapshot=True`` and ``no_ledger=True``.

    :param args: Parsed end-to-end test arguments.
    :return: The network object created by this function (the value returned
        by ``test_recover_service`` is not used).
    """
    txs = app.LoggingTxs("user0")
    with infra.network.network(
        args.nodes,
        args.binary_dir,
        args.debug_nodes,
        args.perf_nodes,
        pdb=args.pdb,
        txs=txs,
    ) as network:
        network.start_and_open(args)
        # The result is not needed here, so it is no longer bound to an unused
        # local. NOTE(review): the call itself is kept in case it is relied on
        # to wait for a primary before recovery starts - confirm.
        network.find_primary()
        # Recover node solely from snapshot
        test_recover_service(network, args, from_snapshot=True, no_ledger=True)
        return network


if __name__ == "__main__":

def add(parser):
Expand Down Expand Up @@ -860,4 +884,11 @@ def add(parser):
snapshot_tx_interval=1000000,
)

cr.add(
"recovery_snapshot_alone",
run_recover_snapshot_alone,
package="samples/apps/logging/liblogging",
nodes=infra.e2e_args.min_nodes(cr.args, f=0), # 1 node suffices for recovery
)

cr.run()

0 comments on commit 47e6853

Please sign in to comment.