From 676810cc13558f943ad61fcef9ab74a858e6c7b8 Mon Sep 17 00:00:00 2001 From: Toni Spets Date: Mon, 18 Dec 2023 13:30:59 +0200 Subject: [PATCH] Use safe checkpointing before snapshots (#522) --- replica.go | 12 +++++----- replica_test.go | 63 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/replica.go b/replica.go index 3d2b0995..a5daf2dd 100644 --- a/replica.go +++ b/replica.go @@ -482,16 +482,16 @@ func (r *Replica) Snapshot(ctx context.Context) (info SnapshotInfo, err error) { r.muf.Lock() defer r.muf.Unlock() - // Prevent checkpoints during snapshot. - r.db.BeginSnapshot() - defer r.db.EndSnapshot() - // Issue a passive checkpoint to flush any pages to disk before snapshotting. - if _, err := r.db.db.ExecContext(ctx, `PRAGMA wal_checkpoint(PASSIVE);`); err != nil { + if err := r.db.Checkpoint(ctx, CheckpointModePassive); err != nil { return info, fmt.Errorf("pre-snapshot checkpoint: %w", err) } - // Acquire a read lock on the database during snapshot to prevent checkpoints. + // Prevent internal checkpoints during snapshot. + r.db.BeginSnapshot() + defer r.db.EndSnapshot() + + // Acquire a read lock on the database during snapshot to prevent external checkpoints. tx, err := r.db.db.Begin() if err != nil { return info, err diff --git a/replica_test.go b/replica_test.go index 7f42c08a..78ba6206 100644 --- a/replica_test.go +++ b/replica_test.go @@ -13,6 +13,13 @@ import ( "github.com/pierrec/lz4/v4" ) +func nextIndex(pos litestream.Pos) litestream.Pos { + return litestream.Pos{ + Generation: pos.Generation, + Index: pos.Index + 1, + } +} + func TestReplica_Name(t *testing.T) { t.Run("WithName", func(t *testing.T) { if got, want := litestream.NewReplica(nil, "NAME").Name(), "NAME"; got != want { @@ -32,11 +39,6 @@ func TestReplica_Sync(t *testing.T) { db, sqldb := MustOpenDBs(t) defer MustCloseDBs(t, db, sqldb) - // Execute a query to force a write to the WAL. - if _, err := sqldb.Exec(`CREATE TABLE foo (bar TEXT);`); err != nil { - t.Fatal(err) - } - // Issue initial database sync to setup generation. if err := db.Sync(context.Background()); err != nil { t.Fatal(err) @@ -66,10 +68,47 @@ func TestReplica_Sync(t *testing.T) { t.Fatalf("generations[0]=%v, want %v", got, want) } + // Verify we synced checkpoint page to WAL. + if r, err := c.WALSegmentReader(context.Background(), nextIndex(dpos)); err != nil { + t.Fatal(err) + } else if b, err := io.ReadAll(lz4.NewReader(r)); err != nil { + t.Fatal(err) + } else if err := r.Close(); err != nil { + t.Fatal(err) + } else if len(b) == db.PageSize() { + t.Fatalf("wal mismatch: len(%d), len(%d)", len(b), db.PageSize()) + } + + // Reset WAL so the next write will only write out the segment we are checking. + if err := db.Checkpoint(context.Background(), litestream.CheckpointModeTruncate); err != nil { + t.Fatal(err) + } + + // Execute a query to write something into the truncated WAL. + if _, err := sqldb.Exec(`CREATE TABLE foo (bar TEXT);`); err != nil { + t.Fatal(err) + } + + // Sync database to catch up the shadow WAL. + if err := db.Sync(context.Background()); err != nil { + t.Fatal(err) + } + + // Save position after sync, it should be after our write. + dpos, err = db.Pos() + if err != nil { + t.Fatal(err) + } + + // Sync WAL segment out to replica. + if err := r.Sync(context.Background()); err != nil { + t.Fatal(err) + } + // Verify WAL matches replica WAL. if b0, err := os.ReadFile(db.Path() + "-wal"); err != nil { t.Fatal(err) - } else if r, err := c.WALSegmentReader(context.Background(), litestream.Pos{Generation: generations[0], Index: 0, Offset: 0}); err != nil { + } else if r, err := c.WALSegmentReader(context.Background(), dpos.Truncate()); err != nil { t.Fatal(err) } else if b1, err := io.ReadAll(lz4.NewReader(r)); err != nil { t.Fatal(err) @@ -103,7 +142,7 @@ func TestReplica_Snapshot(t *testing.T) { t.Fatal(err) } else if info, err := r.Snapshot(context.Background()); err != nil { t.Fatal(err) - } else if got, want := info.Pos(), pos0.Truncate(); got != want { + } else if got, want := info.Pos(), nextIndex(pos0); got != want { t.Fatalf("pos=%s, want %s", got, want) } @@ -127,18 +166,20 @@ func TestReplica_Snapshot(t *testing.T) { t.Fatal(err) } else if info, err := r.Snapshot(context.Background()); err != nil { t.Fatal(err) - } else if got, want := info.Pos(), pos1.Truncate(); got != want { + } else if got, want := info.Pos(), nextIndex(pos1); got != want { t.Fatalf("pos=%v, want %v", got, want) } - // Verify two snapshots exist. + // Verify three snapshots exist. if infos, err := r.Snapshots(context.Background()); err != nil { t.Fatal(err) - } else if got, want := len(infos), 2; got != want { + } else if got, want := len(infos), 3; got != want { t.Fatalf("len=%v, want %v", got, want) } else if got, want := infos[0].Pos(), pos0.Truncate(); got != want { t.Fatalf("info[0]=%s, want %s", got, want) - } else if got, want := infos[1].Pos(), pos1.Truncate(); got != want { + } else if got, want := infos[1].Pos(), nextIndex(pos0); got != want { t.Fatalf("info[1]=%s, want %s", got, want) + } else if got, want := infos[2].Pos(), nextIndex(pos1); got != want { + t.Fatalf("info[2]=%s, want %s", got, want) } }