Fix(node): altda failover to ethda should keep finalizing l2 chain #12845

Open · wants to merge 5 commits into base: develop
12 changes: 10 additions & 2 deletions op-alt-da/damgr.go
@@ -117,8 +117,15 @@ func (d *DA) OnFinalizedHeadSignal(f HeadSignalFn) {
func (d *DA) updateFinalizedHead(l1Finalized eth.L1BlockRef) {
d.l1FinalizedHead = l1Finalized
// Prune the state to the finalized head
d.state.Prune(l1Finalized.ID())
d.finalizedHead = d.state.lastPrunedCommitment
lastPrunedCommIncBlock := d.state.Prune(l1Finalized.ID())
d.log.Debug("updateFinalizedHead", "currFinalizedHead", d.finalizedHead.Number, "lastPrunedCommIncBlock", lastPrunedCommIncBlock.Number, "l1Finalized", l1Finalized.Number)
// If a commitment was pruned, set the finalized head to that commitment's inclusion block
// When no commitments are left to be pruned (one example is if we have failed over to ethda)
// then updateFinalizedFromL1 becomes the main driver of the finalized head.
// Note that updateFinalizedFromL1 is only called when d.state.NoCommitments() is true.
if lastPrunedCommIncBlock != (eth.L1BlockRef{}) {
d.finalizedHead = lastPrunedCommIncBlock
}
}

// updateFinalizedFromL1 updates the finalized head based on the challenge window.
@@ -133,6 +140,7 @@ func (d *DA) updateFinalizedFromL1(ctx context.Context, l1 L1Fetcher) error {
if err != nil {
return err
}
d.log.Debug("updateFinalizedFromL1", "currFinalizedHead", d.finalizedHead.Number, "newFinalizedHead", ref.Number, "l1FinalizedHead", d.l1FinalizedHead.Number, "challengeWindow", d.cfg.ChallengeWindow)
d.finalizedHead = ref
return nil
}
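
Taken together, the two paths above decide what drives the AltDA finalized head: while expired commitments are still being pruned, the head follows the last pruned commitment's inclusion block; once nothing is left to prune (for example after failing over to ethDA), it trails the finalized L1 head by the challenge window. Below is a minimal, self-contained sketch of that decision, not part of this diff; the L1BlockRef type and the finalizedHeadDriver helper are illustrative stand-ins for the real eth.L1BlockRef and the logic split across updateFinalizedHead and updateFinalizedFromL1.

package main

import "fmt"

// L1BlockRef is an illustrative stand-in for eth.L1BlockRef.
type L1BlockRef struct{ Number uint64 }

// finalizedHeadDriver sketches how the finalized head is chosen (illustrative only).
func finalizedHeadDriver(lastPrunedCommIncBlock, l1Finalized, current L1BlockRef, challengeWindow uint64) L1BlockRef {
	// Commitment-driven: a pruned commitment's inclusion block finalizes L2 up to it.
	if lastPrunedCommIncBlock != (L1BlockRef{}) {
		return lastPrunedCommIncBlock
	}
	// L1-driven: with no commitments left to prune (e.g. after ethDA failover),
	// trail L1 finality by the challenge window once enough L1 blocks are finalized.
	if l1Finalized.Number > challengeWindow {
		return L1BlockRef{Number: l1Finalized.Number - challengeWindow}
	}
	return current
}

func main() {
	// After failover to ethDA there is nothing left to prune, so the head keeps
	// advancing with L1 finality minus the challenge window.
	head := finalizedHeadDriver(L1BlockRef{}, L1BlockRef{Number: 100}, L1BlockRef{Number: 50}, 16)
	fmt.Println(head.Number) // 84
}
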
6 changes: 6 additions & 0 deletions op-alt-da/damock.go
@@ -48,6 +48,8 @@ func (c *MockDAClient) DeleteData(key []byte) error {
return c.store.Delete(key)
}

// DAErrFaker is a DA client that can be configured to return errors on GetInput
// and SetInput calls.
type DAErrFaker struct {
Client *MockDAClient

@@ -107,6 +109,10 @@ func (d *AltDADisabled) AdvanceL1Origin(ctx context.Context, l1 L1Fetcher, block
// FakeDAServer is a fake DA server for e2e tests.
// It is a small wrapper around DAServer that allows for setting request latencies,
// to mimic a DA service with slow responses (eg. eigenDA with 10 min batching interval).
//
// We use this FakeDAServer as opposed to the DAErrFaker client in the op-e2e altda system tests
// because the batcher service only has a constructor to build from CLI flags (no dependency injection),
// meaning the da client is built from an rpc url config instead of being injected.
type FakeDAServer struct {
*DAServer
putRequestLatency time.Duration
28 changes: 15 additions & 13 deletions op-alt-da/dastate.go
@@ -52,15 +52,14 @@ func challengeKey(comm CommitmentData, inclusionBlockNumber uint64) string {
// In the special case of a L2 reorg, challenges are still tracked but commitments are removed.
// This will allow the altDA fetcher to find the expired challenge.
type State struct {
commitments []Commitment // commitments where the challenge/resolve period has not expired yet
expiredCommitments []Commitment // commitments where the challenge/resolve period has expired but not finalized
challenges []*Challenge // challenges ordered by L1 inclusion
expiredChallenges []*Challenge // challenges ordered by L1 inclusion
challengesMap map[string]*Challenge // challenges by serialized comm + block number for easy lookup
lastPrunedCommitment eth.L1BlockRef // the last commitment to be pruned
cfg Config
log log.Logger
metrics Metricer
commitments []Commitment // commitments where the challenge/resolve period has not expired yet
expiredCommitments []Commitment // commitments where the challenge/resolve period has expired but not finalized
challenges []*Challenge // challenges ordered by L1 inclusion
expiredChallenges []*Challenge // challenges ordered by L1 inclusion
challengesMap map[string]*Challenge // challenges by serialized comm + block number for easy lookup
cfg Config
log log.Logger
metrics Metricer
}

func NewState(log log.Logger, m Metricer, cfg Config) *State {
@@ -207,15 +206,17 @@ func (s *State) ExpireChallenges(origin eth.BlockID) {
}

// Prune removes challenges & commitments which have an expiry block number beyond the given block number.
func (s *State) Prune(origin eth.BlockID) {
func (s *State) Prune(origin eth.BlockID) eth.L1BlockRef {
// Commitments rely on challenges, so we prune commitments first.
s.pruneCommitments(origin)
lastPrunedCommIncBlock := s.pruneCommitments(origin)
s.pruneChallenges(origin)
return lastPrunedCommIncBlock
}

// pruneCommitments removes commitments which are beyond a given block number.
// It will remove commitments in order of inclusion until it finds a commitment which is not beyond the given block number.
func (s *State) pruneCommitments(origin eth.BlockID) {
func (s *State) pruneCommitments(origin eth.BlockID) eth.L1BlockRef {
var lastPrunedCommIncBlock eth.L1BlockRef
for len(s.expiredCommitments) > 0 {
c := s.expiredCommitments[0]
challenge, ok := s.GetChallenge(c.data, c.inclusionBlock.Number)
@@ -236,8 +237,9 @@ func (s *State) pruneCommitments(origin eth.BlockID) {
s.expiredCommitments = s.expiredCommitments[1:]

// Record the latest inclusion block to be returned
s.lastPrunedCommitment = c.inclusionBlock
lastPrunedCommIncBlock = c.inclusionBlock
}
return lastPrunedCommIncBlock
}

// pruneChallenges removes challenges which are beyond a given block number.
125 changes: 111 additions & 14 deletions op-e2e/actions/altda/altda_test.go
@@ -1,6 +1,7 @@
package altda

import (
"log/slog"
"math/big"
"math/rand"
"testing"
@@ -49,6 +50,12 @@ type L2AltDA struct {

type AltDAParam func(p *e2eutils.TestParams)

func WithLogLevel(level slog.Level) AltDAParam {
return func(p *e2eutils.TestParams) {
p.LogLevel = level
}
}

func NewL2AltDA(t helpers.Testing, params ...AltDAParam) *L2AltDA {
p := &e2eutils.TestParams{
MaxSequencerDrift: 40,
@@ -57,11 +64,12 @@ func NewL2AltDA(t helpers.Testing, params ...AltDAParam) *L2AltDA {
L1BlockTime: 12,
UseAltDA: true,
AllocType: config.AllocTypeAltDA,
LogLevel: log.LevelDebug,
}
for _, apply := range params {
apply(p)
}
log := testlog.Logger(t, log.LvlDebug)
log := testlog.Logger(t, p.LogLevel)

dp := e2eutils.MakeDeployParams(t, p)
sd := e2eutils.Setup(t, dp, helpers.DefaultAlloc)
@@ -75,14 +83,13 @@ func NewL2AltDA(t helpers.Testing, params ...AltDAParam) *L2AltDA {
engine := helpers.NewL2Engine(t, log, sd.L2Cfg, jwtPath)
engCl := engine.EngineClient(t, sd.RollupCfg)

storage := &altda.DAErrFaker{Client: altda.NewMockDAClient(log)}

l1F, err := sources.NewL1Client(miner.RPCClient(), log, nil, sources.L1ClientDefaultConfig(sd.RollupCfg, false, sources.RPCKindBasic))
require.NoError(t, err)

altDACfg, err := sd.RollupCfg.GetOPAltDAConfig()
require.NoError(t, err)

storage := &altda.DAErrFaker{Client: altda.NewMockDAClient(log)}
daMgr := altda.NewAltDAWithStorage(log, altDACfg, storage, &altda.NoopMetrics{})

sequencer := helpers.NewL2Sequencer(t, log, l1F, miner.BlobStore(), daMgr, engCl, sd.RollupCfg, 0, nil)
@@ -177,6 +184,34 @@ func (a *L2AltDA) ActNewL2Tx(t helpers.Testing) {
a.lastCommBn = a.miner.L1Chain().CurrentBlock().Number.Uint64()
}

// ActNewL2TxFinalized sends a new L2 transaction, submits a batch containing it to L1,
// and finalizes the L1 and L2 chains (including advancing enough to clear the altda challenge window).
//
// TODO: understand why (notation is l1unsafe/l1safe/l1finalized-l2unsafe/l2safe/l2finalized):
// - the first call advances heads by 0/0/17-71/71/1,
// - the second call advances by 0/0/17-204/204/82,
// - but all subsequent calls advance status by exactly 0/0/17-204/204/204.
//
// 17 makes sense because challengeWindow=16 and we create 1 extra block before that,
// and 204 L2 blocks = 17 L1 blocks * 12 L2 blocks/L1 block (L1 block time = 12s, L2 block time = 1s).
func (a *L2AltDA) ActNewL2TxFinalized(t helpers.Testing) {
// Include a new l2 batcher transaction, submitting an input commitment to the l1.
a.ActNewL2Tx(t)
// Create ChallengeWindow empty blocks so the above batcher blocks can finalize (can't be challenged anymore)
a.ActL1Blocks(t, a.altDACfg.ChallengeWindow)
// Finalize the L1 chain and the L2 chain (by draining all events and running through derivation pipeline)
// TODO: understand why we need to drain the pipeline before AND after actL1Finalized
a.sequencer.ActL2PipelineFull(t)
a.ActL1Finalized(t)
a.sequencer.ActL2PipelineFull(t)

// Uncomment the below code to observe the behavior described in the TODO above
// syncStatus := a.sequencer.SyncStatus()
// a.log.Info("Sync status after ActNewL2TxFinalized",
// "unsafeL1", syncStatus.HeadL1.Number, "safeL1", syncStatus.SafeL1.Number, "finalizedL1", syncStatus.FinalizedL1.Number,
// "unsafeL2", syncStatus.UnsafeL2.Number, "safeL2", syncStatus.SafeL2.Number, "finalizedL2", syncStatus.FinalizedL2.Number)
}

func (a *L2AltDA) ActDeleteLastInput(t helpers.Testing) {
require.NoError(t, a.storage.Client.DeleteData(a.lastComm))
}
Expand Down Expand Up @@ -363,7 +398,7 @@ func TestAltDA_ChallengeResolved(gt *testing.T) {
}

// DA storage service goes offline while sequencer keeps making blocks. When storage comes back online, it should be able to catch up.
func TestAltDA_StorageError(gt *testing.T) {
func TestAltDA_StorageGetError(gt *testing.T) {
t := helpers.NewDefaultTesting(gt)
harness := NewL2AltDA(t)

@@ -528,19 +563,20 @@ func TestAltDA_Finalization(gt *testing.T) {
t := helpers.NewDefaultTesting(gt)
a := NewL2AltDA(t)

// build L1 block #1
// Notation everywhere below is l1unsafe/l1safe/l1finalized:l2unsafe/l2safe/l2finalized
// build L1 block #1: 0/0/0:0/0/0 -> 1/1/0:0/0/0
a.ActL1Blocks(t, 1)
a.miner.ActL1SafeNext(t)

// Fill with l2 blocks up to the L1 head
// Fill with l2 blocks up to the L1 head: 1/1/0:0/0/0 -> 1/1/0:1/1/0
a.sequencer.ActL1HeadSignal(t)
a.sequencer.ActBuildToL1Head(t)

a.sequencer.ActL2PipelineFull(t)
a.sequencer.ActL1SafeSignal(t)
require.Equal(t, uint64(1), a.sequencer.SyncStatus().SafeL1.Number)

// add L1 block #2
// add L1 block #2: 1/1/0:1/1/0 -> 2/2/1:2/1/0
a.ActL1Blocks(t, 1)
a.miner.ActL1SafeNext(t)
a.miner.ActL1FinalizeNext(t)
@@ -552,7 +588,7 @@ func TestAltDA_Finalization(gt *testing.T) {
a.sequencer.ActL1FinalizedSignal(t)
a.sequencer.ActL1SafeSignal(t)

// commit all the l2 blocks to L1
// commit all the l2 blocks to L1: 2/2/1:2/1/0 -> 3/2/1:2/1/0
a.batcher.ActSubmitAll(t)
a.miner.ActL1StartBlock(12)(t)
a.miner.ActL1IncludeTx(a.dp.Addresses.Batcher)(t)
@@ -561,31 +597,31 @@ func TestAltDA_Finalization(gt *testing.T) {
// verify
a.sequencer.ActL2PipelineFull(t)

// fill with more unsafe L2 blocks
// fill with more unsafe L2 blocks: 3/2/1:2/1/0 -> 3/2/1:3/1/0
a.sequencer.ActL1HeadSignal(t)
a.sequencer.ActBuildToL1Head(t)

// submit those blocks too, block #4
// submit those blocks too, block #4: 3/2/1:3/1/0 -> 4/2/1:3/1/0
a.batcher.ActSubmitAll(t)
a.miner.ActL1StartBlock(12)(t)
a.miner.ActL1IncludeTx(a.dp.Addresses.Batcher)(t)
a.miner.ActL1EndBlock(t)

// add some more L1 blocks #5, #6
// add some more L1 blocks #5, #6: 4/2/1:3/1/0 -> 6/2/1:3/1/0
a.miner.ActEmptyBlock(t)
a.miner.ActEmptyBlock(t)

// and more unsafe L2 blocks
// and more unsafe L2 blocks: 6/2/1:3/1/0 -> 6/2/1:6/1/0
a.sequencer.ActL1HeadSignal(t)
a.sequencer.ActBuildToL1Head(t)

// move safe/finalize markers: finalize the L1 chain block with the first batch, but not the second
// move safe/finalize markers: 6/2/1:6/1/0 -> 6/4/3:6/1/0
a.miner.ActL1SafeNext(t) // #2 -> #3
a.miner.ActL1SafeNext(t) // #3 -> #4
a.miner.ActL1FinalizeNext(t) // #1 -> #2
a.miner.ActL1FinalizeNext(t) // #2 -> #3

// L1 safe and finalized as expected
// L1 safe and finalized as expected:
a.sequencer.ActL2PipelineFull(t)
a.sequencer.ActL1FinalizedSignal(t)
a.sequencer.ActL1SafeSignal(t)
@@ -607,3 +643,64 @@ func TestAltDA_Finalization(gt *testing.T) {
// given 12s l1 time and 1s l2 time, l2 should be 12 * 3 = 36 blocks finalized
require.Equal(t, uint64(36), a.sequencer.SyncStatus().FinalizedL2.Number)
}

// This test tests ethDA -> altDA -> ethDA finalization behavior, simulating a temp altDA failure.
func TestAltDA_FinalizationAfterEthDAFailover(gt *testing.T) {
t := helpers.NewDefaultTesting(gt)
// run with debug logs so the finalization status logs are visible
harness := NewL2AltDA(t, WithLogLevel(log.LevelDebug))

// We first call this twice because the first 2 times are irregular.
// See ActNewL2TxFinalized's TODO comment.
harness.ActNewL2TxFinalized(t)
harness.ActNewL2TxFinalized(t)

// ActNewL2TxFinalized advances L1 by (1+ChallengeWindow) L1 blocks, and there are 12 L2 blocks per L1 block.
diffL2Blocks := (1 + harness.altDACfg.ChallengeWindow) * 12

for i := 0; i < 5; i++ {
ssBefore := harness.sequencer.SyncStatus()
harness.ActNewL2TxFinalized(t)
ssAfter := harness.sequencer.SyncStatus()
// Finalized head should advance normally in altda mode
require.Equal(t, ssBefore.FinalizedL2.Number+diffL2Blocks, ssAfter.FinalizedL2.Number)
}

// We swap out altda batcher for ethda batcher
harness.batcher.ActAltDAFailoverToEthDA(t)

for i := 0; i < 3; i++ {
ssBefore := harness.sequencer.SyncStatus()
harness.ActNewL2TxFinalized(t)
if i == 0 {
// TODO: figure out why we need to act twice for the first time after failover.
// I think it's because the L1 driven finalizedHead is set to L1FinalizedHead-ChallengeWindow (see damgr.go updateFinalizedFromL1),
// so it trails behind by an extra challenge_window when we switch over to ethDA.
harness.ActNewL2TxFinalized(t)
}
ssAfter := harness.sequencer.SyncStatus()
// Even after failover, the finalized head should continue advancing normally
require.Equal(t, ssBefore.FinalizedL2.Number+diffL2Blocks, ssAfter.FinalizedL2.Number)
}

// Revert back to altda batcher (simulating that altda's temporary outage is resolved)
harness.batcher.ActAltDAFallbackToAltDA(t)

for i := 0; i < 3; i++ {
ssBefore := harness.sequencer.SyncStatus()
harness.ActNewL2TxFinalized(t)
ssAfter := harness.sequencer.SyncStatus()

// Even after fallback to altda, the finalized head should continue advancing normally
if i == 0 {
// This is the opposite of the altda->ethda direction. In this case, the first time we fall back to altda,
// the finalized head will advance by 2*diffL2Blocks: in ethda mode, when driven by L1 finalization,
// the head is set to L1FinalizedHead-ChallengeWindow. After sending an altda commitment, the finalized head
// is once again driven by the finalization of the altda commitment.
require.Equal(t, ssBefore.FinalizedL2.Number+2*diffL2Blocks, ssAfter.FinalizedL2.Number)
} else {
require.Equal(t, ssBefore.FinalizedL2.Number+diffL2Blocks, ssAfter.FinalizedL2.Number)
}

}
}
14 changes: 14 additions & 0 deletions op-e2e/actions/helpers/l2_batcher.go
@@ -308,6 +308,20 @@ func (s *L2Batcher) ReadNextOutputFrame(t Testing) []byte {
return data.Bytes()
}

func (s *L2Batcher) ActAltDAFailoverToEthDA(t Testing) {
if !s.l2BatcherCfg.UseAltDA {
t.Fatalf("cannot failover to ethda when already using ethda")
}
s.l2BatcherCfg.UseAltDA = false
}

func (s *L2Batcher) ActAltDAFallbackToAltDA(t Testing) {
if s.l2BatcherCfg.UseAltDA {
t.Fatalf("cannot fallback to altDA when already using altDA")
}
s.l2BatcherCfg.UseAltDA = true
}

// ActL2BatchSubmit constructs a batch tx from previous buffered L2 blocks, and submits it to L1
func (s *L2Batcher) ActL2BatchSubmit(t Testing, txOpts ...func(tx *types.DynamicFeeTx)) {
s.ActL2BatchSubmitRaw(t, s.ReadNextOutputFrame(t), txOpts...)
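
For reference, a minimal usage sketch (not part of this diff) of how an action test can exercise these toggles; the L2AltDA harness and the ActNewL2TxFinalized helper are the ones from the altda action tests above, and the finalization assertions are omitted here.

// Usage sketch only; it mirrors the shape of TestAltDA_FinalizationAfterEthDAFailover above.
func sketchFailoverRoundTrip(t helpers.Testing, a *L2AltDA) {
	a.batcher.ActAltDAFailoverToEthDA(t) // stop posting altDA commitments; submit batch data to L1 directly
	a.ActNewL2TxFinalized(t)             // finalized head should keep advancing (now L1-driven)
	a.batcher.ActAltDAFallbackToAltDA(t) // altDA is healthy again; resume posting commitments
	a.ActNewL2TxFinalized(t)             // finalized head is commitment-driven again
}
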
4 changes: 3 additions & 1 deletion op-e2e/e2eutils/setup.go
@@ -1,6 +1,7 @@
package e2eutils

import (
"log/slog"
"math/big"
"os"
"path"
@@ -50,6 +51,7 @@ type TestParams struct {
L1BlockTime uint64
UseAltDA bool
AllocType config.AllocType
LogLevel slog.Level
}

func MakeDeployParams(t require.TestingT, tp *TestParams) *DeployParams {
Expand All @@ -66,7 +68,7 @@ func MakeDeployParams(t require.TestingT, tp *TestParams) *DeployParams {
deployConfig.UseAltDA = tp.UseAltDA
ApplyDeployConfigForks(deployConfig)

logger := log.NewLogger(log.DiscardHandler())
logger := log.NewLogger(log.NewTerminalHandlerWithLevel(os.Stdout, tp.LogLevel, true))
require.NoError(t, deployConfig.Check(logger))
require.Equal(t, addresses.Batcher, deployConfig.BatchSenderAddress)
require.Equal(t, addresses.Proposer, deployConfig.L2OutputOracleProposer)