test(batcher): new unit tests for unhappy failure cases (currently only for altda)
samlaf committed Dec 7, 2024
1 parent 2be01c4 commit 6dd280b
Showing 5 changed files with 388 additions and 13 deletions.
77 changes: 70 additions & 7 deletions op-alt-da/damock.go
@@ -2,7 +2,9 @@ package altda

import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net/http"
"sync"
@@ -16,22 +18,53 @@ import (
)

// MockDAClient mocks a DA storage provider to avoid running an HTTP DA server
// in unit tests. MockDAClient is goroutine-safe.
type MockDAClient struct {
mu sync.Mutex
CommitmentType CommitmentType
GenericCommitmentCount uint16 // next generic commitment (use counting commitment instead of hash to help with testing)
store ethdb.KeyValueStore
StoreCount int
log log.Logger
dropEveryNthPut uint // 0 means nothing gets dropped, 1 means every put errors, etc.
setInputRequestCount uint // number of put requests received, irrespective of whether they were successful
}

func NewMockDAClient(log log.Logger) *MockDAClient {
return &MockDAClient{
CommitmentType: Keccak256CommitmentType,
store: memorydb.New(),
StoreCount: 0,
log: log,
}
}

// NewCountingGenericCommitmentMockDAClient creates a MockDAClient that uses counting commitments.
// Its commitments are big-endian encoded uint16s (0, 1, 2, ...) instead of actual hashes or altda-layer commitments.
// Used in tests to make sure we receive commitments in order, following Holocene strict ordering rules.
func NewCountingGenericCommitmentMockDAClient(log log.Logger) *MockDAClient {
return &MockDAClient{
CommitmentType: GenericCommitmentType,
GenericCommitmentCount: 0,
store: memorydb.New(),
StoreCount: 0,
log: log,
}
}

// DropEveryNthPut fakes a DA server that drops (errors on) every Nth put request.
// Useful for testing the batcher's error handling.
// n=0 means nothing gets dropped, n=1 means every put errors, etc.
func (c *MockDAClient) DropEveryNthPut(n uint) {
c.mu.Lock()
defer c.mu.Unlock()
c.dropEveryNthPut = n
}
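
// Usage sketch (illustrative, not part of this commit; assumes a test logger
// and context are in scope). With n=2, the request counter in SetInput makes
// every second put fail while the others are stored:
//
//	client := NewCountingGenericCommitmentMockDAClient(logger)
//	client.DropEveryNthPut(2)
//	_, err1 := client.SetInput(ctx, []byte("frame-a")) // request 1: stored, err1 == nil
//	_, err2 := client.SetInput(ctx, []byte("frame-b")) // request 2: dropped, err2 != nil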

func (c *MockDAClient) GetInput(ctx context.Context, key CommitmentData) ([]byte, error) {
c.mu.Lock()
defer c.mu.Unlock()
c.log.Debug("Getting input", "key", key)
bytes, err := c.store.Get(key.Encode())
if err != nil {
return nil, ErrNotFound
@@ -40,12 +73,42 @@ func (c *MockDAClient) GetInput(ctx context.Context, key CommitmentData) ([]byte
}

func (c *MockDAClient) SetInput(ctx context.Context, data []byte) (CommitmentData, error) {
c.mu.Lock()
defer c.mu.Unlock()
c.setInputRequestCount++
var key CommitmentData
if c.CommitmentType == GenericCommitmentType {
countCommitment := make([]byte, 2)
binary.BigEndian.PutUint16(countCommitment, c.GenericCommitmentCount)
key = NewGenericCommitment(countCommitment)
} else {
key = NewKeccak256Commitment(data)
}
action := "put"
if c.dropEveryNthPut > 0 && c.setInputRequestCount%c.dropEveryNthPut == 0 {
action = "dropped"
}
c.log.Debug("Setting input", "action", action, "key", key, "data", fmt.Sprintf("%x", data))
if action == "dropped" {
return nil, errors.New("put dropped")
}
err := c.store.Put(key.Encode(), data)
if err == nil {
c.GenericCommitmentCount++
c.StoreCount++
}
return key, err
}

func (c *MockDAClient) DeleteData(key []byte) error {
c.mu.Lock()
defer c.mu.Unlock()
c.log.Debug("Deleting data", "key", key)
err := c.store.Delete(key)
if err == nil {
c.StoreCount--
}
return err
}

type DAErrFaker struct {
6 changes: 5 additions & 1 deletion op-batcher/batcher/driver.go
@@ -79,6 +79,10 @@ type RollupClient interface {
SyncStatus(ctx context.Context) (*eth.SyncStatus, error)
}

type AltDAClient interface {
SetInput(ctx context.Context, data []byte) (altda.CommitmentData, error)
}
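
// Compile-time assertion (illustrative sketch, not part of this commit): the
// MockDAClient from op-alt-da satisfies AltDAClient, which is what lets the
// unit tests inject the mock through DriverSetup.
var _ AltDAClient = (*altda.MockDAClient)(nil)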

// DriverSetup is the collection of input/output interfaces and configuration that the driver operates on.
type DriverSetup struct {
Log log.Logger
@@ -89,7 +93,7 @@ type DriverSetup struct {
L1Client L1Client
EndpointProvider dial.L2EndpointProvider
ChannelConfig ChannelConfigProvider
AltDA AltDAClient // was *altda.DAClient; widened to an interface so the tests can inject a mock
ChannelOutFactory ChannelOutFactory
}

216 changes: 216 additions & 0 deletions op-batcher/batcher/driver_test.go
@@ -3,13 +3,22 @@ package batcher
import (
"context"
"errors"
"fmt"
"math/big"
"testing"
"time"

altda "github.com/ethereum-optimism/optimism/op-alt-da"
"github.com/ethereum-optimism/optimism/op-batcher/compressor"
"github.com/ethereum-optimism/optimism/op-batcher/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-service/dial"
"github.com/ethereum-optimism/optimism/op-service/eth"
"github.com/ethereum-optimism/optimism/op-service/testlog"
"github.com/ethereum-optimism/optimism/op-service/testutils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"
)
@@ -114,5 +123,212 @@ func TestBatchSubmitter_SafeL1Origin_FailsToResolveRollupClient(t *testing.T) {
ep.rollupClientErr = errors.New("failed to resolve rollup client")

_, err := bs.safeL1Origin(context.Background())
fmt.Println(err)
require.Error(t, err)
}

// ======= ALTDA TESTS =======

// fakeL1Client is just a dummy struct. All fault injection is done via the fakeTxMgr (which doesn't interact with this fakeL1Client).
type fakeL1Client struct{}

func (f *fakeL1Client) HeaderByNumber(ctx context.Context, number *big.Int) (*types.Header, error) {
if number == nil {
number = big.NewInt(0)
}
return &types.Header{
Number: number,
ParentHash: common.Hash{},
Time: 0,
}, nil
}

func (f *fakeL1Client) NonceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (uint64, error) {
return 0, nil
}

func altDASetup(t *testing.T) (*BatchSubmitter, *mockL2EndpointProvider, *altda.MockDAClient, *testutils.FakeTxMgr) {
ep := newEndpointProvider()

rollupCfg := &rollup.Config{
Genesis: rollup.Genesis{L2: eth.BlockID{Number: 0}, L1: eth.BlockID{Number: genesisL1Origin}},
L2ChainID: big.NewInt(1234),
}
batcherCfg := BatcherConfig{
PollInterval: 1 * time.Second,
UseAltDA: true,
}

log := testlog.Logger(t, log.LevelDebug)
fakeTxMgr := testutils.NewFakeTxMgr(log.With("subsystem", "fake-txmgr"), common.Address{0})
l1Client := &fakeL1Client{}

channelCfg := ChannelConfig{
// SeqWindowSize: 15,
// SubSafetyMargin: 4,
ChannelTimeout: 10,
MaxFrameSize: 150, // so that each channel has exactly 1 frame
TargetNumFrames: 1,
BatchType: derive.SingularBatchType,
CompressorConfig: compressor.Config{
Kind: compressor.NoneKind,
},
}
mockAltDAClient := altda.NewCountingGenericCommitmentMockDAClient(log.With("subsystem", "da-client"))
return NewBatchSubmitter(DriverSetup{
Log: log,
Metr: metrics.NoopMetrics,
RollupConfig: rollupCfg,
ChannelConfig: channelCfg,
Config: batcherCfg,
EndpointProvider: ep,
Txmgr: fakeTxMgr,
L1Client: l1Client,
AltDA: mockAltDAClient,
}), ep, mockAltDAClient, fakeTxMgr
}

func fakeSyncStatus(unsafeL2BlockNum uint64, L1BlockRef eth.L1BlockRef) *eth.SyncStatus {
return &eth.SyncStatus{
UnsafeL2: eth.L2BlockRef{
Number: unsafeL2BlockNum,
L1Origin: eth.BlockID{
Number: 0,
},
},
SafeL2: eth.L2BlockRef{
Number: 0,
L1Origin: eth.BlockID{
Number: 0,
},
},
HeadL1: L1BlockRef,
}
}

// There are 4 failure cases (unhappy paths) that the op-batcher has to deal with.
// They are outlined in https://github.com/ethereum-optimism/optimism/tree/develop/op-batcher#happy-path
// This test suite covers these 4 cases in the context of AltDA.
func TestBatchSubmitter_AltDA_FailureCase1_L2Reorg(t *testing.T) {
bs, ep, mockAltDAClient, fakeTxMgr := altDASetup(t)

L1BlockZero := types.NewBlock(&types.Header{
Number: big.NewInt(0),
}, nil, nil, nil)
L1BlockZeroRef := eth.L1BlockRef{
Hash: L1BlockZero.Hash(),
Number: L1BlockZero.NumberU64(),
}
// We return incremental syncStatuses to force the op-batcher to entirely process each L2 block one by one.
// To test multi channel behavior, we could return a sync status that is multiple blocks ahead of the current L2 block.
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(1, L1BlockZeroRef), nil)
// We need Twice() here so that the altda commitments are sent before the reorg happens.
// Altda commitments are stored in the channelManager and only sent to L1 on the next main-loop tick.
// TODO: is there a better way to test this that isn't so dependent on the internal implementation?
ep.rollupClient.Mock.On("SyncStatus").Twice().Return(fakeSyncStatus(2, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(3, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(1, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(2, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Return(fakeSyncStatus(3, L1BlockZeroRef), nil)

L2BlockZero := newMiniL2BlockWithNumberParent(1, big.NewInt(0), common.HexToHash("0x0"))
L2BlockOne := newMiniL2BlockWithNumberParent(1, big.NewInt(1), L2BlockZero.Hash())
L2BlockTwo := newMiniL2BlockWithNumberParent(1, big.NewInt(2), L2BlockOne.Hash())
L2BlockTwoPrime := newMiniL2BlockWithNumberParentAndL1Information(1, big.NewInt(2), L2BlockOne.Hash(), 101, 0)
L2BlockThreePrime := newMiniL2BlockWithNumberParent(1, big.NewInt(3), L2BlockTwoPrime.Hash())

ep.ethClient.Mock.On("BlockByNumber", big.NewInt(0)).Once().Return(L2BlockZero, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(1)).Twice().Return(L2BlockOne, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2BlockTwo, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2BlockTwoPrime, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(3)).Twice().Return(L2BlockThreePrime, nil)

err := bs.StartBatchSubmitting()
require.NoError(t, err)
time.Sleep(5 * time.Second) // 5 seconds should be enough to process all the blocks above
err = bs.StopBatchSubmitting(context.Background())
require.NoError(t, err)

require.Equal(t, 5, mockAltDAClient.StoreCount)
require.Equal(t, uint64(5), fakeTxMgr.Nonce)
}

func TestBatchSubmitter_AltDA_FailureCase2_FailedL1Tx(t *testing.T) {
bs, ep, mockAltDAClient, fakeTxMgr := altDASetup(t)

L1Block0 := types.NewBlock(&types.Header{
Number: big.NewInt(0),
}, nil, nil, nil)
L1Block0Ref := eth.L1BlockRef{
Hash: L1Block0.Hash(),
Number: L1Block0.NumberU64(),
}
ep.rollupClient.Mock.On("SyncStatus").Return(fakeSyncStatus(4, L1Block0Ref), nil)

L2Block0 := newMiniL2BlockWithNumberParent(1, big.NewInt(0), common.HexToHash("0x0"))
L2Block1 := newMiniL2BlockWithNumberParent(1, big.NewInt(1), L2Block0.Hash())
L2Block2 := newMiniL2BlockWithNumberParent(1, big.NewInt(2), L2Block1.Hash())
L2Block3 := newMiniL2BlockWithNumberParent(1, big.NewInt(3), L2Block2.Hash())
L2Block4 := newMiniL2BlockWithNumberParent(1, big.NewInt(4), L2Block3.Hash())

ep.ethClient.Mock.On("BlockByNumber", big.NewInt(0)).Once().Return(L2Block0, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(1)).Once().Return(L2Block1, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2Block2, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(3)).Once().Return(L2Block3, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(4)).Once().Return(L2Block4, nil)

fakeTxMgr.ErrorEveryNthSend(2)
err := bs.StartBatchSubmitting()
require.NoError(t, err)
time.Sleep(5 * time.Second) // 5 seconds should be enough to process all the blocks above
err = bs.StopBatchSubmitting(context.Background())
require.NoError(t, err)

// FIXME: storeCount=7 with the current buggy implementation, because when an L1 tx fails,
// we BOTH rewind the altdaChannelCursor (to resubmit the failed tx) AND push the frames back into the channelManager.
// A quick fix (?) is to not push back if the failed tx was an altda tx.
require.Equal(t, 4, mockAltDAClient.StoreCount)
// TODO: we should probably also check that the commitments are in order; see the ordering-check sketch after this test.
require.Equal(t, uint64(4), fakeTxMgr.Nonce)
}
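
// requireCommitmentsInOrder sketches the ordering check suggested by the TODO
// above (not part of this commit). The counting mock encodes each generic
// commitment as a big-endian uint16, so Holocene strict ordering reduces to
// the decoded values being 0, 1, 2, ... in submission order. It assumes the
// raw 2-byte counting payloads (the bytes passed to NewGenericCommitment, not
// the type-prefixed encoding) can be collected from the fake tx manager, which
// this diff doesn't show, and it needs "encoding/binary" imported.
func requireCommitmentsInOrder(t *testing.T, commitments [][]byte) {
for i, c := range commitments {
require.Len(t, c, 2)
require.Equal(t, uint16(i), binary.BigEndian.Uint16(c))
}
}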

func TestBatchSubmitter_AltDA_FailureCase3_ChannelTimeout(t *testing.T) {
// TODO: implement this test
}

func TestBatchSubmitter_AltDA_FailureCase4_FailedBlobSubmission(t *testing.T) {
bs, ep, mockAltDAClient, fakeTxMgr := altDASetup(t)

L1Block0 := types.NewBlock(&types.Header{
Number: big.NewInt(0),
}, nil, nil, nil)
L1Block0Ref := eth.L1BlockRef{
Hash: L1Block0.Hash(),
Number: L1Block0.NumberU64(),
}
ep.rollupClient.Mock.On("SyncStatus").Return(fakeSyncStatus(4, L1Block0Ref), nil)

L2Block0 := newMiniL2BlockWithNumberParent(1, big.NewInt(0), common.HexToHash("0x0"))
L2Block1 := newMiniL2BlockWithNumberParent(1, big.NewInt(1), L2Block0.Hash())
L2Block2 := newMiniL2BlockWithNumberParent(1, big.NewInt(2), L2Block1.Hash())
L2Block3 := newMiniL2BlockWithNumberParent(1, big.NewInt(3), L2Block2.Hash())
L2Block4 := newMiniL2BlockWithNumberParent(1, big.NewInt(4), L2Block3.Hash())

ep.ethClient.Mock.On("BlockByNumber", big.NewInt(0)).Once().Return(L2Block0, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(1)).Once().Return(L2Block1, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2Block2, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(3)).Once().Return(L2Block3, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(4)).Once().Return(L2Block4, nil)

mockAltDAClient.DropEveryNthPut(2)

err := bs.StartBatchSubmitting()
require.NoError(t, err)
time.Sleep(5 * time.Second) // 5 seconds should be enough to process all the blocks above
err = bs.StopBatchSubmitting(context.Background())
require.NoError(t, err)

require.Equal(t, 4, mockAltDAClient.StoreCount)
require.Equal(t, uint64(4), fakeTxMgr.Nonce)
}