test(batcher): new unit tests for unhappy failure cases (currently only for altda)
samlaf committed Dec 7, 2024
1 parent 2be01c4 commit 6dd280b
Showing 5 changed files with 388 additions and 13 deletions.
77 changes: 70 additions & 7 deletions op-alt-da/damock.go
@@ -2,7 +2,9 @@ package altda

import (
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"net/http"
"sync"
@@ -16,22 +18,53 @@ import (
)

// MockDAClient mocks a DA storage provider to avoid running an HTTP DA server
// in unit tests. MockDAClient is goroutine-safe.
type MockDAClient struct {
mu sync.Mutex
CommitmentType CommitmentType
GenericCommitmentCount uint16 // next generic commitment (use counting commitment instead of hash to help with testing)
store ethdb.KeyValueStore
StoreCount int
log log.Logger
dropEveryNthPut uint // 0 means nothing gets dropped, 1 means every put errors, etc.
setInputRequestCount uint // number of put requests received, irrespective of whether they were successful
}

func NewMockDAClient(log log.Logger) *MockDAClient {
return &MockDAClient{
CommitmentType: Keccak256CommitmentType,
store: memorydb.New(),
StoreCount: 0,
log: log,
}
}

// NewCountingGenericCommitmentMockDAClient creates a MockDAClient that uses counting commitments.
// Its commitments are big-endian encoded uint16s (0, 1, 2, ...) instead of actual hashes or altda-layer commitments.
// Used in tests to make sure we receive commitments in order, following Holocene strict ordering rules.
func NewCountingGenericCommitmentMockDAClient(log log.Logger) *MockDAClient {
return &MockDAClient{
CommitmentType: GenericCommitmentType,
GenericCommitmentCount: 0,
store: memorydb.New(),
StoreCount: 0,
log: log,
}
}

// DropEveryNthPut fakes a DA server that drops (errors on) every Nth put request.
// Useful for testing the batcher's error handling.
// n=0 means nothing gets dropped, n=1 means every put errors, etc.
func (c *MockDAClient) DropEveryNthPut(n uint) {
c.mu.Lock()
defer c.mu.Unlock()
c.dropEveryNthPut = n
}
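
// Usage sketch (illustrative, not part of this commit; assumes a test logger
// and context are in scope). With n=2, the request counter in SetInput makes
// every second put fail while the others are stored:
//
//	client := NewCountingGenericCommitmentMockDAClient(logger)
//	client.DropEveryNthPut(2)
//	_, err1 := client.SetInput(ctx, []byte("frame-a")) // request 1: stored, err1 == nil
//	_, err2 := client.SetInput(ctx, []byte("frame-b")) // request 2: dropped, err2 != nil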

func (c *MockDAClient) GetInput(ctx context.Context, key CommitmentData) ([]byte, error) {
c.mu.Lock()
defer c.mu.Unlock()
c.log.Debug("Getting input", "key", key)
bytes, err := c.store.Get(key.Encode())
if err != nil {
return nil, ErrNotFound
@@ -40,12 +73,42 @@ func (c *MockDAClient) GetInput(ctx context.Context, key CommitmentData) ([]byte
}

func (c *MockDAClient) SetInput(ctx context.Context, data []byte) (CommitmentData, error) {
c.mu.Lock()
defer c.mu.Unlock()
c.setInputRequestCount++
var key CommitmentData
if c.CommitmentType == GenericCommitmentType {
countCommitment := make([]byte, 2)
binary.BigEndian.PutUint16(countCommitment, c.GenericCommitmentCount)
key = NewGenericCommitment(countCommitment)
} else {
key = NewKeccak256Commitment(data)
}
action := "put"
if c.dropEveryNthPut > 0 && c.setInputRequestCount%c.dropEveryNthPut == 0 {
action = "dropped"
}
c.log.Debug("Setting input", "action", action, "key", key, "data", fmt.Sprintf("%x", data))
if action == "dropped" {
return nil, errors.New("put dropped")
}
err := c.store.Put(key.Encode(), data)
if err == nil {
c.GenericCommitmentCount++
c.StoreCount++
}
return key, err
}

func (c *MockDAClient) DeleteData(key []byte) error {
c.mu.Lock()
defer c.mu.Unlock()
c.log.Debug("Deleting data", "key", key)
err := c.store.Delete(key)
if err == nil {
c.StoreCount--
}
return err
}

type DAErrFaker struct {
6 changes: 5 additions & 1 deletion op-batcher/batcher/driver.go
@@ -79,6 +79,10 @@ type RollupClient interface {
SyncStatus(ctx context.Context) (*eth.SyncStatus, error)
}

type AltDAClient interface {
SetInput(ctx context.Context, data []byte) (altda.CommitmentData, error)
}
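
// Compile-time assertion (illustrative sketch, not part of this commit): the
// MockDAClient from op-alt-da satisfies AltDAClient, which is what lets the
// unit tests inject the mock through DriverSetup.
var _ AltDAClient = (*altda.MockDAClient)(nil)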

// DriverSetup is the collection of input/output interfaces and configuration that the driver operates on.
type DriverSetup struct {
Log log.Logger
@@ -89,7 +93,7 @@ type DriverSetup struct {
L1Client L1Client
EndpointProvider dial.L2EndpointProvider
ChannelConfig ChannelConfigProvider
AltDA AltDAClient // was *altda.DAClient; widened to an interface so the tests can inject a mock
ChannelOutFactory ChannelOutFactory
}

216 changes: 216 additions & 0 deletions op-batcher/batcher/driver_test.go
@@ -3,13 +3,22 @@ package batcher
import (
"context"
"errors"
"fmt"
"math/big"
"testing"
"time"

altda "github.com/ethereum-optimism/optimism/op-alt-da"
"github.com/ethereum-optimism/optimism/op-batcher/compressor"
"github.com/ethereum-optimism/optimism/op-batcher/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-service/dial"
"github.com/ethereum-optimism/optimism/op-service/eth"
"github.com/ethereum-optimism/optimism/op-service/testlog"
"github.com/ethereum-optimism/optimism/op-service/testutils"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"
)
@@ -114,5 +123,212 @@ func TestBatchSubmitter_SafeL1Origin_FailsToResolveRollupClient(t *testing.T) {
ep.rollupClientErr = errors.New("failed to resolve rollup client")

_, err := bs.safeL1Origin(context.Background())
fmt.Println(err)
require.Error(t, err)
}

// ======= ALTDA TESTS =======

// fakeL1Client is just a dummy struct. All fault injection is done via the fakeTxMgr (which doesn't interact with this fakeL1Client).
type fakeL1Client struct{}

func (f *fakeL1Client) HeaderByNumber(ctx context.Context, number *big.Int) (*types.Header, error) {
if number == nil {
number = big.NewInt(0)
}
return &types.Header{
Number: number,
ParentHash: common.Hash{},
Time: 0,
}, nil
}

func (f *fakeL1Client) NonceAt(ctx context.Context, account common.Address, blockNumber *big.Int) (uint64, error) {
return 0, nil
}

func altDASetup(t *testing.T) (*BatchSubmitter, *mockL2EndpointProvider, *altda.MockDAClient, *testutils.FakeTxMgr) {
ep := newEndpointProvider()

rollupCfg := &rollup.Config{
Genesis: rollup.Genesis{L2: eth.BlockID{Number: 0}, L1: eth.BlockID{Number: genesisL1Origin}},
L2ChainID: big.NewInt(1234),
}
batcherCfg := BatcherConfig{
PollInterval: 1 * time.Second,
UseAltDA: true,
}

log := testlog.Logger(t, log.LevelDebug)
fakeTxMgr := testutils.NewFakeTxMgr(log.With("subsystem", "fake-txmgr"), common.Address{0})
l1Client := &fakeL1Client{}

channelCfg := ChannelConfig{
// SeqWindowSize: 15,
// SubSafetyMargin: 4,
ChannelTimeout: 10,
MaxFrameSize: 150, // so that each channel has exactly 1 frame
TargetNumFrames: 1,
BatchType: derive.SingularBatchType,
CompressorConfig: compressor.Config{
Kind: compressor.NoneKind,
},
}
mockAltDAClient := altda.NewCountingGenericCommitmentMockDAClient(log.With("subsystem", "da-client"))
return NewBatchSubmitter(DriverSetup{
Log: log,
Metr: metrics.NoopMetrics,
RollupConfig: rollupCfg,
ChannelConfig: channelCfg,
Config: batcherCfg,
EndpointProvider: ep,
Txmgr: fakeTxMgr,
L1Client: l1Client,
AltDA: mockAltDAClient,
}), ep, mockAltDAClient, fakeTxMgr
}

func fakeSyncStatus(unsafeL2BlockNum uint64, L1BlockRef eth.L1BlockRef) *eth.SyncStatus {
return &eth.SyncStatus{
UnsafeL2: eth.L2BlockRef{
Number: unsafeL2BlockNum,
L1Origin: eth.BlockID{
Number: 0,
},
},
SafeL2: eth.L2BlockRef{
Number: 0,
L1Origin: eth.BlockID{
Number: 0,
},
},
HeadL1: L1BlockRef,
}
}

// There are 4 failure cases (unhappy paths) that the op-batcher has to deal with.
// They are outlined in https://github.com/ethereum-optimism/optimism/tree/develop/op-batcher#happy-path
// This test suite covers these 4 cases in the context of AltDA.
func TestBatchSubmitter_AltDA_FailureCase1_L2Reorg(t *testing.T) {
bs, ep, mockAltDAClient, fakeTxMgr := altDASetup(t)

L1BlockZero := types.NewBlock(&types.Header{
Number: big.NewInt(0),
}, nil, nil, nil)
L1BlockZeroRef := eth.L1BlockRef{
Hash: L1BlockZero.Hash(),
Number: L1BlockZero.NumberU64(),
}
// We return incremental syncStatuses to force the op-batcher to entirely process each L2 block one by one.
// To test multi channel behavior, we could return a sync status that is multiple blocks ahead of the current L2 block.
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(1, L1BlockZeroRef), nil)
// We need Twice() here so that the altda commitments are sent before the reorg happens.
// Altda commitments are stored in the channelManager and only sent to L1 on the next main-loop tick.
// TODO: is there a better way to test this that isn't so dependent on the internal implementation?
ep.rollupClient.Mock.On("SyncStatus").Twice().Return(fakeSyncStatus(2, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(3, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(1, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Once().Return(fakeSyncStatus(2, L1BlockZeroRef), nil)
ep.rollupClient.Mock.On("SyncStatus").Return(fakeSyncStatus(3, L1BlockZeroRef), nil)

L2BlockZero := newMiniL2BlockWithNumberParent(1, big.NewInt(0), common.HexToHash("0x0"))
L2BlockOne := newMiniL2BlockWithNumberParent(1, big.NewInt(1), L2BlockZero.Hash())
L2BlockTwo := newMiniL2BlockWithNumberParent(1, big.NewInt(2), L2BlockOne.Hash())
L2BlockTwoPrime := newMiniL2BlockWithNumberParentAndL1Information(1, big.NewInt(2), L2BlockOne.Hash(), 101, 0)
L2BlockThreePrime := newMiniL2BlockWithNumberParent(1, big.NewInt(3), L2BlockTwoPrime.Hash())

ep.ethClient.Mock.On("BlockByNumber", big.NewInt(0)).Once().Return(L2BlockZero, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(1)).Twice().Return(L2BlockOne, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2BlockTwo, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2BlockTwoPrime, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(3)).Twice().Return(L2BlockThreePrime, nil)

err := bs.StartBatchSubmitting()
require.NoError(t, err)
time.Sleep(5 * time.Second) // 5 seconds should be enough to process all the blocks above
err = bs.StopBatchSubmitting(context.Background())
require.NoError(t, err)

require.Equal(t, 5, mockAltDAClient.StoreCount)
require.Equal(t, uint64(5), fakeTxMgr.Nonce)
}

func TestBatchSubmitter_AltDA_FailureCase2_FailedL1Tx(t *testing.T) {
bs, ep, mockAltDAClient, fakeTxMgr := altDASetup(t)

L1Block0 := types.NewBlock(&types.Header{
Number: big.NewInt(0),
}, nil, nil, nil)
L1Block0Ref := eth.L1BlockRef{
Hash: L1Block0.Hash(),
Number: L1Block0.NumberU64(),
}
ep.rollupClient.Mock.On("SyncStatus").Return(fakeSyncStatus(4, L1Block0Ref), nil)

L2Block0 := newMiniL2BlockWithNumberParent(1, big.NewInt(0), common.HexToHash("0x0"))
L2Block1 := newMiniL2BlockWithNumberParent(1, big.NewInt(1), L2Block0.Hash())
L2Block2 := newMiniL2BlockWithNumberParent(1, big.NewInt(2), L2Block1.Hash())
L2Block3 := newMiniL2BlockWithNumberParent(1, big.NewInt(3), L2Block2.Hash())
L2Block4 := newMiniL2BlockWithNumberParent(1, big.NewInt(4), L2Block3.Hash())

ep.ethClient.Mock.On("BlockByNumber", big.NewInt(0)).Once().Return(L2Block0, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(1)).Once().Return(L2Block1, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2Block2, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(3)).Once().Return(L2Block3, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(4)).Once().Return(L2Block4, nil)

fakeTxMgr.ErrorEveryNthSend(2)
err := bs.StartBatchSubmitting()
require.NoError(t, err)
time.Sleep(5 * time.Second) // 5 seconds should be enough to process all the blocks above
err = bs.StopBatchSubmitting(context.Background())
require.NoError(t, err)

// FIXME: storeCount=7 with the current buggy implementation, because when an L1 tx fails,
// we BOTH rewind the altdaChannelCursor (to resubmit the failed tx) AND push the frames back into the channelManager.
// A quick fix (?) is to not push back if the failed tx was an altda tx.
require.Equal(t, 4, mockAltDAClient.StoreCount)
// TODO: we should probably also check that the commitments are in order; see the ordering-check sketch after this test.
require.Equal(t, uint64(4), fakeTxMgr.Nonce)
}
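
// requireCommitmentsInOrder sketches the ordering check suggested by the TODO
// above (not part of this commit). The counting mock encodes each generic
// commitment as a big-endian uint16, so Holocene strict ordering reduces to
// the decoded values being 0, 1, 2, ... in submission order. It assumes the
// raw 2-byte counting payloads (the bytes passed to NewGenericCommitment, not
// the type-prefixed encoding) can be collected from the fake tx manager, which
// this diff doesn't show, and it needs "encoding/binary" imported.
func requireCommitmentsInOrder(t *testing.T, commitments [][]byte) {
for i, c := range commitments {
require.Len(t, c, 2)
require.Equal(t, uint16(i), binary.BigEndian.Uint16(c))
}
}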

func TestBatchSubmitter_AltDA_FailureCase3_ChannelTimeout(t *testing.T) {
// TODO: implement this test
}

func TestBatchSubmitter_AltDA_FailureCase4_FailedBlobSubmission(t *testing.T) {
bs, ep, mockAltDAClient, fakeTxMgr := altDASetup(t)

L1Block0 := types.NewBlock(&types.Header{
Number: big.NewInt(0),
}, nil, nil, nil)
L1Block0Ref := eth.L1BlockRef{
Hash: L1Block0.Hash(),
Number: L1Block0.NumberU64(),
}
ep.rollupClient.Mock.On("SyncStatus").Return(fakeSyncStatus(4, L1Block0Ref), nil)

L2Block0 := newMiniL2BlockWithNumberParent(1, big.NewInt(0), common.HexToHash("0x0"))
L2Block1 := newMiniL2BlockWithNumberParent(1, big.NewInt(1), L2Block0.Hash())
L2Block2 := newMiniL2BlockWithNumberParent(1, big.NewInt(2), L2Block1.Hash())
L2Block3 := newMiniL2BlockWithNumberParent(1, big.NewInt(3), L2Block2.Hash())
L2Block4 := newMiniL2BlockWithNumberParent(1, big.NewInt(4), L2Block3.Hash())

ep.ethClient.Mock.On("BlockByNumber", big.NewInt(0)).Once().Return(L2Block0, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(1)).Once().Return(L2Block1, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(2)).Once().Return(L2Block2, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(3)).Once().Return(L2Block3, nil)
ep.ethClient.Mock.On("BlockByNumber", big.NewInt(4)).Once().Return(L2Block4, nil)

mockAltDAClient.DropEveryNthPut(2)

err := bs.StartBatchSubmitting()
require.NoError(t, err)
time.Sleep(5 * time.Second) // 5 seconds should be enough to process all the blocks above
err = bs.StopBatchSubmitting(context.Background())
require.NoError(t, err)

require.Equal(t, 4, mockAltDAClient.StoreCount)
require.Equal(t, uint64(4), fakeTxMgr.Nonce)
}