diff --git a/emulator/driver.go b/emulator/driver.go index 4c33dc26..3772220a 100644 --- a/emulator/driver.go +++ b/emulator/driver.go @@ -3,6 +3,7 @@ package emulator import ( "bytes" "context" + "errors" "testing" "github.com/filecoin-project/go-f3/gpbft" @@ -22,9 +23,9 @@ type Driver struct { } // NewDriver instantiates a new Driver with the given GPBFT options. See -// Driver.StartInstance. +// Driver.RequireStartInstance. func NewDriver(t *testing.T, o ...gpbft.Option) *Driver { - h := newHost(t) + h := newHost(t, AdhocSigning()) participant, err := gpbft.NewParticipant(h, o...) require.NoError(t, err) return &Driver{ @@ -34,15 +35,24 @@ func NewDriver(t *testing.T, o ...gpbft.Option) *Driver { } } +func (d *Driver) SetSigning(signing Signing) { d.host.Signing = signing } + // StartInstance sets the current instances and starts emulation for it by signalling the // start of instance to the emulated honest gpbft.Participant. // // See NewInstance. -func (d *Driver) StartInstance(id uint64) { - d.require.NoError(d.subject.StartInstanceAt(id, d.host.Time())) +func (d *Driver) StartInstance(id uint64) error { + if err := d.subject.StartInstanceAt(id, d.host.Time()); err != nil { + return err + } // Trigger alarm once based on the implicit assumption that go-f3 uses alarm to // kickstart an instance internally. - d.RequireDeliverAlarm() + if triggered, err := d.DeliverAlarm(); err != nil { + return err + } else if !triggered { + return errors.New("alarm did not trigger at start") + } + return nil } // AddInstance adds an instance to the list of instances known by the driver. @@ -56,7 +66,7 @@ func (d *Driver) PeekLastBroadcastRequest() *gpbft.GMessage { return d.host.peekLastBroadcast() } -func (d *Driver) deliverAlarm() (bool, error) { +func (d *Driver) DeliverAlarm() (bool, error) { if d.host.maybeReceiveAlarm() { return true, d.subject.ReceiveAlarm() } @@ -78,8 +88,8 @@ func (d *Driver) prepareMessage(partialMessage *gpbft.GMessage) *gpbft.GMessage mb := instance.NewMessageBuilder(partialMessage.Vote, partialMessage.Justification, withValidTicket) mb.NetworkName = d.host.NetworkName() - mb.SigningMarshaler = d.host.adhocSigning - msg, err := mb.Build(context.Background(), d.host.adhocSigning, partialMessage.Sender) + mb.SigningMarshaler = d.host + msg, err := mb.Build(context.Background(), d.host, partialMessage.Sender) d.require.NoError(err) d.require.NotNil(msg) if !withValidTicket { diff --git a/emulator/driver_assertions.go b/emulator/driver_assertions.go index 518f70ee..37be76dd 100644 --- a/emulator/driver_assertions.go +++ b/emulator/driver_assertions.go @@ -19,8 +19,14 @@ func (d *Driver) RequireErrOnDeliverMessage(message *gpbft.GMessage, err error, } } +// RequireStartInstance asserts that instance with the given ID is started. See +// StartInstance. +func (d *Driver) RequireStartInstance(id uint64) { + d.require.NoError(d.StartInstance(id)) +} + func (d *Driver) RequireDeliverAlarm() { - delivered, err := d.deliverAlarm() + delivered, err := d.DeliverAlarm() d.require.NoError(err) d.require.True(delivered) } diff --git a/emulator/host.go b/emulator/host.go index 20f107a9..7fdafc79 100644 --- a/emulator/host.go +++ b/emulator/host.go @@ -15,7 +15,7 @@ const networkName = "emulator-net" var _ gpbft.Host = (*driverHost)(nil) type driverHost struct { - adhocSigning + Signing t *testing.T id gpbft.ActorID @@ -25,10 +25,11 @@ type driverHost struct { chain map[uint64]*Instance } -func newHost(t *testing.T) *driverHost { +func newHost(t *testing.T, signing Signing) *driverHost { return &driverHost{ - t: t, - chain: make(map[uint64]*Instance), + Signing: signing, + t: t, + chain: make(map[uint64]*Instance), } } diff --git a/emulator/instance.go b/emulator/instance.go index 93cdb179..8a361c4e 100644 --- a/emulator/instance.go +++ b/emulator/instance.go @@ -21,6 +21,7 @@ type Instance struct { powerTable *gpbft.PowerTable beacon []byte decision *gpbft.Justification + signing Signing } // NewInstance instantiates a new Instance for emulation. If absent, the @@ -28,7 +29,7 @@ type Instance struct { // public keys Power Table CID, etc. for the given params. The given proposal // must contain at least one tipset. // -// See Driver.StartInstance. +// See Driver.RequireStartInstance. func NewInstance(t *testing.T, id uint64, powerEntries gpbft.PowerEntries, proposal ...gpbft.TipSet) *Instance { // UX of the gpbft API is pretty painful; encapsulate the pain of getting an // instance going here at the price of accepting partial data and implicitly @@ -67,9 +68,11 @@ func NewInstance(t *testing.T, id uint64, powerEntries gpbft.PowerEntries, propo Commitments: [32]byte{}, PowerTable: ptCid, }, + signing: AdhocSigning(), } } +func (i *Instance) SetSigning(signing Signing) { i.signing = signing } func (i *Instance) Proposal() gpbft.ECChain { return i.proposal } func (i *Instance) GetDecision() *gpbft.Justification { return i.decision } func (i *Instance) ID() uint64 { return i.id } @@ -134,7 +137,7 @@ func (i *Instance) NewJustification(round uint64, step gpbft.Phase, vote gpbft.E } func (i *Instance) NewJustificationWithPayload(payload gpbft.Payload, from ...gpbft.ActorID) *gpbft.Justification { - msg := signing.MarshalPayloadForSigning(networkName, &payload) + msg := i.signing.MarshalPayloadForSigning(networkName, &payload) qr := gpbft.QuorumResult{ Signers: make([]int, len(from)), PubKeys: make([]gpbft.PubKey, len(from)), @@ -144,13 +147,13 @@ func (i *Instance) NewJustificationWithPayload(payload gpbft.Payload, from ...gp index, found := i.powerTable.Lookup[actor] require.True(i.t, found) entry := i.powerTable.Entries[index] - signature, err := signing.Sign(context.Background(), entry.PubKey, msg) + signature, err := i.signing.Sign(context.Background(), entry.PubKey, msg) require.NoError(i.t, err) qr.Signatures[j] = signature qr.PubKeys[j] = entry.PubKey qr.Signers[j] = index } - aggregate, err := signing.Aggregate(qr.PubKeys, qr.Signatures) + aggregate, err := i.signing.Aggregate(qr.PubKeys, qr.Signatures) require.NoError(i.t, err) return &gpbft.Justification{ Vote: payload, diff --git a/emulator/signing.go b/emulator/signing.go index 0bb35476..6979f618 100644 --- a/emulator/signing.go +++ b/emulator/signing.go @@ -10,18 +10,30 @@ import ( ) var ( - _ gpbft.Verifier = (*adhocSigning)(nil) - _ gpbft.Signer = (*adhocSigning)(nil) - _ gpbft.SigningMarshaler = (*adhocSigning)(nil) - - signing adhocSigning + _ Signing = (*adhocSigning)(nil) + _ Signing = (*erroneousSigning)(nil) + _ Signing = (*panicSigning)(nil) ) -// adhocSigning marshals, signs and verifies messages on behalf of any given +type Signing interface { + gpbft.Verifier + gpbft.Signer + gpbft.SigningMarshaler +} + +// AdhocSigning marshals, signs and verifies messages on behalf of any given // public key but uniquely and deterministically so using crc32 hash function for // performance. This implementation is not secure nor collision resistant. A // typical Instance power table is small enough to make the risk of collisions // negligible. +func AdhocSigning() Signing { return adhocSigning{} } + +// ErroneousSigning returns an error for every Signing API that can return an error. +func ErroneousSigning() Signing { return erroneousSigning{} } + +// PanicSigning panics for every Signing API. +func PanicSigning() Signing { return panicSigning{} } + type adhocSigning struct{} func (s adhocSigning) Sign(_ context.Context, sender gpbft.PubKey, msg []byte) ([]byte, error) { @@ -84,3 +96,32 @@ func (s adhocSigning) VerifyAggregate(payload, got []byte, signers []gpbft.PubKe func (s adhocSigning) MarshalPayloadForSigning(name gpbft.NetworkName, payload *gpbft.Payload) []byte { return payload.MarshalForSigning(name) } + +type erroneousSigning struct{} + +func (p erroneousSigning) Verify(gpbft.PubKey, []byte, []byte) error { + return errors.New("err Verify") +} + +func (p erroneousSigning) VerifyAggregate([]byte, []byte, []gpbft.PubKey) error { + return errors.New("err VerifyAggregate") +} + +func (p erroneousSigning) Aggregate([]gpbft.PubKey, [][]byte) ([]byte, error) { + return nil, errors.New("err Aggregate") +} +func (p erroneousSigning) Sign(context.Context, gpbft.PubKey, []byte) ([]byte, error) { + return nil, errors.New("err Sign") +} + +func (p erroneousSigning) MarshalPayloadForSigning(gpbft.NetworkName, *gpbft.Payload) []byte { + return nil +} + +type panicSigning struct{} + +func (p panicSigning) Verify(gpbft.PubKey, []byte, []byte) error { panic("π") } +func (p panicSigning) VerifyAggregate([]byte, []byte, []gpbft.PubKey) error { panic("π") } +func (p panicSigning) Aggregate([]gpbft.PubKey, [][]byte) ([]byte, error) { panic("π") } +func (p panicSigning) Sign(context.Context, gpbft.PubKey, []byte) ([]byte, error) { panic("π") } +func (p panicSigning) MarshalPayloadForSigning(gpbft.NetworkName, *gpbft.Payload) []byte { panic("π") } diff --git a/gpbft/gpbft.go b/gpbft/gpbft.go index 1d621e4f..f1badcb4 100644 --- a/gpbft/gpbft.go +++ b/gpbft/gpbft.go @@ -872,11 +872,14 @@ func (i *instance) broadcast(round uint64, step Phase, value ECChain, createTick if createTicket { mb.BeaconForTicket = i.beacon } + + // Capture the broadcast and metrics first. Because, otherwise the instance will + // end up with partial re-broadcast messages if RequestBroadcast panics. + i.broadcasted.record(mb) + metrics.broadcastCounter.Add(context.TODO(), 1, metric.WithAttributes(attrPhase[p.Step])) if err := i.participant.host.RequestBroadcast(mb); err != nil { i.log("failed to request broadcast: %v", err) } - i.broadcasted.record(mb) - metrics.broadcastCounter.Add(context.TODO(), 1, metric.WithAttributes(attrPhase[p.Step])) } // tryRebroadcast checks whether re-broadcast timeout has elapsed, and if so @@ -957,10 +960,13 @@ func (i *instance) rebroadcast() error { mbs := i.broadcasted.messagesByRound[round] for _, mb := range mbs { if err := i.participant.host.RequestBroadcast(mb); err != nil { - return err + // Silently log the error and proceed. This is consistent with the behaviour of + // instance for regular broadcasts. + i.log("failed to request rebroadcast %s at round %d: %v", mb.Payload.Step, mb.Payload.Round, err) + } else { + i.log("rebroadcasting %s at round %d for value %s", mb.Payload.Step.String(), mb.Payload.Round, mb.Payload.Value) + metrics.reBroadcastCounter.Add(context.TODO(), 1) } - i.log("rebroadcasting %s at round %d for value %s", mb.Payload.Step.String(), mb.Payload.Round, mb.Payload.Value) - metrics.reBroadcastCounter.Add(context.TODO(), 1) } } return nil diff --git a/gpbft/gpbft_test.go b/gpbft/gpbft_test.go index 5ab2f727..66657c67 100644 --- a/gpbft/gpbft_test.go +++ b/gpbft/gpbft_test.go @@ -20,6 +20,87 @@ var ( tipSet4 = gpbft.TipSet{Epoch: 4, Key: []byte("lobstermucher")} ) +func TestGPBFT_UnevenPowerDistribution(t *testing.T) { + t.Parallel() + newInstanceAndDriver := func(t *testing.T) (*emulator.Instance, *emulator.Driver) { + driver := emulator.NewDriver(t) + instance := emulator.NewInstance(t, + 0, + gpbft.PowerEntries{ + gpbft.PowerEntry{ + ID: 0, + Power: gpbft.NewStoragePower(1), + }, + gpbft.PowerEntry{ + ID: 1, + Power: gpbft.NewStoragePower(1), + }, + gpbft.PowerEntry{ + ID: 2, + Power: gpbft.NewStoragePower(2), + }, + gpbft.PowerEntry{ + ID: 3, + Power: gpbft.NewStoragePower(3), + }, + gpbft.PowerEntry{ + ID: 4, + Power: gpbft.NewStoragePower(5), + }, + }, + tipset0, tipSet1, tipSet2, tipSet3, + ) + driver.AddInstance(instance) + driver.RequireNoBroadcast() + return instance, driver + } + + t.Run("Late arriving COMMIT produces decision at future round", func(t *testing.T) { + instance, driver := newInstanceAndDriver(t) + + baseChain := instance.Proposal().BaseChain() + proposal1 := baseChain.Extend(tipSet1.Key) + proposal2 := proposal1.Extend(tipSet2.Key) + + driver.StartInstance(instance.ID()) + driver.RequireQuality() + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 1, + Vote: instance.NewQuality(proposal1), + }) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 2, + Vote: instance.NewQuality(proposal2), + }) + driver.RequireDeliverAlarm() + + driver.RequirePrepare(baseChain) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 1, + Vote: instance.NewPrepare(0, proposal1), + }) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 4, + Vote: instance.NewPrepare(0, proposal2), + }) + + driver.RequireCommitForBottom(0) + evidenceOfPrepareForProposal2AtRound10 := instance.NewJustification(10, gpbft.PREPARE_PHASE, proposal2, 4, 3) + commitToProposal2AtRound10 := instance.NewCommit(10, proposal2) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 4, + Vote: commitToProposal2AtRound10, + Justification: evidenceOfPrepareForProposal2AtRound10, + }) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 3, + Vote: commitToProposal2AtRound10, + Justification: evidenceOfPrepareForProposal2AtRound10, + }) + driver.RequireDecide(proposal2, instance.NewJustification(10, gpbft.COMMIT_PHASE, proposal2, 4, 3)) + }) +} + func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { t.Parallel() newInstanceAndDriver := func(t *testing.T) (*emulator.Instance, *emulator.Driver) { @@ -45,7 +126,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { t.Run("Decides proposal on strong quorum", func(t *testing.T) { instance, driver := newInstanceAndDriver(t) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -80,7 +161,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { t.Run("Decides base on lack of quorum", func(t *testing.T) { instance, driver := newInstanceAndDriver(t) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -118,7 +199,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { t.Run("Converges on base when quorum not possible", func(t *testing.T) { instance, driver := newInstanceAndDriver(t) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -190,7 +271,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { Vote: instance.NewPrepare(0, instance.Proposal()), }) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequirePrepare(instance.Proposal()) evidenceOfPrepare := instance.NewJustification(0, gpbft.PREPARE_PHASE, instance.Proposal(), 0, 1) @@ -214,7 +295,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { tipSet3, tipSet4, ) driver.AddInstance(futureInstance) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireDeliverMessage(&gpbft.GMessage{ Sender: 1, Vote: instance.NewQuality(instance.Proposal()), @@ -256,7 +337,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { // Start the future instance and expect progress to commit without any timeouts // based on previously queued messages. - driver.StartInstance(futureInstance.ID()) + driver.RequireStartInstance(futureInstance.ID()) driver.RequireQuality() driver.RequirePrepare(futureInstance.Proposal()) driver.RequireCommit(0, futureInstance.Proposal(), instance.NewJustification(0, gpbft.PREPARE_PHASE, futureInstance.Proposal(), 0, 1)) @@ -264,7 +345,7 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { t.Run("Rebroadcasts selected messages on timeout", func(t *testing.T) { instance, driver := newInstanceAndDriver(t) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() driver.RequireDeliverAlarm() @@ -442,6 +523,129 @@ func TestGPBFT_WithEvenPowerDistribution(t *testing.T) { }) } +func TestGPBFT_WithExactOneThirdToTwoThirdPowerDistribution(t *testing.T) { + t.Parallel() + newInstanceAndDriver := func(t *testing.T) (*emulator.Instance, *emulator.Driver) { + driver := emulator.NewDriver(t) + instance := emulator.NewInstance(t, + 0, + gpbft.PowerEntries{ + gpbft.PowerEntry{ + ID: 0, + Power: gpbft.NewStoragePower(1), + }, + gpbft.PowerEntry{ + ID: 1, + Power: gpbft.NewStoragePower(2), + }, + }, + tipset0, tipSet1, tipSet2, tipSet3, tipSet4, + ) + driver.AddInstance(instance) + driver.RequireNoBroadcast() + return instance, driver + } + + t.Run("Decides alternative proposal from participant with 2/3 of power", func(t *testing.T) { + // Test that messages from participant with 2/3 of power are sufficient on their + // own to reach a decision for that participant's proposal. + + instance, driver := newInstanceAndDriver(t) + alternativeProposal := instance.Proposal().BaseChain().Extend(tipSet1.Key) + + driver.StartInstance(instance.ID()) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 1, + Vote: instance.NewQuality(alternativeProposal), + }) + // Participants wait until either there is a quorum *for their own proposal*, + // i.e. instance proposal, or a timeout. Here, the only other QUALITY message + // being delivered is for the alternative proposal. Hence, the timeout trigger to + // end QUALITY and force progress to PREPARE. + driver.RequireDeliverAlarm() + + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 1, + Vote: instance.NewPrepare(0, alternativeProposal), + }) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 1, + Vote: instance.NewCommit(0, alternativeProposal), + Justification: instance.NewJustification(0, gpbft.PREPARE_PHASE, alternativeProposal, 1), + }) + driver.RequireDeliverMessage(&gpbft.GMessage{ + Sender: 1, + Vote: instance.NewDecide(0, alternativeProposal), + Justification: instance.NewJustification(0, gpbft.COMMIT_PHASE, alternativeProposal, 1), + }) + driver.RequireDecision(instance.ID(), alternativeProposal) + }) + + t.Run("Gets stuck if participant with 2/3 of power sends no messages", func(t *testing.T) { + // Test that if only messages from participant with 1/3 of power are delivered + // the instance gets stuck on PREPARE for base chain, rebroadcasting QUALITY and + // PREPARE. After delivering PREPARE from the participant with 2/3 of power it + // then gets stuck at COMMIT for base rebroadcasting QUALITY, PREPARE and COMMIT. + // And finally after delivering COMMIT from participant with 2/3 of power it + // decides on base justified by that participant's COMMIT. + instance, driver := newInstanceAndDriver(t) + baseChain := instance.Proposal().BaseChain() + + driver.StartInstance(instance.ID()) + + // Deliver QUALITY from participant with 1/3 of power. + driver.RequireQuality() + driver.RequireNoBroadcast() + // Trigger timeout to force progress to PREPARE since without timeout there is no + // strong quorum for instance proposal. + driver.RequireDeliverAlarm() + + // Deliver PREPARE from participant with 1/3 of power. + driver.RequirePrepare(baseChain) + // Trigger timeout of PREPARE phase to force a scheduled re-broadcast. + driver.RequireDeliverAlarm() + + // Trigger timeout of re-broadcast and expect QUALITY and PREPARE for base to be + // re-broadcasted. + driver.RequireDeliverAlarm() + driver.RequireQuality() + driver.RequirePrepare(baseChain) + driver.RequireNoBroadcast() + + // Unstuck the instance from PREPARE. + driver.RequireDeliverMessage( + &gpbft.GMessage{ + Sender: 1, + Vote: instance.NewPrepare(0, baseChain), + }, + ) + driver.RequireCommit(0, baseChain, instance.NewJustification(0, gpbft.PREPARE_PHASE, baseChain, 1, 0)) + + // Trigger timeout of COMMIT phase to force a scheduled re-broadcast. + driver.RequireDeliverAlarm() + + // Trigger timeout of re-broadcast and expect QUALITY, PREPARE, and COMMIT for + // base to be re-broadcasted. + driver.RequireDeliverAlarm() + driver.RequireQuality() + driver.RequirePrepare(baseChain) + driver.RequireCommit(0, baseChain, instance.NewJustification(0, gpbft.PREPARE_PHASE, baseChain, 1)) + driver.RequireNoBroadcast() + + // Unstuck the instance from COMMIT. + driver.RequireDeliverMessage( + &gpbft.GMessage{ + Sender: 1, + Vote: instance.NewCommit(0, baseChain), + Justification: instance.NewJustification(0, gpbft.PREPARE_PHASE, baseChain, 1), + }, + ) + + // Assert that decision is reached with justification from participant with 1/3 of power + driver.RequireDecide(baseChain, instance.NewJustification(0, gpbft.COMMIT_PHASE, baseChain, 1)) + }) +} + func TestGPBFT_SkipsToDecide(t *testing.T) { newInstanceAndDriver := func(t *testing.T) (*emulator.Instance, *emulator.Driver) { driver := emulator.NewDriver(t) @@ -469,7 +673,7 @@ func TestGPBFT_SkipsToDecide(t *testing.T) { instance, driver := newInstanceAndDriver(t) wantDecision := instance.Proposal().Extend(tipSet4.Key) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -494,7 +698,7 @@ func TestGPBFT_SkipsToDecide(t *testing.T) { Justification: instance.NewJustification(0, gpbft.COMMIT_PHASE, wantDecision, 1), }) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) // Expect immediate decision. driver.RequireDecision(instance.ID(), wantDecision) }) @@ -520,7 +724,7 @@ func TestGPBFT_SoloParticipant(t *testing.T) { t.Run("Decides proposal with no timeout", func(t *testing.T) { instance, driver := newInstanceAndDriver(t) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequirePrepare(instance.Proposal()) driver.RequireCommit( @@ -538,7 +742,7 @@ func TestGPBFT_SoloParticipant(t *testing.T) { t.Run("Decides base on QUALITY timeout", func(t *testing.T) { instance, driver := newInstanceAndDriver(t) baseChain := instance.Proposal().BaseChain() - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireDeliverAlarm() driver.RequireQuality() driver.RequirePrepare(baseChain) @@ -581,7 +785,7 @@ func TestGPBFT_SkipsToRound(t *testing.T) { instance, driver := newInstanceAndDriver(t) futureRoundProposal := instance.Proposal().Extend(tipSet4.Key) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -619,7 +823,7 @@ func TestGPBFT_SkipsToRound(t *testing.T) { instance, driver := newInstanceAndDriver(t) futureRoundProposal := instance.Proposal().Extend(tipSet4.Key) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -643,7 +847,7 @@ func TestGPBFT_SkipsToRound(t *testing.T) { instance, driver := newInstanceAndDriver(t) futureRoundProposal := instance.Proposal().Extend(tipSet4.Key) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() @@ -687,7 +891,7 @@ func TestGPBFT_SkipsToRound(t *testing.T) { Ticket: emulator.ValidTicket, }) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() // Expect skip to round. @@ -726,7 +930,7 @@ func TestGPBFT_Equivocations(t *testing.T) { instance.Proposal().Extend(tipSet4.Key), } - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) // Send the first Quality message for instance proposal driver.RequireDeliverMessage(&gpbft.GMessage{ @@ -858,7 +1062,7 @@ func TestGPBFT_Equivocations(t *testing.T) { } // Start the instance before sending decide messages to avoid skip to decide. - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequirePrepare(instance.Proposal()) driver.RequireCommit(0, instance.Proposal(), evidenceOfPrepare) @@ -913,7 +1117,7 @@ func TestGPBFT_ImpossibleQuorum(t *testing.T) { instance, driver := newInstanceAndDriver(t) alternativeProposal := instance.Proposal().BaseChain().Extend([]byte("barreleye")) - driver.StartInstance(instance.ID()) + driver.RequireStartInstance(instance.ID()) driver.RequireQuality() driver.RequireNoBroadcast() driver.RequireDeliverAlarm() @@ -1342,7 +1546,7 @@ func TestGPBFT_DropOld(t *testing.T) { driver.AddInstance(newInstance) // We immediately skip to the "new" instance. - driver.StartInstance(2) + driver.RequireStartInstance(2) // All messages from the old instance should be dropped. diff --git a/gpbft/metrics.go b/gpbft/metrics.go index 4243e9c2..0aa5b52a 100644 --- a/gpbft/metrics.go +++ b/gpbft/metrics.go @@ -42,30 +42,28 @@ var ( attrCacheKindJustification = attribute.String("kind", "justification") metrics = struct { - phaseCounter metric.Int64Counter - roundHistogram metric.Int64Histogram - broadcastCounter metric.Int64Counter - reBroadcastCounter metric.Int64Counter - reBroadcastAttemptCounter metric.Int64Counter - errorCounter metric.Int64Counter - currentInstance metric.Int64Gauge - currentRound metric.Int64Gauge - currentPhase metric.Int64Gauge - skipCounter metric.Int64Counter - epochsPerInstance metric.Int64Gauge - validationCache metric.Int64Counter + phaseCounter metric.Int64Counter + roundHistogram metric.Int64Histogram + broadcastCounter metric.Int64Counter + reBroadcastCounter metric.Int64Counter + errorCounter metric.Int64Counter + currentInstance metric.Int64Gauge + currentRound metric.Int64Gauge + currentPhase metric.Int64Gauge + skipCounter metric.Int64Counter + epochsPerInstance metric.Int64Gauge + validationCache metric.Int64Counter }{ phaseCounter: measurements.Must(meter.Int64Counter("f3_gpbft_phase_counter", metric.WithDescription("Number of times phases change"))), roundHistogram: measurements.Must(meter.Int64Histogram("f3_gpbft_round_histogram", metric.WithDescription("Histogram of rounds per instance"), metric.WithExplicitBucketBoundaries(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 20.0, 50.0, 100.0, 1000.0), )), - broadcastCounter: measurements.Must(meter.Int64Counter("f3_gpbft_broadcast_counter", metric.WithDescription("Number of broadcasted messages"))), - reBroadcastCounter: measurements.Must(meter.Int64Counter("f3_gpbft_rebroadcast_counter", metric.WithDescription("Number of rebroadcasted messages"))), - reBroadcastAttemptCounter: measurements.Must(meter.Int64Counter("f3_gpbft_rebroadcast_attempt_counter", metric.WithDescription("Number of rebroadcast attempts"))), - errorCounter: measurements.Must(meter.Int64Counter("f3_gpbft_error_counter", metric.WithDescription("Number of errors"))), - currentInstance: measurements.Must(meter.Int64Gauge("f3_gpbft_current_instance", metric.WithDescription("The ID of the current instance"))), - currentRound: measurements.Must(meter.Int64Gauge("f3_gpbft_current_round", metric.WithDescription("The current round number"))), + broadcastCounter: measurements.Must(meter.Int64Counter("f3_gpbft_broadcast_counter", metric.WithDescription("Number of broadcasted messages"))), + reBroadcastCounter: measurements.Must(meter.Int64Counter("f3_gpbft_rebroadcast_counter", metric.WithDescription("Number of rebroadcasted messages"))), + errorCounter: measurements.Must(meter.Int64Counter("f3_gpbft_error_counter", metric.WithDescription("Number of errors"))), + currentInstance: measurements.Must(meter.Int64Gauge("f3_gpbft_current_instance", metric.WithDescription("The ID of the current instance"))), + currentRound: measurements.Must(meter.Int64Gauge("f3_gpbft_current_round", metric.WithDescription("The current round number"))), currentPhase: measurements.Must(meter.Int64Gauge("f3_gpbft_current_phase", metric.WithDescription("The current phase represented as numeric value of gpbft.Phase: "+ "0=INITIAL, 1=QUALITY, 2=CONVERGE, 3=PREPARE, 4=COMMIT, 5=DECIDE, and 6=TERMINATED"))), diff --git a/gpbft/participant_test.go b/gpbft/participant_test.go index e98c3f40..310a71ba 100644 --- a/gpbft/participant_test.go +++ b/gpbft/participant_test.go @@ -9,6 +9,7 @@ import ( "testing" "time" + "github.com/filecoin-project/go-f3/emulator" "github.com/filecoin-project/go-f3/gpbft" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -1095,6 +1096,96 @@ func TestParticipant_ValidateMessageParallel(t *testing.T) { subject.assertHostExpectations() } +func TestParticipant_WithMisbehavingSigner(t *testing.T) { + newDriverAndInstance := func(t *testing.T) (*emulator.Driver, *emulator.Instance) { + driver := emulator.NewDriver(t) + instance := emulator.NewInstance(t, + 0, + gpbft.PowerEntries{ + gpbft.PowerEntry{ + ID: 0, + Power: gpbft.NewStoragePower(1), + }, + gpbft.PowerEntry{ + ID: 1, + Power: gpbft.NewStoragePower(1), + }, + }, + tipset0, tipSet1, tipSet2, + ) + driver.AddInstance(instance) + driver.RequireNoBroadcast() + return driver, instance + } + + t.Run("erroneous signing makes no broadcast", func(t *testing.T) { + driver, instance := newDriverAndInstance(t) + + // Error at every signing operation. + driver.SetSigning(emulator.ErroneousSigning()) + + // Start the instance, which should begin QUALITY. + driver.RequireStartInstance(instance.ID()) + // Expect no broadcast as failure to broadcast should be silently logged. + driver.RequireNoBroadcast() + + // Trigger alarm to begin PREPARE + driver.RequireDeliverAlarm() + // Expect no broadcast as failure to broadcast should be silently logged. + driver.RequireNoBroadcast() + + // Trigger alarm to timeout PREPARE + driver.RequireDeliverAlarm() + // Trigger alarm to timeout rebroadcast and begin rebroadcasting messages. + driver.RequireDeliverAlarm() + // Expect no broadcast as failure to broadcast should be silently logged. + driver.RequireNoBroadcast() + }) + + t.Run("panic while signing is handled gracefully", func(t *testing.T) { + driver, instance := newDriverAndInstance(t) + + // Panic at every signing operation. + driver.SetSigning(emulator.PanicSigning()) + + // Expect that instance does not start, capturs panic and returns an error. + require.ErrorContains(t, driver.StartInstance(instance.ID()), "participant panicked") + // Expect no broadcast as the instance is not started + driver.RequireNoBroadcast() + + // Expect alarm trigger fails and there was a scheduled alarm. + alarmScheduled, err := driver.DeliverAlarm() + require.ErrorContains(t, err, "participant panicked") + require.True(t, alarmScheduled) + + // Switch to plausible signing to get the instance started. + driver.SetSigning(emulator.AdhocSigning()) + driver.RequireStartInstance(instance.ID()) + driver.RequireQuality() + + // Switch back to panic signing + driver.SetSigning(emulator.PanicSigning()) + + // Expect alarm trigger fails again and there was an alarm scheduled by QUALITY. + alarmScheduled, err = driver.DeliverAlarm() + require.ErrorContains(t, err, "participant panicked") + require.True(t, alarmScheduled) + // Expect no broadcast. + driver.RequireNoBroadcast() + + // Switch to plausible signing and progress to PREPARE. + driver.SetSigning(emulator.AdhocSigning()) + // Trigger timeout at PREPARE to schedule re-broadcast. + driver.RequireDeliverAlarm() + // Trigger re-broadcast timeout to force broadcast attempt for QUALITY and + // PREPARE from round 0, which should have been scheduled regardless of signing + // panic. + driver.RequireDeliverAlarm() + driver.RequireQuality() + driver.RequirePrepare(instance.Proposal().BaseChain()) + }) +} + type validatedMessage struct { msg *gpbft.GMessage } diff --git a/gpbft/ticket_quality.go b/gpbft/ticket_quality.go index 4717b06e..78ae55d9 100644 --- a/gpbft/ticket_quality.go +++ b/gpbft/ticket_quality.go @@ -20,6 +20,9 @@ import ( // We additionally use log-base-2 instead of natural logarithm as it is easier to implement, // and it is just a linear factor on all tickets, meaning it does not influence their ordering. func ComputeTicketQuality(ticket []byte, power int64) float64 { + if power <= 0 { + return math.Inf(1) + } // we could use Blake2b-128 but 256 is more common and more widely supported ticketHash := blake2b.Sum256(ticket) quality := linearToExpDist(ticketHash[:16]) diff --git a/gpbft/ticket_quality_test.go b/gpbft/ticket_quality_test.go index 8f479d45..7bd49876 100644 --- a/gpbft/ticket_quality_test.go +++ b/gpbft/ticket_quality_test.go @@ -2,12 +2,14 @@ package gpbft import ( "bytes" + "math" "math/big" "runtime" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" ) func TestTQ_BigLog2_Table(t *testing.T) { @@ -21,6 +23,7 @@ func TestTQ_BigLog2_Table(t *testing.T) { {"0.(9)8", "fffffffffffff8000000000000000000", -1, 0.9999999999999999}, {"0.(9)7", "fffffffffffff7000000000000000000", -1, 0.9999999999999997}, {"0.5", "80000000000000000000000000000000", -1, 0.0}, + {"2^-129", "0", -129, 0.0}, {"2^-128", "1", -128, 0.0}, {"2^-127", "2", -127, 0.0}, {"2^-127 + eps", "3", -127, 0.5849625007211563}, @@ -49,7 +52,7 @@ func TestTQ_BigLog2_Table(t *testing.T) { } } -func FuzzTQ_linearToExp(f *testing.F) { +func FuzzTQ_LinearToExp(f *testing.F) { f.Add(make([]byte, 16)) f.Add(bytes.Repeat([]byte{0xff}, 16)) f.Add(bytes.Repeat([]byte{0xa0}, 16)) @@ -61,3 +64,56 @@ func FuzzTQ_linearToExp(f *testing.F) { runtime.KeepAlive(q) }) } + +func TestComputeTicketQuality(t *testing.T) { + t.Run("Non-zero for non-zero power", func(t *testing.T) { + ticket := generateTicket(t) + power := int64(10) + quality := ComputeTicketQuality(ticket, power) + require.Greater(t, quality, 0.0, "Expected positive quality value, got %f", quality) + }) + + t.Run("Weighed by power", func(t *testing.T) { + ticket := generateTicket(t) + quality1 := ComputeTicketQuality(ticket, 10) + quality2 := ComputeTicketQuality(ticket, 11) + require.Less(t, quality2, quality1, "Expected quality2 to be less than quality1 due to weight by power, got quality1=%f, quality2=%f", quality1, quality2) + }) + + t.Run("Zero power is handled gracefully", func(t *testing.T) { + ticket := generateTicket(t) + quality := ComputeTicketQuality(ticket, 0) + require.True(t, math.IsInf(quality, 1), "Expected quality to be infinity with power 0, got %f", quality) + }) + + t.Run("Negative power is handled gracefully", func(t *testing.T) { + ticket := generateTicket(t) + quality := ComputeTicketQuality(ticket, -5) + require.True(t, math.IsInf(quality, 1), "Expected quality to be infinity for negative power, got %f", quality) + }) + + t.Run("Different tickets should have different qualities", func(t *testing.T) { + quality1 := ComputeTicketQuality(generateTicket(t), 1413) + quality2 := ComputeTicketQuality(generateTicket(t), 1413) + require.NotEqual(t, quality1, quality2, "Expected different qualities for different tickets, got quality1=%f, quality2=%f", quality1, quality2) + }) + + t.Run("Tickets with same 16 byte prefix should different quality", func(t *testing.T) { + prefix := generateTicket(t) + ticket1 := append(prefix, 14) + ticket2 := append(prefix, 13) + require.NotEqual(t, ticket1, ticket2) + + quality1 := ComputeTicketQuality(ticket1, 1413) + quality2 := ComputeTicketQuality(ticket2, 1413) + require.NotEqual(t, quality1, quality2, "Expected different qualities for different tickets with the same 16 byte prefix, got quality1=%f, quality2=%f", quality1, quality2) + }) +} + +func generateTicket(t *testing.T) []byte { + var ticket [16]byte + n, err := rand.Read(ticket[:]) + require.NoError(t, err) + require.Equal(t, 16, n) + return ticket[:] +}