Skip to content
This repository has been archived by the owner on May 23, 2024. It is now read-only.

Verify that previous ATX points to correct ATX when handling incoming ATXs #27

Merged
merged 28 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .github/workflows/systest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ jobs:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- uses: extractions/netrc@v2
with:
machine: github.com
username: ${{ secrets.GH_ACTION_TOKEN_USER }}
password: ${{ secrets.GH_ACTION_TOKEN }}
if: vars.GOPRIVATE

- name: Push go-spacemesh build to docker hub
run: make dockerpush

Expand All @@ -103,6 +110,13 @@ jobs:
shell: bash
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT

- uses: extractions/netrc@v2
with:
machine: github.com
username: ${{ secrets.GH_ACTION_TOKEN_USER }}
password: ${{ secrets.GH_ACTION_TOKEN }}
if: vars.GOPRIVATE

- name: Build tests docker image
run: make -C systest docker

Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,16 @@

See [RELEASE](./RELEASE.md) for workflow instructions.

## Release v1.5.2-hotfix1

This release includes our first CVE fix. A vulnerability was found in the way a node handles incoming ATXs. We urge all
node operators to update to this version as soon as possible.

### Improvements

* Fixed a vulnerability in the way a node handles incoming ATXs. This vulnerability allows an attacker to claim rewards
for a full tick amount although they should not be eligible for them.

## Release v1.5.2

### Improvements
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN make get-libs
COPY go.mod .
COPY go.sum .

RUN go mod download
RUN --mount=type=secret,id=mynetrc,dst=/root/.netrc go mod download

# Here we copy the rest of the source code
COPY . .
Expand Down
11 changes: 9 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,11 @@ list-versions:

dockerbuild-go:
DOCKER_BUILDKIT=1 docker build \
--secret id=mynetrc,src=$(HOME)/.netrc \
--build-arg VERSION=${VERSION} \
-t go-spacemesh:$(SHA) \
-t $(DOCKER_HUB)/$(DOCKER_IMAGE_REPO):$(DOCKER_IMAGE_VERSION) .
-t $(DOCKER_HUB)/$(DOCKER_IMAGE_REPO):$(DOCKER_IMAGE_VERSION) \
.
.PHONY: dockerbuild-go

dockerpush: dockerbuild-go dockerpush-only
Expand All @@ -171,7 +173,12 @@ endif
.PHONY: dockerpush-only

dockerbuild-bs:
DOCKER_BUILDKIT=1 docker build -t go-spacemesh-bs:$(SHA) -t $(DOCKER_HUB)/$(DOCKER_IMAGE_REPO)-bs:$(DOCKER_IMAGE_VERSION) -f ./bootstrap.Dockerfile .
DOCKER_BUILDKIT=1 docker build \
--secret id=mynetrc,src=$(HOME)/.netrc \
-t go-spacemesh-bs:$(SHA) \
-t $(DOCKER_HUB)/$(DOCKER_IMAGE_REPO)-bs:$(DOCKER_IMAGE_VERSION) \
-f ./bootstrap.Dockerfile \
.
.PHONY: dockerbuild-bs

dockerpush-bs: dockerbuild-bs dockerpush-bs-only
Expand Down
235 changes: 170 additions & 65 deletions activation/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,16 +213,14 @@ func (h *Handler) SyntacticallyValidateDeps(
ctx context.Context,
atx *types.ActivationTx,
) (*types.VerifiedActivationTx, *mwire.MalfeasanceProof, error) {
var (
commitmentATX *types.ATXID
err error
)
var commitmentATX *types.ATXID
if atx.PrevATXID == types.EmptyATXID {
if err := h.validateInitialAtx(ctx, atx); err != nil {
return nil, nil, err
}
commitmentATX = atx.CommitmentATX
commitmentATX = atx.CommitmentATX // checked to be non-nil in syntactic validation
} else {
var err error
commitmentATX, err = h.getCommitmentAtx(atx)
if err != nil {
return nil, nil, fmt.Errorf("commitment atx for %s not found: %w", atx.SmesherID, err)
Expand Down Expand Up @@ -403,76 +401,182 @@ func (h *Handler) cacheAtx(ctx context.Context, atx *types.ActivationTxHeader, n
return nil
}

// storeAtx stores an ATX and notifies subscribers of the ATXID.
func (h *Handler) storeAtx(ctx context.Context, atx *types.VerifiedActivationTx) (*mwire.MalfeasanceProof, error) {
var nonce *types.VRFPostIndex
malicious, err := h.cdb.IsMalicious(atx.SmesherID)
// checkDoublePublish verifies if a node has already published an ATX in the same epoch.
func (h *Handler) checkDoublePublish(
ctx context.Context,
tx sql.Executor,
atx *types.VerifiedActivationTx,
) (*mwire.MalfeasanceProof, error) {
prev, err := atxs.GetByEpochAndNodeID(tx, atx.PublishEpoch, atx.SmesherID)
if err != nil && !errors.Is(err, sql.ErrNotFound) {
return nil, err
}

// do ID check to be absolutely sure.
if prev == nil || prev.ID() == atx.ID() {
return nil, nil
}
if _, ok := h.signers[atx.SmesherID]; ok {
// if we land here we tried to publish 2 ATXs in the same epoch
// don't punish ourselves but fail validation and thereby the handling of the incoming ATX
return nil, fmt.Errorf("%s already published an ATX in epoch %d", atx.SmesherID.ShortString(), atx.PublishEpoch)
}

var atxProof mwire.AtxProof
for i, a := range []*types.VerifiedActivationTx{prev, atx} {
atxProof.Messages[i] = mwire.AtxProofMsg{
InnerMsg: types.ATXMetadata{
PublishEpoch: a.PublishEpoch,
MsgHash: wire.ActivationTxToWireV1(a.ActivationTx).HashInnerBytes(),
},
SmesherID: a.SmesherID,
Signature: a.Signature,
}
}
proof := &mwire.MalfeasanceProof{
Layer: atx.PublishEpoch.FirstLayer(),
Proof: mwire.Proof{
Type: mwire.MultipleATXs,
Data: &atxProof,
},
}
encoded, err := codec.Encode(proof)
if err != nil {
return nil, fmt.Errorf("checking if node is malicious: %w", err)
h.log.With().Panic("failed to encode malfeasance proof", log.Err(err))
}
var proof *mwire.MalfeasanceProof
if err := h.cdb.WithTx(ctx, func(tx *sql.Tx) error {
if malicious {
if err := atxs.Add(tx, atx); err != nil && !errors.Is(err, sql.ErrObjectExists) {
return fmt.Errorf("add atx to db: %w", err)
}
return nil
if err := identities.SetMalicious(tx, atx.SmesherID, encoded, time.Now()); err != nil {
return nil, fmt.Errorf("add malfeasance proof: %w", err)
}

h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch",
log.Stringer("smesher", atx.SmesherID),
log.Object("prev", prev),
log.Object("curr", atx),
)

return proof, nil
}

// checkWrongPrevAtx verifies if the previous ATX referenced in the ATX is correct.
func (h *Handler) checkWrongPrevAtx(
ctx context.Context,
tx sql.Executor,
atx *types.VerifiedActivationTx,
) (*mwire.MalfeasanceProof, error) {
prevID, err := atxs.PrevIDByNodeID(tx, atx.SmesherID, atx.PublishEpoch)
if err != nil && !errors.Is(err, sql.ErrNotFound) {
return nil, fmt.Errorf("get last atx by node id: %w", err)
}
if prevID == atx.PrevATXID {
return nil, nil
}

if _, ok := h.signers[atx.SmesherID]; ok {
// if we land here we tried to publish an ATX with a wrong prevATX
h.log.WithContext(ctx).With().Warning(
"Node produced an ATX with a wrong prevATX. This can happened when the node wasn't synced when "+
"registering at PoET",
log.Stringer("smesher", atx.SmesherID),
log.ShortStringer("expected", prevID),
log.ShortStringer("actual", atx.PrevATXID),
)
return nil, fmt.Errorf("%s referenced incorrect previous ATX", atx.SmesherID.ShortString())
}

// check if atx.PrevATXID is actually the last published ATX by the same node
prev, err := atxs.Get(tx, prevID)
if err != nil {
return nil, fmt.Errorf("get prev atx: %w", err)
}

// if atx references a previous ATX that is not the last ATX by the same node, there must be at least one
// atx published between prevATX and the current epoch
var atx2 *types.VerifiedActivationTx
pubEpoch := h.clock.CurrentLayer().GetEpoch()
for pubEpoch > prev.PublishEpoch {
id, err := atxs.PrevIDByNodeID(tx, atx.SmesherID, pubEpoch)
if err != nil {
return nil, fmt.Errorf("get prev atx id by node id: %w", err)
}

prev, err := atxs.GetByEpochAndNodeID(tx, atx.PublishEpoch, atx.SmesherID)
if err != nil && !errors.Is(err, sql.ErrNotFound) {
return err
atx2, err = atxs.Get(tx, id)
if err != nil {
return nil, fmt.Errorf("get prev atx: %w", err)
}

// do ID check to be absolutely sure.
if prev != nil && prev.ID() != atx.ID() {
if _, ok := h.signers[atx.SmesherID]; ok {
// if we land here we tried to publish 2 ATXs in the same epoch
// don't punish ourselves but fail validation and thereby the handling of the incoming ATX
return fmt.Errorf("%s already published an ATX in epoch %d", atx.SmesherID.ShortString(),
atx.PublishEpoch,
)
}

var atxProof mwire.AtxProof
for i, a := range []*types.VerifiedActivationTx{prev, atx} {
atxProof.Messages[i] = mwire.AtxProofMsg{
InnerMsg: types.ATXMetadata{
PublishEpoch: a.PublishEpoch,
MsgHash: wire.ActivationTxToWireV1(a.ActivationTx).HashInnerBytes(),
},
SmesherID: a.SmesherID,
Signature: a.Signature,
}
}
proof = &mwire.MalfeasanceProof{
Layer: atx.PublishEpoch.FirstLayer(),
Proof: mwire.Proof{
Type: mwire.MultipleATXs,
Data: &atxProof,
},
}
encoded, err := codec.Encode(proof)
if err != nil {
h.log.With().Panic("failed to encode malfeasance proof", log.Err(err))
}
if err := identities.SetMalicious(tx, atx.SmesherID, encoded, time.Now()); err != nil {
return fmt.Errorf("add malfeasance proof: %w", err)
}

h.log.WithContext(ctx).With().Warning("smesher produced more than one atx in the same epoch",
log.Stringer("smesher", atx.SmesherID),
log.Object("prev", prev),
log.Object("curr", atx),
)
if atx.ID() != atx2.ID() && atx.PrevATXID == atx2.PrevATXID {
// found an ATX that points to the same previous ATX
break
}
pubEpoch = atx2.PublishEpoch
}

if atx2 == nil || atx2.PrevATXID != atx.PrevATXID {
// something went wrong, we couldn't find an ATX that points to the same previous ATX
// this should never happen since we are checking in other places that all ATXs from the same node
// form a chain
return nil, errors.New("failed double previous check: could not find an ATX with same previous ATX")
}

proof := &mwire.MalfeasanceProof{
Layer: atx.PublishEpoch.FirstLayer(),
Proof: mwire.Proof{
Type: mwire.InvalidPrevATX,
Data: &mwire.InvalidPrevATXProof{
Atx1: *wire.ActivationTxToWireV1(atx.ActivationTx),
Atx2: *wire.ActivationTxToWireV1(atx2.ActivationTx),
},
},
}

if err := identities.SetMalicious(tx, atx.SmesherID, codec.MustEncode(proof), time.Now()); err != nil {
return nil, fmt.Errorf("add malfeasance proof: %w", err)
}

h.log.WithContext(ctx).With().Warning("smesher referenced the wrong previous in published ATX",
log.Stringer("smesher", atx.SmesherID),
log.ShortStringer("expected", prevID),
log.ShortStringer("actual", atx.PrevATXID),
)
return proof, nil
}

func (h *Handler) checkMalicious(
ctx context.Context,
tx *sql.Tx,
atx *types.VerifiedActivationTx,
) (*mwire.MalfeasanceProof, error) {
malicious, err := identities.IsMalicious(tx, atx.SmesherID)
if err != nil {
return nil, fmt.Errorf("checking if node is malicious: %w", err)
}
if malicious {
return nil, nil
}
proof, err := h.checkDoublePublish(ctx, tx, atx)
if proof != nil || err != nil {
return proof, err
}
return h.checkWrongPrevAtx(ctx, tx, atx)
}

// storeAtx stores an ATX and notifies subscribers of the ATXID.
func (h *Handler) storeAtx(ctx context.Context, atx *types.VerifiedActivationTx) (*mwire.MalfeasanceProof, error) {
var nonce *types.VRFPostIndex
var proof *mwire.MalfeasanceProof
err := h.cdb.WithTx(ctx, func(tx *sql.Tx) error {
var err error
proof, err = h.checkMalicious(ctx, tx, atx)
if err != nil {
return fmt.Errorf("check malicious: %w", err)
}
nonce, err = atxs.AddGettingNonce(tx, atx)
if err != nil && !errors.Is(err, sql.ErrObjectExists) {
return fmt.Errorf("add atx to db: %w", err)
}
return nil
}); err != nil {
})
if err != nil {
return nil, fmt.Errorf("store atx: %w", err)
}
if nonce == nil {
Expand Down Expand Up @@ -512,7 +616,7 @@ func (h *Handler) HandleSyncedAtx(ctx context.Context, expHash types.Hash32, pee

// HandleGossipAtx handles the atx gossip data channel.
func (h *Handler) HandleGossipAtx(ctx context.Context, peer p2p.Peer, msg []byte) error {
proof, err := h.handleAtx(ctx, types.Hash32{}, peer, msg)
proof, err := h.handleAtx(ctx, types.EmptyHash32, peer, msg)
if err != nil && !errors.Is(err, errMalformedData) && !errors.Is(err, errKnownAtx) {
h.log.WithContext(ctx).With().Warning("failed to process atx gossip",
log.Stringer("sender", peer),
Expand Down Expand Up @@ -621,7 +725,7 @@ func (h *Handler) processATX(
return proof, err
}

if expHash != (types.Hash32{}) && vAtx.ID().Hash32() != expHash {
if expHash != types.EmptyHash32 && vAtx.ID().Hash32() != expHash {
return nil, fmt.Errorf(
"%w: atx want %s, got %s",
errWrongHash,
Expand All @@ -637,7 +741,8 @@ func (h *Handler) processATX(
events.ReportNewActivation(vAtx)
h.log.WithContext(ctx).With().Info(
"new atx", log.Inline(vAtx),
log.Bool("malicious", proof != nil))
log.Bool("malicious", proof != nil),
)
return proof, err
}

Expand Down
Loading
Loading