-
Notifications
You must be signed in to change notification settings - Fork 137
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
automod: test capture framework (#470)
This PR is currently rebased on top of #466, to demonstrate testing that rule. **UPDATE:** that PR merged, so this is now against `main`.

Adds a `hepa` command to "capture" the current state of a real-world account: currently some account metadata (identity, profile, etc.), plus some recent post records. This gets serialized to JSON for easy dumping to a file, like:

```shell
go run ./cmd/hepa/ capture-recent atproto.com > automod/testdata/capture_atprotocom.json
```

Then, there is a test helper function which loads this file and processes all the post records using an engine fixture.

Combined, these fixtures make it easy to do test-driven development of new rules. You find an account which recently sent spam or violated some policy, take a capture snapshot, set up a test case, and then write a rule which triggers and satisfies the test.

Some notes:

- tried moving the "test helpers" into a sub-package (`indigo/automod/automodtest`) but hit a circular import, so left them where they are
- this won't work with all rule types, and some captures/rules may need additional mocking (e.g., additional identities in the mock directory), but that should be fine
- it usually isn't appropriate to capture real-world content into public code. we can be careful about what we add in this repo (indigo); the "hackerdarkweb" example included in this PR seems fine to snapshot to me. the code does strip "Private" account metadata by default.
- probably could use docs/comments. i'm not sure where best to put effort, feedback welcome!
- Loading branch information
Showing
15 changed files
with
1,743 additions
and
222 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package automod | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestNoOpCaptureReplyRule(t *testing.T) { | ||
assert := assert.New(t) | ||
|
||
engine := EngineTestFixture() | ||
capture := MustLoadCapture("testdata/capture_atprotocom.json") | ||
assert.NoError(ProcessCaptureRules(&engine, capture)) | ||
c, err := engine.GetCount("automod-quota", "report", PeriodDay) | ||
assert.NoError(err) | ||
assert.Equal(0, c) | ||
c, err = engine.GetCount("automod-quota", "takedown", PeriodDay) | ||
assert.NoError(err) | ||
assert.Equal(0, c) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
package automod | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
|
||
comatproto "github.com/bluesky-social/indigo/api/atproto" | ||
"github.com/bluesky-social/indigo/atproto/identity" | ||
"github.com/bluesky-social/indigo/atproto/syntax" | ||
"github.com/bluesky-social/indigo/xrpc" | ||
) | ||
|
||
func (e *Engine) FetchAndProcessRecord(ctx context.Context, aturi syntax.ATURI) error { | ||
// resolve URI, identity, and record | ||
if aturi.RecordKey() == "" { | ||
return fmt.Errorf("need a full, not partial, AT-URI: %s", aturi) | ||
} | ||
ident, err := e.Directory.Lookup(ctx, aturi.Authority()) | ||
if err != nil { | ||
return fmt.Errorf("resolving AT-URI authority: %v", err) | ||
} | ||
pdsURL := ident.PDSEndpoint() | ||
if pdsURL == "" { | ||
return fmt.Errorf("could not resolve PDS endpoint for AT-URI account: %s", ident.DID.String()) | ||
} | ||
pdsClient := xrpc.Client{Host: ident.PDSEndpoint()} | ||
|
||
e.Logger.Info("fetching record", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) | ||
out, err := comatproto.RepoGetRecord(ctx, &pdsClient, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String()) | ||
if err != nil { | ||
return fmt.Errorf("fetching record from Relay (%s): %v", aturi, err) | ||
} | ||
if out.Cid == nil { | ||
return fmt.Errorf("expected a CID in getRecord response") | ||
} | ||
return e.ProcessRecord(ctx, ident.DID, aturi.Path(), *out.Cid, out.Value.Val) | ||
} | ||
|
||
func (e *Engine) FetchRecent(ctx context.Context, atid syntax.AtIdentifier, limit int) (*identity.Identity, []*comatproto.RepoListRecords_Record, error) { | ||
ident, err := e.Directory.Lookup(ctx, atid) | ||
if err != nil { | ||
return nil, nil, fmt.Errorf("failed to resolve AT identifier: %v", err) | ||
} | ||
pdsURL := ident.PDSEndpoint() | ||
if pdsURL == "" { | ||
return nil, nil, fmt.Errorf("could not resolve PDS endpoint for account: %s", ident.DID.String()) | ||
} | ||
pdsClient := xrpc.Client{Host: ident.PDSEndpoint()} | ||
|
||
resp, err := comatproto.RepoListRecords(ctx, &pdsClient, "app.bsky.feed.post", "", int64(limit), ident.DID.String(), false, "", "") | ||
if err != nil { | ||
return nil, nil, fmt.Errorf("failed to fetch record list: %v", err) | ||
} | ||
e.Logger.Info("got recent posts", "did", ident.DID.String(), "pds", pdsURL, "count", len(resp.Records)) | ||
return ident, resp.Records, nil | ||
} | ||
|
||
func (e *Engine) FetchAndProcessRecent(ctx context.Context, atid syntax.AtIdentifier, limit int) error { | ||
|
||
ident, records, err := e.FetchRecent(ctx, atid, limit) | ||
if err != nil { | ||
return err | ||
} | ||
// records are most-recent first; we want recent but oldest-first, so iterate backwards | ||
for i := range records { | ||
rec := records[len(records)-i-1] | ||
aturi, err := syntax.ParseATURI(rec.Uri) | ||
if err != nil { | ||
return fmt.Errorf("parsing PDS record response: %v", err) | ||
} | ||
err = e.ProcessRecord(ctx, ident.DID, aturi.Path(), rec.Cid, rec.Value.Val) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
} | ||
|
||
type AccountCapture struct { | ||
CapturedAt syntax.Datetime `json:"capturedAt"` | ||
AccountMeta AccountMeta `json:"accountMeta"` | ||
PostRecords []comatproto.RepoListRecords_Record `json:"postRecords"` | ||
} | ||
|
||
func (e *Engine) CaptureRecent(ctx context.Context, atid syntax.AtIdentifier, limit int) (*AccountCapture, error) { | ||
ident, records, err := e.FetchRecent(ctx, atid, limit) | ||
if err != nil { | ||
return nil, err | ||
} | ||
pr := []comatproto.RepoListRecords_Record{} | ||
for _, r := range records { | ||
if r != nil { | ||
pr = append(pr, *r) | ||
} | ||
} | ||
|
||
// clear any pre-parsed key, which would fail to marshal as JSON | ||
ident.ParsedPublicKey = nil | ||
am, err := e.GetAccountMeta(ctx, ident) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// auto-clear sensitive PII (eg, account email) | ||
am.Private = nil | ||
|
||
ac := AccountCapture{ | ||
CapturedAt: syntax.DatetimeNow(), | ||
AccountMeta: *am, | ||
PostRecords: pr, | ||
} | ||
return &ac, nil | ||
} |
Oops, something went wrong.