From d98fd5db30d0098ef5b5bee6161dfa5683d450ad Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 10 Nov 2023 17:10:44 -0800 Subject: [PATCH 01/35] automod: start sketching package API --- automod/countstore.go | 67 +++++++++++++++++++++++ automod/engine.go | 66 +++++++++++++++++++++++ automod/event.go | 122 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+) create mode 100644 automod/countstore.go create mode 100644 automod/engine.go create mode 100644 automod/event.go diff --git a/automod/countstore.go b/automod/countstore.go new file mode 100644 index 000000000..ed1e94f67 --- /dev/null +++ b/automod/countstore.go @@ -0,0 +1,67 @@ +package automod + +import ( + "context" + "fmt" + "log/slog" + "time" +) + +const ( + PeriodTotal = "total" + PeriodDay = "day" + PeriodHour = "hour" +) + +type CountStore interface { + GetCount(ctx context.Context, key, period string) (int, error) + Increment(ctx context.Context, key string) (int, error) +} + +// TODO: this implementation isn't race-safe (yet)! 
+type MemCountStore struct { + Counts map[string]int +} + +func NewMemCountStore() MemCountStore { + return MemCountStore{ + Counts: make(map[string]int), + } +} + +func PeriodKey(key, period string) string { + switch period { + case PeriodTotal: + return key + case PeriodDay: + t := time.Now().UTC().Format(time.DateOnly) + return fmt.Sprintf("%s:%s", key, t) + case PeriodHour: + t := time.Now().UTC().Format(time.RFC3339)[0:13] + return fmt.Sprintf("%s:%s", key, t) + default: + slog.Warn("unhandled counter period", "period", period) + return key + } +} + +func (s *MemCountStore) GetCount(ctx context.Context, key, period string) (int, error) { + v, ok := s.Counts[PeriodKey(key, period)] + if !ok { + return 0, nil + } + return v, nil +} + +func (s *MemCountStore) Increment(ctx context.Context, key string) error { + for _, p := range []string{PeriodTotal, PeriodDay, PeriodHour} { + k := PeriodKey(key, p) + v, ok := s.Counts[k] + if !ok { + v = 0 + } + v = v + 1 + s.Counts[k] = v + } + return nil +} diff --git a/automod/engine.go b/automod/engine.go new file mode 100644 index 000000000..a680607c3 --- /dev/null +++ b/automod/engine.go @@ -0,0 +1,66 @@ +package automod + +import ( + "context" + "log/slog" + "sync" + + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/xrpc" +) + +// runtime for executing rules, managing state, and recording moderation actions +type Engine struct { + // current rule sets. will eventually be possible to swap these out at runtime + RulesMap sync.Map + Directory identity.Directory + // used to persist moderation actions in mod service (optional) + AdminClient *xrpc.Client + CountStore CountStore +} + +func (e *Engine) ExecuteIdentity() error { + ctx := context.Background() + + // similar to an HTTP server, we want to recover any panics from rule execution + defer func() { + if r := recover(); r != nil { + slog.Error("automod event execution exception", "err", r) + // TODO: mark repo as dirty? 
+ // TODO: circuit-break on repeated panics? + } + }() + + _ = ctx + return nil +} + +func (e *Engine) ExecuteCommit() error { + ctx := context.Background() + + // similar to an HTTP server, we want to recover any panics from rule execution + defer func() { + if r := recover(); r != nil { + slog.Error("automod event execution exception", "err", r) + // TODO: mark repo as dirty? + // TODO: circuit-break on repeated panics? + } + }() + + _ = ctx + return nil +} + +func (e *Engine) PersistModActions() error { + // XXX + return nil +} + +func (e *Engine) GetCount(key, period string) (int, error) { + return e.CountStore.GetCount(context.TODO(), key, period) +} + +func (e *Engine) InSet(name, val string) (bool, error) { + // XXX: implement + return false, nil +} diff --git a/automod/event.go b/automod/event.go new file mode 100644 index 000000000..31c813a5c --- /dev/null +++ b/automod/event.go @@ -0,0 +1,122 @@ +package automod + +import ( + "github.com/bluesky-social/indigo/atproto/identity" +) + +type ModReport struct { + Reason string + Comment string +} + +// information about a repo/account/identity, always pre-populated and relevant to many rules +type AccountMeta struct { + Identity identity.Identity + // TODO: createdAt / age +} + +// base type for events. events are both containers for data about the event itself (similar to an HTTP request type); aggregate results and state (counters, mod actions) to be persisted after all rules are run; and act as an API for additional network reads and operations. 
+type Event struct { + engine Engine + Err *error + Account AccountMeta + CounterIncrements []string + AccountLabels []string + AccountFlags []string + AccountReports []ModReport + AccountTakedown bool +} + +func (e *Event) CountTotal(key string) int { + v, err := e.engine.GetCount(key, PeriodTotal) + if err != nil { + e.Err = &err + return 0 + } + return v +} + +func (e *Event) CountDay(key string) int { + v, err := e.engine.GetCount(key, PeriodDay) + if err != nil { + e.Err = &err + return 0 + } + return v +} + +func (e *Event) CountHour(key string) int { + v, err := e.engine.GetCount(key, PeriodHour) + if err != nil { + e.Err = &err + return 0 + } + return v +} + +func (e *Event) InSet(name, val string) bool { + v, err := e.engine.InSet(name, val) + if err != nil { + e.Err = &err + return false + } + return v +} + +func (e *Event) IncrementCounter(key string) { + e.CounterIncrements = append(e.CounterIncrements, key) +} + +func (e *Event) TakedownAccount() { + e.AccountTakedown = true +} + +func (e *Event) AddLabelAccount(val string) { + e.AccountLabels = append(e.AccountLabels, val) +} + +func (e *Event) AddFlag(val string) { + e.AccountFlags = append(e.AccountFlags, val) +} + +func (e *Event) ReportAccount(reason, comment string) { + e.AccountReports = append(e.AccountReports, ModReport{Reason: reason, Comment: comment}) +} + +type IdentityEvent struct { + Event +} + +type RecordEvent struct { + Event + RecordLabels []string + RecordTakedown bool + RecordReports []ModReport + RecordFlags []string + // TODO: commit metadata +} + +func (e *RecordEvent) Takedown() { + e.RecordTakedown = true +} + +func (e *RecordEvent) AddLabel(val string) { + e.RecordLabels = append(e.RecordLabels, val) +} + +func (e *RecordEvent) AddFlag(val string) { + e.RecordFlags = append(e.RecordFlags, val) +} + +func (e *RecordEvent) Report(reason, comment string) { + e.RecordReports = append(e.RecordReports, ModReport{Reason: reason, Comment: comment}) +} + +type PostEvent struct { + 
RecordEvent + // TODO: thread context +} + +type IdentityRuleFunc = func(evt IdentityEvent) error +type RecordRuleFunc = func(evt RecordEvent) error +type PostRuleFunc = func(evt PostEvent) error From 86161b4557af053f28e343334ba951f612991169 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 10 Nov 2023 19:06:03 -0800 Subject: [PATCH 02/35] hepa: initial skelton of daemon --- automod/doc.go | 8 ++ automod/engine.go | 102 ++++++++++++++- automod/event.go | 13 +- cmd/hepa/README.md | 7 ++ cmd/hepa/firehose.go | 287 +++++++++++++++++++++++++++++++++++++++++++ cmd/hepa/main.go | 193 +++++++++++++++++++++++++++++ cmd/hepa/metrics.go | 54 ++++++++ cmd/hepa/server.go | 111 +++++++++++++++++ 8 files changed, 766 insertions(+), 9 deletions(-) create mode 100644 automod/doc.go create mode 100644 cmd/hepa/README.md create mode 100644 cmd/hepa/firehose.go create mode 100644 cmd/hepa/main.go create mode 100644 cmd/hepa/metrics.go create mode 100644 cmd/hepa/server.go diff --git a/automod/doc.go b/automod/doc.go new file mode 100644 index 000000000..c0ba398d4 --- /dev/null +++ b/automod/doc.go @@ -0,0 +1,8 @@ +// Auto-Moderation rules engine for anti-spam and other moderation tasks. +// +// The code in this package includes an "engine" which processes atproto commit events (and identity updates), maintains caches and counters, and pushes moderation decisions to an external mod service (eg, appview). A framework for writing new "rules" for the engine to execute are also provided. +// +// It does not provide label API endpoints like queryLabels; see labelmaker for a self-contained labeling service. +// +// Code for subscribing to a firehose is not included here; see cmd/hepa for a complete service built on this library. 
+package automod diff --git a/automod/engine.go b/automod/engine.go index a680607c3..911eb856c 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -1,11 +1,19 @@ package automod import ( + "bytes" "context" + "fmt" "log/slog" + "strings" "sync" + comatproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + lexutil "github.com/bluesky-social/indigo/lex/util" + "github.com/bluesky-social/indigo/repo" + "github.com/bluesky-social/indigo/repomgr" "github.com/bluesky-social/indigo/xrpc" ) @@ -19,7 +27,7 @@ type Engine struct { CountStore CountStore } -func (e *Engine) ExecuteIdentity() error { +func (e *Engine) ProcessIdentityEvent(t string, did syntax.DID) error { ctx := context.Background() // similar to an HTTP server, we want to recover any panics from rule execution @@ -31,12 +39,28 @@ func (e *Engine) ExecuteIdentity() error { } }() + ident, err := e.Directory.LookupDID(ctx, did) + if err != nil { + return fmt.Errorf("resolving identity: %w", err) + } + if ident == nil { + return fmt.Errorf("identity not found for did: %s", did.String()) + } + + evt := IdentityEvent{ + Event{ + Engine: e, + Account: AccountMeta{Identity: ident}, + }, + } + e.CallIdentityRules(&evt) + _ = ctx return nil } -func (e *Engine) ExecuteCommit() error { - ctx := context.Background() +// this method takes a full firehose commit event. 
it must not be a tooBig +func (e *Engine) ProcessCommit(ctx context.Context, commit *comatproto.SyncSubscribeRepos_Commit) error { // similar to an HTTP server, we want to recover any panics from rule execution defer func() { @@ -47,10 +71,82 @@ func (e *Engine) ExecuteCommit() error { } }() + r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(commit.Blocks)) + if err != nil { + // TODO: handle this case (instead of return nil) + slog.Error("reading repo from car", "size_bytes", len(commit.Blocks), "err", err) + return nil + } + + did, err := syntax.ParseDID(commit.Repo) + if err != nil { + return fmt.Errorf("bad DID syntax in event: %w", err) + } + + ident, err := e.Directory.LookupDID(ctx, did) + if err != nil { + return fmt.Errorf("resolving identity: %w", err) + } + if ident == nil { + return fmt.Errorf("identity not found for did: %s", did.String()) + } + + for _, op := range commit.Ops { + ek := repomgr.EventKind(op.Action) + logOp := slog.With("op_path", op.Path, "op_cid", op.Cid) + switch ek { + case repomgr.EvtKindCreateRecord: + rc, rec, err := r.GetRecord(ctx, op.Path) + if err != nil { + // TODO: handle this case (instead of return nil) + logOp.Error("fetching record from event CAR slice", "err", err) + return nil + } + if lexutil.LexLink(rc) != *op.Cid { + // TODO: handle this case (instead of return nil) + logOp.Error("mismatch in record and op cid", "record_cid", rc) + return nil + } + + if strings.HasPrefix(op.Path, "app.bsky.feed.post/") { + // TODO: handle as a PostEvent specially + } else { + // XXX: pass record in to event + _ = rec + evt := RecordEvent{ + Event{ + Engine: e, + Account: AccountMeta{Identity: ident}, + }, + []string{}, + false, + []ModReport{}, + []string{}, + } + e.CallRecordRules(&evt) + // TODO persist + } + case repomgr.EvtKindUpdateRecord: + slog.Info("ignoring record update", "did", commit.Repo, "seq", commit.Seq, "path", op.Path) + return nil + case repomgr.EvtKindDeleteRecord: + slog.Info("ignoring record deletion", 
"did", commit.Repo, "seq", commit.Seq, "path", op.Path) + return nil + } + } + _ = ctx return nil } +func (e *Engine) CallIdentityRules(evt *IdentityEvent) error { + return nil +} + +func (e *Engine) CallRecordRules(evt *RecordEvent) error { + return nil +} + func (e *Engine) PersistModActions() error { // XXX return nil diff --git a/automod/event.go b/automod/event.go index 31c813a5c..37f1c17ff 100644 --- a/automod/event.go +++ b/automod/event.go @@ -11,13 +11,13 @@ type ModReport struct { // information about a repo/account/identity, always pre-populated and relevant to many rules type AccountMeta struct { - Identity identity.Identity + Identity *identity.Identity // TODO: createdAt / age } // base type for events. events are both containers for data about the event itself (similar to an HTTP request type); aggregate results and state (counters, mod actions) to be persisted after all rules are run; and act as an API for additional network reads and operations. type Event struct { - engine Engine + Engine *Engine Err *error Account AccountMeta CounterIncrements []string @@ -28,7 +28,7 @@ type Event struct { } func (e *Event) CountTotal(key string) int { - v, err := e.engine.GetCount(key, PeriodTotal) + v, err := e.Engine.GetCount(key, PeriodTotal) if err != nil { e.Err = &err return 0 @@ -37,7 +37,7 @@ func (e *Event) CountTotal(key string) int { } func (e *Event) CountDay(key string) int { - v, err := e.engine.GetCount(key, PeriodDay) + v, err := e.Engine.GetCount(key, PeriodDay) if err != nil { e.Err = &err return 0 @@ -46,7 +46,7 @@ func (e *Event) CountDay(key string) int { } func (e *Event) CountHour(key string) int { - v, err := e.engine.GetCount(key, PeriodHour) + v, err := e.Engine.GetCount(key, PeriodHour) if err != nil { e.Err = &err return 0 @@ -55,7 +55,7 @@ func (e *Event) CountHour(key string) int { } func (e *Event) InSet(name, val string) bool { - v, err := e.engine.InSet(name, val) + v, err := e.Engine.InSet(name, val) if err != nil { e.Err = &err 
return false @@ -89,6 +89,7 @@ type IdentityEvent struct { type RecordEvent struct { Event + RecordLabels []string RecordTakedown bool RecordReports []ModReport diff --git a/cmd/hepa/README.md b/cmd/hepa/README.md new file mode 100644 index 000000000..c05311713 --- /dev/null +++ b/cmd/hepa/README.md @@ -0,0 +1,7 @@ + +HEPA +==== + +This is a simple auto-moderation daemon which wraps the automod package. + +The name is a reference to HEPA air filters, which help keep the local atmosphere clean and healthy for humans. diff --git a/cmd/hepa/firehose.go b/cmd/hepa/firehose.go new file mode 100644 index 000000000..dc86b2f75 --- /dev/null +++ b/cmd/hepa/firehose.go @@ -0,0 +1,287 @@ +package main + +import ( + "bytes" + "context" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + comatproto "github.com/bluesky-social/indigo/api/atproto" + //bsky "github.com/bluesky-social/indigo/api/bsky" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/events" + "github.com/bluesky-social/indigo/events/schedulers/autoscaling" + "github.com/bluesky-social/indigo/repo" + + "github.com/carlmjohnson/versioninfo" + "github.com/gorilla/websocket" + "github.com/ipfs/go-cid" + typegen "github.com/whyrusleeping/cbor-gen" +) + +func (s *Server) getLastCursor() (int64, error) { + var lastSeq LastSeq + if err := s.db.Find(&lastSeq).Error; err != nil { + return 0, err + } + + if lastSeq.ID == 0 { + return 0, s.db.Create(&lastSeq).Error + } + + return lastSeq.Seq, nil +} + +func (s *Server) updateLastCursor(curs int64) error { + return s.db.Model(LastSeq{}).Where("id = 1").Update("seq", curs).Error +} + +func (s *Server) Run(ctx context.Context) error { + cur, err := s.getLastCursor() + if err != nil { + return fmt.Errorf("get last cursor: %w", err) + } + + err = s.bfs.LoadJobs(ctx) + if err != nil { + return fmt.Errorf("loading backfill jobs: %w", err) + } + go s.bf.Start() + go s.discoverRepos() + + d := websocket.DefaultDialer + u, err := 
url.Parse(s.bgshost) + if err != nil { + return fmt.Errorf("invalid bgshost URI: %w", err) + } + u.Path = "xrpc/com.atproto.sync.subscribeRepos" + if cur != 0 { + u.RawQuery = fmt.Sprintf("cursor=%d", cur) + } + con, _, err := d.Dial(u.String(), http.Header{ + "User-Agent": []string{fmt.Sprintf("palomar/%s", versioninfo.Short())}, + }) + if err != nil { + return fmt.Errorf("events dial failed: %w", err) + } + + rsc := &events.RepoStreamCallbacks{ + RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { + ctx := context.Background() + ctx, span := tracer.Start(ctx, "RepoCommit") + defer span.End() + + defer func() { + if evt.Seq%50 == 0 { + if err := s.updateLastCursor(evt.Seq); err != nil { + s.logger.Error("failed to persist cursor", "err", err) + } + } + }() + logEvt := s.logger.With("repo", evt.Repo, "rev", evt.Rev, "seq", evt.Seq) + if evt.TooBig && evt.Prev != nil { + // TODO: handle this case (instead of return nil) + logEvt.Error("skipping non-genesis tooBig events for now") + return nil + } + + if evt.TooBig { + if err := s.processTooBigCommit(ctx, evt); err != nil { + // TODO: handle this case (instead of return nil) + logEvt.Error("failed to process tooBig event", "err", err) + return nil + } + + return nil + } + + if !s.skipBackfill { + // Check if we've backfilled this repo, if not, we should enqueue it + job, err := s.bfs.GetJob(ctx, evt.Repo) + if job == nil && err == nil { + logEvt.Info("enqueueing backfill job for new repo") + if err := s.bfs.EnqueueJob(evt.Repo); err != nil { + logEvt.Warn("failed to enqueue backfill job", "err", err) + } + } + } + + if err = s.engine.ProcessCommit(ctx, evt); err != nil { + // TODO: handle this, instead of return nul + logEvt.Error("failed to process commit", "err", err) + return nil + } + + return nil + + }, + RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { + ctx := context.Background() + ctx, span := tracer.Start(ctx, "RepoHandle") + defer span.End() + + did, err := 
syntax.ParseDID(evt.Did) + if err != nil { + s.logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) + return nil + } + if err := s.engine.ProcessIdentityEvent("handle", did); err != nil { + s.logger.Error("processing handle update failed", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) + } + return nil + }, + } + + return events.HandleRepoStream( + ctx, con, autoscaling.NewScheduler( + autoscaling.DefaultAutoscaleSettings(), + s.bgshost, + rsc.EventHandler, + ), + ) +} + +func (s *Server) discoverRepos() { + ctx := context.Background() + log := s.logger.With("func", "discoverRepos") + log.Info("starting repo discovery") + + cursor := "" + limit := int64(500) + + totalEnqueued := 0 + totalSkipped := 0 + totalErrored := 0 + + for { + resp, err := comatproto.SyncListRepos(ctx, s.bgsxrpc, cursor, limit) + if err != nil { + log.Error("failed to list repos", "err", err) + time.Sleep(5 * time.Second) + continue + } + log.Info("got repo page", "count", len(resp.Repos), "cursor", resp.Cursor) + enqueued := 0 + skipped := 0 + errored := 0 + for _, repo := range resp.Repos { + job, err := s.bfs.GetJob(ctx, repo.Did) + if job == nil && err == nil { + log.Info("enqueuing backfill job for new repo", "did", repo.Did) + if err := s.bfs.EnqueueJob(repo.Did); err != nil { + log.Warn("failed to enqueue backfill job", "err", err) + errored++ + continue + } + enqueued++ + } else if err != nil { + log.Warn("failed to get backfill job", "did", repo.Did, "err", err) + errored++ + } else { + skipped++ + } + } + log.Info("enqueued repos", "enqueued", enqueued, "skipped", skipped, "errored", errored) + totalEnqueued += enqueued + totalSkipped += skipped + totalErrored += errored + if resp.Cursor != nil && *resp.Cursor != "" { + cursor = *resp.Cursor + } else { + break + } + } + + log.Info("finished repo discovery", "totalEnqueued", totalEnqueued, "totalSkipped", totalSkipped, "totalErrored", totalErrored) +} + +func 
(s *Server) handleCreateOrUpdate(ctx context.Context, rawDID string, path string, recP *typegen.CBORMarshaler, rcid *cid.Cid) error { + // Since this gets called in a backfill job, we need to check if the path is a post or profile + if !strings.Contains(path, "app.bsky.feed.post") && !strings.Contains(path, "app.bsky.actor.profile") { + return nil + } + + did, err := syntax.ParseDID(rawDID) + if err != nil { + return fmt.Errorf("bad DID syntax in event: %w", err) + } + + ident, err := s.dir.LookupDID(ctx, did) + if err != nil { + return fmt.Errorf("resolving identity: %w", err) + } + if ident == nil { + return fmt.Errorf("identity not found for did: %s", did.String()) + } + rec := *recP + + _ = rec + /* XXX: + switch rec := rec.(type) { + case *bsky.FeedPost: + // XXX: if err := s.indexPost(ctx, ident, rec, path, *rcid); err != nil { + _ = rec + if err := s.engine.ProcessCommit(ctx, evt); err != nil { + postsFailed.Inc() + return fmt.Errorf("processing post for %s: %w", did.String(), err) + } + postsIndexed.Inc() + case *bsky.ActorProfile: + // XXX: if err := s.indexProfile(ctx, ident, rec, path, *rcid); err != nil { + if err := s.engine.ProcessCommit(ctx, evt); err != nil { + profilesFailed.Inc() + return fmt.Errorf("processing profile for %s: %w", did.String(), err) + } + profilesIndexed.Inc() + default: + } + */ + return nil +} + +func (s *Server) handleDelete(ctx context.Context, rawDID, path string) error { + // TODO: just ignoring for now + return nil +} + +func (s *Server) processTooBigCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error { + repodata, err := comatproto.SyncGetRepo(ctx, s.bgsxrpc, evt.Repo, "") + if err != nil { + return err + } + + r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(repodata)) + if err != nil { + return err + } + + did, err := syntax.ParseDID(evt.Repo) + if err != nil { + return fmt.Errorf("bad DID in repo event: %w", err) + } + + return r.ForEach(ctx, "", func(k string, v cid.Cid) error { + if 
strings.HasPrefix(k, "app.bsky.feed.post") || strings.HasPrefix(k, "app.bsky.actor.profile") { + rcid, rec, err := r.GetRecord(ctx, k) + if err != nil { + // TODO: handle this case (instead of return nil) + s.logger.Error("failed to get record from repo checkout", "path", k, "err", err) + return nil + } + + // TODO: may want to treat this as a regular event? + _ = rcid + _ = did + _ = rec + /* XXX: + if err := s.engine.ProcessRecord(ctx, did, m, rec); err != nil { + return fmt.Errorf("processing record from tooBig commit: %w", err) + } + */ + } + return nil + }) +} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go new file mode 100644 index 000000000..a6cfb07e2 --- /dev/null +++ b/cmd/hepa/main.go @@ -0,0 +1,193 @@ +package main + +import ( + "context" + "fmt" + "log" + "log/slog" + "net/http" + "os" + "time" + + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/util/cliutil" + + "github.com/carlmjohnson/versioninfo" + _ "github.com/joho/godotenv/autoload" + cli "github.com/urfave/cli/v2" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/sdk/resource" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + "golang.org/x/time/rate" +) + +func main() { + if err := run(os.Args); err != nil { + slog.Error("exiting", "err", err) + os.Exit(-1) + } +} + +func run(args []string) error { + + app := cli.App{ + Name: "hepa", + Usage: "automod daemon (cleans the atmosphere)", + Version: versioninfo.Short(), + } + + app.Flags = []cli.Flag{ + &cli.StringFlag{ + Name: "atp-bgs-host", + Usage: "hostname and port of BGS to subscribe to", + Value: "wss://bsky.social", + EnvVars: []string{"ATP_BGS_HOST"}, + }, + &cli.StringFlag{ + Name: "atp-plc-host", + Usage: "method, hostname, and port of PLC registry", + Value: "https://plc.directory", + EnvVars: []string{"ATP_PLC_HOST"}, + }, + 
&cli.IntFlag{ + Name: "max-metadb-connections", + EnvVars: []string{"MAX_METADB_CONNECTIONS"}, + Value: 40, + }, + } + + app.Commands = []*cli.Command{ + runCmd, + } + + return app.Run(args) +} + +var runCmd = &cli.Command{ + Name: "run", + Usage: "run the service", + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "database-url", + Value: "sqlite://data/hepa/automod.db", + EnvVars: []string{"DATABASE_URL"}, + }, + &cli.BoolFlag{ + Name: "readonly", + EnvVars: []string{"HEPA_READONLY", "READONLY"}, + }, + &cli.StringFlag{ + Name: "bind", + Usage: "IP or address, and port, to listen on for HTTP APIs", + Value: ":3999", + EnvVars: []string{"HEPA_BIND"}, + }, + &cli.StringFlag{ + Name: "metrics-listen", + Usage: "IP or address, and port, to listen on for metrics APIs", + Value: ":3998", + EnvVars: []string{"HEPA_METRICS_LISTEN"}, + }, + &cli.IntFlag{ + Name: "bgs-sync-rate-limit", + Usage: "max repo sync (checkout) requests per second to upstream (BGS)", + Value: 8, + EnvVars: []string{"HEPA_BGS_SYNC_RATE_LIMIT"}, + }, + &cli.IntFlag{ + Name: "plc-rate-limit", + Usage: "max number of requests per second to PLC registry", + Value: 100, + EnvVars: []string{"HEPA_PLC_RATE_LIMIT"}, + }, + }, + Action: func(cctx *cli.Context) error { + ctx := context.Background() + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelInfo, + })) + slog.SetDefault(logger) + + // Enable OTLP HTTP exporter + // For relevant environment variables: + // https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables + // At a minimum, you need to set + // OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 + if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" { + slog.Info("setting up trace exporter", "endpoint", ep) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + exp, err := otlptracehttp.New(ctx) + if err != nil { + log.Fatal("failed to create trace exporter", "error", err) + } + defer 
func() { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + if err := exp.Shutdown(ctx); err != nil { + slog.Error("failed to shutdown trace exporter", "error", err) + } + }() + + tp := tracesdk.NewTracerProvider( + tracesdk.WithBatcher(exp), + tracesdk.WithResource(resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String("hepa"), + attribute.String("env", os.Getenv("ENVIRONMENT")), // DataDog + attribute.String("environment", os.Getenv("ENVIRONMENT")), // Others + attribute.Int64("ID", 1), + )), + ) + otel.SetTracerProvider(tp) + } + + db, err := cliutil.SetupDatabase(cctx.String("database-url"), cctx.Int("max-metadb-connections")) + if err != nil { + return err + } + + // TODO: replace this with "bingo" resolver? + base := identity.BaseDirectory{ + PLCURL: cctx.String("atp-plc-host"), + HTTPClient: http.Client{ + Timeout: time.Second * 15, + }, + PLCLimiter: rate.NewLimiter(rate.Limit(cctx.Int("plc-rate-limit")), 1), + TryAuthoritativeDNS: true, + SkipDNSDomainSuffixes: []string{".bsky.social"}, + } + dir := identity.NewCacheDirectory(&base, 1_500_000, time.Hour*24, time.Minute*2) + + srv, err := NewServer( + db, + &dir, + Config{ + BGSHost: cctx.String("atp-bgs-host"), + Logger: logger, + BGSSyncRateLimit: cctx.Int("bgs-sync-rate-limit"), + }, + ) + if err != nil { + return err + } + + go func() { + if err := srv.RunMetrics(cctx.String("metrics-listen")); err != nil { + slog.Error("failed to start metrics endpoint", "error", err) + panic(fmt.Errorf("failed to start metrics endpoint: %w", err)) + } + }() + + // TODO: if cctx.Bool("readonly") ... 
+ + if err := srv.Run(ctx); err != nil { + return fmt.Errorf("failed to run automod service: %w", err) + } + return nil + }, +} diff --git a/cmd/hepa/metrics.go b/cmd/hepa/metrics.go new file mode 100644 index 000000000..704dad317 --- /dev/null +++ b/cmd/hepa/metrics.go @@ -0,0 +1,54 @@ +package main + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "go.opentelemetry.io/otel" +) + +var tracer = otel.Tracer("hepa") + +var postsReceived = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_posts_received", + Help: "Number of posts received", +}) + +var postsIndexed = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_posts_indexed", + Help: "Number of posts indexed", +}) + +var postsFailed = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_posts_failed", + Help: "Number of posts that failed indexing", +}) + +var postsDeleted = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_posts_deleted", + Help: "Number of posts deleted", +}) + +var profilesReceived = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_profiles_received", + Help: "Number of profiles received", +}) + +var profilesIndexed = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_profiles_indexed", + Help: "Number of profiles indexed", +}) + +var profilesFailed = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_profiles_failed", + Help: "Number of profiles that failed indexing", +}) + +var profilesDeleted = promauto.NewCounter(prometheus.CounterOpts{ + Name: "hepa_profiles_deleted", + Help: "Number of profiles deleted", +}) + +var currentSeq = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "hepa_current_seq", + Help: "Current sequence number", +}) diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go new file mode 100644 index 000000000..385924035 --- /dev/null +++ b/cmd/hepa/server.go @@ -0,0 +1,111 @@ +package main + +import ( + "fmt" + "log/slog" + "net/http" + "os" + 
"strings" + + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/backfill" + "github.com/bluesky-social/indigo/xrpc" + + "github.com/prometheus/client_golang/prometheus/promhttp" + gorm "gorm.io/gorm" +) + +type Server struct { + db *gorm.DB + bgshost string + bgsxrpc *xrpc.Client + dir identity.Directory + logger *slog.Logger + engine *automod.Engine + skipBackfill bool + + bfs *backfill.Gormstore + bf *backfill.Backfiller +} + +type LastSeq struct { + ID uint `gorm:"primarykey"` + Seq int64 +} + +type Config struct { + BGSHost string + Logger *slog.Logger + BGSSyncRateLimit int + MaxEventConcurrency int +} + +func NewServer(db *gorm.DB, dir identity.Directory, config Config) (*Server, error) { + logger := config.Logger + if logger == nil { + logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelInfo, + })) + } + + logger.Info("running database migrations") + db.AutoMigrate(&LastSeq{}) + db.AutoMigrate(&backfill.GormDBJob{}) + + bgsws := config.BGSHost + if !strings.HasPrefix(bgsws, "ws") { + return nil, fmt.Errorf("specified bgs host must include 'ws://' or 'wss://'") + } + + bgshttp := strings.Replace(bgsws, "ws", "http", 1) + bgsxrpc := &xrpc.Client{ + Host: bgshttp, + } + + engine := automod.Engine{} + + s := &Server{ + db: db, + bgshost: config.BGSHost, // NOTE: the original URL, not 'bgshttp' + bgsxrpc: bgsxrpc, + dir: dir, + logger: logger, + engine: &engine, + skipBackfill: true, + } + + bfstore := backfill.NewGormstore(db) + opts := backfill.DefaultBackfillOptions() + if config.BGSSyncRateLimit > 0 { + opts.SyncRequestsPerSecond = config.BGSSyncRateLimit + opts.ParallelBackfills = 2 * config.BGSSyncRateLimit + } else { + opts.SyncRequestsPerSecond = 8 + } + opts.CheckoutPath = fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo", bgshttp) + if config.MaxEventConcurrency > 0 { + opts.ParallelRecordCreates = config.MaxEventConcurrency + } else { + 
opts.ParallelRecordCreates = 20 + } + opts.NSIDFilter = "app.bsky." + bf := backfill.NewBackfiller( + "hepa", + bfstore, + s.handleCreateOrUpdate, + s.handleCreateOrUpdate, + s.handleDelete, + opts, + ) + + s.bfs = bfstore + s.bf = bf + + return s, nil +} + +func (s *Server) RunMetrics(listen string) error { + http.Handle("/metrics", promhttp.Handler()) + return http.ListenAndServe(listen, nil) +} From 5541d2c0631f165defa005d3afb7345239573cfa Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 14:52:57 -0800 Subject: [PATCH 03/35] automod: building --- automod/engine.go | 4 ++++ cmd/hepa/firehose.go | 5 +++++ cmd/hepa/main.go | 2 +- cmd/hepa/server.go | 4 +++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/automod/engine.go b/automod/engine.go index 911eb856c..ccd9fca8d 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -63,6 +63,7 @@ func (e *Engine) ProcessIdentityEvent(t string, did syntax.DID) error { func (e *Engine) ProcessCommit(ctx context.Context, commit *comatproto.SyncSubscribeRepos_Commit) error { // similar to an HTTP server, we want to recover any panics from rule execution + /* defer func() { if r := recover(); r != nil { slog.Error("automod event execution exception", "err", r) @@ -70,6 +71,7 @@ func (e *Engine) ProcessCommit(ctx context.Context, commit *comatproto.SyncSubsc // TODO: circuit-break on repeated panics? 
} }() + */ r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(commit.Blocks)) if err != nil { @@ -140,10 +142,12 @@ func (e *Engine) ProcessCommit(ctx context.Context, commit *comatproto.SyncSubsc } func (e *Engine) CallIdentityRules(evt *IdentityEvent) error { + slog.Info("calling rules on identity event") return nil } func (e *Engine) CallRecordRules(evt *RecordEvent) error { + slog.Info("calling rules on record event") return nil } diff --git a/cmd/hepa/firehose.go b/cmd/hepa/firehose.go index dc86b2f75..1f2ca19fe 100644 --- a/cmd/hepa/firehose.go +++ b/cmd/hepa/firehose.go @@ -145,6 +145,11 @@ func (s *Server) Run(ctx context.Context) error { } func (s *Server) discoverRepos() { + if s.skipBackfill { + s.logger.Info("skipping repo discovery") + return + } + ctx := context.Background() log := s.logger.With("func", "discoverRepos") log.Info("starting repo discovery") diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index a6cfb07e2..cf11a943f 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -43,7 +43,7 @@ func run(args []string) error { &cli.StringFlag{ Name: "atp-bgs-host", Usage: "hostname and port of BGS to subscribe to", - Value: "wss://bsky.social", + Value: "wss://bsky.network", EnvVars: []string{"ATP_BGS_HOST"}, }, &cli.StringFlag{ diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 385924035..6affc8f5e 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -63,7 +63,9 @@ func NewServer(db *gorm.DB, dir identity.Directory, config Config) (*Server, err Host: bgshttp, } - engine := automod.Engine{} + engine := automod.Engine{ + Directory: dir, + } s := &Server{ db: db, From eb7f8b839198e51e8862ad7d0415eb9be1c660a1 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 17:20:15 -0800 Subject: [PATCH 04/35] automod: significantly simplify firehose consumption, for now --- automod/countstore.go | 6 +- automod/engine.go | 142 ++++++++------------ cmd/hepa/consumer.go | 123 ++++++++++++++++++ cmd/hepa/firehose.go | 292 
------------------------------------------ cmd/hepa/main.go | 97 ++------------ cmd/hepa/metrics.go | 54 -------- cmd/hepa/otel.go | 56 ++++++++ cmd/hepa/server.go | 79 ++---------- 8 files changed, 258 insertions(+), 591 deletions(-) create mode 100644 cmd/hepa/consumer.go delete mode 100644 cmd/hepa/firehose.go delete mode 100644 cmd/hepa/metrics.go create mode 100644 cmd/hepa/otel.go diff --git a/automod/countstore.go b/automod/countstore.go index ed1e94f67..792f82df6 100644 --- a/automod/countstore.go +++ b/automod/countstore.go @@ -15,7 +15,7 @@ const ( type CountStore interface { GetCount(ctx context.Context, key, period string) (int, error) - Increment(ctx context.Context, key string) (int, error) + Increment(ctx context.Context, key string) error } // TODO: this implementation isn't race-safe (yet)! @@ -45,7 +45,7 @@ func PeriodKey(key, period string) string { } } -func (s *MemCountStore) GetCount(ctx context.Context, key, period string) (int, error) { +func (s MemCountStore) GetCount(ctx context.Context, key, period string) (int, error) { v, ok := s.Counts[PeriodKey(key, period)] if !ok { return 0, nil @@ -53,7 +53,7 @@ func (s *MemCountStore) GetCount(ctx context.Context, key, period string) (int, return v, nil } -func (s *MemCountStore) Increment(ctx context.Context, key string) error { +func (s MemCountStore) Increment(ctx context.Context, key string) error { for _, p := range []string{PeriodTotal, PeriodDay, PeriodHour} { k := PeriodKey(key, p) v, ok := s.Counts[k] diff --git a/automod/engine.go b/automod/engine.go index ccd9fca8d..c1bd0919f 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -1,41 +1,36 @@ package automod import ( - "bytes" "context" "fmt" "log/slog" "strings" "sync" - comatproto "github.com/bluesky-social/indigo/api/atproto" + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" - lexutil 
"github.com/bluesky-social/indigo/lex/util" - "github.com/bluesky-social/indigo/repo" - "github.com/bluesky-social/indigo/repomgr" "github.com/bluesky-social/indigo/xrpc" ) -// runtime for executing rules, managing state, and recording moderation actions +// runtime for executing rules, managing state, and recording moderation actions. +// +// TODO: careful when initializing: several fields should not be null or zero, even though they are pointer type. type Engine struct { + Logger *slog.Logger + Directory identity.Directory // current rule sets. will eventually be possible to swap these out at runtime RulesMap sync.Map - Directory identity.Directory // used to persist moderation actions in mod service (optional) AdminClient *xrpc.Client CountStore CountStore } -func (e *Engine) ProcessIdentityEvent(t string, did syntax.DID) error { - ctx := context.Background() - +func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax.DID) error { // similar to an HTTP server, we want to recover any panics from rule execution defer func() { if r := recover(); r != nil { - slog.Error("automod event execution exception", "err", r) - // TODO: mark repo as dirty? - // TODO: circuit-break on repeated panics? + e.Logger.Error("automod event execution exception", "err", r) } }() @@ -53,37 +48,18 @@ func (e *Engine) ProcessIdentityEvent(t string, did syntax.DID) error { Account: AccountMeta{Identity: ident}, }, } - e.CallIdentityRules(&evt) - - _ = ctx + // TODO: call rules + _ = evt return nil } -// this method takes a full firehose commit event. 
it must not be a tooBig -func (e *Engine) ProcessCommit(ctx context.Context, commit *comatproto.SyncSubscribeRepos_Commit) error { - +func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, rec any) error { // similar to an HTTP server, we want to recover any panics from rule execution - /* defer func() { if r := recover(); r != nil { - slog.Error("automod event execution exception", "err", r) - // TODO: mark repo as dirty? - // TODO: circuit-break on repeated panics? + e.Logger.Error("automod event execution exception", "err", r) } }() - */ - - r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(commit.Blocks)) - if err != nil { - // TODO: handle this case (instead of return nil) - slog.Error("reading repo from car", "size_bytes", len(commit.Blocks), "err", err) - return nil - } - - did, err := syntax.ParseDID(commit.Repo) - if err != nil { - return fmt.Errorf("bad DID syntax in event: %w", err) - } ident, err := e.Directory.LookupDID(ctx, did) if err != nil { @@ -92,75 +68,61 @@ func (e *Engine) ProcessCommit(ctx context.Context, commit *comatproto.SyncSubsc if ident == nil { return fmt.Errorf("identity not found for did: %s", did.String()) } + collection := strings.SplitN(path, "/", 2)[0] - for _, op := range commit.Ops { - ek := repomgr.EventKind(op.Action) - logOp := slog.With("op_path", op.Path, "op_cid", op.Cid) - switch ek { - case repomgr.EvtKindCreateRecord: - rc, rec, err := r.GetRecord(ctx, op.Path) - if err != nil { - // TODO: handle this case (instead of return nil) - logOp.Error("fetching record from event CAR slice", "err", err) - return nil - } - if lexutil.LexLink(rc) != *op.Cid { - // TODO: handle this case (instead of return nil) - logOp.Error("mismatch in record and op cid", "record_cid", rc) - return nil - } - - if strings.HasPrefix(op.Path, "app.bsky.feed.post/") { - // TODO: handle as a PostEvent specially - } else { - // XXX: pass record in to event - _ = rec - evt := RecordEvent{ - Event{ - Engine: e, - Account: 
AccountMeta{Identity: ident}, - }, - []string{}, - false, - []ModReport{}, - []string{}, - } - e.CallRecordRules(&evt) - // TODO persist - } - case repomgr.EvtKindUpdateRecord: - slog.Info("ignoring record update", "did", commit.Repo, "seq", commit.Seq, "path", op.Path) - return nil - case repomgr.EvtKindDeleteRecord: - slog.Info("ignoring record deletion", "did", commit.Repo, "seq", commit.Seq, "path", op.Path) - return nil + switch collection { + case "app.bsky.feed.post": + post, ok := rec.(*appbsky.FeedPost) + if !ok { + return fmt.Errorf("mismatch between collection (%s) and type", collection) } + evt := e.NewPostEvent(ident, path, post) + e.Logger.Info("processing post", "did", ident.DID, "path", path) + _ = evt + // TODO: call rules + default: + evt := e.NewRecordEvent(ident, path, rec) + e.Logger.Info("processing record", "did", ident.DID, "path", path) + _ = evt + // TODO: call rules } - _ = ctx return nil } -func (e *Engine) CallIdentityRules(evt *IdentityEvent) error { - slog.Info("calling rules on identity event") - return nil -} - -func (e *Engine) CallRecordRules(evt *RecordEvent) error { - slog.Info("calling rules on record event") - return nil +func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbsky.FeedPost) PostEvent { + return PostEvent{ + RecordEvent { + Event{ + Engine: e, + Account: AccountMeta{Identity: ident}, + }, + []string{}, + false, + []ModReport{}, + []string{}, + }, + } } -func (e *Engine) PersistModActions() error { - // XXX - return nil +func (e *Engine) NewRecordEvent(ident *identity.Identity, path string, rec any) RecordEvent { + return RecordEvent{ + Event{ + Engine: e, + Account: AccountMeta{Identity: ident}, + }, + []string{}, + false, + []ModReport{}, + []string{}, + } } func (e *Engine) GetCount(key, period string) (int, error) { return e.CountStore.GetCount(context.TODO(), key, period) } -func (e *Engine) InSet(name, val string) (bool, error) { +func (e *Engine) InSet(setName, val string) (bool, 
error) { // XXX: implement return false, nil } diff --git a/cmd/hepa/consumer.go b/cmd/hepa/consumer.go new file mode 100644 index 000000000..9634fa85f --- /dev/null +++ b/cmd/hepa/consumer.go @@ -0,0 +1,123 @@ +package main + +import ( + "bytes" + "context" + "fmt" + "net/http" + "net/url" + + comatproto "github.com/bluesky-social/indigo/api/atproto" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/events/schedulers/autoscaling" + lexutil "github.com/bluesky-social/indigo/lex/util" + + "github.com/bluesky-social/indigo/events" + "github.com/bluesky-social/indigo/repo" + "github.com/bluesky-social/indigo/repomgr" + "github.com/carlmjohnson/versioninfo" + "github.com/gorilla/websocket" +) + +func (s *Server) RunConsumer(ctx context.Context) error { + + // TODO: persist cursor in a database or local disk + cur := 0 + + dialer := websocket.DefaultDialer + u, err := url.Parse(s.bgshost) + if err != nil { + return fmt.Errorf("invalid bgshost URI: %w", err) + } + u.Path = "xrpc/com.atproto.sync.subscribeRepos" + if cur != 0 { + u.RawQuery = fmt.Sprintf("cursor=%d", cur) + } + s.logger.Info("subscribing to repo event stream", "upstream", s.bgshost, "cursor", cur) + con, _, err := dialer.Dial(u.String(), http.Header{ + "User-Agent": []string{fmt.Sprintf("hepa/%s", versioninfo.Short())}, + }) + if err != nil { + return fmt.Errorf("subscribing to firehose failed (dialing): %w", err) + } + + rsc := &events.RepoStreamCallbacks{ + RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { + return s.HandleRepoCommit(ctx, evt) + }, + RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { + did, err := syntax.ParseDID(evt.Did) + if err != nil { + s.logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) + return nil + } + if err := s.engine.ProcessIdentityEvent(ctx, "handle", did); err != nil { + s.logger.Error("processing handle update failed", "did", evt.Did, 
"handle", evt.Handle, "seq", evt.Seq, "err", err) + } + return nil + }, + // TODO: other event callbacks as needed + } + + return events.HandleRepoStream( + ctx, con, autoscaling.NewScheduler( + autoscaling.DefaultAutoscaleSettings(), + s.bgshost, + rsc.EventHandler, + ), + ) +} + +// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better. +func (s *Server) HandleRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error { + + logger := s.logger.With("event", "commit", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq) + // XXX: debug, not info + logger.Info("received commit event") + + if evt.TooBig { + logger.Warn("skipping tooBig events for now") + return nil + } + + did, err := syntax.ParseDID(evt.Repo) + if err != nil { + logger.Error("bad DID syntax in event", "err", err) + return nil + } + + rr, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(evt.Blocks)) + if err != nil { + logger.Error("failed to read repo from car", "err", err) + return nil + } + + for _, op := range evt.Ops { + logger = logger.With("eventKind", op.Action, "path", op.Path) + + ek := repomgr.EventKind(op.Action) + switch ek { + case repomgr.EvtKindCreateRecord: + // read the record from blocks, and verify CID + rc, rec, err := rr.GetRecord(ctx, op.Path) + if err != nil { + logger.Error("reading record from event blocks (CAR)", "err", err) + break + } + if op.Cid == nil || lexutil.LexLink(rc) != *op.Cid { + logger.Error("mismatch between commit op CID and record block", "recordCID", rc, "opCID", op.Cid) + break + } + + err = s.engine.ProcessRecord(ctx, did, op.Path, rec) + if err != nil { + logger.Error("engine failed to process record", "err", err) + continue + } + default: + // TODO: other event types: update, delete + } + } + + return nil +} diff --git a/cmd/hepa/firehose.go b/cmd/hepa/firehose.go deleted file mode 100644 index 1f2ca19fe..000000000 --- a/cmd/hepa/firehose.go +++ /dev/null @@ -1,292 
+0,0 @@ -package main - -import ( - "bytes" - "context" - "fmt" - "net/http" - "net/url" - "strings" - "time" - - comatproto "github.com/bluesky-social/indigo/api/atproto" - //bsky "github.com/bluesky-social/indigo/api/bsky" - "github.com/bluesky-social/indigo/atproto/syntax" - "github.com/bluesky-social/indigo/events" - "github.com/bluesky-social/indigo/events/schedulers/autoscaling" - "github.com/bluesky-social/indigo/repo" - - "github.com/carlmjohnson/versioninfo" - "github.com/gorilla/websocket" - "github.com/ipfs/go-cid" - typegen "github.com/whyrusleeping/cbor-gen" -) - -func (s *Server) getLastCursor() (int64, error) { - var lastSeq LastSeq - if err := s.db.Find(&lastSeq).Error; err != nil { - return 0, err - } - - if lastSeq.ID == 0 { - return 0, s.db.Create(&lastSeq).Error - } - - return lastSeq.Seq, nil -} - -func (s *Server) updateLastCursor(curs int64) error { - return s.db.Model(LastSeq{}).Where("id = 1").Update("seq", curs).Error -} - -func (s *Server) Run(ctx context.Context) error { - cur, err := s.getLastCursor() - if err != nil { - return fmt.Errorf("get last cursor: %w", err) - } - - err = s.bfs.LoadJobs(ctx) - if err != nil { - return fmt.Errorf("loading backfill jobs: %w", err) - } - go s.bf.Start() - go s.discoverRepos() - - d := websocket.DefaultDialer - u, err := url.Parse(s.bgshost) - if err != nil { - return fmt.Errorf("invalid bgshost URI: %w", err) - } - u.Path = "xrpc/com.atproto.sync.subscribeRepos" - if cur != 0 { - u.RawQuery = fmt.Sprintf("cursor=%d", cur) - } - con, _, err := d.Dial(u.String(), http.Header{ - "User-Agent": []string{fmt.Sprintf("palomar/%s", versioninfo.Short())}, - }) - if err != nil { - return fmt.Errorf("events dial failed: %w", err) - } - - rsc := &events.RepoStreamCallbacks{ - RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { - ctx := context.Background() - ctx, span := tracer.Start(ctx, "RepoCommit") - defer span.End() - - defer func() { - if evt.Seq%50 == 0 { - if err := 
s.updateLastCursor(evt.Seq); err != nil { - s.logger.Error("failed to persist cursor", "err", err) - } - } - }() - logEvt := s.logger.With("repo", evt.Repo, "rev", evt.Rev, "seq", evt.Seq) - if evt.TooBig && evt.Prev != nil { - // TODO: handle this case (instead of return nil) - logEvt.Error("skipping non-genesis tooBig events for now") - return nil - } - - if evt.TooBig { - if err := s.processTooBigCommit(ctx, evt); err != nil { - // TODO: handle this case (instead of return nil) - logEvt.Error("failed to process tooBig event", "err", err) - return nil - } - - return nil - } - - if !s.skipBackfill { - // Check if we've backfilled this repo, if not, we should enqueue it - job, err := s.bfs.GetJob(ctx, evt.Repo) - if job == nil && err == nil { - logEvt.Info("enqueueing backfill job for new repo") - if err := s.bfs.EnqueueJob(evt.Repo); err != nil { - logEvt.Warn("failed to enqueue backfill job", "err", err) - } - } - } - - if err = s.engine.ProcessCommit(ctx, evt); err != nil { - // TODO: handle this, instead of return nul - logEvt.Error("failed to process commit", "err", err) - return nil - } - - return nil - - }, - RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { - ctx := context.Background() - ctx, span := tracer.Start(ctx, "RepoHandle") - defer span.End() - - did, err := syntax.ParseDID(evt.Did) - if err != nil { - s.logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) - return nil - } - if err := s.engine.ProcessIdentityEvent("handle", did); err != nil { - s.logger.Error("processing handle update failed", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) - } - return nil - }, - } - - return events.HandleRepoStream( - ctx, con, autoscaling.NewScheduler( - autoscaling.DefaultAutoscaleSettings(), - s.bgshost, - rsc.EventHandler, - ), - ) -} - -func (s *Server) discoverRepos() { - if s.skipBackfill { - s.logger.Info("skipping repo discovery") - return - } - - ctx := 
context.Background() - log := s.logger.With("func", "discoverRepos") - log.Info("starting repo discovery") - - cursor := "" - limit := int64(500) - - totalEnqueued := 0 - totalSkipped := 0 - totalErrored := 0 - - for { - resp, err := comatproto.SyncListRepos(ctx, s.bgsxrpc, cursor, limit) - if err != nil { - log.Error("failed to list repos", "err", err) - time.Sleep(5 * time.Second) - continue - } - log.Info("got repo page", "count", len(resp.Repos), "cursor", resp.Cursor) - enqueued := 0 - skipped := 0 - errored := 0 - for _, repo := range resp.Repos { - job, err := s.bfs.GetJob(ctx, repo.Did) - if job == nil && err == nil { - log.Info("enqueuing backfill job for new repo", "did", repo.Did) - if err := s.bfs.EnqueueJob(repo.Did); err != nil { - log.Warn("failed to enqueue backfill job", "err", err) - errored++ - continue - } - enqueued++ - } else if err != nil { - log.Warn("failed to get backfill job", "did", repo.Did, "err", err) - errored++ - } else { - skipped++ - } - } - log.Info("enqueued repos", "enqueued", enqueued, "skipped", skipped, "errored", errored) - totalEnqueued += enqueued - totalSkipped += skipped - totalErrored += errored - if resp.Cursor != nil && *resp.Cursor != "" { - cursor = *resp.Cursor - } else { - break - } - } - - log.Info("finished repo discovery", "totalEnqueued", totalEnqueued, "totalSkipped", totalSkipped, "totalErrored", totalErrored) -} - -func (s *Server) handleCreateOrUpdate(ctx context.Context, rawDID string, path string, recP *typegen.CBORMarshaler, rcid *cid.Cid) error { - // Since this gets called in a backfill job, we need to check if the path is a post or profile - if !strings.Contains(path, "app.bsky.feed.post") && !strings.Contains(path, "app.bsky.actor.profile") { - return nil - } - - did, err := syntax.ParseDID(rawDID) - if err != nil { - return fmt.Errorf("bad DID syntax in event: %w", err) - } - - ident, err := s.dir.LookupDID(ctx, did) - if err != nil { - return fmt.Errorf("resolving identity: %w", err) - } - if 
ident == nil { - return fmt.Errorf("identity not found for did: %s", did.String()) - } - rec := *recP - - _ = rec - /* XXX: - switch rec := rec.(type) { - case *bsky.FeedPost: - // XXX: if err := s.indexPost(ctx, ident, rec, path, *rcid); err != nil { - _ = rec - if err := s.engine.ProcessCommit(ctx, evt); err != nil { - postsFailed.Inc() - return fmt.Errorf("processing post for %s: %w", did.String(), err) - } - postsIndexed.Inc() - case *bsky.ActorProfile: - // XXX: if err := s.indexProfile(ctx, ident, rec, path, *rcid); err != nil { - if err := s.engine.ProcessCommit(ctx, evt); err != nil { - profilesFailed.Inc() - return fmt.Errorf("processing profile for %s: %w", did.String(), err) - } - profilesIndexed.Inc() - default: - } - */ - return nil -} - -func (s *Server) handleDelete(ctx context.Context, rawDID, path string) error { - // TODO: just ignoring for now - return nil -} - -func (s *Server) processTooBigCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error { - repodata, err := comatproto.SyncGetRepo(ctx, s.bgsxrpc, evt.Repo, "") - if err != nil { - return err - } - - r, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(repodata)) - if err != nil { - return err - } - - did, err := syntax.ParseDID(evt.Repo) - if err != nil { - return fmt.Errorf("bad DID in repo event: %w", err) - } - - return r.ForEach(ctx, "", func(k string, v cid.Cid) error { - if strings.HasPrefix(k, "app.bsky.feed.post") || strings.HasPrefix(k, "app.bsky.actor.profile") { - rcid, rec, err := r.GetRecord(ctx, k) - if err != nil { - // TODO: handle this case (instead of return nil) - s.logger.Error("failed to get record from repo checkout", "path", k, "err", err) - return nil - } - - // TODO: may want to treat this as a regular event? 
- _ = rcid - _ = did - _ = rec - /* XXX: - if err := s.engine.ProcessRecord(ctx, did, m, rec); err != nil { - return fmt.Errorf("processing record from tooBig commit: %w", err) - } - */ - } - return nil - }) -} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index cf11a943f..3d3f2d809 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -3,24 +3,16 @@ package main import ( "context" "fmt" - "log" "log/slog" "net/http" "os" "time" "github.com/bluesky-social/indigo/atproto/identity" - "github.com/bluesky-social/indigo/util/cliutil" "github.com/carlmjohnson/versioninfo" _ "github.com/joho/godotenv/autoload" cli "github.com/urfave/cli/v2" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" - "go.opentelemetry.io/otel/sdk/resource" - tracesdk "go.opentelemetry.io/otel/sdk/trace" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" "golang.org/x/time/rate" ) @@ -52,11 +44,6 @@ func run(args []string) error { Value: "https://plc.directory", EnvVars: []string{"ATP_PLC_HOST"}, }, - &cli.IntFlag{ - Name: "max-metadb-connections", - EnvVars: []string{"MAX_METADB_CONNECTIONS"}, - Value: 40, - }, } app.Commands = []*cli.Command{ @@ -68,35 +55,14 @@ func run(args []string) error { var runCmd = &cli.Command{ Name: "run", - Usage: "run the service", + Usage: "run the hepa daemon", Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "database-url", - Value: "sqlite://data/hepa/automod.db", - EnvVars: []string{"DATABASE_URL"}, - }, - &cli.BoolFlag{ - Name: "readonly", - EnvVars: []string{"HEPA_READONLY", "READONLY"}, - }, - &cli.StringFlag{ - Name: "bind", - Usage: "IP or address, and port, to listen on for HTTP APIs", - Value: ":3999", - EnvVars: []string{"HEPA_BIND"}, - }, &cli.StringFlag{ Name: "metrics-listen", Usage: "IP or address, and port, to listen on for metrics APIs", - Value: ":3998", + Value: ":3989", EnvVars: []string{"HEPA_METRICS_LISTEN"}, }, - &cli.IntFlag{ - Name: 
"bgs-sync-rate-limit", - Usage: "max repo sync (checkout) requests per second to upstream (BGS)", - Value: 8, - EnvVars: []string{"HEPA_BGS_SYNC_RATE_LIMIT"}, - }, &cli.IntFlag{ Name: "plc-rate-limit", Usage: "max number of requests per second to PLC registry", @@ -111,48 +77,9 @@ var runCmd = &cli.Command{ })) slog.SetDefault(logger) - // Enable OTLP HTTP exporter - // For relevant environment variables: - // https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables - // At a minimum, you need to set - // OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 - if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" { - slog.Info("setting up trace exporter", "endpoint", ep) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() + configOTEL("hepa") - exp, err := otlptracehttp.New(ctx) - if err != nil { - log.Fatal("failed to create trace exporter", "error", err) - } - defer func() { - ctx, cancel := context.WithTimeout(context.Background(), time.Second) - defer cancel() - if err := exp.Shutdown(ctx); err != nil { - slog.Error("failed to shutdown trace exporter", "error", err) - } - }() - - tp := tracesdk.NewTracerProvider( - tracesdk.WithBatcher(exp), - tracesdk.WithResource(resource.NewWithAttributes( - semconv.SchemaURL, - semconv.ServiceNameKey.String("hepa"), - attribute.String("env", os.Getenv("ENVIRONMENT")), // DataDog - attribute.String("environment", os.Getenv("ENVIRONMENT")), // Others - attribute.Int64("ID", 1), - )), - ) - otel.SetTracerProvider(tp) - } - - db, err := cliutil.SetupDatabase(cctx.String("database-url"), cctx.Int("max-metadb-connections")) - if err != nil { - return err - } - - // TODO: replace this with "bingo" resolver? 
- base := identity.BaseDirectory{ + baseDir := identity.BaseDirectory{ PLCURL: cctx.String("atp-plc-host"), HTTPClient: http.Client{ Timeout: time.Second * 15, @@ -161,21 +88,20 @@ var runCmd = &cli.Command{ TryAuthoritativeDNS: true, SkipDNSDomainSuffixes: []string{".bsky.social"}, } - dir := identity.NewCacheDirectory(&base, 1_500_000, time.Hour*24, time.Minute*2) + dir := identity.NewCacheDirectory(&baseDir, 1_500_000, time.Hour*24, time.Minute*2) srv, err := NewServer( - db, &dir, Config{ - BGSHost: cctx.String("atp-bgs-host"), - Logger: logger, - BGSSyncRateLimit: cctx.Int("bgs-sync-rate-limit"), + BGSHost: cctx.String("atp-bgs-host"), + Logger: logger, }, ) if err != nil { return err } + // prometheus HTTP endpoint: /metrics go func() { if err := srv.RunMetrics(cctx.String("metrics-listen")); err != nil { slog.Error("failed to start metrics endpoint", "error", err) @@ -183,10 +109,9 @@ var runCmd = &cli.Command{ } }() - // TODO: if cctx.Bool("readonly") ... - - if err := srv.Run(ctx); err != nil { - return fmt.Errorf("failed to run automod service: %w", err) + // the main service loop + if err := srv.RunConsumer(ctx); err != nil { + return fmt.Errorf("failure consuming and processing firehose: %w", err) } return nil }, diff --git a/cmd/hepa/metrics.go b/cmd/hepa/metrics.go deleted file mode 100644 index 704dad317..000000000 --- a/cmd/hepa/metrics.go +++ /dev/null @@ -1,54 +0,0 @@ -package main - -import ( - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "go.opentelemetry.io/otel" -) - -var tracer = otel.Tracer("hepa") - -var postsReceived = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_posts_received", - Help: "Number of posts received", -}) - -var postsIndexed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_posts_indexed", - Help: "Number of posts indexed", -}) - -var postsFailed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_posts_failed", - Help: "Number of 
posts that failed indexing", -}) - -var postsDeleted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_posts_deleted", - Help: "Number of posts deleted", -}) - -var profilesReceived = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_profiles_received", - Help: "Number of profiles received", -}) - -var profilesIndexed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_profiles_indexed", - Help: "Number of profiles indexed", -}) - -var profilesFailed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_profiles_failed", - Help: "Number of profiles that failed indexing", -}) - -var profilesDeleted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "hepa_profiles_deleted", - Help: "Number of profiles deleted", -}) - -var currentSeq = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "hepa_current_seq", - Help: "Current sequence number", -}) diff --git a/cmd/hepa/otel.go b/cmd/hepa/otel.go new file mode 100644 index 000000000..918bd1b7d --- /dev/null +++ b/cmd/hepa/otel.go @@ -0,0 +1,56 @@ +package main + +import ( + "os" + "log/slog" + "log" + "context" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/sdk/resource" + tracesdk "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" +) + +var tracer = otel.Tracer("hepa") + +// Enable OTLP HTTP exporter +// For relevant environment variables: +// https://pkg.go.dev/go.opentelemetry.io/otel/exporters/otlp/otlptrace#readme-environment-variables +// At a minimum, you need to set +// OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318 +// TODO: this should be in cliutil or something +func configOTEL(serviceName string) { + if ep := os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT"); ep != "" { + slog.Info("setting up trace exporter", "endpoint", ep) + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + exp, err := 
otlptracehttp.New(ctx) + if err != nil { + log.Fatal("failed to create trace exporter", "error", err) + } + defer func() { + ctx, cancel := context.WithTimeout(context.Background(), time.Second) + defer cancel() + if err := exp.Shutdown(ctx); err != nil { + slog.Error("failed to shutdown trace exporter", "error", err) + } + }() + + tp := tracesdk.NewTracerProvider( + tracesdk.WithBatcher(exp), + tracesdk.WithResource(resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(serviceName), + attribute.String("env", os.Getenv("ENVIRONMENT")), // DataDog + attribute.String("environment", os.Getenv("ENVIRONMENT")), // Others + attribute.Int64("ID", 1), + )), + ) + otel.SetTracerProvider(tp) + } +} diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 6affc8f5e..682f84f95 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -9,39 +9,22 @@ import ( "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/automod" - "github.com/bluesky-social/indigo/backfill" - "github.com/bluesky-social/indigo/xrpc" "github.com/prometheus/client_golang/prometheus/promhttp" - gorm "gorm.io/gorm" ) type Server struct { - db *gorm.DB - bgshost string - bgsxrpc *xrpc.Client - dir identity.Directory - logger *slog.Logger - engine *automod.Engine - skipBackfill bool - - bfs *backfill.Gormstore - bf *backfill.Backfiller -} - -type LastSeq struct { - ID uint `gorm:"primarykey"` - Seq int64 + bgshost string + logger *slog.Logger + engine *automod.Engine } type Config struct { - BGSHost string - Logger *slog.Logger - BGSSyncRateLimit int - MaxEventConcurrency int + BGSHost string + Logger *slog.Logger } -func NewServer(db *gorm.DB, dir identity.Directory, config Config) (*Server, error) { +func NewServer(dir identity.Directory, config Config) (*Server, error) { logger := config.Logger if logger == nil { logger = slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ @@ -49,60 +32,24 @@ func NewServer(db *gorm.DB, dir 
identity.Directory, config Config) (*Server, err })) } - logger.Info("running database migrations") - db.AutoMigrate(&LastSeq{}) - db.AutoMigrate(&backfill.GormDBJob{}) - bgsws := config.BGSHost if !strings.HasPrefix(bgsws, "ws") { return nil, fmt.Errorf("specified bgs host must include 'ws://' or 'wss://'") } - bgshttp := strings.Replace(bgsws, "ws", "http", 1) - bgsxrpc := &xrpc.Client{ - Host: bgshttp, - } - engine := automod.Engine{ + Logger: logger, Directory: dir, + CountStore: automod.NewMemCountStore(), + // TODO: RulesMap (loaded/config from somewhere) + // TODO: AdminClient (XRPC with mod access) } s := &Server{ - db: db, - bgshost: config.BGSHost, // NOTE: the original URL, not 'bgshttp' - bgsxrpc: bgsxrpc, - dir: dir, - logger: logger, - engine: &engine, - skipBackfill: true, - } - - bfstore := backfill.NewGormstore(db) - opts := backfill.DefaultBackfillOptions() - if config.BGSSyncRateLimit > 0 { - opts.SyncRequestsPerSecond = config.BGSSyncRateLimit - opts.ParallelBackfills = 2 * config.BGSSyncRateLimit - } else { - opts.SyncRequestsPerSecond = 8 + bgshost: config.BGSHost, + logger: logger, + engine: &engine, } - opts.CheckoutPath = fmt.Sprintf("%s/xrpc/com.atproto.sync.getRepo", bgshttp) - if config.MaxEventConcurrency > 0 { - opts.ParallelRecordCreates = config.MaxEventConcurrency - } else { - opts.ParallelRecordCreates = 20 - } - opts.NSIDFilter = "app.bsky." 
- bf := backfill.NewBackfiller( - "hepa", - bfstore, - s.handleCreateOrUpdate, - s.handleCreateOrUpdate, - s.handleDelete, - opts, - ) - - s.bfs = bfstore - s.bf = bf return s, nil } From e4744a36048e1bbad9f12a849d39438198eb3a90 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 17:38:40 -0800 Subject: [PATCH 05/35] automod: basic set storage API --- automod/engine.go | 21 ++++++++++----------- automod/setstore.go | 30 ++++++++++++++++++++++++++++++ cmd/hepa/otel.go | 6 +++--- cmd/hepa/server.go | 5 +++-- 4 files changed, 46 insertions(+), 16 deletions(-) create mode 100644 automod/setstore.go diff --git a/automod/engine.go b/automod/engine.go index c1bd0919f..23641e65f 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -5,25 +5,24 @@ import ( "fmt" "log/slog" "strings" - "sync" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" - "github.com/bluesky-social/indigo/xrpc" ) // runtime for executing rules, managing state, and recording moderation actions. // // TODO: careful when initializing: several fields should not be null or zero, even though they are pointer type. type Engine struct { - Logger *slog.Logger + Logger *slog.Logger Directory identity.Directory // current rule sets. 
will eventually be possible to swap these out at runtime - RulesMap sync.Map + // TODO: RulesMap sync.Map // used to persist moderation actions in mod service (optional) - AdminClient *xrpc.Client - CountStore CountStore + // TODO: AdminClient *xrpc.Client + Counters CountStore + Sets SetStore } func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax.DID) error { @@ -92,7 +91,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbsky.FeedPost) PostEvent { return PostEvent{ - RecordEvent { + RecordEvent{ Event{ Engine: e, Account: AccountMeta{Identity: ident}, @@ -119,10 +118,10 @@ func (e *Engine) NewRecordEvent(ident *identity.Identity, path string, rec any) } func (e *Engine) GetCount(key, period string) (int, error) { - return e.CountStore.GetCount(context.TODO(), key, period) + return e.Counters.GetCount(context.TODO(), key, period) } -func (e *Engine) InSet(setName, val string) (bool, error) { - // XXX: implement - return false, nil +// checks if `val` is an element of set `name` +func (e *Engine) InSet(name, val string) (bool, error) { + return e.Sets.InSet(context.TODO(), name, val) } diff --git a/automod/setstore.go b/automod/setstore.go new file mode 100644 index 000000000..d113d1bb6 --- /dev/null +++ b/automod/setstore.go @@ -0,0 +1,30 @@ +package automod + +import ( + "context" + "fmt" +) + +type SetStore interface { + InSet(ctx context.Context, name, val string) (bool, error) +} + +// TODO: this implementation isn't race-safe (yet)! 
+type MemSetStore struct { + Sets map[string]map[string]bool +} + +func NewMemSetStore() MemSetStore { + return MemSetStore{ + Sets: make(map[string]map[string]bool), + } +} + +func (s MemSetStore) InSet(ctx context.Context, name, val string) (bool, error) { + set, ok := s.Sets[name] + if !ok { + return false, fmt.Errorf("not a known set: %s", name) + } + _, ok = set[val] + return ok, nil +} diff --git a/cmd/hepa/otel.go b/cmd/hepa/otel.go index 918bd1b7d..b551325c2 100644 --- a/cmd/hepa/otel.go +++ b/cmd/hepa/otel.go @@ -1,10 +1,10 @@ package main import ( - "os" - "log/slog" - "log" "context" + "log" + "log/slog" + "os" "time" "go.opentelemetry.io/otel" diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 682f84f95..332f3e611 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -38,9 +38,10 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { } engine := automod.Engine{ - Logger: logger, + Logger: logger, Directory: dir, - CountStore: automod.NewMemCountStore(), + Counters: automod.NewMemCountStore(), + Sets: automod.NewMemSetStore(), // TODO: RulesMap (loaded/config from somewhere) // TODO: AdminClient (XRPC with mod access) } From 85d4cd22563b818979bb8e76248be73674f5bd22 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 19:39:29 -0800 Subject: [PATCH 06/35] automod: rules sub-package, a bit of tests --- automod/engine.go | 21 +++++++-- automod/engine_test.go | 86 ++++++++++++++++++++++++++++++++++ automod/event.go | 21 +++++---- automod/rules/all.go | 14 ++++++ automod/rules/hashtags.go | 26 ++++++++++ automod/rules/hashtags_test.go | 68 +++++++++++++++++++++++++++ automod/ruleset.go | 46 ++++++++++++++++++ automod/setstore.go | 4 +- cmd/hepa/server.go | 13 ++--- 9 files changed, 277 insertions(+), 22 deletions(-) create mode 100644 automod/engine_test.go create mode 100644 automod/rules/all.go create mode 100644 automod/rules/hashtags.go create mode 100644 automod/rules/hashtags_test.go create mode 100644 
automod/ruleset.go diff --git a/automod/engine.go b/automod/engine.go index 23641e65f..215690bb8 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -9,6 +9,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/xrpc" ) // runtime for executing rules, managing state, and recording moderation actions. @@ -17,12 +18,11 @@ import ( type Engine struct { Logger *slog.Logger Directory identity.Directory - // current rule sets. will eventually be possible to swap these out at runtime - // TODO: RulesMap sync.Map + Rules RuleSet + Counters CountStore + Sets SetStore // used to persist moderation actions in mod service (optional) - // TODO: AdminClient *xrpc.Client - Counters CountStore - Sets SetStore + AdminClient *xrpc.Client } func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax.DID) error { @@ -48,7 +48,11 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. 
}, } // TODO: call rules + // TODO: handle errors _ = evt + if evt.Err != nil { + return evt.Err + } return nil } @@ -79,11 +83,17 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, e.Logger.Info("processing post", "did", ident.DID, "path", path) _ = evt // TODO: call rules + if evt.Err != nil { + return evt.Err + } default: evt := e.NewRecordEvent(ident, path, rec) e.Logger.Info("processing record", "did", ident.DID, "path", path) _ = evt // TODO: call rules + if evt.Err != nil { + return evt.Err + } } return nil @@ -101,6 +111,7 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbs []ModReport{}, []string{}, }, + post, } } diff --git a/automod/engine_test.go b/automod/engine_test.go new file mode 100644 index 000000000..d1e3dbfac --- /dev/null +++ b/automod/engine_test.go @@ -0,0 +1,86 @@ +package automod + +import ( + "context" + "log/slog" + "testing" + + appbsky "github.com/bluesky-social/indigo/api/bsky" + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/xrpc" + + "github.com/stretchr/testify/assert" +) + +func simpleRule(evt *PostEvent) error { + for _, tag := range evt.Post.Tags { + if evt.InSet("banned-hashtags", tag) { + evt.AddLabel("bad-hashtag") + break + } + } + for _, facet := range evt.Post.Facets { + for _, feat := range facet.Features { + if feat.RichtextFacet_Tag != nil { + tag := feat.RichtextFacet_Tag.Tag + if evt.InSet("banned-hashtags", tag) { + evt.AddLabel("bad-hashtag") + break + } + } + } + } + return nil +} + +func engineFixture() Engine { + rules := RuleSet{ + PostRules: []PostRuleFunc{ + simpleRule, + }, + } + sets := NewMemSetStore() + sets.Sets["banned-hashtags"] = make(map[string]bool) + sets.Sets["banned-hashtags"]["slur"] = true + dir := identity.NewMockDirectory() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + } 
+ dir.Insert(id1) + adminc := xrpc.Client{ + Host: "http://dummy.local", + } + engine := Engine{ + Logger: slog.Default(), + Directory: &dir, + Counters: NewMemCountStore(), + Sets: sets, + Rules: rules, + AdminClient: &adminc, + } + return engine +} + +func TestEngineBasics(t *testing.T) { + assert := assert.New(t) + ctx := context.Background() + + engine := engineFixture() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + } + path := "app.bsky.feed.post/abc123" + p1 := appbsky.FeedPost{ + Text: "some post blah", + } + assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, &p1)) + + p2 := appbsky.FeedPost{ + Text: "some post blah", + Tags: []string{"one", "slur"}, + } + assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, &p2)) +} diff --git a/automod/event.go b/automod/event.go index 37f1c17ff..e9aacaa5c 100644 --- a/automod/event.go +++ b/automod/event.go @@ -1,6 +1,7 @@ package automod import ( + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" ) @@ -18,7 +19,7 @@ type AccountMeta struct { // base type for events. events are both containers for data about the event itself (similar to an HTTP request type); aggregate results and state (counters, mod actions) to be persisted after all rules are run; and act as an API for additional network reads and operations. 
type Event struct { Engine *Engine - Err *error + Err error Account AccountMeta CounterIncrements []string AccountLabels []string @@ -30,7 +31,7 @@ type Event struct { func (e *Event) CountTotal(key string) int { v, err := e.Engine.GetCount(key, PeriodTotal) if err != nil { - e.Err = &err + e.Err = err return 0 } return v @@ -39,7 +40,7 @@ func (e *Event) CountTotal(key string) int { func (e *Event) CountDay(key string) int { v, err := e.Engine.GetCount(key, PeriodDay) if err != nil { - e.Err = &err + e.Err = err return 0 } return v @@ -48,7 +49,7 @@ func (e *Event) CountDay(key string) int { func (e *Event) CountHour(key string) int { v, err := e.Engine.GetCount(key, PeriodHour) if err != nil { - e.Err = &err + e.Err = err return 0 } return v @@ -57,7 +58,7 @@ func (e *Event) CountHour(key string) int { func (e *Event) InSet(name, val string) bool { v, err := e.Engine.InSet(name, val) if err != nil { - e.Err = &err + e.Err = err return false } return v @@ -115,9 +116,11 @@ func (e *RecordEvent) Report(reason, comment string) { type PostEvent struct { RecordEvent - // TODO: thread context + + Post *appbsky.FeedPost + // TODO: post thread context (root, parent) } -type IdentityRuleFunc = func(evt IdentityEvent) error -type RecordRuleFunc = func(evt RecordEvent) error -type PostRuleFunc = func(evt PostEvent) error +type IdentityRuleFunc = func(evt *IdentityEvent) error +type RecordRuleFunc = func(evt *RecordEvent) error +type PostRuleFunc = func(evt *PostEvent) error diff --git a/automod/rules/all.go b/automod/rules/all.go new file mode 100644 index 000000000..19b837080 --- /dev/null +++ b/automod/rules/all.go @@ -0,0 +1,14 @@ +package rules + +import ( + "github.com/bluesky-social/indigo/automod" +) + +func DefaultRules() automod.RuleSet { + rules := automod.RuleSet{ + PostRules: []automod.PostRuleFunc{ + BanHashtagsPostRule, + }, + } + return rules +} diff --git a/automod/rules/hashtags.go b/automod/rules/hashtags.go new file mode 100644 index 000000000..3e91cb4cf 
--- /dev/null +++ b/automod/rules/hashtags.go @@ -0,0 +1,26 @@ +package rules + +import ( + "github.com/bluesky-social/indigo/automod" +) + +func BanHashtagsPostRule(evt *automod.PostEvent) error { + for _, tag := range evt.Post.Tags { + if evt.InSet("banned-hashtags", tag) { + evt.AddLabel("bad-hashtag") + break + } + } + for _, facet := range evt.Post.Facets { + for _, feat := range facet.Features { + if feat.RichtextFacet_Tag != nil { + tag := feat.RichtextFacet_Tag.Tag + if evt.InSet("banned-hashtags", tag) { + evt.AddLabel("bad-hashtag") + break + } + } + } + } + return nil +} diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go new file mode 100644 index 000000000..328693b56 --- /dev/null +++ b/automod/rules/hashtags_test.go @@ -0,0 +1,68 @@ +package rules + +import ( + "log/slog" + "testing" + + appbsky "github.com/bluesky-social/indigo/api/bsky" + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/xrpc" + + "github.com/stretchr/testify/assert" +) + +func engineFixture() automod.Engine { + rules := automod.RuleSet{ + PostRules: []automod.PostRuleFunc{ + BanHashtagsPostRule, + }, + } + sets := automod.NewMemSetStore() + sets.Sets["banned-hashtags"] = make(map[string]bool) + sets.Sets["banned-hashtags"]["slur"] = true + dir := identity.NewMockDirectory() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + } + dir.Insert(id1) + adminc := xrpc.Client{ + Host: "http://dummy.local", + } + engine := automod.Engine{ + Logger: slog.Default(), + Directory: &dir, + Counters: automod.NewMemCountStore(), + Sets: sets, + Rules: rules, + AdminClient: &adminc, + } + return engine +} + +func TestBanHashtagPostRule(t *testing.T) { + assert := assert.New(t) + + engine := engineFixture() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: 
syntax.Handle("handle.example.com"), + } + rkey := "abc123" + p1 := appbsky.FeedPost{ + Text: "some post blah", + } + evt1 := engine.NewPostEvent(&id1, rkey, &p1) + assert.NoError(BanHashtagsPostRule(&evt1)) + assert.Empty(evt1.RecordLabels) + + p2 := appbsky.FeedPost{ + Text: "some post blah", + Tags: []string{"one", "slur"}, + } + evt2 := engine.NewPostEvent(&id1, rkey, &p2) + assert.NoError(BanHashtagsPostRule(&evt2)) + assert.NotEmpty(evt2.RecordLabels) +} diff --git a/automod/ruleset.go b/automod/ruleset.go new file mode 100644 index 000000000..fe106dcd4 --- /dev/null +++ b/automod/ruleset.go @@ -0,0 +1,46 @@ +package automod + +type RuleSet struct { + PostRules []PostRuleFunc + RecordRules []RecordRuleFunc + IdentityRules []IdentityRuleFunc +} + +func (r *RuleSet) CallPostRules(evt *PostEvent) error { + for _, f := range r.PostRules { + err := f(evt) + if err != nil { + return err + } + if evt.Err != nil { + return evt.Err + } + } + return nil +} + +func (r *RuleSet) CallRecordRules(evt *RecordEvent) error { + for _, f := range r.RecordRules { + err := f(evt) + if err != nil { + return err + } + if evt.Err != nil { + return evt.Err + } + } + return nil +} + +func (r *RuleSet) CallIdentityRules(evt *IdentityEvent) error { + for _, f := range r.IdentityRules { + err := f(evt) + if err != nil { + return err + } + if evt.Err != nil { + return evt.Err + } + } + return nil +} diff --git a/automod/setstore.go b/automod/setstore.go index d113d1bb6..5748905ff 100644 --- a/automod/setstore.go +++ b/automod/setstore.go @@ -2,7 +2,6 @@ package automod import ( "context" - "fmt" ) type SetStore interface { @@ -23,7 +22,8 @@ func NewMemSetStore() MemSetStore { func (s MemSetStore) InSet(ctx context.Context, name, val string) (bool, error) { set, ok := s.Sets[name] if !ok { - return false, fmt.Errorf("not a known set: %s", name) + // NOTE: currently returns false when entire set isn't found + return false, nil } _, ok = set[val] return ok, nil diff --git 
a/cmd/hepa/server.go b/cmd/hepa/server.go index 332f3e611..5abf6a30f 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -9,6 +9,7 @@ import ( "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/automod/rules" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -38,12 +39,12 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { } engine := automod.Engine{ - Logger: logger, - Directory: dir, - Counters: automod.NewMemCountStore(), - Sets: automod.NewMemSetStore(), - // TODO: RulesMap (loaded/config from somewhere) - // TODO: AdminClient (XRPC with mod access) + Logger: logger, + Directory: dir, + Counters: automod.NewMemCountStore(), + Sets: automod.NewMemSetStore(), + Rules: rules.DefaultRules(), + AdminClient: nil, // TODO: AppView with mod access, via config } s := &Server{ From 0eb79b722e06b5cca088474693b12a92f2c7366e Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 22:32:44 -0800 Subject: [PATCH 07/35] automod: misleading URL and mention rules --- automod/engine.go | 2 + automod/event.go | 3 ++ automod/rules/fixture_test.go | 44 +++++++++++++++++ automod/rules/hashtags_test.go | 32 ------------ automod/rules/misleading.go | 85 ++++++++++++++++++++++++++++++++ automod/rules/misleading_test.go | 75 ++++++++++++++++++++++++++++ 6 files changed, 209 insertions(+), 32 deletions(-) create mode 100644 automod/rules/fixture_test.go create mode 100644 automod/rules/misleading.go create mode 100644 automod/rules/misleading_test.go diff --git a/automod/engine.go b/automod/engine.go index 215690bb8..af71672d2 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -104,6 +104,7 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbs RecordEvent{ Event{ Engine: e, + Logger: e.Logger, Account: AccountMeta{Identity: ident}, }, []string{}, @@ -119,6 +120,7 @@ func (e *Engine) NewRecordEvent(ident 
*identity.Identity, path string, rec any) return RecordEvent{ Event{ Engine: e, + Logger: e.Logger, Account: AccountMeta{Identity: ident}, }, []string{}, diff --git a/automod/event.go b/automod/event.go index e9aacaa5c..116763715 100644 --- a/automod/event.go +++ b/automod/event.go @@ -1,6 +1,8 @@ package automod import ( + "log/slog" + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" ) @@ -20,6 +22,7 @@ type AccountMeta struct { type Event struct { Engine *Engine Err error + Logger *slog.Logger Account AccountMeta CounterIncrements []string AccountLabels []string diff --git a/automod/rules/fixture_test.go b/automod/rules/fixture_test.go new file mode 100644 index 000000000..6328c4702 --- /dev/null +++ b/automod/rules/fixture_test.go @@ -0,0 +1,44 @@ +package rules + +import ( + "log/slog" + + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/xrpc" +) + +func engineFixture() automod.Engine { + rules := automod.RuleSet{ + PostRules: []automod.PostRuleFunc{ + BanHashtagsPostRule, + }, + } + sets := automod.NewMemSetStore() + sets.Sets["banned-hashtags"] = make(map[string]bool) + sets.Sets["banned-hashtags"]["slur"] = true + dir := identity.NewMockDirectory() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + } + id2 := identity.Identity{ + DID: syntax.DID("did:plc:abc222"), + Handle: syntax.Handle("imposter.example.com"), + } + dir.Insert(id1) + dir.Insert(id2) + adminc := xrpc.Client{ + Host: "http://dummy.local", + } + engine := automod.Engine{ + Logger: slog.Default(), + Directory: &dir, + Counters: automod.NewMemCountStore(), + Sets: sets, + Rules: rules, + AdminClient: &adminc, + } + return engine +} diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go index 328693b56..2d4ab92fa 100644 --- 
a/automod/rules/hashtags_test.go +++ b/automod/rules/hashtags_test.go @@ -1,47 +1,15 @@ package rules import ( - "log/slog" "testing" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" - "github.com/bluesky-social/indigo/automod" - "github.com/bluesky-social/indigo/xrpc" "github.com/stretchr/testify/assert" ) -func engineFixture() automod.Engine { - rules := automod.RuleSet{ - PostRules: []automod.PostRuleFunc{ - BanHashtagsPostRule, - }, - } - sets := automod.NewMemSetStore() - sets.Sets["banned-hashtags"] = make(map[string]bool) - sets.Sets["banned-hashtags"]["slur"] = true - dir := identity.NewMockDirectory() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), - } - dir.Insert(id1) - adminc := xrpc.Client{ - Host: "http://dummy.local", - } - engine := automod.Engine{ - Logger: slog.Default(), - Directory: &dir, - Counters: automod.NewMemCountStore(), - Sets: sets, - Rules: rules, - AdminClient: &adminc, - } - return engine -} - func TestBanHashtagPostRule(t *testing.T) { assert := assert.New(t) diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go new file mode 100644 index 000000000..e7af93fa4 --- /dev/null +++ b/automod/rules/misleading.go @@ -0,0 +1,85 @@ +package rules + +import ( + "context" + "net/url" + + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" +) + +func MisleadingURLPostRule(evt *automod.PostEvent) error { + for _, facet := range evt.Post.Facets { + for _, feat := range facet.Features { + if feat.RichtextFacet_Link != nil { + if int(facet.Index.ByteEnd) > len([]byte(evt.Post.Text)) || facet.Index.ByteStart > facet.Index.ByteEnd { + evt.Logger.Warn("invalid facet range") + evt.AddLabel("invalid") // TODO: or some other "this record is corrupt" indicator? 
+ continue + } + txt := string([]byte(evt.Post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) + + linkURL, err := url.Parse(feat.RichtextFacet_Link.Uri) + if err != nil { + evt.Logger.Warn("invalid link metadata URL", "uri", feat.RichtextFacet_Link.Uri) + continue + } + + // try parsing as a full URL + textURL, err := url.Parse(txt) + if err != nil { + evt.Logger.Warn("invalid link text URL", "uri", txt) + continue + } + + // for now just compare domains to handle the most obvious cases + // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! + if linkURL.Host != textURL.Host { + evt.Logger.Warn("misleading mismatched domains", "link", linkURL.Host, "text", textURL.Host) + evt.AddLabel("misleading") + } + } + } + } + return nil +} + +func MisleadingMentionPostRule(evt *automod.PostEvent) error { + // TODO: do we really need to route context around? probably + ctx := context.TODO() + for _, facet := range evt.Post.Facets { + for _, feat := range facet.Features { + if feat.RichtextFacet_Mention != nil { + if int(facet.Index.ByteEnd) > len([]byte(evt.Post.Text)) || facet.Index.ByteStart > facet.Index.ByteEnd { + evt.Logger.Warn("invalid facet range") + evt.AddLabel("invalid") // TODO: or some other "this record is corrupt" indicator? + continue + } + txt := string([]byte(evt.Post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) + if txt[0] == '@' { + txt = txt[1:] + } + handle, err := syntax.ParseHandle(txt) + if err != nil { + evt.Logger.Warn("mention was not a valid handle", "text", txt) + continue + } + + mentioned, err := evt.Engine.Directory.LookupHandle(ctx, handle) + if err != nil { + evt.Logger.Warn("could not resolve handle", "handle", handle) + evt.AddLabel("misleading") + break + } + + // TODO: check if mentioned DID was recently updated? 
might be a caching issue + if mentioned.DID.String() != feat.RichtextFacet_Mention.Did { + evt.Logger.Warn("misleading mention", "text", txt, "did", mentioned.DID) + evt.AddLabel("misleading") + continue + } + } + } + } + return nil +} diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go new file mode 100644 index 000000000..16b6dd574 --- /dev/null +++ b/automod/rules/misleading_test.go @@ -0,0 +1,75 @@ +package rules + +import ( + "testing" + + appbsky "github.com/bluesky-social/indigo/api/bsky" + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + + "github.com/stretchr/testify/assert" +) + +func TestMisleadingURLPostRule(t *testing.T) { + assert := assert.New(t) + + engine := engineFixture() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + } + rkey := "abc123" + p1 := appbsky.FeedPost{ + Text: "https://safe.com/ is very reputable", + Facets: []*appbsky.RichtextFacet{ + &appbsky.RichtextFacet{ + Features: []*appbsky.RichtextFacet_Features_Elem{ + &appbsky.RichtextFacet_Features_Elem{ + RichtextFacet_Link: &appbsky.RichtextFacet_Link{ + Uri: "https://evil.com", + }, + }, + }, + Index: &appbsky.RichtextFacet_ByteSlice{ + ByteStart: 0, + ByteEnd: 16, + }, + }, + }, + } + evt1 := engine.NewPostEvent(&id1, rkey, &p1) + assert.NoError(MisleadingURLPostRule(&evt1)) + assert.NotEmpty(evt1.RecordLabels) +} + +func TestMisleadingMentionPostRule(t *testing.T) { + assert := assert.New(t) + + engine := engineFixture() + id1 := identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + } + rkey := "abc123" + p1 := appbsky.FeedPost{ + Text: "@handle.example.com is a friend", + Facets: []*appbsky.RichtextFacet{ + &appbsky.RichtextFacet{ + Features: []*appbsky.RichtextFacet_Features_Elem{ + &appbsky.RichtextFacet_Features_Elem{ + RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ + Did: 
"did:plc:abc222", + }, + }, + }, + Index: &appbsky.RichtextFacet_ByteSlice{ + ByteStart: 1, + ByteEnd: 19, + }, + }, + }, + } + evt1 := engine.NewPostEvent(&id1, rkey, &p1) + assert.NoError(MisleadingMentionPostRule(&evt1)) + assert.NotEmpty(evt1.RecordLabels) +} From f9c1fe632a84fba058050846c633aa184169147c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 22:42:27 -0800 Subject: [PATCH 08/35] automod: helper to load sets from JSON file --- automod/setstore.go | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/automod/setstore.go b/automod/setstore.go index 5748905ff..29acddbc7 100644 --- a/automod/setstore.go +++ b/automod/setstore.go @@ -2,6 +2,9 @@ package automod import ( "context" + "encoding/json" + "io" + "os" ) type SetStore interface { @@ -28,3 +31,31 @@ func (s MemSetStore) InSet(ctx context.Context, name, val string) (bool, error) _, ok = set[val] return ok, nil } + +func (s *MemSetStore) LoadFromFileJSON(p string) error { + + f, err := os.Open(p) + if err != nil { + return err + } + defer func() { _ = f.Close() }() + + raw, err := io.ReadAll(f) + if err != nil { + return err + } + + var rules map[string][]string + if err := json.Unmarshal(raw, &rules); err != nil { + return err + } + + for name, l := range rules { + m := make(map[string]bool, len(l)) + for _, val := range l { + m[val] = true + } + s.Sets[name] = m + } + return nil +} From f9af94d92822a69f1947c8dfb32c180382bcb3f2 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 23:14:44 -0800 Subject: [PATCH 09/35] automod: persist mod actions --- automod/engine.go | 18 +++-- automod/engine_test.go | 5 +- automod/event.go | 120 +++++++++++++++++++++++++++++-- automod/rules/hashtags_test.go | 7 +- automod/rules/misleading_test.go | 10 +-- cmd/hepa/consumer.go | 2 +- 6 files changed, 143 insertions(+), 19 deletions(-) diff --git a/automod/engine.go b/automod/engine.go index af71672d2..2194be5fe 100644 --- a/automod/engine.go +++ 
b/automod/engine.go @@ -56,7 +56,7 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. return nil } -func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, rec any) error { +func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID string, rec any) error { // similar to an HTTP server, we want to recover any panics from rule execution defer func() { if r := recover(); r != nil { @@ -79,7 +79,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, if !ok { return fmt.Errorf("mismatch between collection (%s) and type", collection) } - evt := e.NewPostEvent(ident, path, post) + evt := e.NewPostEvent(ident, path, recCID, post) e.Logger.Info("processing post", "did", ident.DID, "path", path) _ = evt // TODO: call rules @@ -87,7 +87,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, return evt.Err } default: - evt := e.NewRecordEvent(ident, path, rec) + evt := e.NewRecordEvent(ident, path, recCID, rec) e.Logger.Info("processing record", "did", ident.DID, "path", path) _ = evt // TODO: call rules @@ -99,7 +99,8 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path string, return nil } -func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbsky.FeedPost) PostEvent { +func (e *Engine) NewPostEvent(ident *identity.Identity, path, recCID string, post *appbsky.FeedPost) PostEvent { + parts := strings.SplitN(path, "/", 2) return PostEvent{ RecordEvent{ Event{ @@ -107,6 +108,9 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbs Logger: e.Logger, Account: AccountMeta{Identity: ident}, }, + parts[0], + parts[1], + recCID, []string{}, false, []ModReport{}, @@ -116,13 +120,17 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path string, post *appbs } } -func (e *Engine) NewRecordEvent(ident *identity.Identity, path string, rec any) RecordEvent { +func 
(e *Engine) NewRecordEvent(ident *identity.Identity, path, recCID string, rec any) RecordEvent { + parts := strings.SplitN(path, "/", 2) return RecordEvent{ Event{ Engine: e, Logger: e.Logger, Account: AccountMeta{Identity: ident}, }, + parts[0], + parts[1], + recCID, []string{}, false, []ModReport{}, diff --git a/automod/engine_test.go b/automod/engine_test.go index d1e3dbfac..95bfa61a9 100644 --- a/automod/engine_test.go +++ b/automod/engine_test.go @@ -73,14 +73,15 @@ func TestEngineBasics(t *testing.T) { Handle: syntax.Handle("handle.example.com"), } path := "app.bsky.feed.post/abc123" + cid1 := "cid123" p1 := appbsky.FeedPost{ Text: "some post blah", } - assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, &p1)) + assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, cid1, &p1)) p2 := appbsky.FeedPost{ Text: "some post blah", Tags: []string{"one", "slur"}, } - assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, &p2)) + assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, cid1, &p2)) } diff --git a/automod/event.go b/automod/event.go index 116763715..a2453904d 100644 --- a/automod/event.go +++ b/automod/event.go @@ -1,15 +1,18 @@ package automod import ( + "context" + "fmt" "log/slog" + comatproto "github.com/bluesky-social/indigo/api/atproto" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" ) type ModReport struct { - Reason string - Comment string + ReasonType string + Comment string } // information about a repo/account/identity, always pre-populated and relevant to many rules @@ -84,7 +87,61 @@ func (e *Event) AddFlag(val string) { } func (e *Event) ReportAccount(reason, comment string) { - e.AccountReports = append(e.AccountReports, ModReport{Reason: reason, Comment: comment}) + e.AccountReports = append(e.AccountReports, ModReport{ReasonType: reason, Comment: comment}) +} + +func (e *Event) PersistAccountActions(ctx context.Context) error { + if e.Engine.AdminClient == nil { + return nil + 
} + xrpcc := e.Engine.AdminClient + if len(e.AccountLabels) > 0 { + _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ + Action: "com.atproto.admin.defs#createLabels", + CreateLabelVals: e.AccountLabels, + Reason: "automod", + CreatedBy: xrpcc.Auth.Did, + Subject: &comatproto.AdminTakeModerationAction_Input_Subject{ + AdminDefs_RepoRef: &comatproto.AdminDefs_RepoRef{ + Did: e.Account.Identity.DID.String(), + }, + }, + }) + if err != nil { + return err + } + } + // TODO: AccountFlags + for _, mr := range e.AccountReports { + _, err := comatproto.ModerationCreateReport(ctx, xrpcc, &comatproto.ModerationCreateReport_Input{ + ReasonType: &mr.ReasonType, + Reason: &mr.Comment, + Subject: &comatproto.ModerationCreateReport_Input_Subject{ + AdminDefs_RepoRef: &comatproto.AdminDefs_RepoRef{ + Did: e.Account.Identity.DID.String(), + }, + }, + }) + if err != nil { + return err + } + } + if e.AccountTakedown { + _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ + Action: "com.atproto.admin.defs#takedown", + Reason: "automod", + CreatedBy: xrpcc.Auth.Did, + Subject: &comatproto.AdminTakeModerationAction_Input_Subject{ + AdminDefs_RepoRef: &comatproto.AdminDefs_RepoRef{ + Did: e.Account.Identity.DID.String(), + }, + }, + }) + if err != nil { + return err + } + } + return nil } type IdentityEvent struct { @@ -94,6 +151,9 @@ type IdentityEvent struct { type RecordEvent struct { Event + Collection string + RecordKey string + CID string RecordLabels []string RecordTakedown bool RecordReports []ModReport @@ -114,7 +174,59 @@ func (e *RecordEvent) AddFlag(val string) { } func (e *RecordEvent) Report(reason, comment string) { - e.RecordReports = append(e.RecordReports, ModReport{Reason: reason, Comment: comment}) + e.RecordReports = append(e.RecordReports, ModReport{ReasonType: reason, Comment: comment}) +} + +func (e *RecordEvent) PersistRecordActions(ctx context.Context) error 
{ + if e.Engine.AdminClient == nil { + return nil + } + strongRef := comatproto.RepoStrongRef{ + Cid: e.CID, + Uri: fmt.Sprintf("at://%s/%s/%s", e.Account.Identity.DID, e.Collection, e.RecordKey), + } + xrpcc := e.Engine.AdminClient + if len(e.RecordLabels) > 0 { + _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ + Action: "com.atproto.admin.defs#createLabels", + CreateLabelVals: e.RecordLabels, + Reason: "automod", + CreatedBy: xrpcc.Auth.Did, + Subject: &comatproto.AdminTakeModerationAction_Input_Subject{ + RepoStrongRef: &strongRef, + }, + }) + if err != nil { + return err + } + } + // TODO: AccountFlags + for _, mr := range e.RecordReports { + _, err := comatproto.ModerationCreateReport(ctx, xrpcc, &comatproto.ModerationCreateReport_Input{ + ReasonType: &mr.ReasonType, + Reason: &mr.Comment, + Subject: &comatproto.ModerationCreateReport_Input_Subject{ + RepoStrongRef: &strongRef, + }, + }) + if err != nil { + return err + } + } + if e.RecordTakedown { + _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ + Action: "com.atproto.admin.defs#takedown", + Reason: "automod", + CreatedBy: xrpcc.Auth.Did, + Subject: &comatproto.AdminTakeModerationAction_Input_Subject{ + RepoStrongRef: &strongRef, + }, + }) + if err != nil { + return err + } + } + return nil } type PostEvent struct { diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go index 2d4ab92fa..5c95e5583 100644 --- a/automod/rules/hashtags_test.go +++ b/automod/rules/hashtags_test.go @@ -18,11 +18,12 @@ func TestBanHashtagPostRule(t *testing.T) { DID: syntax.DID("did:plc:abc111"), Handle: syntax.Handle("handle.example.com"), } - rkey := "abc123" + path := "app.bsky.feed.post/abc123" + cid1 := "cid123" p1 := appbsky.FeedPost{ Text: "some post blah", } - evt1 := engine.NewPostEvent(&id1, rkey, &p1) + evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) 
assert.NoError(BanHashtagsPostRule(&evt1)) assert.Empty(evt1.RecordLabels) @@ -30,7 +31,7 @@ func TestBanHashtagPostRule(t *testing.T) { Text: "some post blah", Tags: []string{"one", "slur"}, } - evt2 := engine.NewPostEvent(&id1, rkey, &p2) + evt2 := engine.NewPostEvent(&id1, path, cid1, &p2) assert.NoError(BanHashtagsPostRule(&evt2)) assert.NotEmpty(evt2.RecordLabels) } diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go index 16b6dd574..ce8138133 100644 --- a/automod/rules/misleading_test.go +++ b/automod/rules/misleading_test.go @@ -18,7 +18,8 @@ func TestMisleadingURLPostRule(t *testing.T) { DID: syntax.DID("did:plc:abc111"), Handle: syntax.Handle("handle.example.com"), } - rkey := "abc123" + path := "app.bsky.feed.post/abc123" + cid1 := "cid123" p1 := appbsky.FeedPost{ Text: "https://safe.com/ is very reputable", Facets: []*appbsky.RichtextFacet{ @@ -37,7 +38,7 @@ func TestMisleadingURLPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(&id1, rkey, &p1) + evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) assert.NoError(MisleadingURLPostRule(&evt1)) assert.NotEmpty(evt1.RecordLabels) } @@ -50,7 +51,8 @@ func TestMisleadingMentionPostRule(t *testing.T) { DID: syntax.DID("did:plc:abc111"), Handle: syntax.Handle("handle.example.com"), } - rkey := "abc123" + path := "app.bsky.feed.post/abc123" + cid1 := "cid123" p1 := appbsky.FeedPost{ Text: "@handle.example.com is a friend", Facets: []*appbsky.RichtextFacet{ @@ -69,7 +71,7 @@ func TestMisleadingMentionPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(&id1, rkey, &p1) + evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) assert.NoError(MisleadingMentionPostRule(&evt1)) assert.NotEmpty(evt1.RecordLabels) } diff --git a/cmd/hepa/consumer.go b/cmd/hepa/consumer.go index 9634fa85f..8cb99e50f 100644 --- a/cmd/hepa/consumer.go +++ b/cmd/hepa/consumer.go @@ -109,7 +109,7 @@ func (s *Server) HandleRepoCommit(ctx context.Context, evt *comatproto.SyncSubsc break } - err = 
s.engine.ProcessRecord(ctx, did, op.Path, rec) + err = s.engine.ProcessRecord(ctx, did, op.Path, op.Cid.String(), rec) if err != nil { logger.Error("engine failed to process record", "err", err) continue From 1d120a51fb57abb3dc8865869d18ed6631e03438 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 23:34:22 -0800 Subject: [PATCH 10/35] automod: more persisting and logging --- automod/countstore.go | 1 + automod/engine.go | 41 ++++++++++++++++++++++++++++++++--------- automod/engine_test.go | 2 +- automod/event.go | 36 ++++++++++++++++++++++++++++++++++-- automod/util.go | 13 +++++++++++++ 5 files changed, 81 insertions(+), 12 deletions(-) create mode 100644 automod/util.go diff --git a/automod/countstore.go b/automod/countstore.go index 792f82df6..ca07ec118 100644 --- a/automod/countstore.go +++ b/automod/countstore.go @@ -16,6 +16,7 @@ const ( type CountStore interface { GetCount(ctx context.Context, key, period string) (int, error) Increment(ctx context.Context, key string) error + // TODO: batch increment method } // TODO: this implementation isn't race-safe (yet)! diff --git a/automod/engine.go b/automod/engine.go index 2194be5fe..6c17f817b 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -47,12 +47,19 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. 
Account: AccountMeta{Identity: ident}, }, } - // TODO: call rules - // TODO: handle errors - _ = evt + if err := e.Rules.CallIdentityRules(&evt); err != nil { + return err + } if evt.Err != nil { return evt.Err } + evt.CanonicalLogLine() + if err := evt.PersistAccountActions(ctx); err != nil { + return err + } + if err := evt.PersistCounters(ctx); err != nil { + return err + } return nil } @@ -81,19 +88,35 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID } evt := e.NewPostEvent(ident, path, recCID, post) e.Logger.Info("processing post", "did", ident.DID, "path", path) - _ = evt - // TODO: call rules + if err := e.Rules.CallPostRules(&evt); err != nil { + return err + } if evt.Err != nil { return evt.Err } + evt.CanonicalLogLine() + if err := evt.PersistAccountActions(ctx); err != nil { + return err + } + if err := evt.PersistCounters(ctx); err != nil { + return err + } default: evt := e.NewRecordEvent(ident, path, recCID, rec) e.Logger.Info("processing record", "did", ident.DID, "path", path) - _ = evt - // TODO: call rules + if err := e.Rules.CallRecordRules(&evt); err != nil { + return err + } if evt.Err != nil { return evt.Err } + evt.CanonicalLogLine() + if err := evt.PersistAccountActions(ctx); err != nil { + return err + } + if err := evt.PersistCounters(ctx); err != nil { + return err + } } return nil @@ -105,7 +128,7 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path, recCID string, pos RecordEvent{ Event{ Engine: e, - Logger: e.Logger, + Logger: e.Logger.With("did", ident.DID, "collection", parts[0], "rkey", parts[1]), Account: AccountMeta{Identity: ident}, }, parts[0], @@ -125,7 +148,7 @@ func (e *Engine) NewRecordEvent(ident *identity.Identity, path, recCID string, r return RecordEvent{ Event{ Engine: e, - Logger: e.Logger, + Logger: e.Logger.With("did", ident.DID, "collection", parts[0], "rkey", parts[1]), Account: AccountMeta{Identity: ident}, }, parts[0], diff --git a/automod/engine_test.go 
b/automod/engine_test.go index 95bfa61a9..8a937a107 100644 --- a/automod/engine_test.go +++ b/automod/engine_test.go @@ -83,5 +83,5 @@ func TestEngineBasics(t *testing.T) { Text: "some post blah", Tags: []string{"one", "slur"}, } - assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, cid1, &p2)) + assert.Error(engine.ProcessRecord(ctx, id1.DID, path, cid1, &p2)) } diff --git a/automod/event.go b/automod/event.go index a2453904d..98fe17218 100644 --- a/automod/event.go +++ b/automod/event.go @@ -98,7 +98,7 @@ func (e *Event) PersistAccountActions(ctx context.Context) error { if len(e.AccountLabels) > 0 { _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ Action: "com.atproto.admin.defs#createLabels", - CreateLabelVals: e.AccountLabels, + CreateLabelVals: dedupeStrings(e.AccountLabels), Reason: "automod", CreatedBy: xrpcc.Auth.Did, Subject: &comatproto.AdminTakeModerationAction_Input_Subject{ @@ -144,6 +144,25 @@ func (e *Event) PersistAccountActions(ctx context.Context) error { return nil } +func (e *Event) PersistCounters(ctx context.Context) error { + for _, k := range dedupeStrings(e.CounterIncrements) { + err := e.Engine.Counters.Increment(ctx, k) + if err != nil { + return err + } + } + return nil +} + +func (e *Event) CanonicalLogLine() { + e.Logger.Info("canonical-event-line", + "accountLabels", e.AccountLabels, + "accountFlags", e.AccountFlags, + "accountTakedown", e.AccountTakedown, + "accountReports", len(e.AccountReports), + ) +} + type IdentityEvent struct { Event } @@ -189,7 +208,7 @@ func (e *RecordEvent) PersistRecordActions(ctx context.Context) error { if len(e.RecordLabels) > 0 { _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ Action: "com.atproto.admin.defs#createLabels", - CreateLabelVals: e.RecordLabels, + CreateLabelVals: dedupeStrings(e.RecordLabels), Reason: "automod", CreatedBy: xrpcc.Auth.Did, Subject: 
&comatproto.AdminTakeModerationAction_Input_Subject{ @@ -229,6 +248,19 @@ func (e *RecordEvent) PersistRecordActions(ctx context.Context) error { return nil } +func (e *RecordEvent) CanonicalLogLine() { + e.Logger.Info("canonical-event-line", + "accountLabels", e.AccountLabels, + "accountFlags", e.AccountFlags, + "accountTakedown", e.AccountTakedown, + "accountReports", len(e.AccountReports), + "recordLabels", e.RecordLabels, + "recordFlags", e.RecordFlags, + "recordTakedown", e.RecordTakedown, + "recordReports", len(e.RecordReports), + ) +} + type PostEvent struct { RecordEvent diff --git a/automod/util.go b/automod/util.go new file mode 100644 index 000000000..6934fe54b --- /dev/null +++ b/automod/util.go @@ -0,0 +1,13 @@ +package automod + +func dedupeStrings(in []string) []string { + var out []string + seen := make(map[string]bool) + for _, v := range in { + if !seen[v] { + out = append(out, v) + seen[v] = true + } + } + return out +} From 79df1f060779bc8a6b76a66e953b78fcc5a9bc7e Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 13 Nov 2023 23:53:41 -0800 Subject: [PATCH 11/35] hepa: wire up mod config --- cmd/hepa/main.go | 29 +++++++++++++++++++++++++++-- cmd/hepa/server.go | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 3d3f2d809..f7815cfc5 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -44,6 +44,12 @@ func run(args []string) error { Value: "https://plc.directory", EnvVars: []string{"ATP_PLC_HOST"}, }, + &cli.StringFlag{ + Name: "atp-mod-host", + Usage: "method, hostname, and port of moderation service", + Value: "https://api.bsky.app", + EnvVars: []string{"ATP_MOD_HOST"}, + }, } app.Commands = []*cli.Command{ @@ -69,6 +75,21 @@ var runCmd = &cli.Command{ Value: 100, EnvVars: []string{"HEPA_PLC_RATE_LIMIT"}, }, + &cli.StringFlag{ + Name: "mod-handle", + Usage: "for mod service login", + EnvVars: []string{"HEPA_MOD_AUTH_HANDLE"}, + }, + 
&cli.StringFlag{ + Name: "mod-password", + Usage: "for mod service login", + EnvVars: []string{"ATP_MOD_AUTH_PASSWORD"}, + }, + &cli.StringFlag{ + Name: "mod-admin-token", + Usage: "admin authentication password for mod service", + EnvVars: []string{"ATP_MOD_AUTH_ADMIN_TOKEN"}, + }, }, Action: func(cctx *cli.Context) error { ctx := context.Background() @@ -93,8 +114,12 @@ var runCmd = &cli.Command{ srv, err := NewServer( &dir, Config{ - BGSHost: cctx.String("atp-bgs-host"), - Logger: logger, + BGSHost: cctx.String("atp-bgs-host"), + Logger: logger, + ModHost: cctx.String("atp-mod-host"), + ModAdminToken: cctx.String("mod-admin-token"), + ModUsername: cctx.String("mod-handle"), + ModPassword: cctx.String("mod-password"), }, ) if err != nil { diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 5abf6a30f..e693b232f 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -1,15 +1,19 @@ package main import ( + "context" "fmt" "log/slog" "net/http" "os" "strings" + comatproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/rules" + "github.com/bluesky-social/indigo/util" + "github.com/bluesky-social/indigo/xrpc" "github.com/prometheus/client_golang/prometheus/promhttp" ) @@ -21,8 +25,12 @@ type Server struct { } type Config struct { - BGSHost string - Logger *slog.Logger + BGSHost string + ModHost string + ModAdminToken string + ModUsername string + ModPassword string + Logger *slog.Logger } func NewServer(dir identity.Directory, config Config) (*Server, error) { @@ -38,13 +46,35 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { return nil, fmt.Errorf("specified bgs host must include 'ws://' or 'wss://'") } + // TODO: this isn't a very robust way to handle a peristent client + var xrpcc *xrpc.Client + if config.ModAdminToken != "" { + xrpcc = &xrpc.Client{ + Client: util.RobustHTTPClient(), + Host: 
config.ModHost, + AdminToken: &config.ModAdminToken, + } + + auth, err := comatproto.ServerCreateSession(context.TODO(), xrpcc, &comatproto.ServerCreateSession_Input{ + Identifier: config.ModUsername, + Password: config.ModPassword, + }) + if err != nil { + return nil, err + } + xrpcc.Auth = &xrpc.AuthInfo{AccessJwt: auth.AccessJwt} // the client literal leaves Auth unset; allocate it before storing the session + xrpcc.Auth.RefreshJwt = auth.RefreshJwt + xrpcc.Auth.Did = auth.Did + xrpcc.Auth.Handle = auth.Handle + } + engine := automod.Engine{ Logger: logger, Directory: dir, Counters: automod.NewMemCountStore(), Sets: automod.NewMemSetStore(), Rules: rules.DefaultRules(), - AdminClient: nil, // TODO: AppView with mod access, via config + AdminClient: xrpcc, } s := &Server{ From b53bcd47373f5f1fbe2c6e39a41dde5389755ba8 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 14 Nov 2023 00:12:30 -0800 Subject: [PATCH 12/35] automod: fix trivial err --- automod/engine_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automod/engine_test.go b/automod/engine_test.go index 8a937a107..95bfa61a9 100644 --- a/automod/engine_test.go +++ b/automod/engine_test.go @@ -83,5 +83,5 @@ func TestEngineBasics(t *testing.T) { Text: "some post blah", Tags: []string{"one", "slur"}, } - assert.Error(engine.ProcessRecord(ctx, id1.DID, path, cid1, &p2)) + assert.NoError(engine.ProcessRecord(ctx, id1.DID, path, cid1, &p2)) } From fbec7f692e4c043d3b69b126d151fc593a17e4ed Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 14 Nov 2023 16:32:11 -0800 Subject: [PATCH 13/35] hepa: load set config from JSON file --- automod/rules/example_sets.json | 6 ++++++ cmd/hepa/main.go | 6 ++++++ cmd/hepa/server.go | 12 +++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 automod/rules/example_sets.json diff --git a/automod/rules/example_sets.json b/automod/rules/example_sets.json new file mode 100644 index 000000000..d66de7831 --- /dev/null +++ b/automod/rules/example_sets.json @@ -0,0 +1,6 @@ +{ + "banned-hashtags": [ + "slur", + "anotherslur"
+ ] +} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index f7815cfc5..05bd5e578 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -90,6 +90,11 @@ var runCmd = &cli.Command{ Usage: "admin authentication password for mod service", EnvVars: []string{"ATP_MOD_AUTH_ADMIN_TOKEN"}, }, + &cli.StringFlag{ + Name: "sets-json-path", + Usage: "file path of JSON file containing static sets", + EnvVars: []string{"HEPA_SETS_JSON_PATH"}, + }, }, Action: func(cctx *cli.Context) error { ctx := context.Background() @@ -120,6 +125,7 @@ var runCmd = &cli.Command{ ModAdminToken: cctx.String("mod-admin-token"), ModUsername: cctx.String("mod-handle"), ModPassword: cctx.String("mod-password"), + SetsFileJSON: cctx.String("sets-json-path"), }, ) if err != nil { diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index e693b232f..f6aa9145a 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -30,6 +30,7 @@ type Config struct { ModAdminToken string ModUsername string ModPassword string + SetsFileJSON string Logger *slog.Logger } @@ -68,11 +69,20 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { xrpcc.Auth.Handle = auth.Handle } + sets := automod.NewMemSetStore() + if config.SetsFileJSON != "" { + if err := sets.LoadFromFileJSON(config.SetsFileJSON); err != nil { + return nil, err + } else { + logger.Info("loaded set config from JSON", "path", config.SetsFileJSON) + } + } + engine := automod.Engine{ Logger: logger, Directory: dir, Counters: automod.NewMemCountStore(), - Sets: automod.NewMemSetStore(), + Sets: sets, Rules: rules.DefaultRules(), AdminClient: xrpcc, } From 01ae2e9dda7c0981cd1dfc08145f1f7740187313 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 14 Nov 2023 18:37:06 -0800 Subject: [PATCH 14/35] automod: redis identity cache; early parallelism; fix rules --- automod/engine.go | 4 +- automod/redis_directory.go | 336 +++++++++++++++++++++++++++++++++++++ automod/rules/all.go | 2 + cmd/hepa/consumer.go | 8 +- cmd/hepa/main.go | 26 ++- 
go.mod | 10 ++ go.sum | 90 ++++++++++ 7 files changed, 467 insertions(+), 9 deletions(-) create mode 100644 automod/redis_directory.go diff --git a/automod/engine.go b/automod/engine.go index 6c17f817b..3527c460e 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -87,7 +87,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return fmt.Errorf("mismatch between collection (%s) and type", collection) } evt := e.NewPostEvent(ident, path, recCID, post) - e.Logger.Info("processing post", "did", ident.DID, "path", path) + e.Logger.Debug("processing post", "did", ident.DID, "path", path) if err := e.Rules.CallPostRules(&evt); err != nil { return err } @@ -103,7 +103,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID } default: evt := e.NewRecordEvent(ident, path, recCID, rec) - e.Logger.Info("processing record", "did", ident.DID, "path", path) + e.Logger.Debug("processing record", "did", ident.DID, "path", path) if err := e.Rules.CallRecordRules(&evt); err != nil { return err } diff --git a/automod/redis_directory.go b/automod/redis_directory.go new file mode 100644 index 000000000..acf9d178e --- /dev/null +++ b/automod/redis_directory.go @@ -0,0 +1,336 @@ +package automod + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + + "github.com/go-redis/cache/v9" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/redis/go-redis/v9" +) + +// uses redis as a cache for identity lookups. 
includes a local cache layer as well, for hot keys +type RedisDirectory struct { + Inner identity.Directory + ErrTTL time.Duration + HitTTL time.Duration + + handleCache *cache.Cache + identityCache *cache.Cache + didLookupChans sync.Map + handleLookupChans sync.Map +} + +type HandleEntry struct { + Updated time.Time + DID syntax.DID + Err error +} + +type IdentityEntry struct { + Updated time.Time + Identity *identity.Identity + Err error +} + +var _ identity.Directory = (*RedisDirectory)(nil) + +func NewRedisDirectory(inner identity.Directory, redisURL string, hitTTL, errTTL time.Duration) (*RedisDirectory, error) { + opt, err := redis.ParseURL(redisURL) + if err != nil { + return nil, err + } + rdb := redis.NewClient(opt) + // check redis connection + _, err = rdb.Ping(context.TODO()).Result() + if err != nil { + return nil, err + } + handleCache := cache.New(&cache.Options{ + Redis: rdb, + LocalCache: cache.NewTinyLFU(10_000, hitTTL), + }) + identityCache := cache.New(&cache.Options{ + Redis: rdb, + LocalCache: cache.NewTinyLFU(10_000, hitTTL), + }) + return &RedisDirectory{ + Inner: inner, + ErrTTL: errTTL, + HitTTL: hitTTL, + handleCache: handleCache, + identityCache: identityCache, + }, nil +} + +func (d *RedisDirectory) IsHandleStale(e *HandleEntry) bool { + if e.Err != nil && time.Since(e.Updated) > d.ErrTTL { + return true + } + return false +} + +func (d *RedisDirectory) IsIdentityStale(e *IdentityEntry) bool { + if e.Err != nil && time.Since(e.Updated) > d.ErrTTL { + return true + } + return false +} + +func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*HandleEntry, error) { + ident, err := d.Inner.LookupHandle(ctx, h) + if err != nil { + he := HandleEntry{ + Updated: time.Now(), + DID: "", + Err: err, + } + d.handleCache.Set(&cache.Item{ + Ctx: ctx, + Key: h.String(), + Value: he, + TTL: d.ErrTTL, + }) + return &he, nil + } + + entry := IdentityEntry{ + Updated: time.Now(), + Identity: ident, + Err: nil, + } + he := 
HandleEntry{ + Updated: time.Now(), + DID: ident.DID, + Err: nil, + } + + d.identityCache.Set(&cache.Item{ + Ctx: ctx, + Key: ident.DID.String(), + Value: entry, + TTL: d.HitTTL, + }) + d.handleCache.Set(&cache.Item{ + Ctx: ctx, + Key: h.String(), + Value: he, + TTL: d.HitTTL, + }) + return &he, nil +} + +func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (syntax.DID, error) { + var entry HandleEntry + err := d.handleCache.Get(ctx, h.String(), &entry) + if err != nil && err != cache.ErrCacheMiss { + return "", err + } + if err != cache.ErrCacheMiss && !d.IsHandleStale(&entry) { + handleCacheHits.Inc() + return entry.DID, entry.Err + } + handleCacheMisses.Inc() + + // Coalesce multiple requests for the same Handle + res := make(chan struct{}) + val, loaded := d.handleLookupChans.LoadOrStore(h.String(), res) + if loaded { + handleRequestsCoalesced.Inc() + // Wait for the result from the pending request + select { + case <-val.(chan struct{}): + // The result should now be in the cache; Get decodes into its destination, so pass a pointer + err := d.handleCache.Get(ctx, h.String(), &entry) + if err != nil && err != cache.ErrCacheMiss { + return "", err + } + if err != cache.ErrCacheMiss && !d.IsHandleStale(&entry) { + return entry.DID, entry.Err + } + return "", fmt.Errorf("identity not found in cache after coalesce returned") + case <-ctx.Done(): + return "", ctx.Err() + } + } + + var did syntax.DID + // Update the Handle Entry from PLC and cache the result; updateHandle reports a failed lookup via newEntry.Err, not its error return + newEntry, err := d.updateHandle(ctx, h) + if err == nil && newEntry != nil { + did, err = newEntry.DID, newEntry.Err + } + // Cleanup the coalesce map and close the results channel + d.handleLookupChans.Delete(h.String()) + // Callers waiting will now get the result from the cache + close(res) + + return did, err +} + +func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*IdentityEntry, error) { + ident, err := d.Inner.LookupDID(ctx, did) + // wipe parsed public key; it's a waste of space + if nil == err { + ident.ParsedPublicKey = nil + }
+ // persist the identity lookup error, instead of processing it immediately + entry := IdentityEntry{ + Updated: time.Now(), + Identity: ident, + Err: err, + } + var he *HandleEntry + // if *not* an error, then also update the handle cache + if nil == err && !ident.Handle.IsInvalidHandle() { + he = &HandleEntry{ + Updated: time.Now(), + DID: did, + Err: nil, + } + } + + d.identityCache.Set(&cache.Item{ + Ctx: ctx, + Key: did.String(), + Value: entry, + TTL: d.HitTTL, + }) + if he != nil { + d.handleCache.Set(&cache.Item{ + Ctx: ctx, + Key: ident.Handle.String(), + Value: *he, + TTL: d.HitTTL, + }) + } + return &entry, nil +} + +func (d *RedisDirectory) LookupDID(ctx context.Context, did syntax.DID) (*identity.Identity, error) { + var entry IdentityEntry + err := d.identityCache.Get(ctx, did.String(), &entry) + if err != nil && err != cache.ErrCacheMiss { + return nil, err + } + if err != cache.ErrCacheMiss && !d.IsIdentityStale(&entry) { + identityCacheHits.Inc() + return entry.Identity, entry.Err + } + identityCacheMisses.Inc() + + // Coalesce multiple requests for the same DID + res := make(chan struct{}) + val, loaded := d.didLookupChans.LoadOrStore(did.String(), res) + if loaded { + identityRequestsCoalesced.Inc() + // Wait for the result from the pending request + select { + case <-val.(chan struct{}): + // The result should now be in the cache + err = d.identityCache.Get(ctx, did.String(), &entry) + if err != nil && err != cache.ErrCacheMiss { + return nil, err + } + if err != cache.ErrCacheMiss && !d.IsIdentityStale(&entry) { + return entry.Identity, entry.Err + } + return nil, fmt.Errorf("identity not found in cache after coalesce returned") + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + var doc *identity.Identity + // Update the Identity Entry from PLC and cache the result; updateDID reports a failed lookup via newEntry.Err, not its error return + newEntry, err := d.updateDID(ctx, did) + if err == nil && newEntry != nil { + doc, err = newEntry.Identity, newEntry.Err + } + // Cleanup the coalesce map and close the results channel +
d.didLookupChans.Delete(did.String()) + // Callers waiting will now get the result from the cache + close(res) + + return doc, err +} + +func (d *RedisDirectory) LookupHandle(ctx context.Context, h syntax.Handle) (*identity.Identity, error) { + did, err := d.ResolveHandle(ctx, h) + if err != nil { + return nil, err + } + ident, err := d.LookupDID(ctx, did) + if err != nil { + return nil, err + } + + declared, err := ident.DeclaredHandle() + if err != nil { + return nil, err + } + if declared != h { + return nil, fmt.Errorf("handle does not match that declared in DID document") + } + return ident, nil +} + +func (d *RedisDirectory) Lookup(ctx context.Context, a syntax.AtIdentifier) (*identity.Identity, error) { + handle, err := a.AsHandle() + if nil == err { // if not an error, is a handle + return d.LookupHandle(ctx, handle) + } + did, err := a.AsDID() + if nil == err { // if not an error, is a DID + return d.LookupDID(ctx, did) + } + return nil, fmt.Errorf("at-identifier neither a Handle nor a DID") +} + +func (d *RedisDirectory) Purge(ctx context.Context, a syntax.AtIdentifier) error { + handle, err := a.AsHandle() + if nil == err { // if not an error, is a handle + return d.handleCache.Delete(ctx, handle.String()) + } + did, err := a.AsDID() + if nil == err { // if not an error, is a DID + return d.identityCache.Delete(ctx, did.String()) + } + return fmt.Errorf("at-identifier neither a Handle nor a DID") +} + +var handleCacheHits = promauto.NewCounter(prometheus.CounterOpts{ + Name: "atproto_redis_directory_handle_cache_hits", + Help: "Number of cache hits for ATProto handle lookups", +}) + +var handleCacheMisses = promauto.NewCounter(prometheus.CounterOpts{ + Name: "atproto_redis_directory_handle_cache_misses", + Help: "Number of cache misses for ATProto handle lookups", +}) + +var identityCacheHits = promauto.NewCounter(prometheus.CounterOpts{ + Name: "atproto_redis_directory_identity_cache_hits", + Help: "Number of cache hits for ATProto identity lookups", 
+}) + +var identityCacheMisses = promauto.NewCounter(prometheus.CounterOpts{ + Name: "atproto_redis_directory_identity_cache_misses", + Help: "Number of cache misses for ATProto identity lookups", +}) + +var identityRequestsCoalesced = promauto.NewCounter(prometheus.CounterOpts{ + Name: "atproto_redis_directory_identity_requests_coalesced", + Help: "Number of identity requests coalesced", +}) + +var handleRequestsCoalesced = promauto.NewCounter(prometheus.CounterOpts{ + Name: "atproto_redis_directory_handle_requests_coalesced", + Help: "Number of handle requests coalesced", +}) diff --git a/automod/rules/all.go b/automod/rules/all.go index 19b837080..8e90c35ac 100644 --- a/automod/rules/all.go +++ b/automod/rules/all.go @@ -7,6 +7,8 @@ import ( func DefaultRules() automod.RuleSet { rules := automod.RuleSet{ PostRules: []automod.PostRuleFunc{ + MisleadingURLPostRule, + MisleadingMentionPostRule, BanHashtagsPostRule, }, } diff --git a/cmd/hepa/consumer.go b/cmd/hepa/consumer.go index 8cb99e50f..36d522f37 100644 --- a/cmd/hepa/consumer.go +++ b/cmd/hepa/consumer.go @@ -59,9 +59,12 @@ func (s *Server) RunConsumer(ctx context.Context) error { // TODO: other event callbacks as needed } + // start at higher parallelism (somewhat arbitrary) + scaleSettings := autoscaling.DefaultAutoscaleSettings() + scaleSettings.Concurrency = 6 return events.HandleRepoStream( ctx, con, autoscaling.NewScheduler( - autoscaling.DefaultAutoscaleSettings(), + scaleSettings, s.bgshost, rsc.EventHandler, ), @@ -72,8 +75,7 @@ func (s *Server) RunConsumer(ctx context.Context) error { func (s *Server) HandleRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error { logger := s.logger.With("event", "commit", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq) - // XXX: debug, not info - logger.Info("received commit event") + logger.Debug("received commit event") if evt.TooBig { logger.Warn("skipping tooBig events for now") diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 
05bd5e578..dbcadbcee 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -9,6 +9,7 @@ import ( "time" "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/automod" "github.com/carlmjohnson/versioninfo" _ "github.com/joho/godotenv/autoload" @@ -83,18 +84,25 @@ var runCmd = &cli.Command{ &cli.StringFlag{ Name: "mod-password", Usage: "for mod service login", - EnvVars: []string{"ATP_MOD_AUTH_PASSWORD"}, + EnvVars: []string{"HEPA_MOD_AUTH_PASSWORD"}, }, &cli.StringFlag{ Name: "mod-admin-token", Usage: "admin authentication password for mod service", - EnvVars: []string{"ATP_MOD_AUTH_ADMIN_TOKEN"}, + EnvVars: []string{"HEPA_MOD_AUTH_ADMIN_TOKEN"}, }, &cli.StringFlag{ Name: "sets-json-path", Usage: "file path of JSON file containing static sets", EnvVars: []string{"HEPA_SETS_JSON_PATH"}, }, + &cli.StringFlag{ + Name: "redis-url", + Usage: "redis connection URL", + // redis://:@localhost:6379/ + // redis://localhost:6379/0 + EnvVars: []string{"HEPA_REDIS_URL"}, + }, }, Action: func(cctx *cli.Context) error { ctx := context.Background() @@ -114,10 +122,20 @@ var runCmd = &cli.Command{ TryAuthoritativeDNS: true, SkipDNSDomainSuffixes: []string{".bsky.social"}, } - dir := identity.NewCacheDirectory(&baseDir, 1_500_000, time.Hour*24, time.Minute*2) + var dir identity.Directory + if cctx.String("redis-url") != "" { + rdir, err := automod.NewRedisDirectory(&baseDir, cctx.String("redis-url"), time.Hour*24, time.Minute*2) + if err != nil { + return err + } + dir = rdir + } else { + cdir := identity.NewCacheDirectory(&baseDir, 1_500_000, time.Hour*24, time.Minute*2) + dir = &cdir + } srv, err := NewServer( - &dir, + dir, Config{ BGSHost: cctx.String("atp-bgs-host"), Logger: logger, diff --git a/go.mod b/go.mod index 9ecb7ff9c..459f9861c 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/carlmjohnson/versioninfo v0.22.5 github.com/dustinkirkland/golang-petname v0.0.0-20230626224747-e794b9370d49 github.com/flosch/pongo2/v6 
v6.0.0 + github.com/go-redis/cache/v9 v9.0.0 github.com/goccy/go-json v0.10.2 github.com/gocql/gocql v1.6.0 github.com/golang-jwt/jwt v3.2.2+incompatible @@ -46,6 +47,7 @@ require ( github.com/polydawn/refmt v0.89.1-0.20221221234430-40501e09de1f github.com/prometheus/client_golang v1.14.0 github.com/prometheus/client_model v0.3.0 + github.com/redis/go-redis/v9 v9.3.0 github.com/rivo/uniseg v0.1.0 github.com/samber/slog-echo v1.2.1 github.com/scylladb/gocqlx/v2 v2.8.1-0.20230309105046-dec046bd85e6 @@ -76,6 +78,14 @@ require ( gorm.io/plugin/opentelemetry v0.1.3 ) +require ( + github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/klauspost/compress v1.13.6 // indirect + github.com/vmihailenco/go-tinylfu v0.2.2 // indirect + github.com/vmihailenco/msgpack/v5 v5.3.4 // indirect + github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect +) + require ( github.com/alexbrainman/goissue34681 v0.0.0-20191006012335-3fc7a47baff5 // indirect github.com/beorn7/perks v1.0.1 // indirect diff --git a/go.sum b/go.sum index b2aaa467c..76a053df1 100644 --- a/go.sum +++ b/go.sum @@ -71,6 +71,10 @@ github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4Yn github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/brianvoe/gofakeit/v6 v6.20.2 h1:FLloufuC7NcbHqDzVQ42CG9AKryS1gAGCRt8nQRsW+Y= github.com/brianvoe/gofakeit/v6 v6.20.2/go.mod h1:Ow6qC71xtwm79anlwKRlWZW6zVq9D2XHE4QSSMP/rU8= +github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= +github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= +github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA= +github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0= github.com/carlmjohnson/versioninfo v0.22.5 h1:O00sjOLUAFxYQjlN/bzYTuZiS0y6fWDQjMRvwtKgwwc= github.com/carlmjohnson/versioninfo v0.22.5/go.mod 
h1:QT9mph3wcVfISUKd0i9sZfVrPviHuSF+cUtLjm2WSf8= github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= @@ -99,6 +103,8 @@ github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/decred/dcrd/crypto/blake256 v1.0.1/go.mod h1:2OfgNZ5wDpcsFmHmCK5gZTPcCXqlm2ArzUIkw9czNJo= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etlyjdBU4sfcs2WYQMs= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= +github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= github.com/dustinkirkland/golang-petname v0.0.0-20230626224747-e794b9370d49 h1:6SNWi8VxQeCSwmLuTbEvJd7xvPmdS//zvMBWweZLgck= github.com/dustinkirkland/golang-petname v0.0.0-20230626224747-e794b9370d49/go.mod h1:V+Qd57rJe8gd4eiGzZyg4h54VLHmYVVw54iMnlAMrF8= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -112,6 +118,9 @@ github.com/flosch/pongo2/v6 v6.0.0/go.mod h1:CuDpFm47R0uGGE7z13/tTlt1Y6zdxvr2RLT github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod 
h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -127,11 +136,15 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.5.1 h1:otpy5pqBCBZ1ng9RQ0dPu4PN7ba75Y/aA+UpowDyNVA= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-redis/cache/v9 v9.0.0 h1:0thdtFo0xJi0/WXbRVu8B066z8OvVymXTJGaXrVWnN0= +github.com/go-redis/cache/v9 v9.0.0/go.mod h1:cMwi1N8ASBOufbIvk7cdXe2PbPjK/WMRL95FFHWsSgI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= @@ -206,6 +219,7 @@ github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hf github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod 
h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= @@ -239,9 +253,11 @@ github.com/hashicorp/golang-lru/arc/v2 v2.0.6 h1:4NU7uP5vSoK6TbaMj3NtY478TTAWLso github.com/hashicorp/golang-lru/arc/v2 v2.0.6/go.mod h1:cfdDIX05DWvYV6/shsxDfa/OVcRieOt+q4FnM8x+Xno= github.com/hashicorp/golang-lru/v2 v2.0.6 h1:3xi/Cafd1NaoEnS/yDssIiuVeDVywU0QdFGl3aQaQHM= github.com/hashicorp/golang-lru/v2 v2.0.6/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huin/goupnp v1.0.3 h1:N8No57ls+MnjlB+JPiCVSOyy/ot7MJTqlo7rn+NYSqQ= github.com/huin/goupnp v1.0.3/go.mod h1:ZxNlw5WqJj6wSsRK5+YfflQGXYfccj5VgQsMNixHM7Y= github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/icrowley/fake v0.0.0-20221112152111-d7b7e2276db2 h1:qU3v73XG4QAqCPHA4HOpfC1EfUvtLIDvQK4mNQ0LvgI= github.com/icrowley/fake v0.0.0-20221112152111-d7b7e2276db2/go.mod h1:dQ6TM/OGAe+cMws81eTe4Btv1dKxfPZ2CX+YaAFAPN4= github.com/ipfs/bbloom v0.0.4 h1:Gi+8EGJ2y5qiD5FbsbpX/TMNcJw8gSqr7eyjHa4Fhvs= @@ -367,6 +383,8 @@ github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8 github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod 
h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc= +github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU= @@ -491,6 +509,32 @@ github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/n github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= +github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/ginkgo/v2 v2.1.3/go.mod h1:vw5CSIxN1JObi/U8gcbwft7ZxR2dgaR70JSE3/PpL4c= +github.com/onsi/ginkgo/v2 v2.1.4/go.mod h1:um6tUpWM/cxCK3/FK8BXqEiUMUwRgSM4JXG47RKZmLU= +github.com/onsi/ginkgo/v2 v2.1.6/go.mod h1:MEH45j8TBi6u9BMogfbp0stKC5cdGjumZj5Y7AG4VIk= +github.com/onsi/ginkgo/v2 v2.3.0/go.mod h1:Eew0uilEqZmIEZr8JrvYlvOM7Rr6xzTmMV8AyFNU9d0= +github.com/onsi/ginkgo/v2 
v2.4.0/go.mod h1:iHkDK1fKGcBoEHT5W7YBq4RFWaQulw+caOMkAt4OrFo= +github.com/onsi/ginkgo/v2 v2.5.0/go.mod h1:Luc4sArBICYCS8THh8v3i3i5CuSZO+RaQRaJoeNwomw= +github.com/onsi/ginkgo/v2 v2.7.0/go.mod h1:yjiuMwPokqY1XauOgju45q3sJt6VzQ/Fict1LFVcsAo= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.17.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= +github.com/onsi/gomega v1.20.1/go.mod h1:DtrZpjmvpn2mPm4YWQa0/ALMDj9v4YxLgojwPeREyVo= +github.com/onsi/gomega v1.21.1/go.mod h1:iYAIXgPSaDHak0LCMA+AWBpIKBr8WZicMxnE8luStNc= +github.com/onsi/gomega v1.22.1/go.mod h1:x6n7VNe4hw0vkyYUM4mjIXx3JbLiPaBPNgB7PRQ1tuM= +github.com/onsi/gomega v1.24.0/go.mod h1:Z/NWtiqwBrwUt4/2loMmHL63EDLnYHmVbuBpDr2vQAg= +github.com/onsi/gomega v1.24.1/go.mod h1:3AOiACssS3/MajrniINInwbfOOtfZvplPzuRSmvt1jM= +github.com/onsi/gomega v1.25.0 h1:Vw7br2PCDYijJHSfBOWhov+8cAnUf8MfMaIOV323l6Y= +github.com/onsi/gomega v1.25.0/go.mod h1:r+zV744Re+DiYCIPRlYOTxn0YkOLcAnW8k1xXdMPGhM= github.com/opensearch-project/opensearch-go/v2 v2.2.0 h1:6RicCBiqboSVtLMjSiKgVQIsND4I3sxELg9uwWe/TKM= github.com/opensearch-project/opensearch-go/v2 v2.2.0/go.mod h1:R8NTTQMmfSRsmZdfEn2o9ZSuSXn0WTHPYhzgl7LCFLY= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= @@ -542,6 +586,9 @@ github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJf github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= github.com/prometheus/statsd_exporter v0.22.7 h1:7Pji/i2GuhK6Lu7DHrtTkFmNBCudCPT1pX2CziuyQR0= github.com/prometheus/statsd_exporter v0.22.7/go.mod h1:N/TevpjkIh9ccs6nuzY3jQn9dFqnUakOjnEuMPJJJnI= +github.com/redis/go-redis/v9 v9.0.0-rc.4/go.mod h1:Vo3EsyWnicKnSKCA7HhgnvnyA74wOA69Cd2Meli5mmA= 
+github.com/redis/go-redis/v9 v9.3.0 h1:RiVDjmig62jIWp7Kk4XVLs0hzV6pI3PyTnnL0cnn0u0= +github.com/redis/go-redis/v9 v9.3.0/go.mod h1:hdY0cQFCN4fnSYT6TkisLufl/4W5UIXyv0b/CLO2V2M= github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= @@ -582,6 +629,7 @@ github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpE github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= @@ -598,6 +646,12 @@ github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyC github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/fasttemplate v1.2.2 h1:lxLXG0uE3Qnshl9QyaK6XJxMXlQZELvChBOCmQD0Loo= github.com/valyala/fasttemplate v1.2.2/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= +github.com/vmihailenco/go-tinylfu v0.2.2 h1:H1eiG6HM36iniK6+21n9LLpzx1G9R3DJa2UjUjbynsI= +github.com/vmihailenco/go-tinylfu v0.2.2/go.mod h1:CutYi2Q9puTxfcolkliPq4npPuofg9N9t8JVrjzwa3Q= +github.com/vmihailenco/msgpack/v5 v5.3.4 h1:qMKAwOV+meBw2Y8k9cVwAy7qErtYCwBzZ2ellBfvnqc= +github.com/vmihailenco/msgpack/v5 v5.3.4/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= +github.com/vmihailenco/tagparser/v2 v2.0.0 
h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/warpfork/go-testmark v0.11.0 h1:J6LnV8KpceDvo7spaNU4+DauH2n1x+6RaO2rJrmpQ9U= github.com/warpfork/go-testmark v0.11.0/go.mod h1:jhEf8FVxd+F17juRubpmut64NEG6I2rgkUhlcqqXwE0= github.com/warpfork/go-wish v0.0.0-20200122115046-b9ea61034e4a/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= @@ -618,6 +672,7 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= gitlab.com/yawning/secp256k1-voi v0.0.0-20230815035612-a7264edccf80 h1:+Hti+G65Kc88hK0GFQ6NzzncsOmoqxmlXaxM1+FPPqM= gitlab.com/yawning/secp256k1-voi v0.0.0-20230815035612-a7264edccf80/go.mod h1:/y/V339mxv2sZmYYR64O07VuCpdNZqCTwO8ZcouTMI8= @@ -684,6 +739,7 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.1.0/go.mod h1:RecgLatLF4+eUMCP1PoPZQb+cVrJcOPbHkTkbkB9sbw= golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= @@ -721,12 +777,16 
@@ golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI= +golang.org/x/mod v0.7.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -749,6 +809,7 @@ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod 
h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= @@ -757,11 +818,17 @@ golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211015210444-4f30a5c0130f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= +golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.1.0/go.mod h1:Cx3nUiGt4eDBEyega/BKRp+/AlGL8hYe7U9odMt2Cco= +golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY= +golang.org/x/net v0.3.0/go.mod h1:MBQ8lrhLObU/6UmLb4fmbmk5OcyYmqtbGd/9yIeKjEE= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/net 
v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= @@ -791,6 +858,7 @@ golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190219092855-153ac476189d/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -802,8 +870,11 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -824,6 +895,7 @@ golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210309074719-68d13333faf2/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -833,16 +905,22 @@ golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211019181941-9d821ace8654/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211103235746-7861aae1554b/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220422013727-9388b58f7150/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220708085239-5a0f0661e09d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -853,6 +931,9 @@ golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXR golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= +golang.org/x/term v0.3.0/go.mod 
h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= +golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.11.0/go.mod h1:zC9APTIj3jG3FdV/Ons+XE1riIZXG4aZ4GTHiPZJPIU= @@ -864,6 +945,8 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= @@ -919,9 +1002,13 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= +golang.org/x/tools v0.1.10/go.mod h1:Uh6Zz+xoGYZom868N8YTex3t7RhtHDBrE8Gzo9bV56E= golang.org/x/tools v0.1.12/go.mod 
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= +golang.org/x/tools v0.4.0/go.mod h1:UE5sM2OK9E/d67R0ANs2xJizIymRP5gJU295PvKXxjQ= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.8.0 h1:vSDcovVPld282ceKgDimkRSC8kpaH1dgyc9UMzlt84Y= golang.org/x/tools v0.8.0/go.mod h1:JxBZ99ISMI5ViVkT1tr6tdNmXeTrcpVSD3vZ1RsRdN4= @@ -1026,8 +1113,11 @@ gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From b195b04539d9b72e8200ece71fdb073606695d3c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 14 Nov 2023 18:48:44 -0800 Subject: [PATCH 15/35] automod: fix label creation/action --- automod/engine.go | 6 ++++++ automod/event.go | 3 ++- cmd/hepa/server.go | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/automod/engine.go b/automod/engine.go index 3527c460e..9b4d98e7c 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -98,6 +98,9 @@ func (e 
*Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID if err := evt.PersistAccountActions(ctx); err != nil { return err } + if err := evt.PersistRecordActions(ctx); err != nil { + return err + } if err := evt.PersistCounters(ctx); err != nil { return err } @@ -114,6 +117,9 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID if err := evt.PersistAccountActions(ctx); err != nil { return err } + if err := evt.PersistRecordActions(ctx); err != nil { + return err + } if err := evt.PersistCounters(ctx); err != nil { return err } diff --git a/automod/event.go b/automod/event.go index 98fe17218..26428f970 100644 --- a/automod/event.go +++ b/automod/event.go @@ -206,8 +206,9 @@ func (e *RecordEvent) PersistRecordActions(ctx context.Context) error { } xrpcc := e.Engine.AdminClient if len(e.RecordLabels) > 0 { + // TODO: this does an action, not just create labels; will update after event refactor _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ - Action: "com.atproto.admin.defs#createLabels", + Action: "com.atproto.admin.defs#flag", CreateLabelVals: dedupeStrings(e.RecordLabels), Reason: "automod", CreatedBy: xrpcc.Auth.Did, diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index f6aa9145a..555bcaa97 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -54,6 +54,7 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { Client: util.RobustHTTPClient(), Host: config.ModHost, AdminToken: &config.ModAdminToken, + Auth: &xrpc.AuthInfo{}, } auth, err := comatproto.ServerCreateSession(context.TODO(), xrpcc, &comatproto.ServerCreateSession_Input{ From 1045d0503b964a85fb2b4f624f40505643326d5f Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 15 Nov 2023 18:48:54 -0800 Subject: [PATCH 16/35] automod: redis counters; some refactors --- automod/countstore.go | 22 ++++++------- automod/engine.go | 19 +++-------- automod/engine_test.go | 4 +-- 
automod/event.go | 61 +++++++++++++++++----------------- automod/redis_counters.go | 65 +++++++++++++++++++++++++++++++++++++ automod/redis_directory.go | 20 +++++++----- automod/rules/all.go | 1 + automod/rules/hashtags.go | 4 +-- automod/rules/misleading.go | 10 +++--- automod/rules/replies.go | 16 +++++++++ cmd/hepa/main.go | 1 + cmd/hepa/server.go | 14 +++++++- 12 files changed, 162 insertions(+), 75 deletions(-) create mode 100644 automod/redis_counters.go create mode 100644 automod/rules/replies.go diff --git a/automod/countstore.go b/automod/countstore.go index ca07ec118..641594b41 100644 --- a/automod/countstore.go +++ b/automod/countstore.go @@ -14,8 +14,8 @@ const ( ) type CountStore interface { - GetCount(ctx context.Context, key, period string) (int, error) - Increment(ctx context.Context, key string) error + GetCount(ctx context.Context, name, val, period string) (int, error) + Increment(ctx context.Context, name, val string) error // TODO: batch increment method } @@ -30,33 +30,33 @@ func NewMemCountStore() MemCountStore { } } -func PeriodKey(key, period string) string { +func PeriodBucket(name, val, period string) string { switch period { case PeriodTotal: - return key + return fmt.Sprintf("%s/%s", name, val) case PeriodDay: t := time.Now().UTC().Format(time.DateOnly) - return fmt.Sprintf("%s:%s", key, t) + return fmt.Sprintf("%s/%s/%s", name, val, t) case PeriodHour: t := time.Now().UTC().Format(time.RFC3339)[0:13] - return fmt.Sprintf("%s:%s", key, t) + return fmt.Sprintf("%s/%s/%s", name, val, t) default: slog.Warn("unhandled counter period", "period", period) - return key + return fmt.Sprintf("%s/%s", name, val) } } -func (s MemCountStore) GetCount(ctx context.Context, key, period string) (int, error) { - v, ok := s.Counts[PeriodKey(key, period)] +func (s MemCountStore) GetCount(ctx context.Context, name, val, period string) (int, error) { + v, ok := s.Counts[PeriodBucket(name, val, period)] if !ok { return 0, nil } return v, nil } -func (s 
MemCountStore) Increment(ctx context.Context, key string) error { +func (s MemCountStore) Increment(ctx context.Context, name, val string) error { for _, p := range []string{PeriodTotal, PeriodDay, PeriodHour} { - k := PeriodKey(key, p) + k := PeriodBucket(name, val, p) v, ok := s.Counts[k] if !ok { v = 0 diff --git a/automod/engine.go b/automod/engine.go index 9b4d98e7c..24edb8949 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -54,10 +54,7 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. return evt.Err } evt.CanonicalLogLine() - if err := evt.PersistAccountActions(ctx); err != nil { - return err - } - if err := evt.PersistCounters(ctx); err != nil { + if err := evt.PersistActions(ctx); err != nil { return err } return nil @@ -95,10 +92,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return evt.Err } evt.CanonicalLogLine() - if err := evt.PersistAccountActions(ctx); err != nil { - return err - } - if err := evt.PersistRecordActions(ctx); err != nil { + if err := evt.PersistActions(ctx); err != nil { return err } if err := evt.PersistCounters(ctx); err != nil { @@ -114,10 +108,7 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return evt.Err } evt.CanonicalLogLine() - if err := evt.PersistAccountActions(ctx); err != nil { - return err - } - if err := evt.PersistRecordActions(ctx); err != nil { + if err := evt.PersistActions(ctx); err != nil { return err } if err := evt.PersistCounters(ctx); err != nil { @@ -167,8 +158,8 @@ func (e *Engine) NewRecordEvent(ident *identity.Identity, path, recCID string, r } } -func (e *Engine) GetCount(key, period string) (int, error) { - return e.Counters.GetCount(context.TODO(), key, period) +func (e *Engine) GetCount(name, val, period string) (int, error) { + return e.Counters.GetCount(context.TODO(), name, val, period) } // checks if `val` is an element of set `name` diff --git a/automod/engine_test.go 
b/automod/engine_test.go index 95bfa61a9..d4050e931 100644 --- a/automod/engine_test.go +++ b/automod/engine_test.go @@ -16,7 +16,7 @@ import ( func simpleRule(evt *PostEvent) error { for _, tag := range evt.Post.Tags { if evt.InSet("banned-hashtags", tag) { - evt.AddLabel("bad-hashtag") + evt.AddRecordLabel("bad-hashtag") break } } @@ -25,7 +25,7 @@ func simpleRule(evt *PostEvent) error { if feat.RichtextFacet_Tag != nil { tag := feat.RichtextFacet_Tag.Tag if evt.InSet("banned-hashtags", tag) { - evt.AddLabel("bad-hashtag") + evt.AddRecordLabel("bad-hashtag") break } } diff --git a/automod/event.go b/automod/event.go index 26428f970..baf62b6e4 100644 --- a/automod/event.go +++ b/automod/event.go @@ -21,39 +21,26 @@ type AccountMeta struct { // TODO: createdAt / age } +type CounterRef struct { + Name string + Val string +} + // base type for events. events are both containers for data about the event itself (similar to an HTTP request type); aggregate results and state (counters, mod actions) to be persisted after all rules are run; and act as an API for additional network reads and operations. 
type Event struct { Engine *Engine Err error Logger *slog.Logger Account AccountMeta - CounterIncrements []string + CounterIncrements []CounterRef AccountLabels []string AccountFlags []string AccountReports []ModReport AccountTakedown bool } -func (e *Event) CountTotal(key string) int { - v, err := e.Engine.GetCount(key, PeriodTotal) - if err != nil { - e.Err = err - return 0 - } - return v -} - -func (e *Event) CountDay(key string) int { - v, err := e.Engine.GetCount(key, PeriodDay) - if err != nil { - e.Err = err - return 0 - } - return v -} - -func (e *Event) CountHour(key string) int { - v, err := e.Engine.GetCount(key, PeriodHour) +func (e *Event) GetCount(name, val, period string) int { + v, err := e.Engine.GetCount(name, val, period) if err != nil { e.Err = err return 0 @@ -70,19 +57,19 @@ func (e *Event) InSet(name, val string) bool { return v } -func (e *Event) IncrementCounter(key string) { - e.CounterIncrements = append(e.CounterIncrements, key) +func (e *Event) Increment(name, val string) { + e.CounterIncrements = append(e.CounterIncrements, CounterRef{Name: name, Val: val}) } func (e *Event) TakedownAccount() { e.AccountTakedown = true } -func (e *Event) AddLabelAccount(val string) { +func (e *Event) AddAccountLabel(val string) { e.AccountLabels = append(e.AccountLabels, val) } -func (e *Event) AddFlag(val string) { +func (e *Event) AddAccountFlag(val string) { e.AccountFlags = append(e.AccountFlags, val) } @@ -144,9 +131,14 @@ func (e *Event) PersistAccountActions(ctx context.Context) error { return nil } +func (e *Event) PersistActions(ctx context.Context) error { + return e.PersistAccountActions(ctx) +} + func (e *Event) PersistCounters(ctx context.Context) error { - for _, k := range dedupeStrings(e.CounterIncrements) { - err := e.Engine.Counters.Increment(ctx, k) + // TODO: dedupe this array + for _, ref := range e.CounterIncrements { + err := e.Engine.Counters.Increment(ctx, ref.Name, ref.Val) if err != nil { return err } @@ -180,19 +172,19 @@ 
type RecordEvent struct { // TODO: commit metadata } -func (e *RecordEvent) Takedown() { +func (e *RecordEvent) TakedownRecord() { e.RecordTakedown = true } -func (e *RecordEvent) AddLabel(val string) { +func (e *RecordEvent) AddRecordLabel(val string) { e.RecordLabels = append(e.RecordLabels, val) } -func (e *RecordEvent) AddFlag(val string) { +func (e *RecordEvent) AddRecordFlag(val string) { e.RecordFlags = append(e.RecordFlags, val) } -func (e *RecordEvent) Report(reason, comment string) { +func (e *RecordEvent) ReportRecord(reason, comment string) { e.RecordReports = append(e.RecordReports, ModReport{ReasonType: reason, Comment: comment}) } @@ -249,6 +241,13 @@ func (e *RecordEvent) PersistRecordActions(ctx context.Context) error { return nil } +func (e *RecordEvent) PersistActions(ctx context.Context) error { + if err := e.PersistAccountActions(ctx); err != nil { + return err + } + return e.PersistRecordActions(ctx) +} + func (e *RecordEvent) CanonicalLogLine() { e.Logger.Info("canonical-event-line", "accountLabels", e.AccountLabels, diff --git a/automod/redis_counters.go b/automod/redis_counters.go new file mode 100644 index 000000000..f95fdbd8c --- /dev/null +++ b/automod/redis_counters.go @@ -0,0 +1,65 @@ +package automod + +import ( + "context" + "time" + + "github.com/redis/go-redis/v9" +) + +var redisCountPrefix string = "count/" + +type RedisCountStore struct { + Client *redis.Client +} + +func NewRedisCountStore(redisURL string) (*RedisCountStore, error) { + opt, err := redis.ParseURL(redisURL) + if err != nil { + return nil, err + } + rdb := redis.NewClient(opt) + // check redis connection + _, err = rdb.Ping(context.TODO()).Result() + if err != nil { + return nil, err + } + rcs := RedisCountStore{ + Client: rdb, + } + return &rcs, nil +} + +func (s *RedisCountStore) GetCount(ctx context.Context, name, val, period string) (int, error) { + key := redisCountPrefix + PeriodBucket(name, val, period) + c, err := s.Client.Get(ctx, key).Int() + if err == 
redis.Nil { + return 0, nil + } else if err != nil { + return 0, err + } + return c, nil +} + +func (s *RedisCountStore) Increment(ctx context.Context, name, val string) error { + + var key string + + // increment multiple counters in a single redis round-trip + multi := s.Client.Pipeline() + + key = redisCountPrefix + PeriodBucket(name, val, PeriodHour) + multi.Incr(ctx, key) + multi.Expire(ctx, key, 2*time.Hour) + + key = redisCountPrefix + PeriodBucket(name, val, PeriodDay) + multi.Incr(ctx, key) + multi.Expire(ctx, key, 48*time.Hour) + + key = redisCountPrefix + PeriodBucket(name, val, PeriodTotal) + multi.Incr(ctx, key) + // no expiration for total + + _, err := multi.Exec(ctx) + return err +} diff --git a/automod/redis_directory.go b/automod/redis_directory.go index acf9d178e..5add6ab0e 100644 --- a/automod/redis_directory.go +++ b/automod/redis_directory.go @@ -15,6 +15,8 @@ import ( "github.com/redis/go-redis/v9" ) +var redisDirPrefix string = "dir/" + // uses redis as a cache for identity lookups. 
includes a local cache layer as well, for hot keys type RedisDirectory struct { Inner identity.Directory @@ -93,7 +95,7 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha } d.handleCache.Set(&cache.Item{ Ctx: ctx, - Key: h.String(), + Key: redisDirPrefix + h.String(), Value: he, TTL: d.ErrTTL, }) @@ -113,13 +115,13 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha d.identityCache.Set(&cache.Item{ Ctx: ctx, - Key: ident.DID.String(), + Key: redisDirPrefix + ident.DID.String(), Value: entry, TTL: d.HitTTL, }) d.handleCache.Set(&cache.Item{ Ctx: ctx, - Key: h.String(), + Key: redisDirPrefix + h.String(), Value: he, TTL: d.HitTTL, }) @@ -128,7 +130,7 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (syntax.DID, error) { var entry HandleEntry - err := d.handleCache.Get(ctx, h.String(), &entry) + err := d.handleCache.Get(ctx, redisDirPrefix+h.String(), &entry) if err != nil && err != cache.ErrCacheMiss { return "", err } @@ -147,7 +149,7 @@ func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (sy select { case <-val.(chan struct{}): // The result should now be in the cache - err := d.handleCache.Get(ctx, h.String(), entry) + err := d.handleCache.Get(ctx, redisDirPrefix+h.String(), entry) if err != nil && err != cache.ErrCacheMiss { return "", err } @@ -198,14 +200,14 @@ func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*Identi d.identityCache.Set(&cache.Item{ Ctx: ctx, - Key: did.String(), + Key: redisDirPrefix + did.String(), Value: entry, TTL: d.HitTTL, }) if he != nil { d.handleCache.Set(&cache.Item{ Ctx: ctx, - Key: ident.Handle.String(), + Key: redisDirPrefix + ident.Handle.String(), Value: *he, TTL: d.HitTTL, }) @@ -215,7 +217,7 @@ func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*Identi func (d *RedisDirectory) 
LookupDID(ctx context.Context, did syntax.DID) (*identity.Identity, error) { var entry IdentityEntry - err := d.identityCache.Get(ctx, did.String(), &entry) + err := d.identityCache.Get(ctx, redisDirPrefix+did.String(), &entry) if err != nil && err != cache.ErrCacheMiss { return nil, err } @@ -234,7 +236,7 @@ func (d *RedisDirectory) LookupDID(ctx context.Context, did syntax.DID) (*identi select { case <-val.(chan struct{}): // The result should now be in the cache - err = d.identityCache.Get(ctx, did.String(), &entry) + err = d.identityCache.Get(ctx, redisDirPrefix+did.String(), &entry) if err != nil && err != cache.ErrCacheMiss { return nil, err } diff --git a/automod/rules/all.go b/automod/rules/all.go index 8e90c35ac..36f2df578 100644 --- a/automod/rules/all.go +++ b/automod/rules/all.go @@ -9,6 +9,7 @@ func DefaultRules() automod.RuleSet { PostRules: []automod.PostRuleFunc{ MisleadingURLPostRule, MisleadingMentionPostRule, + ReplyCountPostRule, BanHashtagsPostRule, }, } diff --git a/automod/rules/hashtags.go b/automod/rules/hashtags.go index 3e91cb4cf..609c2cc28 100644 --- a/automod/rules/hashtags.go +++ b/automod/rules/hashtags.go @@ -7,7 +7,7 @@ import ( func BanHashtagsPostRule(evt *automod.PostEvent) error { for _, tag := range evt.Post.Tags { if evt.InSet("banned-hashtags", tag) { - evt.AddLabel("bad-hashtag") + evt.AddRecordLabel("bad-hashtag") break } } @@ -16,7 +16,7 @@ func BanHashtagsPostRule(evt *automod.PostEvent) error { if feat.RichtextFacet_Tag != nil { tag := feat.RichtextFacet_Tag.Tag if evt.InSet("banned-hashtags", tag) { - evt.AddLabel("bad-hashtag") + evt.AddRecordLabel("bad-hashtag") break } } diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index e7af93fa4..66ad99ddb 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -14,7 +14,7 @@ func MisleadingURLPostRule(evt *automod.PostEvent) error { if feat.RichtextFacet_Link != nil { if int(facet.Index.ByteEnd) > len([]byte(evt.Post.Text)) || 
facet.Index.ByteStart > facet.Index.ByteEnd { evt.Logger.Warn("invalid facet range") - evt.AddLabel("invalid") // TODO: or some other "this record is corrupt" indicator? + evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? continue } txt := string([]byte(evt.Post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) @@ -36,7 +36,7 @@ func MisleadingURLPostRule(evt *automod.PostEvent) error { // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! if linkURL.Host != textURL.Host { evt.Logger.Warn("misleading mismatched domains", "link", linkURL.Host, "text", textURL.Host) - evt.AddLabel("misleading") + evt.AddRecordLabel("misleading") } } } @@ -52,7 +52,7 @@ func MisleadingMentionPostRule(evt *automod.PostEvent) error { if feat.RichtextFacet_Mention != nil { if int(facet.Index.ByteEnd) > len([]byte(evt.Post.Text)) || facet.Index.ByteStart > facet.Index.ByteEnd { evt.Logger.Warn("invalid facet range") - evt.AddLabel("invalid") // TODO: or some other "this record is corrupt" indicator? + evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? continue } txt := string([]byte(evt.Post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) @@ -68,14 +68,14 @@ func MisleadingMentionPostRule(evt *automod.PostEvent) error { mentioned, err := evt.Engine.Directory.LookupHandle(ctx, handle) if err != nil { evt.Logger.Warn("could not resolve handle", "handle", handle) - evt.AddLabel("misleading") + evt.AddRecordLabel("misleading") break } // TODO: check if mentioned DID was recently updated? 
might be a caching issue if mentioned.DID.String() != feat.RichtextFacet_Mention.Did { evt.Logger.Warn("misleading mention", "text", txt, "did", mentioned.DID) - evt.AddLabel("misleading") + evt.AddRecordLabel("misleading") continue } } diff --git a/automod/rules/replies.go b/automod/rules/replies.go new file mode 100644 index 000000000..e69a37217 --- /dev/null +++ b/automod/rules/replies.go @@ -0,0 +1,16 @@ +package rules + +import ( + "github.com/bluesky-social/indigo/automod" +) + +func ReplyCountPostRule(evt *automod.PostEvent) error { + if evt.Post.Reply != nil { + did := evt.Account.Identity.DID.String() + if evt.GetCount("reply", did, automod.PeriodDay) > 3 { + evt.AddAccountFlag("frequent-replier") + } + evt.Increment("reply", did) + } + return nil +} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index dbcadbcee..357156baf 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -144,6 +144,7 @@ var runCmd = &cli.Command{ ModUsername: cctx.String("mod-handle"), ModPassword: cctx.String("mod-password"), SetsFileJSON: cctx.String("sets-json-path"), + RedisURL: cctx.String("redis-url"), }, ) if err != nil { diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 555bcaa97..92e452401 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -31,6 +31,7 @@ type Config struct { ModUsername string ModPassword string SetsFileJSON string + RedisURL string Logger *slog.Logger } @@ -79,10 +80,21 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { } } + var counters automod.CountStore + if config.RedisURL != "" { + c, err := automod.NewRedisCountStore(config.RedisURL) + if err != nil { + return nil, err + } + counters = c + } else { + counters = automod.NewMemCountStore() + } + engine := automod.Engine{ Logger: logger, Directory: dir, - Counters: automod.NewMemCountStore(), + Counters: counters, Sets: sets, Rules: rules.DefaultRules(), AdminClient: xrpcc, From d46ed60a32102a4d42a4cd73c87b7a81a9e52317 Mon Sep 17 00:00:00 2001 From: bryan 
newbold Date: Wed, 15 Nov 2023 19:05:51 -0800 Subject: [PATCH 17/35] rules: refactor with some helpers --- automod/rules/hashtags.go | 13 +---- automod/rules/helpers.go | 78 ++++++++++++++++++++++++++ automod/rules/misleading.go | 108 +++++++++++++++++------------------- 3 files changed, 131 insertions(+), 68 deletions(-) create mode 100644 automod/rules/helpers.go diff --git a/automod/rules/hashtags.go b/automod/rules/hashtags.go index 609c2cc28..cef83f45a 100644 --- a/automod/rules/hashtags.go +++ b/automod/rules/hashtags.go @@ -5,22 +5,11 @@ import ( ) func BanHashtagsPostRule(evt *automod.PostEvent) error { - for _, tag := range evt.Post.Tags { + for _, tag := range ExtractHashtags(evt.Post) { if evt.InSet("banned-hashtags", tag) { evt.AddRecordLabel("bad-hashtag") break } } - for _, facet := range evt.Post.Facets { - for _, feat := range facet.Features { - if feat.RichtextFacet_Tag != nil { - tag := feat.RichtextFacet_Tag.Tag - if evt.InSet("banned-hashtags", tag) { - evt.AddRecordLabel("bad-hashtag") - break - } - } - } - } return nil } diff --git a/automod/rules/helpers.go b/automod/rules/helpers.go new file mode 100644 index 000000000..d702ef059 --- /dev/null +++ b/automod/rules/helpers.go @@ -0,0 +1,78 @@ +package rules + +import ( + "fmt" + + appbsky "github.com/bluesky-social/indigo/api/bsky" +) + +func dedupeStrings(in []string) []string { + var out []string + seen := make(map[string]bool) + for _, v := range in { + if !seen[v] { + out = append(out, v) + seen[v] = true + } + } + return out +} + +func ExtractHashtags(post *appbsky.FeedPost) []string { + var tags []string + for _, tag := range post.Tags { + tags = append(tags, tag) + } + for _, facet := range post.Facets { + for _, feat := range facet.Features { + if feat.RichtextFacet_Tag != nil { + tags = append(tags, feat.RichtextFacet_Tag.Tag) + } + } + } + return dedupeStrings(tags) +} + +type PostFacet struct { + Text string + URL *string + DID *string + Tag *string +} + +func ExtractFacets(post 
*appbsky.FeedPost) ([]PostFacet, error) { + var out []PostFacet + + for _, facet := range post.Facets { + for _, feat := range facet.Features { + if int(facet.Index.ByteEnd) > len([]byte(post.Text)) || facet.Index.ByteStart > facet.Index.ByteEnd { + return nil, fmt.Errorf("invalid facet byte range") + } + + txt := string([]byte(post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) + if txt == "" { + return nil, fmt.Errorf("empty facet text") + } + + if feat.RichtextFacet_Link != nil { + out = append(out, PostFacet{ + Text: txt, + URL: &feat.RichtextFacet_Link.Uri, + }) + } + if feat.RichtextFacet_Tag != nil { + out = append(out, PostFacet{ + Text: txt, + Tag: &feat.RichtextFacet_Tag.Tag, + }) + } + if feat.RichtextFacet_Mention != nil { + out = append(out, PostFacet{ + Text: txt, + DID: &feat.RichtextFacet_Mention.Did, + }) + } + } + } + return out, nil +} diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index 66ad99ddb..44dc46de3 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -9,35 +9,32 @@ import ( ) func MisleadingURLPostRule(evt *automod.PostEvent) error { - for _, facet := range evt.Post.Facets { - for _, feat := range facet.Features { - if feat.RichtextFacet_Link != nil { - if int(facet.Index.ByteEnd) > len([]byte(evt.Post.Text)) || facet.Index.ByteStart > facet.Index.ByteEnd { - evt.Logger.Warn("invalid facet range") - evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? - continue - } - txt := string([]byte(evt.Post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) - - linkURL, err := url.Parse(feat.RichtextFacet_Link.Uri) - if err != nil { - evt.Logger.Warn("invalid link metadata URL", "uri", feat.RichtextFacet_Link.Uri) - continue - } + facets, err := ExtractFacets(evt.Post) + if err != nil { + evt.Logger.Warn("invalid facets", "err", err) + evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? 
+ return nil + } + for _, facet := range facets { + if facet.URL != nil { + linkURL, err := url.Parse(*facet.URL) + if err != nil { + evt.Logger.Warn("invalid link metadata URL", "uri", facet.URL) + continue + } - // try parsing as a full URL - textURL, err := url.Parse(txt) - if err != nil { - evt.Logger.Warn("invalid link text URL", "uri", txt) - continue - } + // try parsing as a full URL + textURL, err := url.Parse(facet.Text) + if err != nil { + evt.Logger.Warn("invalid link text URL", "uri", facet.Text) + continue + } - // for now just compare domains to handle the most obvious cases - // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! - if linkURL.Host != textURL.Host { - evt.Logger.Warn("misleading mismatched domains", "link", linkURL.Host, "text", textURL.Host) - evt.AddRecordLabel("misleading") - } + // for now just compare domains to handle the most obvious cases + // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! + if linkURL.Host != textURL.Host { + evt.Logger.Warn("misleading mismatched domains", "link", linkURL.Host, "text", textURL.Host) + evt.AddRecordLabel("misleading") } } } @@ -47,37 +44,36 @@ func MisleadingURLPostRule(evt *automod.PostEvent) error { func MisleadingMentionPostRule(evt *automod.PostEvent) error { // TODO: do we really need to route context around? probably ctx := context.TODO() - for _, facet := range evt.Post.Facets { - for _, feat := range facet.Features { - if feat.RichtextFacet_Mention != nil { - if int(facet.Index.ByteEnd) > len([]byte(evt.Post.Text)) || facet.Index.ByteStart > facet.Index.ByteEnd { - evt.Logger.Warn("invalid facet range") - evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? 
- continue - } - txt := string([]byte(evt.Post.Text)[facet.Index.ByteStart:facet.Index.ByteEnd]) - if txt[0] == '@' { - txt = txt[1:] - } - handle, err := syntax.ParseHandle(txt) - if err != nil { - evt.Logger.Warn("mention was not a valid handle", "text", txt) - continue - } + facets, err := ExtractFacets(evt.Post) + if err != nil { + evt.Logger.Warn("invalid facets", "err", err) + evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? + return nil + } + for _, facet := range facets { + if facet.DID != nil { + txt := facet.Text + if txt[0] == '@' { + txt = txt[1:] + } + handle, err := syntax.ParseHandle(txt) + if err != nil { + evt.Logger.Warn("mention was not a valid handle", "text", txt) + continue + } - mentioned, err := evt.Engine.Directory.LookupHandle(ctx, handle) - if err != nil { - evt.Logger.Warn("could not resolve handle", "handle", handle) - evt.AddRecordLabel("misleading") - break - } + mentioned, err := evt.Engine.Directory.LookupHandle(ctx, handle) + if err != nil { + evt.Logger.Warn("could not resolve handle", "handle", handle) + evt.AddRecordLabel("misleading") + break + } - // TODO: check if mentioned DID was recently updated? might be a caching issue - if mentioned.DID.String() != feat.RichtextFacet_Mention.Did { - evt.Logger.Warn("misleading mention", "text", txt, "did", mentioned.DID) - evt.AddRecordLabel("misleading") - continue - } + // TODO: check if mentioned DID was recently updated? 
might be a caching issue + if mentioned.DID.String() != *facet.DID { + evt.Logger.Warn("misleading mention", "text", txt, "did", facet.DID) + evt.AddRecordLabel("misleading") + continue } } } From ef07c1e218d3c54dea303afb0b552485aa73b73b Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 15 Nov 2023 21:04:44 -0800 Subject: [PATCH 18/35] automod: add generic caching, and hydrate some account meta --- automod/account_meta.go | 98 ++++++++++++++++++++++++++++++++ automod/cachestore.go | 36 ++++++++++++ automod/engine.go | 44 +++++++++----- automod/event.go | 9 +-- automod/redis_cache.go | 63 ++++++++++++++++++++ automod/redis_directory.go | 28 +++++++-- automod/rules/all.go | 1 + automod/rules/hashtags_test.go | 13 +++-- automod/rules/misleading_test.go | 21 ++++--- automod/rules/profile.go | 13 +++++ cmd/hepa/main.go | 7 +++ cmd/hepa/server.go | 18 ++++++ 12 files changed, 310 insertions(+), 41 deletions(-) create mode 100644 automod/account_meta.go create mode 100644 automod/cachestore.go create mode 100644 automod/redis_cache.go create mode 100644 automod/rules/profile.go diff --git a/automod/account_meta.go b/automod/account_meta.go new file mode 100644 index 000000000..bdc1494f6 --- /dev/null +++ b/automod/account_meta.go @@ -0,0 +1,98 @@ +package automod + +import ( + "context" + "encoding/json" + "fmt" + "time" + + appbsky "github.com/bluesky-social/indigo/api/bsky" + "github.com/bluesky-social/indigo/atproto/identity" +) + +type ProfileSummary struct { + HasAvatar bool + Description *string + DisplayName *string +} + +type AccountPrivate struct { + Email string + EmailConfirmed bool +} + +// information about a repo/account/identity, always pre-populated and relevant to many rules +type AccountMeta struct { + Identity *identity.Identity + Profile ProfileSummary + Private *AccountPrivate + AccountLabels []string + FollowersCount int64 + FollowsCount int64 + PostsCount int64 + IndexedAt *time.Time +} + +func (e *Engine) GetAccountMeta(ctx context.Context, 
ident *identity.Identity) (*AccountMeta, error) { + + // wipe parsed public key; it's a waste of space and can't serialize + ident.ParsedPublicKey = nil + + existing, err := e.Cache.Get(ctx, "acct", ident.DID.String()) + if err != nil { + return nil, err + } + if existing != "" { + var am AccountMeta + err := json.Unmarshal([]byte(existing), &am) + if err != nil { + return nil, fmt.Errorf("parsing AccountMeta from cache: %v", err) + } + am.Identity = ident + return &am, nil + } + + // fetch account metadata + pv, err := appbsky.ActorGetProfile(ctx, e.BskyClient, ident.DID.String()) + if err != nil { + return nil, err + } + + var labels []string + for _, lbl := range pv.Labels { + labels = append(labels, lbl.Val) + } + + am := AccountMeta{ + Identity: ident, + Profile: ProfileSummary{ + HasAvatar: pv.Avatar != nil, + Description: pv.Description, + DisplayName: pv.DisplayName, + }, + AccountLabels: dedupeStrings(labels), + } + if pv.PostsCount != nil { + am.PostsCount = *pv.PostsCount + } + if pv.FollowersCount != nil { + am.FollowersCount = *pv.FollowersCount + } + if pv.FollowsCount != nil { + am.FollowsCount = *pv.FollowsCount + } + + if e.AdminClient != nil { + // XXX: get admin-level info (email, indexed at, etc). 
requires lexgen update + } + + val, err := json.Marshal(&am) + if err != nil { + return nil, err + } + + if err := e.Cache.Set(ctx, "acct", ident.DID.String(), string(val)); err != nil { + return nil, err + } + return &am, nil +} diff --git a/automod/cachestore.go b/automod/cachestore.go new file mode 100644 index 000000000..4fd33585e --- /dev/null +++ b/automod/cachestore.go @@ -0,0 +1,36 @@ +package automod + +import ( + "context" + "time" + + "github.com/hashicorp/golang-lru/v2/expirable" +) + +type CacheStore interface { + Get(ctx context.Context, name, key string) (string, error) + Set(ctx context.Context, name, key string, val string) error +} + +type MemCacheStore struct { + Data *expirable.LRU[string, string] +} + +func NewMemCacheStore(capacity int, ttl time.Duration) MemCacheStore { + return MemCacheStore{ + Data: expirable.NewLRU[string, string](capacity, nil, ttl), + } +} + +func (s MemCacheStore) Get(ctx context.Context, name, key string) (string, error) { + v, ok := s.Data.Get(name + "/" + key) + if !ok { + return "", nil + } + return v, nil +} + +func (s MemCacheStore) Set(ctx context.Context, name, key string, val string) error { + s.Data.Add(name+"/"+key, val) + return nil +} diff --git a/automod/engine.go b/automod/engine.go index 24edb8949..5df009043 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -16,11 +16,13 @@ import ( // // TODO: careful when initializing: several fields should not be null or zero, even though they are pointer type. type Engine struct { - Logger *slog.Logger - Directory identity.Directory - Rules RuleSet - Counters CountStore - Sets SetStore + Logger *slog.Logger + Directory identity.Directory + Rules RuleSet + Counters CountStore + Sets SetStore + Cache CacheStore + BskyClient *xrpc.Client // used to persist moderation actions in mod service (optional) AdminClient *xrpc.Client } @@ -41,10 +43,14 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. 
return fmt.Errorf("identity not found for did: %s", did.String()) } + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } evt := IdentityEvent{ Event{ Engine: e, - Account: AccountMeta{Identity: ident}, + Account: *am, }, } if err := e.Rules.CallIdentityRules(&evt); err != nil { @@ -62,11 +68,13 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID string, rec any) error { // similar to an HTTP server, we want to recover any panics from rule execution + /* XXX defer func() { if r := recover(); r != nil { e.Logger.Error("automod event execution exception", "err", r) } }() + */ ident, err := e.Directory.LookupDID(ctx, did) if err != nil { @@ -83,7 +91,11 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID if !ok { return fmt.Errorf("mismatch between collection (%s) and type", collection) } - evt := e.NewPostEvent(ident, path, recCID, post) + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } + evt := e.NewPostEvent(*am, path, recCID, post) e.Logger.Debug("processing post", "did", ident.DID, "path", path) if err := e.Rules.CallPostRules(&evt); err != nil { return err @@ -99,7 +111,11 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return err } default: - evt := e.NewRecordEvent(ident, path, recCID, rec) + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } + evt := e.NewRecordEvent(*am, path, recCID, rec) e.Logger.Debug("processing record", "did", ident.DID, "path", path) if err := e.Rules.CallRecordRules(&evt); err != nil { return err @@ -119,14 +135,14 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return nil } -func (e *Engine) NewPostEvent(ident *identity.Identity, path, recCID string, post *appbsky.FeedPost) PostEvent { +func (e *Engine) NewPostEvent(am AccountMeta, path, recCID 
string, post *appbsky.FeedPost) PostEvent { parts := strings.SplitN(path, "/", 2) return PostEvent{ RecordEvent{ Event{ Engine: e, - Logger: e.Logger.With("did", ident.DID, "collection", parts[0], "rkey", parts[1]), - Account: AccountMeta{Identity: ident}, + Logger: e.Logger.With("did", am.Identity.DID, "collection", parts[0], "rkey", parts[1]), + Account: am, }, parts[0], parts[1], @@ -140,13 +156,13 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path, recCID string, pos } } -func (e *Engine) NewRecordEvent(ident *identity.Identity, path, recCID string, rec any) RecordEvent { +func (e *Engine) NewRecordEvent(am AccountMeta, path, recCID string, rec any) RecordEvent { parts := strings.SplitN(path, "/", 2) return RecordEvent{ Event{ Engine: e, - Logger: e.Logger.With("did", ident.DID, "collection", parts[0], "rkey", parts[1]), - Account: AccountMeta{Identity: ident}, + Logger: e.Logger.With("did", am.Identity.DID, "collection", parts[0], "rkey", parts[1]), + Account: am, }, parts[0], parts[1], diff --git a/automod/event.go b/automod/event.go index baf62b6e4..0924293e1 100644 --- a/automod/event.go +++ b/automod/event.go @@ -7,7 +7,6 @@ import ( comatproto "github.com/bluesky-social/indigo/api/atproto" appbsky "github.com/bluesky-social/indigo/api/bsky" - "github.com/bluesky-social/indigo/atproto/identity" ) type ModReport struct { @@ -15,12 +14,6 @@ type ModReport struct { Comment string } -// information about a repo/account/identity, always pre-populated and relevant to many rules -type AccountMeta struct { - Identity *identity.Identity - // TODO: createdAt / age -} - type CounterRef struct { Name string Val string @@ -84,7 +77,7 @@ func (e *Event) PersistAccountActions(ctx context.Context) error { xrpcc := e.Engine.AdminClient if len(e.AccountLabels) > 0 { _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ - Action: "com.atproto.admin.defs#createLabels", + Action: "com.atproto.admin.defs#flag", 
CreateLabelVals: dedupeStrings(e.AccountLabels), Reason: "automod", CreatedBy: xrpcc.Auth.Did, diff --git a/automod/redis_cache.go b/automod/redis_cache.go new file mode 100644 index 000000000..c5724826d --- /dev/null +++ b/automod/redis_cache.go @@ -0,0 +1,63 @@ +package automod + +import ( + "context" + "time" + + "github.com/go-redis/cache/v9" + "github.com/redis/go-redis/v9" +) + +type RedisCacheStore struct { + Data *cache.Cache + TTL time.Duration +} + +var _ CacheStore = (*RedisCacheStore)(nil) + +func NewRedisCacheStore(redisURL string, ttl time.Duration) (*RedisCacheStore, error) { + opt, err := redis.ParseURL(redisURL) + if err != nil { + return nil, err + } + rdb := redis.NewClient(opt) + // check redis connection + _, err = rdb.Ping(context.TODO()).Result() + if err != nil { + return nil, err + } + data := cache.New(&cache.Options{ + Redis: rdb, + LocalCache: cache.NewTinyLFU(10_000, ttl), + }) + return &RedisCacheStore{ + Data: data, + TTL: ttl, + }, nil +} + +func redisCacheKey(name, key string) string { + return "cache/" + name + "/" + key +} + +func (s RedisCacheStore) Get(ctx context.Context, name, key string) (string, error) { + var val string + err := s.Data.Get(ctx, redisCacheKey(name, key), &val) + if err == cache.ErrCacheMiss { + return "", nil + } + if err != nil { + return "", err + } + return val, nil +} + +func (s RedisCacheStore) Set(ctx context.Context, name, key string, val string) error { + s.Data.Set(&cache.Item{ + Ctx: ctx, + Key: redisCacheKey(name, key), + Value: val, + TTL: s.TTL, + }) + return nil +} diff --git a/automod/redis_directory.go b/automod/redis_directory.go index 5add6ab0e..4a76c8b4d 100644 --- a/automod/redis_directory.go +++ b/automod/redis_directory.go @@ -93,15 +93,19 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha DID: "", Err: err, } - d.handleCache.Set(&cache.Item{ + err = d.handleCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + h.String(), Value: he, TTL: d.ErrTTL, }) + 
if err != nil { + return nil, err + } return &he, nil } + ident.ParsedPublicKey = nil entry := IdentityEntry{ Updated: time.Now(), Identity: ident, @@ -113,18 +117,24 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha Err: nil, } - d.identityCache.Set(&cache.Item{ + err = d.identityCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + ident.DID.String(), Value: entry, TTL: d.HitTTL, }) - d.handleCache.Set(&cache.Item{ + if err != nil { + return nil, err + } + err = d.handleCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + h.String(), Value: he, TTL: d.HitTTL, }) + if err != nil { + return nil, err + } return &he, nil } @@ -178,7 +188,7 @@ func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (sy func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*IdentityEntry, error) { ident, err := d.Inner.LookupDID(ctx, did) - // wipe parsed public key; it's a waste of space + // wipe parsed public key; it's a waste of space and can't serialize if nil == err { ident.ParsedPublicKey = nil } @@ -198,19 +208,25 @@ func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*Identi } } - d.identityCache.Set(&cache.Item{ + err = d.identityCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + did.String(), Value: entry, TTL: d.HitTTL, }) + if err != nil { + return nil, err + } if he != nil { - d.handleCache.Set(&cache.Item{ + err = d.handleCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + ident.Handle.String(), Value: *he, TTL: d.HitTTL, }) + if err != nil { + return nil, err + } } return &entry, nil } diff --git a/automod/rules/all.go b/automod/rules/all.go index 36f2df578..d8583be44 100644 --- a/automod/rules/all.go +++ b/automod/rules/all.go @@ -11,6 +11,7 @@ func DefaultRules() automod.RuleSet { MisleadingMentionPostRule, ReplyCountPostRule, BanHashtagsPostRule, + AccountDemoPostRule, }, } return rules diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go 
index 5c95e5583..ac3a3fc26 100644 --- a/automod/rules/hashtags_test.go +++ b/automod/rules/hashtags_test.go @@ -6,6 +6,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" "github.com/stretchr/testify/assert" ) @@ -14,16 +15,18 @@ func TestBanHashtagPostRule(t *testing.T) { assert := assert.New(t) engine := engineFixture() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), + am1 := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, } path := "app.bsky.feed.post/abc123" cid1 := "cid123" p1 := appbsky.FeedPost{ Text: "some post blah", } - evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) + evt1 := engine.NewPostEvent(am1, path, cid1, &p1) assert.NoError(BanHashtagsPostRule(&evt1)) assert.Empty(evt1.RecordLabels) @@ -31,7 +34,7 @@ func TestBanHashtagPostRule(t *testing.T) { Text: "some post blah", Tags: []string{"one", "slur"}, } - evt2 := engine.NewPostEvent(&id1, path, cid1, &p2) + evt2 := engine.NewPostEvent(am1, path, cid1, &p2) assert.NoError(BanHashtagsPostRule(&evt2)) assert.NotEmpty(evt2.RecordLabels) } diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go index ce8138133..edc7a0d19 100644 --- a/automod/rules/misleading_test.go +++ b/automod/rules/misleading_test.go @@ -6,6 +6,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" "github.com/stretchr/testify/assert" ) @@ -14,9 +15,11 @@ func TestMisleadingURLPostRule(t *testing.T) { assert := assert.New(t) engine := engineFixture() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: 
syntax.Handle("handle.example.com"), + am1 := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, } path := "app.bsky.feed.post/abc123" cid1 := "cid123" @@ -38,7 +41,7 @@ func TestMisleadingURLPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) + evt1 := engine.NewPostEvent(am1, path, cid1, &p1) assert.NoError(MisleadingURLPostRule(&evt1)) assert.NotEmpty(evt1.RecordLabels) } @@ -47,9 +50,11 @@ func TestMisleadingMentionPostRule(t *testing.T) { assert := assert.New(t) engine := engineFixture() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), + am1 := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, } path := "app.bsky.feed.post/abc123" cid1 := "cid123" @@ -71,7 +76,7 @@ func TestMisleadingMentionPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) + evt1 := engine.NewPostEvent(am1, path, cid1, &p1) assert.NoError(MisleadingMentionPostRule(&evt1)) assert.NotEmpty(evt1.RecordLabels) } diff --git a/automod/rules/profile.go b/automod/rules/profile.go new file mode 100644 index 000000000..e6f70bdc4 --- /dev/null +++ b/automod/rules/profile.go @@ -0,0 +1,13 @@ +package rules + +import ( + "github.com/bluesky-social/indigo/automod" +) + +// this is a dummy rule to demonstrate accessing account metadata (eg, profile) from within post handler +func AccountDemoPostRule(evt *automod.PostEvent) error { + if evt.Account.Profile.Description != nil && len(evt.Post.Text) > 5 && *evt.Account.Profile.Description == evt.Post.Text { + evt.AddRecordFlag("own-profile-description") + } + return nil +} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 357156baf..8480963b3 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -51,6 +51,12 @@ func run(args []string) error { 
Value: "https://api.bsky.app", EnvVars: []string{"ATP_MOD_HOST"}, }, + &cli.StringFlag{ + Name: "atp-bsky-host", + Usage: "method, hostname, and port of bsky API (appview) service", + Value: "https://api.bsky.app", + EnvVars: []string{"ATP_BSKY_HOST"}, + }, } app.Commands = []*cli.Command{ @@ -138,6 +144,7 @@ var runCmd = &cli.Command{ dir, Config{ BGSHost: cctx.String("atp-bgs-host"), + BskyHost: cctx.String("atp-bsky-host"), Logger: logger, ModHost: cctx.String("atp-mod-host"), ModAdminToken: cctx.String("mod-admin-token"), diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 92e452401..eb784b28d 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -7,6 +7,7 @@ import ( "net/http" "os" "strings" + "time" comatproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/atproto/identity" @@ -26,6 +27,7 @@ type Server struct { type Config struct { BGSHost string + BskyHost string ModHost string ModAdminToken string ModUsername string @@ -91,13 +93,29 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { counters = automod.NewMemCountStore() } + var cache automod.CacheStore + if config.RedisURL != "" { + c, err := automod.NewRedisCacheStore(config.RedisURL, 30*time.Minute) + if err != nil { + return nil, err + } + cache = c + } else { + cache = automod.NewMemCacheStore(5_000, 30*time.Minute) + } + engine := automod.Engine{ Logger: logger, Directory: dir, Counters: counters, Sets: sets, + Cache: cache, Rules: rules.DefaultRules(), AdminClient: xrpcc, + BskyClient: &xrpc.Client{ + Client: util.RobustHTTPClient(), + Host: config.BskyHost, + }, } s := &Server{ From 6b1c597741bba3bdedfa2c2628a6ee9f1879838b Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 16 Nov 2023 16:29:34 -0800 Subject: [PATCH 19/35] automod: fix tests (no XRPC in testing) --- automod/account_meta.go | 10 ++++++++++ automod/engine_test.go | 15 +++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git 
a/automod/account_meta.go b/automod/account_meta.go index bdc1494f6..daedfaf54 100644 --- a/automod/account_meta.go +++ b/automod/account_meta.go @@ -38,6 +38,16 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( // wipe parsed public key; it's a waste of space and can't serialize ident.ParsedPublicKey = nil + // fallback in case client wasn't configured (eg, testing) + if e.BskyClient == nil { + e.Logger.Warn("skipping account meta hydration") + am := AccountMeta{ + Identity: ident, + Profile: ProfileSummary{}, + } + return &am, nil + } + existing, err := e.Cache.Get(ctx, "acct", ident.DID.String()) if err != nil { return nil, err diff --git a/automod/engine_test.go b/automod/engine_test.go index d4050e931..aa2e3e0ef 100644 --- a/automod/engine_test.go +++ b/automod/engine_test.go @@ -8,7 +8,6 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" - "github.com/bluesky-social/indigo/xrpc" "github.com/stretchr/testify/assert" ) @@ -49,16 +48,12 @@ func engineFixture() Engine { Handle: syntax.Handle("handle.example.com"), } dir.Insert(id1) - adminc := xrpc.Client{ - Host: "http://dummy.local", - } engine := Engine{ - Logger: slog.Default(), - Directory: &dir, - Counters: NewMemCountStore(), - Sets: sets, - Rules: rules, - AdminClient: &adminc, + Logger: slog.Default(), + Directory: &dir, + Counters: NewMemCountStore(), + Sets: sets, + Rules: rules, } return engine } From c0a109245ed3cd8cb2adb0c0cd21e24a95414b35 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 16 Nov 2023 17:11:22 -0800 Subject: [PATCH 20/35] HACK: remove did doc from createSession to un-break it (temporarily) --- api/atproto/servercreateSession.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/api/atproto/servercreateSession.go b/api/atproto/servercreateSession.go index ac197250c..77beaf087 100644 --- 
a/api/atproto/servercreateSession.go +++ b/api/atproto/servercreateSession.go @@ -7,7 +7,6 @@ package atproto import ( "context" - "github.com/bluesky-social/indigo/lex/util" "github.com/bluesky-social/indigo/xrpc" ) @@ -20,13 +19,13 @@ type ServerCreateSession_Input struct { // ServerCreateSession_Output is the output of a com.atproto.server.createSession call. type ServerCreateSession_Output struct { - AccessJwt string `json:"accessJwt" cborgen:"accessJwt"` - Did string `json:"did" cborgen:"did"` - DidDoc *util.LexiconTypeDecoder `json:"didDoc,omitempty" cborgen:"didDoc,omitempty"` - Email *string `json:"email,omitempty" cborgen:"email,omitempty"` - EmailConfirmed *bool `json:"emailConfirmed,omitempty" cborgen:"emailConfirmed,omitempty"` - Handle string `json:"handle" cborgen:"handle"` - RefreshJwt string `json:"refreshJwt" cborgen:"refreshJwt"` + AccessJwt string `json:"accessJwt" cborgen:"accessJwt"` + Did string `json:"did" cborgen:"did"` + //DidDoc *util.LexiconTypeDecoder `json:"didDoc,omitempty" cborgen:"didDoc,omitempty"` + Email *string `json:"email,omitempty" cborgen:"email,omitempty"` + EmailConfirmed *bool `json:"emailConfirmed,omitempty" cborgen:"emailConfirmed,omitempty"` + Handle string `json:"handle" cborgen:"handle"` + RefreshJwt string `json:"refreshJwt" cborgen:"refreshJwt"` } // ServerCreateSession calls the XRPC method "com.atproto.server.createSession". 
From f67fb9860d944ffe81d2f5a03292b5f7912944a4 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 16 Nov 2023 17:02:54 -0800 Subject: [PATCH 21/35] automod: private admin state hydration --- automod/account_meta.go | 22 ++++++++++++++++++++-- automod/rules/all.go | 1 + automod/rules/private.go | 17 +++++++++++++++++ cmd/hepa/main.go | 2 +- 4 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 automod/rules/private.go diff --git a/automod/account_meta.go b/automod/account_meta.go index daedfaf54..625205c04 100644 --- a/automod/account_meta.go +++ b/automod/account_meta.go @@ -6,8 +6,10 @@ import ( "fmt" "time" + comatproto "github.com/bluesky-social/indigo/api/atproto" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" ) type ProfileSummary struct { @@ -19,6 +21,7 @@ type ProfileSummary struct { type AccountPrivate struct { Email string EmailConfirmed bool + IndexedAt time.Time } // information about a repo/account/identity, always pre-populated and relevant to many rules @@ -30,7 +33,6 @@ type AccountMeta struct { FollowersCount int64 FollowsCount int64 PostsCount int64 - IndexedAt *time.Time } func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) (*AccountMeta, error) { @@ -93,7 +95,23 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( } if e.AdminClient != nil { - // XXX: get admin-level info (email, indexed at, etc). 
requires lexgen update + pv, err := comatproto.AdminGetAccountInfo(ctx, e.AdminClient, ident.DID.String()) + if err != nil { + return nil, err + } + ap := AccountPrivate{} + if pv.Email != nil && *pv.Email != "" { + ap.Email = *pv.Email + } + if pv.EmailConfirmedAt != nil && *pv.EmailConfirmedAt != "" { + ap.EmailConfirmed = true + } + ts, err := syntax.ParseDatetimeTime(pv.IndexedAt) + if err != nil { + return nil, err + } + ap.IndexedAt = ts + am.Private = &ap } val, err := json.Marshal(&am) diff --git a/automod/rules/all.go b/automod/rules/all.go index d8583be44..86f4b6bdf 100644 --- a/automod/rules/all.go +++ b/automod/rules/all.go @@ -12,6 +12,7 @@ func DefaultRules() automod.RuleSet { ReplyCountPostRule, BanHashtagsPostRule, AccountDemoPostRule, + AccountPrivateDemoPostRule, }, } return rules diff --git a/automod/rules/private.go b/automod/rules/private.go new file mode 100644 index 000000000..8f00d3bf1 --- /dev/null +++ b/automod/rules/private.go @@ -0,0 +1,17 @@ +package rules + +import ( + "strings" + + "github.com/bluesky-social/indigo/automod" +) + +// dummy rule. 
this leaks PII (account email) in logs and should never be used in real life +func AccountPrivateDemoPostRule(evt *automod.PostEvent) error { + if evt.Account.Private != nil { + if strings.HasSuffix(evt.Account.Private.Email, "@blueskyweb.xyz") { + evt.Logger.Info("hello dev!", "email", evt.Account.Private.Email) + } + } + return nil +} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 8480963b3..297a10602 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -126,7 +126,7 @@ var runCmd = &cli.Command{ }, PLCLimiter: rate.NewLimiter(rate.Limit(cctx.Int("plc-rate-limit")), 1), TryAuthoritativeDNS: true, - SkipDNSDomainSuffixes: []string{".bsky.social"}, + SkipDNSDomainSuffixes: []string{".bsky.social", ".staging.bsky.dev"}, } var dir identity.Directory if cctx.String("redis-url") != "" { From 37042b1a983a5455e39f4c135ac395fdb4b69e92 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 16 Nov 2023 18:49:11 -0800 Subject: [PATCH 22/35] hepa: persist cursor state in redis --- cmd/hepa/consumer.go | 7 +++- cmd/hepa/main.go | 6 +++ cmd/hepa/server.go | 92 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 95 insertions(+), 10 deletions(-) diff --git a/cmd/hepa/consumer.go b/cmd/hepa/consumer.go index 36d522f37..b9b1434f4 100644 --- a/cmd/hepa/consumer.go +++ b/cmd/hepa/consumer.go @@ -22,7 +22,10 @@ import ( func (s *Server) RunConsumer(ctx context.Context) error { // TODO: persist cursor in a database or local disk - cur := 0 + cur, err := s.ReadLastCursor(ctx) + if err != nil { + return err + } dialer := websocket.DefaultDialer u, err := url.Parse(s.bgshost) @@ -43,9 +46,11 @@ func (s *Server) RunConsumer(ctx context.Context) error { rsc := &events.RepoStreamCallbacks{ RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { + s.lastSeq = evt.Seq return s.HandleRepoCommit(ctx, evt) }, RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { + s.lastSeq = evt.Seq did, err := syntax.ParseDID(evt.Did) if err != nil { 
s.logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 297a10602..e0e6ae12f 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -166,6 +166,12 @@ var runCmd = &cli.Command{ } }() + go func() { + if err := srv.RunPersistCursor(ctx); err != nil { + slog.Error("cursor routine failed", "err", err) + } + }() + // the main service loop if err := srv.RunConsumer(ctx); err != nil { return fmt.Errorf("failure consuming and processing firehose: %w", err) diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index eb784b28d..a09e37c7d 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -17,12 +17,15 @@ import ( "github.com/bluesky-social/indigo/xrpc" "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/redis/go-redis/v9" ) type Server struct { bgshost string logger *slog.Logger engine *automod.Engine + rdb *redis.Client + lastSeq int64 } type Config struct { @@ -83,24 +86,34 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { } var counters automod.CountStore + var cache automod.CacheStore + var rdb *redis.Client if config.RedisURL != "" { - c, err := automod.NewRedisCountStore(config.RedisURL) + // generic client, for cursor state + opt, err := redis.ParseURL(config.RedisURL) + if err != nil { + return nil, err + } + rdb = redis.NewClient(opt) + // check redis connection + _, err = rdb.Ping(context.TODO()).Result() if err != nil { return nil, err } - counters = c - } else { - counters = automod.NewMemCountStore() - } - var cache automod.CacheStore - if config.RedisURL != "" { - c, err := automod.NewRedisCacheStore(config.RedisURL, 30*time.Minute) + cnt, err := automod.NewRedisCountStore(config.RedisURL) if err != nil { return nil, err } - cache = c + counters = cnt + + csh, err := automod.NewRedisCacheStore(config.RedisURL, 30*time.Minute) + if err != nil { + return nil, err + } + cache = csh } else { + counters 
= automod.NewMemCountStore() cache = automod.NewMemCacheStore(5_000, 30*time.Minute) } @@ -122,6 +135,7 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { bgshost: config.BGSHost, logger: logger, engine: &engine, + rdb: rdb, } return s, nil @@ -131,3 +145,63 @@ func (s *Server) RunMetrics(listen string) error { http.Handle("/metrics", promhttp.Handler()) return http.ListenAndServe(listen, nil) } + +var cursorKey = "hepa/seq" + +func (s *Server) ReadLastCursor(ctx context.Context) (int64, error) { + // if redis isn't configured, just skip + if s.rdb == nil { + s.logger.Info("redis not configured, skipping cursor read") + return 0, nil + } + + val, err := s.rdb.Get(ctx, cursorKey).Int64() + if err == redis.Nil { + s.logger.Info("no pre-existing cursor in redis") + return 0, nil + } + s.logger.Info("successfully found prior subscription cursor seq in redis", "seq", val) + return val, err +} + +func (s *Server) PersistCursor(ctx context.Context) error { + // if redis isn't configured, just skip + if s.rdb == nil { + return nil + } + if s.lastSeq <= 0 { + return nil + } + err := s.rdb.Set(ctx, cursorKey, s.lastSeq, 14*24*time.Hour).Err() + return err +} + +// this method runs in a loop, persisting the current cursor state every 5 seconds +func (s *Server) RunPersistCursor(ctx context.Context) error { + + // if redis isn't configured, just skip + if s.rdb == nil { + return nil + } + ticker := time.NewTicker(5 * time.Second) + for { + select { + case <-ctx.Done(): + if s.lastSeq >= 1 { + s.logger.Info("persisting final cursor seq value", "seq", s.lastSeq) + err := s.PersistCursor(ctx) + if err != nil { + s.logger.Error("failed to persist cursor", "err", err, "seq", s.lastSeq) + } + } + return nil + case <-ticker.C: + if s.lastSeq >= 1 { + err := s.PersistCursor(ctx) + if err != nil { + s.logger.Error("failed to persist cursor", "err", err, "seq", s.lastSeq) + } + } + } + } +} From 21b494df3bd057d3114a97c7698990ff8fd02e28 Mon Sep 17 00:00:00 2001 
From: bryan newbold Date: Thu, 16 Nov 2023 19:32:42 -0800 Subject: [PATCH 23/35] syntax: fix AT-URI Path() impl --- atproto/syntax/aturi.go | 8 ++++---- atproto/syntax/aturi_test.go | 27 ++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/atproto/syntax/aturi.go b/atproto/syntax/aturi.go index cdf340fc4..cd499fecf 100644 --- a/atproto/syntax/aturi.go +++ b/atproto/syntax/aturi.go @@ -62,14 +62,14 @@ func (n ATURI) Authority() AtIdentifier { // Returns path segment, without leading slash, as would be used in an atproto repository key. Or empty string if there is no path. func (n ATURI) Path() string { parts := strings.SplitN(string(n), "/", 5) - if len(parts) < 3 { + if len(parts) < 4 { // something has gone wrong (would not validate) return "" } - if len(parts) == 3 { - return parts[2] + if len(parts) == 4 { + return parts[3] } - return parts[2] + "/" + parts[3] + return parts[3] + "/" + parts[4] } // Returns a valid NSID if there is one in the appropriate part of the path, otherwise empty. 
diff --git a/atproto/syntax/aturi_test.go b/atproto/syntax/aturi_test.go index 2851e35ae..4ba278300 100644 --- a/atproto/syntax/aturi_test.go +++ b/atproto/syntax/aturi_test.go @@ -20,11 +20,20 @@ func TestInteropATURIsValid(t *testing.T) { if len(line) == 0 || line[0] == '#' { continue } - _, err := ParseATURI(line) + aturi, err := ParseATURI(line) if err != nil { fmt.Println("FAILED, GOOD: " + line) } assert.NoError(err) + + // check that Path() is working + col := aturi.Collection() + rkey := aturi.RecordKey() + if rkey != "" { + assert.Equal(col.String()+"/"+rkey.String(), aturi.Path()) + } else if col != "" { + assert.Equal(col.String(), aturi.Path()) + } } assert.NoError(scanner.Err()) } @@ -67,7 +76,22 @@ func TestATURIParts(t *testing.T) { rkey := uri.RecordKey() assert.Equal(parts[3], rkey.String()) } +} + +func TestATURIPath(t *testing.T) { + assert := assert.New(t) + uri1, err := ParseATURI("at://did:abc:123/io.nsid.someFunc/record-key") + assert.NoError(err) + assert.Equal("io.nsid.someFunc/record-key", uri1.Path()) + + uri2, err := ParseATURI("at://did:abc:123/io.nsid.someFunc") + assert.NoError(err) + assert.Equal("io.nsid.someFunc", uri2.Path()) + + uri3, err := ParseATURI("at://did:abc:123") + assert.NoError(err) + assert.Equal("", uri3.Path()) } func TestATURINormalize(t *testing.T) { @@ -93,5 +117,6 @@ func TestATURINoPanic(t *testing.T) { _ = bad.RecordKey() _ = bad.Normalize() _ = bad.String() + _ = bad.Path() } } From cc9ed968d744a8b401275ea31756b674850ab3ec Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 16 Nov 2023 20:01:00 -0800 Subject: [PATCH 24/35] automod: process individual pre-existing records (by AT-URI) --- automod/engine.go | 45 +++++++++++--- cmd/hepa/main.go | 147 ++++++++++++++++++++++++++++++--------------- cmd/hepa/server.go | 11 ++++ 3 files changed, 147 insertions(+), 56 deletions(-) diff --git a/automod/engine.go b/automod/engine.go index 5df009043..759f630b2 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ 
-6,6 +6,7 @@ import ( "log/slog" "strings" + comatproto "github.com/bluesky-social/indigo/api/atproto" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" @@ -16,13 +17,14 @@ import ( // // TODO: careful when initializing: several fields should not be null or zero, even though they are pointer type. type Engine struct { - Logger *slog.Logger - Directory identity.Directory - Rules RuleSet - Counters CountStore - Sets SetStore - Cache CacheStore - BskyClient *xrpc.Client + Logger *slog.Logger + Directory identity.Directory + Rules RuleSet + Counters CountStore + Sets SetStore + Cache CacheStore + RelayClient *xrpc.Client + BskyClient *xrpc.Client // used to persist moderation actions in mod service (optional) AdminClient *xrpc.Client } @@ -68,13 +70,11 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID string, rec any) error { // similar to an HTTP server, we want to recover any panics from rule execution - /* XXX defer func() { if r := recover(); r != nil { e.Logger.Error("automod event execution exception", "err", r) } }() - */ ident, err := e.Directory.LookupDID(ctx, did) if err != nil { @@ -135,6 +135,33 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return nil } +func (e *Engine) FetchAndProcessRecord(ctx context.Context, uri string) error { + // resolve URI, identity, and record + aturi, err := syntax.ParseATURI(uri) + if err != nil { + return fmt.Errorf("parsing AT-URI argument: %v", err) + } + if aturi.RecordKey() == "" { + return fmt.Errorf("need a full, not partial, AT-URI: %s", uri) + } + if e.RelayClient == nil { + return fmt.Errorf("can't fetch record without relay client configured") + } + ident, err := e.Directory.Lookup(ctx, aturi.Authority()) + if err != nil { + return fmt.Errorf("resolving AT-URI 
authority: %v", err) + } + e.Logger.Info("fetching record", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) + out, err := comatproto.RepoGetRecord(ctx, e.RelayClient, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String()) + if err != nil { + return fmt.Errorf("fetching record from Relay (%s): %v", aturi, err) + } + if out.Cid == nil { + return fmt.Errorf("expected a CID in getRecord response") + } + return e.ProcessRecord(ctx, ident.DID, aturi.Path(), *out.Cid, out.Value.Val) +} + func (e *Engine) NewPostEvent(am AccountMeta, path, recCID string, post *appbsky.FeedPost) PostEvent { parts := strings.SplitN(path, "/", 2) return PostEvent{ diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index e0e6ae12f..c652c432b 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -57,30 +57,12 @@ func run(args []string) error { Value: "https://api.bsky.app", EnvVars: []string{"ATP_BSKY_HOST"}, }, - } - - app.Commands = []*cli.Command{ - runCmd, - } - - return app.Run(args) -} - -var runCmd = &cli.Command{ - Name: "run", - Usage: "run the hepa daemon", - Flags: []cli.Flag{ &cli.StringFlag{ - Name: "metrics-listen", - Usage: "IP or address, and port, to listen on for metrics APIs", - Value: ":3989", - EnvVars: []string{"HEPA_METRICS_LISTEN"}, - }, - &cli.IntFlag{ - Name: "plc-rate-limit", - Usage: "max number of requests per second to PLC registry", - Value: 100, - EnvVars: []string{"HEPA_PLC_RATE_LIMIT"}, + Name: "redis-url", + Usage: "redis connection URL", + // redis://:@localhost:6379/ + // redis://localhost:6379/0 + EnvVars: []string{"HEPA_REDIS_URL"}, }, &cli.StringFlag{ Name: "mod-handle", @@ -97,17 +79,60 @@ var runCmd = &cli.Command{ Usage: "admin authentication password for mod service", EnvVars: []string{"HEPA_MOD_AUTH_ADMIN_TOKEN"}, }, + &cli.IntFlag{ + Name: "plc-rate-limit", + Usage: "max number of requests per second to PLC registry", + Value: 100, + EnvVars: 
[]string{"HEPA_PLC_RATE_LIMIT"}, + }, &cli.StringFlag{ Name: "sets-json-path", Usage: "file path of JSON file containing static sets", EnvVars: []string{"HEPA_SETS_JSON_PATH"}, }, + } + + app.Commands = []*cli.Command{ + runCmd, + processRecordCmd, + } + + return app.Run(args) +} + +func configDirectory(cctx *cli.Context) (identity.Directory, error) { + baseDir := identity.BaseDirectory{ + PLCURL: cctx.String("atp-plc-host"), + HTTPClient: http.Client{ + Timeout: time.Second * 15, + }, + PLCLimiter: rate.NewLimiter(rate.Limit(cctx.Int("plc-rate-limit")), 1), + TryAuthoritativeDNS: true, + SkipDNSDomainSuffixes: []string{".bsky.social", ".staging.bsky.dev"}, + } + var dir identity.Directory + if cctx.String("redis-url") != "" { + rdir, err := automod.NewRedisDirectory(&baseDir, cctx.String("redis-url"), time.Hour*24, time.Minute*2) + if err != nil { + return nil, err + } + dir = rdir + } else { + cdir := identity.NewCacheDirectory(&baseDir, 1_500_000, time.Hour*24, time.Minute*2) + dir = &cdir + } + return dir, nil +} + +var runCmd = &cli.Command{ + Name: "run", + Usage: "run the hepa daemon", + Flags: []cli.Flag{ &cli.StringFlag{ - Name: "redis-url", - Usage: "redis connection URL", - // redis://:@localhost:6379/ - // redis://localhost:6379/0 - EnvVars: []string{"HEPA_REDIS_URL"}, + Name: "metrics-listen", + Usage: "IP or address, and port, to listen on for metrics APIs", + Value: ":3989", + EnvVars: []string{"HEPA_METRICS_LISTEN"}, }, }, Action: func(cctx *cli.Context) error { @@ -119,25 +144,9 @@ var runCmd = &cli.Command{ configOTEL("hepa") - baseDir := identity.BaseDirectory{ - PLCURL: cctx.String("atp-plc-host"), - HTTPClient: http.Client{ - Timeout: time.Second * 15, - }, - PLCLimiter: rate.NewLimiter(rate.Limit(cctx.Int("plc-rate-limit")), 1), - TryAuthoritativeDNS: true, - SkipDNSDomainSuffixes: []string{".bsky.social", ".staging.bsky.dev"}, - } - var dir identity.Directory - if cctx.String("redis-url") != "" { - rdir, err := 
automod.NewRedisDirectory(&baseDir, cctx.String("redis-url"), time.Hour*24, time.Minute*2) - if err != nil { - return err - } - dir = rdir - } else { - cdir := identity.NewCacheDirectory(&baseDir, 1_500_000, time.Hour*24, time.Minute*2) - dir = &cdir + dir, err := configDirectory(cctx) + if err != nil { + return err } srv, err := NewServer( @@ -179,3 +188,47 @@ var runCmd = &cli.Command{ return nil }, } + +var processRecordCmd = &cli.Command{ + Name: "process-record", + Usage: "process a single record in isolation", + ArgsUsage: ``, + Flags: []cli.Flag{}, + Action: func(cctx *cli.Context) error { + uri := cctx.Args().First() + if uri == "" { + return fmt.Errorf("expected a single AT-URI argument") + } + + ctx := context.Background() + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelInfo, + })) + slog.SetDefault(logger) + + dir, err := configDirectory(cctx) + if err != nil { + return err + } + + srv, err := NewServer( + dir, + Config{ + BGSHost: cctx.String("atp-bgs-host"), + BskyHost: cctx.String("atp-bsky-host"), + Logger: logger, + ModHost: cctx.String("atp-mod-host"), + ModAdminToken: cctx.String("mod-admin-token"), + ModUsername: cctx.String("mod-handle"), + ModPassword: cctx.String("mod-password"), + SetsFileJSON: cctx.String("sets-json-path"), + RedisURL: cctx.String("redis-url"), + }, + ) + if err != nil { + return err + } + + return srv.engine.FetchAndProcessRecord(ctx, uri) + }, +} diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index a09e37c7d..6120a222d 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -117,6 +117,13 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { cache = automod.NewMemCacheStore(5_000, 30*time.Minute) } + relayURL := config.BGSHost + if strings.HasPrefix(relayURL, "ws") { + relayURL = "http" + relayURL[2:] + } + // XXX: + relayURL = "https://bsky.social" + engine := automod.Engine{ Logger: logger, Directory: dir, @@ -129,6 +136,10 @@ func NewServer(dir 
identity.Directory, config Config) (*Server, error) { Client: util.RobustHTTPClient(), Host: config.BskyHost, }, + RelayClient: &xrpc.Client{ + Client: util.RobustHTTPClient(), + Host: relayURL, + }, } s := &Server{ From c54a5e46cd7ed6e4fca5c45cd900a0bb36387f51 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 17 Nov 2023 18:50:41 -0800 Subject: [PATCH 25/35] hepa: dockerfile and github build actions --- .github/workflows/container-hepa-aws.yaml | 52 +++++++++++++++++++++ .github/workflows/container-hepa-ghcr.yaml | 54 ++++++++++++++++++++++ cmd/hepa/Dockerfile | 37 +++++++++++++++ 3 files changed, 143 insertions(+) create mode 100644 .github/workflows/container-hepa-aws.yaml create mode 100644 .github/workflows/container-hepa-ghcr.yaml create mode 100644 cmd/hepa/Dockerfile diff --git a/.github/workflows/container-hepa-aws.yaml b/.github/workflows/container-hepa-aws.yaml new file mode 100644 index 000000000..336af4321 --- /dev/null +++ b/.github/workflows/container-hepa-aws.yaml @@ -0,0 +1,52 @@ +name: container-hepa-aws +on: [push] +env: + REGISTRY: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_REGISTRY }} + USERNAME: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_USERNAME }} + PASSWORD: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_PASSWORD }} + # github.repository as / + IMAGE_NAME: hepa + +jobs: + container-hepa-aws: + if: github.repository == 'bluesky-social/indigo' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup Docker buildx + uses: docker/setup-buildx-action@v1 + + - name: Log into registry ${{ env.REGISTRY }} + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ env.USERNAME }} + password: ${{ env.PASSWORD }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v4 + with: + images: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + 
type=sha,enable=true,priority=100,prefix=,suffix=,format=long + + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@v4 + with: + context: . + file: ./cmd/hepa/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/container-hepa-ghcr.yaml b/.github/workflows/container-hepa-ghcr.yaml new file mode 100644 index 000000000..bcb3269d9 --- /dev/null +++ b/.github/workflows/container-hepa-ghcr.yaml @@ -0,0 +1,54 @@ +name: container-hepa-ghcr +on: + push: + branches: + - main + - bnewbold/automod +env: + REGISTRY: ghcr.io + # github.repository as / + IMAGE_NAME: ${{ github.repository }} + +jobs: + container-hepa-ghcr: + if: github.repository == 'bluesky-social/indigo' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup Docker buildx + uses: docker/setup-buildx-action@v1 + + - name: Log into registry ${{ env.REGISTRY }} + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v4 + with: + images: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=sha,enable=true,priority=100,prefix=hepa:,suffix=,format=long + + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@v4 + with: + context: . 
+ file: ./cmd/hepa/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/cmd/hepa/Dockerfile b/cmd/hepa/Dockerfile new file mode 100644 index 000000000..cfee95d3b --- /dev/null +++ b/cmd/hepa/Dockerfile @@ -0,0 +1,37 @@ +# Run this dockerfile from the top level of the indigo git repository like: +# +# podman build -f ./cmd/hepa/Dockerfile -t hepa . + +### Compile stage +FROM golang:1.21-alpine3.18 AS build-env +RUN apk add --no-cache build-base make git + +ADD . /dockerbuild +WORKDIR /dockerbuild + +# timezone data for alpine builds +ENV GOEXPERIMENT=loopvar +RUN GIT_VERSION=$(git describe --tags --long --always) && \ + go build -tags timetzdata -o /hepa ./cmd/hepa + +### Run stage +FROM alpine:3.18 + +RUN apk add --no-cache --update dumb-init ca-certificates +ENTRYPOINT ["dumb-init", "--"] + +WORKDIR / +RUN mkdir -p data/hepa +COPY --from=build-env /hepa / + +# small things to make golang binaries work well under alpine +ENV GODEBUG=netdns=go +ENV TZ=Etc/UTC + +EXPOSE 2210 + +CMD ["/hepa"] + +LABEL org.opencontainers.image.source=https://github.com/bluesky-social/indigo +LABEL org.opencontainers.image.description="ATP Auto-Moderation Service (hepa)" +LABEL org.opencontainers.image.licenses=MIT From aef1908455d36fea1e191f6e1b5034f36d9dabc8 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 17 Nov 2023 18:52:52 -0800 Subject: [PATCH 26/35] makefile and HACKING entries for hepa --- HACKING.md | 2 ++ Makefile | 1 + 2 files changed, 3 insertions(+) diff --git a/HACKING.md b/HACKING.md index f36821aa9..ef36c48c9 100644 --- a/HACKING.md +++ b/HACKING.md @@ -13,6 +13,7 @@ Run with, eg, `go run ./cmd/bigsky`): - `cmd/fakermaker`: helper to generate fake accounts and content for testing - `cmd/supercollider`: event stream load generation tool - `cmd/sonar`: event stream monitoring tool +- `cmd/hepa`: auto-moderation 
rule engine service - `gen`: dev tool to run CBOR type codegen Packages: @@ -23,6 +24,7 @@ Packages: - `atproto/crypto`: crytographic helpers (signing, key generation and serialization) - `atproto/syntax`: string types and parsers for identifiers, datetimes, etc - `atproto/identity`: DID and handle resolution +- `automod`: moderation and anti-spam rules engine - `bgs`: server implementation for crawling, etc - `carstore`: library for storing repo data in CAR files on disk, plus a metadata SQL db - `events`: types, codegen CBOR helpers, and persistence for event feeds diff --git a/Makefile b/Makefile index d0b3b44cb..d9a85b8da 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ build: ## Build all executables go build ./cmd/stress go build ./cmd/fakermaker go build ./cmd/labelmaker + go build ./cmd/hepa go build ./cmd/supercollider go build -o ./sonar-cli ./cmd/sonar go build ./cmd/palomar From a70cd1cabf1b296c22f1093ee10d68faaf1ce090 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 17 Nov 2023 19:09:19 -0800 Subject: [PATCH 27/35] automod brief docs/context --- automod/README.md | 1 + automod/doc.go | 5 +++-- cmd/hepa/README.md | 17 ++++++++++++++++- 3 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 automod/README.md diff --git a/automod/README.md b/automod/README.md new file mode 100644 index 000000000..9d29cc346 --- /dev/null +++ b/automod/README.md @@ -0,0 +1 @@ +See `./doc.go` diff --git a/automod/doc.go b/automod/doc.go index c0ba398d4..c84472aeb 100644 --- a/automod/doc.go +++ b/automod/doc.go @@ -1,8 +1,9 @@ // Auto-Moderation rules engine for anti-spam and other moderation tasks. // -// The code in this package includes an "engine" which processes atproto commit events (and identity updates), maintains caches and counters, and pushes moderation decisions to an external mod service (eg, appview). A framework for writing new "rules" for the engine to execute are also provided. 
+// This package (`github.com/bluesky-social/indigo/automod`) contains a "rules engine" to augment human moderators in the atproto network. Batches of rules are processed for novel "events" such as a new post or update of an account handle. Counters and other statistics are collected, which can drive subsequent rule invocations. The outcome of rules can be moderation events like "report account for human review" or "label post". A lot of what this package does is collect and maintain caches of relevant metadata about accounts and pieces of content, so that rules have efficient access to this information. // -// It does not provide label API endpoints like queryLabels; see labelmaker for a self-contained labeling service. +// A primary design goal is to have a flexible framework to allow new rules to be written and deployed rapidly in response to new patterns of spam and abuse. Some examples rules are included in the `automod/rules` package, but the expectation is that some real-world rules will be kept secret. // // Code for subscribing to a firehose is not included here; see cmd/hepa for a complete service built on this library. +// The `hepa` command is an example daemon which integrates this rules engine in to a stand-alone service. package automod diff --git a/cmd/hepa/README.md b/cmd/hepa/README.md index c05311713..766952ade 100644 --- a/cmd/hepa/README.md +++ b/cmd/hepa/README.md @@ -1,7 +1,22 @@ -HEPA +hepa ==== This is a simple auto-moderation daemon which wraps the automod package. The name is a reference to HEPA air filters, which help keep the local atmosphere clean and healthy for humans. + +Available commands, flags, and config are documented in the usage (`--help`). 
+ +Current features and design decisions: + +- all state (counters) and caches stored in Redis +- consumes from Relay firehose; no backfill functionality yet +- which rules are included is configured at compile time +- admin access to fetch private account metadata, and to persist moderation actions, is optional. it is possible for anybody to run a `hepa` instance + +This is not a "labeling service" per se, in that it pushes labels in to an existing moderation service, and doesn't provide API endpoints or label streams. see `labelmaker` for a self-contained labeling service. + +Performance is generally slow when first starting up, because account-level metadata is being fetched (and cached) for every firehose event. After the caches have "warmed up", events are processed faster. + +See the `automod` package's README for more documentation. From 499d1fa5f717647106b49ad805d04b3d6562af7c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 19:43:00 -0800 Subject: [PATCH 28/35] automod: update docs --- automod/README.md | 24 +++++++++++++++++++++++- automod/doc.go | 5 +---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/automod/README.md b/automod/README.md index 9d29cc346..3177f4318 100644 --- a/automod/README.md +++ b/automod/README.md @@ -1 +1,23 @@ -See `./doc.go` +indigo/automod +============== + +This package (`github.com/bluesky-social/indigo/automod`) contains a "rules engine" to augment human moderators in the atproto network. Batches of rules are processed for novel "events" such as a new post or update of an account handle. Counters and other statistics are collected, which can drive subsequent rule invocations. The outcome of rules can be moderation events like "report account for human review" or "label post". A lot of what this package does is collect and maintain caches of relevant metadata about accounts and pieces of content, so that rules have efficient access to this information. 
+ +A primary design goal is to have a flexible framework to allow new rules to be written and deployed rapidly in response to new patterns of spam and abuse. + +Some example rules are included in the `automod/rules` package, but the expectation is that some real-world rules will be kept secret. + +Code for subscribing to a firehose is not included here; see `cmd/hepa` for a complete service built on this library. + + +## Design + +Prior art and inspiration: + +* The [SQRL language](https://sqrl-lang.github.io/sqrl/) and runtime was originally developed by an industry vendor named Smyte, then acquired by Twitter, with some core Javascript components released open source in 2023. The SQRL documentation is extensive and describes many of the design trade-offs and features specific to rules engines. Bluesky considered adopting SQRL but decided to start with a simpler runtime with rules in a known language (golang). + +* Reddit's [automod system](https://www.reddit.com/wiki/automoderator/) is simple and accessible for non-technical sub-reddit community moderators. Discord has a large ecosystem of bots which can help communities manage some moderation tasks, in particular mitigating spam and brigading. + +* Facebook's FXL and Haxl rule languages have been in use for over a decade. The 2012 paper ["The Facebook Immune System"](https://css.csail.mit.edu/6.858/2012/readings/facebook-immune.pdf) gives a good overview of design goals and how a rules engine fits in to an overall anti-spam/anti-abuse pipeline. + +* Email anti-spam systems like SpamAssassin and rspamd have been modular and configurable for several decades. diff --git a/automod/doc.go b/automod/doc.go index c84472aeb..e6c025f88 100644 --- a/automod/doc.go +++ b/automod/doc.go @@ -2,8 +2,5 @@ // // This package (`github.com/bluesky-social/indigo/automod`) contains a "rules engine" to augment human moderators in the atproto network. 
Batches of rules are processed for novel "events" such as a new post or update of an account handle. Counters and other statistics are collected, which can drive subsequent rule invocations. The outcome of rules can be moderation events like "report account for human review" or "label post". A lot of what this package does is collect and maintain caches of relevant metadata about accounts and pieces of content, so that rules have efficient access to this information. // -// A primary design goal is to have a flexible framework to allow new rules to be written and deployed rapidly in response to new patterns of spam and abuse. Some examples rules are included in the `automod/rules` package, but the expectation is that some real-world rules will be kept secret. -// -// Code for subscribing to a firehose is not included here; see cmd/hepa for a complete service built on this library. -// The `hepa` command is an example daemon which integrates this rules engine in to a stand-alone service. +// See `automod/README.md` for more background, and `cmd/hepa` for a daemon built on this package. 
package automod From 6aee7717c78ab9047c911602e272d0e2a306a7e6 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 20:03:02 -0800 Subject: [PATCH 29/35] automod: refactor event types --- automod/engine.go | 91 ++++++++------------------------ automod/engine_test.go | 6 +-- automod/event.go | 43 +++++++-------- automod/rules/hashtags.go | 5 +- automod/rules/hashtags_test.go | 8 +-- automod/rules/misleading.go | 9 ++-- automod/rules/misleading_test.go | 8 +-- automod/rules/private.go | 3 +- automod/rules/profile.go | 5 +- automod/rules/replies.go | 5 +- automod/ruleset.go | 50 +++++++++++++----- 11 files changed, 106 insertions(+), 127 deletions(-) diff --git a/automod/engine.go b/automod/engine.go index 759f630b2..6513e5d3d 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -7,7 +7,6 @@ import ( "strings" comatproto "github.com/bluesky-social/indigo/api/atproto" - appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/xrpc" @@ -50,7 +49,7 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. 
return err } evt := IdentityEvent{ - Event{ + RepoEvent{ Engine: e, Account: *am, }, @@ -83,53 +82,25 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID if ident == nil { return fmt.Errorf("identity not found for did: %s", did.String()) } - collection := strings.SplitN(path, "/", 2)[0] - switch collection { - case "app.bsky.feed.post": - post, ok := rec.(*appbsky.FeedPost) - if !ok { - return fmt.Errorf("mismatch between collection (%s) and type", collection) - } - am, err := e.GetAccountMeta(ctx, ident) - if err != nil { - return err - } - evt := e.NewPostEvent(*am, path, recCID, post) - e.Logger.Debug("processing post", "did", ident.DID, "path", path) - if err := e.Rules.CallPostRules(&evt); err != nil { - return err - } - if evt.Err != nil { - return evt.Err - } - evt.CanonicalLogLine() - if err := evt.PersistActions(ctx); err != nil { - return err - } - if err := evt.PersistCounters(ctx); err != nil { - return err - } - default: - am, err := e.GetAccountMeta(ctx, ident) - if err != nil { - return err - } - evt := e.NewRecordEvent(*am, path, recCID, rec) - e.Logger.Debug("processing record", "did", ident.DID, "path", path) - if err := e.Rules.CallRecordRules(&evt); err != nil { - return err - } - if evt.Err != nil { - return evt.Err - } - evt.CanonicalLogLine() - if err := evt.PersistActions(ctx); err != nil { - return err - } - if err := evt.PersistCounters(ctx); err != nil { - return err - } + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } + evt := e.NewRecordEvent(*am, path, recCID, rec) + e.Logger.Debug("processing record", "did", ident.DID, "path", path) + if err := e.Rules.CallRecordRules(&evt); err != nil { + return err + } + if evt.Err != nil { + return evt.Err + } + evt.CanonicalLogLine() + if err := evt.PersistActions(ctx); err != nil { + return err + } + if err := evt.PersistCounters(ctx); err != nil { + return err } return nil @@ -162,35 +133,15 @@ func (e *Engine) FetchAndProcessRecord(ctx 
context.Context, uri string) error { return e.ProcessRecord(ctx, ident.DID, aturi.Path(), *out.Cid, out.Value.Val) } -func (e *Engine) NewPostEvent(am AccountMeta, path, recCID string, post *appbsky.FeedPost) PostEvent { - parts := strings.SplitN(path, "/", 2) - return PostEvent{ - RecordEvent{ - Event{ - Engine: e, - Logger: e.Logger.With("did", am.Identity.DID, "collection", parts[0], "rkey", parts[1]), - Account: am, - }, - parts[0], - parts[1], - recCID, - []string{}, - false, - []ModReport{}, - []string{}, - }, - post, - } -} - func (e *Engine) NewRecordEvent(am AccountMeta, path, recCID string, rec any) RecordEvent { parts := strings.SplitN(path, "/", 2) return RecordEvent{ - Event{ + RepoEvent{ Engine: e, Logger: e.Logger.With("did", am.Identity.DID, "collection", parts[0], "rkey", parts[1]), Account: am, }, + rec, parts[0], parts[1], recCID, diff --git a/automod/engine_test.go b/automod/engine_test.go index aa2e3e0ef..a597f2c9c 100644 --- a/automod/engine_test.go +++ b/automod/engine_test.go @@ -12,14 +12,14 @@ import ( "github.com/stretchr/testify/assert" ) -func simpleRule(evt *PostEvent) error { - for _, tag := range evt.Post.Tags { +func simpleRule(evt *RecordEvent, post *appbsky.FeedPost) error { + for _, tag := range post.Tags { if evt.InSet("banned-hashtags", tag) { evt.AddRecordLabel("bad-hashtag") break } } - for _, facet := range evt.Post.Facets { + for _, facet := range post.Facets { for _, feat := range facet.Features { if feat.RichtextFacet_Tag != nil { tag := feat.RichtextFacet_Tag.Tag diff --git a/automod/event.go b/automod/event.go index 0924293e1..2de194eae 100644 --- a/automod/event.go +++ b/automod/event.go @@ -19,8 +19,10 @@ type CounterRef struct { Val string } -// base type for events. events are both containers for data about the event itself (similar to an HTTP request type); aggregate results and state (counters, mod actions) to be persisted after all rules are run; and act as an API for additional network reads and operations. 
-type Event struct { +// base type for events specific to an account, usually derived from a repo event stream message (one such message may result in multiple `RepoEvent`) +// +// events are both containers for data about the event itself (similar to an HTTP request type); aggregate results and state (counters, mod actions) to be persisted after all rules are run; and act as an API for additional network reads and operations. +type RepoEvent struct { Engine *Engine Err error Logger *slog.Logger @@ -32,7 +34,7 @@ type Event struct { AccountTakedown bool } -func (e *Event) GetCount(name, val, period string) int { +func (e *RepoEvent) GetCount(name, val, period string) int { v, err := e.Engine.GetCount(name, val, period) if err != nil { e.Err = err @@ -41,7 +43,7 @@ func (e *Event) GetCount(name, val, period string) int { return v } -func (e *Event) InSet(name, val string) bool { +func (e *RepoEvent) InSet(name, val string) bool { v, err := e.Engine.InSet(name, val) if err != nil { e.Err = err @@ -50,27 +52,27 @@ func (e *Event) InSet(name, val string) bool { return v } -func (e *Event) Increment(name, val string) { +func (e *RepoEvent) Increment(name, val string) { e.CounterIncrements = append(e.CounterIncrements, CounterRef{Name: name, Val: val}) } -func (e *Event) TakedownAccount() { +func (e *RepoEvent) TakedownAccount() { e.AccountTakedown = true } -func (e *Event) AddAccountLabel(val string) { +func (e *RepoEvent) AddAccountLabel(val string) { e.AccountLabels = append(e.AccountLabels, val) } -func (e *Event) AddAccountFlag(val string) { +func (e *RepoEvent) AddAccountFlag(val string) { e.AccountFlags = append(e.AccountFlags, val) } -func (e *Event) ReportAccount(reason, comment string) { +func (e *RepoEvent) ReportAccount(reason, comment string) { e.AccountReports = append(e.AccountReports, ModReport{ReasonType: reason, Comment: comment}) } -func (e *Event) PersistAccountActions(ctx context.Context) error { +func (e *RepoEvent) PersistAccountActions(ctx 
context.Context) error { if e.Engine.AdminClient == nil { return nil } @@ -124,11 +126,11 @@ func (e *Event) PersistAccountActions(ctx context.Context) error { return nil } -func (e *Event) PersistActions(ctx context.Context) error { +func (e *RepoEvent) PersistActions(ctx context.Context) error { return e.PersistAccountActions(ctx) } -func (e *Event) PersistCounters(ctx context.Context) error { +func (e *RepoEvent) PersistCounters(ctx context.Context) error { // TODO: dedupe this array for _, ref := range e.CounterIncrements { err := e.Engine.Counters.Increment(ctx, ref.Name, ref.Val) @@ -139,7 +141,7 @@ func (e *Event) PersistCounters(ctx context.Context) error { return nil } -func (e *Event) CanonicalLogLine() { +func (e *RepoEvent) CanonicalLogLine() { e.Logger.Info("canonical-event-line", "accountLabels", e.AccountLabels, "accountFlags", e.AccountFlags, @@ -149,12 +151,13 @@ func (e *Event) CanonicalLogLine() { } type IdentityEvent struct { - Event + RepoEvent } type RecordEvent struct { - Event + RepoEvent + Record any Collection string RecordKey string CID string @@ -254,13 +257,7 @@ func (e *RecordEvent) CanonicalLogLine() { ) } -type PostEvent struct { - RecordEvent - - Post *appbsky.FeedPost - // TODO: post thread context (root, parent) -} - type IdentityRuleFunc = func(evt *IdentityEvent) error type RecordRuleFunc = func(evt *RecordEvent) error -type PostRuleFunc = func(evt *PostEvent) error +type PostRuleFunc = func(evt *RecordEvent, post *appbsky.FeedPost) error +type ProfileRuleFunc = func(evt *RecordEvent, profile *appbsky.ActorProfile) error diff --git a/automod/rules/hashtags.go b/automod/rules/hashtags.go index cef83f45a..298607a9b 100644 --- a/automod/rules/hashtags.go +++ b/automod/rules/hashtags.go @@ -1,11 +1,12 @@ package rules import ( + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" ) -func BanHashtagsPostRule(evt *automod.PostEvent) error { - for _, tag := range ExtractHashtags(evt.Post) { 
+func BanHashtagsPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { + for _, tag := range ExtractHashtags(post) { if evt.InSet("banned-hashtags", tag) { evt.AddRecordLabel("bad-hashtag") break diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go index ac3a3fc26..994fc1c9b 100644 --- a/automod/rules/hashtags_test.go +++ b/automod/rules/hashtags_test.go @@ -26,15 +26,15 @@ func TestBanHashtagPostRule(t *testing.T) { p1 := appbsky.FeedPost{ Text: "some post blah", } - evt1 := engine.NewPostEvent(am1, path, cid1, &p1) - assert.NoError(BanHashtagsPostRule(&evt1)) + evt1 := engine.NewRecordEvent(am1, path, cid1, &p1) + assert.NoError(BanHashtagsPostRule(&evt1, &p1)) assert.Empty(evt1.RecordLabels) p2 := appbsky.FeedPost{ Text: "some post blah", Tags: []string{"one", "slur"}, } - evt2 := engine.NewPostEvent(am1, path, cid1, &p2) - assert.NoError(BanHashtagsPostRule(&evt2)) + evt2 := engine.NewRecordEvent(am1, path, cid1, &p2) + assert.NoError(BanHashtagsPostRule(&evt2, &p2)) assert.NotEmpty(evt2.RecordLabels) } diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index 44dc46de3..35d4093da 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -4,12 +4,13 @@ import ( "context" "net/url" + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" ) -func MisleadingURLPostRule(evt *automod.PostEvent) error { - facets, err := ExtractFacets(evt.Post) +func MisleadingURLPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { + facets, err := ExtractFacets(post) if err != nil { evt.Logger.Warn("invalid facets", "err", err) evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? 
@@ -41,10 +42,10 @@ func MisleadingURLPostRule(evt *automod.PostEvent) error { return nil } -func MisleadingMentionPostRule(evt *automod.PostEvent) error { +func MisleadingMentionPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { // TODO: do we really need to route context around? probably ctx := context.TODO() - facets, err := ExtractFacets(evt.Post) + facets, err := ExtractFacets(post) if err != nil { evt.Logger.Warn("invalid facets", "err", err) evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go index edc7a0d19..2fc889bac 100644 --- a/automod/rules/misleading_test.go +++ b/automod/rules/misleading_test.go @@ -41,8 +41,8 @@ func TestMisleadingURLPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(am1, path, cid1, &p1) - assert.NoError(MisleadingURLPostRule(&evt1)) + evt1 := engine.NewRecordEvent(am1, path, cid1, &p1) + assert.NoError(MisleadingURLPostRule(&evt1, &p1)) assert.NotEmpty(evt1.RecordLabels) } @@ -76,7 +76,7 @@ func TestMisleadingMentionPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(am1, path, cid1, &p1) - assert.NoError(MisleadingMentionPostRule(&evt1)) + evt1 := engine.NewRecordEvent(am1, path, cid1, &p1) + assert.NoError(MisleadingMentionPostRule(&evt1, &p1)) assert.NotEmpty(evt1.RecordLabels) } diff --git a/automod/rules/private.go b/automod/rules/private.go index 8f00d3bf1..6382dfe24 100644 --- a/automod/rules/private.go +++ b/automod/rules/private.go @@ -3,11 +3,12 @@ package rules import ( "strings" + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" ) // dummy rule. 
this leaks PII (account email) in logs and should never be used in real life -func AccountPrivateDemoPostRule(evt *automod.PostEvent) error { +func AccountPrivateDemoPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { if evt.Account.Private != nil { if strings.HasSuffix(evt.Account.Private.Email, "@blueskyweb.xyz") { evt.Logger.Info("hello dev!", "email", evt.Account.Private.Email) diff --git a/automod/rules/profile.go b/automod/rules/profile.go index e6f70bdc4..5b69e55f0 100644 --- a/automod/rules/profile.go +++ b/automod/rules/profile.go @@ -1,12 +1,13 @@ package rules import ( + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" ) // this is a dummy rule to demonstrate accessing account metadata (eg, profile) from within post handler -func AccountDemoPostRule(evt *automod.PostEvent) error { - if evt.Account.Profile.Description != nil && len(evt.Post.Text) > 5 && *evt.Account.Profile.Description == evt.Post.Text { +func AccountDemoPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { + if evt.Account.Profile.Description != nil && len(post.Text) > 5 && *evt.Account.Profile.Description == post.Text { evt.AddRecordFlag("own-profile-description") } return nil diff --git a/automod/rules/replies.go b/automod/rules/replies.go index e69a37217..db086cba2 100644 --- a/automod/rules/replies.go +++ b/automod/rules/replies.go @@ -1,11 +1,12 @@ package rules import ( + appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" ) -func ReplyCountPostRule(evt *automod.PostEvent) error { - if evt.Post.Reply != nil { +func ReplyCountPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { + if post.Reply != nil { did := evt.Account.Identity.DID.String() if evt.GetCount("reply", did, automod.PeriodDay) > 3 { evt.AddAccountFlag("frequent-replier") diff --git a/automod/ruleset.go b/automod/ruleset.go index fe106dcd4..12d0794c4 100644 --- a/automod/ruleset.go +++ 
b/automod/ruleset.go @@ -1,13 +1,21 @@ package automod +import ( + "fmt" + + appbsky "github.com/bluesky-social/indigo/api/bsky" +) + type RuleSet struct { PostRules []PostRuleFunc + ProfileRules []ProfileRuleFunc RecordRules []RecordRuleFunc IdentityRules []IdentityRuleFunc } -func (r *RuleSet) CallPostRules(evt *PostEvent) error { - for _, f := range r.PostRules { +func (r *RuleSet) CallRecordRules(evt *RecordEvent) error { + // first the generic rules + for _, f := range r.RecordRules { err := f(evt) if err != nil { return err @@ -16,17 +24,35 @@ func (r *RuleSet) CallPostRules(evt *PostEvent) error { return evt.Err } } - return nil -} - -func (r *RuleSet) CallRecordRules(evt *RecordEvent) error { - for _, f := range r.RecordRules { - err := f(evt) - if err != nil { - return err + // then any record-type-specific rules + switch evt.Collection { + case "app.bsky.feed.post": + post, ok := evt.Record.(*appbsky.FeedPost) + if !ok { + return fmt.Errorf("mismatch between collection (%s) and type", evt.Collection) } - if evt.Err != nil { - return evt.Err + for _, f := range r.PostRules { + err := f(evt, post) + if err != nil { + return err + } + if evt.Err != nil { + return evt.Err + } + } + case "app.bsky.actor.profile": + profile, ok := evt.Record.(*appbsky.ActorProfile) + if !ok { + return fmt.Errorf("mismatch between collection (%s) and type", evt.Collection) + } + for _, f := range r.ProfileRules { + err := f(evt, profile) + if err != nil { + return err + } + if evt.Err != nil { + return evt.Err + } } } return nil From 4e599dbecbc780bec13d14487f06d96e2d460bf2 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 20:10:12 -0800 Subject: [PATCH 30/35] automod: refactor out RelayClient, connect to account's PDS directly getRecord from Relay (BGS) wasn't working :shrug: --- automod/engine.go | 11 +++++++---- cmd/hepa/server.go | 11 ----------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/automod/engine.go b/automod/engine.go index 
6513e5d3d..817aae915 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -115,15 +115,18 @@ func (e *Engine) FetchAndProcessRecord(ctx context.Context, uri string) error { if aturi.RecordKey() == "" { return fmt.Errorf("need a full, not partial, AT-URI: %s", uri) } - if e.RelayClient == nil { - return fmt.Errorf("can't fetch record without relay client configured") - } ident, err := e.Directory.Lookup(ctx, aturi.Authority()) if err != nil { return fmt.Errorf("resolving AT-URI authority: %v", err) } + pdsURL := ident.PDSEndpoint() + if pdsURL == "" { + return fmt.Errorf("could not resolve PDS endpoint for AT-URI account: %s", ident.DID.String()) + } + pdsClient := xrpc.Client{Host: ident.PDSEndpoint()} + e.Logger.Info("fetching record", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) - out, err := comatproto.RepoGetRecord(ctx, e.RelayClient, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String()) + out, err := comatproto.RepoGetRecord(ctx, &pdsClient, "", aturi.Collection().String(), ident.DID.String(), aturi.RecordKey().String()) if err != nil { return fmt.Errorf("fetching record from Relay (%s): %v", aturi, err) } diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 6120a222d..a09e37c7d 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -117,13 +117,6 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { cache = automod.NewMemCacheStore(5_000, 30*time.Minute) } - relayURL := config.BGSHost - if strings.HasPrefix(relayURL, "ws") { - relayURL = "http" + relayURL[2:] - } - // XXX: - relayURL = "https://bsky.social" - engine := automod.Engine{ Logger: logger, Directory: dir, @@ -136,10 +129,6 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { Client: util.RobustHTTPClient(), Host: config.BskyHost, }, - RelayClient: &xrpc.Client{ - Client: util.RobustHTTPClient(), - Host: relayURL, - }, } s := &Server{ From 
a2b29a8813a06483841e71d4c8c14cec20bf63ba Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 23:15:48 -0800 Subject: [PATCH 31/35] automod: don't actually label, just flag --- automod/rules/hashtags.go | 2 +- automod/rules/hashtags_test.go | 4 ++-- automod/rules/misleading.go | 10 +++++----- automod/rules/misleading_test.go | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/automod/rules/hashtags.go b/automod/rules/hashtags.go index 298607a9b..aa047a207 100644 --- a/automod/rules/hashtags.go +++ b/automod/rules/hashtags.go @@ -8,7 +8,7 @@ import ( func BanHashtagsPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) error { for _, tag := range ExtractHashtags(post) { if evt.InSet("banned-hashtags", tag) { - evt.AddRecordLabel("bad-hashtag") + evt.AddRecordFlag("bad-hashtag") break } } diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go index 994fc1c9b..678aa0299 100644 --- a/automod/rules/hashtags_test.go +++ b/automod/rules/hashtags_test.go @@ -28,7 +28,7 @@ func TestBanHashtagPostRule(t *testing.T) { } evt1 := engine.NewRecordEvent(am1, path, cid1, &p1) assert.NoError(BanHashtagsPostRule(&evt1, &p1)) - assert.Empty(evt1.RecordLabels) + assert.Empty(evt1.RecordFlags) p2 := appbsky.FeedPost{ Text: "some post blah", @@ -36,5 +36,5 @@ func TestBanHashtagPostRule(t *testing.T) { } evt2 := engine.NewRecordEvent(am1, path, cid1, &p2) assert.NoError(BanHashtagsPostRule(&evt2, &p2)) - assert.NotEmpty(evt2.RecordLabels) + assert.NotEmpty(evt2.RecordFlags) } diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index 35d4093da..d2c982e72 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -13,7 +13,7 @@ func MisleadingURLPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) err facets, err := ExtractFacets(post) if err != nil { evt.Logger.Warn("invalid facets", "err", err) - evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" 
indicator? + evt.AddRecordFlag("invalid") // TODO: or some other "this record is corrupt" indicator? return nil } for _, facet := range facets { @@ -35,7 +35,7 @@ func MisleadingURLPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) err // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! if linkURL.Host != textURL.Host { evt.Logger.Warn("misleading mismatched domains", "link", linkURL.Host, "text", textURL.Host) - evt.AddRecordLabel("misleading") + evt.AddRecordFlag("misleading") } } } @@ -48,7 +48,7 @@ func MisleadingMentionPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) facets, err := ExtractFacets(post) if err != nil { evt.Logger.Warn("invalid facets", "err", err) - evt.AddRecordLabel("invalid") // TODO: or some other "this record is corrupt" indicator? + evt.AddRecordFlag("invalid") // TODO: or some other "this record is corrupt" indicator? return nil } for _, facet := range facets { @@ -66,14 +66,14 @@ func MisleadingMentionPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) mentioned, err := evt.Engine.Directory.LookupHandle(ctx, handle) if err != nil { evt.Logger.Warn("could not resolve handle", "handle", handle) - evt.AddRecordLabel("misleading") + evt.AddRecordFlag("misleading") break } // TODO: check if mentioned DID was recently updated? 
might be a caching issue if mentioned.DID.String() != *facet.DID { evt.Logger.Warn("misleading mention", "text", txt, "did", facet.DID) - evt.AddRecordLabel("misleading") + evt.AddRecordFlag("misleading") continue } } diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go index 2fc889bac..fee4444ec 100644 --- a/automod/rules/misleading_test.go +++ b/automod/rules/misleading_test.go @@ -43,7 +43,7 @@ func TestMisleadingURLPostRule(t *testing.T) { } evt1 := engine.NewRecordEvent(am1, path, cid1, &p1) assert.NoError(MisleadingURLPostRule(&evt1, &p1)) - assert.NotEmpty(evt1.RecordLabels) + assert.NotEmpty(evt1.RecordFlags) } func TestMisleadingMentionPostRule(t *testing.T) { @@ -78,5 +78,5 @@ func TestMisleadingMentionPostRule(t *testing.T) { } evt1 := engine.NewRecordEvent(am1, path, cid1, &p1) assert.NoError(MisleadingMentionPostRule(&evt1, &p1)) - assert.NotEmpty(evt1.RecordLabels) + assert.NotEmpty(evt1.RecordFlags) } From 3539c95fa6b3cc670e4f14585090244ff1e8ba14 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 23:28:58 -0800 Subject: [PATCH 32/35] automod: tweak misleading URL processing --- automod/rules/misleading.go | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index d2c982e72..e1f15e14c 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -3,6 +3,7 @@ package rules import ( "context" "net/url" + "strings" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/syntax" @@ -20,21 +21,27 @@ func MisleadingURLPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) err if facet.URL != nil { linkURL, err := url.Parse(*facet.URL) if err != nil { - evt.Logger.Warn("invalid link metadata URL", "uri", facet.URL) + evt.Logger.Warn("invalid link metadata URL", "url", facet.URL) continue } - // try parsing as a full URL - textURL, err := url.Parse(facet.Text) + text 
:= strings.TrimSpace(facet.Text) + // try to fix any missing scheme in the text + if !strings.Contains(text, "://") { + text = "https://" + text + } + + // try parsing as a full URL (with whitespace trimmed) + textURL, err := url.Parse(text) if err != nil { - evt.Logger.Warn("invalid link text URL", "uri", facet.Text) + evt.Logger.Warn("invalid link text URL", "url", facet.Text) continue } // for now just compare domains to handle the most obvious cases // this public code will obviously get discovered and bypassed. this doesn't earn you any security cred! - if linkURL.Host != textURL.Host { - evt.Logger.Warn("misleading mismatched domains", "link", linkURL.Host, "text", textURL.Host) + if linkURL.Host != textURL.Host && linkURL.Host != "www."+linkURL.Host { + evt.Logger.Warn("misleading mismatched domains", "linkHost", linkURL.Host, "textHost", textURL.Host, "text", facet.Text) evt.AddRecordFlag("misleading") } } From 695ed761ef5920fe2a2a4e60312602220831a910 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 23:45:12 -0800 Subject: [PATCH 33/35] automod: more URL tweaks --- automod/rules/misleading.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index e1f15e14c..1de75b01f 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -25,7 +25,12 @@ func MisleadingURLPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) err continue } - text := strings.TrimSpace(facet.Text) + // basic text string pre-cleanups + text := strings.TrimSuffix(strings.TrimSpace(facet.Text), "...") + // if really not a domain, just skip + if !strings.Contains(text, ".") { + continue + } // try to fix any missing scheme in the text if !strings.Contains(text, "://") { text = "https://" + text From ce3afdd247cb2a05a047df6d6a39820711d9a78d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 23:45:31 -0800 Subject: [PATCH 34/35] automod: fix nil error on 
IdentityEvent logging --- automod/engine.go | 1 + 1 file changed, 1 insertion(+) diff --git a/automod/engine.go b/automod/engine.go index 817aae915..1bb2a7cc2 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -51,6 +51,7 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. evt := IdentityEvent{ RepoEvent{ Engine: e, + Logger: e.Logger.With("did", am.Identity.DID), Account: *am, }, } From c4aefa7f813920a11fbdea24c1940453ef4a50ed Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 19 Nov 2023 23:56:28 -0800 Subject: [PATCH 35/35] automod: yet more URL cleaning --- automod/rules/misleading.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automod/rules/misleading.go b/automod/rules/misleading.go index 1de75b01f..b8668ead2 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -26,7 +26,7 @@ func MisleadingURLPostRule(evt *automod.RecordEvent, post *appbsky.FeedPost) err } // basic text string pre-cleanups - text := strings.TrimSuffix(strings.TrimSpace(facet.Text), "...") + text := strings.ToLower(strings.TrimSuffix(strings.TrimSpace(facet.Text), "...")) // if really not a domain, just skip if !strings.Contains(text, ".") { continue