From 0a82c6c7fa11fd457fcc61af23a90a3a4e16bdd1 Mon Sep 17 00:00:00 2001 From: Hailey Date: Sun, 29 Sep 2024 02:24:05 -0700 Subject: [PATCH 01/50] add tokenize while keeping common censor chars --- automod/keyword/tokenize.go | 23 ++++++++++++---- automod/keyword/tokenize_test.go | 47 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 5 deletions(-) diff --git a/automod/keyword/tokenize.go b/automod/keyword/tokenize.go index 8c477432b..a2a0c627e 100644 --- a/automod/keyword/tokenize.go +++ b/automod/keyword/tokenize.go @@ -12,18 +12,19 @@ import ( ) var ( - puncChars = regexp.MustCompile(`[[:punct:]]+`) - nonTokenChars = regexp.MustCompile(`[^\pL\pN\s]+`) + puncChars = regexp.MustCompile(`[[:punct:]]+`) + nonTokenChars = regexp.MustCompile(`[^\pL\pN\s]+`) + nonTokenCharsSkipCensorChars = regexp.MustCompile(`[^\pL\pN\s#*_-]`) ) // Splits free-form text in to tokens, including lower-case, unicode normalization, and some unicode folding. // // The intent is for this to work similarly to an NLP tokenizer, as might be used in a fulltext search engine, and enable fast matching to a list of known tokens. It might eventually even do stemming, removing pluralization (trailing "s" for English), etc. 
-func TokenizeText(text string) []string { +func tokenizeText(text string, nonTokenCharsRegex *regexp.Regexp) []string { // this function needs to be re-defined in every function call to prevent a race condition normFunc := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) - split := strings.ToLower(nonTokenChars.ReplaceAllString(text, " ")) - bare := strings.ToLower(nonTokenChars.ReplaceAllString(split, "")) + split := strings.ToLower(nonTokenCharsRegex.ReplaceAllString(text, " ")) + bare := strings.ToLower(nonTokenCharsRegex.ReplaceAllString(split, "")) norm, _, err := transform.String(normFunc, bare) if err != nil { slog.Warn("unicode normalization error", "err", err) @@ -32,6 +33,18 @@ func TokenizeText(text string) []string { return strings.Fields(norm) } +func TokenizeText(text string) []string { + return tokenizeText(text, nonTokenChars) +} + +func TokenizeTextSkippingCensorChars(text string) []string { + return tokenizeText(text, nonTokenCharsSkipCensorChars) +} + +func TokenizeTextWithCustomNonTokenRegex(text string, regex *regexp.Regexp) []string { + return tokenizeText(text, regex) +} + func splitIdentRune(c rune) bool { return !unicode.IsLetter(c) && !unicode.IsNumber(c) } diff --git a/automod/keyword/tokenize_test.go b/automod/keyword/tokenize_test.go index 89d5f79b1..66f0178b8 100644 --- a/automod/keyword/tokenize_test.go +++ b/automod/keyword/tokenize_test.go @@ -1,6 +1,7 @@ package keyword import ( + "regexp" "testing" "github.com/stretchr/testify/assert" @@ -17,6 +18,9 @@ func TestTokenizeText(t *testing.T) { {text: "Hello, โลก!", out: []string{"hello", "โลก"}}, {text: "Gdańsk", out: []string{"gdansk"}}, {text: " foo1;bar2,baz3...", out: []string{"foo1", "bar2", "baz3"}}, + {text: "foo*bar", out: []string{"foo", "bar"}}, + {text: "foo-bar", out: []string{"foo", "bar"}}, + {text: "foo_bar", out: []string{"foo", "bar"}}, } for _, fix := range fixtures { @@ -24,6 +28,49 @@ func TestTokenizeText(t *testing.T) { } } +func 
TestTokenizeTextWithCensorChars(t *testing.T) { + assert := assert.New(t) + + fixtures := []struct { + text string + out []string + }{ + {text: "", out: []string{}}, + {text: "Hello, โลก!", out: []string{"hello", "โลก"}}, + {text: "Gdańsk", out: []string{"gdansk"}}, + {text: " foo1;bar2,baz3...", out: []string{"foo1", "bar2", "baz3"}}, + {text: "foo*bar,foo&bar", out: []string{"foo*bar", "foo", "bar"}}, + {text: "foo-bar,foo&bar", out: []string{"foo-bar", "foo", "bar"}}, + {text: "foo_bar,foo&bar", out: []string{"foo_bar", "foo", "bar"}}, + {text: "foo#bar,foo&bar", out: []string{"foo#bar", "foo", "bar"}}, + } + + for _, fix := range fixtures { + assert.Equal(fix.out, TokenizeTextSkippingCensorChars(fix.text)) + } +} + +func TestTokenizeTextWithCustomRegex(t *testing.T) { + assert := assert.New(t) + + fixtures := []struct { + text string + out []string + }{ + {text: "", out: []string{}}, + {text: "Hello, โลก!", out: []string{"hello", "โลก"}}, + {text: "Gdańsk", out: []string{"gdansk"}}, + {text: " foo1;bar2,baz3...", out: []string{"foo1", "bar2", "baz3"}}, + {text: "foo*bar", out: []string{"foo", "bar"}}, + {text: "foo&bar,foo*bar", out: []string{"foo&bar", "foo", "bar"}}, + } + + regex := regexp.MustCompile(`[^\pL\pN\s&]`) + for _, fix := range fixtures { + assert.Equal(fix.out, TokenizeTextWithCustomNonTokenRegex(fix.text, regex)) + } +} + func TestTokenizeIdentifier(t *testing.T) { assert := assert.New(t) From 4da4e794d52d374dfd2ef19772aa46269c5fed1b Mon Sep 17 00:00:00 2001 From: Hailey Date: Sun, 29 Sep 2024 02:27:20 -0700 Subject: [PATCH 02/50] rename --- automod/keyword/tokenize.go | 4 ++-- automod/keyword/tokenize_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/automod/keyword/tokenize.go b/automod/keyword/tokenize.go index a2a0c627e..9a2edd9ec 100644 --- a/automod/keyword/tokenize.go +++ b/automod/keyword/tokenize.go @@ -41,8 +41,8 @@ func TokenizeTextSkippingCensorChars(text string) []string { return tokenizeText(text, 
nonTokenCharsSkipCensorChars) } -func TokenizeTextWithCustomNonTokenRegex(text string, regex *regexp.Regexp) []string { - return tokenizeText(text, regex) +func TokenizeTextWithRegex(text string, nonTokenCharsRegex *regexp.Regexp) []string { + return tokenizeText(text, nonTokenCharsRegex) } func splitIdentRune(c rune) bool { diff --git a/automod/keyword/tokenize_test.go b/automod/keyword/tokenize_test.go index 66f0178b8..45b477f6d 100644 --- a/automod/keyword/tokenize_test.go +++ b/automod/keyword/tokenize_test.go @@ -67,7 +67,7 @@ func TestTokenizeTextWithCustomRegex(t *testing.T) { regex := regexp.MustCompile(`[^\pL\pN\s&]`) for _, fix := range fixtures { - assert.Equal(fix.out, TokenizeTextWithCustomNonTokenRegex(fix.text, regex)) + assert.Equal(fix.out, TokenizeTextWithRegex(fix.text, regex)) } } From c8238e1043309c94117e520c4b256be84588256f Mon Sep 17 00:00:00 2001 From: Hailey Date: Sun, 29 Sep 2024 02:35:07 -0700 Subject: [PATCH 03/50] clean --- automod/keyword/tokenize.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/automod/keyword/tokenize.go b/automod/keyword/tokenize.go index 9a2edd9ec..0b5a33ca4 100644 --- a/automod/keyword/tokenize.go +++ b/automod/keyword/tokenize.go @@ -20,7 +20,7 @@ var ( // Splits free-form text in to tokens, including lower-case, unicode normalization, and some unicode folding. // // The intent is for this to work similarly to an NLP tokenizer, as might be used in a fulltext search engine, and enable fast matching to a list of known tokens. It might eventually even do stemming, removing pluralization (trailing "s" for English), etc. 
-func tokenizeText(text string, nonTokenCharsRegex *regexp.Regexp) []string { +func TokenizeTextWithRegex(text string, nonTokenCharsRegex *regexp.Regexp) []string { // this function needs to be re-defined in every function call to prevent a race condition normFunc := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) split := strings.ToLower(nonTokenCharsRegex.ReplaceAllString(text, " ")) @@ -34,15 +34,11 @@ func tokenizeText(text string, nonTokenCharsRegex *regexp.Regexp) []string { } func TokenizeText(text string) []string { - return tokenizeText(text, nonTokenChars) + return TokenizeTextWithRegex(text, nonTokenChars) } func TokenizeTextSkippingCensorChars(text string) []string { - return tokenizeText(text, nonTokenCharsSkipCensorChars) -} - -func TokenizeTextWithRegex(text string, nonTokenCharsRegex *regexp.Regexp) []string { - return tokenizeText(text, nonTokenCharsRegex) + return TokenizeTextWithRegex(text, nonTokenCharsSkipCensorChars) } func splitIdentRune(c rune) bool { From c47920c113530c9ea3d04229b88fa522f38ad29a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 7 Oct 2024 18:05:02 -0700 Subject: [PATCH 04/50] refactor automod consumers (firehose+ozone) to package --- automod/consumer/doc.go | 2 + automod/consumer/firehose.go | 312 +++++++++++++++++++++++++++++++++++ automod/consumer/ozone.go | 186 +++++++++++++++++++++ automod/consumer/util.go | 25 +++ 4 files changed, 525 insertions(+) create mode 100644 automod/consumer/doc.go create mode 100644 automod/consumer/firehose.go create mode 100644 automod/consumer/ozone.go create mode 100644 automod/consumer/util.go diff --git a/automod/consumer/doc.go b/automod/consumer/doc.go new file mode 100644 index 000000000..fa8ccdcb2 --- /dev/null +++ b/automod/consumer/doc.go @@ -0,0 +1,2 @@ +// Code for consuming from atproto firehose and ozone event stream, pushing events in to automod engine. 
+package consumer diff --git a/automod/consumer/firehose.go b/automod/consumer/firehose.go new file mode 100644 index 000000000..df6d8f91b --- /dev/null +++ b/automod/consumer/firehose.go @@ -0,0 +1,312 @@ +package consumer + +import ( + "bytes" + "context" + "fmt" + "log/slog" + "net/http" + "net/url" + "sync/atomic" + "time" + + comatproto "github.com/bluesky-social/indigo/api/atproto" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/events/schedulers/autoscaling" + "github.com/bluesky-social/indigo/events/schedulers/parallel" + lexutil "github.com/bluesky-social/indigo/lex/util" + + "github.com/bluesky-social/indigo/events" + "github.com/bluesky-social/indigo/repo" + "github.com/bluesky-social/indigo/repomgr" + "github.com/carlmjohnson/versioninfo" + "github.com/gorilla/websocket" + "github.com/redis/go-redis/v9" +) + +// TODO: should probably make this not hepa-specific; or even configurable +var firehoseCursorKey = "hepa/seq" + +type FirehoseConsumer struct { + Parallelism int + Logger *slog.Logger + RedisClient *redis.Client + Engine *automod.Engine + Host string + + // TODO: prefilter record collections; or predicate function? + // TODO: enable/disable event types; or predicate function? + + // lastSeq is the most recent event sequence number we've received and begun to handle. + // This number is periodically persisted to redis, if redis is present. + // The value is best-effort (the stream handling itself is concurrent, so event numbers may not be monotonic), + // but nonetheless, you must use atomics when updating or reading this (to avoid data races). 
+ lastSeq int64 +} + +func (fc *FirehoseConsumer) Run(ctx context.Context) error { + + if fc.Engine == nil { + return fmt.Errorf("nil engine") + } + + cur, err := fc.ReadLastCursor(ctx) + if err != nil { + return err + } + + dialer := websocket.DefaultDialer + u, err := url.Parse(fc.Host) + if err != nil { + return fmt.Errorf("invalid Host URI: %w", err) + } + u.Path = "xrpc/com.atproto.sync.subscribeRepos" + if cur != 0 { + u.RawQuery = fmt.Sprintf("cursor=%d", cur) + } + fc.Logger.Info("subscribing to repo event stream", "upstream", fc.Host, "cursor", cur) + con, _, err := dialer.Dial(u.String(), http.Header{ + "User-Agent": []string{fmt.Sprintf("hepa/%s", versioninfo.Short())}, + }) + if err != nil { + return fmt.Errorf("subscribing to firehose failed (dialing): %w", err) + } + + rsc := &events.RepoStreamCallbacks{ + RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { + atomic.StoreInt64(&fc.lastSeq, evt.Seq) + return fc.HandleRepoCommit(ctx, evt) + }, + RepoIdentity: func(evt *comatproto.SyncSubscribeRepos_Identity) error { + atomic.StoreInt64(&fc.lastSeq, evt.Seq) + did, err := syntax.ParseDID(evt.Did) + if err != nil { + fc.Logger.Error("bad DID in RepoIdentity event", "did", evt.Did, "seq", evt.Seq, "err", err) + return nil + } + if err := fc.Engine.ProcessIdentityEvent(ctx, "identity", did); err != nil { + fc.Logger.Error("processing repo identity failed", "did", evt.Did, "seq", evt.Seq, "err", err) + } + return nil + }, + RepoAccount: func(evt *comatproto.SyncSubscribeRepos_Account) error { + atomic.StoreInt64(&fc.lastSeq, evt.Seq) + did, err := syntax.ParseDID(evt.Did) + if err != nil { + fc.Logger.Error("bad DID in RepoAccount event", "did", evt.Did, "seq", evt.Seq, "err", err) + return nil + } + if err := fc.Engine.ProcessIdentityEvent(ctx, "account", did); err != nil { + fc.Logger.Error("processing repo account failed", "did", evt.Did, "seq", evt.Seq, "err", err) + } + return nil + }, + // TODO: deprecated + RepoHandle: func(evt 
*comatproto.SyncSubscribeRepos_Handle) error { + atomic.StoreInt64(&fc.lastSeq, evt.Seq) + did, err := syntax.ParseDID(evt.Did) + if err != nil { + fc.Logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) + return nil + } + if err := fc.Engine.ProcessIdentityEvent(ctx, "handle", did); err != nil { + fc.Logger.Error("processing handle update failed", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) + } + return nil + }, + // TODO: deprecated + RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error { + atomic.StoreInt64(&fc.lastSeq, evt.Seq) + did, err := syntax.ParseDID(evt.Did) + if err != nil { + fc.Logger.Error("bad DID in RepoTombstone event", "did", evt.Did, "seq", evt.Seq, "err", err) + return nil + } + if err := fc.Engine.ProcessIdentityEvent(ctx, "tombstone", did); err != nil { + fc.Logger.Error("processing repo tombstone failed", "did", evt.Did, "seq", evt.Seq, "err", err) + } + return nil + }, + } + + var scheduler events.Scheduler + if fc.Parallelism > 0 { + // use a fixed-parallelism scheduler if configured + scheduler = parallel.NewScheduler( + fc.Parallelism, + 1000, + fc.Host, + rsc.EventHandler, + ) + fc.Logger.Info("hepa scheduler configured", "scheduler", "parallel", "initial", fc.Parallelism) + } else { + // otherwise use auto-scaling scheduler + scaleSettings := autoscaling.DefaultAutoscaleSettings() + // start at higher parallelism (somewhat arbitrary) + scaleSettings.Concurrency = 4 + scaleSettings.MaxConcurrency = 200 + scheduler = autoscaling.NewScheduler(scaleSettings, fc.Host, rsc.EventHandler) + fc.Logger.Info("hepa scheduler configured", "scheduler", "autoscaling", "initial", scaleSettings.Concurrency, "max", scaleSettings.MaxConcurrency) + } + + return events.HandleRepoStream(ctx, con, scheduler) +} + +// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better. 
+func (fc *FirehoseConsumer) HandleRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error { + + logger := fc.Logger.With("event", "commit", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq) + logger.Debug("received commit event") + + if evt.TooBig { + logger.Warn("skipping tooBig events for now") + return nil + } + + did, err := syntax.ParseDID(evt.Repo) + if err != nil { + logger.Error("bad DID syntax in event", "err", err) + return nil + } + + rr, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(evt.Blocks)) + if err != nil { + logger.Error("failed to read repo from car", "err", err) + return nil + } + + // empty commit is a special case, temporarily, basically indicates "new account" + if len(evt.Ops) == 0 { + if err := fc.Engine.ProcessIdentityEvent(ctx, "create", did); err != nil { + fc.Logger.Error("processing handle update failed", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq, "err", err) + } + } + + for _, op := range evt.Ops { + logger = logger.With("eventKind", op.Action, "path", op.Path) + collection, rkey, err := splitRepoPath(op.Path) + if err != nil { + logger.Error("invalid path in repo op") + return nil + } + + ek := repomgr.EventKind(op.Action) + switch ek { + case repomgr.EvtKindCreateRecord, repomgr.EvtKindUpdateRecord: + // read the record bytes from blocks, and verify CID + rc, recCBOR, err := rr.GetRecordBytes(ctx, op.Path) + if err != nil { + logger.Error("reading record from event blocks (CAR)", "err", err) + break + } + if op.Cid == nil || lexutil.LexLink(rc) != *op.Cid { + logger.Error("mismatch between commit op CID and record block", "recordCID", rc, "opCID", op.Cid) + break + } + var action string + switch ek { + case repomgr.EvtKindCreateRecord: + action = automod.CreateOp + case repomgr.EvtKindUpdateRecord: + action = automod.UpdateOp + default: + logger.Error("impossible event kind", "kind", ek) + break + } + recCID := syntax.CID(op.Cid.String()) + err = fc.Engine.ProcessRecordOp(ctx, automod.RecordOp{ + 
Action: action, + DID: did, + Collection: collection, + RecordKey: rkey, + CID: &recCID, + RecordCBOR: *recCBOR, + }) + if err != nil { + logger.Error("engine failed to process record", "err", err) + continue + } + case repomgr.EvtKindDeleteRecord: + err = fc.Engine.ProcessRecordOp(ctx, automod.RecordOp{ + Action: automod.DeleteOp, + DID: did, + Collection: collection, + RecordKey: rkey, + CID: nil, + RecordCBOR: nil, + }) + if err != nil { + logger.Error("engine failed to process record", "err", err) + continue + } + default: + // TODO: should this be an error? + } + } + + return nil +} + +func (fc *FirehoseConsumer) ReadLastCursor(ctx context.Context) (int64, error) { + // if redis isn't configured, just skip + if fc.RedisClient == nil { + fc.Logger.Info("redis not configured, skipping cursor read") + return 0, nil + } + + val, err := fc.RedisClient.Get(ctx, firehoseCursorKey).Int64() + if err == redis.Nil { + fc.Logger.Info("no pre-existing cursor in redis") + return 0, nil + } else if err != nil { + return 0, err + } + fc.Logger.Info("successfully found prior subscription cursor seq in redis", "seq", val) + return val, nil +} + +func (fc *FirehoseConsumer) PersistCursor(ctx context.Context) error { + // if redis isn't configured, just skip + if fc.RedisClient == nil { + return nil + } + lastSeq := atomic.LoadInt64(&fc.lastSeq) + if lastSeq <= 0 { + return nil + } + err := fc.RedisClient.Set(ctx, firehoseCursorKey, lastSeq, 14*24*time.Hour).Err() + return err +} + +// this method runs in a loop, persisting the current cursor state every 5 seconds +func (fc *FirehoseConsumer) RunPersistCursor(ctx context.Context) error { + + // if redis isn't configured, just skip + if fc.RedisClient == nil { + return nil + } + ticker := time.NewTicker(5 * time.Second) + for { + select { + case <-ctx.Done(): + lastSeq := atomic.LoadInt64(&fc.lastSeq) + if lastSeq >= 1 { + fc.Logger.Info("persisting final cursor seq value", "seq", lastSeq) + err := fc.PersistCursor(ctx) + if err 
!= nil { + fc.Logger.Error("failed to persist cursor", "err", err, "seq", lastSeq) + } + } + return nil + case <-ticker.C: + lastSeq := atomic.LoadInt64(&fc.lastSeq) + if lastSeq >= 1 { + err := fc.PersistCursor(ctx) + if err != nil { + fc.Logger.Error("failed to persist cursor", "err", err, "seq", lastSeq) + } + } + } + } +} diff --git a/automod/consumer/ozone.go b/automod/consumer/ozone.go new file mode 100644 index 000000000..0692ac393 --- /dev/null +++ b/automod/consumer/ozone.go @@ -0,0 +1,186 @@ +package consumer + +import ( + "context" + "fmt" + "log/slog" + "sync/atomic" + "time" + + toolsozone "github.com/bluesky-social/indigo/api/ozone" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/xrpc" + + "github.com/redis/go-redis/v9" +) + +// TODO: should probably make this not hepa-specific; or even configurable +var ozoneCursorKey = "hepa/ozoneTimestamp" + +type OzoneConsumer struct { + Logger *slog.Logger + RedisClient *redis.Client + OzoneClient *xrpc.Client + Engine *automod.Engine + + // same as lastSeq, but for Ozone timestamp cursor. the value is a string. 
+ lastCursor atomic.Value +} + +func (oc *OzoneConsumer) Run(ctx context.Context) error { + + if oc.Engine == nil { + return fmt.Errorf("nil engine") + } + if oc.OzoneClient == nil { + return fmt.Errorf("nil ozoneclient") + } + + cur, err := oc.ReadLastCursor(ctx) + if err != nil { + return err + } + + if cur == "" { + cur = syntax.DatetimeNow().String() + } + since, err := syntax.ParseDatetime(cur) + if err != nil { + return err + } + + oc.Logger.Info("subscribing to ozone event log", "upstream", oc.OzoneClient.Host, "cursor", cur, "since", since) + var limit int64 = 50 + period := time.Second * 5 + + for { + //func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, types []string) (*ModerationQueryEvents_Output, error) { + me, err := toolsozone.ModerationQueryEvents( + ctx, + oc.OzoneClient, + nil, // addedLabels: If specified, only events where all of these labels were added are returned + nil, // addedTags: If specified, only events where all of these tags were added are returned + "", // comment: If specified, only events with comments containing the keyword are returned + since.String(), // createdAfter: Retrieve events created after a given timestamp + "", // createdBefore: Retrieve events created before a given timestamp + "", // createdBy + "", // cursor + false, // hasComment: If true, only events with comments are returned + true, // includeAllUserRecords: If true, events on all record types (posts, lists, profile etc.) 
owned by the did are returned + limit, + nil, // removedLabels: If specified, only events where all of these labels were removed are returned + nil, // removedTags + nil, // reportTypes + "asc", // sortDirection: Sort direction for the events. Defaults to descending order of created at timestamp. + "", // subject + nil, // types: The types of events (fully qualified string in the format of tools.ozone.moderation.defs#modEvent) to filter by. If not specified, all events are returned. + ) + if err != nil { + oc.Logger.Warn("ozone query events failed; sleeping then will retrying", "err", err, "period", period.String()) + time.Sleep(period) + continue + } + + // track if the response contained anything new + anyNewEvents := false + for _, evt := range me.Events { + createdAt, err := syntax.ParseDatetime(evt.CreatedAt) + if err != nil { + return fmt.Errorf("invalid time format for ozone 'createdAt': %w", err) + } + // skip if the timestamp is the exact same + if createdAt == since { + continue + } + anyNewEvents = true + // TODO: is there a race condition here? + if !createdAt.Time().After(since.Time()) { + oc.Logger.Error("out of order ozone event", "createdAt", createdAt, "since", since) + return fmt.Errorf("out of order ozone event") + } + if err = oc.HandleOzoneEvent(ctx, evt); err != nil { + oc.Logger.Error("failed to process ozone event", "event", evt) + } + since = createdAt + oc.lastCursor.Store(since.String()) + } + if !anyNewEvents { + oc.Logger.Debug("... 
ozone poller sleeping", "period", period.String()) + time.Sleep(period) + } + } +} + +func (oc *OzoneConsumer) HandleOzoneEvent(ctx context.Context, eventView *toolsozone.ModerationDefs_ModEventView) error { + + oc.Logger.Debug("received ozone event", "eventID", eventView.Id, "createdAt", eventView.CreatedAt) + + if err := oc.Engine.ProcessOzoneEvent(ctx, eventView); err != nil { + oc.Logger.Error("engine failed to process ozone event", "err", err) + } + return nil +} + +func (oc *OzoneConsumer) ReadLastCursor(ctx context.Context) (string, error) { + // if redis isn't configured, just skip + if oc.RedisClient == nil { + oc.Logger.Info("redis not configured, skipping ozone cursor read") + return "", nil + } + + val, err := oc.RedisClient.Get(ctx, ozoneCursorKey).Result() + if err == redis.Nil || val == "" { + oc.Logger.Info("no pre-existing ozone cursor in redis") + return "", nil + } else if err != nil { + return "", err + } + oc.Logger.Info("successfully found prior ozone offset timestamp in redis", "cursor", val) + return val, nil +} + +func (oc *OzoneConsumer) PersistCursor(ctx context.Context) error { + // if redis isn't configured, just skip + if oc.RedisClient == nil { + return nil + } + lastCursor := oc.lastCursor.Load() + if lastCursor == nil || lastCursor == "" { + return nil + } + err := oc.RedisClient.Set(ctx, ozoneCursorKey, lastCursor, 14*24*time.Hour).Err() + return err +} + +// this method runs in a loop, persisting the current cursor state every 5 seconds +func (oc *OzoneConsumer) RunPersistCursor(ctx context.Context) error { + + // if redis isn't configured, just skip + if oc.RedisClient == nil { + return nil + } + ticker := time.NewTicker(5 * time.Second) + for { + select { + case <-ctx.Done(): + lastCursor := oc.lastCursor.Load() + if lastCursor != nil && lastCursor != "" { + oc.Logger.Info("persisting final ozone cursor timestamp", "cursor", lastCursor) + err := oc.PersistCursor(ctx) + if err != nil { + oc.Logger.Error("failed to persist ozone 
cursor", "err", err, "cursor", lastCursor) + } + } + return nil + case <-ticker.C: + lastCursor := oc.lastCursor.Load() + if lastCursor != nil && lastCursor != "" { + err := oc.PersistCursor(ctx) + if err != nil { + oc.Logger.Error("failed to persist ozone cursor", "err", err, "cursor", lastCursor) + } + } + } + } +} diff --git a/automod/consumer/util.go b/automod/consumer/util.go new file mode 100644 index 000000000..b1c34ebaf --- /dev/null +++ b/automod/consumer/util.go @@ -0,0 +1,25 @@ +package consumer + +import ( + "fmt" + "strings" + + "github.com/bluesky-social/indigo/atproto/syntax" +) + +// TODO: move this to a "ParsePath" helper in syntax package? +func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) { + parts := strings.SplitN(path, "/", 3) + if len(parts) != 2 { + return "", "", fmt.Errorf("invalid record path: %s", path) + } + collection, err := syntax.ParseNSID(parts[0]) + if err != nil { + return "", "", err + } + rkey, err := syntax.ParseRecordKey(parts[1]) + if err != nil { + return "", "", err + } + return collection, rkey, nil +} From 13c772a83933d703fcf427764054228b4948b025 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 7 Oct 2024 18:05:36 -0700 Subject: [PATCH 05/50] update hepa (automod) to use refactored consumers --- cmd/hepa/consumer.go | 240 ------------------------------------- cmd/hepa/consumer_ozone.go | 97 --------------- cmd/hepa/main.go | 67 +++++++---- cmd/hepa/server.go | 156 ++---------------------- 4 files changed, 52 insertions(+), 508 deletions(-) delete mode 100644 cmd/hepa/consumer.go delete mode 100644 cmd/hepa/consumer_ozone.go diff --git a/cmd/hepa/consumer.go b/cmd/hepa/consumer.go deleted file mode 100644 index e9d789baa..000000000 --- a/cmd/hepa/consumer.go +++ /dev/null @@ -1,240 +0,0 @@ -package main - -import ( - "bytes" - "context" - "fmt" - "net/http" - "net/url" - "strings" - "sync/atomic" - - comatproto "github.com/bluesky-social/indigo/api/atproto" - 
"github.com/bluesky-social/indigo/atproto/syntax" - "github.com/bluesky-social/indigo/automod" - "github.com/bluesky-social/indigo/events/schedulers/autoscaling" - "github.com/bluesky-social/indigo/events/schedulers/parallel" - lexutil "github.com/bluesky-social/indigo/lex/util" - - "github.com/bluesky-social/indigo/events" - "github.com/bluesky-social/indigo/repo" - "github.com/bluesky-social/indigo/repomgr" - "github.com/carlmjohnson/versioninfo" - "github.com/gorilla/websocket" -) - -func (s *Server) RunConsumer(ctx context.Context) error { - - cur, err := s.ReadLastCursor(ctx) - if err != nil { - return err - } - - dialer := websocket.DefaultDialer - u, err := url.Parse(s.relayHost) - if err != nil { - return fmt.Errorf("invalid relayHost URI: %w", err) - } - u.Path = "xrpc/com.atproto.sync.subscribeRepos" - if cur != 0 { - u.RawQuery = fmt.Sprintf("cursor=%d", cur) - } - s.logger.Info("subscribing to repo event stream", "upstream", s.relayHost, "cursor", cur) - con, _, err := dialer.Dial(u.String(), http.Header{ - "User-Agent": []string{fmt.Sprintf("hepa/%s", versioninfo.Short())}, - }) - if err != nil { - return fmt.Errorf("subscribing to firehose failed (dialing): %w", err) - } - - rsc := &events.RepoStreamCallbacks{ - RepoCommit: func(evt *comatproto.SyncSubscribeRepos_Commit) error { - atomic.StoreInt64(&s.lastSeq, evt.Seq) - return s.HandleRepoCommit(ctx, evt) - }, - RepoIdentity: func(evt *comatproto.SyncSubscribeRepos_Identity) error { - atomic.StoreInt64(&s.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - s.logger.Error("bad DID in RepoIdentity event", "did", evt.Did, "seq", evt.Seq, "err", err) - return nil - } - if err := s.engine.ProcessIdentityEvent(ctx, "identity", did); err != nil { - s.logger.Error("processing repo identity failed", "did", evt.Did, "seq", evt.Seq, "err", err) - } - return nil - }, - RepoAccount: func(evt *comatproto.SyncSubscribeRepos_Account) error { - atomic.StoreInt64(&s.lastSeq, evt.Seq) - did, 
err := syntax.ParseDID(evt.Did) - if err != nil { - s.logger.Error("bad DID in RepoAccount event", "did", evt.Did, "seq", evt.Seq, "err", err) - return nil - } - if err := s.engine.ProcessIdentityEvent(ctx, "account", did); err != nil { - s.logger.Error("processing repo account failed", "did", evt.Did, "seq", evt.Seq, "err", err) - } - return nil - }, - // TODO: deprecated - RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { - atomic.StoreInt64(&s.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - s.logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) - return nil - } - if err := s.engine.ProcessIdentityEvent(ctx, "handle", did); err != nil { - s.logger.Error("processing handle update failed", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) - } - return nil - }, - // TODO: deprecated - RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error { - atomic.StoreInt64(&s.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - s.logger.Error("bad DID in RepoTombstone event", "did", evt.Did, "seq", evt.Seq, "err", err) - return nil - } - if err := s.engine.ProcessIdentityEvent(ctx, "tombstone", did); err != nil { - s.logger.Error("processing repo tombstone failed", "did", evt.Did, "seq", evt.Seq, "err", err) - } - return nil - }, - } - - var scheduler events.Scheduler - if s.firehoseParallelism > 0 { - // use a fixed-parallelism scheduler if configured - scheduler = parallel.NewScheduler( - s.firehoseParallelism, - 1000, - s.relayHost, - rsc.EventHandler, - ) - s.logger.Info("hepa scheduler configured", "scheduler", "parallel", "initial", s.firehoseParallelism) - } else { - // otherwise use auto-scaling scheduler - scaleSettings := autoscaling.DefaultAutoscaleSettings() - // start at higher parallelism (somewhat arbitrary) - scaleSettings.Concurrency = 4 - scaleSettings.MaxConcurrency = 200 - scheduler = 
autoscaling.NewScheduler(scaleSettings, s.relayHost, rsc.EventHandler) - s.logger.Info("hepa scheduler configured", "scheduler", "autoscaling", "initial", scaleSettings.Concurrency, "max", scaleSettings.MaxConcurrency) - } - - return events.HandleRepoStream(ctx, con, scheduler) -} - -// TODO: move this to a "ParsePath" helper in syntax package? -func splitRepoPath(path string) (syntax.NSID, syntax.RecordKey, error) { - parts := strings.SplitN(path, "/", 3) - if len(parts) != 2 { - return "", "", fmt.Errorf("invalid record path: %s", path) - } - collection, err := syntax.ParseNSID(parts[0]) - if err != nil { - return "", "", err - } - rkey, err := syntax.ParseRecordKey(parts[1]) - if err != nil { - return "", "", err - } - return collection, rkey, nil -} - -// NOTE: for now, this function basically never errors, just logs and returns nil. Should think through error processing better. -func (s *Server) HandleRepoCommit(ctx context.Context, evt *comatproto.SyncSubscribeRepos_Commit) error { - - logger := s.logger.With("event", "commit", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq) - logger.Debug("received commit event") - - if evt.TooBig { - logger.Warn("skipping tooBig events for now") - return nil - } - - did, err := syntax.ParseDID(evt.Repo) - if err != nil { - logger.Error("bad DID syntax in event", "err", err) - return nil - } - - rr, err := repo.ReadRepoFromCar(ctx, bytes.NewReader(evt.Blocks)) - if err != nil { - logger.Error("failed to read repo from car", "err", err) - return nil - } - - // empty commit is a special case, temporarily, basically indicates "new account" - if len(evt.Ops) == 0 { - if err := s.engine.ProcessIdentityEvent(ctx, "create", did); err != nil { - s.logger.Error("processing handle update failed", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq, "err", err) - } - } - - for _, op := range evt.Ops { - logger = logger.With("eventKind", op.Action, "path", op.Path) - collection, rkey, err := splitRepoPath(op.Path) - if err != nil { - 
logger.Error("invalid path in repo op") - return nil - } - - ek := repomgr.EventKind(op.Action) - switch ek { - case repomgr.EvtKindCreateRecord, repomgr.EvtKindUpdateRecord: - // read the record bytes from blocks, and verify CID - rc, recCBOR, err := rr.GetRecordBytes(ctx, op.Path) - if err != nil { - logger.Error("reading record from event blocks (CAR)", "err", err) - break - } - if op.Cid == nil || lexutil.LexLink(rc) != *op.Cid { - logger.Error("mismatch between commit op CID and record block", "recordCID", rc, "opCID", op.Cid) - break - } - var action string - switch ek { - case repomgr.EvtKindCreateRecord: - action = automod.CreateOp - case repomgr.EvtKindUpdateRecord: - action = automod.UpdateOp - default: - logger.Error("impossible event kind", "kind", ek) - break - } - recCID := syntax.CID(op.Cid.String()) - err = s.engine.ProcessRecordOp(ctx, automod.RecordOp{ - Action: action, - DID: did, - Collection: collection, - RecordKey: rkey, - CID: &recCID, - RecordCBOR: *recCBOR, - }) - if err != nil { - logger.Error("engine failed to process record", "err", err) - continue - } - case repomgr.EvtKindDeleteRecord: - err = s.engine.ProcessRecordOp(ctx, automod.RecordOp{ - Action: automod.DeleteOp, - DID: did, - Collection: collection, - RecordKey: rkey, - CID: nil, - RecordCBOR: nil, - }) - if err != nil { - logger.Error("engine failed to process record", "err", err) - continue - } - default: - // TODO: should this be an error? 
- } - } - - return nil -} diff --git a/cmd/hepa/consumer_ozone.go b/cmd/hepa/consumer_ozone.go deleted file mode 100644 index 406a34a4c..000000000 --- a/cmd/hepa/consumer_ozone.go +++ /dev/null @@ -1,97 +0,0 @@ -package main - -import ( - "context" - "fmt" - "time" - - toolsozone "github.com/bluesky-social/indigo/api/ozone" - "github.com/bluesky-social/indigo/atproto/syntax" -) - -func (s *Server) RunOzoneConsumer(ctx context.Context) error { - - cur, err := s.ReadLastOzoneCursor(ctx) - if err != nil { - return err - } - - if cur == "" { - cur = syntax.DatetimeNow().String() - } - since, err := syntax.ParseDatetime(cur) - if err != nil { - return err - } - - s.logger.Info("subscribing to ozone event log", "upstream", s.engine.OzoneClient.Host, "cursor", cur, "since", since) - var limit int64 = 50 - period := time.Second * 5 - - for { - //func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, types []string) (*ModerationQueryEvents_Output, error) { - me, err := toolsozone.ModerationQueryEvents( - ctx, - s.engine.OzoneClient, - nil, // addedLabels: If specified, only events where all of these labels were added are returned - nil, // addedTags: If specified, only events where all of these tags were added are returned - "", // comment: If specified, only events with comments containing the keyword are returned - since.String(), // createdAfter: Retrieve events created after a given timestamp - "", // createdBefore: Retrieve events created before a given timestamp - "", // createdBy - "", // cursor - false, // hasComment: If true, only events with comments are returned - true, // includeAllUserRecords: If true, events on all record types (posts, lists, profile etc.) 
owned by the did are returned - limit, - nil, // removedLabels: If specified, only events where all of these labels were removed are returned - nil, // removedTags - nil, // reportTypes - "asc", // sortDirection: Sort direction for the events. Defaults to descending order of created at timestamp. - "", // subject - nil, // types: The types of events (fully qualified string in the format of tools.ozone.moderation.defs#modEvent) to filter by. If not specified, all events are returned. - ) - if err != nil { - s.logger.Warn("ozone query events failed; sleeping then will retrying", "err", err, "period", period.String()) - time.Sleep(period) - continue - } - - // track if the response contained anything new - anyNewEvents := false - for _, evt := range me.Events { - createdAt, err := syntax.ParseDatetime(evt.CreatedAt) - if err != nil { - return fmt.Errorf("invalid time format for ozone 'createdAt': %w", err) - } - // skip if the timestamp is the exact same - if createdAt == since { - continue - } - anyNewEvents = true - // TODO: is there a race condition here? - if !createdAt.Time().After(since.Time()) { - s.logger.Error("out of order ozone event", "createdAt", createdAt, "since", since) - return fmt.Errorf("out of order ozone event") - } - if err = s.HandleOzoneEvent(ctx, evt); err != nil { - s.logger.Error("failed to process ozone event", "event", evt) - } - since = createdAt - s.lastOzoneCursor.Store(since.String()) - } - if !anyNewEvents { - s.logger.Debug("... 
ozone poller sleeping", "period", period.String()) - time.Sleep(period) - } - } -} - -func (s *Server) HandleOzoneEvent(ctx context.Context, eventView *toolsozone.ModerationDefs_ModEventView) error { - - s.logger.Debug("received ozone event", "eventID", eventView.Id, "createdAt", eventView.CreatedAt) - - if err := s.engine.ProcessOzoneEvent(ctx, eventView); err != nil { - s.logger.Error("engine failed to process ozone event", "err", err) - } - return nil -} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index dbef6c488..bceaaa189 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -17,6 +17,7 @@ import ( "github.com/bluesky-social/indigo/atproto/identity/redisdir" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod/capture" + "github.com/bluesky-social/indigo/automod/consumer" "github.com/carlmjohnson/versioninfo" _ "github.com/joho/godotenv/autoload" @@ -236,7 +237,7 @@ var runCmd = &cli.Command{ dir, Config{ Logger: logger, - RelayHost: cctx.String("atp-relay-host"), + RelayHost: cctx.String("atp-relay-host"), // DEPRECATED BskyHost: cctx.String("atp-bsky-host"), OzoneHost: cctx.String("atp-ozone-host"), OzoneDID: cctx.String("ozone-did"), @@ -251,7 +252,7 @@ var runCmd = &cli.Command{ AbyssPassword: cctx.String("abyss-password"), RatelimitBypass: cctx.String("ratelimit-bypass"), RulesetName: cctx.String("ruleset"), - FirehoseParallelism: cctx.Int("firehose-parallelism"), + FirehoseParallelism: cctx.Int("firehose-parallelism"), // DEPRECATED PreScreenHost: cctx.String("prescreen-host"), PreScreenToken: cctx.String("prescreen-token"), }, @@ -260,41 +261,59 @@ var runCmd = &cli.Command{ return fmt.Errorf("failed to construct server: %v", err) } - // prometheus HTTP endpoint: /metrics - go func() { - runtime.SetBlockProfileRate(10) - runtime.SetMutexProfileFraction(10) - if err := srv.RunMetrics(cctx.String("metrics-listen")); err != nil { - slog.Error("failed to start metrics endpoint", "error", err) - 
panic(fmt.Errorf("failed to start metrics endpoint: %w", err)) + // firehose event consumer + relayHost := cctx.String("atp-relay-host") + if relayHost != "" { + fc := consumer.FirehoseConsumer{ + Engine: srv.Engine, + Logger: logger.With("subsystem", "firehose-consumer"), + Host: cctx.String("atp-relay-host"), + Parallelism: cctx.Int("firehose-parallelism"), + RedisClient: srv.RedisClient, } - }() - go func() { - if err := srv.RunPersistCursor(ctx); err != nil { - slog.Error("cursor routine failed", "err", err) + go func() { + if err := fc.RunPersistCursor(ctx); err != nil { + slog.Error("cursor routine failed", "err", err) + } + }() + + if err := fc.Run(ctx); err != nil { + return fmt.Errorf("failure consuming and processing firehose: %w", err) } - }() + } // ozone event consumer (if configured) - if srv.engine.OzoneClient != nil { + if srv.Engine.OzoneClient != nil { + oc := consumer.OzoneConsumer{ + Engine: srv.Engine, + Logger: logger.With("subsystem", "ozone-consumer"), + RedisClient: srv.RedisClient, + } + go func() { - if err := srv.RunOzoneConsumer(ctx); err != nil { + if err := oc.Run(ctx); err != nil { slog.Error("ozone consumer failed", "err", err) } }() go func() { - if err := srv.RunPersistOzoneCursor(ctx); err != nil { + if err := oc.RunPersistCursor(ctx); err != nil { slog.Error("ozone cursor routine failed", "err", err) } }() } - // firehose event consumer (main processor) - if err := srv.RunConsumer(ctx); err != nil { - return fmt.Errorf("failure consuming and processing firehose: %w", err) - } + // prometheus HTTP endpoint: /metrics + go func() { + runtime.SetBlockProfileRate(10) + runtime.SetMutexProfileFraction(10) + if err := srv.RunMetrics(cctx.String("metrics-listen")); err != nil { + slog.Error("failed to start metrics endpoint", "error", err) + panic(fmt.Errorf("failed to start metrics endpoint: %w", err)) + } + }() + return nil }, } @@ -355,7 +374,7 @@ var processRecordCmd = &cli.Command{ return err } - return 
capture.FetchAndProcessRecord(ctx, srv.engine, aturi) + return capture.FetchAndProcessRecord(ctx, srv.Engine, aturi) }, } @@ -386,7 +405,7 @@ var processRecentCmd = &cli.Command{ return err } - return capture.FetchAndProcessRecent(ctx, srv.engine, *atid, cctx.Int("limit")) + return capture.FetchAndProcessRecent(ctx, srv.Engine, *atid, cctx.Int("limit")) }, } @@ -417,7 +436,7 @@ var captureRecentCmd = &cli.Command{ return err } - cap, err := capture.CaptureRecent(ctx, srv.engine, *atid, cctx.Int("limit")) + cap, err := capture.CaptureRecent(ctx, srv.Engine, *atid, cctx.Int("limit")) if err != nil { return err } diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 55ebf49f2..9fe08f98e 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -7,7 +7,6 @@ import ( "net/http" "os" "strings" - "sync/atomic" "time" "github.com/bluesky-social/indigo/atproto/identity" @@ -27,25 +26,17 @@ import ( ) type Server struct { - relayHost string - firehoseParallelism int - logger *slog.Logger - engine *automod.Engine - rdb *redis.Client - - // lastSeq is the most recent event sequence number we've received and begun to handle. - // This number is periodically persisted to redis, if redis is present. - // The value is best-effort (the stream handling itself is concurrent, so event numbers may not be monotonic), - // but nonetheless, you must use atomics when updating or reading this (to avoid data races). - lastSeq int64 + Engine *automod.Engine + RedisClient *redis.Client - // same as lastSeq, but for Ozone timestamp cursor. the value is a string. 
- lastOzoneCursor atomic.Value + relayHost string // DEPRECATED + firehoseParallelism int // DEPRECATED + logger *slog.Logger } type Config struct { Logger *slog.Logger - RelayHost string + RelayHost string // DEPRECATED BskyHost string OzoneHost string OzoneDID string @@ -60,7 +51,7 @@ type Config struct { AbyssPassword string RulesetName string RatelimitBypass string - FirehoseParallelism int + FirehoseParallelism int // DEPRECATED PreScreenHost string PreScreenToken string } @@ -234,8 +225,8 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { relayHost: config.RelayHost, firehoseParallelism: config.FirehoseParallelism, logger: logger, - engine: &engine, - rdb: rdb, + Engine: &engine, + RedisClient: rdb, } return s, nil @@ -245,132 +236,3 @@ func (s *Server) RunMetrics(listen string) error { http.Handle("/metrics", promhttp.Handler()) return http.ListenAndServe(listen, nil) } - -var cursorKey = "hepa/seq" -var ozoneCursorKey = "hepa/ozoneTimestamp" - -func (s *Server) ReadLastCursor(ctx context.Context) (int64, error) { - // if redis isn't configured, just skip - if s.rdb == nil { - s.logger.Info("redis not configured, skipping cursor read") - return 0, nil - } - - val, err := s.rdb.Get(ctx, cursorKey).Int64() - if err == redis.Nil { - s.logger.Info("no pre-existing cursor in redis") - return 0, nil - } else if err != nil { - return 0, err - } - s.logger.Info("successfully found prior subscription cursor seq in redis", "seq", val) - return val, nil -} - -func (s *Server) ReadLastOzoneCursor(ctx context.Context) (string, error) { - // if redis isn't configured, just skip - if s.rdb == nil { - s.logger.Info("redis not configured, skipping ozone cursor read") - return "", nil - } - - val, err := s.rdb.Get(ctx, ozoneCursorKey).Result() - if err == redis.Nil || val == "" { - s.logger.Info("no pre-existing ozone cursor in redis") - return "", nil - } else if err != nil { - return "", err - } - s.logger.Info("successfully found prior ozone offset 
timestamp in redis", "cursor", val) - return val, nil -} - -func (s *Server) PersistCursor(ctx context.Context) error { - // if redis isn't configured, just skip - if s.rdb == nil { - return nil - } - lastSeq := atomic.LoadInt64(&s.lastSeq) - if lastSeq <= 0 { - return nil - } - err := s.rdb.Set(ctx, cursorKey, lastSeq, 14*24*time.Hour).Err() - return err -} - -func (s *Server) PersistOzoneCursor(ctx context.Context) error { - // if redis isn't configured, just skip - if s.rdb == nil { - return nil - } - lastCursor := s.lastOzoneCursor.Load() - if lastCursor == nil || lastCursor == "" { - return nil - } - err := s.rdb.Set(ctx, ozoneCursorKey, lastCursor, 14*24*time.Hour).Err() - return err -} - -// this method runs in a loop, persisting the current cursor state every 5 seconds -func (s *Server) RunPersistCursor(ctx context.Context) error { - - // if redis isn't configured, just skip - if s.rdb == nil { - return nil - } - ticker := time.NewTicker(5 * time.Second) - for { - select { - case <-ctx.Done(): - lastSeq := atomic.LoadInt64(&s.lastSeq) - if lastSeq >= 1 { - s.logger.Info("persisting final cursor seq value", "seq", lastSeq) - err := s.PersistCursor(ctx) - if err != nil { - s.logger.Error("failed to persist cursor", "err", err, "seq", lastSeq) - } - } - return nil - case <-ticker.C: - lastSeq := atomic.LoadInt64(&s.lastSeq) - if lastSeq >= 1 { - err := s.PersistCursor(ctx) - if err != nil { - s.logger.Error("failed to persist cursor", "err", err, "seq", lastSeq) - } - } - } - } -} - -// this method runs in a loop, persisting the current cursor state every 5 seconds -func (s *Server) RunPersistOzoneCursor(ctx context.Context) error { - - // if redis isn't configured, just skip - if s.rdb == nil { - return nil - } - ticker := time.NewTicker(5 * time.Second) - for { - select { - case <-ctx.Done(): - lastCursor := s.lastOzoneCursor.Load() - if lastCursor != nil && lastCursor != "" { - s.logger.Info("persisting final ozone cursor timestamp", "cursor", lastCursor) - 
err := s.PersistOzoneCursor(ctx) - if err != nil { - s.logger.Error("failed to persist ozone cursor", "err", err, "cursor", lastCursor) - } - } - return nil - case <-ticker.C: - lastCursor := s.lastOzoneCursor.Load() - if lastCursor != nil && lastCursor != "" { - err := s.PersistOzoneCursor(ctx) - if err != nil { - s.logger.Error("failed to persist ozone cursor", "err", err, "cursor", lastCursor) - } - } - } - } -} From f49cfdb05a3bc5f2125177044e87e025a06e797b Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 22 Oct 2024 11:44:56 -0700 Subject: [PATCH 06/50] automod verbosity --- automod/engine/fetch_account_meta.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index 16dae3f2d..a20503c98 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -24,7 +24,7 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( // fallback in case client wasn't configured (eg, testing) if e.BskyClient == nil { - logger.Warn("skipping account meta hydration") + logger.Debug("skipping account meta hydration") am := AccountMeta{ Identity: ident, Profile: ProfileSummary{}, @@ -64,7 +64,7 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( // most common cause of this is a race between automod and ozone/appview for new accounts. just sleep a couple seconds and retry! 
var xrpcError *xrpc.Error if err != nil && errors.As(err, &xrpcError) && (xrpcError.StatusCode == 400 || xrpcError.StatusCode == 404) { - logger.Info("account profile lookup initially failed (from bsky appview), will retry", "err", err, "sleepDuration", newAccountRetryDuration) + logger.Debug("account profile lookup initially failed (from bsky appview), will retry", "err", err, "sleepDuration", newAccountRetryDuration) time.Sleep(newAccountRetryDuration) pv, err = appbsky.ActorGetProfile(ctx, e.BskyClient, ident.DID.String()) } From 1b2d84c83f3b5aecff95e11b49b44ef3de671bb7 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 22 Oct 2024 12:20:11 -0700 Subject: [PATCH 07/50] automod: add helper to get access to underlying engine --- automod/engine/context.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/automod/engine/context.go b/automod/engine/context.go index cc5b6a5e8..f88b8c9cb 100644 --- a/automod/engine/context.go +++ b/automod/engine/context.go @@ -169,6 +169,13 @@ func (c *BaseContext) InSet(name, val string) bool { return out } +// Returns a pointer to the underlying automod engine. This usually should NOT be used in rules. +// +// This is an escape hatch for hacking on the system before features get fully integerated in to the content API surface. The Engine API is not stable. 
+func (c *BaseContext) InternalEngine() *Engine { + return c.engine +} + func NewAccountContext(ctx context.Context, eng *Engine, meta AccountMeta) AccountContext { return AccountContext{ BaseContext: BaseContext{ From d52ce4d59cc81f31c67f8c47205de64c147c9e46 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 23 Oct 2024 09:49:40 -0700 Subject: [PATCH 08/50] automod: make account meta fetching optional --- automod/engine/engine.go | 40 ++++++++++++++++++++++++++++++++-------- automod/pkg.go | 1 + 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/automod/engine/engine.go b/automod/engine/engine.go index ae80db3ee..4ad71ba15 100644 --- a/automod/engine/engine.go +++ b/automod/engine/engine.go @@ -43,6 +43,14 @@ type Engine struct { AdminClient *xrpc.Client // used to fetch blobs from upstream PDS instances BlobClient *http.Client + + // internal configuration + Config EngineConfig +} + +type EngineConfig struct { + // if enabled, account metadata is not hydrated for every event by default + SkipAccountMeta bool } // Entrypoint for external code pushing arbitrary identity events in to the engine. 
@@ -80,10 +88,18 @@ func (eng *Engine) ProcessIdentityEvent(ctx context.Context, typ string, did syn return fmt.Errorf("identity not found for DID: %s", did.String()) } - am, err := eng.GetAccountMeta(ctx, ident) - if err != nil { - eventErrorCount.WithLabelValues("identity").Inc() - return fmt.Errorf("failed to fetch account metadata: %w", err) + var am *AccountMeta + if !eng.Config.SkipAccountMeta { + am, err = eng.GetAccountMeta(ctx, ident) + if err != nil { + eventErrorCount.WithLabelValues("identity").Inc() + return fmt.Errorf("failed to fetch account metadata: %w", err) + } + } else { + am = &AccountMeta{ + Identity: ident, + Profile: ProfileSummary{}, + } } ac := NewAccountContext(ctx, eng, *am) if err := eng.Rules.CallIdentityRules(&ac); err != nil { @@ -136,10 +152,18 @@ func (eng *Engine) ProcessRecordOp(ctx context.Context, op RecordOp) error { return fmt.Errorf("identity not found for DID: %s", op.DID) } - am, err := eng.GetAccountMeta(ctx, ident) - if err != nil { - eventErrorCount.WithLabelValues("record").Inc() - return fmt.Errorf("failed to fetch account metadata: %w", err) + var am *AccountMeta + if !eng.Config.SkipAccountMeta { + am, err = eng.GetAccountMeta(ctx, ident) + if err != nil { + eventErrorCount.WithLabelValues("identity").Inc() + return fmt.Errorf("failed to fetch account metadata: %w", err) + } + } else { + am = &AccountMeta{ + Identity: ident, + Profile: ProfileSummary{}, + } } rc := NewRecordContext(ctx, eng, *am, op) rc.Logger.Debug("processing record") diff --git a/automod/pkg.go b/automod/pkg.go index ebdca9811..e04589698 100644 --- a/automod/pkg.go +++ b/automod/pkg.go @@ -6,6 +6,7 @@ import ( ) type Engine = engine.Engine +type EngineConfig = engine.EngineConfig type AccountMeta = engine.AccountMeta type ProfileSummary = engine.ProfileSummary type AccountPrivate = engine.AccountPrivate From cc1ca95b0a78299b369327f3d64b768c73a26525 Mon Sep 17 00:00:00 2001 From: Hailey Date: Tue, 29 Oct 2024 00:57:25 -0700 Subject: [PATCH 09/50] 
add some helpers to automod --- automod/rules/helpers.go | 57 +++++++++++++++ automod/rules/helpers_test.go | 134 ++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) diff --git a/automod/rules/helpers.go b/automod/rules/helpers.go index 993b3913c..4a44b690c 100644 --- a/automod/rules/helpers.go +++ b/automod/rules/helpers.go @@ -283,3 +283,60 @@ func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool { } return false } + +func ParentOrRootIsDid(post *appbsky.FeedPost, did string) bool { + if post.Reply == nil { + return false + } + + rootUri, err := syntax.ParseATURI(post.Reply.Root.Uri) + if err != nil || !rootUri.Authority().IsDID() { + return false + } + + parentUri, err := syntax.ParseATURI(post.Reply.Parent.Uri) + if err != nil || !parentUri.Authority().IsDID() { + return false + } + + return rootUri.Authority().String() == did || parentUri.Authority().String() == did +} + +func ParentOrRootIsAnyDid(post *appbsky.FeedPost, dids []string) bool { + if post.Reply == nil { + return false + } + + for _, did := range dids { + if ParentOrRootIsDid(post, did) { + return true + } + } + + return false +} + +func PostMentionsDid(post *appbsky.FeedPost, did string) bool { + facets, err := ExtractFacets(post) + if err != nil { + return false + } + + for _, facet := range facets { + if facet.DID != nil && *facet.DID == did { + return true + } + } + + return false +} + +func PostMentionsAnyDid(post *appbsky.FeedPost, dids []string) bool { + for _, did := range dids { + if PostMentionsDid(post, did) { + return true + } + } + + return false +} diff --git a/automod/rules/helpers_test.go b/automod/rules/helpers_test.go index 0d5e11ef2..e6d18f47e 100644 --- a/automod/rules/helpers_test.go +++ b/automod/rules/helpers_test.go @@ -1,6 +1,8 @@ package rules import ( + comatproto "github.com/bluesky-social/indigo/api/atproto" + appbsky "github.com/bluesky-social/indigo/api/bsky" "testing" "time" @@ -115,3 +117,135 @@ func TestAccountIsYoungerThan(t 
*testing.T) { assert.True(AccountIsOlderThan(&ac, time.Hour)) assert.False(AccountIsOlderThan(&ac, 48*time.Hour)) } + +func TestPostMentionsDid(t *testing.T) { + assert := assert.New(t) + + post := &appbsky.FeedPost{ + Text: "@hailey.at what is upppp also hello to @darthbluesky.bsky.social", + Facets: []*appbsky.RichtextFacet{ + { + Features: []*appbsky.RichtextFacet_Features_Elem{ + { + RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ + Did: "did:plc:abc123", + }, + }, + }, + Index: &appbsky.RichtextFacet_ByteSlice{ + ByteStart: 0, + ByteEnd: 9, + }, + }, + { + Features: []*appbsky.RichtextFacet_Features_Elem{ + { + RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ + Did: "did:plc:abc456", + }, + }, + }, + Index: &appbsky.RichtextFacet_ByteSlice{ + ByteStart: 39, + ByteEnd: 63, + }, + }, + }, + } + assert.True(PostMentionsDid(post, "did:plc:abc123")) + assert.False(PostMentionsDid(post, "did:plc:cba321")) + + didList1 := []string{ + "did:plc:cba321", + "did:web:bsky.app", + "did:plc:abc456", + } + + didList2 := []string{ + "did:plc:321cba", + "did:web:bsky.app", + "did:plc:123abc", + } + + assert.True(PostMentionsAnyDid(post, didList1)) + assert.False(PostMentionsAnyDid(post, didList2)) +} + +func TestParentOrRootIsDid(t *testing.T) { + assert := assert.New(t) + + post1 := &appbsky.FeedPost{ + Text: "some random post that i dreamt up last night, idk", + Reply: &appbsky.FeedPost_ReplyRef{ + Root: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:abc123/app.bsky.feed.post/rkey123", + }, + Parent: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:abc123/app.bsky.feed.post/rkey123", + }, + }, + } + + post2 := &appbsky.FeedPost{ + Text: "some random post that i dreamt up last night, idk", + Reply: &appbsky.FeedPost_ReplyRef{ + Root: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:321abc/app.bsky.feed.post/rkey123", + }, + Parent: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:abc123/app.bsky.feed.post/rkey123", + }, + }, + } + + post3 := &appbsky.FeedPost{ + 
Text: "some random post that i dreamt up last night, idk", + Reply: &appbsky.FeedPost_ReplyRef{ + Root: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:abc123/app.bsky.feed.post/rkey123", + }, + Parent: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:321abc/app.bsky.feed.post/rkey123", + }, + }, + } + + post4 := &appbsky.FeedPost{ + Text: "some random post that i dreamt up last night, idk", + Reply: &appbsky.FeedPost_ReplyRef{ + Root: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:321abc/app.bsky.feed.post/rkey123", + }, + Parent: &comatproto.RepoStrongRef{ + Uri: "at://did:plc:321abc/app.bsky.feed.post/rkey123", + }, + }, + } + + assert.True(ParentOrRootIsDid(post1, "did:plc:abc123")) + assert.False(ParentOrRootIsDid(post1, "did:plc:321abc")) + + assert.True(ParentOrRootIsDid(post2, "did:plc:abc123")) + assert.True(ParentOrRootIsDid(post2, "did:plc:321abc")) + + assert.True(ParentOrRootIsDid(post3, "did:plc:abc123")) + assert.True(ParentOrRootIsDid(post3, "did:plc:321abc")) + + assert.False(ParentOrRootIsDid(post4, "did:plc:abc123")) + assert.True(ParentOrRootIsDid(post4, "did:plc:321abc")) + + didList1 := []string{ + "did:plc:cba321", + "did:web:bsky.app", + "did:plc:abc123", + } + + didList2 := []string{ + "did:plc:321cba", + "did:web:bsky.app", + "did:plc:123abc", + } + + assert.True(ParentOrRootIsAnyDid(post1, didList1)) + assert.False(ParentOrRootIsAnyDid(post1, didList2)) +} From 18a1d462123dffc80626ca25e6bc19a0ee0c6da1 Mon Sep 17 00:00:00 2001 From: Hailey Date: Tue, 29 Oct 2024 00:58:22 -0700 Subject: [PATCH 10/50] nit --- automod/rules/helpers_test.go | 106 +++++++++++++++++----------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/automod/rules/helpers_test.go b/automod/rules/helpers_test.go index e6d18f47e..dd47d0b7c 100644 --- a/automod/rules/helpers_test.go +++ b/automod/rules/helpers_test.go @@ -118,59 +118,6 @@ func TestAccountIsYoungerThan(t *testing.T) { assert.False(AccountIsOlderThan(&ac, 48*time.Hour)) } -func 
TestPostMentionsDid(t *testing.T) { - assert := assert.New(t) - - post := &appbsky.FeedPost{ - Text: "@hailey.at what is upppp also hello to @darthbluesky.bsky.social", - Facets: []*appbsky.RichtextFacet{ - { - Features: []*appbsky.RichtextFacet_Features_Elem{ - { - RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ - Did: "did:plc:abc123", - }, - }, - }, - Index: &appbsky.RichtextFacet_ByteSlice{ - ByteStart: 0, - ByteEnd: 9, - }, - }, - { - Features: []*appbsky.RichtextFacet_Features_Elem{ - { - RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ - Did: "did:plc:abc456", - }, - }, - }, - Index: &appbsky.RichtextFacet_ByteSlice{ - ByteStart: 39, - ByteEnd: 63, - }, - }, - }, - } - assert.True(PostMentionsDid(post, "did:plc:abc123")) - assert.False(PostMentionsDid(post, "did:plc:cba321")) - - didList1 := []string{ - "did:plc:cba321", - "did:web:bsky.app", - "did:plc:abc456", - } - - didList2 := []string{ - "did:plc:321cba", - "did:web:bsky.app", - "did:plc:123abc", - } - - assert.True(PostMentionsAnyDid(post, didList1)) - assert.False(PostMentionsAnyDid(post, didList2)) -} - func TestParentOrRootIsDid(t *testing.T) { assert := assert.New(t) @@ -249,3 +196,56 @@ func TestParentOrRootIsDid(t *testing.T) { assert.True(ParentOrRootIsAnyDid(post1, didList1)) assert.False(ParentOrRootIsAnyDid(post1, didList2)) } + +func TestPostMentionsDid(t *testing.T) { + assert := assert.New(t) + + post := &appbsky.FeedPost{ + Text: "@hailey.at what is upppp also hello to @darthbluesky.bsky.social", + Facets: []*appbsky.RichtextFacet{ + { + Features: []*appbsky.RichtextFacet_Features_Elem{ + { + RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ + Did: "did:plc:abc123", + }, + }, + }, + Index: &appbsky.RichtextFacet_ByteSlice{ + ByteStart: 0, + ByteEnd: 9, + }, + }, + { + Features: []*appbsky.RichtextFacet_Features_Elem{ + { + RichtextFacet_Mention: &appbsky.RichtextFacet_Mention{ + Did: "did:plc:abc456", + }, + }, + }, + Index: &appbsky.RichtextFacet_ByteSlice{ + 
ByteStart: 39, + ByteEnd: 63, + }, + }, + }, + } + assert.True(PostMentionsDid(post, "did:plc:abc123")) + assert.False(PostMentionsDid(post, "did:plc:cba321")) + + didList1 := []string{ + "did:plc:cba321", + "did:web:bsky.app", + "did:plc:abc456", + } + + didList2 := []string{ + "did:plc:321cba", + "did:web:bsky.app", + "did:plc:123abc", + } + + assert.True(PostMentionsAnyDid(post, didList1)) + assert.False(PostMentionsAnyDid(post, didList2)) +} From 04400b8ad199344357f547252f626d4b2fb86b15 Mon Sep 17 00:00:00 2001 From: Hailey Date: Tue, 29 Oct 2024 01:01:29 -0700 Subject: [PATCH 11/50] add avatar to accountmeta --- automod/engine/account_meta.go | 1 + automod/engine/fetch_account_meta.go | 1 + 2 files changed, 2 insertions(+) diff --git a/automod/engine/account_meta.go b/automod/engine/account_meta.go index 5a5a178a2..94dfe67c9 100644 --- a/automod/engine/account_meta.go +++ b/automod/engine/account_meta.go @@ -25,6 +25,7 @@ type AccountMeta struct { type ProfileSummary struct { HasAvatar bool + Avatar *string Description *string DisplayName *string } diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index 16dae3f2d..f501b92f0 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -75,6 +75,7 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( am.Profile = ProfileSummary{ HasAvatar: pv.Avatar != nil, + Avatar: pv.Avatar, Description: pv.Description, DisplayName: pv.DisplayName, } From 0b12c1c86d4f6bc0490f70b35b70a794e1a25b60 Mon Sep 17 00:00:00 2001 From: Hailey Date: Tue, 29 Oct 2024 01:05:18 -0700 Subject: [PATCH 12/50] nits --- automod/rules/helpers.go | 6 +++--- automod/rules/helpers_test.go | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/automod/rules/helpers.go b/automod/rules/helpers.go index 4a44b690c..e5bdcb2dd 100644 --- a/automod/rules/helpers.go +++ b/automod/rules/helpers.go @@ -284,7 +284,7 @@ func 
AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool { return false } -func ParentOrRootIsDid(post *appbsky.FeedPost, did string) bool { +func PostParentOrRootIsDid(post *appbsky.FeedPost, did string) bool { if post.Reply == nil { return false } @@ -302,13 +302,13 @@ func ParentOrRootIsDid(post *appbsky.FeedPost, did string) bool { return rootUri.Authority().String() == did || parentUri.Authority().String() == did } -func ParentOrRootIsAnyDid(post *appbsky.FeedPost, dids []string) bool { +func PostParentOrRootIsAnyDid(post *appbsky.FeedPost, dids []string) bool { if post.Reply == nil { return false } for _, did := range dids { - if ParentOrRootIsDid(post, did) { + if PostParentOrRootIsDid(post, did) { return true } } diff --git a/automod/rules/helpers_test.go b/automod/rules/helpers_test.go index dd47d0b7c..bba200cb0 100644 --- a/automod/rules/helpers_test.go +++ b/automod/rules/helpers_test.go @@ -169,17 +169,17 @@ func TestParentOrRootIsDid(t *testing.T) { }, } - assert.True(ParentOrRootIsDid(post1, "did:plc:abc123")) - assert.False(ParentOrRootIsDid(post1, "did:plc:321abc")) + assert.True(PostParentOrRootIsDid(post1, "did:plc:abc123")) + assert.False(PostParentOrRootIsDid(post1, "did:plc:321abc")) - assert.True(ParentOrRootIsDid(post2, "did:plc:abc123")) - assert.True(ParentOrRootIsDid(post2, "did:plc:321abc")) + assert.True(PostParentOrRootIsDid(post2, "did:plc:abc123")) + assert.True(PostParentOrRootIsDid(post2, "did:plc:321abc")) - assert.True(ParentOrRootIsDid(post3, "did:plc:abc123")) - assert.True(ParentOrRootIsDid(post3, "did:plc:321abc")) + assert.True(PostParentOrRootIsDid(post3, "did:plc:abc123")) + assert.True(PostParentOrRootIsDid(post3, "did:plc:321abc")) - assert.False(ParentOrRootIsDid(post4, "did:plc:abc123")) - assert.True(ParentOrRootIsDid(post4, "did:plc:321abc")) + assert.False(PostParentOrRootIsDid(post4, "did:plc:abc123")) + assert.True(PostParentOrRootIsDid(post4, "did:plc:321abc")) didList1 := []string{ "did:plc:cba321", 
@@ -193,8 +193,8 @@ func TestParentOrRootIsDid(t *testing.T) { "did:plc:123abc", } - assert.True(ParentOrRootIsAnyDid(post1, didList1)) - assert.False(ParentOrRootIsAnyDid(post1, didList2)) + assert.True(PostParentOrRootIsAnyDid(post1, didList1)) + assert.False(PostParentOrRootIsAnyDid(post1, didList2)) } func TestPostMentionsDid(t *testing.T) { From dd8ad60bbb3d38a10207e13dd70e0435af42fbff Mon Sep 17 00:00:00 2001 From: Hailey Date: Tue, 29 Oct 2024 15:53:21 -0700 Subject: [PATCH 13/50] get cid from url --- automod/engine/account_meta.go | 3 +- automod/engine/cid_from_cdn_test.go | 42 ++++++++++++++++++++++++++++ automod/engine/fetch_account_meta.go | 24 +++++++++++++++- automod/engine/util.go | 24 ++++++++++++++++ 4 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 automod/engine/cid_from_cdn_test.go diff --git a/automod/engine/account_meta.go b/automod/engine/account_meta.go index 94dfe67c9..76d16755d 100644 --- a/automod/engine/account_meta.go +++ b/automod/engine/account_meta.go @@ -25,7 +25,8 @@ type AccountMeta struct { type ProfileSummary struct { HasAvatar bool - Avatar *string + AvatarCid *string + BannerCid *string Description *string DisplayName *string } diff --git a/automod/engine/cid_from_cdn_test.go b/automod/engine/cid_from_cdn_test.go new file mode 100644 index 000000000..cc7553cb8 --- /dev/null +++ b/automod/engine/cid_from_cdn_test.go @@ -0,0 +1,42 @@ +package engine + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestCidFromCdnUrl(t *testing.T) { + assert := assert.New(t) + + fixCid := "abcdefghijk" + + fixtures := []struct { + url string + cid *string + }{ + { + url: "https://cdn.bsky.app/img/avatar/plain/did:plc:abc123/abcdefghijk@jpeg", + cid: &fixCid, + }, + { + url: "https://cdn.bsky.app/img/feed_fullsize/plain/did:plc:abc123/abcdefghijk@jpeg", + cid: &fixCid, + }, + { + url: "https://cdn.bsky.app/img/feed_fullsize", + cid: nil, + }, + { + url: 
"https://cdn.bsky.app/img/feed_fullsize/plain/did:plc:abc123/abcdefghijk", + cid: &fixCid, + }, + { + url: "https://cdn.asky.app/img/feed_fullsize/plain/did:plc:abc123/abcdefghijk@jpeg", + cid: nil, + }, + } + + for _, fix := range fixtures { + assert.Equal(fix.cid, CidFromCdnUrl(&fix.url)) + } +} diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index f501b92f0..b634c856a 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -5,6 +5,8 @@ import ( "encoding/json" "errors" "fmt" + "net/url" + "strings" "time" comatproto "github.com/bluesky-social/indigo/api/atproto" @@ -17,6 +19,25 @@ import ( var newAccountRetryDuration = 3 * 1000 * time.Millisecond +// get the cid from a bluesky cdn url +func CidFromCdnUrl(str *string) *string { + if str == nil { + return nil + } + + u, err := url.Parse(*str) + if err != nil || u.Host != "cdn.bsky.app" { + return nil + } + + parts := strings.Split(u.Path, "/") + if len(parts) != 6 { + return nil + } + + return &strings.Split(parts[5], "@")[0] +} + // Helper to hydrate metadata about an account from several sources: PDS (if access), mod service (if access), public identity resolution func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) (*AccountMeta, error) { @@ -75,7 +96,8 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( am.Profile = ProfileSummary{ HasAvatar: pv.Avatar != nil, - Avatar: pv.Avatar, + AvatarCid: cidFromCdnUrl(pv.Avatar), + BannerCid: cidFromCdnUrl(pv.Banner), Description: pv.Description, DisplayName: pv.DisplayName, } diff --git a/automod/engine/util.go b/automod/engine/util.go index 195454c1b..e96c411d5 100644 --- a/automod/engine/util.go +++ b/automod/engine/util.go @@ -1,5 +1,10 @@ package engine +import ( + "net/url" + "strings" +) + func dedupeStrings(in []string) []string { var out []string seen := make(map[string]bool) @@ -11,3 +16,22 @@ func dedupeStrings(in []string) 
[]string { } return out } + +// get the cid from a bluesky cdn url +func cidFromCdnUrl(str *string) *string { + if str == nil { + return nil + } + + u, err := url.Parse(*str) + if err != nil || u.Host != "cdn.bsky.app" { + return nil + } + + parts := strings.Split(u.Path, "/") + if len(parts) != 6 { + return nil + } + + return &strings.Split(parts[5], "@")[0] +} From 5fdfd70573000ea42b746a03b94a4f338a28be80 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 29 Oct 2024 16:48:42 -0700 Subject: [PATCH 14/50] automod: refactor identity and account event processing --- automod/capture/testing.go | 10 ++++- automod/consumer/firehose.go | 59 +++++-------------------- automod/engine/engine.go | 84 ++++++++++++++++++++++++++++++++++-- automod/engine/ruleset.go | 12 ++++++ automod/engine/ruletypes.go | 1 + 5 files changed, 113 insertions(+), 53 deletions(-) diff --git a/automod/capture/testing.go b/automod/capture/testing.go index 998aaef48..fbe00d6cb 100644 --- a/automod/capture/testing.go +++ b/automod/capture/testing.go @@ -7,6 +7,7 @@ import ( "io" "os" + comatproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" @@ -38,12 +39,19 @@ func ProcessCaptureRules(eng *automod.Engine, capture AccountCapture) error { ctx := context.Background() did := capture.AccountMeta.Identity.DID + handle := capture.AccountMeta.Identity.Handle.String() dir := identity.NewMockDirectory() dir.Insert(*capture.AccountMeta.Identity) eng.Directory = &dir // initial identity rules - eng.ProcessIdentityEvent(ctx, "new", did) + identEvent := comatproto.SyncSubscribeRepos_Identity{ + Did: did.String(), + Handle: &handle, + Seq: 12345, + Time: syntax.DatetimeNow().String(), + } + eng.ProcessIdentityEvent(ctx, identEvent) // all the post rules for _, pr := range capture.PostRecords { diff --git a/automod/consumer/firehose.go b/automod/consumer/firehose.go 
index df6d8f91b..f210b3055 100644 --- a/automod/consumer/firehose.go +++ b/automod/consumer/firehose.go @@ -80,54 +80,20 @@ func (fc *FirehoseConsumer) Run(ctx context.Context) error { }, RepoIdentity: func(evt *comatproto.SyncSubscribeRepos_Identity) error { atomic.StoreInt64(&fc.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - fc.Logger.Error("bad DID in RepoIdentity event", "did", evt.Did, "seq", evt.Seq, "err", err) - return nil - } - if err := fc.Engine.ProcessIdentityEvent(ctx, "identity", did); err != nil { + if err := fc.Engine.ProcessIdentityEvent(ctx, *evt); err != nil { fc.Logger.Error("processing repo identity failed", "did", evt.Did, "seq", evt.Seq, "err", err) } return nil }, RepoAccount: func(evt *comatproto.SyncSubscribeRepos_Account) error { atomic.StoreInt64(&fc.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - fc.Logger.Error("bad DID in RepoAccount event", "did", evt.Did, "seq", evt.Seq, "err", err) - return nil - } - if err := fc.Engine.ProcessIdentityEvent(ctx, "account", did); err != nil { + if err := fc.Engine.ProcessAccountEvent(ctx, *evt); err != nil { fc.Logger.Error("processing repo account failed", "did", evt.Did, "seq", evt.Seq, "err", err) } return nil }, - // TODO: deprecated - RepoHandle: func(evt *comatproto.SyncSubscribeRepos_Handle) error { - atomic.StoreInt64(&fc.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - fc.Logger.Error("bad DID in RepoHandle event", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) - return nil - } - if err := fc.Engine.ProcessIdentityEvent(ctx, "handle", did); err != nil { - fc.Logger.Error("processing handle update failed", "did", evt.Did, "handle", evt.Handle, "seq", evt.Seq, "err", err) - } - return nil - }, - // TODO: deprecated - RepoTombstone: func(evt *comatproto.SyncSubscribeRepos_Tombstone) error { - atomic.StoreInt64(&fc.lastSeq, evt.Seq) - did, err := syntax.ParseDID(evt.Did) - if err != nil { - 
fc.Logger.Error("bad DID in RepoTombstone event", "did", evt.Did, "seq", evt.Seq, "err", err) - return nil - } - if err := fc.Engine.ProcessIdentityEvent(ctx, "tombstone", did); err != nil { - fc.Logger.Error("processing repo tombstone failed", "did", evt.Did, "seq", evt.Seq, "err", err) - } - return nil - }, + // NOTE: no longer process #handle events + // NOTE: no longer process #tombstone events } var scheduler events.Scheduler @@ -176,13 +142,6 @@ func (fc *FirehoseConsumer) HandleRepoCommit(ctx context.Context, evt *comatprot return nil } - // empty commit is a special case, temporarily, basically indicates "new account" - if len(evt.Ops) == 0 { - if err := fc.Engine.ProcessIdentityEvent(ctx, "create", did); err != nil { - fc.Logger.Error("processing handle update failed", "did", evt.Repo, "rev", evt.Rev, "seq", evt.Seq, "err", err) - } - } - for _, op := range evt.Ops { logger = logger.With("eventKind", op.Action, "path", op.Path) collection, rkey, err := splitRepoPath(op.Path) @@ -215,27 +174,29 @@ func (fc *FirehoseConsumer) HandleRepoCommit(ctx context.Context, evt *comatprot break } recCID := syntax.CID(op.Cid.String()) - err = fc.Engine.ProcessRecordOp(ctx, automod.RecordOp{ + op := automod.RecordOp{ Action: action, DID: did, Collection: collection, RecordKey: rkey, CID: &recCID, RecordCBOR: *recCBOR, - }) + } + err = fc.Engine.ProcessRecordOp(ctx, op) if err != nil { logger.Error("engine failed to process record", "err", err) continue } case repomgr.EvtKindDeleteRecord: - err = fc.Engine.ProcessRecordOp(ctx, automod.RecordOp{ + op := automod.RecordOp{ Action: automod.DeleteOp, DID: did, Collection: collection, RecordKey: rkey, CID: nil, RecordCBOR: nil, - }) + } + err = fc.Engine.ProcessRecordOp(ctx, op) if err != nil { logger.Error("engine failed to process record", "err", err) continue diff --git a/automod/engine/engine.go b/automod/engine/engine.go index 4ad71ba15..8ed864371 100644 --- a/automod/engine/engine.go +++ b/automod/engine/engine.go @@ -7,6 
+7,7 @@ import ( "net/http" "time" + comatproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod/cachestore" @@ -53,10 +54,10 @@ type EngineConfig struct { SkipAccountMeta bool } -// Entrypoint for external code pushing arbitrary identity events in to the engine. +// Entrypoint for external code pushing #identity events in to the engine. // // This method can be called concurrently, though cached state may end up inconsistent if multiple events for the same account (DID) are processed in parallel. -func (eng *Engine) ProcessIdentityEvent(ctx context.Context, typ string, did syntax.DID) error { +func (eng *Engine) ProcessIdentityEvent(ctx context.Context, evt comatproto.SyncSubscribeRepos_Identity) error { eventProcessCount.WithLabelValues("identity").Inc() start := time.Now() defer func() { @@ -64,10 +65,15 @@ func (eng *Engine) ProcessIdentityEvent(ctx context.Context, typ string, did syn eventProcessDuration.WithLabelValues("identity").Observe(duration.Seconds()) }() + did, err := syntax.ParseDID(evt.Did) + if err != nil { + return fmt.Errorf("bad DID in repo #identity event (%s): %w", evt.Did, err) + } + // similar to an HTTP server, we want to recover any panics from rule execution defer func() { if r := recover(); r != nil { - eng.Logger.Error("automod event execution exception", "err", r, "did", did, "type", typ) + eng.Logger.Error("automod event execution exception", "err", r, "did", did, "type", "identity") eventErrorCount.WithLabelValues("identity").Inc() } }() @@ -78,6 +84,7 @@ func (eng *Engine) ProcessIdentityEvent(ctx context.Context, typ string, did syn if err := eng.PurgeAccountCaches(ctx, did); err != nil { eng.Logger.Error("failed to purge identity cache; identity rule may not run correctly", "err", err) } + // TODO(bnewbold): if it was a tombstone, this might fail ident, err := eng.Directory.LookupDID(ctx, 
did) if err != nil { eventErrorCount.WithLabelValues("identity").Inc() @@ -118,6 +125,77 @@ func (eng *Engine) ProcessIdentityEvent(ctx context.Context, typ string, did syn return nil } +// Entrypoint for external code pushing #account events in to the engine. +// +// This method can be called concurrently, though cached state may end up inconsistent if multiple events for the same account (DID) are processed in parallel. +func (eng *Engine) ProcessAccountEvent(ctx context.Context, evt comatproto.SyncSubscribeRepos_Account) error { + eventProcessCount.WithLabelValues("account").Inc() + start := time.Now() + defer func() { + duration := time.Since(start) + eventProcessDuration.WithLabelValues("account").Observe(duration.Seconds()) + }() + + did, err := syntax.ParseDID(evt.Did) + if err != nil { + return fmt.Errorf("bad DID in repo #account event (%s): %w", evt.Did, err) + } + + // similar to an HTTP server, we want to recover any panics from rule execution + defer func() { + if r := recover(); r != nil { + eng.Logger.Error("automod event execution exception", "err", r, "did", did, "type", "account") + eventErrorCount.WithLabelValues("account").Inc() + } + }() + ctx, cancel := context.WithTimeout(ctx, identityEventTimeout) + defer cancel() + + // first purge any caches; we need to re-resolve from scratch on account updates + if err := eng.PurgeAccountCaches(ctx, did); err != nil { + eng.Logger.Error("failed to purge account cache; account rule may not run correctly", "err", err) + } + // TODO(bnewbold): if it was a tombstone, this might fail + ident, err := eng.Directory.LookupDID(ctx, did) + if err != nil { + eventErrorCount.WithLabelValues("account").Inc() + return fmt.Errorf("resolving identity: %w", err) + } + if ident == nil { + eventErrorCount.WithLabelValues("account").Inc() + return fmt.Errorf("identity not found for DID: %s", did.String()) + } + + var am *AccountMeta + if !eng.Config.SkipAccountMeta { + am, err = eng.GetAccountMeta(ctx, ident) + if err != 
nil { + eventErrorCount.WithLabelValues("identity").Inc() + return fmt.Errorf("failed to fetch account metadata: %w", err) + } + } else { + am = &AccountMeta{ + Identity: ident, + Profile: ProfileSummary{}, + } + } + ac := NewAccountContext(ctx, eng, *am) + if err := eng.Rules.CallAccountRules(&ac); err != nil { + eventErrorCount.WithLabelValues("account").Inc() + return fmt.Errorf("rule execution failed: %w", err) + } + eng.CanonicalLogLineAccount(&ac) + if err := eng.persistAccountModActions(&ac); err != nil { + eventErrorCount.WithLabelValues("account").Inc() + return fmt.Errorf("failed to persist actions for account event: %w", err) + } + if err := eng.persistCounters(ctx, ac.effects); err != nil { + eventErrorCount.WithLabelValues("account").Inc() + return fmt.Errorf("failed to persist counters for account event: %w", err) + } + return nil +} + // Entrypoint for external code pushing repository updates. A simple repo commit results in multiple calls. // // This method can be called concurrently, though cached state may end up inconsistent if multiple events for the same account (DID) are processed in parallel. diff --git a/automod/engine/ruleset.go b/automod/engine/ruleset.go index 0b7d90cc4..4c72ef8f9 100644 --- a/automod/engine/ruleset.go +++ b/automod/engine/ruleset.go @@ -16,6 +16,7 @@ type RuleSet struct { RecordRules []RecordRuleFunc RecordDeleteRules []RecordRuleFunc IdentityRules []IdentityRuleFunc + AccountRules []AccountRuleFunc BlobRules []BlobRuleFunc NotificationRules []NotificationRuleFunc OzoneEventRules []OzoneEventRuleFunc @@ -89,6 +90,17 @@ func (r *RuleSet) CallIdentityRules(c *AccountContext) error { return nil } +// Executes rules for account update events. 
+func (r *RuleSet) CallAccountRules(c *AccountContext) error { + for _, f := range r.AccountRules { + err := f(c) + if err != nil { + c.Logger.Error("account rule execution failed", "err", err) + } + } + return nil +} + func (r *RuleSet) CallNotificationRules(c *NotificationContext) error { for _, f := range r.NotificationRules { err := f(c) diff --git a/automod/engine/ruletypes.go b/automod/engine/ruletypes.go index a86567ead..27d4a149f 100644 --- a/automod/engine/ruletypes.go +++ b/automod/engine/ruletypes.go @@ -6,6 +6,7 @@ import ( ) type IdentityRuleFunc = func(c *AccountContext) error +type AccountRuleFunc = func(c *AccountContext) error type RecordRuleFunc = func(c *RecordContext) error type PostRuleFunc = func(c *RecordContext, post *appbsky.FeedPost) error type ProfileRuleFunc = func(c *RecordContext, profile *appbsky.ActorProfile) error From 7b51c02882b0348463c6d38b3a4ccf25dc829e6d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 5 Sep 2024 19:58:59 -0700 Subject: [PATCH 15/50] fetch and cache account review state and appeal state --- automod/engine/account_meta.go | 3 +++ automod/engine/fetch_account_meta.go | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/automod/engine/account_meta.go b/automod/engine/account_meta.go index 5a5a178a2..1ab25568e 100644 --- a/automod/engine/account_meta.go +++ b/automod/engine/account_meta.go @@ -34,4 +34,7 @@ type AccountPrivate struct { EmailConfirmed bool IndexedAt *time.Time AccountTags []string + // ReviewState will be one of "open", "escalated", "closed", "none", or "" (unknown) + ReviewState string + Appealed bool } diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index a20503c98..55fb4bc4d 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -131,7 +131,22 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( if rd.Moderation.SubjectStatus.Takendown != nil && 
*rd.Moderation.SubjectStatus.Takendown == true { am.Takendown = true } + if rd.Moderation.SubjectStatus.Appealed != nil && *rd.Moderation.SubjectStatus.Appealed == true { + ap.Appealed = true + } ap.AccountTags = dedupeStrings(rd.Moderation.SubjectStatus.Tags) + if rd.Moderation.SubjectStatus.ReviewState != nil { + switch *rd.Moderation.SubjectStatus.ReviewState { + case "#reviewOpen": + ap.ReviewState = "open" + case "#reviewEscalated": + ap.ReviewState = "escalated" + case "#reviewClosed": + ap.ReviewState = "closed" + case "#reviewNonde": + ap.ReviewState = "none" + } + } } am.Private = &ap } From 293a3806751ce95b07d074221d34b0e60945281b Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 5 Sep 2024 20:28:49 -0700 Subject: [PATCH 16/50] constants for review states --- automod/engine/account_meta.go | 9 ++++++++- automod/engine/fetch_account_meta.go | 8 ++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/automod/engine/account_meta.go b/automod/engine/account_meta.go index 1ab25568e..e7d4e86ba 100644 --- a/automod/engine/account_meta.go +++ b/automod/engine/account_meta.go @@ -6,6 +6,13 @@ import ( "github.com/bluesky-social/indigo/atproto/identity" ) +var ( + ReviewStateEscalated = "escalated" + ReviewStateOpen = "open" + ReviewStateClosed = "closed" + ReviewStateNone = "none" +) + // information about a repo/account/identity, always pre-populated and relevant to many rules type AccountMeta struct { Identity *identity.Identity @@ -34,7 +41,7 @@ type AccountPrivate struct { EmailConfirmed bool IndexedAt *time.Time AccountTags []string - // ReviewState will be one of "open", "escalated", "closed", "none", or "" (unknown) + // ReviewState will be one of ReviewStateEscalated, ReviewStateOpen, ReviewStateClosed, ReviewStateNone, or "" (unknown) ReviewState string Appealed bool } diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index 55fb4bc4d..5bc5c3637 100644 --- a/automod/engine/fetch_account_meta.go 
+++ b/automod/engine/fetch_account_meta.go @@ -138,13 +138,13 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( if rd.Moderation.SubjectStatus.ReviewState != nil { switch *rd.Moderation.SubjectStatus.ReviewState { case "#reviewOpen": - ap.ReviewState = "open" + ap.ReviewState = ReviewStateOpen case "#reviewEscalated": - ap.ReviewState = "escalated" + ap.ReviewState = ReviewStateEscalated case "#reviewClosed": - ap.ReviewState = "closed" + ap.ReviewState = ReviewStateClosed case "#reviewNonde": - ap.ReviewState = "none" + ap.ReviewState = ReviewStateNone } } } From 66b2bdbcd011ea8f9c912a017a064135251511a7 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 5 Sep 2024 20:29:09 -0700 Subject: [PATCH 17/50] refactors --- automod/engine/context.go | 8 ++++ automod/engine/effects.go | 18 +++++++- automod/engine/metrics.go | 12 +++++- automod/engine/persist.go | 73 +++++++++++++++++++++++++++++++- automod/engine/persisthelpers.go | 20 +++++++++ 5 files changed, 127 insertions(+), 4 deletions(-) diff --git a/automod/engine/context.go b/automod/engine/context.go index f88b8c9cb..b015b3ac0 100644 --- a/automod/engine/context.go +++ b/automod/engine/context.go @@ -279,6 +279,14 @@ func (c *AccountContext) TakedownAccount() { c.effects.TakedownAccount() } +func (c *AccountContext) EscalateAccount() { + c.effects.EscalateAccount() +} + +func (c *AccountContext) AcknowledgeAccount() { + c.effects.AcknowledgeAccount() +} + func (c *RecordContext) AddRecordFlag(val string) { c.effects.AddRecordFlag(val) } diff --git a/automod/engine/effects.go b/automod/engine/effects.go index ed5ac2998..7f2e91d8f 100644 --- a/automod/engine/effects.go +++ b/automod/engine/effects.go @@ -12,6 +12,8 @@ var ( QuotaModReportDay = 2000 // number of takedowns automod can action per day, for all subjects combined (circuit breaker) QuotaModTakedownDay = 200 + // number of misc actions automod can do per day, for all subjects combined (circuit breaker) + 
QuotaModActionDay = 1000 ) type CounterRef struct { @@ -42,8 +44,12 @@ type Effects struct { AccountFlags []string // Reports which should be filed against this account, as a result of rule execution. AccountReports []ModReport - // If "true", indicates that a rule indicates that the entire account should have a takedown. + // If "true", a rule decided that the entire account should have a takedown. AccountTakedown bool + // If "true", a rule decided that the reported account should be escalated. + AccountEscalate bool + // If "true", a rule decided that the reports on account should be resolved as acknowledged. + AccountAcknowledge bool // Same as "AccountLabels", but at record-level RecordLabels []string // Same as "AccountFlags", but at record-level @@ -128,6 +134,16 @@ func (e *Effects) TakedownAccount() { e.AccountTakedown = true } +// Enqueues the account to be "escalated" for mod review at the end of rule processing. +func (e *Effects) EscalateAccount() { + e.AccountEscalate = true +} + +// Enqueues reports on account to be "acknowledged" (closed) at the end of rule processing. +func (e *Effects) AcknowledgeAccount() { + e.AccountAcknowledge = true +} + // Enqueues the provided label (string value) to be added to the record at the end of rule processing. 
func (e *Effects) AddRecordLabel(val string) { e.mu.Lock() diff --git a/automod/engine/metrics.go b/automod/engine/metrics.go index bf71197d4..1a08944e7 100644 --- a/automod/engine/metrics.go +++ b/automod/engine/metrics.go @@ -37,7 +37,17 @@ var actionNewReportCount = promauto.NewCounterVec(prometheus.CounterOpts{ var actionNewTakedownCount = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "automod_new_action_takedowns", - Help: "Number of new flags persisted", + Help: "Number of new takedowns", +}, []string{"type"}) + +var actionNewEscalationCount = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "automod_new_action_escalations", + Help: "Number of new subject escalations", +}, []string{"type"}) + +var actionNewAcknowledgeCount = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "automod_new_action_acknowledges", + Help: "Number of new subjects acknowledged", }, []string{"type"}) var accountMetaFetches = promauto.NewCounter(prometheus.CounterOpts{ diff --git a/automod/engine/persist.go b/automod/engine/persist.go index 7d6675bbf..4864b3538 100644 --- a/automod/engine/persist.go +++ b/automod/engine/persist.go @@ -57,8 +57,28 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { if err != nil { return fmt.Errorf("circuit-breaking takedowns: %w", err) } + newEscalation := c.effects.AccountEscalate + if c.Account.Private != nil && c.Account.Private.ReviewState == ReviewStateEscalated { + // de-dupe account escalation + newEscalation = false + } else { + newEscalation, err = eng.circuitBreakModAction(ctx, newEscalation) + if err != nil { + return fmt.Errorf("circuit-breaking escalation: %w", err) + } + } + newAcknowledge := c.effects.AccountAcknowledge + if c.Account.Private != nil && (c.Account.Private.ReviewState == "closed" || c.Account.Private.ReviewState == "none") { + // de-dupe account escalation + newAcknowledge = false + } else { + newAcknowledge, err = eng.circuitBreakModAction(ctx, newAcknowledge) + if err != nil { + 
return fmt.Errorf("circuit-breaking acknowledge: %w", err) + } + } - anyModActions := newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 + anyModActions := newTakedown || newEscalation || newAcknowledge || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 if anyModActions && eng.Notifier != nil { for _, srv := range dedupeStrings(c.effects.NotifyServices) { if err := eng.Notifier.SendAccount(ctx, srv, c); err != nil { @@ -145,9 +165,56 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { if err != nil { c.Logger.Error("failed to execute account takedown", "err", err) } + + // we don't want to escalate if there is a takedown + newEscalation = false + } + + if newEscalation { + c.Logger.Warn("account-escalate") + actionNewEscalationCount.WithLabelValues("account").Inc() + comment := "[automod]: auto account-escalation" + _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ + CreatedBy: xrpcc.Auth.Did, + Event: &toolsozone.ModerationEmitEvent_Input_Event{ + ModerationDefs_ModEventEscalate: &toolsozone.ModerationDefs_ModEventEscalate{ + Comment: &comment, + }, + }, + Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ + AdminDefs_RepoRef: &comatproto.AdminDefs_RepoRef{ + Did: c.Account.Identity.DID.String(), + }, + }, + }) + if err != nil { + c.Logger.Error("failed to execute account escalation", "err", err) + } + } + + if newAcknowledge { + c.Logger.Warn("account-acknowledge") + actionNewAcknowledgeCount.WithLabelValues("account").Inc() + comment := "[automod]: auto account-acknowledge" + _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ + CreatedBy: xrpcc.Auth.Did, + Event: &toolsozone.ModerationEmitEvent_Input_Event{ + ModerationDefs_ModEventAcknowledge: &toolsozone.ModerationDefs_ModEventAcknowledge{ + Comment: &comment, + }, + }, + Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ + AdminDefs_RepoRef: 
&comatproto.AdminDefs_RepoRef{ + Did: c.Account.Identity.DID.String(), + }, + }, + }) + if err != nil { + c.Logger.Error("failed to execute account acknowledge", "err", err) + } } - needCachePurge := newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || createdReports + needCachePurge := newTakedown || newEscalation || newAcknowledge || len(newLabels) > 0 || len(newFlags) > 0 || createdReports if needCachePurge { return eng.PurgeAccountCaches(ctx, c.Account.Identity.DID) } @@ -210,6 +277,8 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { if err != nil { return fmt.Errorf("failed to circuit break takedowns: %w", err) } + // TODO: record escalation + // TODO: record acknowledge if newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 { if eng.Notifier != nil { diff --git a/automod/engine/persisthelpers.go b/automod/engine/persisthelpers.go index c224cc4ab..f1dd2b039 100644 --- a/automod/engine/persisthelpers.go +++ b/automod/engine/persisthelpers.go @@ -111,6 +111,26 @@ func (eng *Engine) circuitBreakTakedown(ctx context.Context, takedown bool) (boo return takedown, nil } +// Combined circuit breaker for miscellaneous mod actions like: escalate, acknowledge +func (eng *Engine) circuitBreakModAction(ctx context.Context, action bool) (bool, error) { + if !action { + return false, nil + } + c, err := eng.Counters.GetCount(ctx, "automod-quota", "mod-action", countstore.PeriodDay) + if err != nil { + return false, fmt.Errorf("checking mod action quota: %w", err) + } + if c >= QuotaModActionDay { + eng.Logger.Warn("CIRCUIT BREAKER: automod action") + return false, nil + } + err = eng.Counters.Increment(ctx, "automod-quota", "mod-action") + if err != nil { + return false, fmt.Errorf("incrementing mod action quota: %w", err) + } + return action, nil +} + // Creates a moderation report, but checks first if there was a similar recent one, and skips if so. // // Returns a bool indicating if a new report was created. 
From 6ed407ea5debaf402e3ba9c3aa9b7a32658a5980 Mon Sep 17 00:00:00 2001 From: Foysal Ahamed Date: Tue, 15 Oct 2024 12:16:41 +0200 Subject: [PATCH 18/50] :sparkles: Add record ack and escalation --- automod/engine/persist.go | 56 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/automod/engine/persist.go b/automod/engine/persist.go index 4864b3538..03cd63705 100644 --- a/automod/engine/persist.go +++ b/automod/engine/persist.go @@ -277,10 +277,18 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { if err != nil { return fmt.Errorf("failed to circuit break takedowns: %w", err) } - // TODO: record escalation - // TODO: record acknowledge + // @TODO: should we check for existing escalation? there doesn't seem to be an existing flag for this at record level + newEscalation, err := eng.circuitBreakModAction(ctx, c.effects.RecordEscalate) + if err != nil { + return fmt.Errorf("circuit-breaking escalation: %w", err) + } + // @TODO: should we check if the subject is already acked? 
there doesn't seem to be an existing flag for this at record level + newAcknowledge, err := eng.circuitBreakModAction(ctx, c.effects.RecordAcknowledge) + if err != nil { + return fmt.Errorf("circuit-breaking acknowledge: %w", err) + } - if newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 { + if newEscalation || newAcknowledge || newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 { if eng.Notifier != nil { for _, srv := range dedupeStrings(c.effects.NotifyServices) { if err := eng.Notifier.SendRecord(ctx, srv, c); err != nil { @@ -300,7 +308,7 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { } // exit early - if !newTakedown && len(newLabels) == 0 && len(newReports) == 0 { + if !newAcknowledge && !newEscalation && !newTakedown && len(newLabels) == 0 && len(newReports) == 0 { return nil } @@ -372,5 +380,45 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { c.Logger.Error("failed to execute record takedown", "err", err) } } + + if newEscalation { + c.Logger.Warn("record-escalation") + actionNewEscalationCount.WithLabelValues("record").Inc() + comment := "[automod]: automated record-escalation" + _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ + CreatedBy: xrpcc.Auth.Did, + Event: &toolsozone.ModerationEmitEvent_Input_Event{ + ModerationDefs_ModEventEscalate: &toolsozone.ModerationDefs_ModEventEscalate{ + Comment: &comment, + }, + }, + Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ + RepoStrongRef: &strongRef, + }, + }) + if err != nil { + c.Logger.Error("failed to execute record escalation", "err", err) + } + } + + if newAcknowledge { + c.Logger.Warn("record-acknowledge") + actionNewAcknowledgeCount.WithLabelValues("record").Inc() + comment := "[automod]: automated record-acknowledge" + _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ + CreatedBy: xrpcc.Auth.Did, + Event: 
&toolsozone.ModerationEmitEvent_Input_Event{ + ModerationDefs_ModEventAcknowledge: &toolsozone.ModerationDefs_ModEventAcknowledge{ + Comment: &comment, + }, + }, + Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ + RepoStrongRef: &strongRef, + }, + }) + if err != nil { + c.Logger.Error("failed to execute record acknowledge", "err", err) + } + } return nil } From f4ce81ede806511cdd93af79608fc114d47d9c14 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 29 Oct 2024 17:28:17 -0700 Subject: [PATCH 19/50] remove record-level ask/esc (for now) --- automod/engine/persist.go | 53 ++------------------------------------- 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/automod/engine/persist.go b/automod/engine/persist.go index 03cd63705..d6e7f7554 100644 --- a/automod/engine/persist.go +++ b/automod/engine/persist.go @@ -277,18 +277,8 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { if err != nil { return fmt.Errorf("failed to circuit break takedowns: %w", err) } - // @TODO: should we check for existing escalation? there doesn't seem to be an existing flag for this at record level - newEscalation, err := eng.circuitBreakModAction(ctx, c.effects.RecordEscalate) - if err != nil { - return fmt.Errorf("circuit-breaking escalation: %w", err) - } - // @TODO: should we check if the subject is already acked? 
there doesn't seem to be an existing flag for this at record level - newAcknowledge, err := eng.circuitBreakModAction(ctx, c.effects.RecordAcknowledge) - if err != nil { - return fmt.Errorf("circuit-breaking acknowledge: %w", err) - } - if newEscalation || newAcknowledge || newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 { + if newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 { if eng.Notifier != nil { for _, srv := range dedupeStrings(c.effects.NotifyServices) { if err := eng.Notifier.SendRecord(ctx, srv, c); err != nil { @@ -308,7 +298,7 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { } // exit early - if !newAcknowledge && !newEscalation && !newTakedown && len(newLabels) == 0 && len(newReports) == 0 { + if !newTakedown && len(newLabels) == 0 && len(newReports) == 0 { return nil } @@ -381,44 +371,5 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { } } - if newEscalation { - c.Logger.Warn("record-escalation") - actionNewEscalationCount.WithLabelValues("record").Inc() - comment := "[automod]: automated record-escalation" - _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ - CreatedBy: xrpcc.Auth.Did, - Event: &toolsozone.ModerationEmitEvent_Input_Event{ - ModerationDefs_ModEventEscalate: &toolsozone.ModerationDefs_ModEventEscalate{ - Comment: &comment, - }, - }, - Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ - RepoStrongRef: &strongRef, - }, - }) - if err != nil { - c.Logger.Error("failed to execute record escalation", "err", err) - } - } - - if newAcknowledge { - c.Logger.Warn("record-acknowledge") - actionNewAcknowledgeCount.WithLabelValues("record").Inc() - comment := "[automod]: automated record-acknowledge" - _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ - CreatedBy: xrpcc.Auth.Did, - Event: &toolsozone.ModerationEmitEvent_Input_Event{ - 
ModerationDefs_ModEventAcknowledge: &toolsozone.ModerationDefs_ModEventAcknowledge{ - Comment: &comment, - }, - }, - Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ - RepoStrongRef: &strongRef, - }, - }) - if err != nil { - c.Logger.Error("failed to execute record acknowledge", "err", err) - } - } return nil } From c728f2ec99eb388249db7259afbd03aa334ec971 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 5 Sep 2024 18:49:52 -0700 Subject: [PATCH 20/50] automod: engine support for adding tags --- automod/engine/effects.go | 30 +++++++++++- automod/engine/metrics.go | 5 ++ automod/engine/persist.go | 78 ++++++++++++++++++++++++++++---- automod/engine/persisthelpers.go | 17 +++++++ 4 files changed, 120 insertions(+), 10 deletions(-) diff --git a/automod/engine/effects.go b/automod/engine/effects.go index 7f2e91d8f..a318615cc 100644 --- a/automod/engine/effects.go +++ b/automod/engine/effects.go @@ -40,7 +40,9 @@ type Effects struct { CounterDistinctIncrements []CounterDistinctRef // TODO: better variable names // Label values which should be applied to the overall account, as a result of rule execution. AccountLabels []string - // Moderation flags (similar to labels, but private) which should be applied to the overall account, as a result of rule execution. + // Moderation tags (similar to labels, but private) which should be applied to the overall account, as a result of rule execution. + AccountTags []string + // automod flags (metadata) which should be applied to the account as a result of rule execution. AccountFlags []string // Reports which should be filed against this account, as a result of rule execution. 
AccountReports []ModReport @@ -52,6 +54,8 @@ type Effects struct { AccountAcknowledge bool // Same as "AccountLabels", but at record-level RecordLabels []string + // Same as "AccountTags", but at record-level + RecordTags []string // Same as "AccountFlags", but at record-level RecordFlags []string // Same as "AccountReports", but at record-level @@ -102,6 +106,18 @@ func (e *Effects) AddAccountLabel(val string) { e.AccountLabels = append(e.AccountLabels, val) } +// Enqueues the provided tag (string value) to be added to the account at the end of rule processing. +func (e *Effects) AddAccountTag(val string) { + e.mu.Lock() + defer e.mu.Unlock() + for _, v := range e.AccountTags { + if v == val { + return + } + } + e.AccountTags = append(e.AccountTags, val) +} + // Enqueues the provided flag (string value) to be recorded (in the Engine's flagstore) at the end of rule processing. func (e *Effects) AddAccountFlag(val string) { e.mu.Lock() @@ -156,6 +172,18 @@ func (e *Effects) AddRecordLabel(val string) { e.RecordLabels = append(e.RecordLabels, val) } +// Enqueues the provided tag (string value) to be added to the record at the end of rule processing. +func (e *Effects) AddRecordTag(val string) { + e.mu.Lock() + defer e.mu.Unlock() + for _, v := range e.RecordTags { + if v == val { + return + } + } + e.RecordTags = append(e.RecordTags, val) +} + // Enqueues the provided flag (string value) to be recorded (in the Engine's flagstore) at the end of rule processing. 
func (e *Effects) AddRecordFlag(val string) { e.mu.Lock() diff --git a/automod/engine/metrics.go b/automod/engine/metrics.go index 1a08944e7..bc32b8e54 100644 --- a/automod/engine/metrics.go +++ b/automod/engine/metrics.go @@ -25,6 +25,11 @@ var actionNewLabelCount = promauto.NewCounterVec(prometheus.CounterOpts{ Help: "Number of new labels persisted", }, []string{"type", "val"}) +var actionNewTagCount = promauto.NewCounterVec(prometheus.CounterOpts{ + Name: "automod_new_action_tags", + Help: "Number of new tags persisted", +}, []string{"type", "val"}) + var actionNewFlagCount = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "automod_new_action_flags", Help: "Number of new flags persisted", diff --git a/automod/engine/persist.go b/automod/engine/persist.go index d6e7f7554..e289a64ef 100644 --- a/automod/engine/persist.go +++ b/automod/engine/persist.go @@ -32,7 +32,7 @@ func (eng *Engine) persistCounters(ctx context.Context, eff *Effects) error { return nil } -// Persists account-level moderation actions: new labels, new flags, new takedowns, and reports. +// Persists account-level moderation actions: new labels, new tags, new flags, new takedowns, and reports. // // If necessary, will "purge" identity and account caches, so that state updates will be picked up for subsequent events. // @@ -42,6 +42,11 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { // de-dupe actions newLabels := dedupeLabelActions(c.effects.AccountLabels, c.Account.AccountLabels, c.Account.AccountNegatedLabels) + existingTags := []string{} + if c.Account.Private != nil { + existingTags = c.Account.Private.AccountTags + } + newTags := dedupeTagActions(c.effects.AccountTags, existingTags) newFlags := dedupeFlagActions(c.effects.AccountFlags, c.Account.AccountFlags) // don't report the same account multiple times on the same day for the same reason. this is a quick check; we also query the mod service API just before creating the report. 
@@ -78,7 +83,7 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { } } - anyModActions := newTakedown || newEscalation || newAcknowledge || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 + anyModActions := newTakedown || newEscalation || newAcknowledge || len(newLabels) > 0 || len(newTags) > 0 || len(newFlags) > 0 || len(newReports) > 0 if anyModActions && eng.Notifier != nil { for _, srv := range dedupeStrings(c.effects.NotifyServices) { if err := eng.Notifier.SendAccount(ctx, srv, c); err != nil { @@ -107,7 +112,7 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { xrpcc := eng.OzoneClient if len(newLabels) > 0 { - c.Logger.Info("labeling record", "newLabels", newLabels) + c.Logger.Info("labeling account", "newLabels", newLabels) for _, val := range newLabels { // note: WithLabelValues is a prometheus label, not an atproto label actionNewLabelCount.WithLabelValues("account", val).Inc() @@ -133,6 +138,33 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { } } + if len(newTags) > 0 { + c.Logger.Info("tagging account", "newTags", newTags) + for _, val := range newTags { + // note: WithLabelValues is a prometheus label, not an atproto label + actionNewTagCount.WithLabelValues("account", val).Inc() + } + comment := "[automod]: auto-tagging account" + _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ + CreatedBy: xrpcc.Auth.Did, + Event: &toolsozone.ModerationEmitEvent_Input_Event{ + ModerationDefs_ModEventTag: &toolsozone.ModerationDefs_ModEventTag{ + Add: newTags, + Remove: []string{}, + Comment: &comment, + }, + }, + Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ + AdminDefs_RepoRef: &comatproto.AdminDefs_RepoRef{ + Did: c.Account.Identity.DID.String(), + }, + }, + }) + if err != nil { + c.Logger.Error("failed to create account tags", "err", err) + } + } + // reports are additionally de-duped when persisting the action, so track with a 
flag createdReports := false for _, mr := range newReports { @@ -214,7 +246,7 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { } } - needCachePurge := newTakedown || newEscalation || newAcknowledge || len(newLabels) > 0 || len(newFlags) > 0 || createdReports + needCachePurge := newTakedown || newEscalation || newAcknowledge || len(newLabels) > 0 || len(newTags) > 0 || len(newFlags) > 0 || createdReports if needCachePurge { return eng.PurgeAccountCaches(ctx, c.Account.Identity.DID) } @@ -222,7 +254,7 @@ func (eng *Engine) persistAccountModActions(c *AccountContext) error { return nil } -// Persists some record-level state: labels, takedowns, reports. +// Persists some record-level state: labels, tags, takedowns, reports. // // NOTE: this method currently does *not* persist record-level flags to any storage, and does not de-dupe most actions, on the assumption that the record is new (from firehose) and has no existing mod state. func (eng *Engine) persistRecordModActions(c *RecordContext) error { @@ -233,7 +265,9 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { atURI := c.RecordOp.ATURI().String() newLabels := dedupeStrings(c.effects.RecordLabels) - if len(newLabels) > 0 && eng.OzoneClient != nil { + newTags := dedupeStrings(c.effects.RecordTags) + if (len(newLabels) > 0 || len(newTags) > 0) && eng.OzoneClient != nil { + // fetch existing record labels, tags, etc rv, err := toolsozone.ModerationGetRecord(ctx, eng.OzoneClient, c.RecordOp.CID.String(), c.RecordOp.ATURI().String()) if err != nil { // NOTE: there is a frequent 4xx error here from Ozone because this record has not been indexed yet @@ -250,10 +284,11 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { } existingLabels = dedupeStrings(existingLabels) negLabels = dedupeStrings(negLabels) - // fetch existing record labels newLabels = dedupeLabelActions(newLabels, existingLabels, negLabels) + newTags = dedupeTagActions(newTags, 
rv.Moderation.SubjectStatus.Tags) } } + newFlags := dedupeStrings(c.effects.RecordFlags) if len(newFlags) > 0 { // fetch existing flags, and de-dupe @@ -278,7 +313,7 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { return fmt.Errorf("failed to circuit break takedowns: %w", err) } - if newTakedown || len(newLabels) > 0 || len(newFlags) > 0 || len(newReports) > 0 { + if newTakedown || len(newLabels) > 0 || len(newTags) > 0 || len(newFlags) > 0 || len(newReports) > 0 { if eng.Notifier != nil { for _, srv := range dedupeStrings(c.effects.NotifyServices) { if err := eng.Notifier.SendRecord(ctx, srv, c); err != nil { @@ -298,7 +333,7 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { } // exit early - if !newTakedown && len(newLabels) == 0 && len(newReports) == 0 { + if !newTakedown && len(newLabels) == 0 && len(newTags) == 0 && len(newReports) == 0 { return nil } @@ -343,6 +378,31 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { } } + if len(newTags) > 0 { + c.Logger.Info("tagging record", "newTags", newTags) + for _, val := range newTags { + // note: WithLabelValues is a prometheus label, not an atproto label + actionNewTagCount.WithLabelValues("record", val).Inc() + } + comment := "[automod]: auto-tagging record" + _, err := toolsozone.ModerationEmitEvent(ctx, xrpcc, &toolsozone.ModerationEmitEvent_Input{ + CreatedBy: xrpcc.Auth.Did, + Event: &toolsozone.ModerationEmitEvent_Input_Event{ + ModerationDefs_ModEventTag: &toolsozone.ModerationDefs_ModEventTag{ + Add: newTags, + Remove: []string{}, + Comment: &comment, + }, + }, + Subject: &toolsozone.ModerationEmitEvent_Input_Subject{ + RepoStrongRef: &strongRef, + }, + }) + if err != nil { + c.Logger.Error("failed to create record tag", "err", err) + } + } + for _, mr := range newReports { _, err := eng.createRecordReportIfFresh(ctx, xrpcc, c.RecordOp.ATURI(), c.RecordOp.CID, mr) if err != nil { diff --git a/automod/engine/persisthelpers.go 
b/automod/engine/persisthelpers.go index f1dd2b039..491a86fa4 100644 --- a/automod/engine/persisthelpers.go +++ b/automod/engine/persisthelpers.go @@ -35,6 +35,23 @@ func dedupeLabelActions(labels, existing, existingNegated []string) []string { return newLabels } +func dedupeTagActions(tags, existing []string) []string { + newTags := []string{} + for _, val := range dedupeStrings(tags) { + exists := false + for _, e := range existing { + if val == e { + exists = true + break + } + } + if !exists { + newTags = append(newTags, val) + } + } + return newTags +} + func dedupeFlagActions(flags, existing []string) []string { newFlags := []string{} for _, val := range dedupeStrings(flags) { From 5da2027f1292287c56715fd2f40f47cf8ef3ce25 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 5 Sep 2024 19:26:37 -0700 Subject: [PATCH 21/50] add context helpers --- automod/engine/context.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/automod/engine/context.go b/automod/engine/context.go index b015b3ac0..2e447bebd 100644 --- a/automod/engine/context.go +++ b/automod/engine/context.go @@ -271,6 +271,10 @@ func (c *AccountContext) AddAccountLabel(val string) { c.effects.AddAccountLabel(val) } +func (c *AccountContext) AddAccountTag(val string) { + c.effects.AddAccountTag(val) +} + func (c *AccountContext) ReportAccount(reason, comment string) { c.effects.ReportAccount(reason, comment) } @@ -295,6 +299,10 @@ func (c *RecordContext) AddRecordLabel(val string) { c.effects.AddRecordLabel(val) } +func (c *RecordContext) AddRecordTag(val string) { + c.effects.AddRecordTag(val) +} + func (c *RecordContext) ReportRecord(reason, comment string) { c.effects.ReportRecord(reason, comment) } From 0ed95120d0188d19a83dce1e2c562e8aba9ab2c9 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 29 Oct 2024 17:37:54 -0700 Subject: [PATCH 22/50] add gtube tags for testing --- automod/rules/gtube.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/automod/rules/gtube.go 
b/automod/rules/gtube.go index 4684541a4..71922a528 100644 --- a/automod/rules/gtube.go +++ b/automod/rules/gtube.go @@ -16,6 +16,7 @@ func GtubePostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { if strings.Contains(post.Text, gtubeString) { c.AddRecordLabel("spam") c.Notify("slack") + c.AddRecordTag("gtube-record") } return nil } @@ -26,6 +27,7 @@ func GtubeProfileRule(c *automod.RecordContext, profile *appbsky.ActorProfile) e if profile.Description != nil && strings.Contains(*profile.Description, gtubeString) { c.AddRecordLabel("spam") c.Notify("slack") + c.AddAccountTag("gtuber-account") } return nil } From ca4d0bab572569b85e196e1650d78e3a65b16f38 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 29 Oct 2024 17:50:59 -0700 Subject: [PATCH 23/50] move automod rule helpers to package --- automod/{rules => helpers}/helpers.go | 10 +++++----- automod/{rules => helpers}/helpers_test.go | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) rename automod/{rules => helpers}/helpers.go (98%) rename automod/{rules => helpers}/helpers_test.go (99%) diff --git a/automod/rules/helpers.go b/automod/helpers/helpers.go similarity index 98% rename from automod/rules/helpers.go rename to automod/helpers/helpers.go index e5bdcb2dd..7aa615a30 100644 --- a/automod/rules/helpers.go +++ b/automod/helpers/helpers.go @@ -1,4 +1,4 @@ -package rules +package helpers import ( "fmt" @@ -13,7 +13,7 @@ import ( "github.com/spaolacci/murmur3" ) -func dedupeStrings(in []string) []string { +func DedupeStrings(in []string) []string { var out []string seen := make(map[string]bool) for _, v := range in { @@ -37,7 +37,7 @@ func ExtractHashtagsPost(post *appbsky.FeedPost) []string { } } } - return dedupeStrings(tags) + return DedupeStrings(tags) } func NormalizeHashtag(raw string) string { @@ -103,7 +103,7 @@ func ExtractPostBlobCIDsPost(post *appbsky.FeedPost) []string { } } } - return dedupeStrings(out) + return DedupeStrings(out) } func ExtractBlobCIDsProfile(profile 
*appbsky.ActorProfile) []string { @@ -114,7 +114,7 @@ func ExtractBlobCIDsProfile(profile *appbsky.ActorProfile) []string { if profile.Banner != nil { out = append(out, profile.Banner.Ref.String()) } - return dedupeStrings(out) + return DedupeStrings(out) } func ExtractTextTokensPost(post *appbsky.FeedPost) []string { diff --git a/automod/rules/helpers_test.go b/automod/helpers/helpers_test.go similarity index 99% rename from automod/rules/helpers_test.go rename to automod/helpers/helpers_test.go index bba200cb0..9b04a041c 100644 --- a/automod/rules/helpers_test.go +++ b/automod/helpers/helpers_test.go @@ -1,4 +1,4 @@ -package rules +package helpers import ( comatproto "github.com/bluesky-social/indigo/api/atproto" From e1b57bf8a281f5c68293a23b533eae854f71de7a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 29 Oct 2024 17:51:36 -0700 Subject: [PATCH 24/50] refactor rules to use new helpers package --- automod/rules/harassment.go | 11 ++++++----- automod/rules/hashtags.go | 7 ++++--- automod/rules/identity.go | 3 ++- automod/rules/keyword.go | 9 +++++---- automod/rules/mentions.go | 3 ++- automod/rules/misleading.go | 7 ++++--- automod/rules/misleading_test.go | 21 +++++++++++---------- automod/rules/nostr.go | 3 ++- automod/rules/promo.go | 9 +++++---- automod/rules/quick.go | 5 +++-- automod/rules/replies.go | 25 +++++++++++++------------ automod/rules/reposts.go | 3 ++- automod/visual/hiveai_rule.go | 4 ++-- 13 files changed, 61 insertions(+), 49 deletions(-) diff --git a/automod/rules/harassment.go b/automod/rules/harassment.go index 5212b69e1..2cf7ce194 100644 --- a/automod/rules/harassment.go +++ b/automod/rules/harassment.go @@ -8,18 +8,19 @@ import ( "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/countstore" + "github.com/bluesky-social/indigo/automod/helpers" ) var _ automod.PostRuleFunc = HarassmentTargetInteractionPostRule // looks for new accounts, which 
interact with frequently-harassed accounts, and report them for review func HarassmentTargetInteractionPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 24*time.Hour) { return nil } var interactionDIDs []string - facets, err := ExtractFacets(post) + facets, err := helpers.ExtractFacets(post) if err != nil { return err } @@ -28,7 +29,7 @@ func HarassmentTargetInteractionPostRule(c *automod.RecordContext, post *appbsky interactionDIDs = append(interactionDIDs, *pf.DID) } } - if post.Reply != nil && !IsSelfThread(c, post) { + if post.Reply != nil && !helpers.IsSelfThread(c, post) { parentURI, err := syntax.ParseATURI(post.Reply.Parent.Uri) if err != nil { return err @@ -57,7 +58,7 @@ func HarassmentTargetInteractionPostRule(c *automod.RecordContext, post *appbsky return nil } - interactionDIDs = dedupeStrings(interactionDIDs) + interactionDIDs = helpers.DedupeStrings(interactionDIDs) for _, d := range interactionDIDs { did, err := syntax.ParseDID(d) if err != nil { @@ -114,7 +115,7 @@ var _ automod.PostRuleFunc = HarassmentTrivialPostRule // looks for new accounts, which frequently post the same type of content func HarassmentTrivialPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { return nil } diff --git a/automod/rules/hashtags.go b/automod/rules/hashtags.go index c6d734807..682ce746a 100644 --- a/automod/rules/hashtags.go +++ b/automod/rules/hashtags.go @@ -5,13 +5,14 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/automod/helpers" 
"github.com/bluesky-social/indigo/automod/keyword" ) // looks for specific hashtags from known lists func BadHashtagsPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - for _, tag := range ExtractHashtagsPost(post) { - tag = NormalizeHashtag(tag) + for _, tag := range helpers.ExtractHashtagsPost(post) { + tag = helpers.NormalizeHashtag(tag) // skip some bad-word hashtags which frequently false-positive if tag == "nazi" || tag == "hitler" { continue @@ -35,7 +36,7 @@ var _ automod.PostRuleFunc = BadHashtagsPostRule // if a post is "almost all" hashtags, it might be a form of search spam func TooManyHashtagsPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - tags := ExtractHashtagsPost(post) + tags := helpers.ExtractHashtagsPost(post) tagChars := 0 for _, tag := range tags { tagChars += len(tag) diff --git a/automod/rules/identity.go b/automod/rules/identity.go index 365d63f95..e74991233 100644 --- a/automod/rules/identity.go +++ b/automod/rules/identity.go @@ -7,11 +7,12 @@ import ( "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/countstore" + "github.com/bluesky-social/indigo/automod/helpers" ) // triggers on first identity event for an account (DID) func NewAccountRule(c *automod.AccountContext) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(c, 4*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(c, 4*time.Hour) { return nil } diff --git a/automod/rules/keyword.go b/automod/rules/keyword.go index abb202600..8d5caa395 100644 --- a/automod/rules/keyword.go +++ b/automod/rules/keyword.go @@ -7,6 +7,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/automod/helpers" "github.com/bluesky-social/indigo/automod/keyword" ) @@ -17,7 +18,7 @@ func BadWordPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { isJapanese = true } } - for _, tok := range 
ExtractTextTokensPost(post) { + for _, tok := range helpers.ExtractTextTokensPost(post) { word := keyword.SlugIsExplicitSlur(tok) // used very frequently in a reclaimed context if word != "" && word != "faggot" && word != "tranny" && word != "coon" && !(word == "kike" && isJapanese) { @@ -54,7 +55,7 @@ func BadWordProfileRule(c *automod.RecordContext, profile *appbsky.ActorProfile) //c.Notify("slack") } } - for _, tok := range ExtractTextTokensProfile(profile) { + for _, tok := range helpers.ExtractTextTokensProfile(profile) { // de-pluralize tok = strings.TrimSuffix(tok, "s") if c.InSet("worst-words", tok) { @@ -71,8 +72,8 @@ var _ automod.ProfileRuleFunc = BadWordProfileRule // looks for the specific harassment situation of a replay to another user with only a single word func ReplySingleBadWordPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if post.Reply != nil && !IsSelfThread(c, post) { - tokens := ExtractTextTokensPost(post) + if post.Reply != nil && !helpers.IsSelfThread(c, post) { + tokens := helpers.ExtractTextTokensPost(post) if len(tokens) != 1 { return nil } diff --git a/automod/rules/mentions.go b/automod/rules/mentions.go index 8155b4a4a..98d419d09 100644 --- a/automod/rules/mentions.go +++ b/automod/rules/mentions.go @@ -8,6 +8,7 @@ import ( "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/countstore" + "github.com/bluesky-social/indigo/automod/helpers" ) var _ automod.PostRuleFunc = DistinctMentionsRule @@ -47,7 +48,7 @@ var youngMentionAccountLimit = 12 var _ automod.PostRuleFunc = YoungAccountDistinctMentionsRule func YoungAccountDistinctMentionsRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 14*24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 14*24*time.Hour) { return nil } diff --git 
a/automod/rules/misleading.go b/automod/rules/misleading.go index df4525cfc..31822ccce 100644 --- a/automod/rules/misleading.go +++ b/automod/rules/misleading.go @@ -9,9 +9,10 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/automod/helpers" ) -func isMisleadingURLFacet(facet PostFacet, logger *slog.Logger) bool { +func isMisleadingURLFacet(facet helpers.PostFacet, logger *slog.Logger) bool { linkURL, err := url.Parse(*facet.URL) if err != nil { logger.Warn("invalid link metadata URL", "url", facet.URL) @@ -84,7 +85,7 @@ func MisleadingURLPostRule(c *automod.RecordContext, post *appbsky.FeedPost) err if c.Account.Identity.Handle == "nowbreezing.ntw.app" { return nil } - facets, err := ExtractFacets(post) + facets, err := helpers.ExtractFacets(post) if err != nil { c.Logger.Warn("invalid facets", "err", err) // TODO: or some other "this record is corrupt" indicator? @@ -105,7 +106,7 @@ func MisleadingURLPostRule(c *automod.RecordContext, post *appbsky.FeedPost) err var _ automod.PostRuleFunc = MisleadingMentionPostRule func MisleadingMentionPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - facets, err := ExtractFacets(post) + facets, err := helpers.ExtractFacets(post) if err != nil { c.Logger.Warn("invalid facets", "err", err) // TODO: or some other "this record is corrupt" indicator? 
diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go index cf8e814af..2e47883a6 100644 --- a/automod/rules/misleading_test.go +++ b/automod/rules/misleading_test.go @@ -11,6 +11,7 @@ import ( "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/engine" + "github.com/bluesky-social/indigo/automod/helpers" "github.com/stretchr/testify/assert" ) @@ -118,67 +119,67 @@ func TestIsMisleadingURL(t *testing.T) { logger := slog.Default() fixtures := []struct { - facet PostFacet + facet helpers.PostFacet out bool }{ { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "https://atproto.com", URL: pstr("https://atproto.com"), }, out: false, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "https://atproto.com", URL: pstr("https://evil.com"), }, out: true, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "https://www.atproto.com", URL: pstr("https://atproto.com"), }, out: false, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "https://atproto.com", URL: pstr("https://www.atproto.com"), }, out: false, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "[example.com]", URL: pstr("https://www.example.com"), }, out: false, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "example.com...", URL: pstr("https://example.com.evil.com"), }, out: true, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "ATPROTO.com...", URL: pstr("https://atproto.com"), }, out: false, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "1234.5678", URL: pstr("https://arxiv.org/abs/1234.5678"), }, out: false, }, { - facet: PostFacet{ + facet: helpers.PostFacet{ Text: "www.techdirt.com…", URL: pstr("https://www.techdirt.com/"), }, diff --git a/automod/rules/nostr.go b/automod/rules/nostr.go index 0291d0668..5f91e7ee6 100644 --- a/automod/rules/nostr.go +++ b/automod/rules/nostr.go @@ -7,13 +7,14 @@ import ( appbsky 
"github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/automod/helpers" ) var _ automod.PostRuleFunc = NostrSpamPostRule // looks for new accounts, which frequently post the same type of content func NostrSpamPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { return nil } diff --git a/automod/rules/promo.go b/automod/rules/promo.go index 0dad7aaf4..f6fe23a24 100644 --- a/automod/rules/promo.go +++ b/automod/rules/promo.go @@ -9,6 +9,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/countstore" + "github.com/bluesky-social/indigo/automod/helpers" ) var _ automod.PostRuleFunc = AggressivePromotionRule @@ -17,16 +18,16 @@ var _ automod.PostRuleFunc = AggressivePromotionRule // // this rule depends on ReplyCountPostRule() to set counts func AggressivePromotionRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 7*24*time.Hour) { return nil } - if post.Reply == nil || IsSelfThread(c, post) { + if post.Reply == nil || helpers.IsSelfThread(c, post) { return nil } - allURLs := ExtractTextURLs(post.Text) + allURLs := helpers.ExtractTextURLs(post.Text) if c.Account.Profile.Description != nil { - profileURLs := ExtractTextURLs(*c.Account.Profile.Description) + profileURLs := helpers.ExtractTextURLs(*c.Account.Profile.Description) allURLs = append(allURLs, profileURLs...) 
} hasPromo := false diff --git a/automod/rules/quick.go b/automod/rules/quick.go index 77075d94a..ea6a69e36 100644 --- a/automod/rules/quick.go +++ b/automod/rules/quick.go @@ -7,6 +7,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/automod" + "github.com/bluesky-social/indigo/automod/helpers" ) var botLinkStrings = []string{"ainna13762491", "LINK押して", "→ https://tiny", "⇒ http://tiny"} @@ -54,7 +55,7 @@ func SimpleBotPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { var _ automod.IdentityRuleFunc = NewAccountBotEmailRule func NewAccountBotEmailRule(c *automod.AccountContext) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(c, 1*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(c, 1*time.Hour) { return nil } @@ -73,7 +74,7 @@ var _ automod.PostRuleFunc = TrivialSpamPostRule // looks for new accounts, which frequently post the same type of content func TrivialSpamPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 8*24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 8*24*time.Hour) { return nil } diff --git a/automod/rules/replies.go b/automod/rules/replies.go index aed986737..e03e9de53 100644 --- a/automod/rules/replies.go +++ b/automod/rules/replies.go @@ -9,13 +9,14 @@ import ( "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/countstore" + "github.com/bluesky-social/indigo/automod/helpers" ) var _ automod.PostRuleFunc = ReplyCountPostRule // does not count "self-replies" (direct to self, or in own post thread) func ReplyCountPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if post.Reply == nil || IsSelfThread(c, post) { + if post.Reply == nil || helpers.IsSelfThread(c, post) { return nil } @@ -47,7 +48,7 @@ var _ 
automod.PostRuleFunc = IdenticalReplyPostRule // // There can be legitimate situations that trigger this rule, so in most situations should be a "report" not "label" action. func IdenticalReplyPostRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if post.Reply == nil || IsSelfThread(c, post) { + if post.Reply == nil || helpers.IsSelfThread(c, post) { return nil } @@ -55,18 +56,18 @@ func IdenticalReplyPostRule(c *automod.RecordContext, post *appbsky.FeedPost) er if utf8.RuneCountInString(post.Text) <= 10 { return nil } - if AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { + if helpers.AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { return nil } // don't count if there is a follow-back relationship - if ParentOrRootIsFollower(c, post) { + if helpers.ParentOrRootIsFollower(c, post) { return nil } // increment before read. use a specific period (IncrementPeriod()) to reduce the number of counters (one per unique post text) period := countstore.PeriodDay - bucket := c.Account.Identity.DID.String() + "/" + HashOfString(post.Text) + bucket := c.Account.Identity.DID.String() + "/" + helpers.HashOfString(post.Text) c.IncrementPeriod("reply-text", bucket, period) count := c.GetCount("reply-text", bucket, period) @@ -91,21 +92,21 @@ var identicalReplySameParentMaxPosts int64 = 50 var _ automod.PostRuleFunc = IdenticalReplyPostSameParentRule func IdenticalReplyPostSameParentRule(c *automod.RecordContext, post *appbsky.FeedPost) error { - if post.Reply == nil || IsSelfThread(c, post) { + if post.Reply == nil || helpers.IsSelfThread(c, post) { return nil } - if ParentOrRootIsFollower(c, post) { + if helpers.ParentOrRootIsFollower(c, post) { return nil } postCount := c.Account.PostsCount - if AccountIsOlderThan(&c.AccountContext, identicalReplySameParentMaxAge) || postCount >= identicalReplySameParentMaxPosts { + if helpers.AccountIsOlderThan(&c.AccountContext, identicalReplySameParentMaxAge) || postCount >= identicalReplySameParentMaxPosts { 
return nil } period := countstore.PeriodHour - bucket := c.Account.Identity.DID.String() + "/" + post.Reply.Parent.Uri + "/" + HashOfString(post.Text) + bucket := c.Account.Identity.DID.String() + "/" + post.Reply.Parent.Uri + "/" + helpers.HashOfString(post.Text) c.IncrementPeriod("reply-text-same-post", bucket, period) count := c.GetCount("reply-text-same-post", bucket, period) @@ -126,7 +127,7 @@ var _ automod.PostRuleFunc = YoungAccountDistinctRepliesRule func YoungAccountDistinctRepliesRule(c *automod.RecordContext, post *appbsky.FeedPost) error { // only replies, and skip self-replies (eg, threads) - if post.Reply == nil || IsSelfThread(c, post) { + if post.Reply == nil || helpers.IsSelfThread(c, post) { return nil } @@ -134,12 +135,12 @@ func YoungAccountDistinctRepliesRule(c *automod.RecordContext, post *appbsky.Fee if utf8.RuneCountInString(post.Text) <= 10 { return nil } - if AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { + if helpers.AccountIsOlderThan(&c.AccountContext, 14*24*time.Hour) { return nil } // don't count if there is a follow-back relationship - if ParentOrRootIsFollower(c, post) { + if helpers.ParentOrRootIsFollower(c, post) { return nil } diff --git a/automod/rules/reposts.go b/automod/rules/reposts.go index 75b248461..573146558 100644 --- a/automod/rules/reposts.go +++ b/automod/rules/reposts.go @@ -7,6 +7,7 @@ import ( "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/countstore" + "github.com/bluesky-social/indigo/automod/helpers" ) var dailyRepostThresholdWithoutPost = 30 @@ -18,7 +19,7 @@ var _ automod.RecordRuleFunc = TooManyRepostRule // looks for accounts which do frequent reposts func TooManyRepostRule(c *automod.RecordContext) error { // Don't bother checking reposts from accounts older than 30 days - if c.Account.Identity == nil || !AccountIsYoungerThan(&c.AccountContext, 30*24*time.Hour) { + if c.Account.Identity == nil || !helpers.AccountIsYoungerThan(&c.AccountContext, 
30*24*time.Hour) { return nil } diff --git a/automod/visual/hiveai_rule.go b/automod/visual/hiveai_rule.go index 32bcf6a9a..850ee83b1 100644 --- a/automod/visual/hiveai_rule.go +++ b/automod/visual/hiveai_rule.go @@ -5,7 +5,7 @@ import ( "time" "github.com/bluesky-social/indigo/automod" - "github.com/bluesky-social/indigo/automod/rules" + "github.com/bluesky-social/indigo/automod/helpers" lexutil "github.com/bluesky-social/indigo/lex/util" ) @@ -43,7 +43,7 @@ func (hal *HiveAIClient) HiveLabelBlobRule(c *automod.RecordContext, blob lexuti for _, l := range labels { // NOTE: experimenting with profile reporting for new accounts - if l == "sexual" && c.RecordOp.Collection.String() == "app.bsky.actor.profile" && rules.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { + if l == "sexual" && c.RecordOp.Collection.String() == "app.bsky.actor.profile" && helpers.AccountIsYoungerThan(&c.AccountContext, 2*24*time.Hour) { c.ReportRecord(automod.ReportReasonSexual, "possible sexual profile (not labeled yet)") c.Logger.Info("skipping record label", "label", l, "reason", "sexual-profile-experiment") } else { From 867d96f71bfcad53b76b4183a0a86bd44362a36f Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 29 Oct 2024 18:00:07 -0700 Subject: [PATCH 25/50] refactor helpers in to separate files --- automod/helpers/account.go | 49 ++++++++ automod/helpers/account_test.go | 61 ++++++++++ automod/helpers/{helpers.go => bsky.go} | 73 ------------ .../helpers/{helpers_test.go => bsky_test.go} | 110 ------------------ automod/helpers/text.go | 35 ++++++ automod/helpers/text_test.go | 64 ++++++++++ 6 files changed, 209 insertions(+), 183 deletions(-) create mode 100644 automod/helpers/account.go create mode 100644 automod/helpers/account_test.go rename automod/helpers/{helpers.go => bsky.go} (71%) rename automod/helpers/{helpers_test.go => bsky_test.go} (53%) create mode 100644 automod/helpers/text.go create mode 100644 automod/helpers/text_test.go diff --git 
a/automod/helpers/account.go b/automod/helpers/account.go new file mode 100644 index 000000000..2a1a275cb --- /dev/null +++ b/automod/helpers/account.go @@ -0,0 +1,49 @@ +package helpers + +import ( + "time" + + "github.com/bluesky-social/indigo/automod" +) + +// no accounts exist before this time +var atprotoAccountEpoch = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC) + +// returns true if account creation timestamp is plausible: not-nil, not in distant past, not in the future +func plausibleAccountCreation(when *time.Time) bool { + if when == nil { + return false + } + // this is mostly to check for misconfigurations or null values (eg, UNIX epoch zero means "unknown" not actually 1970) + if !when.After(atprotoAccountEpoch) { + return false + } + // a timestamp in the future would also indicate some misconfiguration + if when.After(time.Now().Add(time.Hour)) { + return false + } + return true +} + +// checks if account was created recently, based on either public or private account metadata. if metadata isn't available at all, or seems bogus, returns 'false' +func AccountIsYoungerThan(c *automod.AccountContext, age time.Duration) bool { + // TODO: consider swapping priority order here (and below) + if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) { + return time.Since(*c.Account.CreatedAt) < age + } + if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) { + return time.Since(*c.Account.Private.IndexedAt) < age + } + return false +} + +// checks if account was *not* created recently, based on either public or private account metadata. 
if metadata isn't available at all, or seems bogus, returns 'false' +func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool { + if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) { + return time.Since(*c.Account.CreatedAt) >= age + } + if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) { + return time.Since(*c.Account.Private.IndexedAt) >= age + } + return false +} diff --git a/automod/helpers/account_test.go b/automod/helpers/account_test.go new file mode 100644 index 000000000..c949eb77c --- /dev/null +++ b/automod/helpers/account_test.go @@ -0,0 +1,61 @@ +package helpers + +import ( + "testing" + "time" + + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" + "github.com/stretchr/testify/assert" +) + +func TestAccountIsYoungerThan(t *testing.T) { + assert := assert.New(t) + + am := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, + Profile: automod.ProfileSummary{}, + Private: nil, + } + now := time.Now() + ac := automod.AccountContext{ + Account: am, + } + assert.False(AccountIsYoungerThan(&ac, time.Hour)) + assert.False(AccountIsOlderThan(&ac, time.Hour)) + + ac.Account.CreatedAt = &now + assert.True(AccountIsYoungerThan(&ac, time.Hour)) + assert.False(AccountIsOlderThan(&ac, time.Hour)) + + yesterday := time.Now().Add(-1 * time.Hour * 24) + ac.Account.CreatedAt = &yesterday + assert.False(AccountIsYoungerThan(&ac, time.Hour)) + assert.True(AccountIsOlderThan(&ac, time.Hour)) + + old := time.Date(1990, 1, 1, 0, 0, 0, 0, time.UTC) + ac.Account.CreatedAt = &old + assert.False(AccountIsYoungerThan(&ac, time.Hour)) + assert.False(AccountIsYoungerThan(&ac, time.Hour*24*365*100)) + assert.False(AccountIsOlderThan(&ac, time.Hour)) + assert.False(AccountIsOlderThan(&ac, time.Hour*24*365*100)) + + 
future := time.Date(3000, 1, 1, 0, 0, 0, 0, time.UTC) + ac.Account.CreatedAt = &future + assert.False(AccountIsYoungerThan(&ac, time.Hour)) + assert.False(AccountIsOlderThan(&ac, time.Hour)) + + ac.Account.CreatedAt = nil + ac.Account.Private = &automod.AccountPrivate{ + Email: "account@example.com", + IndexedAt: &yesterday, + } + assert.True(AccountIsYoungerThan(&ac, 48*time.Hour)) + assert.False(AccountIsYoungerThan(&ac, time.Hour)) + assert.True(AccountIsOlderThan(&ac, time.Hour)) + assert.False(AccountIsOlderThan(&ac, 48*time.Hour)) +} diff --git a/automod/helpers/helpers.go b/automod/helpers/bsky.go similarity index 71% rename from automod/helpers/helpers.go rename to automod/helpers/bsky.go index 7aa615a30..c7416f2dd 100644 --- a/automod/helpers/helpers.go +++ b/automod/helpers/bsky.go @@ -2,29 +2,13 @@ package helpers import ( "fmt" - "regexp" - "time" appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/automod" "github.com/bluesky-social/indigo/automod/keyword" - - "github.com/spaolacci/murmur3" ) -func DedupeStrings(in []string) []string { - var out []string - seen := make(map[string]bool) - for _, v := range in { - if !seen[v] { - out = append(out, v) - seen[v] = true - } - } - return out -} - func ExtractHashtagsPost(post *appbsky.FeedPost) []string { var tags []string for _, tag := range post.Tags { @@ -152,13 +136,6 @@ func ExtractTextTokensProfile(profile *appbsky.ActorProfile) []string { return keyword.TokenizeText(s) } -// based on: https://stackoverflow.com/a/48769624, with no trailing period allowed -var urlRegex = regexp.MustCompile(`(?:(?:https?|ftp):\/\/)?[\w/\-?=%.]+\.[\w/\-&?=%.]*[\w/\-&?=%]+`) - -func ExtractTextURLs(raw string) []string { - return urlRegex.FindAllString(raw, -1) -} - func ExtractTextURLsProfile(profile *appbsky.ActorProfile) []string { s := "" if profile.Description != nil { @@ -191,14 +168,6 @@ func IsSelfThread(c *automod.RecordContext, 
post *appbsky.FeedPost) bool { return false } -// returns a fast, compact hash of a string -// -// current implementation uses murmur3, default seed, and hex encoding -func HashOfString(s string) string { - val := murmur3.Sum64([]byte(s)) - return fmt.Sprintf("%016x", val) -} - func ParentOrRootIsFollower(c *automod.RecordContext, post *appbsky.FeedPost) bool { if post.Reply == nil || IsSelfThread(c, post) { return false @@ -242,48 +211,6 @@ func ParentOrRootIsFollower(c *automod.RecordContext, post *appbsky.FeedPost) bo return false } -// no accounts exist before this time -var atprotoAccountEpoch = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC) - -// returns true if account creation timestamp is plausible: not-nil, not in distant past, not in the future -func plausibleAccountCreation(when *time.Time) bool { - if when == nil { - return false - } - // this is mostly to check for misconfigurations or null values (eg, UNIX epoch zero means "unknown" not actually 1970) - if !when.After(atprotoAccountEpoch) { - return false - } - // a timestamp in the future would also indicate some misconfiguration - if when.After(time.Now().Add(time.Hour)) { - return false - } - return true -} - -// checks if account was created recently, based on either public or private account metadata. if metadata isn't available at all, or seems bogus, returns 'false' -func AccountIsYoungerThan(c *automod.AccountContext, age time.Duration) bool { - // TODO: consider swapping priority order here (and below) - if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) { - return time.Since(*c.Account.CreatedAt) < age - } - if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) { - return time.Since(*c.Account.Private.IndexedAt) < age - } - return false -} - -// checks if account was *not* created recently, based on either public or private account metadata. 
if metadata isn't available at all, or seems bogus, returns 'false' -func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool { - if c.Account.CreatedAt != nil && plausibleAccountCreation(c.Account.CreatedAt) { - return time.Since(*c.Account.CreatedAt) >= age - } - if c.Account.Private != nil && plausibleAccountCreation(c.Account.Private.IndexedAt) { - return time.Since(*c.Account.Private.IndexedAt) >= age - } - return false -} - func PostParentOrRootIsDid(post *appbsky.FeedPost, did string) bool { if post.Reply == nil { return false diff --git a/automod/helpers/helpers_test.go b/automod/helpers/bsky_test.go similarity index 53% rename from automod/helpers/helpers_test.go rename to automod/helpers/bsky_test.go index 9b04a041c..b5d6cb242 100644 --- a/automod/helpers/helpers_test.go +++ b/automod/helpers/bsky_test.go @@ -4,120 +4,10 @@ import ( comatproto "github.com/bluesky-social/indigo/api/atproto" appbsky "github.com/bluesky-social/indigo/api/bsky" "testing" - "time" - "github.com/bluesky-social/indigo/atproto/identity" - "github.com/bluesky-social/indigo/atproto/syntax" - "github.com/bluesky-social/indigo/automod" - "github.com/bluesky-social/indigo/automod/keyword" "github.com/stretchr/testify/assert" ) -func TestTokenizeText(t *testing.T) { - assert := assert.New(t) - - fixtures := []struct { - s string - out []string - }{ - { - s: "1 'Two' three!", - out: []string{"1", "two", "three"}, - }, - { - s: " foo1;bar2,baz3...", - out: []string{"foo1", "bar2", "baz3"}, - }, - { - s: "https://example.com/index.html", - out: []string{"https", "example", "com", "index", "html"}, - }, - } - - for _, fix := range fixtures { - assert.Equal(fix.out, keyword.TokenizeText(fix.s)) - } -} - -func TestExtractURL(t *testing.T) { - assert := assert.New(t) - - fixtures := []struct { - s string - out []string - }{ - { - s: "this is a description with example.com mentioned in the middle", - out: []string{"example.com"}, - }, - { - s: "this is another example with 
https://en.wikipedia.org/index.html: and archive.org, and https://eff.org/... and bsky.app.", - out: []string{"https://en.wikipedia.org/index.html", "archive.org", "https://eff.org/", "bsky.app"}, - }, - } - - for _, fix := range fixtures { - assert.Equal(fix.out, ExtractTextURLs(fix.s)) - } -} - -func TestHashOfString(t *testing.T) { - assert := assert.New(t) - - // hashing function should be consistent over time - assert.Equal("4e6f69c0e3d10992", HashOfString("dummy-value")) -} - -func TestAccountIsYoungerThan(t *testing.T) { - assert := assert.New(t) - - am := automod.AccountMeta{ - Identity: &identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), - }, - Profile: automod.ProfileSummary{}, - Private: nil, - } - now := time.Now() - ac := automod.AccountContext{ - Account: am, - } - assert.False(AccountIsYoungerThan(&ac, time.Hour)) - assert.False(AccountIsOlderThan(&ac, time.Hour)) - - ac.Account.CreatedAt = &now - assert.True(AccountIsYoungerThan(&ac, time.Hour)) - assert.False(AccountIsOlderThan(&ac, time.Hour)) - - yesterday := time.Now().Add(-1 * time.Hour * 24) - ac.Account.CreatedAt = &yesterday - assert.False(AccountIsYoungerThan(&ac, time.Hour)) - assert.True(AccountIsOlderThan(&ac, time.Hour)) - - old := time.Date(1990, 1, 1, 0, 0, 0, 0, time.UTC) - ac.Account.CreatedAt = &old - assert.False(AccountIsYoungerThan(&ac, time.Hour)) - assert.False(AccountIsYoungerThan(&ac, time.Hour*24*365*100)) - assert.False(AccountIsOlderThan(&ac, time.Hour)) - assert.False(AccountIsOlderThan(&ac, time.Hour*24*365*100)) - - future := time.Date(3000, 1, 1, 0, 0, 0, 0, time.UTC) - ac.Account.CreatedAt = &future - assert.False(AccountIsYoungerThan(&ac, time.Hour)) - assert.False(AccountIsOlderThan(&ac, time.Hour)) - - ac.Account.CreatedAt = nil - ac.Account.Private = &automod.AccountPrivate{ - Email: "account@example.com", - IndexedAt: &yesterday, - } - assert.True(AccountIsYoungerThan(&ac, 48*time.Hour)) - 
assert.False(AccountIsYoungerThan(&ac, time.Hour)) - assert.True(AccountIsOlderThan(&ac, time.Hour)) - assert.False(AccountIsOlderThan(&ac, 48*time.Hour)) -} - func TestParentOrRootIsDid(t *testing.T) { assert := assert.New(t) diff --git a/automod/helpers/text.go b/automod/helpers/text.go new file mode 100644 index 000000000..412eb9c8c --- /dev/null +++ b/automod/helpers/text.go @@ -0,0 +1,35 @@ +package helpers + +import ( + "fmt" + "regexp" + + "github.com/spaolacci/murmur3" +) + +func DedupeStrings(in []string) []string { + var out []string + seen := make(map[string]bool) + for _, v := range in { + if !seen[v] { + out = append(out, v) + seen[v] = true + } + } + return out +} + +// returns a fast, compact hash of a string +// +// current implementation uses murmur3, default seed, and hex encoding +func HashOfString(s string) string { + val := murmur3.Sum64([]byte(s)) + return fmt.Sprintf("%016x", val) +} + +// based on: https://stackoverflow.com/a/48769624, with no trailing period allowed +var urlRegex = regexp.MustCompile(`(?:(?:https?|ftp):\/\/)?[\w/\-?=%.]+\.[\w/\-&?=%.]*[\w/\-&?=%]+`) + +func ExtractTextURLs(raw string) []string { + return urlRegex.FindAllString(raw, -1) +} diff --git a/automod/helpers/text_test.go b/automod/helpers/text_test.go new file mode 100644 index 000000000..ef219155e --- /dev/null +++ b/automod/helpers/text_test.go @@ -0,0 +1,64 @@ +package helpers + +import ( + "testing" + + "github.com/bluesky-social/indigo/automod/keyword" + + "github.com/stretchr/testify/assert" +) + +func TestTokenizeText(t *testing.T) { + assert := assert.New(t) + + fixtures := []struct { + s string + out []string + }{ + { + s: "1 'Two' three!", + out: []string{"1", "two", "three"}, + }, + { + s: " foo1;bar2,baz3...", + out: []string{"foo1", "bar2", "baz3"}, + }, + { + s: "https://example.com/index.html", + out: []string{"https", "example", "com", "index", "html"}, + }, + } + + for _, fix := range fixtures { + assert.Equal(fix.out, keyword.TokenizeText(fix.s)) 
+ } +} + +func TestExtractURL(t *testing.T) { + assert := assert.New(t) + + fixtures := []struct { + s string + out []string + }{ + { + s: "this is a description with example.com mentioned in the middle", + out: []string{"example.com"}, + }, + { + s: "this is another example with https://en.wikipedia.org/index.html: and archive.org, and https://eff.org/... and bsky.app.", + out: []string{"https://en.wikipedia.org/index.html", "archive.org", "https://eff.org/", "bsky.app"}, + }, + } + + for _, fix := range fixtures { + assert.Equal(fix.out, ExtractTextURLs(fix.s)) + } +} + +func TestHashOfString(t *testing.T) { + assert := assert.New(t) + + // hashing function should be consistent over time + assert.Equal("4e6f69c0e3d10992", HashOfString("dummy-value")) +} From 976ce81842082ebbbb7aed67458fcf99ceb8df79 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 30 Oct 2024 15:54:22 -0700 Subject: [PATCH 26/50] ozone lexicon updates --- api/ozone/moderationqueryEvents.go | 8 ++++++-- api/ozone/moderationqueryStatuses.go | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/api/ozone/moderationqueryEvents.go b/api/ozone/moderationqueryEvents.go index fee7adc39..1be68412f 100644 --- a/api/ozone/moderationqueryEvents.go +++ b/api/ozone/moderationqueryEvents.go @@ -20,21 +20,24 @@ type ModerationQueryEvents_Output struct { // // addedLabels: If specified, only events where all of these labels were added are returned // addedTags: If specified, only events where all of these tags were added are returned +// collections: If specified, only events where the subject belongs to the given collections will be returned. When subjectType is set to 'account', this will be ignored. 
// comment: If specified, only events with comments containing the keyword are returned // createdAfter: Retrieve events created after a given timestamp // createdBefore: Retrieve events created before a given timestamp // hasComment: If true, only events with comments are returned -// includeAllUserRecords: If true, events on all record types (posts, lists, profile etc.) owned by the did are returned +// includeAllUserRecords: If true, events on all record types (posts, lists, profile etc.) or records from given 'collections' param, owned by the did are returned. // removedLabels: If specified, only events where all of these labels were removed are returned // removedTags: If specified, only events where all of these tags were removed are returned // sortDirection: Sort direction for the events. Defaults to descending order of created at timestamp. +// subjectType: If specified, only events where the subject is of the given type (account or record) will be returned. When this is set to 'account' the 'collections' parameter will be ignored. When includeAllUserRecords or subject is set, this will be ignored. // types: The types of events (fully qualified string in the format of tools.ozone.moderation.defs#modEvent) to filter by. If not specified, all events are returned. 
-func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, types []string) (*ModerationQueryEvents_Output, error) { +func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, collections []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, subjectType string, types []string) (*ModerationQueryEvents_Output, error) { var out ModerationQueryEvents_Output params := map[string]interface{}{ "addedLabels": addedLabels, "addedTags": addedTags, + "collections": collections, "comment": comment, "createdAfter": createdAfter, "createdBefore": createdBefore, @@ -48,6 +51,7 @@ func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []st "reportTypes": reportTypes, "sortDirection": sortDirection, "subject": subject, + "subjectType": subjectType, "types": types, } if err := c.Do(ctx, xrpc.Query, "", "tools.ozone.moderation.queryEvents", params, nil, &out); err != nil { diff --git a/api/ozone/moderationqueryStatuses.go b/api/ozone/moderationqueryStatuses.go index 68dac122b..969d77e9d 100644 --- a/api/ozone/moderationqueryStatuses.go +++ b/api/ozone/moderationqueryStatuses.go @@ -19,8 +19,9 @@ type ModerationQueryStatuses_Output struct { // ModerationQueryStatuses calls the XRPC method "tools.ozone.moderation.queryStatuses". // // appealed: Get subjects in unresolved appealed status +// collections: If specified, subjects belonging to the given collections will be returned. 
When subjectType is set to 'account', this will be ignored. // comment: Search subjects by keyword from comments -// includeAllUserRecords: All subjects belonging to the account specified in the 'subject' param will be returned. +// includeAllUserRecords: All subjects, or subjects from given 'collections' param, belonging to the account specified in the 'subject' param will be returned. // includeMuted: By default, we don't include muted subjects in the results. Set this to true to include them. // lastReviewedBy: Get all subject statuses that were reviewed by a specific moderator // onlyMuted: When set to true, only muted subjects and reporters will be returned. @@ -30,12 +31,14 @@ type ModerationQueryStatuses_Output struct { // reviewedAfter: Search subjects reviewed after a given timestamp // reviewedBefore: Search subjects reviewed before a given timestamp // subject: The subject to get the status for. +// subjectType: If specified, subjects of the given type (account or record) will be returned. When this is set to 'account' the 'collections' parameter will be ignored. When includeAllUserRecords or subject is set, this will be ignored. 
// takendown: Get subjects that were taken down -func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, comment string, cursor string, excludeTags []string, ignoreSubjects []string, includeAllUserRecords bool, includeMuted bool, lastReviewedBy string, limit int64, onlyMuted bool, reportedAfter string, reportedBefore string, reviewState string, reviewedAfter string, reviewedBefore string, sortDirection string, sortField string, subject string, tags []string, takendown bool) (*ModerationQueryStatuses_Output, error) { +func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, collections []string, comment string, cursor string, excludeTags []string, ignoreSubjects []string, includeAllUserRecords bool, includeMuted bool, lastReviewedBy string, limit int64, onlyMuted bool, reportedAfter string, reportedBefore string, reviewState string, reviewedAfter string, reviewedBefore string, sortDirection string, sortField string, subject string, subjectType string, tags []string, takendown bool) (*ModerationQueryStatuses_Output, error) { var out ModerationQueryStatuses_Output params := map[string]interface{}{ "appealed": appealed, + "collections": collections, "comment": comment, "cursor": cursor, "excludeTags": excludeTags, @@ -53,6 +56,7 @@ func ModerationQueryStatuses(ctx context.Context, c *xrpc.Client, appealed bool, "sortDirection": sortDirection, "sortField": sortField, "subject": subject, + "subjectType": subjectType, "tags": tags, "takendown": takendown, } From 5116f9a3b1673b9a44ec1de1df155eff6c31670c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 30 Oct 2024 15:54:37 -0700 Subject: [PATCH 27/50] ozone sets API --- api/ozone/setaddValues.go | 28 +++++++++++++++++++++++++++ api/ozone/setdefs.go | 20 +++++++++++++++++++ api/ozone/setdeleteSet.go | 31 ++++++++++++++++++++++++++++++ api/ozone/setdeleteValues.go | 28 +++++++++++++++++++++++++++ api/ozone/setgetValues.go | 34 +++++++++++++++++++++++++++++++++ 
api/ozone/setquerySets.go | 37 ++++++++++++++++++++++++++++++++++++ api/ozone/setupsertSet.go | 21 ++++++++++++++++++++ 7 files changed, 199 insertions(+) create mode 100644 api/ozone/setaddValues.go create mode 100644 api/ozone/setdefs.go create mode 100644 api/ozone/setdeleteSet.go create mode 100644 api/ozone/setdeleteValues.go create mode 100644 api/ozone/setgetValues.go create mode 100644 api/ozone/setquerySets.go create mode 100644 api/ozone/setupsertSet.go diff --git a/api/ozone/setaddValues.go b/api/ozone/setaddValues.go new file mode 100644 index 000000000..1835d5e92 --- /dev/null +++ b/api/ozone/setaddValues.go @@ -0,0 +1,28 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. + +package ozone + +// schema: tools.ozone.set.addValues + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// SetAddValues_Input is the input argument to a tools.ozone.set.addValues call. +type SetAddValues_Input struct { + // name: Name of the set to add values to + Name string `json:"name" cborgen:"name"` + // values: Array of string values to add to the set + Values []string `json:"values" cborgen:"values"` +} + +// SetAddValues calls the XRPC method "tools.ozone.set.addValues". +func SetAddValues(ctx context.Context, c *xrpc.Client, input *SetAddValues_Input) error { + if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.set.addValues", nil, input, nil); err != nil { + return err + } + + return nil +} diff --git a/api/ozone/setdefs.go b/api/ozone/setdefs.go new file mode 100644 index 000000000..2181b12fd --- /dev/null +++ b/api/ozone/setdefs.go @@ -0,0 +1,20 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. + +package ozone + +// schema: tools.ozone.set.defs + +// SetDefs_Set is a "set" in the tools.ozone.set.defs schema. 
+type SetDefs_Set struct { + Description *string `json:"description,omitempty" cborgen:"description,omitempty"` + Name string `json:"name" cborgen:"name"` +} + +// SetDefs_SetView is a "setView" in the tools.ozone.set.defs schema. +type SetDefs_SetView struct { + CreatedAt string `json:"createdAt" cborgen:"createdAt"` + Description *string `json:"description,omitempty" cborgen:"description,omitempty"` + Name string `json:"name" cborgen:"name"` + SetSize int64 `json:"setSize" cborgen:"setSize"` + UpdatedAt string `json:"updatedAt" cborgen:"updatedAt"` +} diff --git a/api/ozone/setdeleteSet.go b/api/ozone/setdeleteSet.go new file mode 100644 index 000000000..b5d192364 --- /dev/null +++ b/api/ozone/setdeleteSet.go @@ -0,0 +1,31 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. + +package ozone + +// schema: tools.ozone.set.deleteSet + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// SetDeleteSet_Input is the input argument to a tools.ozone.set.deleteSet call. +type SetDeleteSet_Input struct { + // name: Name of the set to delete + Name string `json:"name" cborgen:"name"` +} + +// SetDeleteSet_Output is the output of a tools.ozone.set.deleteSet call. +type SetDeleteSet_Output struct { +} + +// SetDeleteSet calls the XRPC method "tools.ozone.set.deleteSet". +func SetDeleteSet(ctx context.Context, c *xrpc.Client, input *SetDeleteSet_Input) (*SetDeleteSet_Output, error) { + var out SetDeleteSet_Output + if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.set.deleteSet", nil, input, &out); err != nil { + return nil, err + } + + return &out, nil +} diff --git a/api/ozone/setdeleteValues.go b/api/ozone/setdeleteValues.go new file mode 100644 index 000000000..34b62898a --- /dev/null +++ b/api/ozone/setdeleteValues.go @@ -0,0 +1,28 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 
+ +package ozone + +// schema: tools.ozone.set.deleteValues + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// SetDeleteValues_Input is the input argument to a tools.ozone.set.deleteValues call. +type SetDeleteValues_Input struct { + // name: Name of the set to delete values from + Name string `json:"name" cborgen:"name"` + // values: Array of string values to delete from the set + Values []string `json:"values" cborgen:"values"` +} + +// SetDeleteValues calls the XRPC method "tools.ozone.set.deleteValues". +func SetDeleteValues(ctx context.Context, c *xrpc.Client, input *SetDeleteValues_Input) error { + if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.set.deleteValues", nil, input, nil); err != nil { + return err + } + + return nil +} diff --git a/api/ozone/setgetValues.go b/api/ozone/setgetValues.go new file mode 100644 index 000000000..50e77fdda --- /dev/null +++ b/api/ozone/setgetValues.go @@ -0,0 +1,34 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. + +package ozone + +// schema: tools.ozone.set.getValues + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// SetGetValues_Output is the output of a tools.ozone.set.getValues call. +type SetGetValues_Output struct { + Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"` + Set *SetDefs_SetView `json:"set" cborgen:"set"` + Values []string `json:"values" cborgen:"values"` +} + +// SetGetValues calls the XRPC method "tools.ozone.set.getValues". 
+func SetGetValues(ctx context.Context, c *xrpc.Client, cursor string, limit int64, name string) (*SetGetValues_Output, error) { + var out SetGetValues_Output + + params := map[string]interface{}{ + "cursor": cursor, + "limit": limit, + "name": name, + } + if err := c.Do(ctx, xrpc.Query, "", "tools.ozone.set.getValues", params, nil, &out); err != nil { + return nil, err + } + + return &out, nil +} diff --git a/api/ozone/setquerySets.go b/api/ozone/setquerySets.go new file mode 100644 index 000000000..f2f31effb --- /dev/null +++ b/api/ozone/setquerySets.go @@ -0,0 +1,37 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. + +package ozone + +// schema: tools.ozone.set.querySets + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// SetQuerySets_Output is the output of a tools.ozone.set.querySets call. +type SetQuerySets_Output struct { + Cursor *string `json:"cursor,omitempty" cborgen:"cursor,omitempty"` + Sets []*SetDefs_SetView `json:"sets" cborgen:"sets"` +} + +// SetQuerySets calls the XRPC method "tools.ozone.set.querySets". +// +// sortDirection: Defaults to ascending order of name field. +func SetQuerySets(ctx context.Context, c *xrpc.Client, cursor string, limit int64, namePrefix string, sortBy string, sortDirection string) (*SetQuerySets_Output, error) { + var out SetQuerySets_Output + + params := map[string]interface{}{ + "cursor": cursor, + "limit": limit, + "namePrefix": namePrefix, + "sortBy": sortBy, + "sortDirection": sortDirection, + } + if err := c.Do(ctx, xrpc.Query, "", "tools.ozone.set.querySets", params, nil, &out); err != nil { + return nil, err + } + + return &out, nil +} diff --git a/api/ozone/setupsertSet.go b/api/ozone/setupsertSet.go new file mode 100644 index 000000000..acc266c4c --- /dev/null +++ b/api/ozone/setupsertSet.go @@ -0,0 +1,21 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. 
+ +package ozone + +// schema: tools.ozone.set.upsertSet + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// SetUpsertSet calls the XRPC method "tools.ozone.set.upsertSet". +func SetUpsertSet(ctx context.Context, c *xrpc.Client, input *SetUpsertSet_Input) (*SetDefs_SetView, error) { + var out SetDefs_SetView + if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.set.upsertSet", nil, input, &out); err != nil { + return nil, err + } + + return &out, nil +} From 672b10e976ea81dd95759711a99b3dffb656d85e Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 30 Oct 2024 15:56:42 -0700 Subject: [PATCH 28/50] hack: tools.ozone.set.upsertSet input is via ref --- api/ozone/setupsertSet.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/ozone/setupsertSet.go b/api/ozone/setupsertSet.go index acc266c4c..9f6cc5376 100644 --- a/api/ozone/setupsertSet.go +++ b/api/ozone/setupsertSet.go @@ -11,7 +11,7 @@ import ( ) // SetUpsertSet calls the XRPC method "tools.ozone.set.upsertSet". 
-func SetUpsertSet(ctx context.Context, c *xrpc.Client, input *SetUpsertSet_Input) (*SetDefs_SetView, error) { +func SetUpsertSet(ctx context.Context, c *xrpc.Client, input *SetDefs_Set) (*SetDefs_SetView, error) { var out SetDefs_SetView if err := c.Do(ctx, xrpc.Procedure, "application/json", "tools.ozone.set.upsertSet", nil, input, &out); err != nil { return nil, err From 466e6ec79cefeef6b9fc5543b4515e86d6ee2921 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 30 Oct 2024 16:13:45 -0700 Subject: [PATCH 29/50] updates for QueryModerationEvents calls --- automod/consumer/ozone.go | 35 ++++++++-------- automod/engine/persisthelpers.go | 70 ++++++++++++++++---------------- cmd/beemo/notify_reports.go | 35 ++++++++-------- cmd/gosky/admin.go | 69 ++++++++++++++++--------------- 4 files changed, 108 insertions(+), 101 deletions(-) diff --git a/automod/consumer/ozone.go b/automod/consumer/ozone.go index 0692ac393..2211cf21a 100644 --- a/automod/consumer/ozone.go +++ b/automod/consumer/ozone.go @@ -55,26 +55,27 @@ func (oc *OzoneConsumer) Run(ctx context.Context) error { period := time.Second * 5 for { - //func ModerationQueryEvents(ctx context.Context, c *xrpc.Client, addedLabels []string, addedTags []string, comment string, createdAfter string, createdBefore string, createdBy string, cursor string, hasComment bool, includeAllUserRecords bool, limit int64, removedLabels []string, removedTags []string, reportTypes []string, sortDirection string, subject string, types []string) (*ModerationQueryEvents_Output, error) { me, err := toolsozone.ModerationQueryEvents( ctx, oc.OzoneClient, - nil, // addedLabels: If specified, only events where all of these labels were added are returned - nil, // addedTags: If specified, only events where all of these tags were added are returned - "", // comment: If specified, only events with comments containing the keyword are returned - since.String(), // createdAfter: Retrieve events created after a given timestamp - "", // 
createdBefore: Retrieve events created before a given timestamp - "", // createdBy - "", // cursor - false, // hasComment: If true, only events with comments are returned - true, // includeAllUserRecords: If true, events on all record types (posts, lists, profile etc.) owned by the did are returned - limit, - nil, // removedLabels: If specified, only events where all of these labels were removed are returned - nil, // removedTags - nil, // reportTypes - "asc", // sortDirection: Sort direction for the events. Defaults to descending order of created at timestamp. - "", // subject - nil, // types: The types of events (fully qualified string in the format of tools.ozone.moderation.defs#modEvent) to filter by. If not specified, all events are returned. + nil, // addedLabels []string + nil, // addedTags []string + nil, // collections []string + "", // comment string + since.String(), // createdAfter string + "", // createdBefore string + "", // createdBy string + "", // cursor string + false, // hasComment bool + true, // includeAllUserRecords bool + limit, // limit int64 + nil, // removedLabels []string + nil, // removedTags []string + nil, // reportTypes []string + "asc", // sortDirection string + "", // subject string + "", // subjectType string + nil, // types []string ) if err != nil { oc.Logger.Warn("ozone query events failed; sleeping then will retrying", "err", err, "period", period.String()) diff --git a/automod/engine/persisthelpers.go b/automod/engine/persisthelpers.go index 491a86fa4..42ba8934e 100644 --- a/automod/engine/persisthelpers.go +++ b/automod/engine/persisthelpers.go @@ -155,26 +155,27 @@ func (eng *Engine) createReportIfFresh(ctx context.Context, xrpcc *xrpc.Client, // before creating a report, query to see if automod has already reported this account in the past week for the same reason // NOTE: this is running in an inner loop (if there are multiple reports), which is a bit inefficient, but seems acceptable - // ModerationQueryEvents(ctx 
context.Context, c *xrpc.Client, createdBy string, cursor string, inc ludeAllUserRecords bool, limit int64, sortDirection string, subject string, types []string) resp, err := toolsozone.ModerationQueryEvents( ctx, xrpcc, - nil, - nil, - "", - "", - "", - xrpcc.Auth.Did, - "", - false, - false, - 5, - nil, - nil, - nil, - "", - did.String(), - []string{"tools.ozone.moderation.defs#modEventReport"}, + nil, // addedLabels []string + nil, // addedTags []string + nil, // collections []string + "", // comment string + "", // createdAfter string + "", // createdBefore string + xrpcc.Auth.Did, // createdBy string + "", // cursor string + false, // hasComment bool + false, // includeAllUserRecords bool + 5, // limit int64 + nil, // removedLabels []string + nil, // removedTags []string + nil, // reportTypes []string + "", // sortDirection string + did.String(), // subject string + "", // subjectType string + []string{"tools.ozone.moderation.defs#modEventReport"}, // types []string ) if err != nil { @@ -231,26 +232,27 @@ func (eng *Engine) createRecordReportIfFresh(ctx context.Context, xrpcc *xrpc.Cl // before creating a report, query to see if automod has already reported this account in the past week for the same reason // NOTE: this is running in an inner loop (if there are multiple reports), which is a bit inefficient, but seems acceptable - // ModerationQueryEvents(ctx context.Context, c *xrpc.Client, createdBy string, cursor string, inc ludeAllUserRecords bool, limit int64, sortDirection string, subject string, types []string) resp, err := toolsozone.ModerationQueryEvents( ctx, xrpcc, - nil, - nil, - "", - "", - "", - xrpcc.Auth.Did, - "", - false, - false, - 5, - nil, - nil, - nil, - "", - uri.String(), - []string{"tools.ozone.moderation.defs#modEventReport"}, + nil, // addedLabels []string + nil, // addedTags []string + nil, // collections []string + "", // comment string + "", // createdAfter string + "", // createdBefore string + xrpcc.Auth.Did, // createdBy string 
+ "", // cursor string + false, // hasComment bool + false, // includeAllUserRecords bool + 5, // limit int64 + nil, // removedLabels []string + nil, // removedTags []string + nil, // reportTypes []string + "", // sortDirection string + uri.String(), // subject string + "", // subjectType string + []string{"tools.ozone.moderation.defs#modEventReport"}, // types []string ) if err != nil { return false, err diff --git a/cmd/beemo/notify_reports.go b/cmd/beemo/notify_reports.go index 5404619bc..7593bd87c 100644 --- a/cmd/beemo/notify_reports.go +++ b/cmd/beemo/notify_reports.go @@ -68,27 +68,28 @@ func pollNewReports(cctx *cli.Context) error { xrpcc.Auth.RefreshJwt = refresh.RefreshJwt // query just new reports (regardless of resolution state) - // ModerationQueryEvents(ctx context.Context, c *xrpc.Client, createdBy string, cursor string, includeAllUserRecords bool, limit int64, sortDirection string, subject string, types []string) (*ModerationQueryEvents_Output, error) var limit int64 = 50 me, err := toolsozone.ModerationQueryEvents( cctx.Context, xrpcc, - nil, - nil, - "", - "", - "", - "", - "", - false, - true, - limit, - nil, - nil, - nil, - "", - "", - []string{"tools.ozone.moderation.defs#modEventReport"}, + nil, // addedLabels []string + nil, // addedTags []string + nil, // collections []string + "", // comment string + "", // createdAfter string + "", // createdBefore string + "", // createdBy string + "", // cursor string + false, // hasComment bool + true, // includeAllUserRecords bool + limit, // limit int64 + nil, // removedLabels []string + nil, // removedTags []string + nil, // reportTypes []string + "", // sortDirection string + "", // subject string + "", // subjectType string + []string{"tools.ozone.moderation.defs#modEventReport"}, // types []string ) if err != nil { return err diff --git a/cmd/gosky/admin.go b/cmd/gosky/admin.go index 769577e5e..9467975da 100644 --- a/cmd/gosky/admin.go +++ b/cmd/gosky/admin.go @@ -389,26 +389,27 @@ var 
listReportsCmd = &cli.Command{ xrpcc.AdminToken = &adminKey // fetch recent moderation reports - // AdminQueryModerationEvents(ctx context.Context, c *xrpc.Client, createdBy string, cursor string, includeAllUserRecords bool, limit int64, sortDirection string, subject string, types []string) (*AdminQueryModerationEvents_Output, error) resp, err := toolsozone.ModerationQueryEvents( ctx, xrpcc, - nil, - nil, - "", - "", - "", - "", - "", - false, - false, - 100, - nil, - nil, - nil, - "", - "", - []string{"tools.ozone.moderation.defs#modEventReport"}, + nil, // addedLabels []string + nil, // addedTags []string + nil, // collections []string + "", // comment string + "", // createdAfter string + "", // createdBefore string + "", // createdBy string + "", // cursor string + false, // hasComment bool + false, // includeAllUserRecords bool + 100, // limit int64 + nil, // removedLabels []string + nil, // removedTags []string + nil, // reportTypes []string + "", // sortDirection string + "", // subject string + "", // subjectType string + []string{"tools.ozone.moderation.defs#modEventReport"}, // types []string ) if err != nil { return err @@ -705,22 +706,24 @@ var queryModerationStatusesCmd = &cli.Command{ resp, err := toolsozone.ModerationQueryEvents( ctx, xrpcc, - nil, - nil, - "", - "", - "", - "", - "", - false, - false, - 100, - nil, - nil, - nil, - "", - "", - []string{"tools.ozone.moderation.defs#modEventReport"}, + nil, // addedLabels []string + nil, // addedTags []string + nil, // collections []string + "", // comment string + "", // createdAfter string + "", // createdBefore string + "", // createdBy string + "", // cursor string + false, // hasComment bool + false, // includeAllUserRecords bool + 100, // limit int64 + nil, // removedLabels []string + nil, // removedTags []string + nil, // reportTypes []string + "", // sortDirection string + "", // subject string + "", // subjectType string + []string{"tools.ozone.moderation.defs#modEventReport"}, // types 
[]string ) if err != nil { return err From bdb4c396b081319cf3c5bcc348b4749c7940316a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 30 Oct 2024 23:24:33 -0700 Subject: [PATCH 30/50] more lexgen --- api/atproto/admindefs.go | 29 ++++++++++++------- api/bsky/unspeccedgetConfig.go | 26 +++++++++++++++++ api/ozone/moderationdefs.go | 52 ++++++++++++++++++---------------- 3 files changed, 71 insertions(+), 36 deletions(-) create mode 100644 api/bsky/unspeccedgetConfig.go diff --git a/api/atproto/admindefs.go b/api/atproto/admindefs.go index f674117d6..c6e424236 100644 --- a/api/atproto/admindefs.go +++ b/api/atproto/admindefs.go @@ -10,17 +10,18 @@ import ( // AdminDefs_AccountView is a "accountView" in the com.atproto.admin.defs schema. type AdminDefs_AccountView struct { - DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` - Did string `json:"did" cborgen:"did"` - Email *string `json:"email,omitempty" cborgen:"email,omitempty"` - EmailConfirmedAt *string `json:"emailConfirmedAt,omitempty" cborgen:"emailConfirmedAt,omitempty"` - Handle string `json:"handle" cborgen:"handle"` - IndexedAt string `json:"indexedAt" cborgen:"indexedAt"` - InviteNote *string `json:"inviteNote,omitempty" cborgen:"inviteNote,omitempty"` - InvitedBy *ServerDefs_InviteCode `json:"invitedBy,omitempty" cborgen:"invitedBy,omitempty"` - Invites []*ServerDefs_InviteCode `json:"invites,omitempty" cborgen:"invites,omitempty"` - InvitesDisabled *bool `json:"invitesDisabled,omitempty" cborgen:"invitesDisabled,omitempty"` - RelatedRecords []*util.LexiconTypeDecoder `json:"relatedRecords,omitempty" cborgen:"relatedRecords,omitempty"` + DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` + Did string `json:"did" cborgen:"did"` + Email *string `json:"email,omitempty" cborgen:"email,omitempty"` + EmailConfirmedAt *string `json:"emailConfirmedAt,omitempty" cborgen:"emailConfirmedAt,omitempty"` + Handle string `json:"handle" 
cborgen:"handle"` + IndexedAt string `json:"indexedAt" cborgen:"indexedAt"` + InviteNote *string `json:"inviteNote,omitempty" cborgen:"inviteNote,omitempty"` + InvitedBy *ServerDefs_InviteCode `json:"invitedBy,omitempty" cborgen:"invitedBy,omitempty"` + Invites []*ServerDefs_InviteCode `json:"invites,omitempty" cborgen:"invites,omitempty"` + InvitesDisabled *bool `json:"invitesDisabled,omitempty" cborgen:"invitesDisabled,omitempty"` + RelatedRecords []*util.LexiconTypeDecoder `json:"relatedRecords,omitempty" cborgen:"relatedRecords,omitempty"` + ThreatSignatures []*AdminDefs_ThreatSignature `json:"threatSignatures,omitempty" cborgen:"threatSignatures,omitempty"` } // AdminDefs_RepoBlobRef is a "repoBlobRef" in the com.atproto.admin.defs schema. @@ -46,3 +47,9 @@ type AdminDefs_StatusAttr struct { Applied bool `json:"applied" cborgen:"applied"` Ref *string `json:"ref,omitempty" cborgen:"ref,omitempty"` } + +// AdminDefs_ThreatSignature is a "threatSignature" in the com.atproto.admin.defs schema. +type AdminDefs_ThreatSignature struct { + Property string `json:"property" cborgen:"property"` + Value string `json:"value" cborgen:"value"` +} diff --git a/api/bsky/unspeccedgetConfig.go b/api/bsky/unspeccedgetConfig.go new file mode 100644 index 000000000..7bc728341 --- /dev/null +++ b/api/bsky/unspeccedgetConfig.go @@ -0,0 +1,26 @@ +// Code generated by cmd/lexgen (see Makefile's lexgen); DO NOT EDIT. + +package bsky + +// schema: app.bsky.unspecced.getConfig + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// UnspeccedGetConfig_Output is the output of a app.bsky.unspecced.getConfig call. +type UnspeccedGetConfig_Output struct { + CheckEmailConfirmed *bool `json:"checkEmailConfirmed,omitempty" cborgen:"checkEmailConfirmed,omitempty"` +} + +// UnspeccedGetConfig calls the XRPC method "app.bsky.unspecced.getConfig". 
+func UnspeccedGetConfig(ctx context.Context, c *xrpc.Client) (*UnspeccedGetConfig_Output, error) { + var out UnspeccedGetConfig_Output + if err := c.Do(ctx, xrpc.Query, "", "app.bsky.unspecced.getConfig", nil, nil, &out); err != nil { + return nil, err + } + + return &out, nil +} diff --git a/api/ozone/moderationdefs.go b/api/ozone/moderationdefs.go index 038bcbe7a..f419676ec 100644 --- a/api/ozone/moderationdefs.go +++ b/api/ozone/moderationdefs.go @@ -687,37 +687,39 @@ type ModerationDefs_RecordViewNotFound struct { // // RECORDTYPE: ModerationDefs_RepoView type ModerationDefs_RepoView struct { - LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#repoView" cborgen:"$type,const=tools.ozone.moderation.defs#repoView"` - DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` - Did string `json:"did" cborgen:"did"` - Email *string `json:"email,omitempty" cborgen:"email,omitempty"` - Handle string `json:"handle" cborgen:"handle"` - IndexedAt string `json:"indexedAt" cborgen:"indexedAt"` - InviteNote *string `json:"inviteNote,omitempty" cborgen:"inviteNote,omitempty"` - InvitedBy *comatprototypes.ServerDefs_InviteCode `json:"invitedBy,omitempty" cborgen:"invitedBy,omitempty"` - InvitesDisabled *bool `json:"invitesDisabled,omitempty" cborgen:"invitesDisabled,omitempty"` - Moderation *ModerationDefs_Moderation `json:"moderation" cborgen:"moderation"` - RelatedRecords []*util.LexiconTypeDecoder `json:"relatedRecords" cborgen:"relatedRecords"` + LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#repoView" cborgen:"$type,const=tools.ozone.moderation.defs#repoView"` + DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` + Did string `json:"did" cborgen:"did"` + Email *string `json:"email,omitempty" cborgen:"email,omitempty"` + Handle string `json:"handle" cborgen:"handle"` + IndexedAt string `json:"indexedAt" cborgen:"indexedAt"` + InviteNote *string 
`json:"inviteNote,omitempty" cborgen:"inviteNote,omitempty"` + InvitedBy *comatprototypes.ServerDefs_InviteCode `json:"invitedBy,omitempty" cborgen:"invitedBy,omitempty"` + InvitesDisabled *bool `json:"invitesDisabled,omitempty" cborgen:"invitesDisabled,omitempty"` + Moderation *ModerationDefs_Moderation `json:"moderation" cborgen:"moderation"` + RelatedRecords []*util.LexiconTypeDecoder `json:"relatedRecords" cborgen:"relatedRecords"` + ThreatSignatures []*comatprototypes.AdminDefs_ThreatSignature `json:"threatSignatures,omitempty" cborgen:"threatSignatures,omitempty"` } // ModerationDefs_RepoViewDetail is a "repoViewDetail" in the tools.ozone.moderation.defs schema. // // RECORDTYPE: ModerationDefs_RepoViewDetail type ModerationDefs_RepoViewDetail struct { - LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#repoViewDetail" cborgen:"$type,const=tools.ozone.moderation.defs#repoViewDetail"` - DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` - Did string `json:"did" cborgen:"did"` - Email *string `json:"email,omitempty" cborgen:"email,omitempty"` - EmailConfirmedAt *string `json:"emailConfirmedAt,omitempty" cborgen:"emailConfirmedAt,omitempty"` - Handle string `json:"handle" cborgen:"handle"` - IndexedAt string `json:"indexedAt" cborgen:"indexedAt"` - InviteNote *string `json:"inviteNote,omitempty" cborgen:"inviteNote,omitempty"` - InvitedBy *comatprototypes.ServerDefs_InviteCode `json:"invitedBy,omitempty" cborgen:"invitedBy,omitempty"` - Invites []*comatprototypes.ServerDefs_InviteCode `json:"invites,omitempty" cborgen:"invites,omitempty"` - InvitesDisabled *bool `json:"invitesDisabled,omitempty" cborgen:"invitesDisabled,omitempty"` - Labels []*comatprototypes.LabelDefs_Label `json:"labels,omitempty" cborgen:"labels,omitempty"` - Moderation *ModerationDefs_ModerationDetail `json:"moderation" cborgen:"moderation"` - RelatedRecords []*util.LexiconTypeDecoder `json:"relatedRecords" cborgen:"relatedRecords"` + 
LexiconTypeID string `json:"$type,const=tools.ozone.moderation.defs#repoViewDetail" cborgen:"$type,const=tools.ozone.moderation.defs#repoViewDetail"` + DeactivatedAt *string `json:"deactivatedAt,omitempty" cborgen:"deactivatedAt,omitempty"` + Did string `json:"did" cborgen:"did"` + Email *string `json:"email,omitempty" cborgen:"email,omitempty"` + EmailConfirmedAt *string `json:"emailConfirmedAt,omitempty" cborgen:"emailConfirmedAt,omitempty"` + Handle string `json:"handle" cborgen:"handle"` + IndexedAt string `json:"indexedAt" cborgen:"indexedAt"` + InviteNote *string `json:"inviteNote,omitempty" cborgen:"inviteNote,omitempty"` + InvitedBy *comatprototypes.ServerDefs_InviteCode `json:"invitedBy,omitempty" cborgen:"invitedBy,omitempty"` + Invites []*comatprototypes.ServerDefs_InviteCode `json:"invites,omitempty" cborgen:"invites,omitempty"` + InvitesDisabled *bool `json:"invitesDisabled,omitempty" cborgen:"invitesDisabled,omitempty"` + Labels []*comatprototypes.LabelDefs_Label `json:"labels,omitempty" cborgen:"labels,omitempty"` + Moderation *ModerationDefs_ModerationDetail `json:"moderation" cborgen:"moderation"` + RelatedRecords []*util.LexiconTypeDecoder `json:"relatedRecords" cborgen:"relatedRecords"` + ThreatSignatures []*comatprototypes.AdminDefs_ThreatSignature `json:"threatSignatures,omitempty" cborgen:"threatSignatures,omitempty"` } // ModerationDefs_RepoViewNotFound is a "repoViewNotFound" in the tools.ozone.moderation.defs schema. 
From d0cf4e6bb1affc09e588ef8afbd22b705fa2e5b1 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Thu, 31 Oct 2024 23:06:27 +0000 Subject: [PATCH 31/50] Update usage --- cmd/bigsky/main.go | 11 ++++++++++- events/diskpersist.go | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index 5fb9f73da..c4d7d7b28 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -188,6 +188,12 @@ func run(args []string) error { EnvVars: []string{"RELAY_DID_CACHE_SIZE"}, Value: 5_000_000, }, + &cli.DurationFlag{ + Name: "event-playback-ttl", + Usage: "time to live for event playback buffering (only applies to disk persister)", + EnvVars: []string{"RELAY_EVENT_PLAYBACK_TTL"}, + Value: 72 * time.Hour, + }, } app.Action = runBigsky @@ -327,7 +333,10 @@ func runBigsky(cctx *cli.Context) error { if dpd := cctx.String("disk-persister-dir"); dpd != "" { log.Infow("setting up disk persister") - dp, err := events.NewDiskPersistence(dpd, "", db, events.DefaultDiskPersistOptions()) + + pOpts := events.DefaultDiskPersistOptions() + pOpts.Retention = cctx.Duration("event-playback-ttl") + dp, err := events.NewDiskPersistence(dpd, "", db, pOpts) if err != nil { return fmt.Errorf("setting up disk persister: %w", err) } diff --git a/events/diskpersist.go b/events/diskpersist.go index b793ee843..25eb989af 100644 --- a/events/diskpersist.go +++ b/events/diskpersist.go @@ -81,8 +81,8 @@ type DiskPersistOptions struct { func DefaultDiskPersistOptions() *DiskPersistOptions { return &DiskPersistOptions{ EventsPerFile: 10_000, - UIDCacheSize: 100_000, - DIDCacheSize: 100_000, + UIDCacheSize: 1_000_000, + DIDCacheSize: 1_000_000, WriteBufferSize: 50, Retention: time.Hour * 24 * 3, // 3 days } From 7b818d1eb06026e591da9b85bb8320418e5d1dd8 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 4 Nov 2024 17:04:37 -0800 Subject: [PATCH 32/50] lexgen: handle 'ref' as procedure input type --- lex/gen.go | 11 +++++++++-- lex/type_schema.go | 4 ++-- 2 
files changed, 11 insertions(+), 4 deletions(-) diff --git a/lex/gen.go b/lex/gen.go index af8dab70d..29447bb53 100644 --- a/lex/gen.go +++ b/lex/gen.go @@ -232,9 +232,16 @@ func writeMethods(typename string, ts *TypeSchema, w io.Writer) error { case "record": return nil case "query": - return ts.WriteRPC(w, typename) + return ts.WriteRPC(w, typename, fmt.Sprintf("%s_Input", typename)) case "procedure": - return ts.WriteRPC(w, typename) + if ts.Input == nil || ts.Input.Schema == nil || ts.Input.Schema.Type == "object" { + return ts.WriteRPC(w, typename, fmt.Sprintf("%s_Input", typename)) + } else if ts.Input.Schema.Type == "ref" { + inputname, _ := ts.namesFromRef(ts.Input.Schema.Ref) + return ts.WriteRPC(w, typename, inputname) + } else { + return fmt.Errorf("unhandled input type: %s", ts.Input.Schema.Type) + } case "object", "string": return nil case "subscription": diff --git a/lex/type_schema.go b/lex/type_schema.go index aeeb7389d..fcec3575a 100644 --- a/lex/type_schema.go +++ b/lex/type_schema.go @@ -50,7 +50,7 @@ type TypeSchema struct { Maximum any `json:"maximum"` } -func (s *TypeSchema) WriteRPC(w io.Writer, typename string) error { +func (s *TypeSchema) WriteRPC(w io.Writer, typename, inputname string) error { pf := printerf(w) fname := typename @@ -65,7 +65,7 @@ func (s *TypeSchema) WriteRPC(w io.Writer, typename string) error { case EncodingCBOR, EncodingCAR, EncodingANY, EncodingMP4: params = fmt.Sprintf("%s, input io.Reader", params) case EncodingJSON: - params = fmt.Sprintf("%s, input *%s_Input", params, fname) + params = fmt.Sprintf("%s, input *%s", params, inputname) default: return fmt.Errorf("unsupported input encoding (RPC input): %q", s.Input.Encoding) From 0739b895fc89817068274d2fef051c2af71878ff Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 4 Nov 2024 17:22:58 -0800 Subject: [PATCH 33/50] wire up abuse metadata from ozone to automod --- automod/engine/account_meta.go | 11 +++++++++-- automod/engine/fetch_account_meta.go | 7 +++++++ 
2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/automod/engine/account_meta.go b/automod/engine/account_meta.go index e7d4e86ba..6270908f3 100644 --- a/automod/engine/account_meta.go +++ b/automod/engine/account_meta.go @@ -36,12 +36,19 @@ type ProfileSummary struct { DisplayName *string } +// opaque fingerprints for correlating abusive accounts +type AbuseSignature struct { + Property string + Value string +} + type AccountPrivate struct { Email string EmailConfirmed bool IndexedAt *time.Time AccountTags []string // ReviewState will be one of ReviewStateEscalated, ReviewStateOpen, ReviewStateClosed, ReviewStateNone, or "" (unknown) - ReviewState string - Appealed bool + ReviewState string + Appealed bool + AbuseSignatures []AbuseSignature } diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index 5bc5c3637..fd4fc11ed 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -148,6 +148,13 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( } } } + if rd.ThreatSignatures != nil || len(rd.ThreatSignatures) > 0 { + asigs := make([]AbuseSignature, len(rd.ThreatSignatures)) + for i, sig := range rd.ThreatSignatures { + asigs[i] = AbuseSignature{Property: sig.Property, Value: sig.Value} + } + am.Private.AbuseSignatures = asigs + } am.Private = &ap } } From 73bb35e35c25bca2b5626691148a958a5401e69e Mon Sep 17 00:00:00 2001 From: Hailey Date: Mon, 4 Nov 2024 17:27:52 -0800 Subject: [PATCH 34/50] update --- automod/engine/cid_from_cdn_test.go | 2 +- automod/engine/fetch_account_meta.go | 21 --------------------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/automod/engine/cid_from_cdn_test.go b/automod/engine/cid_from_cdn_test.go index cc7553cb8..0780403ca 100644 --- a/automod/engine/cid_from_cdn_test.go +++ b/automod/engine/cid_from_cdn_test.go @@ -37,6 +37,6 @@ func TestCidFromCdnUrl(t *testing.T) { } for _, fix := range fixtures { - 
assert.Equal(fix.cid, CidFromCdnUrl(&fix.url)) + assert.Equal(fix.cid, cidFromCdnUrl(&fix.url)) } } diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index b634c856a..2b51930c6 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -5,8 +5,6 @@ import ( "encoding/json" "errors" "fmt" - "net/url" - "strings" "time" comatproto "github.com/bluesky-social/indigo/api/atproto" @@ -19,25 +17,6 @@ import ( var newAccountRetryDuration = 3 * 1000 * time.Millisecond -// get the cid from a bluesky cdn url -func CidFromCdnUrl(str *string) *string { - if str == nil { - return nil - } - - u, err := url.Parse(*str) - if err != nil || u.Host != "cdn.bsky.app" { - return nil - } - - parts := strings.Split(u.Path, "/") - if len(parts) != 6 { - return nil - } - - return &strings.Split(parts[5], "@")[0] -} - // Helper to hydrate metadata about an account from several sources: PDS (if access), mod service (if access), public identity resolution func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) (*AccountMeta, error) { From aa48298d153754a6a91a6ccfbf1d043ee14b1d9c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 4 Nov 2024 21:43:56 -0800 Subject: [PATCH 35/50] fix segfaults --- automod/engine/engine.go | 4 ++++ automod/engine/fetch_account_meta.go | 2 +- automod/engine/persist.go | 8 ++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/automod/engine/engine.go b/automod/engine/engine.go index 8ed864371..b313a4492 100644 --- a/automod/engine/engine.go +++ b/automod/engine/engine.go @@ -351,6 +351,7 @@ func (e *Engine) CanonicalLogLineAccount(c *AccountContext) { c.Logger.Info("canonical-event-line", "accountLabels", c.effects.AccountLabels, "accountFlags", c.effects.AccountFlags, + "accountTags", c.effects.AccountTags, "accountTakedown", c.effects.AccountTakedown, "accountReports", len(c.effects.AccountReports), ) @@ -360,10 +361,12 @@ func (e *Engine) 
CanonicalLogLineRecord(c *RecordContext) { c.Logger.Info("canonical-event-line", "accountLabels", c.effects.AccountLabels, "accountFlags", c.effects.AccountFlags, + "accountTags", c.effects.AccountTags, "accountTakedown", c.effects.AccountTakedown, "accountReports", len(c.effects.AccountReports), "recordLabels", c.effects.RecordLabels, "recordFlags", c.effects.RecordFlags, + "recordTags", c.effects.RecordTags, "recordTakedown", c.effects.RecordTakedown, "recordReports", len(c.effects.RecordReports), ) @@ -373,6 +376,7 @@ func (e *Engine) CanonicalLogLineNotification(c *NotificationContext) { c.Logger.Info("canonical-event-line", "accountLabels", c.effects.AccountLabels, "accountFlags", c.effects.AccountFlags, + "accountTags", c.effects.AccountTags, "accountTakedown", c.effects.AccountTakedown, "accountReports", len(c.effects.AccountReports), "reject", c.effects.RejectEvent, diff --git a/automod/engine/fetch_account_meta.go b/automod/engine/fetch_account_meta.go index fd4fc11ed..775e92eca 100644 --- a/automod/engine/fetch_account_meta.go +++ b/automod/engine/fetch_account_meta.go @@ -153,7 +153,7 @@ func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) ( for i, sig := range rd.ThreatSignatures { asigs[i] = AbuseSignature{Property: sig.Property, Value: sig.Value} } - am.Private.AbuseSignatures = asigs + ap.AbuseSignatures = asigs } am.Private = &ap } diff --git a/automod/engine/persist.go b/automod/engine/persist.go index e289a64ef..3c4b36fd0 100644 --- a/automod/engine/persist.go +++ b/automod/engine/persist.go @@ -285,7 +285,11 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { existingLabels = dedupeStrings(existingLabels) negLabels = dedupeStrings(negLabels) newLabels = dedupeLabelActions(newLabels, existingLabels, negLabels) - newTags = dedupeTagActions(newTags, rv.Moderation.SubjectStatus.Tags) + existingTags := []string{} + if rv.Moderation != nil && rv.Moderation.SubjectStatus != nil && 
rv.Moderation.SubjectStatus.Tags != nil { + existingTags = rv.Moderation.SubjectStatus.Tags + } + newTags = dedupeTagActions(newTags, existingTags) } } @@ -389,7 +393,7 @@ func (eng *Engine) persistRecordModActions(c *RecordContext) error { CreatedBy: xrpcc.Auth.Did, Event: &toolsozone.ModerationEmitEvent_Input_Event{ ModerationDefs_ModEventTag: &toolsozone.ModerationDefs_ModEventTag{ - Add: newLabels, + Add: newTags, Remove: []string{}, Comment: &comment, }, From e5a1ac546b3b76e8022b26867f6f02a9c45e7d6f Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 5 Nov 2024 15:05:17 -0800 Subject: [PATCH 36/50] hepa: fix routine order --- cmd/hepa/main.go | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index bceaaa189..92560f944 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -261,28 +261,6 @@ var runCmd = &cli.Command{ return fmt.Errorf("failed to construct server: %v", err) } - // firehose event consumer - relayHost := cctx.String("atp-relay-host") - if relayHost != "" { - fc := consumer.FirehoseConsumer{ - Engine: srv.Engine, - Logger: logger.With("subsystem", "firehose-consumer"), - Host: cctx.String("atp-relay-host"), - Parallelism: cctx.Int("firehose-parallelism"), - RedisClient: srv.RedisClient, - } - - go func() { - if err := fc.RunPersistCursor(ctx); err != nil { - slog.Error("cursor routine failed", "err", err) - } - }() - - if err := fc.Run(ctx); err != nil { - return fmt.Errorf("failure consuming and processing firehose: %w", err) - } - } - // ozone event consumer (if configured) if srv.Engine.OzoneClient != nil { oc := consumer.OzoneConsumer{ @@ -314,6 +292,28 @@ var runCmd = &cli.Command{ } }() + // firehose event consumer (note this is actually mandatory) + relayHost := cctx.String("atp-relay-host") + if relayHost != "" { + fc := consumer.FirehoseConsumer{ + Engine: srv.Engine, + Logger: logger.With("subsystem", "firehose-consumer"), + Host: 
cctx.String("atp-relay-host"), + Parallelism: cctx.Int("firehose-parallelism"), + RedisClient: srv.RedisClient, + } + + go func() { + if err := fc.RunPersistCursor(ctx); err != nil { + slog.Error("cursor routine failed", "err", err) + } + }() + + if err := fc.Run(ctx); err != nil { + return fmt.Errorf("failure consuming and processing firehose: %w", err) + } + } + return nil }, } From 716b3d5db9d85f1f6099f0c7aac383c59f64ecd8 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 5 Nov 2024 20:31:34 -0800 Subject: [PATCH 37/50] make goat record creation work w/o schema defs --- cmd/goat/record.go | 19 ++++++-------- cmd/goat/repocreateRecord.go | 51 ++++++++++++++++++++++++++++++++++++ cmd/goat/repoputRecord.go | 47 +++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 11 deletions(-) create mode 100644 cmd/goat/repocreateRecord.go create mode 100644 cmd/goat/repoputRecord.go diff --git a/cmd/goat/record.go b/cmd/goat/record.go index 7dfebc8e1..013913aa2 100644 --- a/cmd/goat/record.go +++ b/cmd/goat/record.go @@ -10,7 +10,6 @@ import ( "github.com/bluesky-social/indigo/atproto/data" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" - lexutil "github.com/bluesky-social/indigo/lex/util" "github.com/bluesky-social/indigo/xrpc" "github.com/urfave/cli/v2" @@ -231,9 +230,8 @@ func runRecordCreate(cctx *cli.Context) error { return err } - // TODO: replace this with something that allows arbitrary Lexicons, instead of needing registered types - var recordVal lexutil.LexiconTypeDecoder - if err = recordVal.UnmarshalJSON(recordBytes); err != nil { + recordVal, err := data.UnmarshalJSON(recordBytes) + if err != nil { return err } @@ -248,10 +246,10 @@ func runRecordCreate(cctx *cli.Context) error { } validate := !cctx.Bool("no-validate") - resp, err := comatproto.RepoCreateRecord(ctx, xrpcc, &comatproto.RepoCreateRecord_Input{ + resp, err := RepoCreateRecord(ctx, xrpcc, &RepoCreateRecord_Input{ Collection: 
nsid, Repo: xrpcc.Auth.Did, - Record: &recordVal, + Record: recordVal, Rkey: rkey, Validate: &validate, }) @@ -300,18 +298,17 @@ func runRecordUpdate(cctx *cli.Context) error { return err } - // TODO: replace this with something that allows arbitrary Lexicons, instead of needing registered types - var recordVal lexutil.LexiconTypeDecoder - if err = recordVal.UnmarshalJSON(recordBytes); err != nil { + recordVal, err := data.UnmarshalJSON(recordBytes) + if err != nil { return err } validate := !cctx.Bool("no-validate") - resp, err := comatproto.RepoPutRecord(ctx, xrpcc, &comatproto.RepoPutRecord_Input{ + resp, err := RepoPutRecord(ctx, xrpcc, &RepoPutRecord_Input{ Collection: nsid, Repo: xrpcc.Auth.Did, - Record: &recordVal, + Record: recordVal, Rkey: rkey, Validate: &validate, SwapRecord: existing.Cid, diff --git a/cmd/goat/repocreateRecord.go b/cmd/goat/repocreateRecord.go new file mode 100644 index 000000000..c1fa67a39 --- /dev/null +++ b/cmd/goat/repocreateRecord.go @@ -0,0 +1,51 @@ +// Copied from indigo:api/atproto/repocreateRecords.go + +package main + +// schema: com.atproto.repo.createRecord + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// RepoDefs_CommitMeta is a "commitMeta" in the com.atproto.repo.defs schema. +type RepoDefs_CommitMeta struct { + Cid string `json:"cid" cborgen:"cid"` + Rev string `json:"rev" cborgen:"rev"` +} + +// RepoCreateRecord_Input is the input argument to a com.atproto.repo.createRecord call. +type RepoCreateRecord_Input struct { + // collection: The NSID of the record collection. + Collection string `json:"collection" cborgen:"collection"` + // record: The record itself. Must contain a $type field. + Record map[string]any `json:"record" cborgen:"record"` + // repo: The handle or DID of the repo (aka, current account). + Repo string `json:"repo" cborgen:"repo"` + // rkey: The Record Key. 
+ Rkey *string `json:"rkey,omitempty" cborgen:"rkey,omitempty"` + // swapCommit: Compare and swap with the previous commit by CID. + SwapCommit *string `json:"swapCommit,omitempty" cborgen:"swapCommit,omitempty"` + // validate: Can be set to 'false' to skip Lexicon schema validation of record data, 'true' to require it, or leave unset to validate only for known Lexicons. + Validate *bool `json:"validate,omitempty" cborgen:"validate,omitempty"` +} + +// RepoCreateRecord_Output is the output of a com.atproto.repo.createRecord call. +type RepoCreateRecord_Output struct { + Cid string `json:"cid" cborgen:"cid"` + Commit *RepoDefs_CommitMeta `json:"commit,omitempty" cborgen:"commit,omitempty"` + Uri string `json:"uri" cborgen:"uri"` + ValidationStatus *string `json:"validationStatus,omitempty" cborgen:"validationStatus,omitempty"` +} + +// RepoCreateRecord calls the XRPC method "com.atproto.repo.createRecord". +func RepoCreateRecord(ctx context.Context, c *xrpc.Client, input *RepoCreateRecord_Input) (*RepoCreateRecord_Output, error) { + var out RepoCreateRecord_Output + if err := c.Do(ctx, xrpc.Procedure, "application/json", "com.atproto.repo.createRecord", nil, input, &out); err != nil { + return nil, err + } + + return &out, nil +} diff --git a/cmd/goat/repoputRecord.go b/cmd/goat/repoputRecord.go new file mode 100644 index 000000000..34011797b --- /dev/null +++ b/cmd/goat/repoputRecord.go @@ -0,0 +1,47 @@ +// Copied from indigo:api/atproto/repoputRecords.go + +package main + +// schema: com.atproto.repo.putRecord + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// RepoPutRecord_Input is the input argument to a com.atproto.repo.putRecord call. +type RepoPutRecord_Input struct { + // collection: The NSID of the record collection. + Collection string `json:"collection" cborgen:"collection"` + // record: The record to write. + Record map[string]any `json:"record" cborgen:"record"` + // repo: The handle or DID of the repo (aka, current account). 
+ Repo string `json:"repo" cborgen:"repo"` + // rkey: The Record Key. + Rkey string `json:"rkey" cborgen:"rkey"` + // swapCommit: Compare and swap with the previous commit by CID. + SwapCommit *string `json:"swapCommit,omitempty" cborgen:"swapCommit,omitempty"` + // swapRecord: Compare and swap with the previous record by CID. WARNING: nullable and optional field; may cause problems with golang implementation + SwapRecord *string `json:"swapRecord" cborgen:"swapRecord"` + // validate: Can be set to 'false' to skip Lexicon schema validation of record data, 'true' to require it, or leave unset to validate only for known Lexicons. + Validate *bool `json:"validate,omitempty" cborgen:"validate,omitempty"` +} + +// RepoPutRecord_Output is the output of a com.atproto.repo.putRecord call. +type RepoPutRecord_Output struct { + Cid string `json:"cid" cborgen:"cid"` + Commit *RepoDefs_CommitMeta `json:"commit,omitempty" cborgen:"commit,omitempty"` + Uri string `json:"uri" cborgen:"uri"` + ValidationStatus *string `json:"validationStatus,omitempty" cborgen:"validationStatus,omitempty"` +} + +// RepoPutRecord calls the XRPC method "com.atproto.repo.putRecord". 
+func RepoPutRecord(ctx context.Context, c *xrpc.Client, input *RepoPutRecord_Input) (*RepoPutRecord_Output, error) { + var out RepoPutRecord_Output + if err := c.Do(ctx, xrpc.Procedure, "application/json", "com.atproto.repo.putRecord", nil, input, &out); err != nil { + return nil, err + } + + return &out, nil +} From 29a16d0dc6de2396c2eb20c7390f7a72c0c6d594 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 5 Nov 2024 20:39:10 -0800 Subject: [PATCH 38/50] make goat pref handling schema-agnostic --- cmd/goat/account_migrate.go | 5 ++--- cmd/goat/actorgetPreferences.go | 28 ++++++++++++++++++++++++++++ cmd/goat/actorputPreferences.go | 25 +++++++++++++++++++++++++ cmd/goat/bsky_prefs.go | 12 ++++-------- 4 files changed, 59 insertions(+), 11 deletions(-) create mode 100644 cmd/goat/actorgetPreferences.go create mode 100644 cmd/goat/actorputPreferences.go diff --git a/cmd/goat/account_migrate.go b/cmd/goat/account_migrate.go index 72c66e3fb..fa3d60535 100644 --- a/cmd/goat/account_migrate.go +++ b/cmd/goat/account_migrate.go @@ -10,7 +10,6 @@ import ( "time" comatproto "github.com/bluesky-social/indigo/api/atproto" - appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/syntax" "github.com/bluesky-social/indigo/xrpc" @@ -167,11 +166,11 @@ func runAccountMigrate(cctx *cli.Context) error { slog.Info("migrating preferences") // TODO: service proxy header for AppView? 
- prefResp, err := appbsky.ActorGetPreferences(ctx, oldClient) + prefResp, err := ActorGetPreferences(ctx, oldClient) if err != nil { return fmt.Errorf("failed fetching old preferences: %w", err) } - err = appbsky.ActorPutPreferences(ctx, &newClient, &appbsky.ActorPutPreferences_Input{ + err = ActorPutPreferences(ctx, &newClient, &ActorPutPreferences_Input{ Preferences: prefResp.Preferences, }) if err != nil { diff --git a/cmd/goat/actorgetPreferences.go b/cmd/goat/actorgetPreferences.go new file mode 100644 index 000000000..bd6e8a18c --- /dev/null +++ b/cmd/goat/actorgetPreferences.go @@ -0,0 +1,28 @@ +// Copied from indigo:api/atproto/actorgetPreferences.go + +package main + +// schema: app.bsky.actor.getPreferences + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// ActorGetPreferences_Output is the output of a app.bsky.actor.getPreferences call. +type ActorGetPreferences_Output struct { + Preferences []map[string]any `json:"preferences" cborgen:"preferences"` +} + +// ActorGetPreferences calls the XRPC method "app.bsky.actor.getPreferences". +func ActorGetPreferences(ctx context.Context, c *xrpc.Client) (*ActorGetPreferences_Output, error) { + var out ActorGetPreferences_Output + + params := map[string]interface{}{} + if err := c.Do(ctx, xrpc.Query, "", "app.bsky.actor.getPreferences", params, nil, &out); err != nil { + return nil, err + } + + return &out, nil +} diff --git a/cmd/goat/actorputPreferences.go b/cmd/goat/actorputPreferences.go new file mode 100644 index 000000000..24042236d --- /dev/null +++ b/cmd/goat/actorputPreferences.go @@ -0,0 +1,25 @@ +// Copied from indigo:api/atproto/actorputPreferences.go + +package main + +// schema: app.bsky.actor.putPreferences + +import ( + "context" + + "github.com/bluesky-social/indigo/xrpc" +) + +// ActorPutPreferences_Input is the input argument to a app.bsky.actor.putPreferences call. 
+type ActorPutPreferences_Input struct { + Preferences []map[string]any `json:"preferences" cborgen:"preferences"` +} + +// ActorPutPreferences calls the XRPC method "app.bsky.actor.putPreferences". +func ActorPutPreferences(ctx context.Context, c *xrpc.Client, input *ActorPutPreferences_Input) error { + if err := c.Do(ctx, xrpc.Procedure, "application/json", "app.bsky.actor.putPreferences", nil, input, nil); err != nil { + return err + } + + return nil +} diff --git a/cmd/goat/bsky_prefs.go b/cmd/goat/bsky_prefs.go index e24965491..725072344 100644 --- a/cmd/goat/bsky_prefs.go +++ b/cmd/goat/bsky_prefs.go @@ -6,8 +6,6 @@ import ( "fmt" "os" - appbsky "github.com/bluesky-social/indigo/api/bsky" - "github.com/urfave/cli/v2" ) @@ -41,7 +39,7 @@ func runBskyPrefsExport(cctx *cli.Context) error { } // TODO: does indigo API code crash with unsupported preference '$type'? Eg "Lexicon decoder" with unsupported type. - resp, err := appbsky.ActorGetPreferences(ctx, xrpcc) + resp, err := ActorGetPreferences(ctx, xrpcc) if err != nil { return fmt.Errorf("failed fetching old preferences: %w", err) } @@ -74,14 +72,12 @@ func runBskyPrefsImport(cctx *cli.Context) error { return err } - var prefsArray []appbsky.ActorDefs_Preferences_Elem - err = json.Unmarshal(prefsBytes, &prefsArray) - if err != nil { + var prefsArray []map[string]any + if err = json.Unmarshal(prefsBytes, &prefsArray); err != nil { return err } - // WARNING: might clobber off-Lexicon or new-Lexicon data fields (which don't round-trip deserialization) - err = appbsky.ActorPutPreferences(ctx, xrpcc, &appbsky.ActorPutPreferences_Input{ + err = ActorPutPreferences(ctx, xrpcc, &ActorPutPreferences_Input{ Preferences: prefsArray, }) if err != nil { From 83930c910b127b56fba52292907c16b2e5d23b3a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 7 Nov 2024 16:27:38 -0800 Subject: [PATCH 39/50] bigsky: show something helpful when you hit home route --- bgs/bgs.go | 18 ++++++++++++++++++ 1 file changed, 18 
insertions(+) diff --git a/bgs/bgs.go b/bgs/bgs.go index 192f4ee3d..6ac5be78a 100644 --- a/bgs/bgs.go +++ b/bgs/bgs.go @@ -349,6 +349,7 @@ func (bgs *BGS) StartWithListener(listen net.Listener) error { e.GET("/xrpc/com.atproto.sync.notifyOfUpdate", bgs.HandleComAtprotoSyncNotifyOfUpdate) e.GET("/xrpc/_health", bgs.HandleHealthCheck) e.GET("/_health", bgs.HandleHealthCheck) + e.GET("/", bgs.HandleHomeMessage) admin := e.Group("/admin", bgs.checkAdminAuth) @@ -420,6 +421,23 @@ func (bgs *BGS) HandleHealthCheck(c echo.Context) error { } } +var homeMessage string = ` +d8888b. d888888b d888b .d8888. db dD db db +88 '8D '88' 88' Y8b 88' YP 88 ,8P' '8b d8' +88oooY' 88 88 '8bo. 88,8P '8bd8' +88~~~b. 88 88 ooo 'Y8b. 88'8b 88 +88 8D .88. 88. ~8~ db 8D 88 '88. 88 +Y8888P' Y888888P Y888P '8888Y' YP YD YP + +This is an atproto [https://atproto.com] relay instance, running the 'bigsky' codebase [https://github.com/bluesky-social/indigo] + +The firehose WebSocket path is at: /xrpc/com.atproto.sync.subscribeRepos +` + +func (bgs *BGS) HandleHomeMessage(c echo.Context) error { + return c.String(http.StatusOK, homeMessage) +} + type AuthToken struct { gorm.Model Token string `gorm:"index"` From 951384a7135020b24d7a0ea351e7ec84aeb5ea66 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 8 Nov 2024 10:05:06 -0800 Subject: [PATCH 40/50] relay: env var for persist dir, and alt env var for data dir --- cmd/bigsky/main.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index c4d7d7b28..3009db3db 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -85,7 +85,7 @@ func run(args []string) error { Name: "data-dir", Usage: "path of directory for CAR files and other data", Value: "data/bigsky", - EnvVars: []string{"DATA_DIR"}, + EnvVars: []string{"RELAY_DATA_DIR", "DATA_DIR"}, }, &cli.StringFlag{ Name: "plc-host", @@ -112,8 +112,9 @@ func run(args []string) error { EnvVars: []string{"RELAY_METRICS_LISTEN", 
"BGS_METRICS_LISTEN"}, }, &cli.StringFlag{ - Name: "disk-persister-dir", - Usage: "set directory for disk persister (implicitly enables disk persister)", + Name: "disk-persister-dir", + Usage: "set directory for disk persister (implicitly enables disk persister)", + EnvVars: []string{"RELAY_PERSISTER_DIR"}, }, &cli.StringFlag{ Name: "admin-key", From bf2ec9e41c78c6ea0e0f58b9ce770ab0611885b5 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 8 Nov 2024 13:52:05 -0800 Subject: [PATCH 41/50] hepa: wire up ozone consumption correctly --- cmd/hepa/main.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 92560f944..7883d9c29 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -264,9 +264,10 @@ var runCmd = &cli.Command{ // ozone event consumer (if configured) if srv.Engine.OzoneClient != nil { oc := consumer.OzoneConsumer{ - Engine: srv.Engine, Logger: logger.With("subsystem", "ozone-consumer"), RedisClient: srv.RedisClient, + OzoneClient: srv.Engine.OzoneClient, + Engine: srv.Engine, } go func() { From 1f22a51acc79097b4c1d8fdce70a148c5efb37f2 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Tue, 12 Nov 2024 21:07:20 -0500 Subject: [PATCH 42/50] simple http client for web did --- did/web.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/did/web.go b/did/web.go index 322520fdc..baea42e6c 100644 --- a/did/web.go +++ b/did/web.go @@ -6,18 +6,26 @@ import ( "fmt" "net/http" "strings" + "time" "unicode" "github.com/whyrusleeping/go-did" "go.opentelemetry.io/otel" ) +var webDidDefaultTimeout = 5 * time.Second + type WebResolver struct { Insecure bool // TODO: cache? 
maybe at a different layer + + client http.Client } func (wr *WebResolver) GetDocument(ctx context.Context, didstr string) (*Document, error) { + if wr.client.Timeout == 0 { + wr.client.Timeout = webDidDefaultTimeout + } ctx, span := otel.Tracer("did").Start(ctx, "didWebGetDocument") defer span.End() @@ -36,7 +44,7 @@ func (wr *WebResolver) GetDocument(ctx context.Context, didstr string) (*Documen proto = "http" } - resp, err := http.Get(proto + "://" + val + "/.well-known/did.json") + resp, err := wr.client.Get(proto + "://" + val + "/.well-known/did.json") if err != nil { return nil, err } From 8b7dba258df12d35a10a0fb8b79851e71120d1ba Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Wed, 13 Nov 2024 10:59:55 -0800 Subject: [PATCH 43/50] make compaction worker count configurable --- bgs/bgs.go | 26 +++++++++++++++----------- cmd/bigsky/main.go | 6 ++++++ 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/bgs/bgs.go b/bgs/bgs.go index 6ac5be78a..35dfab9d9 100644 --- a/bgs/bgs.go +++ b/bgs/bgs.go @@ -107,20 +107,22 @@ type SocketConsumer struct { } type BGSConfig struct { - SSL bool - CompactInterval time.Duration - DefaultRepoLimit int64 - ConcurrencyPerPDS int64 - MaxQueuePerPDS int64 + SSL bool + CompactInterval time.Duration + DefaultRepoLimit int64 + ConcurrencyPerPDS int64 + MaxQueuePerPDS int64 + NumCompactionWorkers int } func DefaultBGSConfig() *BGSConfig { return &BGSConfig{ - SSL: true, - CompactInterval: 4 * time.Hour, - DefaultRepoLimit: 100, - ConcurrencyPerPDS: 100, - MaxQueuePerPDS: 1_000, + SSL: true, + CompactInterval: 4 * time.Hour, + DefaultRepoLimit: 100, + ConcurrencyPerPDS: 100, + MaxQueuePerPDS: 1_000, + NumCompactionWorkers: 2, } } @@ -168,7 +170,9 @@ func NewBGS(db *gorm.DB, ix *indexer.Indexer, repoman *repomgr.RepoManager, evtm return nil, err } - compactor := NewCompactor(nil) + cOpts := DefaultCompactorOptions() + cOpts.NumWorkers = config.NumCompactionWorkers + compactor := NewCompactor(cOpts) compactor.requeueInterval = 
config.CompactInterval compactor.Start(bgs) bgs.compactor = compactor diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index 3009db3db..540796f51 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -195,6 +195,11 @@ func run(args []string) error { EnvVars: []string{"RELAY_EVENT_PLAYBACK_TTL"}, Value: 72 * time.Hour, }, + &cli.IntFlag{ + Name: "num-compaction-workers", + EnvVars: []string{"RELAY_NUM_COMPACTION_WORKERS"}, + Value: 2, + }, } app.Action = runBigsky @@ -413,6 +418,7 @@ func runBigsky(cctx *cli.Context) error { bgsConfig.ConcurrencyPerPDS = cctx.Int64("concurrency-per-pds") bgsConfig.MaxQueuePerPDS = cctx.Int64("max-queue-per-pds") bgsConfig.DefaultRepoLimit = cctx.Int64("default-repo-limit") + bgsConfig.NumCompactionWorkers = cctx.Int("num-compaction-workers") bgs, err := libbgs.NewBGS(db, ix, repoman, evtman, cachedidr, rf, hr, bgsConfig) if err != nil { return err From 11a0f1c68d301481439e7e773e9d22f14776a47f Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Wed, 13 Nov 2024 11:19:13 -0800 Subject: [PATCH 44/50] update to more optimized cborgen --- api/atproto/cbor_gen.go | 336 ++++++++++------ api/bsky/cbor_gen.go | 798 +++++++++++++++++++++++--------------- api/cbor_gen.go | 21 +- api/chat/cbor_gen.go | 21 +- atproto/data/cbor_gen.go | 63 +-- events/cbor_gen.go | 42 +- go.mod | 6 +- go.sum | 16 +- lex/util/cbor_gen.go | 63 +-- lex/util/cbor_gen_test.go | 126 +++--- mst/cbor_gen.go | 42 +- repo/cbor_gen.go | 42 +- util/labels/cbor_gen.go | 21 +- 13 files changed, 994 insertions(+), 603 deletions(-) diff --git a/api/atproto/cbor_gen.go b/api/atproto/cbor_gen.go index c1f325ebc..d0b57ccbb 100644 --- a/api/atproto/cbor_gen.go +++ b/api/atproto/cbor_gen.go @@ -129,21 +129,24 @@ func (t *RepoStrongRef) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("RepoStrongRef: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := 
cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Cid (string) (string) case "cid": @@ -180,7 +183,9 @@ func (t *RepoStrongRef) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -492,21 +497,24 @@ func (t *SyncSubscribeRepos_Commit) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Commit: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Ops ([]*atproto.SyncSubscribeRepos_RepoOp) (slice) case "ops": @@ -767,7 +775,9 @@ func (t *SyncSubscribeRepos_Commit) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -901,21 +911,24 @@ func (t *SyncSubscribeRepos_Handle) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Handle: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + 
nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -978,7 +991,9 @@ func (t *SyncSubscribeRepos_Handle) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1126,21 +1141,24 @@ func (t *SyncSubscribeRepos_Identity) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Identity: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -1213,7 +1231,9 @@ func (t *SyncSubscribeRepos_Identity) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1377,21 +1397,24 @@ func (t *SyncSubscribeRepos_Account) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Account: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := 
uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -1482,7 +1505,9 @@ func (t *SyncSubscribeRepos_Account) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1585,21 +1610,24 @@ func (t *SyncSubscribeRepos_Info) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Info: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 7) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Name (string) (string) case "name": @@ -1635,7 +1663,9 @@ func (t *SyncSubscribeRepos_Info) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1775,21 +1805,24 @@ func (t *SyncSubscribeRepos_Migrate) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Migrate: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) 
for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -1862,7 +1895,9 @@ func (t *SyncSubscribeRepos_Migrate) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1967,21 +2002,24 @@ func (t *SyncSubscribeRepos_RepoOp) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_RepoOp: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Cid (util.LexLink) (struct) case "cid": @@ -2027,7 +2065,9 @@ func (t *SyncSubscribeRepos_RepoOp) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2138,21 +2178,24 @@ func (t *SyncSubscribeRepos_Tombstone) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SyncSubscribeRepos_Tombstone: map struct too large (%d)", extra) } - var name string n := extra + 
nameBuf := make([]byte, 4) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -2204,7 +2247,9 @@ func (t *SyncSubscribeRepos_Tombstone) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2301,21 +2346,24 @@ func (t *LabelDefs_SelfLabels) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelDefs_SelfLabels: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -2390,7 +2438,9 @@ func (t *LabelDefs_SelfLabels) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2456,21 +2506,24 @@ func (t *LabelDefs_SelfLabel) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelDefs_SelfLabel: map struct too large (%d)", extra) } - var name string n := extra + 
nameBuf := make([]byte, 3) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Val (string) (string) case "val": @@ -2485,7 +2538,9 @@ func (t *LabelDefs_SelfLabel) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2790,21 +2845,24 @@ func (t *LabelDefs_Label) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelDefs_Label: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 3) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Cid (string) (string) case "cid": @@ -2986,7 +3044,9 @@ func (t *LabelDefs_Label) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3077,21 +3137,24 @@ func (t *LabelSubscribeLabels_Labels) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelSubscribeLabels_Labels: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := 
make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Seq (int64) (int64) case "seq": { @@ -3170,7 +3233,9 @@ func (t *LabelSubscribeLabels_Labels) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3273,21 +3338,24 @@ func (t *LabelSubscribeLabels_Info) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelSubscribeLabels_Info: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 7) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Name (string) (string) case "name": @@ -3323,7 +3391,9 @@ func (t *LabelSubscribeLabels_Info) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3527,21 +3597,24 @@ func (t *LabelDefs_LabelValueDefinition) UnmarshalCBOR(r io.Reader) (err error) return fmt.Errorf("LabelDefs_LabelValueDefinition: map struct too large (%d)", extra) } - var name string n 
:= extra + nameBuf := make([]byte, 14) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Blurs (string) (string) case "blurs": @@ -3681,7 +3754,9 @@ func (t *LabelDefs_LabelValueDefinition) UnmarshalCBOR(r io.Reader) (err error) default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3793,21 +3868,24 @@ func (t *LabelDefs_LabelValueDefinitionStrings) UnmarshalCBOR(r io.Reader) (err return fmt.Errorf("LabelDefs_LabelValueDefinitionStrings: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 11) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Lang (string) (string) case "lang": @@ -3844,7 +3922,9 @@ func (t *LabelDefs_LabelValueDefinitionStrings) UnmarshalCBOR(r io.Reader) (err default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/api/bsky/cbor_gen.go b/api/bsky/cbor_gen.go index bc4a94b78..383265fda 100644 --- a/api/bsky/cbor_gen.go +++ b/api/bsky/cbor_gen.go @@ -338,21 +338,24 
@@ func (t *FeedPost) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedPost: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Tags ([]string) (slice) case "tags": @@ -627,7 +630,9 @@ func (t *FeedPost) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -728,21 +733,24 @@ func (t *FeedRepost) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedRepost: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -788,7 +796,9 @@ func (t *FeedRepost) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -893,21 +903,24 @@ func (t *FeedPost_Entity) UnmarshalCBOR(r io.Reader) (err error) { return 
fmt.Errorf("FeedPost_Entity: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Type (string) (string) case "type": @@ -953,7 +966,9 @@ func (t *FeedPost_Entity) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1028,21 +1043,24 @@ func (t *FeedPost_ReplyRef) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedPost_ReplyRef: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Root (atproto.RepoStrongRef) (struct) case "root": @@ -1086,7 +1104,9 @@ func (t *FeedPost_ReplyRef) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1174,21 +1194,24 @@ func (t *FeedPost_TextSlice) UnmarshalCBOR(r io.Reader) (err error) { return 
fmt.Errorf("FeedPost_TextSlice: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.End (int64) (int64) case "end": { @@ -1244,7 +1267,9 @@ func (t *FeedPost_TextSlice) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1332,21 +1357,24 @@ func (t *EmbedImages) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedImages: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -1410,7 +1438,9 @@ func (t *EmbedImages) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1488,21 +1518,24 @@ func (t *EmbedExternal) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedExternal: map struct too 
large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -1537,7 +1570,9 @@ func (t *EmbedExternal) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1673,21 +1708,24 @@ func (t *EmbedExternal_External) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedExternal_External: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 11) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Uri (string) (string) case "uri": @@ -1744,7 +1782,9 @@ func (t *EmbedExternal_External) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1850,21 +1890,24 @@ func (t *EmbedImages_Image) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedImages_Image: map struct too large (%d)", 
extra) } - var name string n := extra + nameBuf := make([]byte, 11) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Alt (string) (string) case "alt": @@ -1919,7 +1962,9 @@ func (t *EmbedImages_Image) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2027,21 +2072,24 @@ func (t *GraphFollow) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphFollow: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -2078,7 +2126,9 @@ func (t *GraphFollow) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2364,21 +2414,24 @@ func (t *ActorProfile) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("ActorProfile: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := 
make([]byte, 20) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -2556,7 +2609,9 @@ func (t *ActorProfile) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2634,21 +2689,24 @@ func (t *EmbedRecord) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedRecord: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -2683,7 +2741,9 @@ func (t *EmbedRecord) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2784,21 +2844,24 @@ func (t *FeedLike) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedLike: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + 
nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -2844,7 +2907,9 @@ func (t *FeedLike) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -2929,21 +2994,24 @@ func (t *RichtextFacet) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("RichtextFacet: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Index (bsky.RichtextFacet_ByteSlice) (struct) case "index": @@ -3016,7 +3084,9 @@ func (t *RichtextFacet) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3104,21 +3174,24 @@ func (t *RichtextFacet_ByteSlice) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("RichtextFacet_ByteSlice: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, 
ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.ByteEnd (int64) (int64) case "byteEnd": { @@ -3174,7 +3247,9 @@ func (t *RichtextFacet_ByteSlice) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3259,21 +3334,24 @@ func (t *RichtextFacet_Link) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("RichtextFacet_Link: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Uri (string) (string) case "uri": @@ -3299,7 +3377,9 @@ func (t *RichtextFacet_Link) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3384,21 +3464,24 @@ func (t *RichtextFacet_Mention) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("RichtextFacet_Mention: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := 
cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -3424,7 +3507,9 @@ func (t *RichtextFacet_Mention) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3509,21 +3594,24 @@ func (t *RichtextFacet_Tag) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("RichtextFacet_Tag: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Tag (string) (string) case "tag": @@ -3549,7 +3637,9 @@ func (t *RichtextFacet_Tag) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3643,21 +3733,24 @@ func (t *EmbedRecordWithMedia) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedRecordWithMedia: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := 
cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -3712,7 +3805,9 @@ func (t *EmbedRecordWithMedia) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3813,21 +3908,24 @@ func (t *FeedDefs_NotFoundPost) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedDefs_NotFoundPost: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Uri (string) (string) case "uri": @@ -3871,7 +3969,9 @@ func (t *FeedDefs_NotFoundPost) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -3979,21 +4079,24 @@ func (t *GraphBlock) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphBlock: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := 
cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -4030,7 +4133,9 @@ func (t *GraphBlock) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -4283,21 +4388,24 @@ func (t *GraphList) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphList: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 17) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Name (string) (string) case "name": @@ -4465,7 +4573,9 @@ func (t *GraphList) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -4596,21 +4706,24 @@ func (t *GraphListitem) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphListitem: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != 
nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.List (string) (string) case "list": @@ -4658,7 +4771,9 @@ func (t *GraphListitem) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -4934,21 +5049,24 @@ func (t *FeedGenerator) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedGenerator: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 19) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -5139,7 +5257,9 @@ func (t *FeedGenerator) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -5247,21 +5367,24 @@ func (t *GraphListblock) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphListblock: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := 
cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -5298,7 +5421,9 @@ func (t *GraphListblock) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -5386,21 +5511,24 @@ func (t *EmbedDefs_AspectRatio) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedDefs_AspectRatio: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Width (int64) (int64) case "width": { @@ -5456,7 +5584,9 @@ func (t *EmbedDefs_AspectRatio) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -5638,21 +5768,24 @@ func (t *FeedThreadgate) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedThreadgate: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 13) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := 
cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Post (string) (string) case "post": @@ -5778,7 +5911,9 @@ func (t *FeedThreadgate) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -5863,21 +5998,24 @@ func (t *FeedThreadgate_ListRule) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedThreadgate_ListRule: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.List (string) (string) case "list": @@ -5903,7 +6041,9 @@ func (t *FeedThreadgate_ListRule) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -5965,21 +6105,24 @@ func (t *FeedThreadgate_MentionRule) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedThreadgate_MentionRule: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := 
cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -5994,7 +6137,9 @@ func (t *FeedThreadgate_MentionRule) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -6056,21 +6201,24 @@ func (t *FeedThreadgate_FollowingRule) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedThreadgate_FollowingRule: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -6085,7 +6233,9 @@ func (t *FeedThreadgate_FollowingRule) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -6151,21 +6301,24 @@ func (t *GraphStarterpack_FeedItem) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphStarterpack_FeedItem: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 3) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err 
!= nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Uri (string) (string) case "uri": @@ -6180,7 +6333,9 @@ func (t *GraphStarterpack_FeedItem) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -6414,21 +6569,24 @@ func (t *GraphStarterpack) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GraphStarterpack: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 17) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.List (string) (string) case "list": @@ -6595,7 +6753,9 @@ func (t *GraphStarterpack) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -6720,21 +6880,24 @@ func (t *LabelerService) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelerService: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 9) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - 
sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -6800,7 +6963,9 @@ func (t *LabelerService) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -6916,21 +7081,24 @@ func (t *LabelerDefs_LabelerPolicies) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LabelerDefs_LabelerPolicies: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 21) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LabelValues ([]*string) (slice) case "labelValues": @@ -7033,7 +7201,9 @@ func (t *LabelerDefs_LabelerPolicies) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -7204,21 +7374,24 @@ func (t *EmbedVideo) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedVideo: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 11) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + 
} - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Alt (string) (string) case "alt": @@ -7343,7 +7516,9 @@ func (t *EmbedVideo) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -7425,21 +7600,24 @@ func (t *EmbedVideo_Caption) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EmbedVideo_Caption: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 4) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.File (util.LexBlob) (struct) case "file": @@ -7474,7 +7652,9 @@ func (t *EmbedVideo_Caption) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -7656,21 +7836,24 @@ func (t *FeedPostgate) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedPostgate: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 21) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := 
cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Post (string) (string) case "post": @@ -7796,7 +7979,9 @@ func (t *FeedPostgate) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -7858,21 +8043,24 @@ func (t *FeedPostgate_DisableRule) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("FeedPostgate_DisableRule: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -7887,7 +8075,9 @@ func (t *FeedPostgate_DisableRule) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/api/cbor_gen.go b/api/cbor_gen.go index 66b989efa..766bc3180 100644 --- a/api/cbor_gen.go +++ b/api/cbor_gen.go @@ -230,21 +230,24 @@ func (t *CreateOp) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("CreateOp: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 11) for i := uint64(0); i < n; i++ { + nameLen, ok, err := 
cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Sig (string) (string) case "sig": @@ -335,7 +338,9 @@ func (t *CreateOp) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/api/chat/cbor_gen.go b/api/chat/cbor_gen.go index 0cb37eaac..44d077e75 100644 --- a/api/chat/cbor_gen.go +++ b/api/chat/cbor_gen.go @@ -97,21 +97,24 @@ func (t *ActorDeclaration) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("ActorDeclaration: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 13) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.LexiconTypeID (string) (string) case "$type": @@ -137,7 +140,9 @@ func (t *ActorDeclaration) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/atproto/data/cbor_gen.go b/atproto/data/cbor_gen.go index 89c5e0a5c..18280b707 100644 --- a/atproto/data/cbor_gen.go +++ b/atproto/data/cbor_gen.go @@ -78,21 +78,24 @@ 
func (t *GenericRecord) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("GenericRecord: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Type (string) (string) case "$type": @@ -107,7 +110,9 @@ func (t *GenericRecord) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -196,21 +201,24 @@ func (t *LegacyBlobSchema) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LegacyBlobSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Cid (string) (string) case "cid": @@ -236,7 +244,9 @@ func (t *LegacyBlobSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -359,21 +369,24 @@ func (t *BlobSchema) UnmarshalCBOR(r io.Reader) (err 
error) { return fmt.Errorf("BlobSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Ref (data.CIDLink) (struct) case "ref": @@ -435,7 +448,9 @@ func (t *BlobSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/events/cbor_gen.go b/events/cbor_gen.go index efb873446..8e13f8339 100644 --- a/events/cbor_gen.go +++ b/events/cbor_gen.go @@ -101,21 +101,24 @@ func (t *EventHeader) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("EventHeader: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 2) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.MsgType (string) (string) case "t": @@ -156,7 +159,9 @@ func (t *EventHeader) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -245,21 
+250,24 @@ func (t *ErrorFrame) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("ErrorFrame: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 7) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Error (string) (string) case "error": @@ -285,7 +293,9 @@ func (t *ErrorFrame) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/go.mod b/go.mod index 4b942517b..66391db61 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/flosch/pongo2/v6 v6.0.0 github.com/go-redis/cache/v9 v9.0.0 github.com/goccy/go-json v0.10.2 + github.com/gocql/gocql v1.7.0 github.com/golang-jwt/jwt v3.2.2+incompatible github.com/gorilla/websocket v1.5.1 github.com/hashicorp/go-retryablehttp v0.7.5 @@ -53,7 +54,7 @@ require ( github.com/samber/slog-echo v1.8.0 github.com/stretchr/testify v1.9.0 github.com/urfave/cli/v2 v2.25.7 - github.com/whyrusleeping/cbor-gen v0.1.3-0.20240904181319-8dc02b38228c + github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6 gitlab.com/yawning/secp256k1-voi v0.0.0-20230925100816-f2616030848b go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 @@ -79,6 +80,8 @@ require ( require ( github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect github.com/go-redis/redis v6.15.9+incompatible // indirect + 
github.com/golang/snappy v0.0.3 // indirect + github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/jackc/puddle/v2 v2.2.1 // indirect github.com/klauspost/compress v1.17.3 // indirect @@ -91,6 +94,7 @@ require ( github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect go.uber.org/zap v1.26.0 // indirect golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect + gopkg.in/inf.v0 v0.9.1 // indirect ) require ( diff --git a/go.sum b/go.sum index dfc251448..8cd2edd60 100644 --- a/go.sum +++ b/go.sum @@ -71,6 +71,10 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= +github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= +github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= +github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/brianvoe/gofakeit/v6 v6.25.0 h1:ZpFjktOpLZUeF8q223o0rUuXtA+m5qW5srjvVi+JkXk= github.com/brianvoe/gofakeit/v6 v6.25.0/go.mod h1:Xj58BMSnFqcn/fAQeSK+/PLtC5kSb7FJIq4JyGa8vEs= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= @@ -152,6 +156,8 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json 
v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/gocql/gocql v1.7.0 h1:O+7U7/1gSN7QTEAaMEsJc1Oq2QHXvCWoF3DFK9HDHus= +github.com/gocql/gocql v1.7.0/go.mod h1:vnlvXyFZeLBF0Wy+RS8hrOdbn0UWsWtdg07XJnFxZ+4= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -189,6 +195,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -231,6 +239,8 @@ github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/ github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= +github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= +github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hashicorp/go-cleanhttp v0.5.2 
h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= @@ -616,8 +626,8 @@ github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSD github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 h1:5HZfQkwe0mIfyDmc1Em5GqlNRzcdtlv4HTNmdpt7XH0= github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11/go.mod h1:Wlo/SzPmxVp6vXpGt/zaXhHH0fn4IxgqZc82aKg6bpQ= -github.com/whyrusleeping/cbor-gen v0.1.3-0.20240904181319-8dc02b38228c h1:UsxJNcLPfyLyVaA4iusIrsLAqJn/xh36Qgb8emqtXzk= -github.com/whyrusleeping/cbor-gen v0.1.3-0.20240904181319-8dc02b38228c/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= +github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e h1:28X54ciEwwUxyHn9yrZfl5ojgF4CBNLWX7LR0rvBkf4= +github.com/whyrusleeping/cbor-gen v0.2.1-0.20241030202151-b7a6831be65e/go.mod h1:pM99HXyEbSQHcosHc0iW7YFmwnscr+t9Te4ibko05so= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f h1:jQa4QT2UP9WYv2nzyawpKMOCl+Z/jW7djv2/J50lj9E= github.com/whyrusleeping/chunker v0.0.0-20181014151217-fe64bd25879f/go.mod h1:p9UJB6dDgdPgMJZs7UjUOdulKyRr9fqkS+6JKAInPy8= github.com/whyrusleeping/go-did v0.0.0-20230824162731-404d1707d5d6 h1:yJ9/LwIGIk/c0CdoavpC9RNSGSruIspSZtxG3Nnldic= @@ -1061,6 +1071,8 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 
v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/lex/util/cbor_gen.go b/lex/util/cbor_gen.go index dd057f1fd..84e78775e 100644 --- a/lex/util/cbor_gen.go +++ b/lex/util/cbor_gen.go @@ -78,21 +78,24 @@ func (t *CborChecker) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("CborChecker: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 5) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Type (string) (string) case "$type": @@ -107,7 +110,9 @@ func (t *CborChecker) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -196,21 +201,24 @@ func (t *LegacyBlob) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("LegacyBlob: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil 
{ return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Cid (string) (string) case "cid": @@ -236,7 +244,9 @@ func (t *LegacyBlob) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -359,21 +369,24 @@ func (t *BlobSchema) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("BlobSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 8) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Ref (util.LexLink) (struct) case "ref": @@ -435,7 +448,9 @@ func (t *BlobSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/lex/util/cbor_gen_test.go b/lex/util/cbor_gen_test.go index 76bc90ee3..175f2cb00 100644 --- a/lex/util/cbor_gen_test.go +++ b/lex/util/cbor_gen_test.go @@ -254,21 +254,24 @@ func (t *basicSchema) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("basicSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 7) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 8192) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { + if !ok { + // Field doesn't exist on this 
type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Bool (bool) (bool) case "bool": @@ -430,7 +433,9 @@ func (t *basicSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -567,21 +572,24 @@ func (t *basicSchemaInner) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("basicSchemaInner: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 6) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 8192) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Arr ([]string) (slice) case "arr": @@ -680,7 +688,9 @@ func (t *basicSchemaInner) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -779,21 +789,24 @@ func (t *ipldSchema) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("ipldSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 1) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 8192) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil 
{ return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.A (util.LexLink) (struct) case "a": @@ -840,7 +853,9 @@ func (t *ipldSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1059,21 +1074,24 @@ func (t *basicOldSchema) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("basicOldSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 1) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 8192) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.A (string) (string) case "a": @@ -1224,7 +1242,9 @@ func (t *basicOldSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1361,21 +1381,24 @@ func (t *basicOldSchemaInner) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("basicOldSchemaInner: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 1) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 8192) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name 
{ + switch string(nameBuf[:nameLen]) { // t.H (string) (string) case "h": @@ -1474,7 +1497,9 @@ func (t *basicOldSchemaInner) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -1558,21 +1583,24 @@ func (t *ipldOldSchema) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("ipldOldSchema: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 1) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 8192) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.A (util.LexLink) (struct) case "a": @@ -1608,7 +1636,9 @@ func (t *ipldOldSchema) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/mst/cbor_gen.go b/mst/cbor_gen.go index 19dd2f75d..8f7e7dcc9 100644 --- a/mst/cbor_gen.go +++ b/mst/cbor_gen.go @@ -104,21 +104,24 @@ func (t *nodeData) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("nodeData: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 1) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - 
- name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Entries ([]mst.treeEntry) (slice) case "e": @@ -184,7 +187,9 @@ func (t *nodeData) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -312,21 +317,24 @@ func (t *treeEntry) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("treeEntry: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 1) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.KeySuffix ([]uint8) (slice) case "k": @@ -415,7 +423,9 @@ func (t *treeEntry) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/repo/cbor_gen.go b/repo/cbor_gen.go index 96bfb2e0d..02594508b 100644 --- a/repo/cbor_gen.go +++ b/repo/cbor_gen.go @@ -194,21 +194,24 @@ func (t *SignedCommit) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("SignedCommit: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 7) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := 
cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -319,7 +322,9 @@ func (t *SignedCommit) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } @@ -477,21 +482,24 @@ func (t *UnsignedCommit) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("UnsignedCommit: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 7) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Did (string) (string) case "did": @@ -579,7 +587,9 @@ func (t *UnsignedCommit) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } diff --git a/util/labels/cbor_gen.go b/util/labels/cbor_gen.go index e777d7e45..b70d76aef 100644 --- a/util/labels/cbor_gen.go +++ b/util/labels/cbor_gen.go @@ -285,21 +285,24 @@ func (t *UnsignedLabel) UnmarshalCBOR(r io.Reader) (err error) { return fmt.Errorf("UnsignedLabel: map struct too large (%d)", extra) } - var name string n := extra + nameBuf := make([]byte, 3) for i := uint64(0); i < n; i++ { + nameLen, ok, err := cbg.ReadFullStringIntoBuf(cr, nameBuf, 1000000) + if err != nil { + return err + } - { - sval, err := cbg.ReadStringWithMax(cr, 1000000) - 
if err != nil { + if !ok { + // Field doesn't exist on this type, so ignore it + if err := cbg.ScanForLinks(cr, func(cid.Cid) {}); err != nil { return err } - - name = string(sval) + continue } - switch name { + switch string(nameBuf[:nameLen]) { // t.Cid (string) (string) case "cid": @@ -458,7 +461,9 @@ func (t *UnsignedLabel) UnmarshalCBOR(r io.Reader) (err error) { default: // Field doesn't exist on this type, so ignore it - cbg.ScanForLinks(r, func(cid.Cid) {}) + if err := cbg.ScanForLinks(r, func(cid.Cid) {}); err != nil { + return err + } } } From e8c9d2e788367b4716c9eb4c82fc5488c348a83c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 15 Nov 2024 14:04:34 +0000 Subject: [PATCH 45/50] identity: default dir with 100 max idle conns, and 1sec idle --- atproto/identity/identity.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/atproto/identity/identity.go b/atproto/identity/identity.go index 02e66f22c..2e67e0a5c 100644 --- a/atproto/identity/identity.go +++ b/atproto/identity/identity.go @@ -66,6 +66,11 @@ func DefaultDirectory() Directory { PLCURL: DefaultPLCURL, HTTPClient: http.Client{ Timeout: time.Second * 15, + Transport: &http.Transport{ + // would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad. 
+ IdleConnTimeout: time.Millisecond * 1000, + MaxIdleConns: 100, + }, }, Resolver: net.Resolver{ Dial: func(ctx context.Context, network, address string) (net.Conn, error) { From ed0a5c6480a725d6fb1153723c49810f25ebad3d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 15 Nov 2024 14:05:05 +0000 Subject: [PATCH 46/50] identity: drop default HTTP timeout from 15s to 10s --- atproto/identity/identity.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/identity/identity.go b/atproto/identity/identity.go index 2e67e0a5c..c8192e6d4 100644 --- a/atproto/identity/identity.go +++ b/atproto/identity/identity.go @@ -65,7 +65,7 @@ func DefaultDirectory() Directory { base := BaseDirectory{ PLCURL: DefaultPLCURL, HTTPClient: http.Client{ - Timeout: time.Second * 15, + Timeout: time.Second * 10, Transport: &http.Transport{ // would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad. IdleConnTimeout: time.Millisecond * 1000, From d88346ab5df72a2a1809e27d9994403ced006c71 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 15 Nov 2024 14:05:20 +0000 Subject: [PATCH 47/50] identity: drop default DNS timeout from 5s to 3s --- atproto/identity/identity.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/identity/identity.go b/atproto/identity/identity.go index c8192e6d4..c0453b2af 100644 --- a/atproto/identity/identity.go +++ b/atproto/identity/identity.go @@ -74,7 +74,7 @@ func DefaultDirectory() Directory { }, Resolver: net.Resolver{ Dial: func(ctx context.Context, network, address string) (net.Conn, error) { - d := net.Dialer{Timeout: time.Second * 5} + d := net.Dialer{Timeout: time.Second * 3} return d.DialContext(ctx, network, address) }, }, From 1b7e54e42e5dc9aa4a84671d52a0d3bd83969b15 Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Fri, 15 Nov 2024 11:57:09 -0800 Subject: [PATCH 48/50] allow carstore to use multiple directories, round robin style --- 
carstore/bs.go | 33 ++++++++++++++++++++------------- carstore/repo_test.go | 11 ++++++++--- cmd/bigsky/main.go | 17 +++++++++++++++-- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/carstore/bs.go b/carstore/bs.go index e7af35d12..dac62e5d8 100644 --- a/carstore/bs.go +++ b/carstore/bs.go @@ -62,21 +62,23 @@ type CarStore interface { } type FileCarStore struct { - meta *CarStoreGormMeta - rootDir string + meta *CarStoreGormMeta + rootDirs []string lscLk sync.Mutex lastShardCache map[models.Uid]*CarShard } -func NewCarStore(meta *gorm.DB, root string) (CarStore, error) { - if _, err := os.Stat(root); err != nil { - if !os.IsNotExist(err) { - return nil, err - } +func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) { + for _, root := range roots { + if _, err := os.Stat(root); err != nil { + if !os.IsNotExist(err) { + return nil, err + } - if err := os.Mkdir(root, 0775); err != nil { - return nil, err + if err := os.Mkdir(root, 0775); err != nil { + return nil, err + } } } if err := meta.AutoMigrate(&CarShard{}, &blockRef{}); err != nil { @@ -88,7 +90,7 @@ func NewCarStore(meta *gorm.DB, root string) (CarStore, error) { return &FileCarStore{ meta: &CarStoreGormMeta{meta: meta}, - rootDir: root, + rootDirs: roots, lastShardCache: make(map[models.Uid]*CarShard), }, nil } @@ -541,9 +543,14 @@ func (ds *DeltaSession) GetSize(ctx context.Context, c cid.Cid) (int, error) { func fnameForShard(user models.Uid, seq int) string { return fmt.Sprintf("sh-%d-%d", user, seq) } + +func (cs *FileCarStore) dirForUser(user models.Uid) string { + return cs.rootDirs[int(user)%len(cs.rootDirs)] +} + func (cs *FileCarStore) openNewShardFile(ctx context.Context, user models.Uid, seq int) (*os.File, string, error) { // TODO: some overwrite protections - fname := filepath.Join(cs.rootDir, fnameForShard(user, seq)) + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) fi, err := os.Create(fname) if err != nil { return nil, "", err @@ -557,7 
+564,7 @@ func (cs *FileCarStore) writeNewShardFile(ctx context.Context, user models.Uid, defer span.End() // TODO: some overwrite protections - fname := filepath.Join(cs.rootDir, fnameForShard(user, seq)) + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) if err := os.WriteFile(fname, data, 0664); err != nil { return "", err } @@ -982,7 +989,7 @@ func (cs *FileCarStore) openNewCompactedShardFile(ctx context.Context, user mode // TODO: some overwrite protections // NOTE CreateTemp is used for creating a non-colliding file, but we keep it and don't delete it so don't think of it as "temporary". // This creates "sh-%d-%d%s" with some random stuff in the last position - fi, err := os.CreateTemp(cs.rootDir, fnameForShard(user, seq)) + fi, err := os.CreateTemp(cs.dirForUser(user), fnameForShard(user, seq)) if err != nil { return nil, "", err } diff --git a/carstore/repo_test.go b/carstore/repo_test.go index a4d2c8cb8..8366cab95 100644 --- a/carstore/repo_test.go +++ b/carstore/repo_test.go @@ -30,8 +30,13 @@ func testCarStore() (CarStore, func(), error) { return nil, nil, err } - sharddir := filepath.Join(tempdir, "shards") - if err := os.MkdirAll(sharddir, 0775); err != nil { + sharddir1 := filepath.Join(tempdir, "shards1") + if err := os.MkdirAll(sharddir1, 0775); err != nil { + return nil, nil, err + } + + sharddir2 := filepath.Join(tempdir, "shards2") + if err := os.MkdirAll(sharddir2, 0775); err != nil { return nil, nil, err } @@ -45,7 +50,7 @@ func testCarStore() (CarStore, func(), error) { return nil, nil, err } - cs, err := NewCarStore(db, sharddir) + cs, err := NewCarStore(db, []string{sharddir1, sharddir2}) if err != nil { return nil, nil, err } diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index 540796f51..459b1fc20 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -200,6 +200,11 @@ func run(args []string) error { EnvVars: []string{"RELAY_NUM_COMPACTION_WORKERS"}, Value: 2, }, + &cli.StringSliceFlag{ + Name: 
"carstore-shard-dirs", + Usage: "specify list of shard directories for carstore storage, overrides default storage within datadir", + EnvVars: []string{"RELAY_CARSTORE_SHARD_DIRS"}, + }, } app.Action = runBigsky @@ -312,8 +317,16 @@ func runBigsky(cctx *cli.Context) error { } } - os.MkdirAll(filepath.Dir(csdir), os.ModePerm) - cstore, err := carstore.NewCarStore(csdb, csdir) + csdirs := []string{csdir} + if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 { + csdirs = paramDirs + } + + for _, csd := range csdirs { + os.MkdirAll(filepath.Dir(csd), os.ModePerm) + } + + cstore, err := carstore.NewCarStore(csdb, csdirs) if err != nil { return err } From fd6ae473a49f459441060f5563ccbc89ac086bf2 Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Fri, 15 Nov 2024 12:32:27 -0800 Subject: [PATCH 49/50] fixup build --- cmd/bigsky/main.go | 4 +++- cmd/laputa/main.go | 2 +- testing/utils.go | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index 459b1fc20..54d56735d 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -323,7 +323,9 @@ func runBigsky(cctx *cli.Context) error { } for _, csd := range csdirs { - os.MkdirAll(filepath.Dir(csd), os.ModePerm) + if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil { + return err + } } cstore, err := carstore.NewCarStore(csdb, csdirs) diff --git a/cmd/laputa/main.go b/cmd/laputa/main.go index 2cedb393a..d91edfc62 100644 --- a/cmd/laputa/main.go +++ b/cmd/laputa/main.go @@ -158,7 +158,7 @@ func run(args []string) { } } - cstore, err := carstore.NewCarStore(csdb, csdir) + cstore, err := carstore.NewCarStore(csdb, []string{csdir}) if err != nil { return err } diff --git a/testing/utils.go b/testing/utils.go index 9b076ef17..7af6e1adc 100644 --- a/testing/utils.go +++ b/testing/utils.go @@ -117,7 +117,7 @@ func SetupPDS(ctx context.Context, suffix string, plc plc.PLCClient) (*TestPDS, return nil, err } - cs, err := 
carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, err } @@ -550,7 +550,7 @@ func SetupRelay(ctx context.Context, didr plc.PLCClient) (*TestRelay, error) { return nil, err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, err } From ffe7fb61bb6d7d889cf263422d7a306ef55187df Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Fri, 15 Nov 2024 12:46:03 -0800 Subject: [PATCH 50/50] more test fixups --- cmd/supercollider/main.go | 2 +- events/dbpersist_test.go | 2 +- indexer/posts_test.go | 2 +- pds/handlers_test.go | 2 +- repomgr/bench_test.go | 2 +- repomgr/ingest_test.go | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/supercollider/main.go b/cmd/supercollider/main.go index 49499ef69..1c64e71d3 100644 --- a/cmd/supercollider/main.go +++ b/cmd/supercollider/main.go @@ -565,7 +565,7 @@ func initSpeedyRepoMan(key *godid.PrivKey) (*repomgr.RepoManager, *godid.PrivKey return nil, nil, err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, nil, err } diff --git a/events/dbpersist_test.go b/events/dbpersist_test.go index c299569da..ad8d266b6 100644 --- a/events/dbpersist_test.go +++ b/events/dbpersist_test.go @@ -301,7 +301,7 @@ func setupDBs(t testing.TB) (*gorm.DB, *gorm.DB, carstore.CarStore, string, erro return nil, nil, nil, "", err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, nil, nil, "", err } diff --git a/indexer/posts_test.go b/indexer/posts_test.go index ed21ab666..aa6fc99b3 100644 --- a/indexer/posts_test.go +++ b/indexer/posts_test.go @@ -50,7 +50,7 @@ func testIndexer(t *testing.T) *testIx { t.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, 
[]string{cspath}) if err != nil { t.Fatal(err) } diff --git a/pds/handlers_test.go b/pds/handlers_test.go index fe2bb14b8..9cecd3f91 100644 --- a/pds/handlers_test.go +++ b/pds/handlers_test.go @@ -29,7 +29,7 @@ func testCarStore(t *testing.T, db *gorm.DB) (carstore.CarStore, func()) { t.Fatal(err) } - cs, err := carstore.NewCarStore(db, sharddir) + cs, err := carstore.NewCarStore(db, []string{sharddir}) if err != nil { t.Fatal(err) } diff --git a/repomgr/bench_test.go b/repomgr/bench_test.go index 271813909..c01789422 100644 --- a/repomgr/bench_test.go +++ b/repomgr/bench_test.go @@ -54,7 +54,7 @@ func BenchmarkRepoMgrCreates(b *testing.B) { b.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { b.Fatal(err) } diff --git a/repomgr/ingest_test.go b/repomgr/ingest_test.go index dcb9097ac..38a8562e5 100644 --- a/repomgr/ingest_test.go +++ b/repomgr/ingest_test.go @@ -50,7 +50,7 @@ func TestLoadNewRepo(t *testing.T) { t.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { t.Fatal(err) } @@ -80,7 +80,7 @@ func testCarstore(t *testing.T, dir string) carstore.CarStore { t.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { t.Fatal(err) }