diff --git a/automod/account_meta.go b/automod/account_meta.go new file mode 100644 index 000000000..bdc1494f6 --- /dev/null +++ b/automod/account_meta.go @@ -0,0 +1,98 @@ +package automod + +import ( + "context" + "encoding/json" + "fmt" + "time" + + appbsky "github.com/bluesky-social/indigo/api/bsky" + "github.com/bluesky-social/indigo/atproto/identity" +) + +type ProfileSummary struct { + HasAvatar bool + Description *string + DisplayName *string +} + +type AccountPrivate struct { + Email string + EmailConfirmed bool +} + +// information about a repo/account/identity, always pre-populated and relevant to many rules +type AccountMeta struct { + Identity *identity.Identity + Profile ProfileSummary + Private *AccountPrivate + AccountLabels []string + FollowersCount int64 + FollowsCount int64 + PostsCount int64 + IndexedAt *time.Time +} + +func (e *Engine) GetAccountMeta(ctx context.Context, ident *identity.Identity) (*AccountMeta, error) { + + // wipe parsed public key; it's a waste of space and can't serialize + ident.ParsedPublicKey = nil + + existing, err := e.Cache.Get(ctx, "acct", ident.DID.String()) + if err != nil { + return nil, err + } + if existing != "" { + var am AccountMeta + err := json.Unmarshal([]byte(existing), &am) + if err != nil { + return nil, fmt.Errorf("parsing AccountMeta from cache: %v", err) + } + am.Identity = ident + return &am, nil + } + + // fetch account metadata + pv, err := appbsky.ActorGetProfile(ctx, e.BskyClient, ident.DID.String()) + if err != nil { + return nil, err + } + + var labels []string + for _, lbl := range pv.Labels { + labels = append(labels, lbl.Val) + } + + am := AccountMeta{ + Identity: ident, + Profile: ProfileSummary{ + HasAvatar: pv.Avatar != nil, + Description: pv.Description, + DisplayName: pv.DisplayName, + }, + AccountLabels: dedupeStrings(labels), + } + if pv.PostsCount != nil { + am.PostsCount = *pv.PostsCount + } + if pv.FollowersCount != nil { + am.FollowersCount = *pv.FollowersCount + } + if pv.FollowsCount != nil { + am.FollowsCount = *pv.FollowsCount + } + + if e.AdminClient != nil { + // XXX: get admin-level info (email, indexed at, etc). requires lexgen update + } + + val, err := json.Marshal(&am) + if err != nil { + return nil, err + } + + if err := e.Cache.Set(ctx, "acct", ident.DID.String(), string(val)); err != nil { + return nil, err + } + return &am, nil +} diff --git a/automod/cachestore.go b/automod/cachestore.go new file mode 100644 index 000000000..4fd33585e --- /dev/null +++ b/automod/cachestore.go @@ -0,0 +1,36 @@ +package automod + +import ( + "context" + "time" + + "github.com/hashicorp/golang-lru/v2/expirable" +) + +type CacheStore interface { + Get(ctx context.Context, name, key string) (string, error) + Set(ctx context.Context, name, key string, val string) error +} + +type MemCacheStore struct { + Data *expirable.LRU[string, string] +} + +func NewMemCacheStore(capacity int, ttl time.Duration) MemCacheStore { + return MemCacheStore{ + Data: expirable.NewLRU[string, string](capacity, nil, ttl), + } +} + +func (s MemCacheStore) Get(ctx context.Context, name, key string) (string, error) { + v, ok := s.Data.Get(name + "/" + key) + if !ok { + return "", nil + } + return v, nil +} + +func (s MemCacheStore) Set(ctx context.Context, name, key string, val string) error { + s.Data.Add(name+"/"+key, val) + return nil +} diff --git a/automod/engine.go b/automod/engine.go index 24edb8949..5df009043 100644 --- a/automod/engine.go +++ b/automod/engine.go @@ -16,11 +16,13 @@ import ( // // TODO: careful when initializing: several fields should not be null or zero, even though they are pointer type. type Engine struct { - Logger *slog.Logger - Directory identity.Directory - Rules RuleSet - Counters CountStore - Sets SetStore + Logger *slog.Logger + Directory identity.Directory + Rules RuleSet + Counters CountStore + Sets SetStore + Cache CacheStore + BskyClient *xrpc.Client // used to persist moderation actions in mod service (optional) AdminClient *xrpc.Client } @@ -41,10 +43,14 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. return fmt.Errorf("identity not found for did: %s", did.String()) } + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } evt := IdentityEvent{ Event{ Engine: e, - Account: AccountMeta{Identity: ident}, + Account: *am, }, } if err := e.Rules.CallIdentityRules(&evt); err != nil { @@ -62,11 +68,13 @@ func (e *Engine) ProcessIdentityEvent(ctx context.Context, t string, did syntax. func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID string, rec any) error { // similar to an HTTP server, we want to recover any panics from rule execution + /* XXX defer func() { if r := recover(); r != nil { e.Logger.Error("automod event execution exception", "err", r) } }() + */ ident, err := e.Directory.LookupDID(ctx, did) if err != nil { @@ -83,7 +91,11 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID if !ok { return fmt.Errorf("mismatch between collection (%s) and type", collection) } - evt := e.NewPostEvent(ident, path, recCID, post) + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } + evt := e.NewPostEvent(*am, path, recCID, post) e.Logger.Debug("processing post", "did", ident.DID, "path", path) if err := e.Rules.CallPostRules(&evt); err != nil { return err @@ -99,7 +111,11 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return err } default: - evt := e.NewRecordEvent(ident, path, recCID, rec) + am, err := e.GetAccountMeta(ctx, ident) + if err != nil { + return err + } + evt := e.NewRecordEvent(*am, path, recCID, rec) e.Logger.Debug("processing record", "did", ident.DID, "path", path) if err := e.Rules.CallRecordRules(&evt); err != nil { return err @@ -119,14 +135,14 @@ func (e *Engine) ProcessRecord(ctx context.Context, did syntax.DID, path, recCID return nil } -func (e *Engine) NewPostEvent(ident *identity.Identity, path, recCID string, post *appbsky.FeedPost) PostEvent { +func (e *Engine) NewPostEvent(am AccountMeta, path, recCID string, post *appbsky.FeedPost) PostEvent { parts := strings.SplitN(path, "/", 2) return PostEvent{ RecordEvent{ Event{ Engine: e, - Logger: e.Logger.With("did", ident.DID, "collection", parts[0], "rkey", parts[1]), - Account: AccountMeta{Identity: ident}, + Logger: e.Logger.With("did", am.Identity.DID, "collection", parts[0], "rkey", parts[1]), + Account: am, }, parts[0], parts[1], @@ -140,13 +156,13 @@ func (e *Engine) NewPostEvent(ident *identity.Identity, path, recCID string, pos } } -func (e *Engine) NewRecordEvent(ident *identity.Identity, path, recCID string, rec any) RecordEvent { +func (e *Engine) NewRecordEvent(am AccountMeta, path, recCID string, rec any) RecordEvent { parts := strings.SplitN(path, "/", 2) return RecordEvent{ Event{ Engine: e, - Logger: e.Logger.With("did", ident.DID, "collection", parts[0], "rkey", parts[1]), - Account: AccountMeta{Identity: ident}, + Logger: e.Logger.With("did", am.Identity.DID, "collection", parts[0], "rkey", parts[1]), + Account: am, }, parts[0], parts[1], diff --git a/automod/event.go b/automod/event.go index baf62b6e4..0924293e1 100644 --- a/automod/event.go +++ b/automod/event.go @@ -7,7 +7,6 @@ import ( comatproto "github.com/bluesky-social/indigo/api/atproto" appbsky "github.com/bluesky-social/indigo/api/bsky" - "github.com/bluesky-social/indigo/atproto/identity" ) type ModReport struct { @@ -15,12 +14,6 @@ type ModReport struct { Comment string } -// information about a repo/account/identity, always pre-populated and relevant to many rules -type AccountMeta struct { - Identity *identity.Identity - // TODO: createdAt / age -} - type CounterRef struct { Name string Val string @@ -84,7 +77,7 @@ func (e *Event) PersistAccountActions(ctx context.Context) error { xrpcc := e.Engine.AdminClient if len(e.AccountLabels) > 0 { _, err := comatproto.AdminTakeModerationAction(ctx, xrpcc, &comatproto.AdminTakeModerationAction_Input{ - Action: "com.atproto.admin.defs#createLabels", + Action: "com.atproto.admin.defs#flag", CreateLabelVals: dedupeStrings(e.AccountLabels), Reason: "automod", CreatedBy: xrpcc.Auth.Did, diff --git a/automod/redis_cache.go b/automod/redis_cache.go new file mode 100644 index 000000000..c5724826d --- /dev/null +++ b/automod/redis_cache.go @@ -0,0 +1,63 @@ +package automod + +import ( + "context" + "time" + + "github.com/go-redis/cache/v9" + "github.com/redis/go-redis/v9" +) + +type RedisCacheStore struct { + Data *cache.Cache + TTL time.Duration +} + +var _ CacheStore = (*RedisCacheStore)(nil) + +func NewRedisCacheStore(redisURL string, ttl time.Duration) (*RedisCacheStore, error) { + opt, err := redis.ParseURL(redisURL) + if err != nil { + return nil, err + } + rdb := redis.NewClient(opt) + // check redis connection + _, err = rdb.Ping(context.TODO()).Result() + if err != nil { + return nil, err + } + data := cache.New(&cache.Options{ + Redis: rdb, + LocalCache: cache.NewTinyLFU(10_000, ttl), + }) + return &RedisCacheStore{ + Data: data, + TTL: ttl, + }, nil +} + +func redisCacheKey(name, key string) string { + return "cache/" + name + "/" + key +} + +func (s RedisCacheStore) Get(ctx context.Context, name, key string) (string, error) { + var val string + err := s.Data.Get(ctx, redisCacheKey(name, key), &val) + if err == cache.ErrCacheMiss { + return "", nil + } + if err != nil { + return "", err + } + return val, nil +} + +func (s RedisCacheStore) Set(ctx context.Context, name, key string, val string) error { + s.Data.Set(&cache.Item{ + Ctx: ctx, + Key: redisCacheKey(name, key), + Value: val, + TTL: s.TTL, + }) + return nil +} diff --git a/automod/redis_directory.go b/automod/redis_directory.go index 5add6ab0e..4a76c8b4d 100644 --- a/automod/redis_directory.go +++ b/automod/redis_directory.go @@ -93,15 +93,19 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha DID: "", Err: err, } - d.handleCache.Set(&cache.Item{ + err = d.handleCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + h.String(), Value: he, TTL: d.ErrTTL, }) + if err != nil { + return nil, err + } return &he, nil } + ident.ParsedPublicKey = nil entry := IdentityEntry{ Updated: time.Now(), Identity: ident, @@ -113,18 +117,24 @@ func (d *RedisDirectory) updateHandle(ctx context.Context, h syntax.Handle) (*Ha Err: nil, } - d.identityCache.Set(&cache.Item{ + err = d.identityCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + ident.DID.String(), Value: entry, TTL: d.HitTTL, }) - d.handleCache.Set(&cache.Item{ + if err != nil { + return nil, err + } + err = d.handleCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + h.String(), Value: he, TTL: d.HitTTL, }) + if err != nil { + return nil, err + } return &he, nil } @@ -178,7 +188,7 @@ func (d *RedisDirectory) ResolveHandle(ctx context.Context, h syntax.Handle) (sy func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*IdentityEntry, error) { ident, err := d.Inner.LookupDID(ctx, did) - // wipe parsed public key; it's a waste of space + // wipe parsed public key; it's a waste of space and can't serialize if nil == err { ident.ParsedPublicKey = nil } @@ -198,19 +208,25 @@ func (d *RedisDirectory) updateDID(ctx context.Context, did syntax.DID) (*Identi } } - d.identityCache.Set(&cache.Item{ + err = d.identityCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + did.String(), Value: entry, TTL: d.HitTTL, }) + if err != nil { + return nil, err + } if he != nil { - d.handleCache.Set(&cache.Item{ + err = d.handleCache.Set(&cache.Item{ Ctx: ctx, Key: redisDirPrefix + ident.Handle.String(), Value: *he, TTL: d.HitTTL, }) + if err != nil { + return nil, err + } } return &entry, nil } diff --git a/automod/rules/all.go b/automod/rules/all.go index 36f2df578..d8583be44 100644 --- a/automod/rules/all.go +++ b/automod/rules/all.go @@ -11,6 +11,7 @@ func DefaultRules() automod.RuleSet { MisleadingMentionPostRule, ReplyCountPostRule, BanHashtagsPostRule, + AccountDemoPostRule, }, } return rules diff --git a/automod/rules/hashtags_test.go b/automod/rules/hashtags_test.go index 5c95e5583..ac3a3fc26 100644 --- a/automod/rules/hashtags_test.go +++ b/automod/rules/hashtags_test.go @@ -6,6 +6,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" "github.com/stretchr/testify/assert" ) @@ -14,16 +15,18 @@ func TestBanHashtagPostRule(t *testing.T) { assert := assert.New(t) engine := engineFixture() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), + am1 := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, } path := "app.bsky.feed.post/abc123" cid1 := "cid123" p1 := appbsky.FeedPost{ Text: "some post blah", } - evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) + evt1 := engine.NewPostEvent(am1, path, cid1, &p1) assert.NoError(BanHashtagsPostRule(&evt1)) assert.Empty(evt1.RecordLabels) @@ -31,7 +34,7 @@ func TestBanHashtagPostRule(t *testing.T) { Text: "some post blah", Tags: []string{"one", "slur"}, } - evt2 := engine.NewPostEvent(&id1, path, cid1, &p2) + evt2 := engine.NewPostEvent(am1, path, cid1, &p2) assert.NoError(BanHashtagsPostRule(&evt2)) assert.NotEmpty(evt2.RecordLabels) } diff --git a/automod/rules/misleading_test.go b/automod/rules/misleading_test.go index ce8138133..edc7a0d19 100644 --- a/automod/rules/misleading_test.go +++ b/automod/rules/misleading_test.go @@ -6,6 +6,7 @@ import ( appbsky "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/atproto/identity" "github.com/bluesky-social/indigo/atproto/syntax" + "github.com/bluesky-social/indigo/automod" "github.com/stretchr/testify/assert" ) @@ -14,9 +15,11 @@ func TestMisleadingURLPostRule(t *testing.T) { assert := assert.New(t) engine := engineFixture() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), + am1 := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, } path := "app.bsky.feed.post/abc123" cid1 := "cid123" @@ -38,7 +41,7 @@ func TestMisleadingURLPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) + evt1 := engine.NewPostEvent(am1, path, cid1, &p1) assert.NoError(MisleadingURLPostRule(&evt1)) assert.NotEmpty(evt1.RecordLabels) } @@ -47,9 +50,11 @@ func TestMisleadingMentionPostRule(t *testing.T) { assert := assert.New(t) engine := engineFixture() - id1 := identity.Identity{ - DID: syntax.DID("did:plc:abc111"), - Handle: syntax.Handle("handle.example.com"), + am1 := automod.AccountMeta{ + Identity: &identity.Identity{ + DID: syntax.DID("did:plc:abc111"), + Handle: syntax.Handle("handle.example.com"), + }, } path := "app.bsky.feed.post/abc123" cid1 := "cid123" @@ -71,7 +76,7 @@ func TestMisleadingMentionPostRule(t *testing.T) { }, }, } - evt1 := engine.NewPostEvent(&id1, path, cid1, &p1) + evt1 := engine.NewPostEvent(am1, path, cid1, &p1) assert.NoError(MisleadingMentionPostRule(&evt1)) assert.NotEmpty(evt1.RecordLabels) } diff --git a/automod/rules/profile.go b/automod/rules/profile.go new file mode 100644 index 000000000..e6f70bdc4 --- /dev/null +++ b/automod/rules/profile.go @@ -0,0 +1,13 @@ +package rules + +import ( + "github.com/bluesky-social/indigo/automod" +) + +// this is a dummy rule to demonstrate accessing account metadata (eg, profile) from within post handler +func AccountDemoPostRule(evt *automod.PostEvent) error { + if evt.Account.Profile.Description != nil && len(evt.Post.Text) > 5 && *evt.Account.Profile.Description == evt.Post.Text { + evt.AddRecordFlag("own-profile-description") + } + return nil +} diff --git a/cmd/hepa/main.go b/cmd/hepa/main.go index 357156baf..8480963b3 100644 --- a/cmd/hepa/main.go +++ b/cmd/hepa/main.go @@ -51,6 +51,12 @@ func run(args []string) error { Value: "https://api.bsky.app", EnvVars: []string{"ATP_MOD_HOST"}, }, + &cli.StringFlag{ + Name: "atp-bsky-host", + Usage: "method, hostname, and port of bsky API (appview) service", + Value: "https://api.bsky.app", + EnvVars: []string{"ATP_BSKY_HOST"}, + }, } app.Commands = []*cli.Command{ @@ -138,6 +144,7 @@ var runCmd = &cli.Command{ dir, Config{ BGSHost: cctx.String("atp-bgs-host"), + BskyHost: cctx.String("atp-bsky-host"), Logger: logger, ModHost: cctx.String("atp-mod-host"), ModAdminToken: cctx.String("mod-admin-token"), diff --git a/cmd/hepa/server.go b/cmd/hepa/server.go index 92e452401..eb784b28d 100644 --- a/cmd/hepa/server.go +++ b/cmd/hepa/server.go @@ -7,6 +7,7 @@ import ( "net/http" "os" "strings" + "time" comatproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/atproto/identity" @@ -26,6 +27,7 @@ type Server struct { type Config struct { BGSHost string + BskyHost string ModHost string ModAdminToken string ModUsername string @@ -91,13 +93,29 @@ func NewServer(dir identity.Directory, config Config) (*Server, error) { counters = automod.NewMemCountStore() } + var cache automod.CacheStore + if config.RedisURL != "" { + c, err := automod.NewRedisCacheStore(config.RedisURL, 30*time.Minute) + if err != nil { + return nil, err + } + cache = c + } else { + cache = automod.NewMemCacheStore(5_000, 30*time.Minute) + } + engine := automod.Engine{ Logger: logger, Directory: dir, Counters: counters, Sets: sets, + Cache: cache, Rules: rules.DefaultRules(), AdminClient: xrpcc, + BskyClient: &xrpc.Client{ + Client: util.RobustHTTPClient(), + Host: config.BskyHost, + }, } s := &Server{