Skip to content

Commit

Permalink
refactor rule helpers into sub-module (#783)
Browse files Browse the repository at this point in the history
  • Loading branch information
bnewbold authored Oct 30, 2024
2 parents ed5b6f2 + 867d96f commit 983ce4a
Show file tree
Hide file tree
Showing 19 changed files with 275 additions and 237 deletions.
49 changes: 49 additions & 0 deletions automod/helpers/account.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package helpers

import (
"time"

"github.com/bluesky-social/indigo/automod"
)

// atprotoAccountEpoch is the lower bound for believable account creation
// timestamps: no accounts exist before this time.
var atprotoAccountEpoch = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)

// plausibleAccountCreation reports whether an account creation timestamp is
// believable. It returns false when the pointer is nil, when the time is not
// strictly after atprotoAccountEpoch, or when it lies more than one hour ahead
// of the current time; otherwise it returns true.
func plausibleAccountCreation(when *time.Time) bool {
	switch {
	case when == nil:
		return false
	case !when.After(atprotoAccountEpoch):
		// mostly catches misconfigurations or null values (eg, UNIX epoch
		// zero means "unknown", not actually 1970)
		return false
	case when.After(time.Now().Add(time.Hour)):
		// a timestamp in the future would also indicate some misconfiguration
		return false
	}
	return true
}

// AccountIsYoungerThan reports whether the account was created less than age
// ago. The public creation timestamp (Account.CreatedAt) is consulted first;
// if it is missing or implausible, the private metadata's IndexedAt timestamp
// is used instead. When neither timestamp is available and plausible, the
// result is false.
func AccountIsYoungerThan(c *automod.AccountContext, age time.Duration) bool {
	// TODO: consider swapping priority order here (and below)
	if created := c.Account.CreatedAt; created != nil && plausibleAccountCreation(created) {
		elapsed := time.Since(*created)
		return elapsed < age
	}
	if private := c.Account.Private; private != nil && plausibleAccountCreation(private.IndexedAt) {
		elapsed := time.Since(*private.IndexedAt)
		return elapsed < age
	}
	return false
}

// AccountIsOlderThan reports whether the account was created at least age
// ago. The public creation timestamp (Account.CreatedAt) is consulted first;
// if it is missing or implausible, the private metadata's IndexedAt timestamp
// is used instead. When neither timestamp is available and plausible, the
// result is false (so this is not simply the negation of AccountIsYoungerThan).
func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool {
	if created := c.Account.CreatedAt; created != nil && plausibleAccountCreation(created) {
		elapsed := time.Since(*created)
		return elapsed >= age
	}
	if private := c.Account.Private; private != nil && plausibleAccountCreation(private.IndexedAt) {
		elapsed := time.Since(*private.IndexedAt)
		return elapsed >= age
	}
	return false
}
61 changes: 61 additions & 0 deletions automod/helpers/account_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package helpers

import (
"testing"
"time"

"github.com/bluesky-social/indigo/atproto/identity"
"github.com/bluesky-social/indigo/atproto/syntax"
"github.com/bluesky-social/indigo/automod"
"github.com/stretchr/testify/assert"
)

// TestAccountIsYoungerThan exercises AccountIsYoungerThan and
// AccountIsOlderThan together against one account context whose creation
// metadata is changed step by step: absent, just created, a day old,
// implausibly old, in the far future, and finally private-only metadata.
func TestAccountIsYoungerThan(t *testing.T) {
	assert := assert.New(t)

	now := time.Now()
	ac := automod.AccountContext{
		Account: automod.AccountMeta{
			Identity: &identity.Identity{
				DID:    syntax.DID("did:plc:abc111"),
				Handle: syntax.Handle("handle.example.com"),
			},
			Profile: automod.ProfileSummary{},
			Private: nil,
		},
	}

	// expect checks both predicates for the same age threshold
	expect := func(age time.Duration, younger, older bool) {
		t.Helper()
		assert.Equal(younger, AccountIsYoungerThan(&ac, age))
		assert.Equal(older, AccountIsOlderThan(&ac, age))
	}

	// no creation metadata at all: neither predicate holds
	expect(time.Hour, false, false)

	// created just now
	ac.Account.CreatedAt = &now
	expect(time.Hour, true, false)

	// created a day ago
	yesterday := time.Now().Add(-1 * time.Hour * 24)
	ac.Account.CreatedAt = &yesterday
	expect(time.Hour, false, true)

	// implausibly old timestamp: treated as unknown regardless of threshold
	old := time.Date(1990, 1, 1, 0, 0, 0, 0, time.UTC)
	ac.Account.CreatedAt = &old
	expect(time.Hour, false, false)
	expect(time.Hour*24*365*100, false, false)

	// timestamp in the far future: also treated as unknown
	future := time.Date(3000, 1, 1, 0, 0, 0, 0, time.UTC)
	ac.Account.CreatedAt = &future
	expect(time.Hour, false, false)

	// only private metadata available
	ac.Account.CreatedAt = nil
	ac.Account.Private = &automod.AccountPrivate{
		Email:     "[email protected]",
		IndexedAt: &yesterday,
	}
	expect(48*time.Hour, true, false)
	expect(time.Hour, false, true)
}
81 changes: 4 additions & 77 deletions automod/rules/helpers.go → automod/helpers/bsky.go
Original file line number Diff line number Diff line change
@@ -1,30 +1,14 @@
package rules
package helpers

import (
"fmt"
"regexp"
"time"

appbsky "github.com/bluesky-social/indigo/api/bsky"
"github.com/bluesky-social/indigo/atproto/syntax"
"github.com/bluesky-social/indigo/automod"
"github.com/bluesky-social/indigo/automod/keyword"

"github.com/spaolacci/murmur3"
)

// dedupeStrings returns the distinct values of in, keeping the order in which
// each value first appears. The result is nil when in has no elements.
func dedupeStrings(in []string) []string {
	var uniq []string
	seen := map[string]struct{}{}
	for _, s := range in {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		uniq = append(uniq, s)
	}
	return uniq
}

func ExtractHashtagsPost(post *appbsky.FeedPost) []string {
var tags []string
for _, tag := range post.Tags {
Expand All @@ -37,7 +21,7 @@ func ExtractHashtagsPost(post *appbsky.FeedPost) []string {
}
}
}
return dedupeStrings(tags)
return DedupeStrings(tags)
}

func NormalizeHashtag(raw string) string {
Expand Down Expand Up @@ -103,7 +87,7 @@ func ExtractPostBlobCIDsPost(post *appbsky.FeedPost) []string {
}
}
}
return dedupeStrings(out)
return DedupeStrings(out)
}

func ExtractBlobCIDsProfile(profile *appbsky.ActorProfile) []string {
Expand All @@ -114,7 +98,7 @@ func ExtractBlobCIDsProfile(profile *appbsky.ActorProfile) []string {
if profile.Banner != nil {
out = append(out, profile.Banner.Ref.String())
}
return dedupeStrings(out)
return DedupeStrings(out)
}

func ExtractTextTokensPost(post *appbsky.FeedPost) []string {
Expand Down Expand Up @@ -152,13 +136,6 @@ func ExtractTextTokensProfile(profile *appbsky.ActorProfile) []string {
return keyword.TokenizeText(s)
}

// urlRegex matches URL-like substrings, with or without an http(s)/ftp scheme.
// Based on: https://stackoverflow.com/a/48769624, with no trailing period allowed.
var urlRegex = regexp.MustCompile(`(?:(?:https?|ftp):\/\/)?[\w/\-?=%.]+\.[\w/\-&?=%.]*[\w/\-&?=%]+`)

// ExtractTextURLs returns every non-overlapping substring of raw that matches
// urlRegex, in order of appearance.
func ExtractTextURLs(raw string) []string {
	const noLimit = -1 // a negative count asks FindAllString for all matches
	found := urlRegex.FindAllString(raw, noLimit)
	return found
}

func ExtractTextURLsProfile(profile *appbsky.ActorProfile) []string {
s := ""
if profile.Description != nil {
Expand Down Expand Up @@ -191,14 +168,6 @@ func IsSelfThread(c *automod.RecordContext, post *appbsky.FeedPost) bool {
return false
}

// HashOfString returns a fast, compact hash of a string.
//
// The current implementation uses murmur3 (64-bit sum, default seed) and
// encodes the result as 16 zero-padded lowercase hex characters.
func HashOfString(s string) string {
	return fmt.Sprintf("%016x", murmur3.Sum64([]byte(s)))
}

func ParentOrRootIsFollower(c *automod.RecordContext, post *appbsky.FeedPost) bool {
if post.Reply == nil || IsSelfThread(c, post) {
return false
Expand Down Expand Up @@ -242,48 +211,6 @@ func ParentOrRootIsFollower(c *automod.RecordContext, post *appbsky.FeedPost) bo
return false
}

// atprotoAccountEpoch marks the earliest believable account creation time:
// no accounts exist before this time.
var atprotoAccountEpoch = time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)

// plausibleAccountCreation reports whether an account creation timestamp is
// believable: non-nil, strictly after atprotoAccountEpoch, and no more than
// one hour ahead of the current time.
func plausibleAccountCreation(when *time.Time) bool {
	if when == nil {
		return false
	}
	// a value at or before the epoch usually signals a misconfiguration or a
	// null value (eg, UNIX epoch zero means "unknown", not actually 1970)
	afterEpoch := when.After(atprotoAccountEpoch)
	// a timestamp in the future would also indicate some misconfiguration
	latestAllowed := time.Now().Add(time.Hour)
	return afterEpoch && !when.After(latestAllowed)
}

// AccountIsYoungerThan reports whether the account was created less than age
// ago, based on either public or private account metadata. The public
// CreatedAt timestamp takes priority; the private IndexedAt timestamp is the
// fallback. If no plausible timestamp is available, it returns false.
func AccountIsYoungerThan(c *automod.AccountContext, age time.Duration) bool {
	acct := &c.Account
	// TODO: consider swapping priority order here (and below)
	switch {
	case acct.CreatedAt != nil && plausibleAccountCreation(acct.CreatedAt):
		return time.Since(*acct.CreatedAt) < age
	case acct.Private != nil && plausibleAccountCreation(acct.Private.IndexedAt):
		return time.Since(*acct.Private.IndexedAt) < age
	default:
		return false
	}
}

// AccountIsOlderThan reports whether the account was created at least age
// ago, based on either public or private account metadata. The public
// CreatedAt timestamp takes priority; the private IndexedAt timestamp is the
// fallback. If no plausible timestamp is available, it returns false.
func AccountIsOlderThan(c *automod.AccountContext, age time.Duration) bool {
	acct := &c.Account
	switch {
	case acct.CreatedAt != nil && plausibleAccountCreation(acct.CreatedAt):
		return time.Since(*acct.CreatedAt) >= age
	case acct.Private != nil && plausibleAccountCreation(acct.Private.IndexedAt):
		return time.Since(*acct.Private.IndexedAt) >= age
	default:
		return false
	}
}

func PostParentOrRootIsDid(post *appbsky.FeedPost, did string) bool {
if post.Reply == nil {
return false
Expand Down
112 changes: 1 addition & 111 deletions automod/rules/helpers_test.go → automod/helpers/bsky_test.go
Original file line number Diff line number Diff line change
@@ -1,123 +1,13 @@
package rules
package helpers

import (
comatproto "github.com/bluesky-social/indigo/api/atproto"
appbsky "github.com/bluesky-social/indigo/api/bsky"
"testing"
"time"

"github.com/bluesky-social/indigo/atproto/identity"
"github.com/bluesky-social/indigo/atproto/syntax"
"github.com/bluesky-social/indigo/automod"
"github.com/bluesky-social/indigo/automod/keyword"
"github.com/stretchr/testify/assert"
)

// TestTokenizeText checks keyword.TokenizeText against a few fixed inputs and
// their expected token lists.
func TestTokenizeText(t *testing.T) {
	assert := assert.New(t)

	type tokenCase struct {
		input string
		want  []string
	}
	cases := []tokenCase{
		{input: "1 'Two' three!", want: []string{"1", "two", "three"}},
		{input: " foo1;bar2,baz3...", want: []string{"foo1", "bar2", "baz3"}},
		{input: "https://example.com/index.html", want: []string{"https", "example", "com", "index", "html"}},
	}

	for i := range cases {
		tokens := keyword.TokenizeText(cases[i].input)
		assert.Equal(cases[i].want, tokens)
	}
}

// TestExtractURL checks ExtractTextURLs against free-form text containing
// bare domains and full URLs, including ones followed by punctuation.
func TestExtractURL(t *testing.T) {
	assert := assert.New(t)

	type urlCase struct {
		text string
		want []string
	}
	cases := []urlCase{
		{
			text: "this is a description with example.com mentioned in the middle",
			want: []string{"example.com"},
		},
		{
			text: "this is another example with https://en.wikipedia.org/index.html: and archive.org, and https://eff.org/... and bsky.app.",
			want: []string{"https://en.wikipedia.org/index.html", "archive.org", "https://eff.org/", "bsky.app"},
		},
	}

	for i := range cases {
		urls := ExtractTextURLs(cases[i].text)
		assert.Equal(cases[i].want, urls)
	}
}

// TestHashOfString pins the output of HashOfString for a known input.
func TestHashOfString(t *testing.T) {
	assert := assert.New(t)

	// hashing function should be consistent over time
	const want = "4e6f69c0e3d10992"
	got := HashOfString("dummy-value")
	assert.Equal(want, got)
}

// TestAccountIsYoungerThan walks one account context through several
// creation-metadata states and checks both AccountIsYoungerThan and
// AccountIsOlderThan at each step.
func TestAccountIsYoungerThan(t *testing.T) {
	assert := assert.New(t)

	const century = time.Hour * 24 * 365 * 100
	var (
		now       = time.Now()
		yesterday = time.Now().Add(-1 * time.Hour * 24)
		old       = time.Date(1990, 1, 1, 0, 0, 0, 0, time.UTC)
		future    = time.Date(3000, 1, 1, 0, 0, 0, 0, time.UTC)
	)

	meta := automod.AccountMeta{
		Identity: &identity.Identity{
			DID:    syntax.DID("did:plc:abc111"),
			Handle: syntax.Handle("handle.example.com"),
		},
		Profile: automod.ProfileSummary{},
		Private: nil,
	}
	acctCtx := &automod.AccountContext{Account: meta}

	// step 1: no timestamps known
	assert.False(AccountIsYoungerThan(acctCtx, time.Hour))
	assert.False(AccountIsOlderThan(acctCtx, time.Hour))

	// step 2: public timestamp is "now"
	acctCtx.Account.CreatedAt = &now
	assert.True(AccountIsYoungerThan(acctCtx, time.Hour))
	assert.False(AccountIsOlderThan(acctCtx, time.Hour))

	// step 3: public timestamp is one day back
	acctCtx.Account.CreatedAt = &yesterday
	assert.False(AccountIsYoungerThan(acctCtx, time.Hour))
	assert.True(AccountIsOlderThan(acctCtx, time.Hour))

	// step 4: public timestamp predates the account epoch, so it is ignored
	acctCtx.Account.CreatedAt = &old
	assert.False(AccountIsYoungerThan(acctCtx, time.Hour))
	assert.False(AccountIsYoungerThan(acctCtx, century))
	assert.False(AccountIsOlderThan(acctCtx, time.Hour))
	assert.False(AccountIsOlderThan(acctCtx, century))

	// step 5: public timestamp is far in the future, so it is ignored
	acctCtx.Account.CreatedAt = &future
	assert.False(AccountIsYoungerThan(acctCtx, time.Hour))
	assert.False(AccountIsOlderThan(acctCtx, time.Hour))

	// step 6: fall back to the private indexed-at timestamp
	acctCtx.Account.CreatedAt = nil
	acctCtx.Account.Private = &automod.AccountPrivate{
		Email:     "[email protected]",
		IndexedAt: &yesterday,
	}
	assert.True(AccountIsYoungerThan(acctCtx, 48*time.Hour))
	assert.False(AccountIsYoungerThan(acctCtx, time.Hour))
	assert.True(AccountIsOlderThan(acctCtx, time.Hour))
	assert.False(AccountIsOlderThan(acctCtx, 48*time.Hour))
}

func TestParentOrRootIsDid(t *testing.T) {
assert := assert.New(t)

Expand Down
Loading

0 comments on commit 983ce4a

Please sign in to comment.