From c8b4be8da93024532825db58f80a771548f2512a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 1 Oct 2023 15:56:10 -0700 Subject: [PATCH 01/10] syntax: basic CID (string only) syntax --- atproto/syntax/cid.go | 48 ++++++++++++++++++ atproto/syntax/cid_test.go | 50 +++++++++++++++++++ .../syntax/testdata/cid_syntax_invalid.txt | 16 ++++++ atproto/syntax/testdata/cid_syntax_valid.txt | 14 ++++++ 4 files changed, 128 insertions(+) create mode 100644 atproto/syntax/cid.go create mode 100644 atproto/syntax/cid_test.go create mode 100644 atproto/syntax/testdata/cid_syntax_invalid.txt create mode 100644 atproto/syntax/testdata/cid_syntax_valid.txt diff --git a/atproto/syntax/cid.go b/atproto/syntax/cid.go new file mode 100644 index 000000000..c9aa0de7e --- /dev/null +++ b/atproto/syntax/cid.go @@ -0,0 +1,48 @@ +package syntax + +import ( + "fmt" + "regexp" + "strings" +) + +// Represents a CIDv1 in string format, as would pass Lexicon syntax validation. +// +// You usually want to use the github.com/ipfs/go-cid package and type when working with CIDs ("Links") in atproto. This specific type (syntax.CID) is an informal/incomplete helper specifically for doing fast string verification or pass-through without parsing, re-serialization, or normalization. +// +// Always use [ParseCID] instead of wrapping strings directly, especially when working with network input. 
+type CID string + +func ParseCID(raw string) (CID, error) { + if len(raw) > 256 { + return "", fmt.Errorf("CID is too long (256 chars max)") + } + if len(raw) < 8 { + return "", fmt.Errorf("CID is too short (8 chars min)") + } + var cidRegex = regexp.MustCompile(`^[a-zA-Z0-9+=]{8,256}$`) + if !cidRegex.MatchString(raw) { + return "", fmt.Errorf("CID syntax didn't validate via regex") + } + if strings.HasPrefix(raw, "Qmb") { + return "", fmt.Errorf("CIDv0 not allowed in this version of atproto") + } + return CID(raw), nil +} + +func (c CID) String() string { + return string(c) +} + +func (c CID) MarshalText() ([]byte, error) { + return []byte(c.String()), nil +} + +func (c *CID) UnmarshalText(text []byte) error { + cid, err := ParseCID(string(text)) + if err != nil { + return err + } + *c = cid + return nil +} diff --git a/atproto/syntax/cid_test.go b/atproto/syntax/cid_test.go new file mode 100644 index 000000000..7da07c6c2 --- /dev/null +++ b/atproto/syntax/cid_test.go @@ -0,0 +1,50 @@ +package syntax + +import ( + "bufio" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInteropCIDsValid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/cid_syntax_valid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseCID(line) + if err != nil { + fmt.Println("GOOD: " + line) + } + assert.NoError(err) + } + assert.NoError(scanner.Err()) +} + +func TestInteropCIDsInvalid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/cid_syntax_invalid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseCID(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + } + 
assert.NoError(scanner.Err()) +} diff --git a/atproto/syntax/testdata/cid_syntax_invalid.txt b/atproto/syntax/testdata/cid_syntax_invalid.txt new file mode 100644 index 000000000..5bd1d007c --- /dev/null +++ b/atproto/syntax/testdata/cid_syntax_invalid.txt @@ -0,0 +1,16 @@ +example.com +https://example.com +cid:bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi +. +12345 + +# whitespace + bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi +bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi +bafybe igdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi + +# old CIDv0 not supported +QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR + +# https://github.com/ipfs-shipyard/is-ipfs/blob/master/test/test-cid.spec.ts +noop diff --git a/atproto/syntax/testdata/cid_syntax_valid.txt b/atproto/syntax/testdata/cid_syntax_valid.txt new file mode 100644 index 000000000..9dd3dfed8 --- /dev/null +++ b/atproto/syntax/testdata/cid_syntax_valid.txt @@ -0,0 +1,14 @@ + +# examples from https://docs.ipfs.tech/concepts/content-addressing +bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi + +# https://github.com/ipfs-shipyard/is-ipfs/blob/master/test/test-cid.spec.ts +zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7 +bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va + +# more contrived examples +mBcDxtdWx0aWhhc2g+ +z7x3CtScH765HvShXT +zdj7WhuEjrB52m1BisYCtmjH1hSKa7yZ3jEZ9JcXaFRD51wVz +7134036155352661643226414134664076 +f017012202c5f688262e0ece8569aa6f94d60aad55ca8d9d83734e4a7430d0cff6588ec2b From 72737907a3784fb35c72d518909b3edee79ae2bc Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 1 Oct 2023 16:03:08 -0700 Subject: [PATCH 02/10] syntax: partial TID type (validation-only) --- .../syntax/testdata/tid_syntax_invalid.txt | 15 ++++++ atproto/syntax/testdata/tid_syntax_valid.txt | 6 +++ atproto/syntax/tid.go | 43 ++++++++++++++++ atproto/syntax/tid_test.go | 50 +++++++++++++++++++ 4 files changed, 114 insertions(+) create mode 100644 
atproto/syntax/testdata/tid_syntax_invalid.txt create mode 100644 atproto/syntax/testdata/tid_syntax_valid.txt create mode 100644 atproto/syntax/tid.go create mode 100644 atproto/syntax/tid_test.go diff --git a/atproto/syntax/testdata/tid_syntax_invalid.txt b/atproto/syntax/testdata/tid_syntax_invalid.txt new file mode 100644 index 000000000..eca90b2db --- /dev/null +++ b/atproto/syntax/testdata/tid_syntax_invalid.txt @@ -0,0 +1,15 @@ + +# not base32 +3jzfcijpj2z21 +0000000000000 + +# too long/short +3jzfcijpj2z2aa +3jzfcijpj2z2 + +# old dashes syntax not actually supported (TTTT-TTT-TTTT-CC) +3jzf-cij-pj2z-2a + +# high bit can't be high +zzzzzzzzzzzzz +kjzfcijpj2z2a diff --git a/atproto/syntax/testdata/tid_syntax_valid.txt b/atproto/syntax/testdata/tid_syntax_valid.txt new file mode 100644 index 000000000..b161a3fe1 --- /dev/null +++ b/atproto/syntax/testdata/tid_syntax_valid.txt @@ -0,0 +1,6 @@ +# 13 digits +# 234567abcdefghijklmnopqrstuvwxyz + +3jzfcijpj2z2a +7777777777777 +3zzzzzzzzzzzz diff --git a/atproto/syntax/tid.go b/atproto/syntax/tid.go new file mode 100644 index 000000000..47f3ebda4 --- /dev/null +++ b/atproto/syntax/tid.go @@ -0,0 +1,43 @@ +package syntax + +import ( + "fmt" + "regexp" +) + +// Represents a TID in string format, as would pass Lexicon syntax validation. +// +// Always use [ParseTID] instead of wrapping strings directly, especially when working with network input. 
+// +// Syntax specification: https://atproto.com/specs/record-key +type TID string + +func ParseTID(raw string) (TID, error) { + if len(raw) != 13 { + return "", fmt.Errorf("TID is wrong length (expected 13 chars)") + } + var tidRegex = regexp.MustCompile(`^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$`) + if !tidRegex.MatchString(raw) { + return "", fmt.Errorf("TID syntax didn't validate via regex") + } + return TID(raw), nil +} + +// TODO: additional helpers: to timestamp, from timestamp, from integer, etc + +func (t TID) String() string { + return string(t) +} + +func (t TID) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +func (t *TID) UnmarshalText(text []byte) error { + tid, err := ParseTID(string(text)) + if err != nil { + return err + } + *t = tid + return nil +} diff --git a/atproto/syntax/tid_test.go b/atproto/syntax/tid_test.go new file mode 100644 index 000000000..9d2c883a9 --- /dev/null +++ b/atproto/syntax/tid_test.go @@ -0,0 +1,50 @@ +package syntax + +import ( + "bufio" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInteropTIDsValid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/tid_syntax_valid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseTID(line) + if err != nil { + fmt.Println("GOOD: " + line) + } + assert.NoError(err) + } + assert.NoError(scanner.Err()) +} + +func TestInteropTIDsInvalid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/tid_syntax_invalid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseTID(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + } + 
assert.NoError(scanner.Err()) +} From fe1f4f013673e9138a330f792b6bb470964118e5 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 1 Oct 2023 16:36:51 -0700 Subject: [PATCH 03/10] syntax: basic datetime parsing --- atproto/syntax/datetime.go | 43 +++++++++++ atproto/syntax/datetime_test.go | 50 +++++++++++++ .../testdata/datetime_syntax_invalid.txt | 72 +++++++++++++++++++ .../syntax/testdata/datetime_syntax_valid.txt | 20 ++++++ 4 files changed, 185 insertions(+) create mode 100644 atproto/syntax/datetime.go create mode 100644 atproto/syntax/datetime_test.go create mode 100644 atproto/syntax/testdata/datetime_syntax_invalid.txt create mode 100644 atproto/syntax/testdata/datetime_syntax_valid.txt diff --git a/atproto/syntax/datetime.go b/atproto/syntax/datetime.go new file mode 100644 index 000000000..28279cb11 --- /dev/null +++ b/atproto/syntax/datetime.go @@ -0,0 +1,43 @@ +package syntax + +import ( + "fmt" + "regexp" + "strings" +) + +// Represents the a Datetime in string format, as would pass Lexicon syntax validation: the intersection of RFC-3339 and ISO-8601 syntax. +// +// Always use [ParseDatetime] instead of wrapping strings directly, especially when working with network input. 
+type Datetime string + +func ParseDatetime(raw string) (Datetime, error) { + if len(raw) > 64 { + return "", fmt.Errorf("Datetime too long (max 64 chars)") + } + var datetimeRegex = regexp.MustCompile(`^[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9](.[0-9]{1,20})?(Z|([+-][0-2][0-9]:[0-5][0-9]))$`) + if !datetimeRegex.MatchString(raw) { + return "", fmt.Errorf("Datetime syntax didn't validate via regex") + } + if strings.HasSuffix(raw, "-00:00") { + return "", fmt.Errorf("Datetime can't use '-00:00' for UTC timezone, must use '+00:00', per ISO-8601") + } + return Datetime(raw), nil +} + +func (d Datetime) String() string { + return string(d) +} + +func (d Datetime) MarshalText() ([]byte, error) { + return []byte(d.String()), nil +} + +func (d *Datetime) UnmarshalText(text []byte) error { + datetime, err := ParseDatetime(string(text)) + if err != nil { + return err + } + *d = datetime + return nil +} diff --git a/atproto/syntax/datetime_test.go b/atproto/syntax/datetime_test.go new file mode 100644 index 000000000..0e93cb17d --- /dev/null +++ b/atproto/syntax/datetime_test.go @@ -0,0 +1,50 @@ +package syntax + +import ( + "bufio" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInteropDatetimesValid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/datetime_syntax_valid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseDatetime(line) + if err != nil { + fmt.Println("GOOD: " + line) + } + assert.NoError(err) + } + assert.NoError(scanner.Err()) +} + +func TestInteropDatetimesInvalid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/datetime_syntax_invalid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] 
== '#' { + continue + } + _, err := ParseDatetime(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + } + assert.NoError(scanner.Err()) +} diff --git a/atproto/syntax/testdata/datetime_syntax_invalid.txt b/atproto/syntax/testdata/datetime_syntax_invalid.txt new file mode 100644 index 000000000..c4a0e764e --- /dev/null +++ b/atproto/syntax/testdata/datetime_syntax_invalid.txt @@ -0,0 +1,72 @@ + +# subtle changes to: 1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.123z +01985-04-12T23:20:50.123Z +985-04-12T23:20:50.123Z +1985-04-12T23:20:50.Z +1985-04-32T23;20:50.123Z +1985-04-32T23;20:50.123Z + +# en-dash and em-dash +1985—04-32T23;20:50.123Z +1985–04-32T23;20:50.123Z + +# whitespace + 1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.123Z +1985-04-12T 23:20:50.123Z + +# TODO: full parse to validate? +#1985-00-12T23:20:50.123Z +#1985-04-00T23:20:50.123Z +#1985-13-12T23:20:50.123Z +#1985-04-12T25:20:50.123Z +#1985-04-12T23:61:50.123Z +#1985-04-12T23:20:61.123Z + +# not enough zero padding +1985-4-12T23:20:50.123Z +1985-04-2T23:20:50.123Z +1985-04-12T3:20:50.123Z +1985-04-12T23:0:50.123Z +1985-04-12T23:20:5.123Z + +# too much zero padding +01985-04-12T23:20:50.123Z +1985-004-12T23:20:50.123Z +1985-04-012T23:20:50.123Z +1985-04-12T023:20:50.123Z +1985-04-12T23:020:50.123Z +1985-04-12T23:20:050.123Z + +# strict capitalization (ISO-8601) +1985-04-12t23:20:50.123Z +1985-04-12T23:20:50.123z + +# RFC-3339, but not ISO-8601 +1985-04-12T23:20:50.123-00:00 +1985-04-12_23:20:50.123Z +1985-04-12 23:20:50.123Z + +# ISO-8601, but weird +1985-04-274T23:20:50.123Z + +# timezone is required +1985-04-12T23:20:50.123 +1985-04-12T23:20:50 + +1985-04-12 +1985-04-12T23:20Z +1985-04-12T23:20:5Z +1985-04-12T23:20:50.123 ++001985-04-12T23:20:50.123Z +23:20:50.123Z + +1985-04-12T23:20:50.123+00 +1985-04-12T23:20:50.123+00:0 +1985-04-12T23:20:50.123+0:00 +1985-04-12T23:20:50.123 +1985-04-12T23:20:50.123+0000 +1985-04-12T23:20:50.123+00 +1985-04-12T23:20:50.123+ 
+1985-04-12T23:20:50.123- diff --git a/atproto/syntax/testdata/datetime_syntax_valid.txt b/atproto/syntax/testdata/datetime_syntax_valid.txt new file mode 100644 index 000000000..798c74855 --- /dev/null +++ b/atproto/syntax/testdata/datetime_syntax_valid.txt @@ -0,0 +1,20 @@ +# "preferred" +1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.000Z +2000-01-01T00:00:00.000Z +1985-04-12T23:20:50.123456Z +1985-04-12T23:20:50.120Z +1985-04-12T23:20:50.120000Z + +# "supported" +1985-04-12T23:20:50.1235678912345Z +1985-04-12T23:20:50.100Z +1985-04-12T23:20:50Z +1985-04-12T23:20:50.0Z +1985-04-12T23:20:50.123+00:00 +1985-04-12T23:20:50.123-07:00 +1985-04-12T23:20:50.123+07:00 +1985-04-12T23:20:50.123+01:45 +0985-04-12T23:20:50.123-07:00 +1985-04-12T23:20:50.123-07:00 +0123-01-01T00:00:00.000Z From 61cbb45b803e15a6f651dffb0cea50cb30ba5515 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 1 Oct 2023 16:37:24 -0700 Subject: [PATCH 04/10] syntax: basic language code (BCP-47) validation --- atproto/syntax/language.go | 41 +++++++++++++++ atproto/syntax/language_test.go | 50 +++++++++++++++++++ .../testdata/language_syntax_invalid.txt | 10 ++++ .../syntax/testdata/language_syntax_valid.txt | 18 +++++++ 4 files changed, 119 insertions(+) create mode 100644 atproto/syntax/language.go create mode 100644 atproto/syntax/language_test.go create mode 100644 atproto/syntax/testdata/language_syntax_invalid.txt create mode 100644 atproto/syntax/testdata/language_syntax_valid.txt diff --git a/atproto/syntax/language.go b/atproto/syntax/language.go new file mode 100644 index 000000000..550801f23 --- /dev/null +++ b/atproto/syntax/language.go @@ -0,0 +1,41 @@ +package syntax + +import ( + "fmt" + "regexp" +) + +// Represents a Language specifier in string format, as would pass Lexicon syntax validation. +// +// Always use [ParseLanguage] instead of wrapping strings directly, especially when working with network input. +// +// The syntax is BCP-47. 
This is a partial/naive parsing implementation, designed for fast validation and exact-string passthrough with no normalization. For actually working with BCP-47 language specifiers in atproto code bases, we recommend the golang.org/x/text/language package. +type Language string + +func ParseLanguage(raw string) (Language, error) { + if len(raw) > 128 { + return "", fmt.Errorf("Language is too long (128 chars max)") + } + var langRegex = regexp.MustCompile(`^(i|[a-z]{2,3})(-[a-zA-Z0-9]+)*$`) + if !langRegex.MatchString(raw) { + return "", fmt.Errorf("Language syntax didn't validate via regex") + } + return Language(raw), nil +} + +func (l Language) String() string { + return string(l) +} + +func (l Language) MarshalText() ([]byte, error) { + return []byte(l.String()), nil +} + +func (l *Language) UnmarshalText(text []byte) error { + lang, err := ParseLanguage(string(text)) + if err != nil { + return err + } + *l = lang + return nil +} diff --git a/atproto/syntax/language_test.go b/atproto/syntax/language_test.go new file mode 100644 index 000000000..96556b0ab --- /dev/null +++ b/atproto/syntax/language_test.go @@ -0,0 +1,50 @@ +package syntax + +import ( + "bufio" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInteropLanguagesValid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/language_syntax_valid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseLanguage(line) + if err != nil { + fmt.Println("GOOD: " + line) + } + assert.NoError(err) + } + assert.NoError(scanner.Err()) +} + +func TestInteropLanguagesInvalid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/language_syntax_invalid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if 
len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseLanguage(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + } + assert.NoError(scanner.Err()) +} diff --git a/atproto/syntax/testdata/language_syntax_invalid.txt b/atproto/syntax/testdata/language_syntax_invalid.txt new file mode 100644 index 000000000..9bba437a2 --- /dev/null +++ b/atproto/syntax/testdata/language_syntax_invalid.txt @@ -0,0 +1,10 @@ +jaja +. +123 +JA +j +ja- +a-DE + +# technically not valid, but allowing in naive parser +#de-419-DE diff --git a/atproto/syntax/testdata/language_syntax_valid.txt b/atproto/syntax/testdata/language_syntax_valid.txt new file mode 100644 index 000000000..8a4a5fee8 --- /dev/null +++ b/atproto/syntax/testdata/language_syntax_valid.txt @@ -0,0 +1,18 @@ +ja +ban +pt-BR +hy-Latn-IT-arevela +en-GB +zh-Hant +sgn-BE-NL +es-419 +en-GB-boont-r-extended-sequence-x-private + +# grandfathered +zh-hakka +i-default +i-navajo + +# https://github.com/sebinsua/ietf-language-tag-regex/blob/master/test.js +de-CH-1901 +qaa-Qaaa-QM-x-southern From 35c072e82e30c0c694c5c262b93c0826f66c6e16 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 1 Oct 2023 16:37:50 -0700 Subject: [PATCH 05/10] syntax: basic/generic/permissive URI validation --- .../syntax/testdata/uri_syntax_invalid.txt | 17 +++++++ atproto/syntax/testdata/uri_syntax_valid.txt | 14 ++++++ atproto/syntax/uri.go | 41 +++++++++++++++ atproto/syntax/uri_test.go | 50 +++++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100644 atproto/syntax/testdata/uri_syntax_invalid.txt create mode 100644 atproto/syntax/testdata/uri_syntax_valid.txt create mode 100644 atproto/syntax/uri.go create mode 100644 atproto/syntax/uri_test.go diff --git a/atproto/syntax/testdata/uri_syntax_invalid.txt b/atproto/syntax/testdata/uri_syntax_invalid.txt new file mode 100644 index 000000000..f79a4ba12 --- /dev/null +++ b/atproto/syntax/testdata/uri_syntax_invalid.txt @@ -0,0 +1,17 @@ + +example.com 
+://example.com +//example.com +http: +.http://example.com +-http://example.com +12345 +127.0.0.1 + +https://example.com/path gap + https://example.com/path +https://example.com/trailing-whitespace + +# too long (max 8 kbytes) +# python: "https://example.com/" + 8200 *"x" +https://example. diff --git a/atproto/syntax/testdata/uri_syntax_valid.txt b/atproto/syntax/testdata/uri_syntax_valid.txt new file mode 100644 index 000000000..709a6136a --- /dev/null +++ b/atproto/syntax/testdata/uri_syntax_valid.txt @@ -0,0 +1,14 @@ + +https://example.com +https://example.com/path?q=blah&yes=true#frag.123 +dns:example.com +at://handle.example.com/nsid/rkey +did:key:zQ3shZc2QzApp2oymGvQbzP8eKheVshBHbU4ZYjeXqwSKEn6N +content-type:text/plan +microsoft.windows.camera:thing +go://?Mercedes%20Benz + +# long (but not too long) +# python: "https://example.com/" + 5000*"x" +https://example.com/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + diff --git a/atproto/syntax/uri.go b/atproto/syntax/uri.go new file mode 100644 index 000000000..646fd38da --- 
/dev/null +++ b/atproto/syntax/uri.go @@ -0,0 +1,41 @@ +package syntax + +import ( + "fmt" + "regexp" +) + +// Represents an arbitrary URI in string format, as would pass Lexicon syntax validation. +// +// The syntax is minimal and permissive, designed for fast verification and exact-string passthrough, not schema-specific parsing or validation. For example, will not validate AT-URI or DID strings. +// +// Always use [ParseURI] instead of wrapping strings directly, especially when working with network input. +type URI string + +func ParseURI(raw string) (URI, error) { + if len(raw) > 8192 { + return "", fmt.Errorf("URI is too long (8192 chars max)") + } + var uriRegex = regexp.MustCompile(`^[a-z][a-z.-]{0,80}:[[:graph:]]+$`) + if !uriRegex.MatchString(raw) { + return "", fmt.Errorf("URI syntax didn't validate via regex") + } + return URI(raw), nil +} + +func (u URI) String() string { + return string(u) +} + +func (u URI) MarshalText() ([]byte, error) { + return []byte(u.String()), nil +} + +func (u *URI) UnmarshalText(text []byte) error { + uri, err := ParseURI(string(text)) + if err != nil { + return err + } + *u = uri + return nil +} diff --git a/atproto/syntax/uri_test.go b/atproto/syntax/uri_test.go new file mode 100644 index 000000000..cca4a6d7f --- /dev/null +++ b/atproto/syntax/uri_test.go @@ -0,0 +1,50 @@ +package syntax + +import ( + "bufio" + "fmt" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestInteropURIsValid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/uri_syntax_valid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseURI(line) + if err != nil { + fmt.Println("GOOD: " + line) + } + assert.NoError(err) + } + assert.NoError(scanner.Err()) +} + +func TestInteropURIsInvalid(t *testing.T) { + assert := assert.New(t) + file, err := 
os.Open("testdata/uri_syntax_invalid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := ParseURI(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + } + assert.NoError(scanner.Err()) +} From c1e18c626c7b6a9240b620c597ccdaf11b16532e Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Sun, 1 Oct 2023 17:07:52 -0700 Subject: [PATCH 06/10] syntax: datetime helpers --- atproto/syntax/datetime.go | 31 +++++++++++++++++ atproto/syntax/datetime_test.go | 34 +++++++++++++++++-- .../testdata/datetime_parse_invalid.txt | 7 ++++ .../testdata/datetime_syntax_invalid.txt | 8 ----- .../syntax/testdata/datetime_syntax_valid.txt | 14 ++++++++ 5 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 atproto/syntax/testdata/datetime_parse_invalid.txt diff --git a/atproto/syntax/datetime.go b/atproto/syntax/datetime.go index 28279cb11..36d4419be 100644 --- a/atproto/syntax/datetime.go +++ b/atproto/syntax/datetime.go @@ -4,6 +4,14 @@ import ( "fmt" "regexp" "strings" + "time" +) + +const ( + // Preferred atproto Datetime string syntax, for use with [time.Format]. + // + // Note that *parsing* syntax is more flexible. + AtprotoDatetimeLayout = "2006-01-02T15:04:05.999Z" ) // Represents the a Datetime in string format, as would pass Lexicon syntax validation: the intersection of RFC-3339 and ISO-8601 syntax. @@ -25,6 +33,29 @@ func ParseDatetime(raw string) (Datetime, error) { return Datetime(raw), nil } +// Parses a string to a golang time.Time in a single step. +func ParseDatetimeTime(raw string) (time.Time, error) { + var zero time.Time + d, err := ParseDatetime(raw) + if err != nil { + return zero, err + } + return d.Time() +} + +// Parses the Datetime string into a golang time.Time. 
+// +// There are a small number of strings which will pass initial syntax validation but fail when actually parsing, so this function can return an error. Use [ParseDatetimeTime] to fully parse in a single function call. +func (d Datetime) Time() (time.Time, error) { + return time.Parse(time.RFC3339Nano, d.String()) +} + +// Creates a new valid Datetime string matching the current time, in preferred syntax. +func DatetimeNow() Datetime { + t := time.Now().UTC() + return Datetime(t.Format(AtprotoDatetimeLayout)) +} + func (d Datetime) String() string { return string(d) } diff --git a/atproto/syntax/datetime_test.go b/atproto/syntax/datetime_test.go index 0e93cb17d..a47fb7c15 100644 --- a/atproto/syntax/datetime_test.go +++ b/atproto/syntax/datetime_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" ) -func TestInteropDatetimesValid(t *testing.T) { +func TestInteropDatetimeValid(t *testing.T) { assert := assert.New(t) file, err := os.Open("testdata/datetime_syntax_valid.txt") assert.NoError(err) @@ -20,7 +20,7 @@ func TestInteropDatetimesValid(t *testing.T) { if len(line) == 0 || line[0] == '#' { continue } - _, err := ParseDatetime(line) + _, err := ParseDatetimeTime(line) if err != nil { fmt.Println("GOOD: " + line) } @@ -29,7 +29,7 @@ func TestInteropDatetimesValid(t *testing.T) { assert.NoError(scanner.Err()) } -func TestInteropDatetimesInvalid(t *testing.T) { +func TestInteropDatetimeInvalid(t *testing.T) { assert := assert.New(t) file, err := os.Open("testdata/datetime_syntax_invalid.txt") assert.NoError(err) @@ -48,3 +48,31 @@ func TestInteropDatetimesInvalid(t *testing.T) { } assert.NoError(scanner.Err()) } + +func TestInteropDatetimeTimeInvalid(t *testing.T) { + assert := assert.New(t) + file, err := os.Open("testdata/datetime_parse_invalid.txt") + assert.NoError(err) + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + if len(line) == 0 || line[0] == '#' { + continue + } + _, err := 
ParseDatetimeTime(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + } + assert.NoError(scanner.Err()) +} + +func TestInteropDatetimeNow(t *testing.T) { + assert := assert.New(t) + + dt := DatetimeNow() + _, err := ParseDatetimeTime(dt.String()) + assert.NoError(err) +} diff --git a/atproto/syntax/testdata/datetime_parse_invalid.txt b/atproto/syntax/testdata/datetime_parse_invalid.txt new file mode 100644 index 000000000..1ab8f6d0a --- /dev/null +++ b/atproto/syntax/testdata/datetime_parse_invalid.txt @@ -0,0 +1,7 @@ +# superficial syntax parses ok, but are not valid datetimes for semantic reasons (eg, "month zero") +#1985-00-12T23:20:50.123Z +#1985-04-00T23:20:50.123Z +#1985-13-12T23:20:50.123Z +#1985-04-12T25:20:50.123Z +#1985-04-12T23:61:50.123Z +#1985-04-12T23:20:61.123Z diff --git a/atproto/syntax/testdata/datetime_syntax_invalid.txt b/atproto/syntax/testdata/datetime_syntax_invalid.txt index c4a0e764e..a686f7174 100644 --- a/atproto/syntax/testdata/datetime_syntax_invalid.txt +++ b/atproto/syntax/testdata/datetime_syntax_invalid.txt @@ -16,14 +16,6 @@ 1985-04-12T23:20:50.123Z 1985-04-12T 23:20:50.123Z -# TODO: full parse to validate? 
-#1985-00-12T23:20:50.123Z -#1985-04-00T23:20:50.123Z -#1985-13-12T23:20:50.123Z -#1985-04-12T25:20:50.123Z -#1985-04-12T23:61:50.123Z -#1985-04-12T23:20:61.123Z - # not enough zero padding 1985-4-12T23:20:50.123Z 1985-04-2T23:20:50.123Z diff --git a/atproto/syntax/testdata/datetime_syntax_valid.txt b/atproto/syntax/testdata/datetime_syntax_valid.txt index 798c74855..79ed5cabb 100644 --- a/atproto/syntax/testdata/datetime_syntax_valid.txt +++ b/atproto/syntax/testdata/datetime_syntax_valid.txt @@ -18,3 +18,17 @@ 0985-04-12T23:20:50.123-07:00 1985-04-12T23:20:50.123-07:00 0123-01-01T00:00:00.000Z + +# various precisions, up through at least 12 digits +1985-04-12T23:20:50.1Z +1985-04-12T23:20:50.12Z +1985-04-12T23:20:50.123Z +1985-04-12T23:20:50.1234Z +1985-04-12T23:20:50.12345Z +1985-04-12T23:20:50.123456Z +1985-04-12T23:20:50.1234567Z +1985-04-12T23:20:50.12345678Z +1985-04-12T23:20:50.123456789Z +1985-04-12T23:20:50.1234567890Z +1985-04-12T23:20:50.12345678901Z +1985-04-12T23:20:50.123456789012Z From 33d2f8040d3359aee5559efcec3db0f9fc070b3d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 2 Oct 2023 00:00:50 -0700 Subject: [PATCH 07/10] syntax: TID helpers, including generator 'clock' --- atproto/syntax/tid.go | 91 +++++++++++++++++++++++++++++++++++++- atproto/syntax/tid_test.go | 77 ++++++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 1 deletion(-) diff --git a/atproto/syntax/tid.go b/atproto/syntax/tid.go index 47f3ebda4..00176974a 100644 --- a/atproto/syntax/tid.go +++ b/atproto/syntax/tid.go @@ -1,10 +1,22 @@ package syntax import ( + "encoding/base32" "fmt" "regexp" + "strings" + "sync" + "time" ) +const ( + Base32SortAlphabet = "234567abcdefghijklmnopqrstuvwxyz" +) + +func Base32Sort() *base32.Encoding { + return base32.NewEncoding(Base32SortAlphabet).WithPadding(base32.NoPadding) +} + // Represents a TID in string format, as would pass Lexicon syntax validation. 
// // Always use [ParseTID] instead of wrapping strings directly, especially when working with network input. @@ -23,7 +35,58 @@ func ParseTID(raw string) (TID, error) { return TID(raw), nil } -// TODO: additional helpers: to timestamp, from timestamp, from integer, etc +// Naive (unsafe) one-off TID generation with the current time. +// +// You should usually use a [TIDClock] to ensure monotonic output. +func NewTIDNow(clockId uint) TID { + return NewTID(time.Now().UTC().UnixMicro(), clockId) +} + +func NewTIDFromInteger(v uint64) TID { + v = (0x7FFF_FFFF_FFFF_FFFF & v) + s := "" + for i := 0; i < 13; i++ { + s = string(Base32SortAlphabet[v&0x1F]) + s + v = v >> 5 + } + return TID(s) +} + +// Constructs a new TID from a UNIX timestamp (in milliseconds) and clock ID value. +func NewTID(unixMilis int64, clockId uint) TID { + var v uint64 = (uint64(unixMilis&0x1F_FFFF_FFFF_FFFF) << 10) | uint64(clockId&0x3FF) + return NewTIDFromInteger(v) +} + +// Returns full integer representation of this TID (not used often) +func (t TID) Integer() uint64 { + s := t.String() + if len(s) != 13 { + return 0 + } + var v uint64 + for i := 0; i < 13; i++ { + c := strings.IndexByte(Base32SortAlphabet, s[i]) + if c < 0 { + return 0 + } + v = (v << 5) | uint64(c&0x1F) + } + return v +} + +// Returns the golang [time.Time] corresponding to this TID's timestamp. +func (t TID) Time() time.Time { + i := t.Integer() + i = (i >> 10) & 0x1FFF_FFFF_FFFF_FFFF + return time.UnixMicro(int64(i)).UTC() +} + +// Returns the clock ID part of this TID, as an unsigned integer +func (t TID) ClockID() uint { + i := t.Integer() + return uint(i & 0x3FF) +} func (t TID) String() string { return string(t) @@ -41,3 +104,29 @@ func (t *TID) UnmarshalText(text []byte) error { *t = tid return nil } + +// TID generator, which keeps state to ensure TID values always monotonically increase. +// +// Uses [sync.Mutex], so may block briefly but safe for concurrent use. 
+type TIDClock struct { + ClockID uint + mtx sync.Mutex + lastUnixMicro int64 +} + +func NewTIDClock(clockId uint) *TIDClock { + return &TIDClock{ + ClockID: clockId, + } +} + +func (c *TIDClock) Next() TID { + now := time.Now().UTC().UnixMicro() + c.mtx.Lock() + if now <= c.lastUnixMicro { + now = c.lastUnixMicro + 1 + } + c.lastUnixMicro = now + c.mtx.Unlock() + return NewTID(now, c.ClockID) +} diff --git a/atproto/syntax/tid_test.go b/atproto/syntax/tid_test.go index 9d2c883a9..9a7b61e8a 100644 --- a/atproto/syntax/tid_test.go +++ b/atproto/syntax/tid_test.go @@ -5,6 +5,7 @@ import ( "fmt" "os" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -48,3 +49,79 @@ func TestInteropTIDsInvalid(t *testing.T) { } assert.NoError(scanner.Err()) } + +func TestTIDParts(t *testing.T) { + assert := assert.New(t) + + raw := "3kao2cl6lyj2p" + tid, err := ParseTID(raw) + assert.NoError(err) + // TODO: assert.Equal(uint64(0x181a8044491f3bec), tid.Integer()) + // TODO: assert.Equal(uint(1004), tid.ClockID()) + assert.Equal(2023, tid.Time().Year()) + + out := NewTID(tid.Time().UnixMicro(), tid.ClockID()) + assert.Equal(raw, out.String()) + assert.Equal(tid.ClockID(), out.ClockID()) + assert.Equal(tid.Time(), out.Time()) + assert.Equal(tid.Integer(), out.Integer()) + + out2 := NewTIDFromInteger(tid.Integer()) + assert.Equal(tid.Integer(), out2.Integer()) +} + +func TestTIDExamples(t *testing.T) { + assert := assert.New(t) + // TODO: seems like TS code might be wrong? 
"242k52k4kg3s2" + assert.Equal("242k52k4kg3sc", NewTIDFromInteger(0x0102030405060708).String()) + assert.Equal(uint64(0x0102030405060708), TID("242k52k4kg3sc").Integer()) + //assert.Equal("2222222222222", NewTIDFromInteger(0x0000000000000000).String()) + //assert.Equal(uint64(), TID("242k52k4kg3s2").Integer()) + assert.Equal("2222222222223", NewTIDFromInteger(0x0000000000000001).String()) + assert.Equal(uint64(0x0000000000000001), TID("2222222222223").Integer()) + + assert.Equal("6222222222222", NewTIDFromInteger(0x4000000000000000).String()) + assert.Equal(uint64(0x4000000000000000), TID("6222222222222").Integer()) + + // ignoring type byte + assert.Equal("2222222222222", NewTIDFromInteger(0x8000000000000000).String()) +} + +func TestTIDNoPanic(t *testing.T) { + for _, s := range []string{"", "3jzfcijpj2z2aa", "3jzfcijpj2z2", ".."} { + bad := TID(s) + _ = bad.ClockID() + _ = bad.Integer() + _ = bad.Time() + _ = bad.String() + } +} + +func TestTIDConstruction(t *testing.T) { + assert := assert.New(t) + + zero := NewTID(0, 0) + assert.Equal("2222222222222", zero.String()) + assert.Equal(uint64(0), zero.Integer()) + assert.Equal(uint(0), zero.ClockID()) + assert.Equal(time.UnixMilli(0).UTC(), zero.Time()) + + now := NewTIDNow(1011) + assert.Equal(uint(1011), now.ClockID()) + assert.True(time.Since(now.Time()) < time.Minute) + + over := NewTIDNow(4096) + assert.Equal(uint(0), over.ClockID()) +} + +func TestTIDClock(t *testing.T) { + assert := assert.New(t) + + clk := NewTIDClock(0) + last := NewTID(0, 0) + for i := 0; i < 100; i++ { + next := clk.Next() + assert.Greater(next, last) + last = next + } +} From 77ef1b5e7f6c24530ceea9054bff8f8c7f733804 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 2 Oct 2023 23:10:24 -0700 Subject: [PATCH 08/10] syntax: always do full datetime parse --- atproto/syntax/datetime.go | 26 +++++++++++++++++++------- atproto/syntax/datetime_test.go | 9 +++++++-- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git 
a/atproto/syntax/datetime.go b/atproto/syntax/datetime.go index 36d4419be..7910d6e31 100644 --- a/atproto/syntax/datetime.go +++ b/atproto/syntax/datetime.go @@ -17,6 +17,8 @@ const ( // Represents the a Datetime in string format, as would pass Lexicon syntax validation: the intersection of RFC-3339 and ISO-8601 syntax. // // Always use [ParseDatetime] instead of wrapping strings directly, especially when working with network input. +// +// Syntax is specified at: https://atproto.com/specs/lexicon#datetime type Datetime string func ParseDatetime(raw string) (Datetime, error) { @@ -30,24 +32,34 @@ func ParseDatetime(raw string) (Datetime, error) { if strings.HasSuffix(raw, "-00:00") { return "", fmt.Errorf("Datetime can't use '-00:00' for UTC timezone, must use '+00:00', per ISO-8601") } + // ensure that the datetime actually parses using golang time lib + _, err := time.Parse(time.RFC3339Nano, raw) + if err != nil { + return "", err + } return Datetime(raw), nil } -// Parses a string to a golang time.Time in a single step. +// Validates and converts a string to a golang [time.Time] in a single step. func ParseDatetimeTime(raw string) (time.Time, error) { - var zero time.Time d, err := ParseDatetime(raw) if err != nil { + var zero time.Time return zero, err } - return d.Time() + return d.Time(), nil } -// Parses the Datetime string in to a golang time.Time. +// Parses the Datetime string in to a golang [time.Time]. // -// There are a small number of strings which will pass initial syntax validation but fail when actually parsing, so this function can return an error. Use [ParseDatetimeTime] to fully parse in a single function call. -func (d Datetime) Time() (time.Time, error) { - return time.Parse(time.RFC3339Nano, d.String()) +// This method assumes that [ParseDatetime] was used to create the Datetime, which already verified parsing, and thus that [time.Parse] will always succeed. In the event of an error, zero/nil will be returned. 
+func (d Datetime) Time() time.Time { + var zero time.Time + ret, err := time.Parse(time.RFC3339Nano, d.String()) + if err != nil { + return zero + } + return ret } // Creates a new valid Datetime string matching the current time, in prefered syntax. diff --git a/atproto/syntax/datetime_test.go b/atproto/syntax/datetime_test.go index a47fb7c15..c12e20e84 100644 --- a/atproto/syntax/datetime_test.go +++ b/atproto/syntax/datetime_test.go @@ -60,7 +60,12 @@ func TestInteropDatetimeTimeInvalid(t *testing.T) { if len(line) == 0 || line[0] == '#' { continue } - _, err := ParseDatetimeTime(line) + _, err := ParseDatetime(line) + if err == nil { + fmt.Println("BAD: " + line) + } + assert.Error(err) + _, err = ParseDatetimeTime(line) if err == nil { fmt.Println("BAD: " + line) } @@ -69,7 +74,7 @@ func TestInteropDatetimeTimeInvalid(t *testing.T) { assert.NoError(scanner.Err()) } -func TestInteropDatetimeNow(t *testing.T) { +func TestDatetimeNow(t *testing.T) { assert := assert.New(t) dt := DatetimeNow() From e3485e098ea93126f1eafa8dbd42614395f44659 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 3 Oct 2023 00:17:27 -0700 Subject: [PATCH 09/10] syntax: un-comment datetime syntax lines --- atproto/syntax/testdata/datetime_parse_invalid.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/atproto/syntax/testdata/datetime_parse_invalid.txt b/atproto/syntax/testdata/datetime_parse_invalid.txt index 1ab8f6d0a..3672453a2 100644 --- a/atproto/syntax/testdata/datetime_parse_invalid.txt +++ b/atproto/syntax/testdata/datetime_parse_invalid.txt @@ -1,7 +1,7 @@ # superficial syntax parses ok, but are not valid datetimes for semantic reasons (eg, "month zero") -#1985-00-12T23:20:50.123Z -#1985-04-00T23:20:50.123Z -#1985-13-12T23:20:50.123Z -#1985-04-12T25:20:50.123Z -#1985-04-12T23:61:50.123Z -#1985-04-12T23:20:61.123Z +1985-00-12T23:20:50.123Z +1985-04-00T23:20:50.123Z +1985-13-12T23:20:50.123Z +1985-04-12T25:20:50.123Z +1985-04-12T23:99:50.123Z 
+1985-04-12T23:20:61.123Z From a03830107b0d4c28c739b20d19bdcef7d9842537 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 4 Oct 2023 00:18:39 -0700 Subject: [PATCH 10/10] syntax: fix TID milli/micro; add NewTIDFromTime --- atproto/syntax/tid.go | 9 +++++++-- atproto/syntax/tid_test.go | 6 +++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/atproto/syntax/tid.go b/atproto/syntax/tid.go index 00176974a..5bed77d51 100644 --- a/atproto/syntax/tid.go +++ b/atproto/syntax/tid.go @@ -53,11 +53,16 @@ func NewTIDFromInteger(v uint64) TID { } // Constructs a new TID from a UNIX timestamp (in milliseconds) and clock ID value. -func NewTID(unixMilis int64, clockId uint) TID { - var v uint64 = (uint64(unixMilis&0x1F_FFFF_FFFF_FFFF) << 10) | uint64(clockId&0x3FF) +func NewTID(unixMicros int64, clockId uint) TID { + var v uint64 = (uint64(unixMicros&0x1F_FFFF_FFFF_FFFF) << 10) | uint64(clockId&0x3FF) return NewTIDFromInteger(v) } +// Constructs a new TID from a [time.Time] and clock ID value +func NewTIDFromTime(ts time.Time, clockId uint) TID { + return NewTID(ts.UTC().UnixMicro(), clockId) +} + // Returns full integer representation of this TID (not used often) func (t TID) Integer() uint64 { s := t.String() diff --git a/atproto/syntax/tid_test.go b/atproto/syntax/tid_test.go index 9a7b61e8a..ed0d94bee 100644 --- a/atproto/syntax/tid_test.go +++ b/atproto/syntax/tid_test.go @@ -104,7 +104,7 @@ func TestTIDConstruction(t *testing.T) { assert.Equal("2222222222222", zero.String()) assert.Equal(uint64(0), zero.Integer()) assert.Equal(uint(0), zero.ClockID()) - assert.Equal(time.UnixMilli(0).UTC(), zero.Time()) + assert.Equal(time.UnixMicro(0).UTC(), zero.Time()) now := NewTIDNow(1011) assert.Equal(uint(1011), now.ClockID()) @@ -112,6 +112,10 @@ func TestTIDConstruction(t *testing.T) { over := NewTIDNow(4096) assert.Equal(uint(0), over.ClockID()) + + next := NewTIDFromTime(time.Now(), 123) + assert.Equal(uint(123), next.ClockID()) + 
assert.True(time.Since(next.Time()) < time.Minute) } func TestTIDClock(t *testing.T) {