Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

syntax: Datetime, URI, CID, TID #360

Merged
merged 10 commits into from
Oct 12, 2023
48 changes: 48 additions & 0 deletions atproto/syntax/cid.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package syntax

import (
"fmt"
"regexp"
"strings"
)

// CID represents a CIDv1 in string format, as would pass Lexicon syntax validation.
//
// You usually want to use the github.com/ipfs/go-cid package and type when working with CIDs ("Links") in atproto. This specific type (syntax.CID) is an informal/incomplete helper specifically for doing fast string verification or pass-through without parsing, re-serialization, or normalization.
//
// Always use [ParseCID] instead of wrapping strings directly, especially when working with network input.
type CID string

// cidRegex is the character-set and length check applied by [ParseCID].
// It is compiled once at package initialization instead of on every call.
var cidRegex = regexp.MustCompile(`^[a-zA-Z0-9+=]{8,256}$`)

// ParseCID checks that raw looks like a CID string and returns it, unmodified,
// as a [CID].
//
// The checks are purely syntactic: length between 8 and 256 bytes, characters
// limited to ASCII letters, digits, '+' and '=', and no "Qmb" prefix. The CID
// is not decoded. On failure the empty CID and a non-nil error are returned.
func ParseCID(raw string) (CID, error) {
	if len(raw) > 256 {
		return "", fmt.Errorf("CID is too long (256 chars max)")
	}
	if len(raw) < 8 {
		return "", fmt.Errorf("CID is too short (8 chars min)")
	}
	if !cidRegex.MatchString(raw) {
		return "", fmt.Errorf("CID syntax didn't validate via regex")
	}
	// NOTE(review): only the "Qmb" prefix is rejected here; presumably this is
	// meant to catch legacy CIDv0 strings — confirm whether a broader prefix
	// check is intended.
	if strings.HasPrefix(raw, "Qmb") {
		return "", fmt.Errorf("CIDv0 not allowed in this version of atproto")
	}
	return CID(raw), nil
}

// String returns the CID as a plain string, with no modification.
func (c CID) String() string {
return string(c)
}

// MarshalText returns the bytes of the CID's string form. The returned error
// is always nil. The value is not re-validated here.
func (c CID) MarshalText() ([]byte, error) {
return []byte(c.String()), nil
}

// UnmarshalText validates text with [ParseCID] and, only when validation
// succeeds, stores the result in the receiver. On failure the receiver is left
// untouched and the parse error is returned.
func (c *CID) UnmarshalText(text []byte) error {
	parsed, err := ParseCID(string(text))
	if err == nil {
		*c = parsed
	}
	return err
}
50 changes: 50 additions & 0 deletions atproto/syntax/cid_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package syntax

import (
"bufio"
"fmt"
"os"
"testing"

"github.com/stretchr/testify/assert"
)

// TestInteropCIDsValid feeds every non-blank, non-comment line of the "valid"
// CID fixture through ParseCID and expects each one to be accepted. Lines that
// are unexpectedly rejected are echoed to stdout with a "GOOD: " prefix.
func TestInteropCIDsValid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/cid_syntax_valid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}
		_, parseErr := ParseCID(entry)
		if parseErr != nil {
			fmt.Println("GOOD: " + entry)
		}
		check.NoError(parseErr)
	}
	check.NoError(lines.Err())
}

// TestInteropCIDsInvalid feeds every non-blank, non-comment line of the
// "invalid" CID fixture through ParseCID and expects each one to be rejected.
// Lines that are unexpectedly accepted are echoed to stdout with a "BAD: "
// prefix.
func TestInteropCIDsInvalid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/cid_syntax_invalid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}
		_, parseErr := ParseCID(entry)
		if parseErr == nil {
			fmt.Println("BAD: " + entry)
		}
		check.Error(parseErr)
	}
	check.NoError(lines.Err())
}
86 changes: 86 additions & 0 deletions atproto/syntax/datetime.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package syntax

import (
"fmt"
"regexp"
"strings"
"time"
)

const (
// Preferred atproto Datetime string syntax, for use with [time.Time.Format].
//
// The layout emits up to three fractional-second digits (trailing zeros are
// dropped) followed by a literal "Z", so the time being formatted should
// already be in UTC.
//
// Note that *parsing* syntax is more flexible.
AtprotoDatetimeLayout = "2006-01-02T15:04:05.999Z"
)

// Datetime represents a datetime in string format, as would pass Lexicon syntax validation: the intersection of RFC-3339 and ISO-8601 syntax.
//
// Always use [ParseDatetime] instead of wrapping strings directly, especially when working with network input.
//
// Syntax is specified at: https://atproto.com/specs/lexicon#datetime
type Datetime string

// datetimeRegex is the coarse shape check applied by [ParseDatetime]. It is
// compiled once at package initialization instead of on every call.
//
// The fractional-seconds separator is an escaped, literal '.'; an unescaped
// dot would match any character (for example a comma).
var datetimeRegex = regexp.MustCompile(`^[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9](\.[0-9]{1,20})?(Z|([+-][0-2][0-9]:[0-5][0-9]))$`)

// ParseDatetime validates raw as an atproto datetime string and returns it,
// unmodified, as a [Datetime].
//
// Validation happens in stages: a 64-byte length cap, a regex shape check, a
// rejection of the "-00:00" timezone suffix, and finally a [time.Parse] using
// [time.RFC3339Nano] to catch out-of-range field values. On failure the empty
// Datetime and a non-nil error are returned.
func ParseDatetime(raw string) (Datetime, error) {
	if len(raw) > 64 {
		return "", fmt.Errorf("Datetime too long (max 64 chars)")
	}
	if !datetimeRegex.MatchString(raw) {
		return "", fmt.Errorf("Datetime syntax didn't validate via regex")
	}
	if strings.HasSuffix(raw, "-00:00") {
		return "", fmt.Errorf("Datetime can't use '-00:00' for UTC timezone, must use '+00:00', per ISO-8601")
	}
	// ensure that the datetime actually parses using golang time lib
	if _, err := time.Parse(time.RFC3339Nano, raw); err != nil {
		return "", err
	}
	return Datetime(raw), nil
}

// ParseDatetimeTime validates raw with [ParseDatetime] and, if it is
// acceptable, converts it to a golang [time.Time] in the same call. When
// validation fails the zero time.Time is returned along with the error.
func ParseDatetimeTime(raw string) (time.Time, error) {
	dt, err := ParseDatetime(raw)
	if err == nil {
		return dt.Time(), nil
	}
	return time.Time{}, err
}

// Time converts the Datetime string into a golang [time.Time] by parsing it
// with the [time.RFC3339Nano] layout.
//
// The method has no error return: it relies on the value having been created
// through [ParseDatetime], which already performs the same parse. If parsing
// fails anyway, the zero time.Time is returned.
func (d Datetime) Time() time.Time {
	if parsed, err := time.Parse(time.RFC3339Nano, string(d)); err == nil {
		return parsed
	}
	return time.Time{}
}

// DatetimeNow returns the current time, converted to UTC and formatted with
// AtprotoDatetimeLayout (the preferred syntax), as a Datetime.
func DatetimeNow() Datetime {
	formatted := time.Now().UTC().Format(AtprotoDatetimeLayout)
	return Datetime(formatted)
}

// String returns the Datetime as a plain string, with no modification.
func (d Datetime) String() string {
return string(d)
}

// MarshalText returns the bytes of the Datetime's string form. The returned
// error is always nil. The value is not re-validated here.
func (d Datetime) MarshalText() ([]byte, error) {
return []byte(d.String()), nil
}

// UnmarshalText validates text with [ParseDatetime] and, only when validation
// succeeds, stores the result in the receiver. On failure the receiver is left
// untouched and the parse error is returned.
func (d *Datetime) UnmarshalText(text []byte) error {
	parsed, err := ParseDatetime(string(text))
	if err == nil {
		*d = parsed
	}
	return err
}
83 changes: 83 additions & 0 deletions atproto/syntax/datetime_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package syntax

import (
"bufio"
"fmt"
"os"
"testing"

"github.com/stretchr/testify/assert"
)

// TestInteropDatetimeValid feeds every non-blank, non-comment line of the
// "valid" datetime fixture through ParseDatetimeTime and expects each one to
// be accepted. Lines that are unexpectedly rejected are echoed to stdout with
// a "GOOD: " prefix.
func TestInteropDatetimeValid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/datetime_syntax_valid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}
		_, parseErr := ParseDatetimeTime(entry)
		if parseErr != nil {
			fmt.Println("GOOD: " + entry)
		}
		check.NoError(parseErr)
	}
	check.NoError(lines.Err())
}

// TestInteropDatetimeInvalid feeds every non-blank, non-comment line of the
// "invalid syntax" datetime fixture through ParseDatetime and expects each one
// to be rejected. Lines that are unexpectedly accepted are echoed to stdout
// with a "BAD: " prefix.
func TestInteropDatetimeInvalid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/datetime_syntax_invalid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}
		_, parseErr := ParseDatetime(entry)
		if parseErr == nil {
			fmt.Println("BAD: " + entry)
		}
		check.Error(parseErr)
	}
	check.NoError(lines.Err())
}

// TestInteropDatetimeTimeInvalid reads the "parse invalid" datetime fixture
// and expects every non-blank, non-comment line to be rejected by both
// ParseDatetime and ParseDatetimeTime. Each unexpected acceptance is echoed to
// stdout with a "BAD: " prefix.
func TestInteropDatetimeTimeInvalid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/datetime_parse_invalid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}

		// string-level validation must fail ...
		_, syntaxErr := ParseDatetime(entry)
		if syntaxErr == nil {
			fmt.Println("BAD: " + entry)
		}
		check.Error(syntaxErr)

		// ... and so must the combined validate-and-convert helper
		_, timeErr := ParseDatetimeTime(entry)
		if timeErr == nil {
			fmt.Println("BAD: " + entry)
		}
		check.Error(timeErr)
	}
	check.NoError(lines.Err())
}

// TestDatetimeNow checks that the string produced by DatetimeNow is itself
// accepted by ParseDatetimeTime.
func TestDatetimeNow(t *testing.T) {
	check := assert.New(t)

	now := DatetimeNow().String()
	_, parseErr := ParseDatetimeTime(now)
	check.NoError(parseErr)
}
41 changes: 41 additions & 0 deletions atproto/syntax/language.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package syntax

import (
"fmt"
"regexp"
)

// Language represents a language specifier in string format, as would pass Lexicon syntax validation.
//
// Always use [ParseLanguage] instead of wrapping strings directly, especially when working with network input.
//
// The syntax is BCP-47. This is a partial/naive parsing implementation, designed for fast validation and exact-string passthrough with no normalization. For actually working with BCP-47 language specifiers in atproto code bases, we recommend the golang.org/x/text/language package.
type Language string

// langRegex is the shape check applied by [ParseLanguage]: a primary subtag
// that is either "i" or 2-3 lowercase ASCII letters, followed by zero or more
// '-'-separated alphanumeric subtags. It is compiled once at package
// initialization instead of on every call.
var langRegex = regexp.MustCompile(`^(i|[a-z]{2,3})(-[a-zA-Z0-9]+)*$`)

// ParseLanguage checks that raw is at most 128 bytes long and matches the
// language-tag regex, then returns it, unmodified, as a [Language]. On failure
// the empty Language and a non-nil error are returned.
func ParseLanguage(raw string) (Language, error) {
	if len(raw) > 128 {
		return "", fmt.Errorf("Language is too long (128 chars max)")
	}
	if !langRegex.MatchString(raw) {
		return "", fmt.Errorf("Language syntax didn't validate via regex")
	}
	return Language(raw), nil
}

// String returns the Language as a plain string, with no modification.
func (l Language) String() string {
return string(l)
}

// MarshalText returns the bytes of the Language's string form. The returned
// error is always nil. The value is not re-validated here.
func (l Language) MarshalText() ([]byte, error) {
return []byte(l.String()), nil
}

// UnmarshalText validates text with [ParseLanguage] and, only when validation
// succeeds, stores the result in the receiver. On failure the receiver is left
// untouched and the parse error is returned.
func (l *Language) UnmarshalText(text []byte) error {
	parsed, err := ParseLanguage(string(text))
	if err == nil {
		*l = parsed
	}
	return err
}
50 changes: 50 additions & 0 deletions atproto/syntax/language_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package syntax

import (
"bufio"
"fmt"
"os"
"testing"

"github.com/stretchr/testify/assert"
)

// TestInteropLanguagesValid feeds every non-blank, non-comment line of the
// "valid" language fixture through ParseLanguage and expects each one to be
// accepted. Lines that are unexpectedly rejected are echoed to stdout with a
// "GOOD: " prefix.
func TestInteropLanguagesValid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/language_syntax_valid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}
		_, parseErr := ParseLanguage(entry)
		if parseErr != nil {
			fmt.Println("GOOD: " + entry)
		}
		check.NoError(parseErr)
	}
	check.NoError(lines.Err())
}

// TestInteropLanguagesInvalid feeds every non-blank, non-comment line of the
// "invalid" language fixture through ParseLanguage and expects each one to be
// rejected. Lines that are unexpectedly accepted are echoed to stdout with a
// "BAD: " prefix.
func TestInteropLanguagesInvalid(t *testing.T) {
	check := assert.New(t)

	fixture, openErr := os.Open("testdata/language_syntax_invalid.txt")
	check.NoError(openErr)
	defer fixture.Close()

	lines := bufio.NewScanner(fixture)
	for lines.Scan() {
		entry := lines.Text()
		// blank lines and '#' comment lines are not test cases
		if entry == "" || entry[0] == '#' {
			continue
		}
		_, parseErr := ParseLanguage(entry)
		if parseErr == nil {
			fmt.Println("BAD: " + entry)
		}
		check.Error(parseErr)
	}
	check.NoError(lines.Err())
}
16 changes: 16 additions & 0 deletions atproto/syntax/testdata/cid_syntax_invalid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
example.com
https://example.com
cid:bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi
.
12345

# whitespace
bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi
bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi
bafybe igdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi

# old CIDv0 not supported
QmbWqxBEKC3P8tqsKc98xmWNzrzDtRLMiMPL8wBuTGsMnR

# https://github.com/ipfs-shipyard/is-ipfs/blob/master/test/test-cid.spec.ts
noop
14 changes: 14 additions & 0 deletions atproto/syntax/testdata/cid_syntax_valid.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

# examples from https://docs.ipfs.tech/concepts/content-addressing
bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi

# https://github.com/ipfs-shipyard/is-ipfs/blob/master/test/test-cid.spec.ts
zdj7WWeQ43G6JJvLWQWZpyHuAMq6uYWRjkBXFad11vE2LHhQ7
bafybeie5gq4jxvzmsym6hjlwxej4rwdoxt7wadqvmmwbqi7r27fclha2va

# more contrived examples
mBcDxtdWx0aWhhc2g+
z7x3CtScH765HvShXT
zdj7WhuEjrB52m1BisYCtmjH1hSKa7yZ3jEZ9JcXaFRD51wVz
7134036155352661643226414134664076
f017012202c5f688262e0ece8569aa6f94d60aad55ca8d9d83734e4a7430d0cff6588ec2b
Loading
Loading