From 21a531b97fba19197e5b7a1d9c45bc59d400fe64 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 6 Nov 2023 00:23:22 -0800 Subject: [PATCH 01/23] lexicon: initial work on schema parsing --- atproto/lexicon/cmd/lextool/main.go | 59 ++++ atproto/lexicon/docs.go | 4 + atproto/lexicon/extract.go | 20 ++ atproto/lexicon/language.go | 310 ++++++++++++++++++ atproto/lexicon/language_test.go | 47 +++ atproto/lexicon/lexicon.go | 12 + .../testdata/com_atproto_label_defs.json | 78 +++++ 7 files changed, 530 insertions(+) create mode 100644 atproto/lexicon/cmd/lextool/main.go create mode 100644 atproto/lexicon/docs.go create mode 100644 atproto/lexicon/extract.go create mode 100644 atproto/lexicon/language.go create mode 100644 atproto/lexicon/language_test.go create mode 100644 atproto/lexicon/lexicon.go create mode 100644 atproto/lexicon/testdata/com_atproto_label_defs.json diff --git a/atproto/lexicon/cmd/lextool/main.go b/atproto/lexicon/cmd/lextool/main.go new file mode 100644 index 000000000..a5e72ed0e --- /dev/null +++ b/atproto/lexicon/cmd/lextool/main.go @@ -0,0 +1,59 @@ +package main + +import ( + "encoding/json" + "fmt" + "io" + "log/slog" + "os" + + "github.com/bluesky-social/indigo/atproto/lexicon" + + "github.com/urfave/cli/v2" +) + +func main() { + app := cli.App{ + Name: "lex-tool", + Usage: "informal debugging CLI tool for atproto lexicons", + } + app.Commands = []*cli.Command{ + &cli.Command{ + Name: "parse-schema", + Usage: "parse an individual lexicon schema file (JSON)", + Action: runParseSchema, + }, + } + h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}) + slog.SetDefault(slog.New(h)) + app.RunAndExitOnError() +} + +func runParseSchema(cctx *cli.Context) error { + p := cctx.Args().First() + if p == "" { + return fmt.Errorf("need to provide path to a schema file as an argument") + } + + f, err := os.Open(p) + if err != nil { + return err + } + defer func() { _ = f.Close() }() + + b, err := io.ReadAll(f) + if err != 
nil { + return err + } + + var sf lexicon.SchemaFile + if err := json.Unmarshal(b, &sf); err != nil { + return err + } + out, err := json.MarshalIndent(sf, "", " ") + if err != nil { + return err + } + fmt.Println(string(out)) + return nil +} diff --git a/atproto/lexicon/docs.go b/atproto/lexicon/docs.go new file mode 100644 index 000000000..754179933 --- /dev/null +++ b/atproto/lexicon/docs.go @@ -0,0 +1,4 @@ +/* +Package atproto/lexicon provides generic Lexicon schema parsing and run-time validation. +*/ +package lexicon diff --git a/atproto/lexicon/extract.go b/atproto/lexicon/extract.go new file mode 100644 index 000000000..51d09ba93 --- /dev/null +++ b/atproto/lexicon/extract.go @@ -0,0 +1,20 @@ +package lexicon + +import ( + "encoding/json" +) + +// Helper type for extracting record type from JSON +type genericSchemaDef struct { + Type string `json:"type"` +} + +// Parses the top-level $type field from generic atproto JSON data +func ExtractTypeJSON(b []byte) (string, error) { + var gsd genericSchemaDef + if err := json.Unmarshal(b, &gsd); err != nil { + return "", err + } + + return gsd.Type, nil +} diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go new file mode 100644 index 000000000..e5f3a9dda --- /dev/null +++ b/atproto/lexicon/language.go @@ -0,0 +1,310 @@ +package lexicon + +import ( + "encoding/json" + "fmt" +) + +// Serialization helper for top-level Lexicon schema JSON objects (files) +type SchemaFile struct { + Lexicon int `json:"lexicon,const=1"` + ID string `json:"id"` + Revision *int `json:"revision,omitempty"` + Description *string `json:"description,omitempty"` + Defs map[string]SchemaDef `json:"defs"` +} + +// enum type to represent any of the schema fields +type SchemaDef struct { + Inner any +} + +func (s SchemaDef) MarshalJSON() ([]byte, error) { + return json.Marshal(s.Inner) +} + +func (s *SchemaDef) UnmarshalJSON(b []byte) error { + t, err := ExtractTypeJSON(b) + if err != nil { + return err + } + switch t { + case 
"record": + v := new(SchemaRecord) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "query": + v := new(SchemaQuery) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "procedure": + v := new(SchemaProcedure) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "subscription": + v := new(SchemaSubscription) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "null": + v := new(SchemaNull) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "boolean": + v := new(SchemaBoolean) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "integer": + v := new(SchemaInteger) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "string": + v := new(SchemaString) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "bytes": + v := new(SchemaBytes) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "cid-link": + v := new(SchemaCIDLink) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "array": + v := new(SchemaArray) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "object": + v := new(SchemaObject) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "blob": + v := new(SchemaBlob) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "params": + v := new(SchemaParams) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "token": + v := new(SchemaToken) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "ref": + v 
:= new(SchemaRef) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "union": + v := new(SchemaUnion) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + case "unknown": + v := new(SchemaUnknown) + if err = json.Unmarshal(b, v); err != nil { + return err + } + s.Inner = v + return nil + default: + return fmt.Errorf("unexpected schema type: %s", t) + } + return fmt.Errorf("unexpected schema type: %s", t) +} + +type SchemaRecord struct { + Type string `json:"type,const=record"` + Description *string `json:"description,omitempty"` + Key string `json:"key"` + Record SchemaObject `json:"record"` +} + +type SchemaQuery struct { + Type string `json:"type,const=query"` + Description *string `json:"description,omitempty"` + Parameters SchemaParams `json:"parameters"` + Output *SchemaBody `json:"output"` + Errors []SchemaError `json:"errors,omitempty"` // optional +} + +type SchemaProcedure struct { + Type string `json:"type,const=procedure"` + Description *string `json:"description,omitempty"` + Parameters SchemaParams `json:"parameters"` + Output *SchemaBody `json:"output"` // optional + Errors []SchemaError `json:"errors,omitempty"` // optional + Input *SchemaBody `json:"input"` // optional +} + +type SchemaSubscription struct { + Type string `json:"type,const=subscription"` + Description *string `json:"description,omitempty"` + Parameters SchemaParams `json:"parameters"` + Message *SchemaMessage `json:"message,omitempty"` // TODO(specs): is this really optional? 
+} + +type SchemaBody struct { + Description *string `json:"description,omitempty"` + Encoding string `json:"encoding"` // required, mimetype + Schema *SchemaDef `json:"schema"` // optional; type:object, type:ref, or type:union +} + +type SchemaMessage struct { + Description *string `json:"description,omitempty"` + Schema SchemaDef `json:"schema"` // required; type:union only +} + +type SchemaError struct { + Name string `json:"name"` + Description *string `json:"description"` +} + +type SchemaNull struct { + Type string `json:"type,const=null"` + Description *string `json:"description,omitempty"` +} + +type SchemaBoolean struct { + Type string `json:"type,const=bool"` + Description *string `json:"description,omitempty"` + Default *bool `json:"default,omitempty"` + Const *bool `json:"const,omitempty"` +} + +type SchemaInteger struct { + Type string `json:"type,const=integer"` + Description *string `json:"description,omitempty"` + Minimum *int `json:"minimum,omitempty"` + Maximum *int `json:"maximum,omitempty"` + Enum []int `json:"enum,omitempty"` + Default *int `json:"default,omitempty"` + Const *int `json:"const,omitempty"` +} + +type SchemaString struct { + Type string `json:"type,const=string"` + Description *string `json:"description,omitempty"` + Format *string `json:"format,omitempty"` + MinLength *int `json:"minLength,omitempty"` + MaxLength *int `json:"maxLength,omitempty"` + MinGraphemes *int `json:"minGraphemes,omitempty"` + MaxGraphemes *int `json:"maxGraphemes,omitempty"` + KnownValues []string `json:"knownValues,omitempty"` + Enum []string `json:"enum,omitempty"` + Default *int `json:"default,omitempty"` + Const *int `json:"const,omitempty"` +} + +type SchemaBytes struct { + Type string `json:"type,const=bytes"` + Description *string `json:"description,omitempty"` + MinLength *int `json:"minLength,omitempty"` + MaxLength *int `json:"maxLength,omitempty"` +} + +type SchemaCIDLink struct { + Type string `json:"type,const=cid-link"` + Description *string 
`json:"description,omitempty"` +} + +type SchemaArray struct { + Type string `json:"type,const=array"` + Description *string `json:"description,omitempty"` + Items SchemaDef `json:"items"` + MinLength *int `json:"minLength,omitempty"` + MaxLength *int `json:"maxLength,omitempty"` +} + +type SchemaObject struct { + Type string `json:"type,const=object"` + Description *string `json:"description,omitempty"` + Properties map[string]SchemaDef `json:"properties"` + Required []string `json:"required,omitempty"` + Nullable []string `json:"nullable,omitempty"` +} + +type SchemaBlob struct { + Type string `json:"type,const=blob"` + Description *string `json:"description,omitempty"` + Accept []string `json:"accept,omitempty"` + MaxSize *int `json:"maxSize,omitempty"` +} + +type SchemaParams struct { + Type string `json:"type,const=params"` + Description *string `json:"description,omitempty"` + Properties map[string]SchemaDef `json:"properties"` // boolean, integer, string, or unknown; or an array of these types + Required []string `json:"required,omitempty"` +} + +type SchemaToken struct { + Type string `json:"type,const=token"` + Description *string `json:"description,omitempty"` +} + +type SchemaRef struct { + Type string `json:"type,const=ref"` + Description *string `json:"description,omitempty"` + Ref string `json:"ref"` +} + +type SchemaUnion struct { + Type string `json:"type,const=union"` + Description *string `json:"description,omitempty"` + Refs []string `json:"refs"` + Closed *bool `json:"closed,omitempty"` +} + +type SchemaUnknown struct { + Type string `json:"type,const=unknown"` + Description *string `json:"description,omitempty"` +} diff --git a/atproto/lexicon/language_test.go b/atproto/lexicon/language_test.go new file mode 100644 index 000000000..89882c86e --- /dev/null +++ b/atproto/lexicon/language_test.go @@ -0,0 +1,47 @@ +package lexicon + +import ( + "encoding/json" + "io" + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func 
TestBasicLabelLexicon(t *testing.T) { + assert := assert.New(t) + + f, err := os.Open("testdata/com_atproto_label_defs.json") + if err != nil { + t.Fatal(err) + } + defer func() { _ = f.Close() }() + + jsonBytes, err := io.ReadAll(f) + if err != nil { + t.Fatal(err) + } + + var schema SchemaFile + if err := json.Unmarshal(jsonBytes, &schema); err != nil { + t.Fatal(err) + } + + outBytes, err := json.Marshal(schema) + if err != nil { + t.Fatal(err) + } + + var beforeMap map[string]any + if err := json.Unmarshal(jsonBytes, &beforeMap); err != nil { + t.Fatal(err) + } + + var afterMap map[string]any + if err := json.Unmarshal(outBytes, &afterMap); err != nil { + t.Fatal(err) + } + + assert.Equal(beforeMap, afterMap) +} diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go new file mode 100644 index 000000000..9b13a8599 --- /dev/null +++ b/atproto/lexicon/lexicon.go @@ -0,0 +1,12 @@ +package lexicon + +import ( +// "github.com/bluesky-social/indigo/atproto/syntax" +) + +// An aggregation of lexicon schemas, and methods for validating generic data against those schemas. 
+type Catalog struct { +} + +type Schema struct { +} diff --git a/atproto/lexicon/testdata/com_atproto_label_defs.json b/atproto/lexicon/testdata/com_atproto_label_defs.json new file mode 100644 index 000000000..57f06f81b --- /dev/null +++ b/atproto/lexicon/testdata/com_atproto_label_defs.json @@ -0,0 +1,78 @@ +{ + "defs": { + "label": { + "description": "Metadata tag on an atproto resource (eg, repo or record)", + "properties": { + "cid": { + "description": "optionally, CID specifying the specific version of 'uri' resource this label applies to", + "format": "cid", + "type": "string" + }, + "cts": { + "description": "timestamp when this label was created", + "format": "datetime", + "type": "string" + }, + "neg": { + "description": "if true, this is a negation label, overwriting a previous label", + "type": "boolean" + }, + "src": { + "description": "DID of the actor who created this label", + "format": "did", + "type": "string" + }, + "uri": { + "description": "AT URI of the record, repository (account), or other resource which this label applies to", + "format": "uri", + "type": "string" + }, + "val": { + "description": "the short string name of the value or type of this label", + "maxLength": 128, + "type": "string" + } + }, + "required": [ + "src", + "uri", + "val", + "cts" + ], + "type": "object" + }, + "selfLabel": { + "description": "Metadata tag on an atproto record, published by the author within the record. 
Note -- schemas should use #selfLabels, not #selfLabel.", + "properties": { + "val": { + "description": "the short string name of the value or type of this label", + "maxLength": 128, + "type": "string" + } + }, + "required": [ + "val" + ], + "type": "object" + }, + "selfLabels": { + "description": "Metadata tags on an atproto record, published by the author within the record.", + "properties": { + "values": { + "items": { + "ref": "#selfLabel", + "type": "ref" + }, + "maxLength": 10, + "type": "array" + } + }, + "required": [ + "values" + ], + "type": "object" + } + }, + "id": "com.atproto.label.defs", + "lexicon": 1 +} From 53fcf07ef80976184af5d440ada3e9786a6ddf8a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Mon, 6 Nov 2023 23:15:41 -0800 Subject: [PATCH 02/23] lexicon: better validation schemas themselves --- atproto/lexicon/cmd/lextool/main.go | 21 ++ atproto/lexicon/language.go | 327 ++++++++++++++++++++++++++-- atproto/lexicon/lexicon.go | 99 ++++++++- 3 files changed, 426 insertions(+), 21 deletions(-) diff --git a/atproto/lexicon/cmd/lextool/main.go b/atproto/lexicon/cmd/lextool/main.go index a5e72ed0e..b5652b40c 100644 --- a/atproto/lexicon/cmd/lextool/main.go +++ b/atproto/lexicon/cmd/lextool/main.go @@ -23,6 +23,11 @@ func main() { Usage: "parse an individual lexicon schema file (JSON)", Action: runParseSchema, }, + &cli.Command{ + Name: "load-directory", + Usage: "try recursively loading all the schemas from a directory", + Action: runLoadDirectory, + }, } h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}) slog.SetDefault(slog.New(h)) @@ -57,3 +62,19 @@ func runParseSchema(cctx *cli.Context) error { fmt.Println(string(out)) return nil } + +func runLoadDirectory(cctx *cli.Context) error { + p := cctx.Args().First() + if p == "" { + return fmt.Errorf("need to provide directory path as an argument") + } + + c := lexicon.NewCatalog() + err := c.LoadDirectory(p) + if err != nil { + return err + } + + 
fmt.Println("success!") + return nil +} diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index e5f3a9dda..f303d99ab 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -3,6 +3,8 @@ package lexicon import ( "encoding/json" "fmt" + "reflect" + "strings" ) // Serialization helper for top-level Lexicon schema JSON objects (files) @@ -19,6 +21,49 @@ type SchemaDef struct { Inner any } +func (s *SchemaDef) CheckSchema() error { + switch v := s.Inner.(type) { + case SchemaRecord: + return v.CheckSchema() + case SchemaQuery: + return v.CheckSchema() + case SchemaProcedure: + return v.CheckSchema() + case SchemaSubscription: + return v.CheckSchema() + case SchemaNull: + return v.CheckSchema() + case SchemaBoolean: + return v.CheckSchema() + case SchemaInteger: + return v.CheckSchema() + case SchemaString: + return v.CheckSchema() + case SchemaBytes: + return v.CheckSchema() + case SchemaCIDLink: + return v.CheckSchema() + case SchemaArray: + return v.CheckSchema() + case SchemaObject: + return v.CheckSchema() + case SchemaBlob: + return v.CheckSchema() + case SchemaParams: + return v.CheckSchema() + case SchemaToken: + return v.CheckSchema() + case SchemaRef: + return v.CheckSchema() + case SchemaUnion: + return v.CheckSchema() + case SchemaUnknown: + return v.CheckSchema() + default: + return fmt.Errorf("unhandled schema type: %s", reflect.TypeOf(v)) + } +} + func (s SchemaDef) MarshalJSON() ([]byte, error) { return json.Marshal(s.Inner) } @@ -28,132 +73,133 @@ func (s *SchemaDef) UnmarshalJSON(b []byte) error { if err != nil { return err } + // TODO: should we call CheckSchema here, instead of in lexicon loading? 
switch t { case "record": v := new(SchemaRecord) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "query": v := new(SchemaQuery) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "procedure": v := new(SchemaProcedure) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "subscription": v := new(SchemaSubscription) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "null": v := new(SchemaNull) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "boolean": v := new(SchemaBoolean) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "integer": v := new(SchemaInteger) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "string": v := new(SchemaString) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "bytes": v := new(SchemaBytes) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "cid-link": v := new(SchemaCIDLink) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "array": v := new(SchemaArray) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "object": v := new(SchemaObject) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "blob": v := new(SchemaBlob) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "params": v := new(SchemaParams) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "token": v := new(SchemaToken) if err = json.Unmarshal(b, v); err != nil { return 
err } - s.Inner = v + s.Inner = *v return nil case "ref": v := new(SchemaRef) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "union": v := new(SchemaUnion) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil case "unknown": v := new(SchemaUnknown) if err = json.Unmarshal(b, v); err != nil { return err } - s.Inner = v + s.Inner = *v return nil default: return fmt.Errorf("unexpected schema type: %s", t) @@ -168,6 +214,18 @@ type SchemaRecord struct { Record SchemaObject `json:"record"` } +func (s *SchemaRecord) CheckSchema() error { + switch s.Key { + case "tid", "any": + // pass + default: + if !strings.HasPrefix(s.Key, "literal:") { + return fmt.Errorf("invalid record key specifier: %s", s.Key) + } + } + return s.Record.CheckSchema() +} + type SchemaQuery struct { Type string `json:"type,const=query"` Description *string `json:"description,omitempty"` @@ -176,6 +234,20 @@ type SchemaQuery struct { Errors []SchemaError `json:"errors,omitempty"` // optional } +func (s *SchemaQuery) CheckSchema() error { + if s.Output != nil { + if err := s.Output.CheckSchema(); err != nil { + return err + } + } + for _, e := range s.Errors { + if err := e.CheckSchema(); err != nil { + return err + } + } + return s.Parameters.CheckSchema() +} + type SchemaProcedure struct { Type string `json:"type,const=procedure"` Description *string `json:"description,omitempty"` @@ -185,6 +257,25 @@ type SchemaProcedure struct { Input *SchemaBody `json:"input"` // optional } +func (s *SchemaProcedure) CheckSchema() error { + if s.Input != nil { + if err := s.Input.CheckSchema(); err != nil { + return err + } + } + if s.Output != nil { + if err := s.Output.CheckSchema(); err != nil { + return err + } + } + for _, e := range s.Errors { + if err := e.CheckSchema(); err != nil { + return err + } + } + return s.Parameters.CheckSchema() +} + type SchemaSubscription struct { Type string 
`json:"type,const=subscription"` Description *string `json:"description,omitempty"` @@ -192,27 +283,67 @@ type SchemaSubscription struct { Message *SchemaMessage `json:"message,omitempty"` // TODO(specs): is this really optional? } +func (s *SchemaSubscription) CheckSchema() error { + if s.Message != nil { + if err := s.Message.CheckSchema(); err != nil { + return err + } + } + return s.Parameters.CheckSchema() +} + type SchemaBody struct { Description *string `json:"description,omitempty"` Encoding string `json:"encoding"` // required, mimetype Schema *SchemaDef `json:"schema"` // optional; type:object, type:ref, or type:union } +func (s *SchemaBody) CheckSchema() error { + // TODO: any validation of encoding? + if s.Schema != nil { + switch s.Schema.Inner.(type) { + case SchemaObject, SchemaRef, SchemaUnion: + // pass + default: + return fmt.Errorf("body type can only have object, ref, or union schema") + } + if err := s.Schema.CheckSchema(); err != nil { + return err + } + } + return nil +} + type SchemaMessage struct { Description *string `json:"description,omitempty"` Schema SchemaDef `json:"schema"` // required; type:union only } +func (s *SchemaMessage) CheckSchema() error { + if _, ok := s.Schema.Inner.(SchemaUnion); !ok { + return fmt.Errorf("message must have schema type union") + } + return s.Schema.CheckSchema() +} + type SchemaError struct { Name string `json:"name"` Description *string `json:"description"` } +func (s *SchemaError) CheckSchema() error { + return nil +} + type SchemaNull struct { Type string `json:"type,const=null"` Description *string `json:"description,omitempty"` } +func (s *SchemaNull) CheckSchema() error { + return nil +} + type SchemaBoolean struct { Type string `json:"type,const=bool"` Description *string `json:"description,omitempty"` @@ -220,6 +351,13 @@ type SchemaBoolean struct { Const *bool `json:"const,omitempty"` } +func (s *SchemaBoolean) CheckSchema() error { + if s.Default != nil && s.Const != nil { + return 
fmt.Errorf("schema can't have both 'default' and 'const'") + } + return nil +} + type SchemaInteger struct { Type string `json:"type,const=integer"` Description *string `json:"description,omitempty"` @@ -230,6 +368,17 @@ type SchemaInteger struct { Const *int `json:"const,omitempty"` } +func (s *SchemaInteger) CheckSchema() error { + // TODO: enforce min/max against enum, default, const + if s.Default != nil && s.Const != nil { + return fmt.Errorf("schema can't have both 'default' and 'const'") + } + if s.Minimum != nil && s.Maximum != nil && *s.Maximum < *s.Minimum { + return fmt.Errorf("schema max < min") + } + return nil +} + type SchemaString struct { Type string `json:"type,const=string"` Description *string `json:"description,omitempty"` @@ -240,8 +389,28 @@ type SchemaString struct { MaxGraphemes *int `json:"maxGraphemes,omitempty"` KnownValues []string `json:"knownValues,omitempty"` Enum []string `json:"enum,omitempty"` - Default *int `json:"default,omitempty"` - Const *int `json:"const,omitempty"` + Default *string `json:"default,omitempty"` + Const *string `json:"const,omitempty"` +} + +func (s *SchemaString) CheckSchema() error { + // TODO: enforce min/max against enum, default, const + if s.Default != nil && s.Const != nil { + return fmt.Errorf("schema can't have both 'default' and 'const'") + } + if s.MinLength != nil && s.MaxLength != nil && *s.MaxLength < *s.MinLength { + return fmt.Errorf("schema max < min") + } + if s.MinGraphemes != nil && s.MaxGraphemes != nil && *s.MaxGraphemes < *s.MinGraphemes { + return fmt.Errorf("schema max < min") + } + if (s.MinLength != nil && *s.MinLength < 0) || + (s.MaxLength != nil && *s.MaxLength < 0) || + (s.MinGraphemes != nil && *s.MinGraphemes < 0) || + (s.MaxGraphemes != nil && *s.MaxGraphemes < 0) { + return fmt.Errorf("string schema min or max below zero") + } + return nil } type SchemaBytes struct { @@ -251,11 +420,26 @@ type SchemaBytes struct { MaxLength *int `json:"maxLength,omitempty"` } +func (s 
*SchemaBytes) CheckSchema() error { + if s.MinLength != nil && s.MaxLength != nil && *s.MaxLength < *s.MinLength { + return fmt.Errorf("schema max < min") + } + if (s.MinLength != nil && *s.MinLength < 0) || + (s.MaxLength != nil && *s.MaxLength < 0) { + return fmt.Errorf("bytes schema min or max below zero") + } + return nil +} + type SchemaCIDLink struct { Type string `json:"type,const=cid-link"` Description *string `json:"description,omitempty"` } +func (s *SchemaCIDLink) CheckSchema() error { + return nil +} + type SchemaArray struct { Type string `json:"type,const=array"` Description *string `json:"description,omitempty"` @@ -264,6 +448,17 @@ type SchemaArray struct { MaxLength *int `json:"maxLength,omitempty"` } +func (s *SchemaArray) CheckSchema() error { + if s.MinLength != nil && s.MaxLength != nil && *s.MaxLength < *s.MinLength { + return fmt.Errorf("schema max < min") + } + if (s.MinLength != nil && *s.MinLength < 0) || + (s.MaxLength != nil && *s.MaxLength < 0) { + return fmt.Errorf("array schema min or max below zero") + } + return s.Items.CheckSchema() +} + type SchemaObject struct { Type string `json:"type,const=object"` Description *string `json:"description,omitempty"` @@ -272,6 +467,31 @@ type SchemaObject struct { Nullable []string `json:"nullable,omitempty"` } +func (s *SchemaObject) CheckSchema() error { + // TODO: check for set intersection between required and nullable + // TODO: check for set uniqueness of required and nullable + for _, k := range s.Required { + if _, ok := s.Properties[k]; !ok { + fmt.Errorf("object 'required' field not in properties: %s", k) + } + } + for _, k := range s.Nullable { + if _, ok := s.Properties[k]; !ok { + fmt.Errorf("object 'nullable' field not in properties: %s", k) + } + } + for k, def := range s.Properties { + // TODO: more checks on field name? 
+ if len(k) == 0 { + return fmt.Errorf("empty object schema field name not allowed") + } + if err := def.CheckSchema(); err != nil { + return err + } + } + return nil +} + type SchemaBlob struct { Type string `json:"type,const=blob"` Description *string `json:"description,omitempty"` @@ -279,6 +499,14 @@ type SchemaBlob struct { MaxSize *int `json:"maxSize,omitempty"` } +func (s *SchemaBlob) CheckSchema() error { + // TODO: validate Accept (mimetypes)? + if s.MaxSize != nil && *s.MaxSize <= 0 { + return fmt.Errorf("blob max size less or equal to zero") + } + return nil +} + type SchemaParams struct { Type string `json:"type,const=params"` Description *string `json:"description,omitempty"` @@ -286,17 +514,61 @@ type SchemaParams struct { Required []string `json:"required,omitempty"` } +func (s *SchemaParams) CheckSchema() error { + // TODO: check for set uniqueness of required + for _, k := range s.Required { + if _, ok := s.Properties[k]; !ok { + fmt.Errorf("object 'required' field not in properties: %s", k) + } + } + for k, def := range s.Properties { + // TODO: more checks on field name? 
+ if len(k) == 0 { + return fmt.Errorf("empty object schema field name not allowed") + } + switch v := def.Inner.(type) { + case SchemaBoolean, SchemaInteger, SchemaString, SchemaUnknown: + // pass + case SchemaArray: + switch v.Items.Inner.(type) { + case SchemaBoolean, SchemaInteger, SchemaString, SchemaUnknown: + // pass + default: + return fmt.Errorf("params array item type must be boolean, integer, string, or unknown") + } + default: + return fmt.Errorf("params field type must be boolean, integer, string, or unknown") + } + if err := def.CheckSchema(); err != nil { + return err + } + } + return nil +} + type SchemaToken struct { Type string `json:"type,const=token"` Description *string `json:"description,omitempty"` } +func (s *SchemaToken) CheckSchema() error { + return nil +} + type SchemaRef struct { Type string `json:"type,const=ref"` Description *string `json:"description,omitempty"` Ref string `json:"ref"` } +func (s *SchemaRef) CheckSchema() error { + // TODO: more validation of ref string? + if len(s.Ref) == 0 { + return fmt.Errorf("empty schema ref") + } + return nil +} + type SchemaUnion struct { Type string `json:"type,const=union"` Description *string `json:"description,omitempty"` @@ -304,7 +576,22 @@ type SchemaUnion struct { Closed *bool `json:"closed,omitempty"` } +func (s *SchemaUnion) CheckSchema() error { + // TODO: uniqueness check on refs + for _, ref := range s.Refs { + // TODO: more validation of ref string? 
+ if len(ref) == 0 { + return fmt.Errorf("empty schema ref") + } + } + return nil +} + type SchemaUnknown struct { Type string `json:"type,const=unknown"` Description *string `json:"description,omitempty"` } + +func (s *SchemaUnknown) CheckSchema() error { + return nil +} diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 9b13a8599..f07aea6c7 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -1,12 +1,109 @@ package lexicon import ( -// "github.com/bluesky-social/indigo/atproto/syntax" + "encoding/json" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "strings" ) // An aggregation of lexicon schemas, and methods for validating generic data against those schemas. type Catalog struct { + // TODO: not safe zero value; hide this field? seems aggressive + Schemas map[string]Schema +} + +func NewCatalog() Catalog { + return Catalog{ + Schemas: make(map[string]Schema), + } } type Schema struct { + ID string + Revision *int + Def any +} + +func (c *Catalog) Resolve(name string) (*Schema, error) { + // default to #main if name doesn't have a fragment + if !strings.Contains(name, "#") { + name = name + "#main" + } + s, ok := c.Schemas[name] + if !ok { + return nil, fmt.Errorf("schema not found in catalog: %s", name) + } + return &s, nil +} + +func (c *Catalog) AddSchemaFile(sf SchemaFile) error { + base := sf.ID + for frag, def := range sf.Defs { + if len(frag) == 0 || strings.Contains(frag, "#") || strings.Contains(frag, ".") { + // TODO: more validation here? + return fmt.Errorf("schema name invalid: %s", frag) + } + name := base + "#" + frag + if _, ok := c.Schemas[name]; ok { + return fmt.Errorf("catalog already contained a schema with name: %s", name) + } + if err := def.CheckSchema(); err != nil { + return err + } + // "A file can have at most one definition with one of the "primary" types. Primary types should always have the name main. It is possible for main to describe a non-primary type." 
+ switch def.Inner.(type) { + case SchemaRecord, SchemaQuery, SchemaProcedure, SchemaSubscription: + if frag != "main" { + return fmt.Errorf("record, query, procedure, and subscription types must be 'main', not: %s", frag) + } + } + s := Schema{ + ID: name, + Revision: sf.Revision, + Def: def.Inner, + } + c.Schemas[name] = s + } + return nil } + +func (c *Catalog) LoadDirectory(dirPath string) error { + return filepath.WalkDir(dirPath, func(p string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if !strings.HasSuffix(p, ".json") { + return nil + } + // TODO: logging + fmt.Println(p) + f, err := os.Open(p) + if err != nil { + return err + } + defer func() { _ = f.Close() }() + + b, err := io.ReadAll(f) + if err != nil { + return err + } + + var sf SchemaFile + if err = json.Unmarshal(b, &sf); err != nil { + return err + } + if err = c.AddSchemaFile(sf); err != nil { + return err + } + return nil + }) +} + +//func (c *Catalog) ValidateData(d map[string]any) error From 99dd213958c29f7475602995c48812e6790dbbc5 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 7 Nov 2023 00:28:38 -0800 Subject: [PATCH 03/23] lexicon: progress on validation --- atproto/lexicon/language.go | 154 ++++++++++++++++++ atproto/lexicon/language_test.go | 2 +- atproto/lexicon/lexicon.go | 107 +++++++++++- atproto/lexicon/lexicon_test.go | 39 +++++ .../{ => valid}/com_atproto_label_defs.json | 0 5 files changed, 300 insertions(+), 2 deletions(-) create mode 100644 atproto/lexicon/lexicon_test.go rename atproto/lexicon/testdata/{ => valid}/com_atproto_label_defs.json (100%) diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index f303d99ab..579e45dc9 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -5,6 +5,9 @@ import ( "fmt" "reflect" "strings" + + "github.com/bluesky-social/indigo/atproto/data" + "github.com/bluesky-social/indigo/atproto/syntax" ) // Serialization helper for 
top-level Lexicon schema JSON objects (files) @@ -334,6 +337,20 @@ type SchemaError struct { func (s *SchemaError) CheckSchema() error { return nil } +func (s *SchemaError) Validate(d any) error { + e, ok := d.(map[string]any) + if !ok { + return fmt.Errorf("expected an object in error position") + } + n, ok := e["error"] + if !ok { + return fmt.Errorf("expected error type") + } + if n != s.Name { + return fmt.Errorf("error type mis-match: %s", n) + } + return nil +} type SchemaNull struct { Type string `json:"type,const=null"` @@ -344,6 +361,13 @@ func (s *SchemaNull) CheckSchema() error { return nil } +func (s *SchemaNull) Validate(d any) error { + if d != nil { + return fmt.Errorf("expected null data, got: %s", reflect.TypeOf(d)) + } + return nil +} + type SchemaBoolean struct { Type string `json:"type,const=bool"` Description *string `json:"description,omitempty"` @@ -358,6 +382,17 @@ func (s *SchemaBoolean) CheckSchema() error { return nil } +func (s *SchemaBoolean) Validate(d any) error { + v, ok := d.(bool) + if !ok { + return fmt.Errorf("expected a boolean") + } + if s.Const != nil && v != *s.Const { + return fmt.Errorf("boolean val didn't match constant (%v): %v", *s.Const, v) + } + return nil +} + type SchemaInteger struct { Type string `json:"type,const=integer"` Description *string `json:"description,omitempty"` @@ -379,6 +414,21 @@ func (s *SchemaInteger) CheckSchema() error { return nil } +func (s *SchemaInteger) Validate(d any) error { + v, ok := d.(int) + if !ok { + return fmt.Errorf("expected an integer") + } + // TODO: enforce enum + if s.Const != nil && v != *s.Const { + return fmt.Errorf("integer val didn't match constant (%d): %d", *s.Const, v) + } + if (s.Minimum != nil && v < *s.Minimum) || (s.Maximum != nil && v > *s.Maximum) { + return fmt.Errorf("integer val outside specified range: %d", v) + } + return nil +} + type SchemaString struct { Type string `json:"type,const=string"` Description *string `json:"description,omitempty"` @@ -410,6 
+460,71 @@ func (s *SchemaString) CheckSchema() error { (s.MaxGraphemes != nil && *s.MaxGraphemes < 0) { return fmt.Errorf("string schema min or max below zero") } + if s.Format != nil { + switch *s.Format { + case "at-identifier", "at-uri", "cid", "datetime", "did", "handle", "nsid", "uri", "language": + // pass + default: + return fmt.Errorf("unknown string format: %s", *s.Format) + } + } + return nil +} + +func (s *SchemaString) Validate(d any) error { + v, ok := d.(string) + if !ok { + return fmt.Errorf("expected a string") + } + // TODO: enforce enum + if s.Const != nil && v != *s.Const { + return fmt.Errorf("string val didn't match constant (%s): %s", *s.Const, v) + } + // TODO: is this actually counting UTF-8 length? + if (s.MinLength != nil && len(v) < *s.MinLength) || (s.MaxLength != nil && len(v) > *s.MaxLength) { + return fmt.Errorf("string length outside specified range: %d", len(v)) + } + // TODO: grapheme length + if s.Format != nil { + switch *s.Format { + case "at-identifier": + if _, err := syntax.ParseAtIdentifier(v); err != nil { + return err + } + case "at-uri": + if _, err := syntax.ParseATURI(v); err != nil { + return err + } + case "cid": + if _, err := syntax.ParseCID(v); err != nil { + return err + } + case "datetime": + if _, err := syntax.ParseDatetime(v); err != nil { + return err + } + case "did": + if _, err := syntax.ParseDID(v); err != nil { + return err + } + case "handle": + if _, err := syntax.ParseHandle(v); err != nil { + return err + } + case "nsid": + if _, err := syntax.ParseNSID(v); err != nil { + return err + } + case "uri": + if _, err := syntax.ParseURI(v); err != nil { + return err + } + case "language": + if _, err := syntax.ParseLanguage(v); err != nil { + return err + } + } + } return nil } @@ -431,6 +546,17 @@ func (s *SchemaBytes) CheckSchema() error { return nil } +func (s *SchemaBytes) Validate(d any) error { + v, ok := d.(data.Bytes) + if !ok { + return fmt.Errorf("expecting bytes") + } + if (s.MinLength != nil 
&& len(v) < *s.MinLength) || (s.MaxLength != nil && len(v) > *s.MaxLength) { + return fmt.Errorf("bytes size out of bounds: %d", len(v)) + } + return nil +} + type SchemaCIDLink struct { Type string `json:"type,const=cid-link"` Description *string `json:"description,omitempty"` @@ -440,6 +566,14 @@ func (s *SchemaCIDLink) CheckSchema() error { return nil } +func (s *SchemaCIDLink) Validate(d any) error { + _, ok := d.(data.CIDLink) + if !ok { + return fmt.Errorf("expecting a cid-link") + } + return nil +} + type SchemaArray struct { Type string `json:"type,const=array"` Description *string `json:"description,omitempty"` @@ -507,6 +641,18 @@ func (s *SchemaBlob) CheckSchema() error { return nil } +func (s *SchemaBlob) Validate(d any) error { + v, ok := d.(data.Blob) + if !ok { + return fmt.Errorf("expected a blob") + } + // TODO: validate accept mimetype + if s.MaxSize != nil && int(v.Size) > *s.MaxSize { + return fmt.Errorf("blob size too large: %d", v.Size) + } + return nil +} + type SchemaParams struct { Type string `json:"type,const=params"` Description *string `json:"description,omitempty"` @@ -546,6 +692,10 @@ func (s *SchemaParams) CheckSchema() error { return nil } +func (s *SchemaParams) Validate(d any) error { + return nil +} + type SchemaToken struct { Type string `json:"type,const=token"` Description *string `json:"description,omitempty"` @@ -595,3 +745,7 @@ type SchemaUnknown struct { func (s *SchemaUnknown) CheckSchema() error { return nil } + +func (s *SchemaUnknown) Validate(d any) error { + return nil +} diff --git a/atproto/lexicon/language_test.go b/atproto/lexicon/language_test.go index 89882c86e..982653a79 100644 --- a/atproto/lexicon/language_test.go +++ b/atproto/lexicon/language_test.go @@ -12,7 +12,7 @@ import ( func TestBasicLabelLexicon(t *testing.T) { assert := assert.New(t) - f, err := os.Open("testdata/com_atproto_label_defs.json") + f, err := os.Open("testdata/valid/com_atproto_label_defs.json") if err != nil { t.Fatal(err) } diff 
--git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index f07aea6c7..22bfdc99c 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -7,6 +7,7 @@ import ( "io/fs" "os" "path/filepath" + "reflect" "strings" ) @@ -106,4 +107,108 @@ func (c *Catalog) LoadDirectory(dirPath string) error { }) } -//func (c *Catalog) ValidateData(d map[string]any) error +func (c *Catalog) Validate(d any, id string) error { + schema, err := c.Resolve(id) + if err != nil { + return nil + } + switch v := schema.Def.(type) { + case SchemaRecord: + obj, ok := d.(map[string]any) + if !ok { + return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) + } + // XXX: return v.Validate(d, schema.ID) + _ = obj + _ = v + return nil + case SchemaQuery: + // XXX: return v.Validate(d) + return nil + case SchemaProcedure: + // XXX: return v.Validate(d) + return nil + case SchemaSubscription: + obj, ok := d.(map[string]any) + if !ok { + return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) + } + // XXX: return v.Validate(d) + _ = obj + return nil + case SchemaToken: + str, ok := d.(string) + if !ok { + return fmt.Errorf("expected a string for token, got: %s", reflect.TypeOf(d)) + } + if str != id { + return fmt.Errorf("expected token (%s), got: %s", id, str) + } + return nil + default: + return c.validateDef(schema.Def, d) + } +} + +func (c *Catalog) validateDef(def any, d any) error { + // TODO: + switch v := def.(type) { + case SchemaNull: + return v.Validate(d) + case SchemaBoolean: + return v.Validate(d) + case SchemaInteger: + return v.Validate(d) + case SchemaString: + return v.Validate(d) + case SchemaBytes: + return v.Validate(d) + case SchemaCIDLink: + return v.Validate(d) + case SchemaArray: + arr, ok := d.([]any) + if !ok { + return fmt.Errorf("expected an array, got: %s", reflect.TypeOf(d)) + } + // XXX: return v.ValidateArray(d, v) + _ = arr + return nil + case SchemaObject: + obj, ok := d.(map[string]any) + if !ok { + return 
fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) + } + return c.ValidateObject(v, obj) + case SchemaBlob: + return v.Validate(d) + case SchemaParams: + return v.Validate(d) + case SchemaRef: + // recurse + return c.Validate(d, v.Ref) + case SchemaUnion: + // XXX: special ValidateUnion helper + return nil + case SchemaUnknown: + return v.Validate(d) + default: + return fmt.Errorf("unhandled schema type: %s", reflect.TypeOf(v)) + } +} + +func (c *Catalog) ValidateObject(s SchemaObject, d map[string]any) error { + for _, k := range s.Required { + if _, ok := d[k]; !ok { + return fmt.Errorf("required field missing: %s", k) + } + } + for k, def := range s.Properties { + if v, ok := d[k]; ok { + err := c.validateDef(def.Inner, v) + if err != nil { + return err + } + } + } + return nil +} diff --git a/atproto/lexicon/lexicon_test.go b/atproto/lexicon/lexicon_test.go new file mode 100644 index 000000000..ba3eeec75 --- /dev/null +++ b/atproto/lexicon/lexicon_test.go @@ -0,0 +1,39 @@ +package lexicon + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBasicCatalog(t *testing.T) { + assert := assert.New(t) + + cat := NewCatalog() + if err := cat.LoadDirectory("testdata/valid"); err != nil { + t.Fatal(err) + } + + assert.NoError(cat.Validate( + map[string]any{ + "cid": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", + "cts": "2000-01-01T00:00:00.000Z", + "neg": false, + "src": "did:example:labeler", + "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", + "val": "test-label", + }, + "com.atproto.label.defs#label", + )) + + assert.Error(cat.Validate( + map[string]any{ + "cid": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", + "cts": "2000-01-01T00:00:00.000Z", + "neg": false, + "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", + "val": "test-label", + }, + "com.atproto.label.defs#label", + )) +} diff --git a/atproto/lexicon/testdata/com_atproto_label_defs.json 
b/atproto/lexicon/testdata/valid/com_atproto_label_defs.json similarity index 100% rename from atproto/lexicon/testdata/com_atproto_label_defs.json rename to atproto/lexicon/testdata/valid/com_atproto_label_defs.json From 23d8f85014bc677c7a2d982ebac3288d02de4d93 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 7 Nov 2023 22:51:10 -0800 Subject: [PATCH 04/23] lexicon: more progress on testing and validation --- atproto/lexicon/interop_language_test.go | 95 ++++++++ atproto/lexicon/interop_record_test.go | 89 ++++++++ atproto/lexicon/language.go | 24 +- atproto/lexicon/language_test.go | 2 +- atproto/lexicon/lexicon.go | 100 ++++---- atproto/lexicon/lexicon_test.go | 14 +- .../com_atproto_label_defs.json | 6 +- atproto/lexicon/testdata/catalog/query.json | 70 ++++++ atproto/lexicon/testdata/catalog/record.json | 213 ++++++++++++++++++ atproto/lexicon/testdata/lexicon-invalid.json | 18 ++ atproto/lexicon/testdata/lexicon-valid.json | 10 + .../lexicon/testdata/record-data-invalid.json | 21 ++ .../lexicon/testdata/record-data-valid.json | 91 ++++++++ 13 files changed, 690 insertions(+), 63 deletions(-) create mode 100644 atproto/lexicon/interop_language_test.go create mode 100644 atproto/lexicon/interop_record_test.go rename atproto/lexicon/testdata/{valid => catalog}/com_atproto_label_defs.json (99%) create mode 100644 atproto/lexicon/testdata/catalog/query.json create mode 100644 atproto/lexicon/testdata/catalog/record.json create mode 100644 atproto/lexicon/testdata/lexicon-invalid.json create mode 100644 atproto/lexicon/testdata/lexicon-valid.json create mode 100644 atproto/lexicon/testdata/record-data-invalid.json create mode 100644 atproto/lexicon/testdata/record-data-valid.json diff --git a/atproto/lexicon/interop_language_test.go b/atproto/lexicon/interop_language_test.go new file mode 100644 index 000000000..fa9f347ee --- /dev/null +++ b/atproto/lexicon/interop_language_test.go @@ -0,0 +1,95 @@ +package lexicon + +import ( + "encoding/json" + "io" + "os" 
+ "testing" + + "github.com/stretchr/testify/assert" +) + +type LexiconFixture struct { + Name string `json:"name"` + Lexicon json.RawMessage `json:"lexicon"` +} + +func TestInteropLexiconValid(t *testing.T) { + + f, err := os.Open("testdata/lexicon-valid.json") + if err != nil { + t.Fatal(err) + } + defer func() { _ = f.Close() }() + + jsonBytes, err := io.ReadAll(f) + if err != nil { + t.Fatal(err) + } + + var fixtures []LexiconFixture + if err := json.Unmarshal(jsonBytes, &fixtures); err != nil { + t.Fatal(err) + } + + for _, f := range fixtures { + testLexiconFixtureValid(t, f) + } +} + +func testLexiconFixtureValid(t *testing.T, fixture LexiconFixture) { + assert := assert.New(t) + + var schema SchemaFile + if err := json.Unmarshal(fixture.Lexicon, &schema); err != nil { + t.Fatal(err) + } + + outBytes, err := json.Marshal(schema) + if err != nil { + t.Fatal(err) + } + + var beforeMap map[string]any + if err := json.Unmarshal(fixture.Lexicon, &beforeMap); err != nil { + t.Fatal(err) + } + + var afterMap map[string]any + if err := json.Unmarshal(outBytes, &afterMap); err != nil { + t.Fatal(err) + } + + assert.Equal(beforeMap, afterMap) +} + +func TestInteropLexiconInvalid(t *testing.T) { + + f, err := os.Open("testdata/lexicon-invalid.json") + if err != nil { + t.Fatal(err) + } + defer func() { _ = f.Close() }() + + jsonBytes, err := io.ReadAll(f) + if err != nil { + t.Fatal(err) + } + + var fixtures []LexiconFixture + if err := json.Unmarshal(jsonBytes, &fixtures); err != nil { + t.Fatal(err) + } + + for _, f := range fixtures { + testLexiconFixtureInvalid(t, f) + } +} + +func testLexiconFixtureInvalid(t *testing.T, fixture LexiconFixture) { + assert := assert.New(t) + + var schema SchemaFile + err := json.Unmarshal(fixture.Lexicon, &schema) + assert.Error(err) +} diff --git a/atproto/lexicon/interop_record_test.go b/atproto/lexicon/interop_record_test.go new file mode 100644 index 000000000..8a2282ff8 --- /dev/null +++ b/atproto/lexicon/interop_record_test.go 
@@ -0,0 +1,89 @@ +package lexicon + +import ( + "encoding/json" + "fmt" + "io" + "os" + "testing" + + "github.com/bluesky-social/indigo/atproto/data" + + "github.com/stretchr/testify/assert" +) + +type RecordFixture struct { + Name string `json:"name"` + RecordKey string `json:"rkey"` + Data json.RawMessage `json:"data"` +} + +func TestInteropRecordValid(t *testing.T) { + assert := assert.New(t) + + cat := NewCatalog() + if err := cat.LoadDirectory("testdata/catalog"); err != nil { + t.Fatal(err) + } + + f, err := os.Open("testdata/record-data-valid.json") + if err != nil { + t.Fatal(err) + } + defer func() { _ = f.Close() }() + + jsonBytes, err := io.ReadAll(f) + if err != nil { + t.Fatal(err) + } + + var fixtures []RecordFixture + if err := json.Unmarshal(jsonBytes, &fixtures); err != nil { + t.Fatal(err) + } + + for _, fixture := range fixtures { + fmt.Println(fixture.Name) + d, err := data.UnmarshalJSON(fixture.Data) + if err != nil { + t.Fatal(err) + } + + assert.NoError(cat.ValidateRecord(d, "example.lexicon.record")) + } +} + +func TestInteropRecordInvalid(t *testing.T) { + assert := assert.New(t) + + cat := NewCatalog() + if err := cat.LoadDirectory("testdata/catalog"); err != nil { + t.Fatal(err) + } + + f, err := os.Open("testdata/record-data-invalid.json") + if err != nil { + t.Fatal(err) + } + defer func() { _ = f.Close() }() + + jsonBytes, err := io.ReadAll(f) + if err != nil { + t.Fatal(err) + } + + var fixtures []RecordFixture + if err := json.Unmarshal(jsonBytes, &fixtures); err != nil { + t.Fatal(err) + } + + for _, fixture := range fixtures { + fmt.Println(fixture.Name) + d, err := data.UnmarshalJSON(fixture.Data) + if err != nil { + t.Fatal(err) + } + + assert.Error(cat.ValidateRecord(d, "example.lexicon.record")) + } +} diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 579e45dc9..7f10e54fe 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -63,7 +63,7 @@ func (s *SchemaDef) CheckSchema() 
error { case SchemaUnknown: return v.CheckSchema() default: - return fmt.Errorf("unhandled schema type: %s", reflect.TypeOf(v)) + return fmt.Errorf("unhandled schema type: %v", reflect.TypeOf(v)) } } @@ -207,7 +207,6 @@ func (s *SchemaDef) UnmarshalJSON(b []byte) error { default: return fmt.Errorf("unexpected schema type: %s", t) } - return fmt.Errorf("unexpected schema type: %s", t) } type SchemaRecord struct { @@ -415,10 +414,11 @@ func (s *SchemaInteger) CheckSchema() error { } func (s *SchemaInteger) Validate(d any) error { - v, ok := d.(int) + v64, ok := d.(int64) if !ok { return fmt.Errorf("expected an integer") } + v := int(v64) // TODO: enforce enum if s.Const != nil && v != *s.Const { return fmt.Errorf("integer val didn't match constant (%d): %d", *s.Const, v) @@ -474,7 +474,7 @@ func (s *SchemaString) CheckSchema() error { func (s *SchemaString) Validate(d any) error { v, ok := d.(string) if !ok { - return fmt.Errorf("expected a string") + return fmt.Errorf("expected a string: %v", reflect.TypeOf(d)) } // TODO: enforce enum if s.Const != nil && v != *s.Const { @@ -606,12 +606,12 @@ func (s *SchemaObject) CheckSchema() error { // TODO: check for set uniqueness of required and nullable for _, k := range s.Required { if _, ok := s.Properties[k]; !ok { - fmt.Errorf("object 'required' field not in properties: %s", k) + return fmt.Errorf("object 'required' field not in properties: %s", k) } } for _, k := range s.Nullable { if _, ok := s.Properties[k]; !ok { - fmt.Errorf("object 'nullable' field not in properties: %s", k) + return fmt.Errorf("object 'nullable' field not in properties: %s", k) } } for k, def := range s.Properties { @@ -626,6 +626,16 @@ func (s *SchemaObject) CheckSchema() error { return nil } +// Checks if a field name 'k' is one of the Nullable fields for this object +func (s *SchemaObject) IsNullable(k string) bool { + for _, el := range s.Nullable { + if el == k { + return true + } + } + return false +} + type SchemaBlob struct { Type string 
`json:"type,const=blob"` Description *string `json:"description,omitempty"` @@ -664,7 +674,7 @@ func (s *SchemaParams) CheckSchema() error { // TODO: check for set uniqueness of required for _, k := range s.Required { if _, ok := s.Properties[k]; !ok { - fmt.Errorf("object 'required' field not in properties: %s", k) + return fmt.Errorf("object 'required' field not in properties: %s", k) } } for k, def := range s.Properties { diff --git a/atproto/lexicon/language_test.go b/atproto/lexicon/language_test.go index 982653a79..cd33396f2 100644 --- a/atproto/lexicon/language_test.go +++ b/atproto/lexicon/language_test.go @@ -12,7 +12,7 @@ import ( func TestBasicLabelLexicon(t *testing.T) { assert := assert.New(t) - f, err := os.Open("testdata/valid/com_atproto_label_defs.json") + f, err := os.Open("testdata/catalog/com_atproto_label_defs.json") if err != nil { t.Fatal(err) } diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 22bfdc99c..bee5cbc3a 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -107,50 +107,29 @@ func (c *Catalog) LoadDirectory(dirPath string) error { }) } -func (c *Catalog) Validate(d any, id string) error { - schema, err := c.Resolve(id) +// TODO: rkey? is nsid always known? 
+// TODO: nsid as syntax.NSID +func (c *Catalog) ValidateRecord(raw any, id string) error { + def, err := c.Resolve(id) if err != nil { - return nil + return err } - switch v := schema.Def.(type) { - case SchemaRecord: - obj, ok := d.(map[string]any) - if !ok { - return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) - } - // XXX: return v.Validate(d, schema.ID) - _ = obj - _ = v - return nil - case SchemaQuery: - // XXX: return v.Validate(d) - return nil - case SchemaProcedure: - // XXX: return v.Validate(d) - return nil - case SchemaSubscription: - obj, ok := d.(map[string]any) - if !ok { - return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) - } - // XXX: return v.Validate(d) - _ = obj - return nil - case SchemaToken: - str, ok := d.(string) - if !ok { - return fmt.Errorf("expected a string for token, got: %s", reflect.TypeOf(d)) - } - if str != id { - return fmt.Errorf("expected token (%s), got: %s", id, str) - } - return nil - default: - return c.validateDef(schema.Def, d) + s, ok := def.Def.(SchemaRecord) + if !ok { + return fmt.Errorf("schema is not of record type: %s", id) } + d, ok := raw.(map[string]any) + if !ok { + return fmt.Errorf("record data is not object type") + } + t, ok := d["$type"] + if !ok || t != id { + return fmt.Errorf("record data missing $type, or didn't match expected NSID") + } + return c.validateObject(s.Record, d) } -func (c *Catalog) validateDef(def any, d any) error { +func (c *Catalog) validateData(def any, d any) error { // TODO: switch v := def.(type) { case SchemaNull: @@ -170,33 +149,44 @@ func (c *Catalog) validateDef(def any, d any) error { if !ok { return fmt.Errorf("expected an array, got: %s", reflect.TypeOf(d)) } - // XXX: return v.ValidateArray(d, v) - _ = arr - return nil + return c.validateArray(v, arr) case SchemaObject: obj, ok := d.(map[string]any) if !ok { return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) } - return c.ValidateObject(v, obj) + return c.validateObject(v, 
obj) case SchemaBlob: return v.Validate(d) case SchemaParams: return v.Validate(d) case SchemaRef: + // XXX: relative refs (in-file) // recurse - return c.Validate(d, v.Ref) + next, err := c.Resolve(v.Ref) + if err != nil { + return err + } + return c.validateData(next.Def, d) case SchemaUnion: - // XXX: special ValidateUnion helper + //return fmt.Errorf("XXX: union validation not implemented") return nil case SchemaUnknown: return v.Validate(d) + case SchemaToken: + str, ok := d.(string) + if !ok { + return fmt.Errorf("expected a string for token, got: %s", reflect.TypeOf(d)) + } + // XXX: token validation not implemented + _ = str + return nil default: return fmt.Errorf("unhandled schema type: %s", reflect.TypeOf(v)) } } -func (c *Catalog) ValidateObject(s SchemaObject, d map[string]any) error { +func (c *Catalog) validateObject(s SchemaObject, d map[string]any) error { for _, k := range s.Required { if _, ok := d[k]; !ok { return fmt.Errorf("required field missing: %s", k) @@ -204,7 +194,10 @@ func (c *Catalog) ValidateObject(s SchemaObject, d map[string]any) error { } for k, def := range s.Properties { if v, ok := d[k]; ok { - err := c.validateDef(def.Inner, v) + if v == nil && s.IsNullable(k) { + continue + } + err := c.validateData(def.Inner, v) if err != nil { return err } @@ -212,3 +205,16 @@ func (c *Catalog) ValidateObject(s SchemaObject, d map[string]any) error { } return nil } + +func (c *Catalog) validateArray(s SchemaArray, arr []any) error { + if (s.MinLength != nil && len(arr) < *s.MinLength) || (s.MaxLength != nil && len(arr) > *s.MaxLength) { + return fmt.Errorf("array length out of bounds: %d", len(arr)) + } + for _, v := range arr { + err := c.validateData(s.Items.Inner, v) + if err != nil { + return err + } + } + return nil +} diff --git a/atproto/lexicon/lexicon_test.go b/atproto/lexicon/lexicon_test.go index ba3eeec75..81953e04a 100644 --- a/atproto/lexicon/lexicon_test.go +++ b/atproto/lexicon/lexicon_test.go @@ -10,11 +10,16 @@ func 
TestBasicCatalog(t *testing.T) { assert := assert.New(t) cat := NewCatalog() - if err := cat.LoadDirectory("testdata/valid"); err != nil { + if err := cat.LoadDirectory("testdata/catalog"); err != nil { t.Fatal(err) } - assert.NoError(cat.Validate( + def, err := cat.Resolve("com.atproto.label.defs#label") + if err != nil { + t.Fatal(err) + } + assert.NoError(cat.validateData( + def.Def, map[string]any{ "cid": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", "cts": "2000-01-01T00:00:00.000Z", @@ -23,10 +28,10 @@ func TestBasicCatalog(t *testing.T) { "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", "val": "test-label", }, - "com.atproto.label.defs#label", )) - assert.Error(cat.Validate( + assert.Error(cat.validateData( + def.Def, map[string]any{ "cid": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", "cts": "2000-01-01T00:00:00.000Z", @@ -34,6 +39,5 @@ func TestBasicCatalog(t *testing.T) { "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", "val": "test-label", }, - "com.atproto.label.defs#label", )) } diff --git a/atproto/lexicon/testdata/valid/com_atproto_label_defs.json b/atproto/lexicon/testdata/catalog/com_atproto_label_defs.json similarity index 99% rename from atproto/lexicon/testdata/valid/com_atproto_label_defs.json rename to atproto/lexicon/testdata/catalog/com_atproto_label_defs.json index 57f06f81b..f8677dc37 100644 --- a/atproto/lexicon/testdata/valid/com_atproto_label_defs.json +++ b/atproto/lexicon/testdata/catalog/com_atproto_label_defs.json @@ -1,4 +1,6 @@ { + "lexicon": 1, + "id": "com.atproto.label.defs", "defs": { "label": { "description": "Metadata tag on an atproto resource (eg, repo or record)", @@ -72,7 +74,5 @@ ], "type": "object" } - }, - "id": "com.atproto.label.defs", - "lexicon": 1 + } } diff --git a/atproto/lexicon/testdata/catalog/query.json b/atproto/lexicon/testdata/catalog/query.json new file mode 100644 index 000000000..b337fc007 --- /dev/null +++ 
b/atproto/lexicon/testdata/catalog/query.json @@ -0,0 +1,70 @@ +{ + "lexicon": 1, + "id": "example.lexicon.query", + "revision": 1, + "description": "exersizes many lexicon features for the query type", + "defs": { + "main": { + "type": "query", + "description": "a query type", + "parameters": { + "type": "params", + "description": "a params type", + "required": ["string"], + "properties": { + "boolean": { + "type": "boolean", + "description": "field of type boolean" + }, + "integer": { + "type": "integer", + "description": "field of type integer" + }, + "string": { + "type": "string", + "description": "field of type string" + }, + "handle": { + "type": "string", + "format": "handle", + "description": "field of type string, format handle" + }, + "unknown": { + "type": "unknown", + "description": "field of type unknown" + }, + "array": { + "type": "array", + "description": "field of type array", + "items": { "type": "integer" } + } + } + }, + "output": { + "description": "output body type", + "encoding": "application/json", + "schema": { + "type": "object", + "properties": { + "a": { + "type": "integer" + }, + "b": { + "type": "integer" + } + } + } + }, + "errors": [ + { + "name": "DemoError", + "description": "demo error value" + }, + { + "name": "AnotherDemoError", + "description": "another demo error value" + } + ] + } + } +} diff --git a/atproto/lexicon/testdata/catalog/record.json b/atproto/lexicon/testdata/catalog/record.json new file mode 100644 index 000000000..11351fd40 --- /dev/null +++ b/atproto/lexicon/testdata/catalog/record.json @@ -0,0 +1,213 @@ +{ + "lexicon": 1, + "id": "example.lexicon.record", + "revision": 1, + "description": "exersizes many lexicon features for the record type", + "defs": { + "main": { + "type": "record", + "key": "literal:demo", + "description": "a record type with many field", + "record": { + "required": [ "integer" ], + "nullable": [ "nullableString" ], + "properties": { + "null": { + "type": "null", + "description": "field 
of type null" + }, + "boolean": { + "type": "boolean", + "description": "field of type boolean" + }, + "integer": { + "type": "integer", + "description": "field of type integer" + }, + "string": { + "type": "string", + "description": "field of type string" + }, + "nullableString": { + "type": "string", + "description": "field of type string; value is nullable" + }, + "bytes": { + "type": "bytes", + "description": "field of type bytes" + }, + "cid-link": { + "type": "cid-link", + "description": "field of type cid-link" + }, + "blob": { + "type": "blob", + "description": "field of type blob" + }, + "unknown": { + "type": "unknown", + "description": "field of type unknown" + }, + "array": { + "type": "array", + "description": "field of type array", + "items": { "type": "integer" } + }, + "object": { + "type": "object", + "description": "field of type null", + "properties": { + "a": { "type": "integer" }, + "b": { "type": "integer" } + } + }, + "ref": { + "type": "ref", + "description": "field of type ref", + "ref": "example.lexicon.record#demoToken" + }, + "union": { + "type": "union", + "refs": [ + "example.lexicon.record#demoToken", + "example.lexicon.record#demoObject" + ] + }, + "formats": { + "type": "ref", + "ref": "example.lexicon.record#stringFormats" + }, + "constInteger": { + "type": "integer", + "const": 42 + }, + "defaultInteger": { + "type": "integer", + "default": 42 + }, + "enumInteger": { + "type": "integer", + "enum": [4, 9, 16, 25] + }, + "rangeInteger": { + "type": "integer", + "minimum": 10, + "maximum": 20 + }, + "lenString": { + "type": "string", + "minLength": 10, + "maxLength": 20 + }, + "graphemeString": { + "type": "string", + "minGraphemes": 10, + "maxGraphemes": 20 + }, + "enumString": { + "type": "string", + "knownValues": ["fish", "tree", "rock"] + }, + "knownString": { + "type": "string", + "knownValues": ["blue", "green", "red"] + }, + "sizeBytes": { + "type": "bytes", + "minLength": 10, + "maxLength": 20 + }, + "lenArray": { + "type": 
"array", + "items": { "type": "integer" }, + "minLength": 2, + "maxLength": 5 + }, + "sizeBlob": { + "type": "blob", + "maxSize": 20 + }, + "acceptBlob": { + "type": "blob", + "accept": [ "image/*" ] + }, + "closedUnion": { + "type": "union", + "refs": [ + "example.lexicon.record#demoToken", + "example.lexicon.record#demoObject" + ], + "closed": true + } + } + } + }, + "stringFormats": { + "type": "object", + "description": "all the various string format types", + "properties": { + "did": { + "type": "string", + "format": "did", + "description": "a did string" + }, + "handle": { + "type": "string", + "format": "handle", + "description": "a did string" + }, + "atidentifier": { + "type": "string", + "format": "at-identifier", + "description": "an at-identifier string" + }, + "nsid": { + "type": "string", + "format": "nsid", + "description": "an nsid string" + }, + "aturi": { + "type": "string", + "format": "at-uri", + "description": "an at-uri string" + }, + "cid": { + "type": "string", + "format": "cid", + "description": "a cid string (not a cid-link)" + }, + "datetime": { + "type": "string", + "format": "datetime", + "description": "a datetime string" + }, + "language": { + "type": "string", + "format": "language", + "description": "a language string" + }, + "uri": { + "type": "string", + "format": "uri", + "description": "a generic URI field" + } + } + }, + "demoToken": { + "type": "token", + "description": "an example of what a token looks like" + }, + "demoObject": { + "type": "object", + "description": "smaller object schema for unions", + "parameters": { + "a": { + "type": "integer" + }, + "b": { + "type": "integer" + } + } + } + } +} diff --git a/atproto/lexicon/testdata/lexicon-invalid.json b/atproto/lexicon/testdata/lexicon-invalid.json new file mode 100644 index 000000000..2c80fedbc --- /dev/null +++ b/atproto/lexicon/testdata/lexicon-invalid.json @@ -0,0 +1,18 @@ +[ +{ + "name": "invalid lexicon field", + "lexicon": { + "lexicon": "one", + "id": 
"example.lexicon", + "defs": { "demo": { "type": "integer" } } + } +}, +{ + "name": "invalid id field", + "lexicon": { + "lexicon": 1, + "id": 2, + "defs": { "demo": { "type": "integer" } } + } +} +] diff --git a/atproto/lexicon/testdata/lexicon-valid.json b/atproto/lexicon/testdata/lexicon-valid.json new file mode 100644 index 000000000..bfecaeb73 --- /dev/null +++ b/atproto/lexicon/testdata/lexicon-valid.json @@ -0,0 +1,10 @@ +[ +{ + "name": "minimal", + "lexicon": { + "lexicon": 1, + "id": "example.lexicon", + "defs": { "demo": { "type": "integer" } } + } +} +] diff --git a/atproto/lexicon/testdata/record-data-invalid.json b/atproto/lexicon/testdata/record-data-invalid.json new file mode 100644 index 000000000..05c8f514a --- /dev/null +++ b/atproto/lexicon/testdata/record-data-invalid.json @@ -0,0 +1,21 @@ +[ + { "name": "missing required field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record" } + }, + { "name": "invalid string field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "string": 2 } }, + { "name": "invalid string format handle", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "handle": "123" } } + }, + { "name": "invalid string format did", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "did": "123" } } + }, + { "name": "invalid array element", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "array": [true, false] } + } +] diff --git a/atproto/lexicon/testdata/record-data-valid.json b/atproto/lexicon/testdata/record-data-valid.json new file mode 100644 index 000000000..a5c4c2f00 --- /dev/null +++ b/atproto/lexicon/testdata/record-data-valid.json @@ -0,0 +1,91 @@ +[ + { + "name": "minimal", + "rkey": "demo", + "data": { + "$type": "example.lexicon.record", + "integer": 1 + } + }, + { + "name": "full", + "rkey": "demo", + "data": { + "$type": "example.lexicon.record", + "null": null, + 
"boolean": true, + "integer": 3, + "string": "blah", + "nullableString": null, + "bytes": { + "$bytes": "123" + }, + "cidlink": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + }, + "blob": { + "$type": "blob", + "mimeType": "text/plain", + "size": 12345, + "ref": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }, + "unknown": { + "a": "alphabet", + "b": 3 + }, + "array": [1,2,3], + "object": { + "a": 1, + "b": 2 + }, + "ref": "example.lexicon.record#demoToken", + "union": { + "$type": "example.lexicon.record#demoObject", + "a": 1, + "b": 2 + }, + "formats": { + "did": "did:web:example.com", + "handle": "handle.example.com", + "atidentifier": "handle.example.com", + "aturi": "at://handle.example.com/com.example.nsid/asdf123", + "nsid": "com.example.nsid", + "cid": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq", + "datetime": "2023-10-30T22:25:23Z", + "language": "en", + "uri": "https://example.com/file.txt" + }, + "constInteger": 42, + "defaultInteger": 123, + "enumInteger": 16, + "rangeInteger": 16, + "lenString": "1234567890ABC", + "graphemeString": "abcde", + "enumString": "fish", + "knownString": "blue", + "sizeBytes": { + "$bytes": "asdfasdfasdfasdf" + }, + "lenArray": [1,2,3], + "sizeBlob": { + "$type": "blob", + "mimeType": "text/plain", + "size": 8, + "ref": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }, + "acceptBlob": { + "$type": "blob", + "mimeType": "image/png", + "size": 12345, + "ref": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }, + "closedUnion": "example.lexicon.record#demoToken" + } + } +] From 0a7d59700ee75cd83f4e1457338dd4423f0d4507 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 28 Feb 2024 20:32:44 -0800 Subject: [PATCH 05/23] lexicon: progress on validation --- atproto/lexicon/interop_record_test.go | 7 ++- atproto/lexicon/language.go | 65 ++++++++++++++++++++++++-- 
atproto/lexicon/lexicon.go | 34 ++++++++++---- atproto/lexicon/mimetype.go | 19 ++++++++ atproto/lexicon/mimetype_test.go | 21 +++++++++ 5 files changed, 131 insertions(+), 15 deletions(-) create mode 100644 atproto/lexicon/mimetype.go create mode 100644 atproto/lexicon/mimetype_test.go diff --git a/atproto/lexicon/interop_record_test.go b/atproto/lexicon/interop_record_test.go index 8a2282ff8..f2bd14d44 100644 --- a/atproto/lexicon/interop_record_test.go +++ b/atproto/lexicon/interop_record_test.go @@ -83,7 +83,10 @@ func TestInteropRecordInvalid(t *testing.T) { if err != nil { t.Fatal(err) } - - assert.Error(cat.ValidateRecord(d, "example.lexicon.record")) + err = cat.ValidateRecord(d, "example.lexicon.record") + if err == nil { + fmt.Println(" FAIL") + } + assert.Error(err) } } diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 7f10e54fe..eeed9b1eb 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -8,6 +8,8 @@ import ( "github.com/bluesky-social/indigo/atproto/data" "github.com/bluesky-social/indigo/atproto/syntax" + + "github.com/rivo/uniseg" ) // Serialization helper for top-level Lexicon schema JSON objects (files) @@ -419,13 +421,24 @@ func (s *SchemaInteger) Validate(d any) error { return fmt.Errorf("expected an integer") } v := int(v64) - // TODO: enforce enum if s.Const != nil && v != *s.Const { return fmt.Errorf("integer val didn't match constant (%d): %d", *s.Const, v) } if (s.Minimum != nil && v < *s.Minimum) || (s.Maximum != nil && v > *s.Maximum) { return fmt.Errorf("integer val outside specified range: %d", v) } + if len(s.Enum) != 0 { + inEnum := false + for _, e := range s.Enum { + if e == v { + inEnum = true + break + } + } + if !inEnum { + return fmt.Errorf("integer val not in required enum: %d", v) + } + } return nil } @@ -476,7 +489,6 @@ func (s *SchemaString) Validate(d any) error { if !ok { return fmt.Errorf("expected a string: %v", reflect.TypeOf(d)) } - // TODO: enforce enum if s.Const 
!= nil && v != *s.Const { return fmt.Errorf("string val didn't match constant (%s): %s", *s.Const, v) } @@ -484,7 +496,24 @@ func (s *SchemaString) Validate(d any) error { if (s.MinLength != nil && len(v) < *s.MinLength) || (s.MaxLength != nil && len(v) > *s.MaxLength) { return fmt.Errorf("string length outside specified range: %d", len(v)) } - // TODO: grapheme length + if len(s.Enum) != 0 { + inEnum := false + for _, e := range s.Enum { + if e == v { + inEnum = true + break + } + } + if !inEnum { + return fmt.Errorf("string val not in required enum: %s", v) + } + } + if s.MinGraphemes != nil || s.MaxGraphemes != nil { + lenG := uniseg.GraphemeClusterCount(v) + if (s.MinGraphemes != nil && lenG < *s.MinGraphemes) || (s.MaxGraphemes != nil && lenG > *s.MaxGraphemes) { + return fmt.Errorf("string length (graphemes) outside specified range: %d", lenG) + } + } if s.Format != nil { switch *s.Format { case "at-identifier": @@ -656,7 +685,18 @@ func (s *SchemaBlob) Validate(d any) error { if !ok { return fmt.Errorf("expected a blob") } - // TODO: validate accept mimetype + if len(s.Accept) > 0 { + typeOk := false + for _, pat := range s.Accept { + if acceptableMimeType(pat, v.MimeType) { + typeOk = true + break + } + } + if !typeOk { + return fmt.Errorf("blob mimetype doesn't match accepted: %s", v.MimeType) + } + } if s.MaxSize != nil && int(v.Size) > *s.MaxSize { return fmt.Errorf("blob size too large: %d", v.Size) } @@ -702,6 +742,7 @@ func (s *SchemaParams) CheckSchema() error { return nil } +// XXX: implementation? 
// SchemaToken is the schema definition for a Lexicon "token": a named
// constant with no data of its own, which is referenced by its
// fully-qualified name.
type SchemaToken struct {
	Type        string  `json:"type,const=token"`
	Description *string `json:"description,omitempty"`
	// Name is the fully-qualified identifier of this token. It is not part of
	// the schema JSON: it is filled in after parsing, so it is excluded from
	// (un)marshaling to keep it from leaking into serialized schemas or being
	// overridden by a "name" key in a schema file.
	Name string `json:"-"`
}

// CheckSchema verifies the definition itself. Token definitions carry no
// constraints, so there is nothing to check.
func (s *SchemaToken) CheckSchema() error {
	return nil
}

// Validate checks that d is a string equal to the token's fully-qualified
// name. It returns an error if d is not a string, if Name was never
// populated, or if the value does not match Name.
func (s *SchemaToken) Validate(d any) error {
	str, ok := d.(string)
	if !ok {
		// %T prints "<nil>" for a nil value; formatting reflect.TypeOf(nil)
		// with %s would produce a garbled "%!s(<nil>)" message.
		return fmt.Errorf("expected a string for token, got: %T", d)
	}
	if s.Name == "" {
		return fmt.Errorf("token name was not populated at parse time")
	}
	if str != s.Name {
		return fmt.Errorf("token name did not match expected: %s", str)
	}
	return nil
}
// acceptableMimeType reports whether val matches pattern. The comparison is
// case-sensitive. An empty pattern or an empty value never matches.
//
// A pattern ending in "*" is a glob: the text before the "*" must be a prefix
// of val (for example "image/*" matches "image/png"). The pattern "*/*"
// matches any non-empty value. Any other pattern must equal val exactly.
func acceptableMimeType(pattern, val string) bool {
	if val == "" || pattern == "" {
		return false
	}
	// Handled explicitly: the generic glob rule below would require val to
	// start with the literal prefix "*/", which no real MIME type does.
	if pattern == "*/*" {
		return true
	}
	if strings.HasSuffix(pattern, "*") {
		return strings.HasPrefix(val, strings.TrimSuffix(pattern, "*"))
	}
	return pattern == val
}
-0800 Subject: [PATCH 06/23] lexicons: real grapheme test string --- atproto/lexicon/testdata/record-data-valid.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/lexicon/testdata/record-data-valid.json b/atproto/lexicon/testdata/record-data-valid.json index a5c4c2f00..8c8fd78fc 100644 --- a/atproto/lexicon/testdata/record-data-valid.json +++ b/atproto/lexicon/testdata/record-data-valid.json @@ -62,7 +62,7 @@ "enumInteger": 16, "rangeInteger": 16, "lenString": "1234567890ABC", - "graphemeString": "abcde", + "graphemeString": "🇩🇪🏳️‍🌈🇩🇪🏳️‍🌈🇩🇪🏳️‍🌈🇩🇪🏳️‍🌈🇩🇪🏳️‍🌈🇩🇪🏳️‍🌈", "enumString": "fish", "knownString": "blue", "sizeBytes": { From 7f80d1a1456398be16f2a8e139c140c0e5fd0868 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 28 Feb 2024 20:33:25 -0800 Subject: [PATCH 07/23] lexicon: enumString actually an enum --- atproto/lexicon/testdata/catalog/record.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/lexicon/testdata/catalog/record.json b/atproto/lexicon/testdata/catalog/record.json index 11351fd40..dcf3b7b0e 100644 --- a/atproto/lexicon/testdata/catalog/record.json +++ b/atproto/lexicon/testdata/catalog/record.json @@ -106,7 +106,7 @@ }, "enumString": { "type": "string", - "knownValues": ["fish", "tree", "rock"] + "enum": ["fish", "tree", "rock"] }, "knownString": { "type": "string", From 9928fd1fac1e8bb5c138749f05c3357a832b0831 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 28 Feb 2024 20:33:44 -0800 Subject: [PATCH 08/23] lexicon: many more invalid test cases --- .../lexicon/testdata/record-data-invalid.json | 159 +++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/atproto/lexicon/testdata/record-data-invalid.json b/atproto/lexicon/testdata/record-data-invalid.json index 05c8f514a..e5b90d2a2 100644 --- a/atproto/lexicon/testdata/record-data-invalid.json +++ b/atproto/lexicon/testdata/record-data-invalid.json @@ -3,9 +3,68 @@ "rkey": "demo", "data": { "$type": 
"example.lexicon.record" } }, + { "name": "invalid null field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "null": true } }, + { "name": "invalid boolean field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "boolean": "green"} }, + { "name": "invalid integer field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "integer": "green"} }, + { "name": "invalid non-nullable string field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "string": null } }, { "name": "invalid string field", "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "string": 2 } }, + { "name": "invalid bytes field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "bytes": "green" } }, + { "name": "invalid bytes: empty object", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "bytes": {}}}, + { "name": "invalid bytes: wrong type", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "bytes": { + "bytes": "asdfasdfasdfasdf" + }}}, + { "name": "invalid cid-link field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "cid-link": "green" } }, + { "name": "invalid blob field", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "blob": "green" } }, + { "name": "invalid blob: wrong type", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "bytes": { + "type": "blob", + "size": 123, + "mimeType": false, + "ref": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }}}, + { "name": "invalid array", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "array": 123 } + }, + { "name": "invalid array element", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "array": [true, false] } + }, + { "name": 
"invalid object", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "object": 123 } + }, + { "name": "invalid token ref type", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "ref": 123 } + }, + { "name": "invalid ref value", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "ref": "example.lexicon.record#wrongToken" } + }, { "name": "invalid string format handle", "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "handle": "123" } } @@ -14,8 +73,104 @@ "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "did": "123" } } }, - { "name": "invalid array element", + { "name": "invalid string format atidentifier", "rkey": "demo", - "data": { "$type": "example.lexicon.record", "integer": 1, "array": [true, false] } + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "atidentifier": "123" } } + }, + { "name": "invalid string format nsid", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "nsid": "123" } } + }, + { "name": "invalid string format aturi", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "aturi": "123" } } + }, + { "name": "invalid string format cid", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "cid": "123" } } + }, + { "name": "invalid string format datetime", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "datetime": "123" } } + }, + { "name": "invalid string format language", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "language": "123" } } + }, + { "name": "invalid string format uri", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "uri": "123" } } + }, + { "name": "wrong const value", + "rkey": "demo", + 
"data": { "$type": "example.lexicon.record", "integer": 1, "constInteger": 41 } + }, + { "name": "integer not in enum", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "enumInteger": 7 } + }, + { "name": "out of integer range", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "rangeInteger": 9000 } + }, + { "name": "string too short", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "lenString": "." } + }, + { "name": "string too long", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "lenString": "abcdefg-abcdefg-abcdefg" } + }, + { "name": "string too short (graphemes)", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "graphemeString": "👩‍👩‍👦‍👦👩‍👩‍👦‍👦" } + }, + { "name": "string too long (graphemes)", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "graphemeString": "abcdefg-abcdefg-abcdefg" } + }, + { "name": "out of enum string", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "enumString": "unexpected" } + }, + { "name": "bytes too short", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "sizeBytes": { "$bytes": "b25l" }} + }, + { "name": "bytes too long", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "sizeBytes": { "$bytes": "b25lb25lb25lb25lb25lb25lb25lb25lb25lb25lb25l" }} + }, + { "name": "array too short", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "lenArray": [0]} + }, + { "name": "array too long", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "lenArray": [0,0,0,0,0,0,0,0,0,0]} + }, + { "name": "blob too large", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "sizeBlob": { + "$type": "blob", + "size": 12345, + "mimeType": "text/plain", + "ref": { + "$link": 
"bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }}}, + { "name": "blob wrong type", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "acceptBlob": { + "$type": "blob", + "size": 12345, + "mimeType": "text/plain", + "ref": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }}}, + { "name": "out of closed union", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "closedUnion": "other" } } ] From 4927f7c25ef7eeab9c7195dc624680dcc149af1f Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 28 Feb 2024 21:05:59 -0800 Subject: [PATCH 09/23] more progress --- atproto/lexicon/language.go | 11 +++-------- atproto/lexicon/lexicon.go | 14 +++++++------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index eeed9b1eb..7963045b1 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -742,16 +742,11 @@ func (s *SchemaParams) CheckSchema() error { return nil } -// XXX: implementation? 
-func (s *SchemaParams) Validate(d any) error { - return nil -} - type SchemaToken struct { Type string `json:"type,const=token"` Description *string `json:"description,omitempty"` // the fully-qualified identifier of this token - Name string + fullName string } func (s *SchemaToken) CheckSchema() error { @@ -763,10 +758,10 @@ func (s *SchemaToken) Validate(d any) error { if !ok { return fmt.Errorf("expected a string for token, got: %s", reflect.TypeOf(d)) } - if s.Name == "" { + if s.fullName == "" { return fmt.Errorf("token name was not populated at parse time") } - if str != s.Name { + if str != s.fullName { return fmt.Errorf("token name did not match expected: %s", str) } return nil diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 02be9994d..c35662007 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -30,6 +30,9 @@ type Schema struct { } func (c *Catalog) Resolve(name string) (*Schema, error) { + if name == "" { + return nil, fmt.Errorf("tried to resolve empty string name") + } // default to #main if name doesn't have a fragment if !strings.Contains(name, "#") { name = name + "#main" @@ -56,15 +59,15 @@ func (c *Catalog) AddSchemaFile(sf SchemaFile) error { return err } // "A file can have at most one definition with one of the "primary" types. Primary types should always have the name main. It is possible for main to describe a non-primary type." 
- switch def.Inner.(type) { + switch s := def.Inner.(type) { case SchemaRecord, SchemaQuery, SchemaProcedure, SchemaSubscription: if frag != "main" { return fmt.Errorf("record, query, procedure, and subscription types must be 'main', not: %s", frag) } case SchemaToken: - token := def.Inner.(SchemaToken) - token.Name = name - def.Inner = token + // add fully-qualified name to token + s.fullName = name + def.Inner = s } s := Schema{ ID: name, @@ -162,10 +165,7 @@ func (c *Catalog) validateData(def any, d any) error { return c.validateObject(v, obj) case SchemaBlob: return v.Validate(d) - case SchemaParams: - return v.Validate(d) case SchemaRef: - // XXX: relative refs (in-file) // recurse next, err := c.Resolve(v.Ref) if err != nil { From 92c10baee2e91547b5bbb352b412f5c69d71f654 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 29 Feb 2024 00:51:17 -0800 Subject: [PATCH 10/23] lextool: start on network helpers --- atproto/lexicon/cmd/lextool/main.go | 10 ++++ atproto/lexicon/cmd/lextool/net.go | 92 +++++++++++++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 atproto/lexicon/cmd/lextool/net.go diff --git a/atproto/lexicon/cmd/lextool/main.go b/atproto/lexicon/cmd/lextool/main.go index b5652b40c..88bee2389 100644 --- a/atproto/lexicon/cmd/lextool/main.go +++ b/atproto/lexicon/cmd/lextool/main.go @@ -28,6 +28,16 @@ func main() { Usage: "try recursively loading all the schemas from a directory", Action: runLoadDirectory, }, + &cli.Command{ + Name: "validate-record", + Usage: "fetch from network, validate against catalog", + Action: runValidateRecord, + }, + &cli.Command{ + Name: "validate-firehose", + Usage: "subscribe to a firehose, validate every known record against catalog", + Action: runValidateFirehose, + }, } h := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelDebug}) slog.SetDefault(slog.New(h)) diff --git a/atproto/lexicon/cmd/lextool/net.go b/atproto/lexicon/cmd/lextool/net.go new file mode 100644 index 
000000000..1ea2dfbb3 --- /dev/null +++ b/atproto/lexicon/cmd/lextool/net.go @@ -0,0 +1,92 @@ +package main + +import ( + "context" + "fmt" + "io" + "log/slog" + "net/http" + + "github.com/bluesky-social/indigo/atproto/data" + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/lexicon" + "github.com/bluesky-social/indigo/atproto/syntax" + + "github.com/urfave/cli/v2" +) + +func runValidateRecord(cctx *cli.Context) error { + ctx := context.Background() + args := cctx.Args().Slice() + if len(args) != 2 { + return fmt.Errorf("expected two args (catalog path and AT-URI)") + } + p := args[0] + if p == "" { + return fmt.Errorf("need to provide directory path as an argument") + } + + c := lexicon.NewCatalog() + err := c.LoadDirectory(p) + if err != nil { + return err + } + + aturi, err := syntax.ParseATURI(args[1]) + if err != nil { + return err + } + if aturi.RecordKey() == "" { + return fmt.Errorf("need a full, not partial, AT-URI: %s", aturi) + } + dir := identity.DefaultDirectory() + ident, err := dir.Lookup(ctx, aturi.Authority()) + if err != nil { + return fmt.Errorf("resolving AT-URI authority: %v", err) + } + pdsURL := ident.PDSEndpoint() + if pdsURL == "" { + return fmt.Errorf("could not resolve PDS endpoint for AT-URI account: %s", ident.DID.String()) + } + + slog.Info("fetching record", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) + url := fmt.Sprintf("%s/xrpc/com.atproto.repo.getRecord?repo=%s&collection=%s&rkey=%s", + pdsURL, ident.DID, aturi.Collection(), aturi.RecordKey()) + resp, err := http.Get(url) + if err != nil { + return err + } + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("fetch failed") + } + respBytes, err := io.ReadAll(resp.Body) + if err != nil { + return err + } + + body, err := data.UnmarshalJSON(respBytes) + record := body["value"].(map[string]any) + + slog.Info("validating", "did", ident.DID.String(), "collection", 
aturi.Collection().String(), "rkey", aturi.RecordKey().String()) + err = c.ValidateRecord(record, aturi.Collection().String()) + if err != nil { + return err + } + fmt.Println("success!") + return nil +} + +func runValidateFirehose(cctx *cli.Context) error { + p := cctx.Args().First() + if p == "" { + return fmt.Errorf("need to provide directory path as an argument") + } + + c := lexicon.NewCatalog() + err := c.LoadDirectory(p) + if err != nil { + return err + } + + return fmt.Errorf("UNIMPLEMENTED") +} From e22c44065952f1149ab237f44d6f547165eec6b7 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 29 Feb 2024 00:52:09 -0800 Subject: [PATCH 11/23] lexicon fixes --- atproto/lexicon/language.go | 88 +++++++++++++++++++++++++++++++++++++ atproto/lexicon/lexicon.go | 11 ++--- atproto/lexicon/mimetype.go | 1 - 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 7963045b1..96c889ecf 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -69,6 +69,81 @@ func (s *SchemaDef) CheckSchema() error { } } +// Helper to recurse down the definition tree and set full references on any sub-schemas which need to embed that metadata +func (s *SchemaDef) SetBase(base string) { + switch v := s.Inner.(type) { + case SchemaRecord: + for i, val := range v.Record.Properties { + val.SetBase(base) + v.Record.Properties[i] = val + } + s.Inner = v + case SchemaQuery: + for i, val := range v.Parameters.Properties { + val.SetBase(base) + v.Parameters.Properties[i] = val + } + if v.Output != nil && v.Output.Schema != nil { + v.Output.Schema.SetBase(base) + } + s.Inner = v + case SchemaProcedure: + for i, val := range v.Parameters.Properties { + val.SetBase(base) + v.Parameters.Properties[i] = val + } + if v.Input != nil && v.Input.Schema != nil { + v.Input.Schema.SetBase(base) + } + if v.Output != nil && v.Output.Schema != nil { + v.Output.Schema.SetBase(base) + } + s.Inner = v + case 
SchemaSubscription: + for i, val := range v.Parameters.Properties { + val.SetBase(base) + v.Parameters.Properties[i] = val + } + if v.Message != nil { + v.Message.Schema.SetBase(base) + } + s.Inner = v + case SchemaArray: + v.Items.SetBase(base) + s.Inner = v + case SchemaObject: + for i, val := range v.Properties { + val.SetBase(base) + v.Properties[i] = val + } + s.Inner = v + case SchemaParams: + for i, val := range v.Properties { + val.SetBase(base) + v.Properties[i] = val + } + s.Inner = v + case SchemaRef: + // add fully-qualified name + if strings.HasPrefix(v.Ref, "#") { + v.fullRef = base + v.Ref + } else { + v.fullRef = v.Ref + } + s.Inner = v + case SchemaUnion: + // add fully-qualified name + for _, ref := range v.Refs { + if strings.HasPrefix(ref, "#") { + ref = base + ref + } + v.fullRefs = append(v.fullRefs, ref) + } + s.Inner = v + } + return +} + func (s SchemaDef) MarshalJSON() ([]byte, error) { return json.Marshal(s.Inner) } @@ -750,6 +825,9 @@ type SchemaToken struct { } func (s *SchemaToken) CheckSchema() error { + if s.fullName == "" { + return fmt.Errorf("expected fully-qualified token name") + } return nil } @@ -771,6 +849,8 @@ type SchemaRef struct { Type string `json:"type,const=ref"` Description *string `json:"description,omitempty"` Ref string `json:"ref"` + // full path of reference + fullRef string } func (s *SchemaRef) CheckSchema() error { @@ -778,6 +858,9 @@ func (s *SchemaRef) CheckSchema() error { if len(s.Ref) == 0 { return fmt.Errorf("empty schema ref") } + if len(s.fullRef) == 0 { + return fmt.Errorf("empty full schema ref") + } return nil } @@ -786,6 +869,8 @@ type SchemaUnion struct { Description *string `json:"description,omitempty"` Refs []string `json:"refs"` Closed *bool `json:"closed,omitempty"` + // fully qualified + fullRefs []string } func (s *SchemaUnion) CheckSchema() error { @@ -796,6 +881,9 @@ func (s *SchemaUnion) CheckSchema() error { return fmt.Errorf("empty schema ref") } } + if len(s.fullRefs) != len(s.Refs) { 
+ return fmt.Errorf("union refs were not expanded") + } return nil } diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index c35662007..329060a5c 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -55,9 +55,6 @@ func (c *Catalog) AddSchemaFile(sf SchemaFile) error { if _, ok := c.Schemas[name]; ok { return fmt.Errorf("catalog already contained a schema with name: %s", name) } - if err := def.CheckSchema(); err != nil { - return err - } // "A file can have at most one definition with one of the "primary" types. Primary types should always have the name main. It is possible for main to describe a non-primary type." switch s := def.Inner.(type) { case SchemaRecord, SchemaQuery, SchemaProcedure, SchemaSubscription: @@ -69,6 +66,10 @@ func (c *Catalog) AddSchemaFile(sf SchemaFile) error { s.fullName = name def.Inner = s } + def.SetBase(base) + if err := def.CheckSchema(); err != nil { + return err + } s := Schema{ ID: name, Revision: sf.Revision, @@ -167,7 +168,7 @@ func (c *Catalog) validateData(def any, d any) error { return v.Validate(d) case SchemaRef: // recurse - next, err := c.Resolve(v.Ref) + next, err := c.Resolve(v.fullRef) if err != nil { return err } @@ -218,7 +219,7 @@ func (c *Catalog) validateArray(s SchemaArray, arr []any) error { func (c *Catalog) validateUnion(s SchemaUnion, d any) error { closed := s.Closed != nil && *s.Closed == true - for _, ref := range s.Refs { + for _, ref := range s.fullRefs { def, err := c.Resolve(ref) if err != nil { // TODO: how to actually handle unknown defs? 
diff --git a/atproto/lexicon/mimetype.go b/atproto/lexicon/mimetype.go index 0038003c7..0e42d5edb 100644 --- a/atproto/lexicon/mimetype.go +++ b/atproto/lexicon/mimetype.go @@ -15,5 +15,4 @@ func acceptableMimeType(pattern, val string) bool { } else { return pattern == val } - return false } From e1a8f3a74cdf2a2368860d65d982f9c17d6770eb Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 19 Mar 2024 22:39:57 -0700 Subject: [PATCH 12/23] refactor catalog and methods --- atproto/lexicon/catalog.go | 112 +++++++++++++++++++ atproto/lexicon/cmd/lextool/main.go | 2 +- atproto/lexicon/cmd/lextool/net.go | 10 +- atproto/lexicon/interop_record_test.go | 8 +- atproto/lexicon/lexicon.go | 144 ++++--------------------- atproto/lexicon/lexicon_test.go | 8 +- 6 files changed, 146 insertions(+), 138 deletions(-) create mode 100644 atproto/lexicon/catalog.go diff --git a/atproto/lexicon/catalog.go b/atproto/lexicon/catalog.go new file mode 100644 index 000000000..22b4004e2 --- /dev/null +++ b/atproto/lexicon/catalog.go @@ -0,0 +1,112 @@ +package lexicon + +import ( + "encoding/json" + "fmt" + "io" + "io/fs" + "os" + "path/filepath" + "strings" +) + +// An aggregation of lexicon schemas, and methods for validating generic data against those schemas. 
+type Catalog interface { + Resolve(ref string) (*Schema, error) +} + +type BaseCatalog struct { + schemas map[string]Schema +} + +func NewBaseCatalog() BaseCatalog { + return BaseCatalog{ + schemas: make(map[string]Schema), + } +} + +func (c *BaseCatalog) Resolve(ref string) (*Schema, error) { + if ref == "" { + return nil, fmt.Errorf("tried to resolve empty string name") + } + // default to #main if name doesn't have a fragment + if !strings.Contains(ref, "#") { + ref = ref + "#main" + } + s, ok := c.schemas[ref] + if !ok { + return nil, fmt.Errorf("schema not found in catalog: %s", ref) + } + return &s, nil +} + +func (c *BaseCatalog) AddSchemaFile(sf SchemaFile) error { + base := sf.ID + for frag, def := range sf.Defs { + if len(frag) == 0 || strings.Contains(frag, "#") || strings.Contains(frag, ".") { + // TODO: more validation here? + return fmt.Errorf("schema name invalid: %s", frag) + } + name := base + "#" + frag + if _, ok := c.schemas[name]; ok { + return fmt.Errorf("catalog already contained a schema with name: %s", name) + } + // "A file can have at most one definition with one of the "primary" types. Primary types should always have the name main. It is possible for main to describe a non-primary type." 
+ switch s := def.Inner.(type) { + case SchemaRecord, SchemaQuery, SchemaProcedure, SchemaSubscription: + if frag != "main" { + return fmt.Errorf("record, query, procedure, and subscription types must be 'main', not: %s", frag) + } + case SchemaToken: + // add fully-qualified name to token + s.fullName = name + def.Inner = s + } + def.SetBase(base) + if err := def.CheckSchema(); err != nil { + return err + } + s := Schema{ + ID: name, + Revision: sf.Revision, + Def: def.Inner, + } + c.schemas[name] = s + } + return nil +} + +func (c *BaseCatalog) LoadDirectory(dirPath string) error { + return filepath.WalkDir(dirPath, func(p string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + if !strings.HasSuffix(p, ".json") { + return nil + } + // TODO: logging + fmt.Println(p) + f, err := os.Open(p) + if err != nil { + return err + } + defer func() { _ = f.Close() }() + + b, err := io.ReadAll(f) + if err != nil { + return err + } + + var sf SchemaFile + if err = json.Unmarshal(b, &sf); err != nil { + return err + } + if err = c.AddSchemaFile(sf); err != nil { + return err + } + return nil + }) +} diff --git a/atproto/lexicon/cmd/lextool/main.go b/atproto/lexicon/cmd/lextool/main.go index 88bee2389..85f379360 100644 --- a/atproto/lexicon/cmd/lextool/main.go +++ b/atproto/lexicon/cmd/lextool/main.go @@ -79,7 +79,7 @@ func runLoadDirectory(cctx *cli.Context) error { return fmt.Errorf("need to provide directory path as an argument") } - c := lexicon.NewCatalog() + c := lexicon.NewBaseCatalog() err := c.LoadDirectory(p) if err != nil { return err diff --git a/atproto/lexicon/cmd/lextool/net.go b/atproto/lexicon/cmd/lextool/net.go index 1ea2dfbb3..92525458e 100644 --- a/atproto/lexicon/cmd/lextool/net.go +++ b/atproto/lexicon/cmd/lextool/net.go @@ -26,8 +26,8 @@ func runValidateRecord(cctx *cli.Context) error { return fmt.Errorf("need to provide directory path as an argument") } - c := lexicon.NewCatalog() - err := 
c.LoadDirectory(p) + cat := lexicon.NewBaseCatalog() + err := cat.LoadDirectory(p) if err != nil { return err } @@ -68,7 +68,7 @@ func runValidateRecord(cctx *cli.Context) error { record := body["value"].(map[string]any) slog.Info("validating", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) - err = c.ValidateRecord(record, aturi.Collection().String()) + err = lexicon.ValidateRecord(&cat, record, aturi.Collection().String()) if err != nil { return err } @@ -82,8 +82,8 @@ func runValidateFirehose(cctx *cli.Context) error { return fmt.Errorf("need to provide directory path as an argument") } - c := lexicon.NewCatalog() - err := c.LoadDirectory(p) + cat := lexicon.NewBaseCatalog() + err := cat.LoadDirectory(p) if err != nil { return err } diff --git a/atproto/lexicon/interop_record_test.go b/atproto/lexicon/interop_record_test.go index f2bd14d44..747f0c02a 100644 --- a/atproto/lexicon/interop_record_test.go +++ b/atproto/lexicon/interop_record_test.go @@ -21,7 +21,7 @@ type RecordFixture struct { func TestInteropRecordValid(t *testing.T) { assert := assert.New(t) - cat := NewCatalog() + cat := NewBaseCatalog() if err := cat.LoadDirectory("testdata/catalog"); err != nil { t.Fatal(err) } @@ -49,14 +49,14 @@ func TestInteropRecordValid(t *testing.T) { t.Fatal(err) } - assert.NoError(cat.ValidateRecord(d, "example.lexicon.record")) + assert.NoError(ValidateRecord(&cat, d, "example.lexicon.record")) } } func TestInteropRecordInvalid(t *testing.T) { assert := assert.New(t) - cat := NewCatalog() + cat := NewBaseCatalog() if err := cat.LoadDirectory("testdata/catalog"); err != nil { t.Fatal(err) } @@ -83,7 +83,7 @@ func TestInteropRecordInvalid(t *testing.T) { if err != nil { t.Fatal(err) } - err = cat.ValidateRecord(d, "example.lexicon.record") + err = ValidateRecord(&cat, d, "example.lexicon.record") if err == nil { fmt.Println(" FAIL") } diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 
329060a5c..6a48d1cb4 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -1,143 +1,37 @@ package lexicon import ( - "encoding/json" "fmt" - "io" - "io/fs" - "os" - "path/filepath" "reflect" - "strings" ) -// An aggregation of lexicon schemas, and methods for validating generic data against those schemas. -type Catalog struct { - // TODO: not safe zero value; hide this field? seems aggressive - Schemas map[string]Schema -} - -func NewCatalog() Catalog { - return Catalog{ - Schemas: make(map[string]Schema), - } -} - type Schema struct { ID string Revision *int Def any } -func (c *Catalog) Resolve(name string) (*Schema, error) { - if name == "" { - return nil, fmt.Errorf("tried to resolve empty string name") - } - // default to #main if name doesn't have a fragment - if !strings.Contains(name, "#") { - name = name + "#main" - } - s, ok := c.Schemas[name] - if !ok { - return nil, fmt.Errorf("schema not found in catalog: %s", name) - } - return &s, nil -} - -func (c *Catalog) AddSchemaFile(sf SchemaFile) error { - base := sf.ID - for frag, def := range sf.Defs { - if len(frag) == 0 || strings.Contains(frag, "#") || strings.Contains(frag, ".") { - // TODO: more validation here? - return fmt.Errorf("schema name invalid: %s", frag) - } - name := base + "#" + frag - if _, ok := c.Schemas[name]; ok { - return fmt.Errorf("catalog already contained a schema with name: %s", name) - } - // "A file can have at most one definition with one of the "primary" types. Primary types should always have the name main. It is possible for main to describe a non-primary type." 
- switch s := def.Inner.(type) { - case SchemaRecord, SchemaQuery, SchemaProcedure, SchemaSubscription: - if frag != "main" { - return fmt.Errorf("record, query, procedure, and subscription types must be 'main', not: %s", frag) - } - case SchemaToken: - // add fully-qualified name to token - s.fullName = name - def.Inner = s - } - def.SetBase(base) - if err := def.CheckSchema(); err != nil { - return err - } - s := Schema{ - ID: name, - Revision: sf.Revision, - Def: def.Inner, - } - c.Schemas[name] = s - } - return nil -} - -func (c *Catalog) LoadDirectory(dirPath string) error { - return filepath.WalkDir(dirPath, func(p string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - return nil - } - if !strings.HasSuffix(p, ".json") { - return nil - } - // TODO: logging - fmt.Println(p) - f, err := os.Open(p) - if err != nil { - return err - } - defer func() { _ = f.Close() }() - - b, err := io.ReadAll(f) - if err != nil { - return err - } - - var sf SchemaFile - if err = json.Unmarshal(b, &sf); err != nil { - return err - } - if err = c.AddSchemaFile(sf); err != nil { - return err - } - return nil - }) -} - -// TODO: rkey? is nsid always known? 
-// TODO: nsid as syntax.NSID -func (c *Catalog) ValidateRecord(raw any, id string) error { - def, err := c.Resolve(id) +func ValidateRecord(cat Catalog, recordData any, ref string) error { + def, err := cat.Resolve(ref) if err != nil { return err } s, ok := def.Def.(SchemaRecord) if !ok { - return fmt.Errorf("schema is not of record type: %s", id) + return fmt.Errorf("schema is not of record type: %s", ref) } - d, ok := raw.(map[string]any) + d, ok := recordData.(map[string]any) if !ok { return fmt.Errorf("record data is not object type") } t, ok := d["$type"] - if !ok || t != id { + if !ok || t != ref { return fmt.Errorf("record data missing $type, or didn't match expected NSID") } - return c.validateObject(s.Record, d) + return validateObject(cat, s.Record, d) } -func (c *Catalog) validateData(def any, d any) error { +func validateData(cat Catalog, def any, d any) error { // TODO: switch v := def.(type) { case SchemaNull: @@ -157,24 +51,24 @@ func (c *Catalog) validateData(def any, d any) error { if !ok { return fmt.Errorf("expected an array, got: %s", reflect.TypeOf(d)) } - return c.validateArray(v, arr) + return validateArray(cat, v, arr) case SchemaObject: obj, ok := d.(map[string]any) if !ok { return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) } - return c.validateObject(v, obj) + return validateObject(cat, v, obj) case SchemaBlob: return v.Validate(d) case SchemaRef: // recurse - next, err := c.Resolve(v.fullRef) + next, err := cat.Resolve(v.fullRef) if err != nil { return err } - return c.validateData(next.Def, d) + return validateData(cat, next.Def, d) case SchemaUnion: - return c.validateUnion(v, d) + return validateUnion(cat, v, d) case SchemaUnknown: return v.Validate(d) case SchemaToken: @@ -184,7 +78,7 @@ func (c *Catalog) validateData(def any, d any) error { } } -func (c *Catalog) validateObject(s SchemaObject, d map[string]any) error { +func validateObject(cat Catalog, s SchemaObject, d map[string]any) error { for _, k := range 
s.Required { if _, ok := d[k]; !ok { return fmt.Errorf("required field missing: %s", k) @@ -195,7 +89,7 @@ func (c *Catalog) validateObject(s SchemaObject, d map[string]any) error { if v == nil && s.IsNullable(k) { continue } - err := c.validateData(def.Inner, v) + err := validateData(cat, def.Inner, v) if err != nil { return err } @@ -204,12 +98,12 @@ func (c *Catalog) validateObject(s SchemaObject, d map[string]any) error { return nil } -func (c *Catalog) validateArray(s SchemaArray, arr []any) error { +func validateArray(cat Catalog, s SchemaArray, arr []any) error { if (s.MinLength != nil && len(arr) < *s.MinLength) || (s.MaxLength != nil && len(arr) > *s.MaxLength) { return fmt.Errorf("array length out of bounds: %d", len(arr)) } for _, v := range arr { - err := c.validateData(s.Items.Inner, v) + err := validateData(cat, s.Items.Inner, v) if err != nil { return err } @@ -217,15 +111,15 @@ func (c *Catalog) validateArray(s SchemaArray, arr []any) error { return nil } -func (c *Catalog) validateUnion(s SchemaUnion, d any) error { +func validateUnion(cat Catalog, s SchemaUnion, d any) error { closed := s.Closed != nil && *s.Closed == true for _, ref := range s.fullRefs { - def, err := c.Resolve(ref) + def, err := cat.Resolve(ref) if err != nil { // TODO: how to actually handle unknown defs? 
return err } - if err = c.validateData(def.Def, d); nil == err { // if success + if err = validateData(cat, def.Def, d); nil == err { // if success return nil } } diff --git a/atproto/lexicon/lexicon_test.go b/atproto/lexicon/lexicon_test.go index 81953e04a..eef75b04e 100644 --- a/atproto/lexicon/lexicon_test.go +++ b/atproto/lexicon/lexicon_test.go @@ -9,7 +9,7 @@ import ( func TestBasicCatalog(t *testing.T) { assert := assert.New(t) - cat := NewCatalog() + cat := NewBaseCatalog() if err := cat.LoadDirectory("testdata/catalog"); err != nil { t.Fatal(err) } @@ -18,7 +18,8 @@ func TestBasicCatalog(t *testing.T) { if err != nil { t.Fatal(err) } - assert.NoError(cat.validateData( + assert.NoError(validateData( + &cat, def.Def, map[string]any{ "cid": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", @@ -30,7 +31,8 @@ func TestBasicCatalog(t *testing.T) { }, )) - assert.Error(cat.validateData( + assert.Error(validateData( + &cat, def.Def, map[string]any{ "cid": "bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi", From 53c3e8332936120b8355313b966c82654a8e25c2 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 19 Mar 2024 23:03:20 -0700 Subject: [PATCH 13/23] data: support parsing legacy blobs --- atproto/data/parse.go | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/atproto/data/parse.go b/atproto/data/parse.go index 5924ff2a3..2f1f4305e 100644 --- a/atproto/data/parse.go +++ b/atproto/data/parse.go @@ -111,6 +111,18 @@ func parseMap(obj map[string]any) (any, error) { return nil, fmt.Errorf("$type field must contain a non-empty string") } } + // legacy blob type + if len(obj) == 2 { + if _, ok := obj["mimeType"]; ok { + if _, ok := obj["cid"]; ok { + b, err := parseLegacyBlob(obj) + if err != nil { + return nil, err + } + return *b, nil + } + } + } out := make(map[string]any, len(obj)) for k, val := range obj { if len(k) > MAX_OBJECT_KEY_LEN { @@ -213,6 +225,30 @@ func parseBlob(obj map[string]any) 
(*Blob, error) { }, nil } +func parseLegacyBlob(obj map[string]any) (*Blob, error) { + if len(obj) != 2 { + return nil, fmt.Errorf("legacy blobs expected to have 2 fields") + } + var err error + mimeType, ok := obj["mimeType"].(string) + if !ok { + return nil, fmt.Errorf("blob 'mimeType' missing or not a string") + } + cidStr, ok := obj["cid"] + if !ok { + return nil, fmt.Errorf("blob 'cid' missing") + } + c, err := cid.Parse(cidStr) + if err != nil { + return nil, fmt.Errorf("invalid CID: %w", err) + } + return &Blob{ + Size: -1, + MimeType: mimeType, + Ref: CIDLink(c), + }, nil +} + func parseObject(obj map[string]any) (map[string]any, error) { out, err := parseMap(obj) if err != nil { From 586731a33e7a8715b86a38f8827e902b22710d56 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 19 Mar 2024 23:03:54 -0700 Subject: [PATCH 14/23] lexicon: support lenient parsing (datetime and legacy blobs) --- atproto/lexicon/cmd/lextool/net.go | 7 ++++-- atproto/lexicon/language.go | 19 ++++++++++++--- atproto/lexicon/lexicon.go | 39 +++++++++++++++++++----------- atproto/lexicon/lexicon_test.go | 2 ++ 4 files changed, 47 insertions(+), 20 deletions(-) diff --git a/atproto/lexicon/cmd/lextool/net.go b/atproto/lexicon/cmd/lextool/net.go index 92525458e..25fd9118a 100644 --- a/atproto/lexicon/cmd/lextool/net.go +++ b/atproto/lexicon/cmd/lextool/net.go @@ -65,10 +65,13 @@ func runValidateRecord(cctx *cli.Context) error { } body, err := data.UnmarshalJSON(respBytes) - record := body["value"].(map[string]any) + record, ok := body["value"].(map[string]any) + if !ok { + return fmt.Errorf("fetched record was not an object") + } slog.Info("validating", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) - err = lexicon.ValidateRecord(&cat, record, aturi.Collection().String()) + err = lexicon.ValidateRecordLenient(&cat, record, aturi.Collection().String()) if err != nil { return err } diff --git a/atproto/lexicon/language.go 
b/atproto/lexicon/language.go index 96c889ecf..6f13597ea 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -559,7 +559,8 @@ func (s *SchemaString) CheckSchema() error { return nil } -func (s *SchemaString) Validate(d any) error { +// lenient mode is only for datetimes, and hopefully will be deprecated soon +func (s *SchemaString) Validate(d any, lenient bool) error { v, ok := d.(string) if !ok { return fmt.Errorf("expected a string: %v", reflect.TypeOf(d)) @@ -604,8 +605,14 @@ func (s *SchemaString) Validate(d any) error { return err } case "datetime": - if _, err := syntax.ParseDatetime(v); err != nil { - return err + if lenient { + if _, err := syntax.ParseDatetimeLenient(v); err != nil { + return err + } + } else { + if _, err := syntax.ParseDatetime(v); err != nil { + return err + } } case "did": if _, err := syntax.ParseDID(v); err != nil { @@ -755,11 +762,15 @@ func (s *SchemaBlob) CheckSchema() error { return nil } -func (s *SchemaBlob) Validate(d any) error { +// lenient flag allows legacy blobs (if true) +func (s *SchemaBlob) Validate(d any, lenient bool) error { v, ok := d.(data.Blob) if !ok { return fmt.Errorf("expected a blob") } + if !lenient && v.Size < 0 { + return fmt.Errorf("legacy blobs not allowed") + } if len(s.Accept) > 0 { typeOk := false for _, pat := range s.Accept { diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 6a48d1cb4..f53edaf41 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -12,6 +12,17 @@ type Schema struct { } func ValidateRecord(cat Catalog, recordData any, ref string) error { + return validateRecordConfig(cat, recordData, ref, false) +} + +// Variation of ValidateRecord which allows "legacy" blob format, and flexible string datetimes. +// +// Hope is to deprecate this lenient variation in the near future! 
+func ValidateRecordLenient(cat Catalog, recordData any, ref string) error { + return validateRecordConfig(cat, recordData, ref, true) +} + +func validateRecordConfig(cat Catalog, recordData any, ref string, lenient bool) error { def, err := cat.Resolve(ref) if err != nil { return err @@ -28,10 +39,10 @@ func ValidateRecord(cat Catalog, recordData any, ref string) error { if !ok || t != ref { return fmt.Errorf("record data missing $type, or didn't match expected NSID") } - return validateObject(cat, s.Record, d) + return validateObject(cat, s.Record, d, lenient) } -func validateData(cat Catalog, def any, d any) error { +func validateData(cat Catalog, def any, d any, lenient bool) error { // TODO: switch v := def.(type) { case SchemaNull: @@ -41,7 +52,7 @@ func validateData(cat Catalog, def any, d any) error { case SchemaInteger: return v.Validate(d) case SchemaString: - return v.Validate(d) + return v.Validate(d, lenient) case SchemaBytes: return v.Validate(d) case SchemaCIDLink: @@ -51,24 +62,24 @@ func validateData(cat Catalog, def any, d any) error { if !ok { return fmt.Errorf("expected an array, got: %s", reflect.TypeOf(d)) } - return validateArray(cat, v, arr) + return validateArray(cat, v, arr, lenient) case SchemaObject: obj, ok := d.(map[string]any) if !ok { return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) } - return validateObject(cat, v, obj) + return validateObject(cat, v, obj, lenient) case SchemaBlob: - return v.Validate(d) + return v.Validate(d, lenient) case SchemaRef: // recurse next, err := cat.Resolve(v.fullRef) if err != nil { return err } - return validateData(cat, next.Def, d) + return validateData(cat, next.Def, d, lenient) case SchemaUnion: - return validateUnion(cat, v, d) + return validateUnion(cat, v, d, lenient) case SchemaUnknown: return v.Validate(d) case SchemaToken: @@ -78,7 +89,7 @@ func validateData(cat Catalog, def any, d any) error { } } -func validateObject(cat Catalog, s SchemaObject, d map[string]any) error { 
+func validateObject(cat Catalog, s SchemaObject, d map[string]any, lenient bool) error { for _, k := range s.Required { if _, ok := d[k]; !ok { return fmt.Errorf("required field missing: %s", k) @@ -89,7 +100,7 @@ func validateObject(cat Catalog, s SchemaObject, d map[string]any) error { if v == nil && s.IsNullable(k) { continue } - err := validateData(cat, def.Inner, v) + err := validateData(cat, def.Inner, v, lenient) if err != nil { return err } @@ -98,12 +109,12 @@ func validateObject(cat Catalog, s SchemaObject, d map[string]any) error { return nil } -func validateArray(cat Catalog, s SchemaArray, arr []any) error { +func validateArray(cat Catalog, s SchemaArray, arr []any, lenient bool) error { if (s.MinLength != nil && len(arr) < *s.MinLength) || (s.MaxLength != nil && len(arr) > *s.MaxLength) { return fmt.Errorf("array length out of bounds: %d", len(arr)) } for _, v := range arr { - err := validateData(cat, s.Items.Inner, v) + err := validateData(cat, s.Items.Inner, v, lenient) if err != nil { return err } @@ -111,7 +122,7 @@ func validateArray(cat Catalog, s SchemaArray, arr []any) error { return nil } -func validateUnion(cat Catalog, s SchemaUnion, d any) error { +func validateUnion(cat Catalog, s SchemaUnion, d any, lenient bool) error { closed := s.Closed != nil && *s.Closed == true for _, ref := range s.fullRefs { def, err := cat.Resolve(ref) @@ -119,7 +130,7 @@ func validateUnion(cat Catalog, s SchemaUnion, d any) error { // TODO: how to actually handle unknown defs? 
return err } - if err = validateData(cat, def.Def, d); nil == err { // if success + if err = validateData(cat, def.Def, d, lenient); nil == err { // if success return nil } } diff --git a/atproto/lexicon/lexicon_test.go b/atproto/lexicon/lexicon_test.go index eef75b04e..3d12e40b8 100644 --- a/atproto/lexicon/lexicon_test.go +++ b/atproto/lexicon/lexicon_test.go @@ -29,6 +29,7 @@ func TestBasicCatalog(t *testing.T) { "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", "val": "test-label", }, + false, )) assert.Error(validateData( @@ -41,5 +42,6 @@ func TestBasicCatalog(t *testing.T) { "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", "val": "test-label", }, + false, )) } From f4adea5f0cebaf42e30ca9053e7859d79f35b04d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 13 Aug 2024 20:48:48 -0700 Subject: [PATCH 15/23] atproto/identity: correct 'catalog' ref in pkg readme --- atproto/identity/doc.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/atproto/identity/doc.go b/atproto/identity/doc.go index 911b2811c..32bef3313 100644 --- a/atproto/identity/doc.go +++ b/atproto/identity/doc.go @@ -1,8 +1,6 @@ /* Package identity provides types and routines for resolving handles and DIDs from the network -The two main abstractions are a Catalog interface for identity service implementations, and an Identity structure which represents core identity information relevant to atproto. The Catalog interface can be nested, somewhat like HTTP middleware, to provide caching, observability, or other bespoke needs in more complex systems. - -Much of the implementation of this SDK is based on existing code in indigo:api/extra.go +The two main abstractions are a Directory interface for identity service implementations, and an Identity struct which represents core identity information relevant to atproto. 
The Directory interface can be nested, somewhat like HTTP middleware, to provide caching, observability, or other bespoke needs in more complex systems. */ package identity From cfbbce39d7f635ac6da0d42cc95a3d3cb351a1e2 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Tue, 13 Aug 2024 20:53:54 -0700 Subject: [PATCH 16/23] TID and record-key string format support --- atproto/lexicon/language.go | 10 +++++++++- atproto/lexicon/testdata/catalog/record.json | 10 ++++++++++ atproto/lexicon/testdata/record-data-invalid.json | 8 ++++++++ atproto/lexicon/testdata/record-data-valid.json | 3 ++- 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 6f13597ea..7ea46e34c 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -550,7 +550,7 @@ func (s *SchemaString) CheckSchema() error { } if s.Format != nil { switch *s.Format { - case "at-identifier", "at-uri", "cid", "datetime", "did", "handle", "nsid", "uri", "language": + case "at-identifier", "at-uri", "cid", "datetime", "did", "handle", "nsid", "uri", "language", "tid", "record-key": // pass default: return fmt.Errorf("unknown string format: %s", *s.Format) @@ -634,6 +634,14 @@ func (s *SchemaString) Validate(d any, lenient bool) error { if _, err := syntax.ParseLanguage(v); err != nil { return err } + case "tid": + if _, err := syntax.ParseTID(v); err != nil { + return err + } + case "record-key": + if _, err := syntax.ParseRecordKey(v); err != nil { + return err + } } } return nil diff --git a/atproto/lexicon/testdata/catalog/record.json b/atproto/lexicon/testdata/catalog/record.json index dcf3b7b0e..cb32a07ee 100644 --- a/atproto/lexicon/testdata/catalog/record.json +++ b/atproto/lexicon/testdata/catalog/record.json @@ -190,6 +190,16 @@ "type": "string", "format": "uri", "description": "a generic URI field" + }, + "tid": { + "type": "string", + "format": "tid", + "description": "a generic TID field" + }, + "recordkey": { + 
"type": "string", + "format": "record-key", + "description": "a generic record-key field" } } }, diff --git a/atproto/lexicon/testdata/record-data-invalid.json b/atproto/lexicon/testdata/record-data-invalid.json index e5b90d2a2..623ca2395 100644 --- a/atproto/lexicon/testdata/record-data-invalid.json +++ b/atproto/lexicon/testdata/record-data-invalid.json @@ -101,6 +101,14 @@ "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "uri": "123" } } }, + { "name": "invalid string format tid", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "tid": "000" } } + }, + { "name": "invalid string format recordkey", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "formats": { "recordkey": "." } } + }, { "name": "wrong const value", "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "constInteger": 41 } diff --git a/atproto/lexicon/testdata/record-data-valid.json b/atproto/lexicon/testdata/record-data-valid.json index 8c8fd78fc..bbdd89b3d 100644 --- a/atproto/lexicon/testdata/record-data-valid.json +++ b/atproto/lexicon/testdata/record-data-valid.json @@ -55,7 +55,8 @@ "cid": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq", "datetime": "2023-10-30T22:25:23Z", "language": "en", - "uri": "https://example.com/file.txt" + "tid": "3kznmn7xqxl22", + "recordkey": "simple" }, "constInteger": 42, "defaultInteger": 123, From f19a2a447258dd3c6cdc6d64449a39283fac33e8 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 15 Aug 2024 23:46:11 -0700 Subject: [PATCH 17/23] switch to bitmask for validation config --- atproto/lexicon/cmd/lextool/net.go | 2 +- atproto/lexicon/interop_record_test.go | 4 +- atproto/lexicon/language.go | 10 ++--- atproto/lexicon/lexicon.go | 51 ++++++++++++++------------ atproto/lexicon/lexicon_test.go | 4 +- 5 files changed, 36 insertions(+), 35 deletions(-) diff --git a/atproto/lexicon/cmd/lextool/net.go 
b/atproto/lexicon/cmd/lextool/net.go index 25fd9118a..1600aaee8 100644 --- a/atproto/lexicon/cmd/lextool/net.go +++ b/atproto/lexicon/cmd/lextool/net.go @@ -71,7 +71,7 @@ func runValidateRecord(cctx *cli.Context) error { } slog.Info("validating", "did", ident.DID.String(), "collection", aturi.Collection().String(), "rkey", aturi.RecordKey().String()) - err = lexicon.ValidateRecordLenient(&cat, record, aturi.Collection().String()) + err = lexicon.ValidateRecord(&cat, record, aturi.Collection().String(), lexicon.LenientMode) if err != nil { return err } diff --git a/atproto/lexicon/interop_record_test.go b/atproto/lexicon/interop_record_test.go index 747f0c02a..cb6546998 100644 --- a/atproto/lexicon/interop_record_test.go +++ b/atproto/lexicon/interop_record_test.go @@ -49,7 +49,7 @@ func TestInteropRecordValid(t *testing.T) { t.Fatal(err) } - assert.NoError(ValidateRecord(&cat, d, "example.lexicon.record")) + assert.NoError(ValidateRecord(&cat, d, "example.lexicon.record", 0)) } } @@ -83,7 +83,7 @@ func TestInteropRecordInvalid(t *testing.T) { if err != nil { t.Fatal(err) } - err = ValidateRecord(&cat, d, "example.lexicon.record") + err = ValidateRecord(&cat, d, "example.lexicon.record", 0) if err == nil { fmt.Println(" FAIL") } diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 7ea46e34c..5f5325be9 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -559,8 +559,7 @@ func (s *SchemaString) CheckSchema() error { return nil } -// lenient mode is only for datetimes, and hopefully will be deprecated soon -func (s *SchemaString) Validate(d any, lenient bool) error { +func (s *SchemaString) Validate(d any, flags ValidateFlags) error { v, ok := d.(string) if !ok { return fmt.Errorf("expected a string: %v", reflect.TypeOf(d)) @@ -605,7 +604,7 @@ func (s *SchemaString) Validate(d any, lenient bool) error { return err } case "datetime": - if lenient { + if flags&AllowLenientDatetime != 0 { if _, err := 
syntax.ParseDatetimeLenient(v); err != nil { return err } @@ -770,13 +769,12 @@ func (s *SchemaBlob) CheckSchema() error { return nil } -// lenient flag allows legacy blobs (if true) -func (s *SchemaBlob) Validate(d any, lenient bool) error { +func (s *SchemaBlob) Validate(d any, flags ValidateFlags) error { v, ok := d.(data.Blob) if !ok { return fmt.Errorf("expected a blob") } - if !lenient && v.Size < 0 { + if !(flags&AllowLegacyBlob != 0) && v.Size < 0 { return fmt.Errorf("legacy blobs not allowed") } if len(s.Accept) > 0 { diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index f53edaf41..71b47f8f2 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -5,24 +5,27 @@ import ( "reflect" ) +type ValidateFlags int + +const ( + AllowLegacyBlob = 1 << iota + AllowLenientDatetime + StrictRecursiveValidation +) + +var LenientMode ValidateFlags = AllowLegacyBlob | AllowLenientDatetime + type Schema struct { ID string Revision *int Def any } -func ValidateRecord(cat Catalog, recordData any, ref string) error { - return validateRecordConfig(cat, recordData, ref, false) -} - -// Variation of ValidateRecord which allows "legacy" blob format, and flexible string datetimes. -// -// Hope is to deprecate this lenient variation in the near future! 
-func ValidateRecordLenient(cat Catalog, recordData any, ref string) error { - return validateRecordConfig(cat, recordData, ref, true) +func ValidateRecord(cat Catalog, recordData any, ref string, flags ValidateFlags) error { + return validateRecordConfig(cat, recordData, ref, flags) } -func validateRecordConfig(cat Catalog, recordData any, ref string, lenient bool) error { +func validateRecordConfig(cat Catalog, recordData any, ref string, flags ValidateFlags) error { def, err := cat.Resolve(ref) if err != nil { return err @@ -39,10 +42,10 @@ func validateRecordConfig(cat Catalog, recordData any, ref string, lenient bool) if !ok || t != ref { return fmt.Errorf("record data missing $type, or didn't match expected NSID") } - return validateObject(cat, s.Record, d, lenient) + return validateObject(cat, s.Record, d, flags) } -func validateData(cat Catalog, def any, d any, lenient bool) error { +func validateData(cat Catalog, def any, d any, flags ValidateFlags) error { // TODO: switch v := def.(type) { case SchemaNull: @@ -52,7 +55,7 @@ func validateData(cat Catalog, def any, d any, lenient bool) error { case SchemaInteger: return v.Validate(d) case SchemaString: - return v.Validate(d, lenient) + return v.Validate(d, flags) case SchemaBytes: return v.Validate(d) case SchemaCIDLink: @@ -62,24 +65,24 @@ func validateData(cat Catalog, def any, d any, lenient bool) error { if !ok { return fmt.Errorf("expected an array, got: %s", reflect.TypeOf(d)) } - return validateArray(cat, v, arr, lenient) + return validateArray(cat, v, arr, flags) case SchemaObject: obj, ok := d.(map[string]any) if !ok { return fmt.Errorf("expected an object, got: %s", reflect.TypeOf(d)) } - return validateObject(cat, v, obj, lenient) + return validateObject(cat, v, obj, flags) case SchemaBlob: - return v.Validate(d, lenient) + return v.Validate(d, flags) case SchemaRef: // recurse next, err := cat.Resolve(v.fullRef) if err != nil { return err } - return validateData(cat, next.Def, d, lenient) + 
return validateData(cat, next.Def, d, flags) case SchemaUnion: - return validateUnion(cat, v, d, lenient) + return validateUnion(cat, v, d, flags) case SchemaUnknown: return v.Validate(d) case SchemaToken: @@ -89,7 +92,7 @@ func validateData(cat Catalog, def any, d any, lenient bool) error { } } -func validateObject(cat Catalog, s SchemaObject, d map[string]any, lenient bool) error { +func validateObject(cat Catalog, s SchemaObject, d map[string]any, flags ValidateFlags) error { for _, k := range s.Required { if _, ok := d[k]; !ok { return fmt.Errorf("required field missing: %s", k) @@ -100,7 +103,7 @@ func validateObject(cat Catalog, s SchemaObject, d map[string]any, lenient bool) if v == nil && s.IsNullable(k) { continue } - err := validateData(cat, def.Inner, v, lenient) + err := validateData(cat, def.Inner, v, flags) if err != nil { return err } @@ -109,12 +112,12 @@ func validateObject(cat Catalog, s SchemaObject, d map[string]any, lenient bool) return nil } -func validateArray(cat Catalog, s SchemaArray, arr []any, lenient bool) error { +func validateArray(cat Catalog, s SchemaArray, arr []any, flags ValidateFlags) error { if (s.MinLength != nil && len(arr) < *s.MinLength) || (s.MaxLength != nil && len(arr) > *s.MaxLength) { return fmt.Errorf("array length out of bounds: %d", len(arr)) } for _, v := range arr { - err := validateData(cat, s.Items.Inner, v, lenient) + err := validateData(cat, s.Items.Inner, v, flags) if err != nil { return err } @@ -122,7 +125,7 @@ func validateArray(cat Catalog, s SchemaArray, arr []any, lenient bool) error { return nil } -func validateUnion(cat Catalog, s SchemaUnion, d any, lenient bool) error { +func validateUnion(cat Catalog, s SchemaUnion, d any, flags ValidateFlags) error { closed := s.Closed != nil && *s.Closed == true for _, ref := range s.fullRefs { def, err := cat.Resolve(ref) @@ -130,7 +133,7 @@ func validateUnion(cat Catalog, s SchemaUnion, d any, lenient bool) error { // TODO: how to actually handle unknown defs? 
return err } - if err = validateData(cat, def.Def, d, lenient); nil == err { // if success + if err = validateData(cat, def.Def, d, flags); nil == err { // if success return nil } } diff --git a/atproto/lexicon/lexicon_test.go b/atproto/lexicon/lexicon_test.go index 3d12e40b8..ce5c490ec 100644 --- a/atproto/lexicon/lexicon_test.go +++ b/atproto/lexicon/lexicon_test.go @@ -29,7 +29,7 @@ func TestBasicCatalog(t *testing.T) { "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", "val": "test-label", }, - false, + 0, )) assert.Error(validateData( @@ -42,6 +42,6 @@ func TestBasicCatalog(t *testing.T) { "uri": "at://did:plc:asdf123/com.atproto.feed.post/asdf123", "val": "test-label", }, - false, + 0, )) } From 9e64f973fdf8e2acbfe61246c8142c36f8f6de8a Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 2 Oct 2024 20:32:05 -0700 Subject: [PATCH 18/23] fix typo in blob tests --- atproto/lexicon/testdata/record-data-invalid.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/lexicon/testdata/record-data-invalid.json b/atproto/lexicon/testdata/record-data-invalid.json index 623ca2395..5675f1fac 100644 --- a/atproto/lexicon/testdata/record-data-invalid.json +++ b/atproto/lexicon/testdata/record-data-invalid.json @@ -37,7 +37,7 @@ "data": { "$type": "example.lexicon.record", "integer": 1, "blob": "green" } }, { "name": "invalid blob: wrong type", "rkey": "demo", - "data": { "$type": "example.lexicon.record", "integer": 1, "bytes": { + "data": { "$type": "example.lexicon.record", "integer": 1, "blob": { "type": "blob", "size": 123, "mimeType": false, From 5b33a818293c36ac9e479922f87f03143e37527d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Wed, 2 Oct 2024 20:32:47 -0700 Subject: [PATCH 19/23] enforce 'unknown' must be an object --- atproto/lexicon/language.go | 4 ++++ .../lexicon/testdata/record-data-invalid.json | 19 +++++++++++++++++++ .../lexicon/testdata/record-data-valid.json | 13 +++++++++++++ 3 files changed, 36 insertions(+) 
diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 5f5325be9..25265abc4 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -914,5 +914,9 @@ func (s *SchemaUnknown) CheckSchema() error { } func (s *SchemaUnknown) Validate(d any) error { + _, ok := d.(map[string]any) + if !ok { + return fmt.Errorf("'unknown' data must an object") + } return nil } diff --git a/atproto/lexicon/testdata/record-data-invalid.json b/atproto/lexicon/testdata/record-data-invalid.json index 5675f1fac..f431cc2a1 100644 --- a/atproto/lexicon/testdata/record-data-invalid.json +++ b/atproto/lexicon/testdata/record-data-invalid.json @@ -180,5 +180,24 @@ { "name": "out of closed union", "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "closedUnion": "other" } + }, + { "name": "unknown wrong type (bool)", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "unknown": false } + }, + { "name": "unknown wrong type (bytes)", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "unknown": { "$bytes": "123" } } + }, + { "name": "unknown wrong type (blob)", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "unknown": { + "$type": "blob", + "mimeType": "text/plain", + "size": 12345, + "ref": { + "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" + } + }} } ] diff --git a/atproto/lexicon/testdata/record-data-valid.json b/atproto/lexicon/testdata/record-data-valid.json index bbdd89b3d..69aba3224 100644 --- a/atproto/lexicon/testdata/record-data-valid.json +++ b/atproto/lexicon/testdata/record-data-valid.json @@ -88,5 +88,18 @@ }, "closedUnion": "example.lexicon.record#demoToken" } + }, + { + "name": "unknown as a type", + "rkey": "demo", + "data": { + "$type": "example.lexicon.record", + "integer": 1, + "unknown": { + "$type": "example.lexicon.record#demoObject", + "a": 1, + "b": 2 + } + } } ] From 5d9eb21739a39da39418b4068573e0e6c078e8bc Mon Sep 17 00:00:00 2001 From: 
bryan newbold Date: Thu, 3 Oct 2024 16:31:54 -0700 Subject: [PATCH 20/23] more test data --- atproto/lexicon/testdata/catalog/record.json | 19 +++++++++++---- .../lexicon/testdata/record-data-invalid.json | 24 +++++++++++++++++-- .../lexicon/testdata/record-data-valid.json | 5 +++- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/atproto/lexicon/testdata/catalog/record.json b/atproto/lexicon/testdata/catalog/record.json index cb32a07ee..b7ef7297d 100644 --- a/atproto/lexicon/testdata/catalog/record.json +++ b/atproto/lexicon/testdata/catalog/record.json @@ -69,8 +69,8 @@ "union": { "type": "union", "refs": [ - "example.lexicon.record#demoToken", - "example.lexicon.record#demoObject" + "example.lexicon.record#demoObject", + "example.lexicon.record#demoObjectTwo" ] }, "formats": { @@ -134,7 +134,6 @@ "closedUnion": { "type": "union", "refs": [ - "example.lexicon.record#demoToken", "example.lexicon.record#demoObject" ], "closed": true @@ -210,7 +209,7 @@ "demoObject": { "type": "object", "description": "smaller object schema for unions", - "parameters": { + "properties": { "a": { "type": "integer" }, @@ -218,6 +217,18 @@ "type": "integer" } } + }, + "demoObjectTwo": { + "type": "object", + "description": "smaller object schema for unions", + "properties": { + "c": { + "type": "integer" + }, + "d": { + "type": "integer" + } + } } } } diff --git a/atproto/lexicon/testdata/record-data-invalid.json b/atproto/lexicon/testdata/record-data-invalid.json index f431cc2a1..58c227cf1 100644 --- a/atproto/lexicon/testdata/record-data-invalid.json +++ b/atproto/lexicon/testdata/record-data-invalid.json @@ -53,10 +53,14 @@ "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "array": [true, false] } }, - { "name": "invalid object", + { "name": "object wrong data type", "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "object": 123 } }, + { "name": "object nested wrong data type", + "rkey": "demo", + "data": { "$type": 
"example.lexicon.record", "integer": 1, "object": {"a": "not-a-number" } } + }, { "name": "invalid token ref type", "rkey": "demo", "data": { "$type": "example.lexicon.record", "integer": 1, "ref": 123 } @@ -177,9 +181,25 @@ "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" } }}}, + { "name": "open union wrong data type", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "union": 123 } + }, + { "name": "open union missing $type", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "union": {"a": 1, "b": 2 } } + }, { "name": "out of closed union", "rkey": "demo", - "data": { "$type": "example.lexicon.record", "integer": 1, "closedUnion": "other" } + "data": { "$type": "example.lexicon.record", "integer": 1, "closedUnion": { "$type": "example.unknown-lexicon.blah", "a": 1 } } + }, + { "name": "union inner invalid", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "closedUnion": { "$type": "example.lexicon.record#demoObjectTwo", "a": 1 } } + }, + { "name": "union inner invalid", + "rkey": "demo", + "data": { "$type": "example.lexicon.record", "integer": 1, "union": { "$type": "example.lexicon.record#demoObject", "a": "not-a-number" } } }, { "name": "unknown wrong type (bool)", "rkey": "demo", diff --git a/atproto/lexicon/testdata/record-data-valid.json b/atproto/lexicon/testdata/record-data-valid.json index 69aba3224..a56489e7d 100644 --- a/atproto/lexicon/testdata/record-data-valid.json +++ b/atproto/lexicon/testdata/record-data-valid.json @@ -86,7 +86,10 @@ "$link": "bafyreiclp443lavogvhj3d2ob2cxbfuscni2k5jk7bebjzg7khl3esabwq" } }, - "closedUnion": "example.lexicon.record#demoToken" + "closedUnion": { + "$type": "example.lexicon.record#demoObject", + "a": 1 + } } }, { From 3243c4a2225c00831e1402040e8433008944faf8 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 3 Oct 2024 16:32:41 -0700 Subject: [PATCH 21/23] union validation --- 
atproto/lexicon/lexicon.go | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 71b47f8f2..207aa88ea 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -46,7 +46,6 @@ func validateRecordConfig(cat Catalog, recordData any, ref string, flags Validat } func validateData(cat Catalog, def any, d any, flags ValidateFlags) error { - // TODO: switch v := def.(type) { case SchemaNull: return v.Validate(d) @@ -127,19 +126,40 @@ func validateArray(cat Catalog, s SchemaArray, arr []any, flags ValidateFlags) e func validateUnion(cat Catalog, s SchemaUnion, d any, flags ValidateFlags) error { closed := s.Closed != nil && *s.Closed == true + + obj, ok := d.(map[string]any) + if !ok { + return fmt.Errorf("union data is not object type") + } + typeVal, ok := obj["$type"] + if !ok { + return fmt.Errorf("union data must have $type") + } + t, ok := typeVal.(string) + if !ok { + return fmt.Errorf("union data must have string $type") + } + for _, ref := range s.fullRefs { + if ref != t { + continue + } def, err := cat.Resolve(ref) if err != nil { - // TODO: how to actually handle unknown defs? - return err - } - if err = validateData(cat, def.Def, d, flags); nil == err { // if success - return nil + return fmt.Errorf("could not resolve known union variant $type: %s", ref) } + return validateData(cat, def.Def, d, flags) } if closed { - return fmt.Errorf("data did not match any variant of closed union") + return fmt.Errorf("data did not match any variant of closed union: %s", t) } - // TODO: anything matches if an open union? - return nil + + // eagerly attempt validation of the open union type + def, err := cat.Resolve(t) + if err != nil { + // NOTE: not currently failing on unknown $type. 
might add a flag to fail here in the future + return fmt.Errorf("could not resolve known union variant $type: %s", t) + //return nil + } + return validateData(cat, def.Def, d, flags) } From eda52d4bb3f69af5482e3dd943145e0150f5b51f Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 3 Oct 2024 17:05:40 -0700 Subject: [PATCH 22/23] add docs and comment --- atproto/lexicon/catalog.go | 11 ++++++--- atproto/lexicon/examples_test.go | 41 ++++++++++++++++++++++++++++++++ atproto/lexicon/language.go | 3 ++- atproto/lexicon/lexicon.go | 20 +++++++++++++--- 4 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 atproto/lexicon/examples_test.go diff --git a/atproto/lexicon/catalog.go b/atproto/lexicon/catalog.go index 22b4004e2..7797fad93 100644 --- a/atproto/lexicon/catalog.go +++ b/atproto/lexicon/catalog.go @@ -5,20 +5,24 @@ import ( "fmt" "io" "io/fs" + "log/slog" "os" "path/filepath" "strings" ) -// An aggregation of lexicon schemas, and methods for validating generic data against those schemas. +// Interface type for a resolver or container of lexicon schemas, and methods for validating generic data against those schemas. type Catalog interface { + // Looks up a schema reference (NSID string with optional fragment) to a Schema object. Resolve(ref string) (*Schema, error) } +// Trivial in-memory Lexicon Catalog implementation. type BaseCatalog struct { schemas map[string]Schema } +// Creates a new empty BaseCatalog func NewBaseCatalog() BaseCatalog { return BaseCatalog{ schemas: make(map[string]Schema), @@ -40,6 +44,7 @@ func (c *BaseCatalog) Resolve(ref string) (*Schema, error) { return &s, nil } +// Inserts a schema loaded from a JSON file into the catalog. func (c *BaseCatalog) AddSchemaFile(sf SchemaFile) error { base := sf.ID for frag, def := range sf.Defs { @@ -76,6 +81,7 @@ func (c *BaseCatalog) AddSchemaFile(sf SchemaFile) error { return nil } +// Recursively loads all '.json' files from a directory into the catalog.
func (c *BaseCatalog) LoadDirectory(dirPath string) error { return filepath.WalkDir(dirPath, func(p string, d fs.DirEntry, err error) error { if err != nil { @@ -87,8 +93,7 @@ func (c *BaseCatalog) LoadDirectory(dirPath string) error { if !strings.HasSuffix(p, ".json") { return nil } - // TODO: logging - fmt.Println(p) + slog.Debug("loading Lexicon schema file", "path", p) f, err := os.Open(p) if err != nil { return err diff --git a/atproto/lexicon/examples_test.go b/atproto/lexicon/examples_test.go new file mode 100644 index 000000000..66ea2e9ea --- /dev/null +++ b/atproto/lexicon/examples_test.go @@ -0,0 +1,41 @@ +package lexicon + +import ( + "fmt" + + atdata "github.com/bluesky-social/indigo/atproto/data" +) + +func ExampleRecordValidate() { + + // First load Lexicon schema JSON files from local disk. + cat := NewBaseCatalog() + if err := cat.LoadDirectory("testdata/catalog"); err != nil { + panic("failed to load lexicons") + } + + // Parse record JSON data using atproto/data helper + recordJSON := `{ + "$type": "example.lexicon.record", + "integer": 123, + "formats": { + "did": "did:web:example.com", + "aturi": "at://handle.example.com/com.example.nsid/asdf123", + "datetime": "2023-10-30T22:25:23Z", + "language": "en", + "tid": "3kznmn7xqxl22" + } + }` + + recordData, err := atdata.UnmarshalJSON([]byte(recordJSON)) + if err != nil { + panic("failed to parse record JSON") + } + + if err := ValidateRecord(&cat, recordData, "example.lexicon.record", 0); err != nil { + fmt.Printf("Schema validation failed: %v\n", err) + } else { + fmt.Println("Success!") + } + // Output: Success! 
+} diff --git a/atproto/lexicon/language.go b/atproto/lexicon/language.go index 25265abc4..734d0031e 100644 --- a/atproto/lexicon/language.go +++ b/atproto/lexicon/language.go @@ -12,7 +12,7 @@ import ( "github.com/rivo/uniseg" ) -// Serialization helper for top-level Lexicon schema JSON objects (files) +// Serialization helper type for top-level Lexicon schema JSON objects (files) type SchemaFile struct { Lexicon int `json:"lexicon,const=1"` ID string `json:"id"` @@ -26,6 +26,7 @@ type SchemaDef struct { Inner any } +// Checks that the schema definition itself is valid (recursively). func (s *SchemaDef) CheckSchema() error { switch v := s.Inner.(type) { case SchemaRecord: diff --git a/atproto/lexicon/lexicon.go b/atproto/lexicon/lexicon.go index 207aa88ea..840d2ae7a 100644 --- a/atproto/lexicon/lexicon.go +++ b/atproto/lexicon/lexicon.go @@ -5,22 +5,33 @@ import ( "reflect" ) +// Boolean flags tweaking how Lexicon validation rules are interpreted. type ValidateFlags int const ( + // Flag which allows legacy "blob" data to pass validation. AllowLegacyBlob = 1 << iota + // Flag which loosens "datetime" string syntax validation. String must still be an ISO datetime, but might be missing timezone (for example) AllowLenientDatetime + // Flag which requires validation of nested data in open unions. By default nested union types are only validated optimistically (if the type is known in catalog) for unlisted types. This flag will result in a validation error if the Lexicon can't be resolved from the catalog. StrictRecursiveValidation ) +// Combination of argument flags for less formal validation. Recommended for, eg, working with old/legacy data from 2023. var LenientMode ValidateFlags = AllowLegacyBlob | AllowLenientDatetime +// Represents a Lexicon schema definition type Schema struct { ID string Revision *int Def any } +// Checks Lexicon schema (fetched from the catalog) for the given record, with optional flags tweaking default validation rules.
+// +// 'recordData' is typed as 'any', but is expected to be 'map[string]any' +// 'ref' is a reference to the schema type, as an NSID with optional fragment. For records, the '$type' must match 'ref' +// 'flags' are parameters tweaking Lexicon validation rules. Zero value is default. func ValidateRecord(cat Catalog, recordData any, ref string, flags ValidateFlags) error { return validateRecordConfig(cat, recordData, ref, flags) } @@ -155,11 +166,14 @@ func validateUnion(cat Catalog, s SchemaUnion, d any, flags ValidateFlags) error } // eagerly attempt validation of the open union type + // TODO: validate reference as NSID with optional fragment def, err := cat.Resolve(t) if err != nil { - // NOTE: not currently failing on unknown $type. might add a flag to fail here in the future - return fmt.Errorf("could not resolve known union variant $type: %s", t) - //return nil + if flags&StrictRecursiveValidation != 0 { + return fmt.Errorf("could not strictly validate open union variant $type: %s", t) + } + // by default, ignore validation of unknown open union data + return nil } return validateData(cat, def.Def, d, flags) } From 2215e75dbef6f2eb956d15c77193415a14af8cdb Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Thu, 3 Oct 2024 17:13:20 -0700 Subject: [PATCH 23/23] go vet --- atproto/lexicon/examples_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/lexicon/examples_test.go b/atproto/lexicon/examples_test.go index 66ea2e9ea..fe2cd0568 100644 --- a/atproto/lexicon/examples_test.go +++ b/atproto/lexicon/examples_test.go @@ -6,7 +6,7 @@ import ( atdata "github.com/bluesky-social/indigo/atproto/data" ) -func ExampleRecordValidate() { +func ExampleValidateRecord() { // First load Lexicon schema JSON files from local disk. cat := NewBaseCatalog()