From 57091b03b40f9dafa88ab262420c6dc718a93e33 Mon Sep 17 00:00:00 2001 From: Josh Humphries <2035234+jhump@users.noreply.github.com> Date: Thu, 10 Oct 2024 13:18:57 -0400 Subject: [PATCH 1/5] preserve all extra properties/metadata, including unrecognized logical types --- gen/testdata/golden.avsc | 3 - schema.go | 47 +++- schema_parse.go | 116 +++++--- schema_test.go | 591 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 710 insertions(+), 47 deletions(-) diff --git a/gen/testdata/golden.avsc b/gen/testdata/golden.avsc index 438d842..592d91b 100644 --- a/gen/testdata/golden.avsc +++ b/gen/testdata/golden.avsc @@ -106,7 +106,6 @@ { "name": "mapOfStrings", "type": { - "name": "aMapOfStrings", "type": "map", "values": "string" } @@ -114,7 +113,6 @@ { "name": "mapOfRecords", "type": { - "name": "aMapOfRecords", "type": "map", "values": { "name": "RecordInMap", @@ -175,7 +173,6 @@ { "name": "aRecordArray", "type": { - "name": "someRecordArray", "type": "array", "items": { "name": "recordInArray", diff --git a/schema.go b/schema.go index 0d82f7a..7e904c9 100644 --- a/schema.go +++ b/schema.go @@ -25,11 +25,19 @@ func (nullDefaultType) MarshalJSON() ([]byte, error) { var nullDefault nullDefaultType = struct{}{} var ( - schemaReserved = []string{ - "doc", "fields", "items", "name", "namespace", "size", "symbols", - "values", "type", "aliases", "logicalType", "precision", "scale", - } - fieldReserved = []string{"default", "doc", "name", "order", "type", "aliases"} + // Note: order matches the order of properties as they are named in the spec. + // https://avro.apache.org/docs/1.12.0/specification + recordReserved = []string{"type", "name", "namespace", "doc", "aliases", "fields"} + fieldReserved = []string{"name", "doc", "type", "order", "aliases", "default"} + enumReserved = []string{"type", "name", "namespace", "aliases", "doc", "symbols", "default"} + arrayReserved = []string{"type", "items"} + mapReserved = []string{"type", "values"} + fixedReserved = []string{"type", "name", "namespace", "aliases", "size"} + fixedWithLogicalTypeReserved = []string{"type", "name", "namespace", "aliases", "size", "logicalType"} + fixedWithDecimalTypeReserved = []string{"type", "name", "namespace", "aliases", "size", "logicalType", "precision", "scale"} + primitiveReserved = []string{"type"} + primitiveWithLogicalTypeReserved = []string{"type", "logicalType"} + primitiveWithDecimalTypeReserved = []string{"type", "logicalType", "precision", "scale"} ) // Type is a schema type. @@ -482,9 +490,16 @@ func NewPrimitiveSchema(t Type, l LogicalSchema, opts ...SchemaOption) *Primitiv for _, opt := range opts { opt(&cfg) } - + reservedProps := primitiveReserved + if l != nil { + if l.Type() == Decimal { + reservedProps = primitiveWithDecimalTypeReserved + } else { + reservedProps = primitiveWithLogicalTypeReserved + } + } return &PrimitiveSchema{ - properties: newProperties(cfg.props, schemaReserved), + properties: newProperties(cfg.props, reservedProps), cacheFingerprinter: cacheFingerprinter{writerFingerprint: cfg.wfp}, typ: t, logical: l, @@ -574,7 +589,7 @@ func NewRecordSchema(name, namespace string, fields []*Field, opts ...SchemaOpti return &RecordSchema{ name: n, - properties: newProperties(cfg.props, schemaReserved), + properties: newProperties(cfg.props, recordReserved), cacheFingerprinter: cacheFingerprinter{writerFingerprint: cfg.wfp}, fields: fields, doc: cfg.doc, @@ -919,7 +934,7 @@ func NewEnumSchema(name, namespace string, symbols []string, opts ...SchemaOptio return &EnumSchema{ name: n, - properties: newProperties(cfg.props, schemaReserved), + properties: newProperties(cfg.props, enumReserved), cacheFingerprinter: cacheFingerprinter{writerFingerprint: cfg.wfp}, symbols: symbols, def: def, @@ -1072,7 +1087,7 @@ func NewArraySchema(items Schema, opts ...SchemaOption) *ArraySchema { } return &ArraySchema{ - properties: newProperties(cfg.props, schemaReserved), + properties: newProperties(cfg.props, arrayReserved), cacheFingerprinter: cacheFingerprinter{writerFingerprint: cfg.wfp}, items: items, } @@ -1142,7 +1157,7 @@ func NewMapSchema(values Schema, opts ...SchemaOption) *MapSchema { } return &MapSchema{ - properties: newProperties(cfg.props, schemaReserved), + properties: newProperties(cfg.props, mapReserved), cacheFingerprinter: cacheFingerprinter{writerFingerprint: cfg.wfp}, values: values, } @@ -1323,9 +1338,17 @@ func NewFixedSchema( return nil, err } + reservedProps := fixedReserved + if logical != nil { + if logical.Type() == Decimal { + reservedProps = fixedWithDecimalTypeReserved + } else { + reservedProps = fixedWithLogicalTypeReserved + } + } return &FixedSchema{ name: n, - properties: newProperties(cfg.props, schemaReserved), + properties: newProperties(cfg.props, reservedProps), cacheFingerprinter: cacheFingerprinter{writerFingerprint: cfg.wfp}, size: size, logical: logical, diff --git a/schema_parse.go b/schema_parse.go index 889ada0..71af2d3 100644 --- a/schema_parse.go +++ b/schema_parse.go @@ -121,6 +121,10 @@ func parsePrimitiveType(namespace, s string, cache *SchemaCache) (Schema, error) func parseComplexType(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { if val, ok := m["type"].([]any); ok { + // TODO: According to the spec, this is not allowed: + // https://avro.apache.org/docs/1.12.0/specification/#schema-declaration + // The "type" property in an object must be a string. A union type will be a slice, + // but not an object with a "type" property that is a slice. return parseUnion(namespace, val, seen, cache) } @@ -132,6 +136,9 @@ func parseComplexType(namespace string, m map[string]any, seen seenCache, cache switch typ { case Null: + // TODO: Per the spec, "null" is a primitive type so should be permitted to + // have other properties/metadata. + // https://avro.apache.org/docs/1.12.0/specification/#primitive-types return &NullSchema{}, nil case String, Bytes, Int, Long, Float, Double, Boolean: @@ -158,14 +165,13 @@ func parseComplexType(namespace string, m map[string]any, seen seenCache, cache } type primitiveSchema struct { + Type string `mapstructure:"type"` LogicalType string `mapstructure:"logicalType"` - Precision int `mapstructure:"precision"` - Scale int `mapstructure:"scale"` Props map[string]any `mapstructure:",remain"` } func parsePrimitive(typ Type, m map[string]any) (Schema, error) { - if m == nil { + if len(m) == 0 { return NewPrimitiveSchema(typ, nil), nil } @@ -179,13 +185,16 @@ func parsePrimitive(typ Type, m map[string]any) (Schema, error) { var logical LogicalSchema if p.LogicalType != "" { - logical = parsePrimitiveLogicalType(typ, p.LogicalType, p.Precision, p.Scale) + logical = parsePrimitiveLogicalType(typ, p.LogicalType, p.Props) + if logical == nil { + preserveLogicalType(p.LogicalType, &p.Props) + } } return NewPrimitiveSchema(typ, logical, WithProps(p.Props)), nil } -func parsePrimitiveLogicalType(typ Type, lt string, prec, scale int) LogicalSchema { +func parsePrimitiveLogicalType(typ Type, lt string, otherProps map[string]any) LogicalSchema { ltyp := LogicalType(lt) if (typ == String && ltyp == UUID) || (typ == Int && ltyp == Date) || @@ -199,20 +208,21 @@ func parsePrimitiveLogicalType(typ Type, lt string, prec, scale int) LogicalSche } if typ == Bytes && ltyp == Decimal { - return parseDecimalLogicalType(-1, prec, scale) + return parseDecimalLogicalType(-1, otherProps) } - return nil + return nil // otherwise, not a recognized logical type } type recordSchema struct { - Type string `mapstructure:"type"` - Name string `mapstructure:"name"` - Namespace string `mapstructure:"namespace"` - Aliases []string `mapstructure:"aliases"` - Doc string `mapstructure:"doc"` - Fields []map[string]any `mapstructure:"fields"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + LogicalType string `mapstructure:"logicalType"` + Name string `mapstructure:"name"` + Namespace string `mapstructure:"namespace"` + Aliases []string `mapstructure:"aliases"` + Doc string `mapstructure:"doc"` + Fields []map[string]any `mapstructure:"fields"` + Props map[string]any `mapstructure:",remain"` } func parseRecord(typ Type, namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -230,6 +240,7 @@ func parseRecord(typ Type, namespace string, m map[string]any, seen seenCache, c if r.Namespace == "" { r.Namespace = namespace } + preserveLogicalType(r.LogicalType, &r.Props) if !hasKey(meta.Keys, "fields") { return nil, errors.New("avro: record must have an array of fields") @@ -321,14 +332,15 @@ func parseField(namespace string, m map[string]any, seen seenCache, cache *Schem } type enumSchema struct { - Name string `mapstructure:"name"` - Namespace string `mapstructure:"namespace"` - Aliases []string `mapstructure:"aliases"` - Type string `mapstructure:"type"` - Doc string `mapstructure:"doc"` - Symbols []string `mapstructure:"symbols"` - Default string `mapstructure:"default"` - Props map[string]any `mapstructure:",remain"` + Name string `mapstructure:"name"` + Namespace string `mapstructure:"namespace"` + Aliases []string `mapstructure:"aliases"` + Type string `mapstructure:"type"` + LogicalType string `mapstructure:"logicalType"` + Doc string `mapstructure:"doc"` + Symbols []string `mapstructure:"symbols"` + Default string `mapstructure:"default"` + Props map[string]any `mapstructure:",remain"` } func parseEnum(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -346,6 +358,7 @@ func parseEnum(namespace string, m map[string]any, seen seenCache, cache *Schema if e.Namespace == "" { e.Namespace = namespace } + preserveLogicalType(e.LogicalType, &e.Props) enum, err := NewEnumSchema(e.Name, e.Namespace, e.Symbols, WithDefault(e.Default), WithAliases(e.Aliases), WithDoc(e.Doc), WithProps(e.Props), @@ -368,8 +381,10 @@ func parseEnum(namespace string, m map[string]any, seen seenCache, cache *Schema } type arraySchema struct { - Items any `mapstructure:"items"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + LogicalType string `mapstructure:"logicalType"` + Items any `mapstructure:"items"` + Props map[string]any `mapstructure:",remain"` } func parseArray(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -388,13 +403,16 @@ func parseArray(namespace string, m map[string]any, seen seenCache, cache *Schem if err != nil { return nil, err } + preserveLogicalType(a.LogicalType, &a.Props) return NewArraySchema(schema, WithProps(a.Props)), nil } type mapSchema struct { - Values any `mapstructure:"values"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + LogicalType string `mapstructure:"logicalType"` + Values any `mapstructure:"values"` + Props map[string]any `mapstructure:",remain"` } func parseMap(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -413,6 +431,7 @@ func parseMap(namespace string, m map[string]any, seen seenCache, cache *SchemaC if err != nil { return nil, err } + preserveLogicalType(ms.LogicalType, &ms.Props) return NewMapSchema(schema, WithProps(ms.Props)), nil } @@ -437,8 +456,6 @@ type fixedSchema struct { Type string `mapstructure:"type"` Size int `mapstructure:"size"` LogicalType string `mapstructure:"logicalType"` - Precision int `mapstructure:"precision"` - Scale int `mapstructure:"scale"` Props map[string]any `mapstructure:",remain"` } @@ -464,7 +481,10 @@ func parseFixed(namespace string, m map[string]any, seen seenCache, cache *Schem var logical LogicalSchema if f.LogicalType != "" { - logical = parseFixedLogicalType(f.Size, f.LogicalType, f.Precision, f.Scale) + logical = parseFixedLogicalType(f.Size, f.LogicalType, f.Props) + if logical == nil { + preserveLogicalType(f.LogicalType, &f.Props) + } } fixed, err := NewFixedSchema(f.Name, f.Namespace, f.Size, logical, WithAliases(f.Aliases), WithProps(f.Props)) @@ -485,19 +505,41 @@ func parseFixed(namespace string, m map[string]any, seen seenCache, cache *Schem return fixed, nil } -func parseFixedLogicalType(size int, lt string, prec, scale int) LogicalSchema { +func parseFixedLogicalType(size int, lt string, otherProps map[string]any) LogicalSchema { ltyp := LogicalType(lt) switch { case ltyp == Duration && size == 12: return NewPrimitiveLogicalSchema(Duration) case ltyp == Decimal: - return parseDecimalLogicalType(size, prec, scale) + return parseDecimalLogicalType(size, otherProps) } return nil } -func parseDecimalLogicalType(size, prec, scale int) LogicalSchema { +type decimalSchema struct { + Precision int `mapstructure:"precision"` + Scale int `mapstructure:"scale"` +} + +func parseDecimalLogicalType(size int, otherProps map[string]any) LogicalSchema { + var ( + d decimalSchema + meta mapstructure.Metadata + ) + if err := decodeMap(otherProps, &d, &meta); err != nil { + return nil + } + decType := newDecimalLogicalType(size, d.Precision, d.Scale) + if decType != nil { + // Remove the properties that we consumed + delete(otherProps, "precision") + delete(otherProps, "scale") + } + return decType +} + +func newDecimalLogicalType(size, prec, scale int) LogicalSchema { if prec <= 0 { return nil } @@ -594,3 +636,13 @@ func (c seenCache) Add(name string) error { c[name] = struct{}{} return nil } + +func preserveLogicalType(logicalType string, props *map[string]any) { + if logicalType == "" { + return // nothing to preserve + } + if *props == nil { + *props = make(map[string]any, 1) + } + (*props)["logicalType"] = logicalType +} diff --git a/schema_test.go b/schema_test.go index 469a0e0..3e10cb8 100644 --- a/schema_test.go +++ b/schema_test.go @@ -1551,3 +1551,594 @@ func TestSchema_DereferencingRectifiesAlreadySeenSchema(t *testing.T) { n := strings.Count(strSchema, `"name":"org.hamba.avro.test1"`) assert.Equal(t, 1, n) } + +func TestParse_PreservesAllProperties(t *testing.T) { + testCases := []struct { + name string + schema string + check func(t *testing.T, schema avro.Schema) + }{ + { + name: "record", + schema: `{ + "type": "record", + "name": "SomeRecord", + "logicalType": "complex-number", + "precision": "abc", + "scale": "def", + "other": [1,2,3], + "fields": [ + { + "name": "r", + "type": "double" + }, + { + "name": "i", + "type": "double" + } + ] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.RecordSchema) + assert.Equal(t, map[string]any{ + "logicalType": "complex-number", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "array", + schema: `{ + "type": "array", + "name": "SomeArray", + "logicalType": "complex-number", + "precision": "abc", + "scale": "def", + "other": [1,2,3], + "items": "double" + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.ArraySchema) + assert.Equal(t, map[string]any{ + "name": "SomeArray", + "logicalType": "complex-number", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "map", + schema: `{ + "type": "map", + "name": "SomeMap", + "logicalType": "weights", + "precision": "abc", + "scale": "def", + "other": [1,2,3], + "values": "double" + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.MapSchema) + assert.Equal(t, map[string]any{ + "name": "SomeMap", + "logicalType": "weights", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "enum", + schema: `{ + "type": "enum", + "name": "SomeEnum", + "logicalType": "status", + "precision": "abc", + "scale": "def", + "other": [1,2,3], + "symbols": ["A", "B", "C"], + "default": "A" + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.EnumSchema) + assert.Equal(t, map[string]any{ + "logicalType": "status", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "fixed-no-logical-type", + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 16, + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "fixed-unknown-logical-type", + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 16, + "logicalType": "uuid", + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "logicalType": "uuid", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "fixed-decimal-logical-type", + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 16, + "logicalType": "decimal", + "precision": 10, + "scale": 3, + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + assert.Equal(t, avro.Decimal, rec.Logical().Type()) + }, + }, + { + name: "fixed-invalid-decimal-logical-type-bad-values", // scale is too high + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 16, + "logicalType": "decimal", + "precision": 10, + "scale": 20, + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "logicalType": "decimal", + "precision": 10.0, + "scale": 20.0, + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "fixed-invalid-decimal-logical-type-bad-type", // precision and scale aren't ints + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 16, + "logicalType": "decimal", + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "logicalType": "decimal", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "fixed-duration-logical", + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 12, + "logicalType": "duration", + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + assert.Equal(t, avro.Duration, rec.Logical().Type()) + }, + }, + { + name: "primitive-no-logical-type", + schema: `{ + "type": "long", + "name": "SomeLong", + "size": 16, + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "name": "SomeLong", + "size": 16.0, + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "primitive-unknown-logical-type", + schema: `{ + "type": "string", + "name": "SomeString", + "logicalType": "enum-name", + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "name": "SomeString", + "logicalType": "enum-name", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "primitive-date-logical-type", + schema: `{ + "type": "int", + "logicalType": "date", + "precision": 10, + "scale": 3, + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "precision": 10.0, + "scale": 3.0, + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + assert.Equal(t, avro.Date, rec.Logical().Type()) + }, + }, + { + name: "primitive-decimal-logical-type", + schema: `{ + "type": "bytes", + "logicalType": "decimal", + "precision": 10, + "scale": 3, + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + assert.Equal(t, avro.Decimal, rec.Logical().Type()) + }, + }, + { + name: "primitive-invalid-decimal-logical-type-bad-values", // scale is too high + schema: `{ + "type": "bytes", + "logicalType": "decimal", + "precision": 10, + "scale": 20, + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "logicalType": "decimal", + "precision": 10.0, + "scale": 20.0, + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + { + name: "primitive-invalid-decimal-logical-type-bad-type", // precision and scale aren't ints + schema: `{ + "type": "bytes", + "logicalType": "decimal", + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "logicalType": "decimal", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, + } + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + schema, err := avro.Parse(testCase.schema) + require.NoError(t, err) + testCase.check(t, schema) + }) + } +} + +func TestNewSchema_IgnoresInvalidProperties(t *testing.T) { + t.Run("record", func(t *testing.T) { + rec, err := avro.NewRecordSchema("abc.def.Xyz", "", nil, + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "name": 123, + "namespace": "abc", + "doc": "blah", + "fields": []any{1, 2, 3}, + "aliases": "foo", + "type": false, + // valid + "other": true, + "logicalType": "baz", + })) + require.NoError(t, err) + assert.Equal(t, map[string]any{ + "other": true, + "logicalType": "baz", + }, rec.Props()) + }) + t.Run("enum", func(t *testing.T) { + rec, err := avro.NewEnumSchema("abc.def.Xyz", "", []string{"ABC"}, + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "name": 123, + "namespace": "abc", + "doc": "blah", + "symbols": []any{1, 2, 3}, + "aliases": "foo", + "type": false, + "default": 123.456, + // valid + "other": true, + "logicalType": "baz", + })) + require.NoError(t, err) + assert.Equal(t, map[string]any{ + "other": true, + "logicalType": "baz", + }, rec.Props()) + }) + t.Run("fixed-no-logical-type", func(t *testing.T) { + rec, err := avro.NewFixedSchema("abc.def.Xyz", "", 10, nil, + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "type": false, + // valid + "doc": "blah", + "other": true, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + })) + require.NoError(t, err) + assert.Equal(t, map[string]any{ + "doc": "blah", + "other": true, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + }, rec.Props()) + }) + t.Run("fixed-logical-type", func(t *testing.T) { + rec, err := avro.NewFixedSchema("abc.def.Xyz", "", 10, avro.NewPrimitiveLogicalSchema(avro.Duration), + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "type": false, + "logicalType": "baz", + // valid + "doc": "blah", + "other": true, + "precision": "abc", + "scale": "def", + })) + require.NoError(t, err) + assert.Equal(t, map[string]any{ + "doc": "blah", + "other": true, + "precision": "abc", + "scale": "def", + }, rec.Props()) + }) + t.Run("fixed-decimal-logical-type", func(t *testing.T) { + rec, err := avro.NewFixedSchema("abc.def.Xyz", "", 10, avro.NewDecimalLogicalSchema(10, 0), + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "type": false, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + // valid + "doc": "blah", + "other": true, + })) + require.NoError(t, err) + assert.Equal(t, map[string]any{ + "doc": "blah", + "other": true, + }, rec.Props()) + }) + t.Run("array", func(t *testing.T) { + rec := avro.NewArraySchema(avro.NewPrimitiveSchema(avro.String, nil), + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "items": []any{1, 2, 3}, + "type": false, + // valid + "name": 123, + "namespace": "abc", + "doc": "blah", + "aliases": "foo", + "other": true, + "logicalType": "baz", + })) + assert.Equal(t, map[string]any{ + "name": 123, + "namespace": "abc", + "doc": "blah", + "aliases": "foo", + "other": true, + "logicalType": "baz", + }, rec.Props()) + }) + t.Run("map", func(t *testing.T) { + rec := avro.NewMapSchema(avro.NewPrimitiveSchema(avro.String, nil), + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "values": []any{1, 2, 3}, + "type": false, + // valid + "name": 123, + "namespace": "abc", + "doc": "blah", + "aliases": "foo", + "other": true, + "logicalType": "baz", + })) + assert.Equal(t, map[string]any{ + "name": 123, + "namespace": "abc", + "doc": "blah", + "aliases": "foo", + "other": true, + "logicalType": "baz", + }, rec.Props()) + }) + t.Run("primitive-no-logical-type", func(t *testing.T) { + rec := avro.NewPrimitiveSchema(avro.Long, nil, + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "type": false, + // valid + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "doc": "blah", + "other": true, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + })) + assert.Equal(t, map[string]any{ + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "doc": "blah", + "other": true, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + }, rec.Props()) + }) + t.Run("primitive-logical-type", func(t *testing.T) { + rec := avro.NewPrimitiveSchema(avro.Long, avro.NewPrimitiveLogicalSchema(avro.TimestampMicros), + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "type": false, + "logicalType": "baz", + // valid + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "doc": "blah", + "other": true, + "precision": "abc", + "scale": "def", + })) + assert.Equal(t, map[string]any{ + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "doc": "blah", + "other": true, + "precision": "abc", + "scale": "def", + }, rec.Props()) + }) + t.Run("primitive-decimal-logical-type", func(t *testing.T) { + rec := avro.NewPrimitiveSchema(avro.Bytes, avro.NewDecimalLogicalSchema(10, 0), + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "type": false, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + // valid + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "doc": "blah", + "other": true, + })) + assert.Equal(t, map[string]any{ + "name": 123, + "namespace": "abc", + "size": []any{1, 2, 3}, + "aliases": "foo", + "doc": "blah", + "other": true, + }, rec.Props()) + }) +} From cc31452f06f00c1a0fbb21a716424722e53e1449 Mon Sep 17 00:00:00 2001 From: Josh Humphries <2035234+jhump@users.noreply.github.com> Date: Thu, 10 Oct 2024 21:26:03 -0400 Subject: [PATCH 2/5] allow null types to have properties --- schema.go | 24 ++++++++++++++++++++++- schema_json_test.go | 4 ++++ schema_parse.go | 14 ++++++------- schema_test.go | 48 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 8 deletions(-) diff --git a/schema.go b/schema.go index 7e904c9..623c226 100644 --- a/schema.go +++ b/schema.go @@ -1429,9 +1429,22 @@ func (s *FixedSchema) CacheFingerprint() [32]byte { // NullSchema is an Avro null type schema. type NullSchema struct { + properties fingerprinter } +// NewNullSchema creates a new NullSchema. +func NewNullSchema(opts ...SchemaOption) *NullSchema { + var cfg schemaConfig + for _, opt := range opts { + opt(&cfg) + } + + return &NullSchema{ + properties: newProperties(cfg.props, primitiveReserved), + } +} + // Type returns the type of the schema. func (s *NullSchema) Type() Type { return Null @@ -1444,7 +1457,16 @@ func (s *NullSchema) String() string { // MarshalJSON marshals the schema to json. func (s *NullSchema) MarshalJSON() ([]byte, error) { - return []byte(`"null"`), nil + if len(s.props) == 0 { + return []byte(`"null"`), nil + } + buf := new(bytes.Buffer) + buf.WriteString(`{"type":"null"`) + if err := s.marshalPropertiesToJSON(buf); err != nil { + return nil, err + } + buf.WriteString("}") + return buf.Bytes(), nil } // Fingerprint returns the SHA256 fingerprint of the schema. diff --git a/schema_json_test.go b/schema_json_test.go index 2db4ac0..40f04ad 100644 --- a/schema_json_test.go +++ b/schema_json_test.go @@ -23,6 +23,10 @@ func TestSchema_JSON(t *testing.T) { input: `{"type":"null"}`, json: `"null"`, }, + { + input: `{"type":"null","other":"foo"}`, + json: `{"type":"null","other":"foo"}`, + }, { input: `"boolean"`, json: `"boolean"`, diff --git a/schema_parse.go b/schema_parse.go index 71af2d3..706250b 100644 --- a/schema_parse.go +++ b/schema_parse.go @@ -135,13 +135,7 @@ func parseComplexType(namespace string, m map[string]any, seen seenCache, cache typ := Type(str) switch typ { - case Null: - // TODO: Per the spec, "null" is a primitive type so should be permitted to - // have other properties/metadata. - // https://avro.apache.org/docs/1.12.0/specification/#primitive-types - return &NullSchema{}, nil - - case String, Bytes, Int, Long, Float, Double, Boolean: + case String, Bytes, Int, Long, Float, Double, Boolean, Null: return parsePrimitive(typ, m) case Record, Error: @@ -172,6 +166,9 @@ type primitiveSchema struct { func parsePrimitive(typ Type, m map[string]any) (Schema, error) { if len(m) == 0 { + if typ == Null { + return &NullSchema{}, nil + } return NewPrimitiveSchema(typ, nil), nil } @@ -191,6 +188,9 @@ func parsePrimitive(typ Type, m map[string]any) (Schema, error) { } } + if typ == Null { + return NewNullSchema(WithProps(p.Props)), nil + } return NewPrimitiveSchema(typ, logical, WithProps(p.Props)), nil } diff --git a/schema_test.go b/schema_test.go index 3e10cb8..4d0c064 100644 --- a/schema_test.go +++ b/schema_test.go @@ -58,6 +58,7 @@ func TestNullSchema(t *testing.T) { schemas := []string{ `null`, `{"type":"null"}`, + `{"type":"null", "other-property": 123, "another-property": ["a","b","c"]}`, } for _, schm := range schemas { @@ -1892,6 +1893,27 @@ func TestParse_PreservesAllProperties(t *testing.T) { }, rec.Props()) }, }, + { + name: "null", + schema: `{ + "type": "null", + "name": "SomeMap", + "logicalType": "weights", + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.NullSchema) + assert.Equal(t, map[string]any{ + "name": "SomeMap", + "logicalType": "weights", + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, } for _, testCase := range testCases { t.Run(testCase.name, func(t *testing.T) { @@ -2141,4 +2163,30 @@ func TestNewSchema_IgnoresInvalidProperties(t *testing.T) { "other": true, }, rec.Props()) }) + t.Run("null", func(t *testing.T) { + rec := avro.NewNullSchema( + avro.WithProps(map[string]any{ + // invalid (conflict with other type properties) + "type": false, + // valid + "name": 123, + "namespace": "abc", + "doc": "blah", + "aliases": "foo", + "other": true, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + })) + assert.Equal(t, map[string]any{ + "name": 123, + "namespace": "abc", + "doc": "blah", + "aliases": "foo", + "other": true, + "logicalType": "baz", + "precision": "abc", + "scale": "def", + }, rec.Props()) + }) } From 757633935be145bebe4339ac60617f06ffc18f6b Mon Sep 17 00:00:00 2001 From: Josh Humphries <2035234+jhump@users.noreply.github.com> Date: Fri, 11 Oct 2024 12:56:33 -0400 Subject: [PATCH 3/5] make linter happy; code review feedback: rename otherProps->props, remove LogicalType field from intermediate structs that don't really use it --- schema.go | 18 +++++---- schema_parse.go | 99 +++++++++++++++++++++++++++++-------------------- 2 files changed, 68 insertions(+), 49 deletions(-) diff --git a/schema.go b/schema.go index 623c226..b3b97d6 100644 --- a/schema.go +++ b/schema.go @@ -27,14 +27,16 @@ var nullDefault nullDefaultType = struct{}{} var ( // Note: order matches the order of properties as they are named in the spec. // https://avro.apache.org/docs/1.12.0/specification - recordReserved = []string{"type", "name", "namespace", "doc", "aliases", "fields"} - fieldReserved = []string{"name", "doc", "type", "order", "aliases", "default"} - enumReserved = []string{"type", "name", "namespace", "aliases", "doc", "symbols", "default"} - arrayReserved = []string{"type", "items"} - mapReserved = []string{"type", "values"} - fixedReserved = []string{"type", "name", "namespace", "aliases", "size"} - fixedWithLogicalTypeReserved = []string{"type", "name", "namespace", "aliases", "size", "logicalType"} - fixedWithDecimalTypeReserved = []string{"type", "name", "namespace", "aliases", "size", "logicalType", "precision", "scale"} + recordReserved = []string{"type", "name", "namespace", "doc", "aliases", "fields"} + fieldReserved = []string{"name", "doc", "type", "order", "aliases", "default"} + enumReserved = []string{"type", "name", "namespace", "aliases", "doc", "symbols", "default"} + arrayReserved = []string{"type", "items"} + mapReserved = []string{"type", "values"} + fixedReserved = []string{"type", "name", "namespace", "aliases", "size"} + fixedWithLogicalTypeReserved = []string{"type", "name", "namespace", "aliases", "size", "logicalType"} + fixedWithDecimalTypeReserved = []string{ + "type", "name", "namespace", "aliases", "size", "logicalType", "precision", "scale", + } primitiveReserved = []string{"type"} primitiveWithLogicalTypeReserved = []string{"type", "logicalType"} primitiveWithDecimalTypeReserved = []string{"type", "logicalType", "precision", "scale"} diff --git a/schema_parse.go b/schema_parse.go index 706250b..7aab400 100644 --- a/schema_parse.go +++ b/schema_parse.go @@ -121,10 +121,12 @@ func parsePrimitiveType(namespace, s string, cache *SchemaCache) (Schema, error) func parseComplexType(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { if val, ok := m["type"].([]any); ok { - // TODO: According to the spec, this is not allowed: - // https://avro.apache.org/docs/1.12.0/specification/#schema-declaration - // The "type" property in an object must be a string. A union type will be a slice, - // but not an object with a "type" property that is a slice. + // Note: According to the spec, this is not allowed: + // https://avro.apache.org/docs/1.12.0/specification/#schema-declaration + // The "type" property in an object must be a string. A union type will be a slice, + // but NOT an object with a "type" property that is a slice. + // Might be advisable to remove this call (tradeoff between better conformance + // with the spec vs. possible backwards-compatibility issue). return parseUnion(namespace, val, seen, cache) } @@ -194,7 +196,7 @@ func parsePrimitive(typ Type, m map[string]any) (Schema, error) { return NewPrimitiveSchema(typ, logical, WithProps(p.Props)), nil } -func parsePrimitiveLogicalType(typ Type, lt string, otherProps map[string]any) LogicalSchema { +func parsePrimitiveLogicalType(typ Type, lt string, props map[string]any) LogicalSchema { ltyp := LogicalType(lt) if (typ == String && ltyp == UUID) || (typ == Int && ltyp == Date) || @@ -208,21 +210,20 @@ func parsePrimitiveLogicalType(typ Type, lt string, otherProps map[string]any) L } if typ == Bytes && ltyp == Decimal { - return parseDecimalLogicalType(-1, otherProps) + return parseDecimalLogicalType(-1, props) } return nil // otherwise, not a recognized logical type } type recordSchema struct { - Type string `mapstructure:"type"` - LogicalType string `mapstructure:"logicalType"` - Name string `mapstructure:"name"` - Namespace string `mapstructure:"namespace"` - Aliases []string `mapstructure:"aliases"` - Doc string `mapstructure:"doc"` - Fields []map[string]any `mapstructure:"fields"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + Name string `mapstructure:"name"` + Namespace string `mapstructure:"namespace"` + Aliases []string `mapstructure:"aliases"` + Doc string `mapstructure:"doc"` + Fields []map[string]any `mapstructure:"fields"` + Props map[string]any `mapstructure:",remain"` } func parseRecord(typ Type, namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -240,7 +241,9 @@ func parseRecord(typ Type, namespace string, m map[string]any, seen seenCache, c if r.Namespace == "" { r.Namespace = namespace } - preserveLogicalType(r.LogicalType, &r.Props) + if err := checkLogicalType(r.Props); err != nil { + return nil, err + } if !hasKey(meta.Keys, "fields") { return nil, errors.New("avro: record must have an array of fields") @@ -332,15 +335,14 @@ func parseField(namespace string, m map[string]any, seen seenCache, cache *Schem } type enumSchema struct { - Name string `mapstructure:"name"` - Namespace string `mapstructure:"namespace"` - Aliases []string `mapstructure:"aliases"` - Type string `mapstructure:"type"` - LogicalType string `mapstructure:"logicalType"` - Doc string `mapstructure:"doc"` - Symbols []string `mapstructure:"symbols"` - Default string `mapstructure:"default"` - Props map[string]any `mapstructure:",remain"` + Name string `mapstructure:"name"` + Namespace string `mapstructure:"namespace"` + Aliases []string `mapstructure:"aliases"` + Type string `mapstructure:"type"` + Doc string `mapstructure:"doc"` + Symbols []string `mapstructure:"symbols"` + Default string `mapstructure:"default"` + Props map[string]any `mapstructure:",remain"` } func parseEnum(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -358,7 +360,9 @@ func parseEnum(namespace string, m map[string]any, seen seenCache, cache *Schema if e.Namespace == "" { e.Namespace = namespace } - preserveLogicalType(e.LogicalType, &e.Props) + if err := checkLogicalType(e.Props); err != nil { + return nil, err + } enum, err := NewEnumSchema(e.Name, e.Namespace, e.Symbols, WithDefault(e.Default), WithAliases(e.Aliases), WithDoc(e.Doc), WithProps(e.Props), @@ -381,10 +385,9 @@ func parseEnum(namespace string, m map[string]any, seen seenCache, cache *Schema } type arraySchema struct { - Type string `mapstructure:"type"` - LogicalType string `mapstructure:"logicalType"` - Items any `mapstructure:"items"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + Items any `mapstructure:"items"` + Props map[string]any `mapstructure:",remain"` } func parseArray(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -403,16 +406,17 @@ func parseArray(namespace string, m map[string]any, seen seenCache, cache *Schem if err != nil { return nil, err } - preserveLogicalType(a.LogicalType, &a.Props) + if err := checkLogicalType(a.Props); err != nil { + return nil, err + } return NewArraySchema(schema, WithProps(a.Props)), nil } type mapSchema struct { - Type string `mapstructure:"type"` - LogicalType string `mapstructure:"logicalType"` - Values any `mapstructure:"values"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + Values any `mapstructure:"values"` + Props map[string]any `mapstructure:",remain"` } func parseMap(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -431,7 +435,9 @@ func parseMap(namespace string, m map[string]any, seen seenCache, cache *SchemaC if err != nil { return nil, err } - preserveLogicalType(ms.LogicalType, &ms.Props) + if err := checkLogicalType(ms.Props); err != nil { + return nil, err + } return NewMapSchema(schema, WithProps(ms.Props)), nil } @@ -505,13 +511,13 @@ func parseFixed(namespace string, m map[string]any, seen seenCache, cache *Schem return fixed, nil } -func parseFixedLogicalType(size int, lt string, otherProps map[string]any) LogicalSchema { +func parseFixedLogicalType(size int, lt string, props map[string]any) LogicalSchema { ltyp := LogicalType(lt) switch { case ltyp == Duration && size == 12: return NewPrimitiveLogicalSchema(Duration) case ltyp == Decimal: - return parseDecimalLogicalType(size, otherProps) + return parseDecimalLogicalType(size, props) } return nil @@ -522,19 +528,19 @@ type decimalSchema struct { Scale int `mapstructure:"scale"` } -func parseDecimalLogicalType(size int, otherProps map[string]any) LogicalSchema { +func parseDecimalLogicalType(size int, props map[string]any) LogicalSchema { var ( d decimalSchema meta mapstructure.Metadata ) - if err := decodeMap(otherProps, &d, &meta); err != nil { + if err := decodeMap(props, &d, &meta); err != nil { return nil } decType := newDecimalLogicalType(size, d.Precision, d.Scale) if decType != nil { // Remove the properties that we consumed - delete(otherProps, "precision") - delete(otherProps, "scale") + delete(props, "precision") + delete(props, "scale") } return decType } @@ -646,3 +652,14 @@ func preserveLogicalType(logicalType string, props *map[string]any) { } (*props)["logicalType"] = logicalType } + +func checkLogicalType(props map[string]any) error { + val, ok := props["logicalType"] + if !ok { + return nil + } + if _, isString := val.(string); !isString { + return fmt.Errorf(`"logicalType" attribute must be a string, got %T`, val) + } + return nil +} From 767e78a64e439f2e2b2be4f4872ecb656cc63765 Mon Sep 17 00:00:00 2001 From: Josh Humphries <2035234+jhump@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:47:34 -0400 Subject: [PATCH 4/5] update ocf_test.go to verify that ocf.FullSchemaMarshaler can retain unrecognized logicalType attributes --- ocf/ocf_test.go | 1 + ocf/testdata/full-schema.json | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ocf/ocf_test.go b/ocf/ocf_test.go index 38a462b..200a779 100644 --- a/ocf/ocf_test.go +++ b/ocf/ocf_test.go @@ -1074,6 +1074,7 @@ func TestWithSchemaMarshaler(t *testing.T) { "name": "meta", "type": { "type": "array", + "logicalType": "map", "items": { "type": "record", "name": "FooMetadataEntry", diff --git a/ocf/testdata/full-schema.json b/ocf/testdata/full-schema.json index a1b5999..003d072 100644 --- a/ocf/testdata/full-schema.json +++ b/ocf/testdata/full-schema.json @@ -35,7 +35,8 @@ "field-id": 5 } ] - } + }, + "logicalType": "map" }, "field-id": 3 } From 0a95895de14b92fb8e263b342f61ea35d9c2ef57 Mon Sep 17 00:00:00 2001 From: Josh Humphries <2035234+jhump@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:55:44 -0400 Subject: [PATCH 5/5] allow non-string logicalType property; if not a string, will always go into other props, even for fixed and primitive --- schema_parse.go | 68 +++++++++++++++---------------------------------- schema_test.go | 48 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 48 deletions(-) diff --git a/schema_parse.go b/schema_parse.go index 7aab400..6903c02 100644 --- a/schema_parse.go +++ b/schema_parse.go @@ -161,9 +161,8 @@ func parseComplexType(namespace string, m map[string]any, seen seenCache, cache } type primitiveSchema struct { - Type string `mapstructure:"type"` - LogicalType string `mapstructure:"logicalType"` - Props map[string]any `mapstructure:",remain"` + Type string `mapstructure:"type"` + Props map[string]any `mapstructure:",remain"` } func parsePrimitive(typ Type, m map[string]any) (Schema, error) { @@ -183,10 +182,10 @@ func parsePrimitive(typ Type, m map[string]any) (Schema, error) { } var logical LogicalSchema - if p.LogicalType != "" { - logical = parsePrimitiveLogicalType(typ, p.LogicalType, p.Props) - if logical == nil { - preserveLogicalType(p.LogicalType, &p.Props) + if logicalType := logicalTypeProperty(p.Props); logicalType != "" { + logical = parsePrimitiveLogicalType(typ, logicalType, p.Props) + if logical != nil { + delete(p.Props, "logicalType") } } @@ -241,9 +240,6 @@ func parseRecord(typ Type, namespace string, m map[string]any, seen seenCache, c if r.Namespace == "" { r.Namespace = namespace } - if err := checkLogicalType(r.Props); err != nil { - return nil, err - } if !hasKey(meta.Keys, "fields") { return nil, errors.New("avro: record must have an array of fields") @@ -360,9 +356,6 @@ func parseEnum(namespace string, m map[string]any, seen seenCache, cache *Schema if e.Namespace == "" { e.Namespace = namespace } - if err := checkLogicalType(e.Props); err != nil { - return nil, err - } enum, err := NewEnumSchema(e.Name, e.Namespace, e.Symbols, WithDefault(e.Default), WithAliases(e.Aliases), WithDoc(e.Doc), WithProps(e.Props), @@ -406,9 +399,6 @@ func parseArray(namespace string, m map[string]any, seen seenCache, cache *Schem if err != nil { return nil, err } - if err := checkLogicalType(a.Props); err != nil { - return nil, err - } return NewArraySchema(schema, WithProps(a.Props)), nil } @@ -435,9 +425,6 @@ func parseMap(namespace string, m map[string]any, seen seenCache, cache *SchemaC if err != nil { return nil, err } - if err := checkLogicalType(ms.Props); err != nil { - return nil, err - } return NewMapSchema(schema, WithProps(ms.Props)), nil } @@ -456,13 +443,12 @@ func parseUnion(namespace string, v []any, seen seenCache, cache *SchemaCache) ( } type fixedSchema struct { - Name string `mapstructure:"name"` - Namespace string `mapstructure:"namespace"` - Aliases []string `mapstructure:"aliases"` - Type string `mapstructure:"type"` - Size int `mapstructure:"size"` - LogicalType string `mapstructure:"logicalType"` - Props map[string]any `mapstructure:",remain"` + Name string `mapstructure:"name"` + Namespace string `mapstructure:"namespace"` + Aliases []string `mapstructure:"aliases"` + Type string `mapstructure:"type"` + Size int `mapstructure:"size"` + Props map[string]any `mapstructure:",remain"` } func parseFixed(namespace string, m map[string]any, seen seenCache, cache *SchemaCache) (Schema, error) { @@ -486,10 +472,10 @@ func parseFixed(namespace string, m map[string]any, seen seenCache, cache *Schem } var logical LogicalSchema - if f.LogicalType != "" { - logical = parseFixedLogicalType(f.Size, f.LogicalType, f.Props) - if logical == nil { - preserveLogicalType(f.LogicalType, &f.Props) + if logicalType := logicalTypeProperty(f.Props); logicalType != "" { + logical = parseFixedLogicalType(f.Size, logicalType, f.Props) + if logical != nil { + delete(f.Props, "logicalType") } } @@ -643,23 +629,9 @@ func (c seenCache) Add(name string) error { return nil } -func preserveLogicalType(logicalType string, props *map[string]any) { - if logicalType == "" { - return // nothing to preserve +func logicalTypeProperty(props map[string]any) string { + if lt, ok := props["logicalType"].(string); ok { + return lt } - if *props == nil { - *props = make(map[string]any, 1) - } - (*props)["logicalType"] = logicalType -} - -func checkLogicalType(props map[string]any) error { - val, ok := props["logicalType"] - if !ok { - return nil - } - if _, isString := val.(string); !isString { - return fmt.Errorf(`"logicalType" attribute must be a string, got %T`, val) - } - return nil + return "" } diff --git a/schema_test.go b/schema_test.go index 4d0c064..cc8ccea 100644 --- a/schema_test.go +++ b/schema_test.go @@ -1695,6 +1695,30 @@ func TestParse_PreservesAllProperties(t *testing.T) { }, rec.Props()) }, }, + { + name: "fixed-invalid-logical-type", + schema: `{ + "type": "fixed", + "name": "SomeFixed", + "size": 16, + "logicalType": {"foo": "bar", "baz": ["x","y","z"]}, + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.FixedSchema) + assert.Equal(t, map[string]any{ + "logicalType": map[string]any{ + "foo": "bar", + "baz": []any{"x", "y", "z"}, + }, + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, { name: "fixed-decimal-logical-type", schema: `{ @@ -1819,6 +1843,30 @@ func TestParse_PreservesAllProperties(t *testing.T) { }, rec.Props()) }, }, + { + name: "primitive-invalid-logical-type", + schema: `{ + "type": "string", + "name": "SomeString", + "logicalType": {"foo": "bar", "baz": ["x","y","z"]}, + "precision": "abc", + "scale": "def", + "other": [1,2,3] + }`, + check: func(t *testing.T, schema avro.Schema) { + rec := schema.(*avro.PrimitiveSchema) + assert.Equal(t, map[string]any{ + "name": "SomeString", + "logicalType": map[string]any{ + "foo": "bar", + "baz": []any{"x", "y", "z"}, + }, + "precision": "abc", + "scale": "def", + "other": []any{1.0, 2.0, 3.0}, + }, rec.Props()) + }, + }, { name: "primitive-date-logical-type", schema: `{