Skip to content

Commit

Permalink
feat: conditionally validate names of Record, Enums and Fixed. (#415)
Browse files Browse the repository at this point in the history
  • Loading branch information
papanikge authored Jul 17, 2024
1 parent 582d9c9 commit 8ea2833
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 3 deletions.
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ Or use it as a lib in internal commands, it's the `gen` package

## Avro schema validation

### avrosv

A small Avro schema validation command-line utility is also available. This simple tool leverages the
schema parsing functionality of the library, showing validation errors or optionally dumping parsed
schemas to the console. It can be used in CI/CD pipelines to validate schema changes in a repository.
Expand Down Expand Up @@ -223,6 +225,20 @@ Check the options and usage with `-h`:
avrosv -h
```

### Name Validation

Avro names are validated according to the
[Avro specification](https://avro.apache.org/docs/1.11.1/specification/#names).

However, the official Java library does not validate names accordingly, which has resulted in some files in the wild
having invalid names. Thus, this library has a configuration option that allows these invalid names to be parsed.

```go
avro.SkipNameValidation = true
```

Note that this variable is global, so ideally you should reset it to `false` once you are done with the invalid schema.

## Go Version Support

This library supports the last two versions of Go. While the minimum Go version is
Expand Down
2 changes: 1 addition & 1 deletion ocf/ocf.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ type encoderConfig struct {
EncodingConfig avro.API
}

// EncoderFunc represents an configuration function for Encoder.
// EncoderFunc represents a configuration function for Encoder.
type EncoderFunc func(cfg *encoderConfig)

// WithBlockLength sets the block length on the encoder.
Expand Down
37 changes: 37 additions & 0 deletions ocf/ocf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,43 @@ func TestDecoder_WithDeflate(t *testing.T) {
assert.Equal(t, 1, count)
}

// TestDecoder_InvalidName checks that the testdata/invalid-name.avro fixture
// can be decoded when avro.SkipNameValidation is enabled, and that it yields
// exactly one record matching the expected value.
func TestDecoder_InvalidName(t *testing.T) {
	type record struct {
		Hello int    `avro:"hello"`
		What  string `avro:"what"`
	}
	want := record{
		What:  "yes",
		Hello: 1,
	}

	f, err := os.Open("testdata/invalid-name.avro")
	if err != nil {
		t.Error(err)
		return
	}
	t.Cleanup(func() { _ = f.Close() })

	// SkipNameValidation is package-level state: remember the value that was
	// in effect and restore it afterwards, rather than assuming it was false.
	prev := avro.SkipNameValidation
	avro.SkipNameValidation = true
	t.Cleanup(func() { avro.SkipNameValidation = prev })

	dec, err := ocf.NewDecoder(f)
	require.NoError(t, err)

	var count int
	for dec.HasNext() {
		count++
		var got record
		err = dec.Decode(&got)

		require.NoError(t, err)
		assert.Equal(t, want, got)
	}

	// HasNext returning false may also mean a read failure; check explicitly.
	require.NoError(t, dec.Error())
	assert.Equal(t, 1, count)
}

func TestDecoder_WithDeflateHandlesInvalidData(t *testing.T) {
f, err := os.Open("testdata/deflate-invalid-data.avro")
if err != nil {
Expand Down
Binary file added ocf/testdata/invalid-name.avro
Binary file not shown.
2 changes: 1 addition & 1 deletion reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ func (r *Reader) Reset(b []byte) *Reader {
return r
}

// ReportError record a error in iterator instance with current position.
// ReportError records an error in the iterator instance with the current position.
func (r *Reader) ReportError(operation, msg string) {
if r.Error != nil && !errors.Is(r.Error, io.EOF) {
return
Expand Down
4 changes: 4 additions & 0 deletions schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -1549,6 +1549,10 @@ func validateName(name string) error {
return errors.New("name must be a non-empty")
}

if SkipNameValidation {
return nil
}

if strings.IndexFunc(name[:1], invalidNameFirstChar) > -1 {
return fmt.Errorf("invalid name %s", name)
}
Expand Down
10 changes: 10 additions & 0 deletions schema_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ func TestProperties_PropGetsFromEmptySet(t *testing.T) {
assert.Nil(t, p.Prop("test"))
}

// TestName_InvalidNameFirstCharButValidationSkipped checks that newName
// accepts a name starting with an otherwise invalid first character ("+")
// while SkipNameValidation is enabled.
func TestName_InvalidNameFirstCharButValidationSkipped(t *testing.T) {
	// SkipNameValidation is package-level state: restore the value that was
	// in effect before the test instead of assuming it was false.
	prev := SkipNameValidation
	SkipNameValidation = true
	t.Cleanup(func() {
		SkipNameValidation = prev
	})

	_, err := newName("+bar", "foo", nil)
	assert.NoError(t, err)
}

func TestIsValidDefault(t *testing.T) {
tests := []struct {
name string
Expand Down
9 changes: 8 additions & 1 deletion schema_parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ import (
// DefaultSchemaCache is the default cache for schemas.
var DefaultSchemaCache = &SchemaCache{}

// SkipNameValidation sets whether to skip name validation.
// When true, validateName only rejects empty names and accepts everything else.
// The Avro spec imposes a strict naming convention for names and aliases; however, the official Avro tools do not
// enforce it, so schemas with invalid names exist in practice.
// This is a package-level variable, so changing it affects every name validated afterwards.
// More info:
// https://lists.apache.org/thread/39v98os6wdpyr6w31xdkz0yzol51fsrr
// https://github.com/apache/avro/pull/1995
var SkipNameValidation = false

// Parse parses a schema string.
func Parse(schema string) (Schema, error) {
return ParseBytes([]byte(schema))
Expand All @@ -25,7 +32,7 @@ func ParseWithCache(schema, namespace string, cache *SchemaCache) (Schema, error
return ParseBytesWithCache([]byte(schema), namespace, cache)
}

// MustParse parses a schema string, panicing if there is an error.
// MustParse parses a schema string, panicking if there is an error.
func MustParse(schema string) Schema {
parsed, err := Parse(schema)
if err != nil {
Expand Down

0 comments on commit 8ea2833

Please sign in to comment.