Skip to content

Commit

Permalink
Merge pull request #3 from toknapp/allow-unknown-enum-values
Browse files Browse the repository at this point in the history
Allow unknown enum values
  • Loading branch information
ubunatic authored Jun 3, 2024
2 parents f80734d + 072cb6e commit 26d5222
Show file tree
Hide file tree
Showing 11 changed files with 595 additions and 67 deletions.
51 changes: 27 additions & 24 deletions encoding/protobq/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,23 @@ func Test_Integration_PublicDataSets(t *testing.T) {
Message: &publicv1.FilmLocation{},
},

{
ProjectID: "bigquery-public-data",
DatasetID: "hacker_news",
TableID: "stories",
Limit: 10,
Message: &publicv1.HackerNewsStory{},
},
// no longer accessible
// {
// ProjectID: "bigquery-public-data",
// DatasetID: "hacker_news",
// TableID: "stories",
// Limit: 10,
// Message: &publicv1.HackerNewsStory{},
// },

{
ProjectID: "bigquery-public-data",
DatasetID: "london_bicycles",
TableID: "cycle_hire",
Limit: 10,
Message: &publicv1.LondonBicycleRental{},
},
// no longer accessible
// {
// ProjectID: "bigquery-public-data",
// DatasetID: "london_bicycles",
// TableID: "cycle_hire",
// Limit: 10,
// Message: &publicv1.LondonBicycleRental{},
// },

{
ProjectID: "bigquery-public-data",
Expand All @@ -65,16 +67,17 @@ func Test_Integration_PublicDataSets(t *testing.T) {
Message: &publicv1.SanFransiscoTransitStopTime{},
},

{
ProjectID: "bigquery-public-data",
DatasetID: "london_bicycles",
TableID: "cycle_stations",
Limit: 10,
Message: &publicv1.LondonBicycleStation{},
UnmarshalOptions: protobq.UnmarshalOptions{
DiscardUnknown: true, // Ignore non-snake case field "nbEmptyDocks".
},
},
// no longer accessible
// {
// ProjectID: "bigquery-public-data",
// DatasetID: "london_bicycles",
// TableID: "cycle_stations",
// Limit: 10,
// Message: &publicv1.LondonBicycleStation{},
// UnmarshalOptions: protobq.UnmarshalOptions{
// DiscardUnknown: true, // Ignore non-snake case field "nbEmptyDocks".
// },
// },

{
ProjectID: "bigquery-public-data",
Expand Down
17 changes: 14 additions & 3 deletions encoding/protobq/marshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ func Marshal(msg proto.Message) (map[string]bigquery.Value, error) {
type MarshalOptions struct {
// Schema contains the schema options.
Schema SchemaOptions

DiscardUnknownEnumValues bool
}

// Marshal marshals the given proto.Message in the BigQuery format using options in
Expand Down Expand Up @@ -80,10 +82,14 @@ func (o MarshalOptions) marshalMessage(msg protoreflect.Message) (map[string]big
if o.Schema.UseOneofFields {
for i := 0; i < msg.Descriptor().Oneofs().Len(); i++ {
oneofDescriptor := msg.Descriptor().Oneofs().Get(i)
if oneofDescriptor.IsSynthetic() {
continue
}
oneofField := msg.WhichOneof(oneofDescriptor)
if oneofField != nil {
result[string(oneofDescriptor.Name())] = string(oneofField.Name())
if oneofField == nil {
continue
}
result[string(oneofDescriptor.Name())] = string(oneofField.Name())
}
}
return result, nil
Expand Down Expand Up @@ -262,7 +268,12 @@ func (o MarshalOptions) marshalEnumValue(
if enumValue := field.Enum().Values().ByNumber(enumNumber); enumValue != nil {
return string(enumValue.Name()), nil
}
return nil, fmt.Errorf("unknown enum number: %v", value.Enum())
if o.DiscardUnknownEnumValues {
// Use 'null' for BQ rows to indicate that no value could be determined.
return nil, nil
}
path := fmt.Sprintf("%s.%s", field.Parent().FullName(), field.Name())
return nil, fmt.Errorf("unknown enum number: %v, fieldname: %v", value.Enum(), path)
}

func (o MarshalOptions) marshalWellKnownTypeValue(
Expand Down
63 changes: 61 additions & 2 deletions encoding/protobq/marshal_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package protobq

import (
"fmt"
"testing"
"time"

Expand Down Expand Up @@ -335,12 +336,70 @@ func TestMarshalOptions_Marshal(t *testing.T) {
opt: MarshalOptions{Schema: SchemaOptions{UseOneofFields: true}},
expected: map[string]bigquery.Value{},
},

{
name: "optional oneof and optional message",
msg: &examplev1.ExampleOptional{
OptMessage_2: &examplev1.ExampleOptMessage{
StringValue: "opt_message_string_value",
},
OptOneofMessage_3: &examplev1.ExampleOptOneof{
OneofFields_1: &examplev1.ExampleOptOneof_OneofMessage_2{
OneofMessage_2: &examplev1.ExampleOptOneof_Message{
StringValue: "opt_one_of_message_string_value",
},
},
},
},
opt: MarshalOptions{Schema: SchemaOptions{UseOneofFields: true}},
expected: map[string]bigquery.Value{
"opt_message_2": map[string]bigquery.Value{"string_value": string("opt_message_string_value")},
"opt_oneof_message_3": map[string]bigquery.Value{
"oneof_fields_1": string("oneof_message_2"),
"oneof_message_2": map[string]bigquery.Value{"string_value": string("opt_one_of_message_string_value")},
},
},
},

{
name: "optional empty oneof and optional empty message",
msg: &examplev1.ExampleOptional{
OptMessage_2: nil,
OptOneofMessage_3: nil,
},
opt: MarshalOptions{Schema: SchemaOptions{UseOneofFields: true}},
expected: map[string]bigquery.Value{},
},

{
name: "allow unknown enum values",
msg: &examplev1.ExampleEnum{
EnumValue: examplev1.ExampleEnum_Enum(100),
},
opt: MarshalOptions{Schema: SchemaOptions{UseOneofFields: true}, DiscardUnknownEnumValues: true},
expected: map[string]bigquery.Value{
"enum_value": nil,
},
},
{
name: "disallow unknown enum values",
msg: &examplev1.ExampleEnum{
EnumValue: examplev1.ExampleEnum_Enum(100),
},
opt: MarshalOptions{Schema: SchemaOptions{UseOneofFields: true}, DiscardUnknownEnumValues: false},
expected: nil,
},
} {
tt := tt
t.Run(tt.name, func(t *testing.T) {
actual, err := tt.opt.Marshal(tt.msg)
assert.NilError(t, err)
assert.DeepEqual(t, tt.expected, actual)
if tt.expected == nil {
fmt.Println(err)
assert.Error(t, err, "unknown enum number: 100, fieldname: einride.bigquery.example.v1.ExampleEnum.enum_value")
} else {
assert.NilError(t, err)
assert.DeepEqual(t, tt.expected, actual)
}
})
}
}
24 changes: 23 additions & 1 deletion encoding/protobq/schema_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -394,10 +394,32 @@ func TestSchemaOptions_InferSchema(t *testing.T) {
msg: &examplev1.ExampleOptional{},
expected: bigquery.Schema{
{
Name: "opt",
Name: "opt_double_1",
Type: bigquery.FloatFieldType,
Required: false,
},
{
Name: "opt_message_2",
Type: "RECORD",
Schema: bigquery.Schema{{Name: "string_value", Type: bigquery.StringFieldType}},
},
{
Name: "opt_oneof_message_3",
Type: bigquery.RecordFieldType,
Required: false,
Schema: bigquery.Schema{
{
Name: "oneof_message_2",
Type: bigquery.RecordFieldType,
Schema: bigquery.Schema{{Name: "string_value", Type: bigquery.StringFieldType}},
},
{
Name: "oneof_fields_1",
Description: "One of: oneof_empty_message_1, oneof_message_2.",
Type: bigquery.StringFieldType,
},
},
},
},
},
} {
Expand Down
15 changes: 15 additions & 0 deletions encoding/protobq/unmarshal.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ type UnmarshalOptions struct {

// If DiscardUnknown is set, unknown fields are ignored.
DiscardUnknown bool

// If DiscardUnknownEnumValues is set, unknown enum values are set to the empty value.
// For compatibility, this is independent of the complementary DiscardUnknown option.
DiscardUnknownEnumValues bool
}

// Unmarshal reads the given BigQuery row and populates the given proto.Message using
Expand Down Expand Up @@ -635,20 +639,31 @@ func (o UnmarshalOptions) unmarshalEnumScalar(
if o.Schema.UseEnumNumbers {
v, ok := bqValue.(int64)
if !ok {
if o.DiscardUnknownEnumValues {
return protoreflect.Value{}, nil
}
return protoreflect.Value{}, fmt.Errorf(
"invalid BigQuery value %#v for enum %s number", bqValue, field.Enum().FullName(),
)
}
return protoreflect.ValueOfEnum(protoreflect.EnumNumber(int32(v))), nil
}

v, ok := bqValue.(string)
if !ok {
if o.DiscardUnknownEnumValues {
return protoreflect.Value{}, nil
}
return protoreflect.Value{}, fmt.Errorf(
"invalid BigQuery value %#v for enum %s", bqValue, field.Enum().FullName(),
)
}

enumVal := field.Enum().Values().ByName(protoreflect.Name(v))
if enumVal == nil {
if o.DiscardUnknownEnumValues {
return protoreflect.Value{}, nil
}
return protoreflect.Value{}, fmt.Errorf(
"unknown enum value %#v for enum %s", bqValue, field.Enum().FullName(),
)
Expand Down
21 changes: 21 additions & 0 deletions encoding/protobq/unmarshal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,27 @@ func TestUnmarshalOptions_Unmarshal(t *testing.T) {
opt: UnmarshalOptions{Schema: SchemaOptions{UseOneofFields: true}},
expected: &examplev1.ExampleOneof{},
},

{
name: "discard unknown enum string",
row: map[string]bigquery.Value{"enum_value": "ENUM_VALUE_100"},
opt: UnmarshalOptions{DiscardUnknownEnumValues: true},
expected: &examplev1.ExampleEnum{EnumValue: examplev1.ExampleEnum_ENUM_UNSPECIFIED},
},

{
name: "discard unknown enum number",
row: map[string]bigquery.Value{"enum_value": 100},
opt: UnmarshalOptions{DiscardUnknownEnumValues: true},
expected: &examplev1.ExampleEnum{EnumValue: examplev1.ExampleEnum_ENUM_UNSPECIFIED},
},

{
name: "discard enum null",
row: map[string]bigquery.Value{"enum_value": nil},
opt: UnmarshalOptions{DiscardUnknownEnumValues: true},
expected: &examplev1.ExampleEnum{EnumValue: examplev1.ExampleEnum_ENUM_UNSPECIFIED},
},
} {
tt := tt
t.Run(tt.name, func(t *testing.T) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,22 @@ syntax = "proto3";
package einride.bigquery.example.v1;

message ExampleOptional {
optional double opt = 1;
optional double opt_double_1 = 1;
optional ExampleOptMessage opt_message_2 = 2;
optional ExampleOptOneof opt_oneof_message_3 = 3;
}

message ExampleOptMessage {
string string_value = 1;
}

message ExampleOptOneof {
oneof oneof_fields_1 {
EmptyMessage oneof_empty_message_1 = 1;
Message oneof_message_2 = 2;
}
message EmptyMessage {}
message Message {
string string_value = 1;
}
}
Loading

0 comments on commit 26d5222

Please sign in to comment.