From b8db342f5ce4b73123a139bcdd47112aa1715a0d Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Thu, 28 Sep 2023 15:57:13 +0200 Subject: [PATCH] feat: add ArchivalMaybeBinaryString type (#1325) This type is ~equivalent to ArchivalMaybeBinaryData but designed to hold a string, do less type conversions, and be easier to use. The intent is to replace the ArchivalMaybeBinaryData type with this type. For now, let us introduce the new type and its tests. Part of https://github.com/ooni/probe/issues/2531 --- internal/model/archival.go | 47 ++++++ internal/model/archival_test.go | 261 ++++++++++++++++++++++++++++++-- 2 files changed, 299 insertions(+), 9 deletions(-) diff --git a/internal/model/archival.go b/internal/model/archival.go index d640f6b351..88b675a819 100644 --- a/internal/model/archival.go +++ b/internal/model/archival.go @@ -119,11 +119,58 @@ func (value *ArchivalBinaryData) UnmarshalJSON(raw []byte) error { return nil } +// ArchivalMaybeBinaryString is a possibly-binary string. When the string is valid UTF-8 +// we serialize it as itself. Otherwise, we use the binary data format defined by +// https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata +type ArchivalMaybeBinaryString string + +var ( + _ json.Marshaler = ArchivalMaybeBinaryString("") + _ json.Unmarshaler = (func() *ArchivalMaybeBinaryString { return nil }()) +) + +// MarshalJSON implements json.Marshaler. +func (value ArchivalMaybeBinaryString) MarshalJSON() ([]byte, error) { + // convert value to a string + str := string(value) + + // TODO(bassosimone): here is where we should scrub the string in the future + // once we have replaced AchivalMaybeBinaryData with ArchivalMaybeBinaryString + + // if we can serialize as UTF-8 string, do that + if utf8.ValidString(str) { + return json.Marshal(str) + } + + // otherwise fallback to the serialization of ArchivalBinaryData + return json.Marshal(ArchivalBinaryData(str)) +} + +// UnmarshalJSON implements json.Unmarshaler. +func (value *ArchivalMaybeBinaryString) UnmarshalJSON(rawData []byte) error { + // first attempt to decode as a string + var s string + if err := json.Unmarshal(rawData, &s); err == nil { + *value = ArchivalMaybeBinaryString(s) + return nil + } + + // then attempt to decode as ArchivalBinaryData + var d ArchivalBinaryData + if err := json.Unmarshal(rawData, &d); err != nil { + return err + } + *value = ArchivalMaybeBinaryString(d) + return nil +} + // ArchivalMaybeBinaryData is a possibly binary string. We use this helper class // to define a custom JSON encoder that allows us to choose the proper // representation depending on whether the Value field is valid UTF-8 or not. // // See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata +// +// Deprecated: do not use this type in new code. type ArchivalMaybeBinaryData struct { Value string } diff --git a/internal/model/archival_test.go b/internal/model/archival_test.go index 56e3ad169a..f3ebbfb560 100644 --- a/internal/model/archival_test.go +++ b/internal/model/archival_test.go @@ -61,22 +61,22 @@ func TestArchivalBinaryData(t *testing.T) { } cases := []testcase{{ - name: "with nil .Value", + name: "with nil value", input: nil, expectErr: nil, expectData: []byte("null"), }, { - name: "with zero length .Value", + name: "with zero length value", input: []byte{}, expectErr: nil, expectData: []byte("null"), }, { - name: "with .Value being a simple binary string", + name: "with value being a simple binary string", input: []byte("Elliot"), expectErr: nil, expectData: []byte(`{"data":"RWxsaW90","format":"base64"}`), }, { - name: "with .Value being a long binary string", + name: "with value being a long binary string", input: archivalBinaryInput, expectErr: nil, expectData: archivalEncodedBinaryInput, @@ -207,7 +207,7 @@ func TestArchivalBinaryData(t *testing.T) { err := json.Unmarshal(tc.input, &abd) t.Log("got this error", err) - t.Log("got this .Value field", abd) + t.Log("got this []byte-like value", abd) t.Logf("converted to string: %s", string(abd)) // handle errors @@ -246,16 +246,16 @@ func TestArchivalBinaryData(t *testing.T) { } cases := []testcase{{ - name: "with nil .Value", + name: "with nil value", input: nil, }, { - name: "with zero length .Value", + name: "with zero length value", input: []byte{}, }, { - name: "with .Value being a simple binary string", + name: "with value being a simple binary string", input: []byte("Elliot"), }, { - name: "with .Value being a long binary string", + name: "with value being a long binary string", input: archivalBinaryInput, }} @@ -294,6 +294,249 @@ func TestArchivalBinaryData(t *testing.T) { }) } +func TestArchivalMaybeBinaryString(t *testing.T) { + // This test verifies that we correctly serialize a string to JSON by + // producing "" | {"format":"base64","data":""} + t.Run("MarshalJSON", func(t *testing.T) { + // testcase is a test case defined by this function + type testcase struct { + // name is the name of the test case + name string + + // input is the possibly-binary input + input model.ArchivalMaybeBinaryString + + // expectErr is the error we expect to see or nil + expectErr error + + // expectData is the data we expect to see + expectData []byte + } + + cases := []testcase{{ + name: "with empty string value", + input: "", + expectErr: nil, + expectData: []byte(`""`), + }, { + name: "with value being a textual string", + input: "Elliot", + expectErr: nil, + expectData: []byte(`"Elliot"`), + }, { + name: "with value being a long binary string", + input: model.ArchivalMaybeBinaryString(archivalBinaryInput), + expectErr: nil, + expectData: archivalEncodedBinaryInput, + }} + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // serialize to JSON + output, err := json.Marshal(tc.input) + + t.Log("got this error", err) + t.Log("got this binary data", output) + t.Logf("converted to string: %s", string(output)) + + // handle errors + switch { + case err == nil && tc.expectErr != nil: + t.Fatal("expected", tc.expectErr, "got", err) + + case err != nil && tc.expectErr == nil: + t.Fatal("expected", tc.expectErr, "got", err) + + case err != nil && tc.expectErr != nil: + if err.Error() != tc.expectErr.Error() { + t.Fatal("expected", tc.expectErr, "got", err) + } + + case err == nil && tc.expectErr == nil: + // all good--fallthrough + } + + if diff := cmp.Diff(tc.expectData, output); diff != "" { + t.Fatal(diff) + } + }) + } + }) + + // This test verifies that we correctly parse binary data to JSON by + // reading from "" | {"format":"base64","data":""} + t.Run("UnmarshalJSON", func(t *testing.T) { + // testcase is a test case defined by this function + type testcase struct { + // name is the name of the test case + name string + + // input is the binary input + input []byte + + // expectErr is the error we expect to see or nil + expectErr error + + // expectData is the data we expect + expectData model.ArchivalMaybeBinaryString + } + + cases := []testcase{{ + name: "with nil input array", + input: nil, + expectErr: errors.New("unexpected end of JSON input"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with zero-length input array", + input: []byte{}, + expectErr: errors.New("unexpected end of JSON input"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with binary input that is not a complete JSON", + input: []byte("{"), + expectErr: errors.New("unexpected end of JSON input"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with ~random binary data as input", + input: archivalBinaryInput, + expectErr: errors.New("invalid character 'W' looking for beginning of value"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with valid JSON of the wrong type (array)", + input: []byte("[]"), + expectErr: errors.New("json: cannot unmarshal array into Go value of type model.archivalBinaryDataRepr"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with valid JSON of the wrong type (number)", + input: []byte("1.17"), + expectErr: errors.New("json: cannot unmarshal number into Go value of type model.archivalBinaryDataRepr"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with input being the liternal null", + input: []byte(`null`), + expectErr: nil, + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with empty JSON object", + input: []byte("{}"), + expectErr: errors.New("model: invalid binary data format: ''"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with correct data model but invalid format", + input: []byte(`{"data":"","format":"antani"}`), + expectErr: errors.New("model: invalid binary data format: 'antani'"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with correct data model and format but invalid base64 string", + input: []byte(`{"data":"x","format":"base64"}`), + expectErr: errors.New("illegal base64 data at input byte 0"), + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with correct data model and format but empty base64 string", + input: []byte(`{"data":"","format":"base64"}`), + expectErr: nil, + expectData: model.ArchivalMaybeBinaryString(""), + }, { + name: "with the a string", + input: []byte(`"Elliot"`), + expectErr: nil, + expectData: model.ArchivalMaybeBinaryString("Elliot"), + }, { + name: "with the encoding of a string", + input: []byte(`{"data":"RWxsaW90","format":"base64"}`), + expectErr: nil, + expectData: model.ArchivalMaybeBinaryString("Elliot"), + }, { + name: "with the encoding of a complex binary string", + input: archivalEncodedBinaryInput, + expectErr: nil, + expectData: model.ArchivalMaybeBinaryString(archivalBinaryInput), + }} + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // unmarshal the raw input into an ArchivalBinaryData type + var abd model.ArchivalMaybeBinaryString + err := json.Unmarshal(tc.input, &abd) + + t.Log("got this error", err) + t.Log("got this maybe-binary-string value", abd) + t.Logf("converted to string: %s", string(abd)) + + // handle errors + switch { + case err == nil && tc.expectErr != nil: + t.Fatal("expected", tc.expectErr, "got", err) + + case err != nil && tc.expectErr == nil: + t.Fatal("expected", tc.expectErr, "got", err) + + case err != nil && tc.expectErr != nil: + if err.Error() != tc.expectErr.Error() { + t.Fatal("expected", tc.expectErr, "got", err) + } + + case err == nil && tc.expectErr == nil: + // all good--fallthrough + } + + if diff := cmp.Diff(tc.expectData, abd); diff != "" { + t.Fatal(diff) + } + }) + } + }) + + // This test verifies that we correctly round trip through JSON + t.Run("MarshalJSON then UnmarshalJSON", func(t *testing.T) { + // testcase is a test case defined by this function + type testcase struct { + // name is the name of the test case + name string + + // input is the maybe-binary input + input model.ArchivalMaybeBinaryString + } + + cases := []testcase{{ + name: "with empty value", + input: "", + }, { + name: "with value being a simple textual string", + input: "Elliot", + }, { + name: "with value being a long binary string", + input: model.ArchivalMaybeBinaryString(archivalBinaryInput), + }} + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + // serialize to JSON + output, err := json.Marshal(tc.input) + + t.Log("got this error", err) + t.Log("got this binary data", output) + t.Logf("converted to string: %s", string(output)) + + if err != nil { + t.Fatal(err) + } + + // parse from JSON + var abc model.ArchivalMaybeBinaryString + if err := json.Unmarshal(output, &abc); err != nil { + t.Fatal(err) + } + + // make sure we round tripped + if diff := cmp.Diff(tc.input, abc); diff != "" { + t.Fatal(diff) + } + }) + } + }) +} + func TestMaybeBinaryValue(t *testing.T) { t.Run("MarshalJSON", func(t *testing.T) { tests := []struct {