Skip to content

Commit

Permalink
feat: add ArchivalMaybeBinaryString type (ooni#1325)
Browse files Browse the repository at this point in the history
This type is ~equivalent to ArchivalMaybeBinaryData but designed to hold
a string, do less type conversions, and be easier to use.

The intent is to replace the ArchivalMaybeBinaryData type with this
type.

For now, let us introduce the new type and its tests.

Part of ooni/probe#2531
  • Loading branch information
bassosimone authored and Murphy-OrangeMud committed Feb 13, 2024
1 parent 99b4a6a commit 64154e0
Show file tree
Hide file tree
Showing 2 changed files with 299 additions and 9 deletions.
47 changes: 47 additions & 0 deletions internal/model/archival.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,58 @@ func (value *ArchivalBinaryData) UnmarshalJSON(raw []byte) error {
return nil
}

// ArchivalMaybeBinaryString is a possibly-binary string. When the string is valid UTF-8
// we serialize it as itself. Otherwise, we use the binary data format defined by
// https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata
type ArchivalMaybeBinaryString string

var (
_ json.Marshaler = ArchivalMaybeBinaryString("")
_ json.Unmarshaler = (func() *ArchivalMaybeBinaryString { return nil }())
)

// MarshalJSON implements json.Marshaler.
func (value ArchivalMaybeBinaryString) MarshalJSON() ([]byte, error) {
// convert value to a string
str := string(value)

// TODO(bassosimone): here is where we should scrub the string in the future
// once we have replaced AchivalMaybeBinaryData with ArchivalMaybeBinaryString

// if we can serialize as UTF-8 string, do that
if utf8.ValidString(str) {
return json.Marshal(str)
}

// otherwise fallback to the serialization of ArchivalBinaryData
return json.Marshal(ArchivalBinaryData(str))
}

// UnmarshalJSON implements json.Unmarshaler.
func (value *ArchivalMaybeBinaryString) UnmarshalJSON(rawData []byte) error {
// first attempt to decode as a string
var s string
if err := json.Unmarshal(rawData, &s); err == nil {
*value = ArchivalMaybeBinaryString(s)
return nil
}

// then attempt to decode as ArchivalBinaryData
var d ArchivalBinaryData
if err := json.Unmarshal(rawData, &d); err != nil {
return err
}
*value = ArchivalMaybeBinaryString(d)
return nil
}

// ArchivalMaybeBinaryData is a possibly binary string. We use this helper class
// to define a custom JSON encoder that allows us to choose the proper
// representation depending on whether the Value field is valid UTF-8 or not.
//
// See https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md#maybebinarydata
//
// Deprecated: do not use this type in new code.
type ArchivalMaybeBinaryData struct {
Value string
}
Expand Down
261 changes: 252 additions & 9 deletions internal/model/archival_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,22 +61,22 @@ func TestArchivalBinaryData(t *testing.T) {
}

cases := []testcase{{
name: "with nil .Value",
name: "with nil value",
input: nil,
expectErr: nil,
expectData: []byte("null"),
}, {
name: "with zero length .Value",
name: "with zero length value",
input: []byte{},
expectErr: nil,
expectData: []byte("null"),
}, {
name: "with .Value being a simple binary string",
name: "with value being a simple binary string",
input: []byte("Elliot"),
expectErr: nil,
expectData: []byte(`{"data":"RWxsaW90","format":"base64"}`),
}, {
name: "with .Value being a long binary string",
name: "with value being a long binary string",
input: archivalBinaryInput,
expectErr: nil,
expectData: archivalEncodedBinaryInput,
Expand Down Expand Up @@ -207,7 +207,7 @@ func TestArchivalBinaryData(t *testing.T) {
err := json.Unmarshal(tc.input, &abd)

t.Log("got this error", err)
t.Log("got this .Value field", abd)
t.Log("got this []byte-like value", abd)
t.Logf("converted to string: %s", string(abd))

// handle errors
Expand Down Expand Up @@ -246,16 +246,16 @@ func TestArchivalBinaryData(t *testing.T) {
}

cases := []testcase{{
name: "with nil .Value",
name: "with nil value",
input: nil,
}, {
name: "with zero length .Value",
name: "with zero length value",
input: []byte{},
}, {
name: "with .Value being a simple binary string",
name: "with value being a simple binary string",
input: []byte("Elliot"),
}, {
name: "with .Value being a long binary string",
name: "with value being a long binary string",
input: archivalBinaryInput,
}}

Expand Down Expand Up @@ -294,6 +294,249 @@ func TestArchivalBinaryData(t *testing.T) {
})
}

func TestArchivalMaybeBinaryString(t *testing.T) {
// This test verifies that we correctly serialize a string to JSON by
// producing "" | {"format":"base64","data":"<base64>"}
t.Run("MarshalJSON", func(t *testing.T) {
// testcase is a test case defined by this function
type testcase struct {
// name is the name of the test case
name string

// input is the possibly-binary input
input model.ArchivalMaybeBinaryString

// expectErr is the error we expect to see or nil
expectErr error

// expectData is the data we expect to see
expectData []byte
}

cases := []testcase{{
name: "with empty string value",
input: "",
expectErr: nil,
expectData: []byte(`""`),
}, {
name: "with value being a textual string",
input: "Elliot",
expectErr: nil,
expectData: []byte(`"Elliot"`),
}, {
name: "with value being a long binary string",
input: model.ArchivalMaybeBinaryString(archivalBinaryInput),
expectErr: nil,
expectData: archivalEncodedBinaryInput,
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// serialize to JSON
output, err := json.Marshal(tc.input)

t.Log("got this error", err)
t.Log("got this binary data", output)
t.Logf("converted to string: %s", string(output))

// handle errors
switch {
case err == nil && tc.expectErr != nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr == nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr != nil:
if err.Error() != tc.expectErr.Error() {
t.Fatal("expected", tc.expectErr, "got", err)
}

case err == nil && tc.expectErr == nil:
// all good--fallthrough
}

if diff := cmp.Diff(tc.expectData, output); diff != "" {
t.Fatal(diff)
}
})
}
})

// This test verifies that we correctly parse binary data to JSON by
// reading from "" | {"format":"base64","data":"<base64>"}
t.Run("UnmarshalJSON", func(t *testing.T) {
// testcase is a test case defined by this function
type testcase struct {
// name is the name of the test case
name string

// input is the binary input
input []byte

// expectErr is the error we expect to see or nil
expectErr error

// expectData is the data we expect
expectData model.ArchivalMaybeBinaryString
}

cases := []testcase{{
name: "with nil input array",
input: nil,
expectErr: errors.New("unexpected end of JSON input"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with zero-length input array",
input: []byte{},
expectErr: errors.New("unexpected end of JSON input"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with binary input that is not a complete JSON",
input: []byte("{"),
expectErr: errors.New("unexpected end of JSON input"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with ~random binary data as input",
input: archivalBinaryInput,
expectErr: errors.New("invalid character 'W' looking for beginning of value"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with valid JSON of the wrong type (array)",
input: []byte("[]"),
expectErr: errors.New("json: cannot unmarshal array into Go value of type model.archivalBinaryDataRepr"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with valid JSON of the wrong type (number)",
input: []byte("1.17"),
expectErr: errors.New("json: cannot unmarshal number into Go value of type model.archivalBinaryDataRepr"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with input being the liternal null",
input: []byte(`null`),
expectErr: nil,
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with empty JSON object",
input: []byte("{}"),
expectErr: errors.New("model: invalid binary data format: ''"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with correct data model but invalid format",
input: []byte(`{"data":"","format":"antani"}`),
expectErr: errors.New("model: invalid binary data format: 'antani'"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with correct data model and format but invalid base64 string",
input: []byte(`{"data":"x","format":"base64"}`),
expectErr: errors.New("illegal base64 data at input byte 0"),
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with correct data model and format but empty base64 string",
input: []byte(`{"data":"","format":"base64"}`),
expectErr: nil,
expectData: model.ArchivalMaybeBinaryString(""),
}, {
name: "with the a string",
input: []byte(`"Elliot"`),
expectErr: nil,
expectData: model.ArchivalMaybeBinaryString("Elliot"),
}, {
name: "with the encoding of a string",
input: []byte(`{"data":"RWxsaW90","format":"base64"}`),
expectErr: nil,
expectData: model.ArchivalMaybeBinaryString("Elliot"),
}, {
name: "with the encoding of a complex binary string",
input: archivalEncodedBinaryInput,
expectErr: nil,
expectData: model.ArchivalMaybeBinaryString(archivalBinaryInput),
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// unmarshal the raw input into an ArchivalBinaryData type
var abd model.ArchivalMaybeBinaryString
err := json.Unmarshal(tc.input, &abd)

t.Log("got this error", err)
t.Log("got this maybe-binary-string value", abd)
t.Logf("converted to string: %s", string(abd))

// handle errors
switch {
case err == nil && tc.expectErr != nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr == nil:
t.Fatal("expected", tc.expectErr, "got", err)

case err != nil && tc.expectErr != nil:
if err.Error() != tc.expectErr.Error() {
t.Fatal("expected", tc.expectErr, "got", err)
}

case err == nil && tc.expectErr == nil:
// all good--fallthrough
}

if diff := cmp.Diff(tc.expectData, abd); diff != "" {
t.Fatal(diff)
}
})
}
})

// This test verifies that we correctly round trip through JSON
t.Run("MarshalJSON then UnmarshalJSON", func(t *testing.T) {
// testcase is a test case defined by this function
type testcase struct {
// name is the name of the test case
name string

// input is the maybe-binary input
input model.ArchivalMaybeBinaryString
}

cases := []testcase{{
name: "with empty value",
input: "",
}, {
name: "with value being a simple textual string",
input: "Elliot",
}, {
name: "with value being a long binary string",
input: model.ArchivalMaybeBinaryString(archivalBinaryInput),
}}

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
// serialize to JSON
output, err := json.Marshal(tc.input)

t.Log("got this error", err)
t.Log("got this binary data", output)
t.Logf("converted to string: %s", string(output))

if err != nil {
t.Fatal(err)
}

// parse from JSON
var abc model.ArchivalMaybeBinaryString
if err := json.Unmarshal(output, &abc); err != nil {
t.Fatal(err)
}

// make sure we round tripped
if diff := cmp.Diff(tc.input, abc); diff != "" {
t.Fatal(diff)
}
})
}
})
}

func TestMaybeBinaryValue(t *testing.T) {
t.Run("MarshalJSON", func(t *testing.T) {
tests := []struct {
Expand Down

0 comments on commit 64154e0

Please sign in to comment.