Skip to content

Commit

Permalink
refactor: use ArchivalMaybeBinaryString for headers list (ooni#1334)
Browse files Browse the repository at this point in the history
This commit changes the list representation of HTTP headers to use the
ArchivalMaybeBinaryString type.

Now that we have migrated most representations of HTTP headers to use
this new type, we can finally implement scrubbing.

As a reminder, we still need to migrate the ./legacy/measurex
implementation, however that one is a bit annoying because there are no
tests at all for measurex 🤦🤦🤦🤦🤦🤦🤦.

That said, given that ./legacy/measurex is deprecated and it is only
used by the tor experiment, I think leaving it untouched would probably
be the right thing to do here.

Part of ooni/probe#2531
  • Loading branch information
bassosimone authored and Murphy-OrangeMud committed Feb 13, 2024
1 parent d9652e4 commit aed7d71
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 346 deletions.
24 changes: 12 additions & 12 deletions internal/experiment/hhfm/hhfm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,12 +743,12 @@ func TestNewRequestEntryList(t *testing.T) {
},
wantOut: []tracex.RequestEntry{{
Request: tracex.HTTPRequest{
HeadersList: []tracex.HTTPHeader{{
Key: "ContENt-tYPE",
Value: tracex.MaybeBinaryValue{Value: "text/plain"},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("ContENt-tYPE"),
model.ArchivalMaybeBinaryString("text/plain"),
}, {
Key: "User-aGENT",
Value: tracex.MaybeBinaryValue{Value: "foo/1.0"},
model.ArchivalMaybeBinaryString("User-aGENT"),
model.ArchivalMaybeBinaryString("foo/1.0"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"ContENt-tYPE": "text/plain",
Expand All @@ -774,7 +774,7 @@ func TestNewRequestEntryList(t *testing.T) {
Request: tracex.HTTPRequest{
Method: "GeT",
Headers: make(map[string]model.ArchivalMaybeBinaryString),
HeadersList: []tracex.HTTPHeader{},
HeadersList: []model.ArchivalHTTPHeader{},
URL: "http://10.0.0.1/",
},
}},
Expand Down Expand Up @@ -813,12 +813,12 @@ func TestNewHTTPResponse(t *testing.T) {
wantOut: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString("deadbeef"),
Code: 200,
HeadersList: []tracex.HTTPHeader{{
Key: "Content-Type",
Value: tracex.MaybeBinaryValue{Value: "text/plain"},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("Content-Type"),
model.ArchivalMaybeBinaryString("text/plain"),
}, {
Key: "User-Agent",
Value: tracex.MaybeBinaryValue{Value: "foo/1.0"},
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("foo/1.0"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Content-Type": "text/plain",
Expand All @@ -833,7 +833,7 @@ func TestNewHTTPResponse(t *testing.T) {
wantOut: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(""),
Code: 200,
HeadersList: []tracex.HTTPHeader{},
HeadersList: []model.ArchivalHTTPHeader{},
Headers: map[string]model.ArchivalMaybeBinaryString{},
},
}}
Expand Down
1 change: 0 additions & 1 deletion internal/legacy/tracex/archival.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ type (
DNSQueryEntry = model.ArchivalDNSLookupResult
DNSAnswerEntry = model.ArchivalDNSAnswer
TLSHandshake = model.ArchivalTLSOrQUICHandshakeResult
HTTPHeader = model.ArchivalHTTPHeader
RequestEntry = model.ArchivalHTTPRequestResult
HTTPRequest = model.ArchivalHTTPRequest
HTTPResponse = model.ArchivalHTTPResponse
Expand Down
54 changes: 20 additions & 34 deletions internal/legacy/tracex/archival_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,9 @@ func TestNewRequestList(t *testing.T) {
want: []RequestEntry{{
Failure: NewFailure(io.EOF),
Request: HTTPRequest{
HeadersList: []HTTPHeader{{
Key: "User-Agent",
Value: MaybeBinaryValue{
Value: "miniooni/0.1.0-dev",
},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"User-Agent": "miniooni/0.1.0-dev",
Expand All @@ -185,18 +183,16 @@ func TestNewRequestList(t *testing.T) {
URL: "https://www.example.com/result",
},
Response: HTTPResponse{
HeadersList: []HTTPHeader{},
HeadersList: []model.ArchivalHTTPHeader{},
Headers: make(map[string]model.ArchivalMaybeBinaryString),
},
T: 0.02,
}, {
Request: HTTPRequest{
Body: model.ArchivalMaybeBinaryString(""),
HeadersList: []HTTPHeader{{
Key: "User-Agent",
Value: MaybeBinaryValue{
Value: "miniooni/0.1.0-dev",
},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"User-Agent": "miniooni/0.1.0-dev",
Expand All @@ -207,11 +203,9 @@ func TestNewRequestList(t *testing.T) {
Response: HTTPResponse{
Body: model.ArchivalMaybeBinaryString("{}"),
Code: 200,
HeadersList: []HTTPHeader{{
Key: "Server",
Value: MaybeBinaryValue{
Value: "miniooni/0.1.0-dev",
},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("Server"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Server": "miniooni/0.1.0-dev",
Expand Down Expand Up @@ -242,11 +236,9 @@ func TestNewRequestList(t *testing.T) {
},
want: []RequestEntry{{
Request: HTTPRequest{
HeadersList: []HTTPHeader{{
Key: "User-Agent",
Value: MaybeBinaryValue{
Value: "miniooni/0.1.0-dev",
},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"User-Agent": "miniooni/0.1.0-dev",
Expand All @@ -256,21 +248,15 @@ func TestNewRequestList(t *testing.T) {
},
Response: HTTPResponse{
Code: 302,
HeadersList: []HTTPHeader{{
Key: "Location",
Value: MaybeBinaryValue{
Value: "https://x.example.com",
},
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("Location"),
model.ArchivalMaybeBinaryString("https://x.example.com"),
}, {
Key: "Location",
Value: MaybeBinaryValue{
Value: "https://y.example.com",
},
model.ArchivalMaybeBinaryString("Location"),
model.ArchivalMaybeBinaryString("https://y.example.com"),
}, {
Key: "Server",
Value: MaybeBinaryValue{
Value: "miniooni/0.1.0-dev",
},
model.ArchivalMaybeBinaryString("Server"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Server": "miniooni/0.1.0-dev",
Expand Down
66 changes: 22 additions & 44 deletions internal/measurexlite/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,11 @@ func TestNewArchivalHTTPRequestResult(t *testing.T) {
Body: model.ArchivalMaybeBinaryString(""),
BodyIsTruncated: false,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Accept",
Value: model.ArchivalMaybeBinaryData{
Value: "*/*",
},
model.ArchivalMaybeBinaryString("Accept"),
model.ArchivalMaybeBinaryString("*/*"),
}, {
Key: "User-Agent",
Value: model.ArchivalMaybeBinaryData{
Value: "miniooni/0.1.0-dev",
},
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Accept": "*/*",
Expand Down Expand Up @@ -195,15 +191,11 @@ func TestNewArchivalHTTPRequestResult(t *testing.T) {
Body: model.ArchivalMaybeBinaryString(""),
BodyIsTruncated: false,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Accept",
Value: model.ArchivalMaybeBinaryData{
Value: "*/*",
},
model.ArchivalMaybeBinaryString("Accept"),
model.ArchivalMaybeBinaryString("*/*"),
}, {
Key: "User-Agent",
Value: model.ArchivalMaybeBinaryData{
Value: "miniooni/0.1.0-dev",
},
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Accept": "*/*",
Expand All @@ -221,15 +213,11 @@ func TestNewArchivalHTTPRequestResult(t *testing.T) {
BodyIsTruncated: false,
Code: 200,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Content-Type",
Value: model.ArchivalMaybeBinaryData{
Value: "text/html; charset=iso-8859-1",
},
model.ArchivalMaybeBinaryString("Content-Type"),
model.ArchivalMaybeBinaryString("text/html; charset=iso-8859-1"),
}, {
Key: "Server",
Value: model.ArchivalMaybeBinaryData{
Value: "Apache",
},
model.ArchivalMaybeBinaryString("Server"),
model.ArchivalMaybeBinaryString("Apache"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Content-Type": "text/html; charset=iso-8859-1",
Expand Down Expand Up @@ -293,15 +281,11 @@ func TestNewArchivalHTTPRequestResult(t *testing.T) {
Body: model.ArchivalMaybeBinaryString(""),
BodyIsTruncated: false,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Accept",
Value: model.ArchivalMaybeBinaryData{
Value: "*/*",
},
model.ArchivalMaybeBinaryString("Accept"),
model.ArchivalMaybeBinaryString("*/*"),
}, {
Key: "User-Agent",
Value: model.ArchivalMaybeBinaryData{
Value: "miniooni/0.1.0-dev",
},
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Accept": "*/*",
Expand All @@ -317,20 +301,14 @@ func TestNewArchivalHTTPRequestResult(t *testing.T) {
BodyIsTruncated: false,
Code: 302,
HeadersList: []model.ArchivalHTTPHeader{{
Key: "Content-Type",
Value: model.ArchivalMaybeBinaryData{
Value: "text/html; charset=iso-8859-1",
},
model.ArchivalMaybeBinaryString("Content-Type"),
model.ArchivalMaybeBinaryString("text/html; charset=iso-8859-1"),
}, {
Key: "Location",
Value: model.ArchivalMaybeBinaryData{
Value: "/v2/index.html",
},
model.ArchivalMaybeBinaryString("Location"),
model.ArchivalMaybeBinaryString("/v2/index.html"),
}, {
Key: "Server",
Value: model.ArchivalMaybeBinaryData{
Value: "Apache",
},
model.ArchivalMaybeBinaryString("Server"),
model.ArchivalMaybeBinaryString("Apache"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
"Content-Type": "text/html; charset=iso-8859-1",
Expand Down
68 changes: 13 additions & 55 deletions internal/model/archival.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,8 @@ func ArchivalNewHTTPHeadersList(source http.Header) (out []ArchivalHTTPHeader) {
for _, key := range keys {
for _, value := range source[key] {
out = append(out, ArchivalHTTPHeader{
Key: key,
Value: ArchivalMaybeBinaryData{
Value: value,
},
ArchivalMaybeBinaryString(key),
ArchivalMaybeBinaryString(value),
})
}
}
Expand All @@ -393,62 +391,22 @@ func ArchivalNewHTTPHeadersMap(header http.Header) (out map[string]ArchivalMaybe
}

// ArchivalHTTPHeader is a single HTTP header.
type ArchivalHTTPHeader struct {
Key string
Value ArchivalMaybeBinaryData
}
type ArchivalHTTPHeader [2]ArchivalMaybeBinaryString

// MarshalJSON marshals a single HTTP header to a tuple where the first
// element is a string and the second element is maybe-binary data.
func (hh ArchivalHTTPHeader) MarshalJSON() ([]byte, error) {
if utf8.ValidString(hh.Value.Value) {
return json.Marshal([]string{hh.Key, hh.Value.Value})
}
value := make(map[string]string)
value["format"] = "base64"
value["data"] = base64.StdEncoding.EncodeToString([]byte(hh.Value.Value))
return json.Marshal([]interface{}{hh.Key, value})
}
// errCannotParseArchivalHTTPHeader indicates that we cannot parse an ArchivalHTTPHeader.
var errCannotParseArchivalHTTPHeader = errors.New("invalid ArchivalHTTPHeader")

// UnmarshalJSON is the opposite of MarshalJSON.
func (hh *ArchivalHTTPHeader) UnmarshalJSON(d []byte) error {
var pair []interface{}
if err := json.Unmarshal(d, &pair); err != nil {
// UnmarshalJSON implements json.Unmarshaler.
func (ahh *ArchivalHTTPHeader) UnmarshalJSON(data []byte) error {
var helper []ArchivalMaybeBinaryString
if err := json.Unmarshal(data, &helper); err != nil {
return err
}
if len(pair) != 2 {
return errors.New("unexpected pair length")
}
key, ok := pair[0].(string)
if !ok {
return errors.New("the key is not a string")
}
value, ok := pair[1].(string)
if !ok {
mapvalue, ok := pair[1].(map[string]interface{})
if !ok {
return errors.New("the value is neither a string nor a map[string]interface{}")
}
if _, ok := mapvalue["format"]; !ok {
return errors.New("missing format")
}
if v, ok := mapvalue["format"].(string); !ok || v != "base64" {
return errors.New("invalid format")
}
if _, ok := mapvalue["data"]; !ok {
return errors.New("missing data field")
}
v, ok := mapvalue["data"].(string)
if !ok {
return errors.New("the data field is not a string")
}
b64, err := base64.StdEncoding.DecodeString(v)
if err != nil {
return err
}
value = string(b64)
if len(helper) != 2 {
return fmt.Errorf("%w: expected 2 elements, got %d", errCannotParseArchivalHTTPHeader, len(helper))
}
hh.Key, hh.Value = key, ArchivalMaybeBinaryData{Value: value}
(*ahh)[0] = helper[0]
(*ahh)[1] = helper[1]
return nil
}

Expand Down
Loading

0 comments on commit aed7d71

Please sign in to comment.