Skip to content

Commit

Permalink
feat(archival): unconditionally scrub HTTP headers and bodies (#1335)
Browse files Browse the repository at this point in the history
This commit modifies ArchivalMaybeBinaryData to unconditionally scrub IP
addresses and endpoints. It also renames the struct
ArchivalScrubbedMaybeBinaryData to underline that it's scrubbing now.

We're using ArchivalScrubbedMaybeBinaryData to represent HTTP headers
and bodies collected by OONI measurements.

With this commit merged, I am now much less concerned about the
potential unintended effects of aggressively using happy eyeballs, which
we started introducing as part of
ooni/probe#2531.

In other words, the rest of the refactoring of the OONI bootstrap could
proceed a bit faster.
  • Loading branch information
bassosimone authored Sep 28, 2023
1 parent b9fcb78 commit 7a6fd91
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 290 deletions.
2 changes: 1 addition & 1 deletion internal/experiment/hhfm/hhfm.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ func NewRequestEntryList(req *http.Request, headers map[string]string) (out []tr
// specific *http.Response instance and its body.
func NewHTTPResponse(resp *http.Response, data []byte) (out tracex.HTTPResponse) {
out = tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(data),
Body: model.ArchivalScrubbedMaybeBinaryString(data),
Code: int64(resp.StatusCode),
Headers: model.ArchivalNewHTTPHeadersMap(resp.Header),
HeadersList: model.ArchivalNewHTTPHeadersList(resp.Header),
Expand Down
28 changes: 14 additions & 14 deletions internal/experiment/hhfm/hhfm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -744,13 +744,13 @@ func TestNewRequestEntryList(t *testing.T) {
wantOut: []tracex.RequestEntry{{
Request: tracex.HTTPRequest{
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("ContENt-tYPE"),
model.ArchivalMaybeBinaryString("text/plain"),
model.ArchivalScrubbedMaybeBinaryString("ContENt-tYPE"),
model.ArchivalScrubbedMaybeBinaryString("text/plain"),
}, {
model.ArchivalMaybeBinaryString("User-aGENT"),
model.ArchivalMaybeBinaryString("foo/1.0"),
model.ArchivalScrubbedMaybeBinaryString("User-aGENT"),
model.ArchivalScrubbedMaybeBinaryString("foo/1.0"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"ContENt-tYPE": "text/plain",
"User-aGENT": "foo/1.0",
},
Expand All @@ -773,7 +773,7 @@ func TestNewRequestEntryList(t *testing.T) {
wantOut: []tracex.RequestEntry{{
Request: tracex.HTTPRequest{
Method: "GeT",
Headers: make(map[string]model.ArchivalMaybeBinaryString),
Headers: make(map[string]model.ArchivalScrubbedMaybeBinaryString),
HeadersList: []model.ArchivalHTTPHeader{},
URL: "http://10.0.0.1/",
},
Expand Down Expand Up @@ -811,16 +811,16 @@ func TestNewHTTPResponse(t *testing.T) {
data: []byte("deadbeef"),
},
wantOut: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString("deadbeef"),
Body: model.ArchivalScrubbedMaybeBinaryString("deadbeef"),
Code: 200,
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("Content-Type"),
model.ArchivalMaybeBinaryString("text/plain"),
model.ArchivalScrubbedMaybeBinaryString("Content-Type"),
model.ArchivalScrubbedMaybeBinaryString("text/plain"),
}, {
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("foo/1.0"),
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
model.ArchivalScrubbedMaybeBinaryString("foo/1.0"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Content-Type": "text/plain",
"User-Agent": "foo/1.0",
},
Expand All @@ -831,10 +831,10 @@ func TestNewHTTPResponse(t *testing.T) {
resp: &http.Response{StatusCode: 200},
},
wantOut: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(""),
Body: model.ArchivalScrubbedMaybeBinaryString(""),
Code: 200,
HeadersList: []model.ArchivalHTTPHeader{},
Headers: map[string]model.ArchivalMaybeBinaryString{},
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{},
},
}}
for _, tt := range tests {
Expand Down
4 changes: 2 additions & 2 deletions internal/experiment/riseupvpn/riseupvpn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -773,11 +773,11 @@ func generateMockGetter(requestResponse map[string]string, responseStatus map[st
Failure: failure,
Request: tracex.HTTPRequest{
URL: url,
Body: model.ArchivalMaybeBinaryString(""),
Body: model.ArchivalScrubbedMaybeBinaryString(""),
BodyIsTruncated: false,
},
Response: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
responseBody,
),
BodyIsTruncated: false,
Expand Down
42 changes: 21 additions & 21 deletions internal/experiment/webconnectivity/httpanalysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
randx.Letters(768),
),
},
Expand All @@ -96,7 +96,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
randx.Letters(768),
),
},
Expand All @@ -116,7 +116,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
randx.Letters(1024),
),
},
Expand All @@ -136,7 +136,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
randx.Letters(8),
),
},
Expand All @@ -156,7 +156,7 @@ func TestHTTPBodyLengthChecks(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
randx.Letters(16),
),
},
Expand Down Expand Up @@ -366,7 +366,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Date": "Mon Jul 13 21:10:08 CEST 2020",
},
Code: 200,
Expand All @@ -382,7 +382,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Date": "Mon Jul 13 21:10:08 CEST 2020",
},
Code: 200,
Expand All @@ -402,7 +402,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Date": "Mon Jul 13 21:10:08 CEST 2020",
},
Code: 200,
Expand All @@ -425,7 +425,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Date": "Mon Jul 13 21:10:08 CEST 2020",
"Antani": "MASCETTI",
},
Expand All @@ -450,7 +450,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Date": "Mon Jul 13 21:10:08 CEST 2020",
"Antani": "MASCETTI",
},
Expand All @@ -475,7 +475,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Accept-Ranges": "bytes",
"Age": "404727",
"Cache-Control": "max-age=604800",
Expand Down Expand Up @@ -522,7 +522,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Accept-Ranges": "bytes",
"Age": "404727",
"Cache-Control": "max-age=604800",
Expand Down Expand Up @@ -567,7 +567,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Accept-Ranges": "bytes",
"Age": "404727",
"Cache-Control": "max-age=604800",
Expand Down Expand Up @@ -608,7 +608,7 @@ func TestHeadersMatch(t *testing.T) {
tk: urlgetter.TestKeys{
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"accept-ranges": "bytes",
"AGE": "404727",
"cache-Control": "max-age=604800",
Expand Down Expand Up @@ -699,7 +699,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString("<HTML/>"),
Body: model.ArchivalScrubbedMaybeBinaryString("<HTML/>"),
},
}},
},
Expand All @@ -712,7 +712,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString("<HTML/>"),
Body: model.ArchivalScrubbedMaybeBinaryString("<HTML/>"),
},
}},
},
Expand All @@ -731,7 +731,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
"<HTML><TITLE>La community di MSN</TITLE></HTML>"),
},
}},
Expand All @@ -751,7 +751,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
"<HTML><TITLE>La communità di MSN</TITLE></HTML>"),
},
}},
Expand All @@ -771,7 +771,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
"<HTML><TITLE>" + randx.Letters(1024) + "</TITLE></HTML>"),
},
}},
Expand All @@ -791,7 +791,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
"<HTML><TiTLe>La commUNity di MSN</tITLE></HTML>"),
},
}},
Expand All @@ -811,7 +811,7 @@ func TestTitleMatch(t *testing.T) {
Requests: []tracex.RequestEntry{{
Response: tracex.HTTPResponse{
Code: 200,
Body: model.ArchivalMaybeBinaryString(
Body: model.ArchivalScrubbedMaybeBinaryString(
"<HTML><TiTLe>La commUNity di MSN</tITLE></HTML>"),
},
}},
Expand Down
2 changes: 1 addition & 1 deletion internal/legacy/tracex/archival.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func newRequestList(begin time.Time, events []Event) (out []RequestEntry) {
entry.Response.HeadersList = model.ArchivalNewHTTPHeadersList(ev.HTTPResponseHeaders)
entry.Response.Code = int64(ev.HTTPStatusCode)
entry.Response.Locations = ev.HTTPResponseHeaders.Values("Location")
entry.Response.Body = model.ArchivalMaybeBinaryString(ev.HTTPResponseBody)
entry.Response.Body = model.ArchivalScrubbedMaybeBinaryString(ev.HTTPResponseBody)
entry.Response.BodyIsTruncated = ev.HTTPResponseBodyIsTruncated
entry.Failure = ev.Err.ToFailure()
out = append(out, entry)
Expand Down
44 changes: 22 additions & 22 deletions internal/legacy/tracex/archival_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,41 +173,41 @@ func TestNewRequestList(t *testing.T) {
Failure: NewFailure(io.EOF),
Request: HTTPRequest{
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"User-Agent": "miniooni/0.1.0-dev",
},
Method: "GET",
URL: "https://www.example.com/result",
},
Response: HTTPResponse{
HeadersList: []model.ArchivalHTTPHeader{},
Headers: make(map[string]model.ArchivalMaybeBinaryString),
Headers: make(map[string]model.ArchivalScrubbedMaybeBinaryString),
},
T: 0.02,
}, {
Request: HTTPRequest{
Body: model.ArchivalMaybeBinaryString(""),
Body: model.ArchivalScrubbedMaybeBinaryString(""),
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"User-Agent": "miniooni/0.1.0-dev",
},
Method: "POST",
URL: "https://www.example.com/submit",
},
Response: HTTPResponse{
Body: model.ArchivalMaybeBinaryString("{}"),
Body: model.ArchivalScrubbedMaybeBinaryString("{}"),
Code: 200,
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("Server"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
model.ArchivalScrubbedMaybeBinaryString("Server"),
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Server": "miniooni/0.1.0-dev",
},
Locations: nil,
Expand Down Expand Up @@ -237,10 +237,10 @@ func TestNewRequestList(t *testing.T) {
want: []RequestEntry{{
Request: HTTPRequest{
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("User-Agent"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
model.ArchivalScrubbedMaybeBinaryString("User-Agent"),
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"User-Agent": "miniooni/0.1.0-dev",
},
Method: "GET",
Expand All @@ -249,16 +249,16 @@ func TestNewRequestList(t *testing.T) {
Response: HTTPResponse{
Code: 302,
HeadersList: []model.ArchivalHTTPHeader{{
model.ArchivalMaybeBinaryString("Location"),
model.ArchivalMaybeBinaryString("https://x.example.com"),
model.ArchivalScrubbedMaybeBinaryString("Location"),
model.ArchivalScrubbedMaybeBinaryString("https://x.example.com"),
}, {
model.ArchivalMaybeBinaryString("Location"),
model.ArchivalMaybeBinaryString("https://y.example.com"),
model.ArchivalScrubbedMaybeBinaryString("Location"),
model.ArchivalScrubbedMaybeBinaryString("https://y.example.com"),
}, {
model.ArchivalMaybeBinaryString("Server"),
model.ArchivalMaybeBinaryString("miniooni/0.1.0-dev"),
model.ArchivalScrubbedMaybeBinaryString("Server"),
model.ArchivalScrubbedMaybeBinaryString("miniooni/0.1.0-dev"),
}},
Headers: map[string]model.ArchivalMaybeBinaryString{
Headers: map[string]model.ArchivalScrubbedMaybeBinaryString{
"Server": "miniooni/0.1.0-dev",
"Location": "https://x.example.com",
},
Expand Down
Loading

0 comments on commit 7a6fd91

Please sign in to comment.