From 3bcfb5ad05eaf38b6d7fda850e23d832773af017 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 26 Aug 2022 12:36:40 +0200 Subject: [PATCH 1/2] feat(measurexlite): generate HTTP traces Required by https://github.com/ooni/probe/issues/2237 --- internal/measurexlite/http.go | 202 ++++++++++++++++ internal/measurexlite/http_test.go | 365 +++++++++++++++++++++++++++++ internal/model/archival.go | 3 + 3 files changed, 570 insertions(+) create mode 100644 internal/measurexlite/http.go create mode 100644 internal/measurexlite/http_test.go diff --git a/internal/measurexlite/http.go b/internal/measurexlite/http.go new file mode 100644 index 0000000000..55476afd81 --- /dev/null +++ b/internal/measurexlite/http.go @@ -0,0 +1,202 @@ +package measurexlite + +// +// Support for generating HTTP traces +// + +import ( + "net/http" + "sort" + "time" + + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/tracex" +) + +// NewArchivalHTTPRequestResult creates a new model.ArchivalHTTPRequestResult. +// +// Arguments: +// +// - index is the index of the trace; +// +// - started is when we started sending the request; +// +// - network is the underlying network in use ("tcp" or "udp"); +// +// - address is the remote endpoint's address; +// +// - alpn is the negotiated ALPN or an empty string when not applicable; +// +// - transport is the HTTP transport's protocol we're using ("quic" or "tcp"): this field +// was introduced a long time ago to support QUIC measurements and we keep it for backwards +// compatibility but network, address, and alpn are much more informative; +// +// - req is the certainly-non-nil HTTP request; +// +// - resp is the possibly-nil HTTP response; +// +// - maxRespBodySize is the maximum body snapshot size; +// +// - body is the possibly-nil HTTP response body; +// +// - err is the possibly-nil error that occurred during the transaction; +// +// - finished is when we finished reading the response's body. 
+func NewArchivalHTTPRequestResult(index int64, started time.Duration, network, address, alpn string, + transport string, req *http.Request, resp *http.Response, maxRespBodySize int64, body []byte, err error, + finished time.Duration) *model.ArchivalHTTPRequestResult { + return &model.ArchivalHTTPRequestResult{ + Network: network, + Address: address, + ALPN: alpn, + Failure: tracex.NewFailure(err), + Request: model.ArchivalHTTPRequest{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + HeadersList: newHTTPRequestHeaderList(req), + Headers: newHTTPRequestHeaderMap(req), + Method: httpRequestMethod(req), + Tor: model.ArchivalHTTPTor{}, + Transport: transport, // kept for backward compat + URL: httpRequestURL(req), + }, + Response: model.ArchivalHTTPResponse{ + Body: httpResponseBody(body), + BodyIsTruncated: httpResponseBodyIsTruncated(body, maxRespBodySize), + Code: httpResponseStatusCode(resp), + HeadersList: newHTTPResponseHeaderList(resp), + Headers: newHTTPResponseHeaderMap(resp), + Locations: httpResponseLocations(resp), + }, + T0: started.Seconds(), + T: finished.Seconds(), + TransactionID: index, + } +} + +// httpRequestMethod returns the HTTP request method or an empty string +func httpRequestMethod(req *http.Request) (out string) { + if req != nil { + out = req.Method + } + return +} + +// newHTTPRequestHeaderList calls newHTTPHeaderList with the request headers or +// return an empty array in case the request is nil. +func newHTTPRequestHeaderList(req *http.Request) []model.ArchivalHTTPHeader { + m := http.Header{} + if req != nil { + m = req.Header + } + return newHTTPHeaderList(m) +} + +// newHTTPRequestHeaderMap calls newHTTPHeaderMap with the request headers or +// return an empty map in case the request is nil. 
+func newHTTPRequestHeaderMap(req *http.Request) map[string]model.ArchivalMaybeBinaryData { + m := http.Header{} + if req != nil { + m = req.Header + } + return newHTTPHeaderMap(m) +} + +// httpRequestURL returns the req.URL.String() or an empty string. +func httpRequestURL(req *http.Request) (out string) { + if req != nil && req.URL != nil { + out = req.URL.String() + } + return +} + +// httpResponseBody returns the response body, if possible, or an empty body. +func httpResponseBody(body []byte) (out model.ArchivalMaybeBinaryData) { + if body != nil { + out.Value = string(body) + } + return +} + +// httpResponseBodyIsTruncated determines whether the body is truncated (if possible) +func httpResponseBodyIsTruncated(body []byte, maxSnapSize int64) (out bool) { + if len(body) > 0 && maxSnapSize > 0 { + out = int64(len(body)) >= maxSnapSize + } + return +} + +// httpResponseStatusCode returns the status code, if possible +func httpResponseStatusCode(resp *http.Response) (code int64) { + if resp != nil { + code = int64(resp.StatusCode) + } + return +} + +// newHTTPResponseHeaderList calls newHTTPHeaderList with the response headers or +// return an empty array in case the response is nil. +func newHTTPResponseHeaderList(resp *http.Response) (out []model.ArchivalHTTPHeader) { + m := http.Header{} + if resp != nil { + m = resp.Header + } + return newHTTPHeaderList(m) +} + +// newHTTPResponseHeaderMap calls newHTTPHeaderMap with the response headers or +// return an empty map in case the response is nil. 
+func newHTTPResponseHeaderMap(resp *http.Response) (out map[string]model.ArchivalMaybeBinaryData) { + m := http.Header{} + if resp != nil { + m = resp.Header + } + return newHTTPHeaderMap(m) +} + +// httpResponseLocations returns the locations inside the response (if possible) +func httpResponseLocations(resp *http.Response) []string { + if resp == nil { + return []string{} + } + loc, err := resp.Location() + if err != nil { + return []string{} + } + return []string{loc.String()} +} + +// newHTTPHeaderList creates a list representation of HTTP headers +func newHTTPHeaderList(header http.Header) (out []model.ArchivalHTTPHeader) { + out = []model.ArchivalHTTPHeader{} + keys := []string{} + for key := range header { + keys = append(keys, key) + } + sort.Strings(keys) // ensure the output is consistent, which helps with testing + for _, key := range keys { + for _, value := range header[key] { + out = append(out, model.ArchivalHTTPHeader{ + Key: key, + Value: model.ArchivalMaybeBinaryData{ + Value: value, + }, + }) + } + } + return +} + +// newHTTPHeaderMap creates a map representation of HTTP headers +func newHTTPHeaderMap(header http.Header) (out map[string]model.ArchivalMaybeBinaryData) { + out = make(map[string]model.ArchivalMaybeBinaryData) + for key, values := range header { + for _, value := range values { + out[key] = model.ArchivalMaybeBinaryData{ + Value: value, + } + break + } + } + return +} diff --git a/internal/measurexlite/http_test.go b/internal/measurexlite/http_test.go new file mode 100644 index 0000000000..dacce5a489 --- /dev/null +++ b/internal/measurexlite/http_test.go @@ -0,0 +1,365 @@ +package measurexlite + +import ( + "net/http" + "net/url" + "testing" + "time" + + "github.com/google/go-cmp/cmp" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" + "github.com/ooni/probe-cli/v3/internal/netxlite/filtering" +) + +func TestNewArchivalHTTPRequestResult(t *testing.T) { + type args struct { + index 
int64 + started time.Duration + network string + address string + alpn string + transport string + req *http.Request + resp *http.Response + maxRespBodySize int64 + body []byte + err error + finished time.Duration + } + + type config struct { + name string + args args + expect *model.ArchivalHTTPRequestResult + } + + configs := []config{{ + name: "the code is defensive with all zero-value inputs", + args: args{ + index: 0, + started: 0, + network: "", + address: "", + alpn: "", + transport: "", + req: nil, + resp: nil, + maxRespBodySize: 0, + body: nil, + err: nil, + finished: 0, + }, + expect: &model.ArchivalHTTPRequestResult{ + Network: "", + Address: "", + ALPN: "", + Failure: nil, + Request: model.ArchivalHTTPRequest{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + HeadersList: []model.ArchivalHTTPHeader{}, + Headers: map[string]model.ArchivalMaybeBinaryData{}, + Method: "", + Tor: model.ArchivalHTTPTor{}, + Transport: "", + URL: "", + }, + Response: model.ArchivalHTTPResponse{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + Code: 0, + HeadersList: []model.ArchivalHTTPHeader{}, + Headers: map[string]model.ArchivalMaybeBinaryData{}, + Locations: []string{}, + }, + T0: 0, + T: 0, + TransactionID: 0, + }, + }, { + name: "case of request that failed with I/O issues", + args: args{ + index: 1, + started: 250 * time.Millisecond, + network: "tcp", + address: "8.8.8.8:80", + alpn: "", + transport: "tcp", + req: &http.Request{ + Method: "GET", + URL: &url.URL{ + Scheme: "http", + Host: "dns.google", + Path: "/", + }, + Header: http.Header{ + "Accept": {"*/*"}, + "User-Agent": {"miniooni/0.1.0-dev"}, + }, + }, + resp: nil, + maxRespBodySize: 1 << 19, + body: nil, + err: netxlite.NewTopLevelGenericErrWrapper(netxlite.ECONNRESET), + finished: 750 * time.Millisecond, + }, + expect: &model.ArchivalHTTPRequestResult{ + Network: "tcp", + Address: "8.8.8.8:80", + ALPN: "", + Failure: func() *string { + s := 
netxlite.FailureConnectionReset + return &s + }(), + Request: model.ArchivalHTTPRequest{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + HeadersList: []model.ArchivalHTTPHeader{{ + Key: "Accept", + Value: model.ArchivalMaybeBinaryData{ + Value: "*/*", + }, + }, { + Key: "User-Agent", + Value: model.ArchivalMaybeBinaryData{ + Value: "miniooni/0.1.0-dev", + }, + }}, + Headers: map[string]model.ArchivalMaybeBinaryData{ + "Accept": {Value: "*/*"}, + "User-Agent": {Value: "miniooni/0.1.0-dev"}, + }, + Method: "GET", + Tor: model.ArchivalHTTPTor{}, + Transport: "tcp", + URL: "http://dns.google/", + }, + Response: model.ArchivalHTTPResponse{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + Code: 0, + HeadersList: []model.ArchivalHTTPHeader{}, + Headers: map[string]model.ArchivalMaybeBinaryData{}, + Locations: []string{}, + }, + T0: 0.25, + T: 0.75, + TransactionID: 1, + }, + }, { + name: "case of request that succeded", + args: args{ + index: 44, + started: 1400 * time.Millisecond, + network: "udp", + address: "8.8.8.8:443", + alpn: "h3", + transport: "quic", + req: &http.Request{ + Method: "GET", + URL: &url.URL{ + Scheme: "https", + Host: "dns.google", + Path: "/", + }, + Header: http.Header{ + "Accept": {"*/*"}, + "User-Agent": {"miniooni/0.1.0-dev"}, + }, + }, + resp: &http.Response{ + StatusCode: 200, + Header: http.Header{ + "Content-Type": {"text/html; charset=iso-8859-1"}, + "Server": {"Apache"}, + }, + }, + maxRespBodySize: 1 << 19, + body: filtering.HTTPBlockpage451, + err: nil, + finished: 1500 * time.Millisecond, + }, + expect: &model.ArchivalHTTPRequestResult{ + Network: "udp", + Address: "8.8.8.8:443", + ALPN: "h3", + Failure: nil, + Request: model.ArchivalHTTPRequest{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + HeadersList: []model.ArchivalHTTPHeader{{ + Key: "Accept", + Value: model.ArchivalMaybeBinaryData{ + Value: "*/*", + }, + }, { + Key: "User-Agent", + Value: 
model.ArchivalMaybeBinaryData{ + Value: "miniooni/0.1.0-dev", + }, + }}, + Headers: map[string]model.ArchivalMaybeBinaryData{ + "Accept": {Value: "*/*"}, + "User-Agent": {Value: "miniooni/0.1.0-dev"}, + }, + Method: "GET", + Tor: model.ArchivalHTTPTor{}, + Transport: "quic", + URL: "https://dns.google/", + }, + Response: model.ArchivalHTTPResponse{ + Body: model.ArchivalMaybeBinaryData{ + Value: string(filtering.HTTPBlockpage451), + }, + BodyIsTruncated: false, + Code: 200, + HeadersList: []model.ArchivalHTTPHeader{{ + Key: "Content-Type", + Value: model.ArchivalMaybeBinaryData{ + Value: "text/html; charset=iso-8859-1", + }, + }, { + Key: "Server", + Value: model.ArchivalMaybeBinaryData{ + Value: "Apache", + }, + }}, + Headers: map[string]model.ArchivalMaybeBinaryData{ + "Content-Type": {Value: "text/html; charset=iso-8859-1"}, + "Server": {Value: "Apache"}, + }, + Locations: []string{}, + }, + T0: 1.4, + T: 1.5, + TransactionID: 44, + }, + }, { + name: "case of redirect", + args: args{ + index: 47, + started: 1400 * time.Millisecond, + network: "udp", + address: "8.8.8.8:443", + alpn: "h3", + transport: "quic", + req: &http.Request{ + Method: "GET", + URL: &url.URL{ + Scheme: "https", + Host: "dns.google", + Path: "/", + }, + Header: http.Header{ + "Accept": {"*/*"}, + "User-Agent": {"miniooni/0.1.0-dev"}, + }, + }, + resp: &http.Response{ + StatusCode: 302, + Header: http.Header{ + "Content-Type": {"text/html; charset=iso-8859-1"}, + "Location": {"/v2/index.html"}, + "Server": {"Apache"}, + }, + Request: &http.Request{ // necessary for Location to WAI + URL: &url.URL{ + Scheme: "https", + Host: "dns.google", + Path: "/", + }, + }, + }, + maxRespBodySize: 1 << 19, + body: nil, + err: nil, + finished: 1500 * time.Millisecond, + }, + expect: &model.ArchivalHTTPRequestResult{ + Network: "udp", + Address: "8.8.8.8:443", + ALPN: "h3", + Failure: nil, + Request: model.ArchivalHTTPRequest{ + Body: model.ArchivalMaybeBinaryData{}, + BodyIsTruncated: false, + HeadersList: 
[]model.ArchivalHTTPHeader{{ + Key: "Accept", + Value: model.ArchivalMaybeBinaryData{ + Value: "*/*", + }, + }, { + Key: "User-Agent", + Value: model.ArchivalMaybeBinaryData{ + Value: "miniooni/0.1.0-dev", + }, + }}, + Headers: map[string]model.ArchivalMaybeBinaryData{ + "Accept": {Value: "*/*"}, + "User-Agent": {Value: "miniooni/0.1.0-dev"}, + }, + Method: "GET", + Tor: model.ArchivalHTTPTor{}, + Transport: "quic", + URL: "https://dns.google/", + }, + Response: model.ArchivalHTTPResponse{ + Body: model.ArchivalMaybeBinaryData{ + Value: "", + }, + BodyIsTruncated: false, + Code: 302, + HeadersList: []model.ArchivalHTTPHeader{{ + Key: "Content-Type", + Value: model.ArchivalMaybeBinaryData{ + Value: "text/html; charset=iso-8859-1", + }, + }, { + Key: "Location", + Value: model.ArchivalMaybeBinaryData{ + Value: "/v2/index.html", + }, + }, { + Key: "Server", + Value: model.ArchivalMaybeBinaryData{ + Value: "Apache", + }, + }}, + Headers: map[string]model.ArchivalMaybeBinaryData{ + "Content-Type": {Value: "text/html; charset=iso-8859-1"}, + "Location": {Value: "/v2/index.html"}, + "Server": {Value: "Apache"}, + }, + Locations: []string{ + "https://dns.google/v2/index.html", + }, + }, + T0: 1.4, + T: 1.5, + TransactionID: 47, + }, + }} + + for _, cnf := range configs { + t.Run(cnf.name, func(t *testing.T) { + out := NewArchivalHTTPRequestResult( + cnf.args.index, + cnf.args.started, + cnf.args.network, + cnf.args.address, + cnf.args.alpn, + cnf.args.transport, + cnf.args.req, + cnf.args.resp, + cnf.args.maxRespBodySize, + cnf.args.body, + cnf.args.err, + cnf.args.finished, + ) + if diff := cmp.Diff(cnf.expect, out); diff != "" { + t.Fatal(diff) + } + }) + } +} diff --git a/internal/model/archival.go b/internal/model/archival.go index 9e3ff0a180..23a333f50c 100644 --- a/internal/model/archival.go +++ b/internal/model/archival.go @@ -193,6 +193,9 @@ type ArchivalTLSOrQUICHandshakeResult struct { // // See 
https://github.com/ooni/spec/blob/master/data-formats/df-001-httpt.md. type ArchivalHTTPRequestResult struct { + Network string `json:"network,omitempty"` + Address string `json:"address,omitempty"` + ALPN string `json:"alpn,omitempty"` Failure *string `json:"failure"` Request ArchivalHTTPRequest `json:"request"` Response ArchivalHTTPResponse `json:"response"` From 8ce0c91b46452881e108dfe5ac1fdf139b45c7df Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 26 Aug 2022 12:57:38 +0200 Subject: [PATCH 2/2] ref to previous build that failed --- internal/measurexlite/http.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/measurexlite/http.go b/internal/measurexlite/http.go index 55476afd81..719f15bce8 100644 --- a/internal/measurexlite/http.go +++ b/internal/measurexlite/http.go @@ -173,7 +173,10 @@ func newHTTPHeaderList(header http.Header) (out []model.ArchivalHTTPHeader) { for key := range header { keys = append(keys, key) } - sort.Strings(keys) // ensure the output is consistent, which helps with testing + // ensure the output is consistent, which helps with testing + // for an example of why we need to sort headers, see + // https://github.com/ooni/probe-engine/pull/751/checks?check_run_id=853562310 + sort.Strings(keys) for _, key := range keys { for _, value := range header[key] { out = append(out, model.ArchivalHTTPHeader{