From 1a1d3126aec8f3ce1b562dcdb37530fb27dff9b5 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Fri, 26 Aug 2022 16:42:48 +0200 Subject: [PATCH] feat(webconnectivity): long-term-evolution prototype (#882) See https://github.com/ooni/probe/issues/2237 --- internal/experiment/webconnectivity/README.md | 109 ++++ .../webconnectivity/analysiscore.go | 144 +++++ .../experiment/webconnectivity/analysisdns.go | 368 +++++++++++++ .../webconnectivity/analysishttpcore.go | 145 +++++ .../webconnectivity/analysishttpdiff.go | 246 +++++++++ .../webconnectivity/analysistcpip.go | 82 +++ .../webconnectivity/cleartextflow.go | 287 ++++++++++ internal/experiment/webconnectivity/config.go | 8 + .../experiment/webconnectivity/control.go | 177 +++++++ .../experiment/webconnectivity/dnscache.go | 37 ++ .../webconnectivity/dnsresolvers.go | 498 ++++++++++++++++++ .../experiment/webconnectivity/dnswhoami.go | 79 +++ internal/experiment/webconnectivity/doc.go | 7 + .../experiment/webconnectivity/inputparser.go | 63 +++ .../experiment/webconnectivity/measurer.go | 135 +++++ .../experiment/webconnectivity/secureflow.go | 339 ++++++++++++ .../experiment/webconnectivity/summary.go | 23 + .../experiment/webconnectivity/testkeys.go | 291 ++++++++++ internal/registry/factory.go | 3 +- internal/registry/webconnectivityv05.go | 27 + 20 files changed, 3067 insertions(+), 1 deletion(-) create mode 100644 internal/experiment/webconnectivity/README.md create mode 100644 internal/experiment/webconnectivity/analysiscore.go create mode 100644 internal/experiment/webconnectivity/analysisdns.go create mode 100644 internal/experiment/webconnectivity/analysishttpcore.go create mode 100644 internal/experiment/webconnectivity/analysishttpdiff.go create mode 100644 internal/experiment/webconnectivity/analysistcpip.go create mode 100644 internal/experiment/webconnectivity/cleartextflow.go create mode 100644 internal/experiment/webconnectivity/config.go create mode 100644 internal/experiment/webconnectivity/control.go create mode 100644 internal/experiment/webconnectivity/dnscache.go create mode 100644 internal/experiment/webconnectivity/dnsresolvers.go create mode 100644 internal/experiment/webconnectivity/dnswhoami.go create mode 100644 internal/experiment/webconnectivity/doc.go create mode 100644 internal/experiment/webconnectivity/inputparser.go create mode 100644 internal/experiment/webconnectivity/measurer.go create mode 100644 internal/experiment/webconnectivity/secureflow.go create mode 100644 internal/experiment/webconnectivity/summary.go create mode 100644 internal/experiment/webconnectivity/testkeys.go create mode 100644 internal/registry/webconnectivityv05.go diff --git a/internal/experiment/webconnectivity/README.md b/internal/experiment/webconnectivity/README.md new file mode 100644 index 0000000000..194dec7fb5 --- /dev/null +++ b/internal/experiment/webconnectivity/README.md @@ -0,0 +1,109 @@ +# webconnectivity + +This directory contains a new implementation of [Web Connectivity]( +https://github.com/ooni/spec/blob/master/nettests/ts-017-web-connectivity.md). + +As of 2022-08-26, this code is experimental and is not selected +by default when you run the `websites` group. You can select this +implementation with `miniooni` using `miniooni web_connectivity@v0.5` +from the command line. + +Issue [#2237](https://github.com/ooni/probe/issues/2237) explains the rationale +behind writing this new implementation. + +## Implementation overview + +The experiment measures a single URL at a time. The OONI Engine invokes the +`Run` method inside the [measurer.go](measurer.go) file. + +This code starts a number of background tasks, waits for them to complete, and +finally calls `TestKeys.finalize` to finalize the content of the JSON measurement. + +The first task that is started deals with DNS and lives in the +[dnsresolvers.go](dnsresolvers.go) file. This task is responsible for +resolving the domain inside the URL into `0..N` IP addresses. + +The domain resolution includes the system resolver and a DNS-over-UDP +resolver. The implementaion _may_ do more than that, but this is the +bare minimum we're feeling like documenting right now. (We need to +experiment a bit more to understand what else we can do there, hence +the code is _probably_ doing more than just that.) + +Once we know the `0..N` IP addresses for the domain we do the following: + +1. start a background task to communicate with the Web Connectivity +test helper, using code inside [control.go](control.go); + +2. start an endpoint measurement task for each IP adddress (which of +course only happens when we know _at least_ one addr). + +Regarding starting endpoint measurements, we follow this policy: + +1. if the original URL is `http://...` then we start a cleartext task +and an encrypted task for each address using ports `80` and `443` +respectively. + +2. if it's `https://...`, then we only start encrypted tasks. + +Cleartext tasks are implemented by [cleartextflow.go](cleartextflow.go) while +the encrypted tasks live in [secureflow.go](secureflow.go). + +A cleartext task does the following: + +1. TCP connect; + +2. additionally, the first task to establish a connection also performs +a GET request to fetch a webpage (we cannot GET for all connections, because +that would be `websteps` and would require a different data format). + +An encrypted task does the following: + +1. TCP connect; + +2. TLS handshake; + +3. additionally, the first task to handshake also performs +a GET request to fetch a webpage _iff_ the input URL was `https://...` (we cannot GET +for all connections, because that would be `websteps` and would require a +different data format). + +If fetching the webpage returns a redirect, we start a new DNS task passing it +the redirect URL as the new URL to measure. We do not call the test helper again +when this happens, though. The Web Connectivity test helper already follows the whole +redirect chain, so we would need to change the test helper to get information on +each flow. When this will happen, this experiment will probably not be Web Connectivity +anymore, but rather some form of [websteps](https://github.com/bassosimone/websteps-illustrated/). + +Additionally, when the test helper terminates, we run TCP connect and TLS handshake +(when applicable) for new IP addresses discovered using the test helper that were +previously unknown to the probe, thus collecting extra information. This logic lives +inside the [control.go](control.go) file. + +As previously mentioned, when all tasks complete, we call `TestKeys.finalize`. + +In turn, this function analyzes the collected data by calling code implemented +inside the following files: + +- [analysiscore.go](analysiscore.go) contains the core analysis algorithm; + +- [analysisdns.go](analysisdns.go) contains DNS specific analysis; + +- [analysishttpcore.go](analysishttpcore.go) contains the bulk of the HTTP +analysis, where we mainly determine TLS blocking; + +- [analysishttpdiff.go](analysishttpdiff.go) contains the HTTP diff algorithm; + +- [analysistcpip.go](analysistcpip.go) checks for TCP/IP blocking. + +We emit the `blocking` and `accessible` keys we emitted before as well as new +keys, prefixed by `x_` to indicate that they're experimental. + +## Limitations and next steps + +We need to extend the Web Connectivity test helper to return us information +about TLS handshakes with IP addresses discovered by the probe. This information +would allow us to make more precise TLS blocking statements. + +Further changes are probably possible. Departing too radically from the Web +Connectivity model, though, will lead us to have a `websteps` implementation (but +then the data model would most likely be different). diff --git a/internal/experiment/webconnectivity/analysiscore.go b/internal/experiment/webconnectivity/analysiscore.go new file mode 100644 index 0000000000..1081dd68af --- /dev/null +++ b/internal/experiment/webconnectivity/analysiscore.go @@ -0,0 +1,144 @@ +package webconnectivity + +import "github.com/ooni/probe-cli/v3/internal/model" + +// +// Core analysis +// + +// These flags determine the context of TestKeys.Blocking. However, while .Blocking +// is an enumeration, these flags allow to describe multiple blocking methods. +const ( + // analysisFlagDNSBlocking indicates there's blocking at the DNS level. + analysisFlagDNSBlocking = 1 << iota + + // analysisFlagTCPIPBlocking indicates there's blocking at the TCP/IP level. + analysisFlagTCPIPBlocking + + // analysisFlagTLSBlocking indicates there were TLS issues. + analysisFlagTLSBlocking + + // analysisFlagHTTPBlocking indicates there was an HTTP failure. + analysisFlagHTTPBlocking + + // analysisFlagHTTPDiff indicates there's an HTTP diff. + analysisFlagHTTPDiff + + // analysisFlagSuccess indicates we did not detect any blocking. + analysisFlagSuccess +) + +// analysisToplevel is the toplevel function that analyses the results +// of the experiment once all network tasks have completed. +// +// The ultimate objective of this function is to set the toplevel flags +// used by the backend to score results. These flags are: +// +// - blocking (and x_blocking_flags) which contain information about +// the detected blocking method (or methods); +// +// - accessible which contains information on whether we think we +// could access the resource somehow. +// +// Originally, Web Connectivity only had a blocking scalar value so +// we could see ourselves in one of the following cases: +// +// +----------+------------+--------------------------+ +// | Blocking | Accessible | Meaning | +// +----------+------------+--------------------------+ +// | null | null | Probe analysis error | +// +----------+------------+--------------------------+ +// | false | true | We detected no blocking | +// +----------+------------+--------------------------+ +// | "..." | false | We detected blocking | +// +----------+------------+--------------------------+ +// +// While it would be possible in this implementation, which has a granular +// definition of blocking (x_blocking_flags), to set accessible to mean +// whether we could access the resource in some conditions, it seems quite +// dangerous to deviate from the original behavior. +// +// Our code will NEVER set .Blocking or .Accessible outside of this function +// and we'll instead rely on XBlockingFlags. This function's job is to call +// other functions that compute the .XBlockingFlags and then to assign the value +// of .Blocking and .Accessible from the .XBlockingFlags value. +// +// Accordingly, this is how we map the value of the .XBlockingFlags to the +// values of .Blocking and .Accessible: +// +// +--------------------------------------+----------------+-------------+ +// | XBlockingFlags | .Blocking | .Accessible | +// +--------------------------------------+----------------+-------------+ +// | (& DNSBlocking) != 0 | "dns" | false | +// +--------------------------------------+----------------+-------------+ +// | (& TCPIPBlocking) != 0 | "tcp_ip" | false | +// +--------------------------------------+----------------+-------------+ +// | (& (TLSBlocking|HTTPBlocking)) != 0 | "http-failure" | false | +// +--------------------------------------+----------------+-------------+ +// | (& HTTPDiff) != 0 | "http-diff" | false | +// +--------------------------------------+----------------+-------------+ +// | == FlagSuccess | false | true | +// +--------------------------------------+----------------+-------------+ +// | otherwise | null | null | +// +--------------------------------------+----------------+-------------+ +// +// It's a very simple rule, that should preserve previous semantics. +func (tk *TestKeys) analysisToplevel(logger model.Logger) { + // Since we run after all tasks have completed (or so we assume) we're + // not going to use any form of locking here. + + // these functions compute the value of XBlockingFlags + tk.analysisDNSToplevel(logger) + tk.analysisTCPIPToplevel(logger) + tk.analysisHTTPToplevel(logger) + + // now, let's determine .Accessible and .Blocking + switch { + case (tk.BlockingFlags & analysisFlagDNSBlocking) != 0: + tk.Blocking = "dns" + tk.Accessible = false + logger.Warnf( + "ANOMALY: flags=%d accessible=%+v, blocking=%+v", + tk.BlockingFlags, tk.Accessible, tk.Blocking, + ) + + case (tk.BlockingFlags & analysisFlagTCPIPBlocking) != 0: + tk.Blocking = "tcp_ip" + tk.Accessible = false + logger.Warnf( + "ANOMALY: flags=%d accessible=%+v, blocking=%+v", + tk.BlockingFlags, tk.Accessible, tk.Blocking, + ) + + case (tk.BlockingFlags & (analysisFlagTLSBlocking | analysisFlagHTTPBlocking)) != 0: + tk.Blocking = "http-failure" + tk.Accessible = false + logger.Warnf("ANOMALY: flags=%d accessible=%+v, blocking=%+v", + tk.BlockingFlags, tk.Accessible, tk.Blocking, + ) + + case (tk.BlockingFlags & analysisFlagHTTPDiff) != 0: + tk.Blocking = "http-diff" + tk.Accessible = false + logger.Warnf( + "ANOMALY: flags=%d accessible=%+v, blocking=%+v", + tk.BlockingFlags, tk.Accessible, tk.Blocking, + ) + + case tk.BlockingFlags == analysisFlagSuccess: + tk.Blocking = false + tk.Accessible = true + logger.Infof( + "SUCCESS: flags=%d accessible=%+v, blocking=%+v", + tk.BlockingFlags, tk.Accessible, tk.Blocking, + ) + + default: + tk.Blocking = nil + tk.Accessible = nil + logger.Warnf( + "UNKNOWN: flags=%d, accessible=%+v, blocking=%+v", + tk.BlockingFlags, tk.Accessible, tk.Blocking, + ) + } +} diff --git a/internal/experiment/webconnectivity/analysisdns.go b/internal/experiment/webconnectivity/analysisdns.go new file mode 100644 index 0000000000..0ae9b6e59f --- /dev/null +++ b/internal/experiment/webconnectivity/analysisdns.go @@ -0,0 +1,368 @@ +package webconnectivity + +// +// DNS analysis +// + +import ( + "net" + "net/url" + + "github.com/ooni/probe-cli/v3/internal/engine/geolocate" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +const ( + // AnalysisDNSBogon indicates we got any bogon reply + AnalysisDNSBogon = 1 << iota + + // AnalysisDNSUnexpectedFailure indicates the TH could + // resolve a domain while the probe couldn't + AnalysisDNSUnexpectedFailure + + // AnalysisDNSUnexpectedAddrs indicates the TH resolved + // different addresses from the probe + AnalysisDNSUnexpectedAddrs +) + +// analysisDNSToplevel is the toplevel analysis function for DNS results. +// +// The goals of this function are the following: +// +// 1. Set the legacy .DNSExperimentFailure field to the failure value of the +// first DNS query that failed among the ones we actually tried. Because we +// have multiple queries, unfortunately we are forced to pick one error among +// possibly many to assign to this field. This is why I consider it legacy. +// +// 2. Compute the XDNSFlags value. +// +// From the XDNSFlags value, we determine, in turn DNSConsistency and +// XBlockingFlags according to the following decision table: +// +// +-----------+----------------+---------------------+ +// | XDNSFlags | DNSConsistency | XBlockingFlags | +// +-----------+----------------+---------------------+ +// | 0 | "consistent" | no change | +// +-----------+----------------+---------------------+ +// | nonzero | "inconsistent" | set FlagDNSBlocking | +// +-----------+----------------+---------------------+ +// +// We explain how XDNSFlags is determined in the documentation of +// the functions that this function calls to do its job. +func (tk *TestKeys) analysisDNSToplevel(logger model.Logger) { + tk.analysisDNSExperimentFailure() + tk.analysisDNSBogon(logger) + tk.analysisDNSUnexpectedFailure(logger) + tk.analysisDNSUnexpectedAddrs(logger) + if tk.DNSFlags != 0 { + logger.Warn("DNSConsistency: inconsistent") + tk.DNSConsistency = "inconsistent" + tk.BlockingFlags |= analysisFlagDNSBlocking + } else { + logger.Info("DNSConsistency: consistent") + tk.DNSConsistency = "consistent" + } +} + +// analysisDNSExperimentFailure sets the legacy DNSExperimentFailure field. +func (tk *TestKeys) analysisDNSExperimentFailure() { + for _, query := range tk.Queries { + if fail := query.Failure; fail != nil { + if query.QueryType == "AAAA" && *query.Failure == netxlite.FailureDNSNoAnswer { + // maybe this heuristic could be further improved by checking + // whether the TH did actually see any IPv6 address? + continue + } + tk.DNSExperimentFailure = fail + return + } + } +} + +// analysisDNSBogon computes the AnalysisDNSBogon flag. We set this flag if +// we dectect any bogon in the .Queries field of the TestKeys. +func (tk *TestKeys) analysisDNSBogon(logger model.Logger) { + for _, query := range tk.Queries { + for _, answer := range query.Answers { + switch answer.AnswerType { + case "A": + if net.ParseIP(answer.IPv4) != nil && netxlite.IsBogon(answer.IPv4) { + logger.Warnf("DNS: BOGON %s in #%d", answer.IPv4, query.TransactionID) + tk.DNSFlags |= AnalysisDNSBogon + // continue processing so we print all the bogons we have + } + case "AAAA": + if net.ParseIP(answer.IPv6) != nil && netxlite.IsBogon(answer.IPv6) { + logger.Warnf("DNS: BOGON %s in #%d", answer.IPv6, query.TransactionID) + tk.DNSFlags |= AnalysisDNSBogon + // continue processing so we print all the bogons we have + } + default: + // nothing + } + } + } +} + +// analysisDNSUnexpectedFailure computes the AnalysisDNSUnexpectedFailure flags. We say +// a failure is unexpected when the TH could resolve a domain and the probe couldn't. +func (tk *TestKeys) analysisDNSUnexpectedFailure(logger model.Logger) { + // make sure we have control before proceeding futher + if tk.Control == nil || tk.ControlRequest == nil { + return + } + + // obtain thRequest and thResponse as shortcuts + thRequest := tk.ControlRequest + thResponse := tk.Control + + // obtain the domain that the TH has queried for + URL, err := url.Parse(thRequest.HTTPRequest) + if err != nil { + return // this looks like a bug + } + domain := URL.Hostname() + + // we obviously don't care if the domain was an IP adddress + if net.ParseIP(domain) != nil { + return + } + + // if the control didn't lookup any IP addresses our job here is done + // because we can't say whether we have unexpected failures + hasAddrs := len(thResponse.DNS.Addrs) > 0 + if !hasAddrs { + return + } + + // with TH-resolved addrs, any local query _for the same domain_ queried + // by the probe that contains an error is suspicious + for _, query := range tk.Queries { + if domain != query.Hostname { + continue // not the domain queried by the test helper + } + hasAddrs := false + Loop: + for _, answer := range query.Answers { + switch answer.AnswerType { + case "A", "AAA": + hasAddrs = true + break Loop + } + } + if hasAddrs { + // if the lookup returned any IP address, we are + // not dealing with unexpected failures + continue + } + if query.Failure == nil { + // we expect to see a failure if we don't see + // answers, so this seems a bug? + continue + } + if query.QueryType == "AAAA" && *query.Failure == netxlite.FailureDNSNoAnswer { + // maybe this heuristic could be further improved by checking + // whether the TH did actually see any IPv6 address? + continue + } + logger.Warnf("DNS: unexpected failure %s in #%d", *query.Failure, query.TransactionID) + tk.DNSFlags |= AnalysisDNSUnexpectedFailure + // continue processing so we print all the unexpected failures + } +} + +// analysisDNSUnexpectedAddrs computes the AnalysisDNSUnexpectedAddrs flags. This +// algorithm builds upon the original DNSDiff algorithm by introducing an additional +// TLS based heuristic for determining whether an IP address was legit. +func (tk *TestKeys) analysisDNSUnexpectedAddrs(logger model.Logger) { + // make sure we have control before proceeding futher + if tk.Control == nil || tk.ControlRequest == nil { + return + } + + // obtain thRequest and thResponse as shortcuts + thRequest := tk.ControlRequest + thResponse := tk.Control + + // obtain the domain that the TH has queried for + URL, err := url.Parse(thRequest.HTTPRequest) + if err != nil { + return // this looks like a bug + } + domain := URL.Hostname() + + // we obviously don't care if the domain was an IP adddress + if net.ParseIP(domain) != nil { + return + } + + // if the control didn't resolve any IP address, then we basically + // cannot run this algorithm at all + thAddrs := thResponse.DNS.Addrs + if len(thAddrs) <= 0 { + return + } + + // gather all the IP addresses queried by the probe + // for the same domain for which the TH queried. + var probeAddrs []string + for _, query := range tk.Queries { + if domain != query.Hostname { + continue // not the domain the TH queried for + } + for _, answer := range query.Answers { + switch answer.AnswerType { + case "A": + probeAddrs = append(probeAddrs, answer.IPv4) + case "AAAA": + probeAddrs = append(probeAddrs, answer.IPv6) + } + } + } + + // if the probe has not collected any addr for the same domain, it's + // definitely suspicious and counts as a difference + if len(probeAddrs) <= 0 { + logger.Warnf("DNS: no IP address resolved by the probe") + tk.DNSFlags |= AnalysisDNSUnexpectedAddrs + return + } + + // if there are no different addresses between the probe and the TH then + // our job here is done and we can just stop searching + differentAddrs := tk.analysisDNSDiffAddrs(probeAddrs, thAddrs) + if len(differentAddrs) <= 0 { + return + } + + // now, let's exclude the differentAddrs for which we successfully + // completed a TLS handshake: those should be good addrs + withoutHandshake := tk.findAddrsWithoutTLSHandshake(domain, differentAddrs) + if len(withoutHandshake) <= 0 { + return + } + + // as a last resort, accept the addresses without an handshake whose + // ASN overlaps with ASNs resolved by the TH + differentASNs := tk.analysisDNSDiffASN(withoutHandshake, thAddrs) + if len(differentASNs) <= 0 { + return + } + + // otherwise, conclude we have unexpected probe addrs + logger.Warnf( + "DNSDiff: differentAddrs: %+v; withoutHandshake: %+v; differentASNs: %+v", + differentAddrs, withoutHandshake, differentASNs, + ) + tk.DNSFlags |= AnalysisDNSUnexpectedAddrs +} + +// analysisDNSDiffAddrs returns all the IP addresses that are +// resolved by the probe but not by the test helper. +func (tk *TestKeys) analysisDNSDiffAddrs(probeAddrs, thAddrs []string) (diff []string) { + const ( + inProbe = 1 << iota + inTH + ) + mapping := make(map[string]int) + for _, addr := range probeAddrs { + mapping[addr] |= inProbe + } + for _, addr := range thAddrs { + mapping[addr] = inTH + } + for addr, where := range mapping { + if (where & inTH) == 0 { + diff = append(diff, addr) + } + } + return +} + +// analysisDNSDiffASN returns whether there are IP addresses in the probe's +// list with different ASNs from the ones in the TH's list. +func (tk *TestKeys) analysisDNSDiffASN(probeAddrs, thAddrs []string) (asns []uint) { + const ( + inProbe = 1 << iota + inTH + ) + mapping := make(map[uint]int) + for _, addr := range probeAddrs { + asn, _, _ := geolocate.LookupASN(addr) + mapping[asn] |= inProbe // including the zero ASN that means unknown + } + for _, addr := range thAddrs { + asn, _, _ := geolocate.LookupASN(addr) + mapping[asn] |= inTH // including the zero ASN that means unknown + } + for asn, where := range mapping { + if (where & inTH) == 0 { + asns = append(asns, asn) + } + } + return +} + +// findAddrsWithoutTLSHandshake computes the list of probe discovered [addresses] +// for which we couldn't successfully perform a TLS handshake for the given [domain]. +func (tk *TestKeys) findAddrsWithoutTLSHandshake(domain string, addresses []string) (output []string) { + const ( + resolved = 1 << iota + handshakeOK + ) + mapping := make(map[string]int) + + // fill the input map with the addresses we're interested to analyze + for _, addr := range addresses { + mapping[addr] = 0 + } + + // flag the subset of addresses resolved by the probe + for _, query := range tk.Queries { + for _, answer := range query.Answers { + var addr string + switch answer.AnswerType { + case "A": + addr = answer.IPv4 + case "AAAA": + addr = answer.IPv6 + default: + continue + } + if _, found := mapping[addr]; !found { + continue // we're not interested into this addr + } + mapping[addr] |= resolved + } + } + + // flag the subset of addrs with successful handshake for the right SNI + for _, thx := range tk.TLSHandshakes { + addr, _, err := net.SplitHostPort(thx.Address) + if err != nil { + continue // looks like a bug + } + if thx.Failure != nil { + continue // this handshake failed + } + if _, found := mapping[addr]; !found { + continue // we're not interested into this addr + } + if thx.ServerName != domain { + continue // the SNI is different, so... + } + mapping[addr] |= handshakeOK + } + + // compute the list of addresses without the handshakeOK flag + for addr, flags := range mapping { + if flags == 0 { + continue // this looks like a bug + } + if (flags & (resolved | handshakeOK)) == resolved { + output = append(output, addr) + } + } + return +} diff --git a/internal/experiment/webconnectivity/analysishttpcore.go b/internal/experiment/webconnectivity/analysishttpcore.go new file mode 100644 index 0000000000..51ab2359d0 --- /dev/null +++ b/internal/experiment/webconnectivity/analysishttpcore.go @@ -0,0 +1,145 @@ +package webconnectivity + +// +// HTTP core analysis +// + +import ( + "net/url" + + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// analysisHTTPToplevel is the toplevel analysis function for HTTP results. +// +// This function's job is to determine whether there were unexpected TLS +// handshake results (compared to what the TH observed), or unexpected +// failures during HTTP round trips (using the TH as benchmark), or whether +// the obtained body differs from the one obtained by the TH. +// +// This results in possibly setting these XBlockingFlags: +// +// - analysisFlagTLSBlocking +// +// - analysisFlagHTTPBlocking +// +// - analysisFlagHTTPDiff +// +// In websteps fashion, we don't stop at the first failure, rather we +// process all the available data and evaluate all possible errors. +func (tk *TestKeys) analysisHTTPToplevel(logger model.Logger) { + // don't perform any analysis without TH data + if tk.Control == nil || tk.ControlRequest == nil { + return + } + ctrl := tk.Control.HTTPRequest + + // don't perform any analysis if the TH's HTTP measurement failed + if ctrl.Failure != nil { + return + } + + // determine whether the original URL was HTTPS + origURL, err := url.Parse(tk.ControlRequest.HTTPRequest) + if err != nil { + return // this seeems like a bug? + } + isHTTPS := origURL.Scheme == "https" + + // determine whether we had any TLS handshake issue and, in such a case, + // declare that we had a case of "http-failure" through TLS. + // + // Note that this would eventually count as an "http-failure" for .Blocking + // because Web Connectivity did not have a concept of TLS based blocking. + if tk.hasWellKnownTLSHandshakeIssues(isHTTPS, logger) { + tk.BlockingFlags |= analysisFlagTLSBlocking + // continue processing + } + + // determine whether we had well known cleartext HTTP round trip issues + // and, in such a case, declare we had an "http-failure". + if tk.hasWellKnownHTTPRoundTripIssues(logger) { + tk.BlockingFlags |= analysisFlagHTTPBlocking + // continue processing + } + + // if we don't have any request to check, there's not much more we + // can actually do here, so let's just return. + if len(tk.Requests) <= 0 { + return + } + + // if the request has failed in any other way, we don't know. By convention, the first + // entry in the tk.Requests array is the last entry that was measured. + finalRequest := tk.Requests[0] + if finalRequest.Failure != nil { + return + } + + // fallback to the HTTP diff algo. + tk.analysisHTTPDiff(logger, finalRequest, &ctrl) +} + +// hasWellKnownTLSHandshakeIssues returns true in case we observed +// a set of well-known issues during the TLS handshake. +func (tk *TestKeys) hasWellKnownTLSHandshakeIssues(isHTTPS bool, logger model.Logger) (result bool) { + // TODO(bassosimone): we should return TLS information in the TH + // such that we can perform a TCP-like check. For now, instead, we + // only perform comparison when the initial URL was HTTPS. Given + // that we unconditionally check for HTTPS even when the URL is HTTP, + // we cannot blindly treat all TLS errors as blocking. A website + // may just not have HTTPS. While in the obvious cases we will see + // certificate errors, in some cases it may actually timeout. + if isHTTPS { + for _, thx := range tk.TLSHandshakes { + fail := thx.Failure + if fail == nil { + continue // this handshake succeded, so skip it + } + switch *fail { + case netxlite.FailureConnectionReset, + netxlite.FailureGenericTimeoutError, + netxlite.FailureEOFError, + netxlite.FailureSSLInvalidHostname, + netxlite.FailureSSLInvalidCertificate, + netxlite.FailureSSLUnknownAuthority: + logger.Warnf( + "TLS: endpoint %s fails with %s (see #%d)", + thx.Address, *fail, thx.TransactionID, + ) + result = true // flip the result but continue looping so we print them all + default: + // check next handshake + } + } + } + return +} + +// hasWellKnownHTTPRoundTripIssues checks whether any HTTP round +// trip failed in a well-known suspicious way +func (tk *TestKeys) hasWellKnownHTTPRoundTripIssues(logger model.Logger) (result bool) { + for _, rtx := range tk.Requests { + fail := rtx.Failure + if fail == nil { + // This one succeded, so skip it. Note that, in principle, we know + // the fist entry is the last request occurred, but I really do not + // want to embed this bad assumption in one extra place! + continue + } + switch *fail { + case netxlite.FailureConnectionReset, + netxlite.FailureGenericTimeoutError, + netxlite.FailureEOFError: + logger.Warnf( + "TLS: endpoint %s fails with %s (see #%d)", + "N/A", *fail, rtx.TransactionID, // TODO(bassosimone): implement + ) + result = true // flip the result but continue looping so we print them all + default: + // check next round trip + } + } + return +} diff --git a/internal/experiment/webconnectivity/analysishttpdiff.go b/internal/experiment/webconnectivity/analysishttpdiff.go new file mode 100644 index 0000000000..1773cac63f --- /dev/null +++ b/internal/experiment/webconnectivity/analysishttpdiff.go @@ -0,0 +1,246 @@ +package webconnectivity + +// +// HTTP diff analysis +// + +import ( + "net/url" + "reflect" + "strings" + + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +// analysisHTTPDiff computes the HTTP diff between the final request-response +// observed by the probe and the TH's result. The caller is responsible of passing +// us a valid probe observation and a valid TH observation with nil failure. +func (tk *TestKeys) analysisHTTPDiff(logger model.Logger, + probe *model.ArchivalHTTPRequestResult, th *webconnectivity.ControlHTTPRequestResult) { + // make sure the caller respected the contract + runtimex.PanicIfTrue( + probe.Failure != nil || th.Failure != nil, + "the caller should have passed us successful HTTP observations", + ) + + // if we're dealing with an HTTPS request, don't perform any comparison + // under the assumption that we're good if we're using TLS + URL, err := url.Parse(probe.Request.URL) + if err != nil { + return // looks like a bug + } + if URL.Scheme == "https" { + logger.Infof("HTTP: HTTPS && no error => #%d is successful", probe.TransactionID) + tk.BlockingFlags |= analysisFlagSuccess + return + } + + // original HTTP diff algorithm adapted for this implementation + tk.httpDiffBodyLengthChecks(probe, th) + tk.httpDiffStatusCodeMatch(probe, th) + tk.httpDiffHeadersMatch(probe, th) + tk.httpDiffTitleMatch(probe, th) + + if tk.StatusCodeMatch != nil && *tk.StatusCodeMatch { + if tk.BodyLengthMatch != nil && *tk.BodyLengthMatch { + logger.Infof( + "HTTP: statusCodeMatch && bodyLengthMatch => #%d is successful", + probe.TransactionID, + ) + tk.BlockingFlags |= analysisFlagSuccess + return + } + if tk.HeadersMatch != nil && *tk.HeadersMatch { + logger.Infof( + "HTTP: statusCodeMatch && headersMatch => #%d is successful", + probe.TransactionID, + ) + tk.BlockingFlags |= analysisFlagSuccess + return + } + if tk.TitleMatch != nil && *tk.TitleMatch { + logger.Infof( + "HTTP: statusCodeMatch && titleMatch => #%d is successful", + probe.TransactionID, + ) + tk.BlockingFlags |= analysisFlagSuccess + return + } + } + + tk.BlockingFlags |= analysisFlagHTTPDiff + logger.Warnf("HTTP: it seems #%d is a case of httpDiff", probe.TransactionID) +} + +// httpDiffBodyLengthChecks compares the bodies lengths. +func (tk *TestKeys) httpDiffBodyLengthChecks( + probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { + control := ctrl.BodyLength + if control <= 0 { + return // no actual length + } + response := probe.Response + if response.BodyIsTruncated { + return // cannot trust body length in this case + } + measurement := int64(len(response.Body.Value)) + if measurement <= 0 { + return // no actual length + } + const bodyProportionFactor = 0.7 + var proportion float64 + if measurement >= control { + proportion = float64(control) / float64(measurement) + } else { + proportion = float64(measurement) / float64(control) + } + good := proportion > bodyProportionFactor + tk.BodyLengthMatch = &good +} + +// httpDiffStatusCodeMatch compares the status codes. +func (tk *TestKeys) httpDiffStatusCodeMatch( + probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { + control := ctrl.StatusCode + measurement := probe.Response.Code + if control <= 0 { + return // no real status code + } + if measurement <= 0 { + return // no real status code + } + if control/100 != 2 { + return // avoid comparison if it seems the TH failed + } + good := control == measurement + tk.StatusCodeMatch = &good +} + +// httpDiffHeadersMatch compares the uncommon headers. +func (tk *TestKeys) httpDiffHeadersMatch( + probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { + control := ctrl.Headers + measurement := probe.Response.Headers + if len(control) <= 0 || len(measurement) <= 0 { + return + } + // Implementation note: using map because we only care about the + // keys being different and we ignore the values. + const ( + inMeasurement = 1 << 0 + inControl = 1 << 1 + inBoth = inMeasurement | inControl + ) + commonHeaders := map[string]bool{ + "date": true, + "content-type": true, + "server": true, + "cache-control": true, + "vary": true, + "set-cookie": true, + "location": true, + "expires": true, + "x-powered-by": true, + "content-encoding": true, + "last-modified": true, + "accept-ranges": true, + "pragma": true, + "x-frame-options": true, + "etag": true, + "x-content-type-options": true, + "age": true, + "via": true, + "p3p": true, + "x-xss-protection": true, + "content-language": true, + "cf-ray": true, + "strict-transport-security": true, + "link": true, + "x-varnish": true, + } + matching := make(map[string]int) + ours := make(map[string]bool) + for key := range measurement { + key = strings.ToLower(key) + if _, ok := commonHeaders[key]; !ok { + matching[key] |= inMeasurement + } + ours[key] = true + } + theirs := make(map[string]bool) + for key := range control { + key = strings.ToLower(key) + if _, ok := commonHeaders[key]; !ok { + matching[key] |= inControl + } + theirs[key] = true + } + // if they are equal we're done + if good := reflect.DeepEqual(ours, theirs); good { + tk.HeadersMatch = &good + return + } + // compute the intersection of uncommon headers + found := false + for _, value := range matching { + if (value & inBoth) == inBoth { + found = true + break + } + } + tk.HeadersMatch = &found +} + +// httpDiffTitleMatch compares the titles. +func (tk *TestKeys) httpDiffTitleMatch( + probe *model.ArchivalHTTPRequestResult, ctrl *webconnectivity.ControlHTTPRequestResult) { + response := probe.Response + if response.Code <= 0 { + return + } + if response.BodyIsTruncated { + return + } + if ctrl.StatusCode <= 0 { + return + } + control := ctrl.Title + measurementBody := response.Body.Value + measurement := webconnectivity.GetTitle(measurementBody) + if control == "" || measurement == "" { + return + } + const ( + inMeasurement = 1 << 0 + inControl = 1 << 1 + inBoth = inMeasurement | inControl + ) + words := make(map[string]int) + // We don't consider to match words that are shorter than 5 + // characters (5 is the average word length for english) + // + // The original implementation considered the word order but + // considering different languages it seems we could have less + // false positives by ignoring the word order. + const minWordLength = 5 + for _, word := range strings.Split(measurement, " ") { + if len(word) >= minWordLength { + words[strings.ToLower(word)] |= inMeasurement + } + } + for _, word := range strings.Split(control, " ") { + if len(word) >= minWordLength { + words[strings.ToLower(word)] |= inControl + } + } + good := true + for _, score := range words { + if (score & inBoth) != inBoth { + good = false + break + } + } + tk.TitleMatch = &good +} diff --git a/internal/experiment/webconnectivity/analysistcpip.go b/internal/experiment/webconnectivity/analysistcpip.go new file mode 100644 index 0000000000..24f6086bf4 --- /dev/null +++ b/internal/experiment/webconnectivity/analysistcpip.go @@ -0,0 +1,82 @@ +package webconnectivity + +// +// TCP/IP analysis +// + +import ( + "fmt" + "net" + + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// analysisTCPIPToplevel is the toplevel analysis function for TCP/IP results. +// +// This algorithm has two objectives: +// +// 1. walk the list of TCP connect attempts and mark each of them as +// Status.Blocked = true | false | null depending on what the TH observed +// for the same set of IP addresses (it's ugly to modify a data struct +// in place, but this algorithm is defined by the spec); +// +// 2. assign the analysisFlagTCPIPBlocking flag to XBlockingFlags if +// we see any TCP endpoint for which Status.Blocked is true. +func (tk *TestKeys) analysisTCPIPToplevel(logger model.Logger) { + // if we don't have a control result, do nothing. + if tk.Control == nil || len(tk.Control.TCPConnect) <= 0 { + return + } + var ( + istrue = true + isfalse = false + ) + + // TODO(bassosimone): the TH should measure also some of the IP addrs it discovered + // and the probe did not discover to improve the analysis. Otherwise, the probe + // is fooled by the TH also failing for countries that return random IP addresses + // that are actually not working. Yet, ooni/data would definitely see this. + + // walk the list of probe results and compare with TH results + for _, entry := range tk.TCPConnect { + // skip successful entries + failure := entry.Status.Failure + if failure == nil { + entry.Status.Blocked = &isfalse + continue // did not fail + } + + // make sure we exclude the IPv6 failures caused by lack of + // proper IPv6 support by the probe + ipv6, err := netxlite.IsIPv6(entry.IP) + if err != nil { + continue // looks like a bug + } + if ipv6 { + ignore := (*failure == netxlite.FailureNetworkUnreachable || + *failure == netxlite.FailureHostUnreachable) + if ignore { + // this occurs when we don't have IPv6 on the probe + continue + } + } + + // obtain the corresponding endpoint + epnt := net.JoinHostPort(entry.IP, fmt.Sprintf("%d", entry.Port)) + ctrl, found := tk.Control.TCPConnect[epnt] + if !found { + continue // only the probe tested this, so hard to say anything... + } + if ctrl.Failure != nil { + // If the TH failed as well, don't set XBlockingFlags and + // also don't bother with setting .Status.Blocked thus leaving + // it null. Performing precise error mapping should be a job + // for the pipeline rather than for the probe. + continue + } + logger.Warnf("TCP/IP: endpoint %s is blocked (see #%d)", epnt, entry.TransactionID) + entry.Status.Blocked = &istrue + tk.BlockingFlags |= analysisFlagTCPIPBlocking + } +} diff --git a/internal/experiment/webconnectivity/cleartextflow.go b/internal/experiment/webconnectivity/cleartextflow.go new file mode 100644 index 0000000000..a1712d8a61 --- /dev/null +++ b/internal/experiment/webconnectivity/cleartextflow.go @@ -0,0 +1,287 @@ +package webconnectivity + +// +// CleartextFlow +// +// Generated by `boilerplate' using the http template. +// + +import ( + "context" + "io" + "net" + "net/http" + "net/url" + "sync" + "time" + + "github.com/ooni/probe-cli/v3/internal/atomicx" + "github.com/ooni/probe-cli/v3/internal/measurexlite" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// Measures HTTP endpoints. +// +// The zero value of this structure IS NOT valid and you MUST initialize +// all the fields marked as MANDATORY before using this structure. +type CleartextFlow struct { + // Address is the MANDATORY address to connect to. + Address string + + // DNSCache is the MANDATORY DNS cache. + DNSCache *DNSCache + + // IDGenerator is the MANDATORY atomic int64 to generate task IDs. + IDGenerator *atomicx.Int64 + + // Logger is the MANDATORY logger to use. + Logger model.Logger + + // Sema is the MANDATORY semaphore to allow just a single + // connection to perform the HTTP transaction. + Sema <-chan any + + // TestKeys is MANDATORY and contains the TestKeys. + TestKeys *TestKeys + + // ZeroTime is the MANDATORY measurement's zero time. + ZeroTime time.Time + + // WaitGroup is the MANDATORY wait group this task belongs to. + WaitGroup *sync.WaitGroup + + // CookieJar contains the OPTIONAL cookie jar, used for redirects. + CookieJar http.CookieJar + + // FollowRedirects is OPTIONAL and instructs this flow + // to follow HTTP redirects (if any). + FollowRedirects bool + + // HostHeader is the OPTIONAL host header to use. + HostHeader string + + // Referer contains the OPTIONAL referer, used for redirects. + Referer string + + // UDPAddress is the OPTIONAL address of the UDP resolver to use. If this + // field is not set we use a default one (e.g., `8.8.8.8:53`). + UDPAddress string + + // URLPath is the OPTIONAL URL path. + URLPath string + + // URLRawQuery is the OPTIONAL URL raw query. + URLRawQuery string +} + +// Start starts this task in a background goroutine. +func (t *CleartextFlow) Start(ctx context.Context) { + t.WaitGroup.Add(1) + index := t.IDGenerator.Add(1) + go func() { + defer t.WaitGroup.Done() // synchronize with the parent + t.Run(ctx, index) + }() +} + +// Run runs this task in the current goroutine. +func (t *CleartextFlow) Run(parentCtx context.Context, index int64) { + // create trace + trace := measurexlite.NewTrace(index, t.ZeroTime) + + // start the operation logger + ol := measurexlite.NewOperationLogger( + t.Logger, "[#%d] GET http://%s using %s", index, t.HostHeader, t.Address, + ) + + // perform the TCP connect + const tcpTimeout = 10 * time.Second + tcpCtx, tcpCancel := context.WithTimeout(parentCtx, tcpTimeout) + defer tcpCancel() + tcpDialer := trace.NewDialerWithoutResolver(t.Logger) + tcpConn, err := tcpDialer.DialContext(tcpCtx, "tcp", t.Address) + t.TestKeys.AppendTCPConnectResults(trace.TCPConnects()...) + if err != nil { + ol.Stop(err) + return + } + defer func() { + t.TestKeys.AppendNetworkEvents(trace.NetworkEvents()...) + tcpConn.Close() + }() + + alpn := "" // no ALPN because we're not using TLS + + // Only allow N flows to _use_ the connection + select { + case <-t.Sema: + default: + ol.Stop(nil) + return + } + + // create HTTP transport + httpTransport := netxlite.NewHTTPTransport( + t.Logger, + netxlite.NewSingleUseDialer(tcpConn), + netxlite.NewNullTLSDialer(), + ) + + // create HTTP request + const httpTimeout = 10 * time.Second + httpCtx, httpCancel := context.WithTimeout(parentCtx, httpTimeout) + defer httpCancel() + httpReq, err := t.newHTTPRequest(httpCtx) + if err != nil { + if t.Referer == "" { + // when the referer is empty, the failing URL comes from our backend + // or from the user, so it's a fundamental failure. After that, we + // are dealing with websites provided URLs, so we should not flag a + // fundamental failure, because we want to see the measurement submitted. + t.TestKeys.SetFundamentalFailure(err) + } + ol.Stop(err) + return + } + + // perform HTTP transaction + httpResp, httpRespBody, err := t.httpTransaction( + httpCtx, + "tcp", + t.Address, + alpn, + httpTransport, + httpReq, + trace, + ) + if err != nil { + ol.Stop(err) + return + } + + // if enabled, follow possible redirects + t.maybeFollowRedirects(parentCtx, httpResp) + + // TODO: insert here additional code if needed + _ = httpRespBody + + // completed successfully + ol.Stop(nil) +} + +// urlHost computes the host to include into the URL +func (t *CleartextFlow) urlHost(scheme string) (string, error) { + addr, port, err := net.SplitHostPort(t.Address) + if err != nil { + t.Logger.Warnf("BUG: net.SplitHostPort failed for %s: %s", t.Address, err.Error()) + return "", err + } + urlHost := t.HostHeader + if urlHost == "" { + urlHost = addr + } + if port == "80" && scheme == "http" { + return urlHost, nil + } + urlHost = net.JoinHostPort(urlHost, port) + return urlHost, nil +} + +// newHTTPRequest creates a new HTTP request. +func (t *CleartextFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) { + const urlScheme = "http" + urlHost, err := t.urlHost(urlScheme) + if err != nil { + return nil, err + } + httpURL := &url.URL{ + Scheme: urlScheme, + Host: urlHost, + Path: t.URLPath, + RawQuery: t.URLRawQuery, + } + httpReq, err := http.NewRequestWithContext(ctx, "GET", httpURL.String(), nil) + if err != nil { + return nil, err + } + httpReq.Header.Set("Host", t.HostHeader) + httpReq.Header.Set("Accept", model.HTTPHeaderAccept) + httpReq.Header.Set("Accept-Language", model.HTTPHeaderAcceptLanguage) + httpReq.Header.Set("Referer", t.Referer) + httpReq.Header.Set("User-Agent", model.HTTPHeaderUserAgent) + httpReq.Host = t.HostHeader + if t.CookieJar != nil { + for _, cookie := range t.CookieJar.Cookies(httpURL) { + httpReq.AddCookie(cookie) + } + } + return httpReq, nil +} + +// httpTransaction runs the HTTP transaction and saves the results. +func (t *CleartextFlow) httpTransaction(ctx context.Context, network, address, alpn string, + txp model.HTTPTransport, req *http.Request, trace *measurexlite.Trace) (*http.Response, []byte, error) { + const maxbody = 1 << 19 + started := trace.TimeSince(trace.ZeroTime) + resp, err := txp.RoundTrip(req) + var body []byte + if err == nil { + defer resp.Body.Close() + if cookies := resp.Cookies(); t.CookieJar != nil && len(cookies) > 0 { + t.CookieJar.SetCookies(req.URL, cookies) + } + reader := io.LimitReader(resp.Body, maxbody) + body, err = netxlite.ReadAllContext(ctx, reader) + } + finished := trace.TimeSince(trace.ZeroTime) + ev := measurexlite.NewArchivalHTTPRequestResult( + trace.Index, + started, + network, + address, + alpn, + txp.Network(), + req, + resp, + maxbody, + body, + err, + finished, + ) + t.TestKeys.AppendRequests(ev) + return resp, body, err +} + +// maybeFollowRedirects follows redirects if configured and needed +func (t *CleartextFlow) maybeFollowRedirects(ctx context.Context, resp *http.Response) { + if !t.FollowRedirects { + return // not configured + } + switch resp.StatusCode { + case 301, 302, 307, 308: + location, err := resp.Location() + if err != nil { + return // broken response from server + } + t.Logger.Infof("redirect to: %s", location.String()) + resolvers := &DNSResolvers{ + CookieJar: t.CookieJar, + DNSCache: t.DNSCache, + Domain: location.Hostname(), + IDGenerator: t.IDGenerator, + Logger: t.Logger, + TestKeys: t.TestKeys, + URL: location, + ZeroTime: t.ZeroTime, + WaitGroup: t.WaitGroup, + Referer: resp.Request.URL.String(), + Session: nil, // no need to issue another control request + THAddr: "", // ditto + UDPAddress: t.UDPAddress, + } + resolvers.Start(ctx) + default: + // no redirect to follow + } +} diff --git a/internal/experiment/webconnectivity/config.go b/internal/experiment/webconnectivity/config.go new file mode 100644 index 0000000000..f5b6bc248d --- /dev/null +++ b/internal/experiment/webconnectivity/config.go @@ -0,0 +1,8 @@ +package webconnectivity + +// +// Config +// + +// Config contains webconnectivity experiment configuration. +type Config struct{} diff --git a/internal/experiment/webconnectivity/control.go b/internal/experiment/webconnectivity/control.go new file mode 100644 index 0000000000..4a2b3ab593 --- /dev/null +++ b/internal/experiment/webconnectivity/control.go @@ -0,0 +1,177 @@ +package webconnectivity + +import ( + "context" + "net" + "net/url" + "sync" + "time" + + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/httpx" + "github.com/ooni/probe-cli/v3/internal/measurexlite" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// EndpointMeasurementsStarter is used by Control to start extra +// measurements using new IP addrs discovered by the TH. +type EndpointMeasurementsStarter interface { + // startCleartextFlowsWithSema starts a TCP measurement flow for each IP addr. The [sema] + // argument allows to control how many flows are allowed to perform HTTP measurements. Every + // flow will attempt to read from [sema] and won't perform HTTP measurements if a + // nonblocking read fails. Hence, you must create a [sema] channel with buffer equal + // to N and N elements inside it to allow N flows to perform HTTP measurements. Passing + // a nil [sema] causes no flow to attempt HTTP measurements. + startCleartextFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) + + // startSecureFlowsWithSema starts a TCP+TLS measurement flow for each IP addr. See + // the docs of startCleartextFlowsWithSema for more info on the [sema] arg. + startSecureFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) +} + +// Control issues a Control request and saves the results +// inside of the experiment's TestKeys. +// +// The zero value of this structure IS NOT valid and you MUST initialize +// all the fields marked as MANDATORY before using this structure. +type Control struct { + // Addresses contains the MANDATORY addresses we've looked up. + Addresses []string + + // ExtraMeasurementsStarter is MANDATORY and allows this struct to + // start additional measurements using new TH-discovered addrs. + ExtraMeasurementsStarter EndpointMeasurementsStarter + + // Logger is the MANDATORY logger to use. + Logger model.Logger + + // TestKeys is MANDATORY and contains the TestKeys. + TestKeys *TestKeys + + // Session is the MANDATORY session to use. + Session model.ExperimentSession + + // THAddr is the MANDATORY TH's URL. + THAddr string + + // URL is the MANDATORY URL we are measuring. + URL *url.URL + + // WaitGroup is the MANDATORY wait group this task belongs to. + WaitGroup *sync.WaitGroup +} + +// Start starts this task in a background goroutine. +func (c *Control) Start(ctx context.Context) { + c.WaitGroup.Add(1) + go func() { + defer c.WaitGroup.Done() // synchronize with the parent + c.Run(ctx) + }() +} + +// Run runs this task until completion. +func (c *Control) Run(parentCtx context.Context) { + // create a subcontext attached to a maximum timeout + const timeout = 30 * time.Second + opCtx, cancel := context.WithTimeout(parentCtx, timeout) + defer cancel() + + // create control request + var endpoints []string + for _, address := range c.Addresses { + if port := c.URL.Port(); port != "" { // handle the case of a custom port + endpoints = append(endpoints, net.JoinHostPort(address, port)) + continue + } + // otherwise, always attempt to measure both 443 and 80 endpoints + endpoints = append(endpoints, net.JoinHostPort(address, "443")) + endpoints = append(endpoints, net.JoinHostPort(address, "80")) + } + creq := &webconnectivity.ControlRequest{ + HTTPRequest: c.URL.String(), + HTTPRequestHeaders: map[string][]string{ + "Accept": {model.HTTPHeaderAccept}, + "Accept-Language": {model.HTTPHeaderAcceptLanguage}, + "User-Agent": {model.HTTPHeaderUserAgent}, + }, + TCPConnect: endpoints, + } + c.TestKeys.SetControlRequest(creq) + + // TODO(bassosimone): the current TH will not perform TLS measurements for + // 443 endpoints. However, we should modify the TH to do that, such that we're + // able to be more confident about TLS measurements results. + + // create logger for this operation + ol := measurexlite.NewOperationLogger(c.Logger, "control for %s", creq.HTTPRequest) + + // create an API client + clnt := (&httpx.APIClientTemplate{ + Accept: "", + Authorization: "", + BaseURL: c.THAddr, + HTTPClient: c.Session.DefaultHTTPClient(), + Host: "", // use the one inside the URL + LogBody: true, + Logger: c.Logger, + UserAgent: c.Session.UserAgent(), + }).Build() + + // issue the control request and wait for the response + var cresp webconnectivity.ControlResponse + err := clnt.PostJSON(opCtx, "/", creq, &cresp) + if err != nil { + // make sure error is wrapped + err = netxlite.NewTopLevelGenericErrWrapper(err) + c.TestKeys.SetControlFailure(err) + ol.Stop(err) + return + } + + // if the TH returned us addresses we did not previously were + // aware of, make sure we also measure them + c.maybeStartExtraMeasurements(parentCtx, cresp.DNS.Addrs) + + // on success, save the control response + c.TestKeys.SetControl(&cresp) + ol.Stop(nil) +} + +// This function determines whether we should start new +// background measurements for previously unknown IP addrs. +func (c *Control) maybeStartExtraMeasurements(ctx context.Context, thAddrs []string) { + // classify addeesses by who discovered them + const ( + inProbe = 1 << iota + inTH + ) + mapping := make(map[string]int) + for _, addr := range c.Addresses { + mapping[addr] |= inProbe + } + for _, addr := range thAddrs { + mapping[addr] |= inTH + } + + // obtain the TH-only addresses + var thOnly []string + for addr, flags := range mapping { + if (flags & inProbe) != 0 { + continue // discovered by the probe => already tested + } + thOnly = append(thOnly, addr) + } + + // Start extra measurements for TH-only addresses. Because we already + // measured HTTP(S) using IP addrs discovered by the resolvers, we are not + // going to do that again now. I am not sure this is the right policy + // but I think we can just try it and then change if needed... + // + // Also, let's remember that reading from a nil chan blocks forever, so + // we're basically forcing the goroutines to avoid HTTP(S). + var nohttp chan any = nil + c.ExtraMeasurementsStarter.startCleartextFlowsWithSema(ctx, nohttp, thOnly) + c.ExtraMeasurementsStarter.startSecureFlowsWithSema(ctx, nohttp, thOnly) +} diff --git a/internal/experiment/webconnectivity/dnscache.go b/internal/experiment/webconnectivity/dnscache.go new file mode 100644 index 0000000000..7b62c2683b --- /dev/null +++ b/internal/experiment/webconnectivity/dnscache.go @@ -0,0 +1,37 @@ +package webconnectivity + +import "sync" + +// DNSCache wraps a model.Resolver to provide DNS caching. +// +// The zero value is invalid; please, use NewDNSCache to construct. +type DNSCache struct { + // mu provides mutual exclusion. + mu *sync.Mutex + + // values contains already resolved values. + values map[string][]string +} + +// Get gets values from the cache +func (c *DNSCache) Get(domain string) ([]string, bool) { + c.mu.Lock() + values, found := c.values[domain] + c.mu.Unlock() + return values, found +} + +// Set inserts into the cache +func (c *DNSCache) Set(domain string, values []string) { + c.mu.Lock() + c.values[domain] = values + c.mu.Unlock() +} + +// NewDNSCache creates a new DNSCache instance. +func NewDNSCache() *DNSCache { + return &DNSCache{ + mu: &sync.Mutex{}, + values: map[string][]string{}, + } +} diff --git a/internal/experiment/webconnectivity/dnsresolvers.go b/internal/experiment/webconnectivity/dnsresolvers.go new file mode 100644 index 0000000000..be28399573 --- /dev/null +++ b/internal/experiment/webconnectivity/dnsresolvers.go @@ -0,0 +1,498 @@ +package webconnectivity + +// +// DNSResolvers +// +// Generated by `boilerplate' using the multi-resolver template. +// + +import ( + "context" + "math/rand" + "net" + "net/http" + "net/url" + "sync" + "time" + + "github.com/apex/log" + "github.com/ooni/probe-cli/v3/internal/atomicx" + "github.com/ooni/probe-cli/v3/internal/measurexlite" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// Resolves the URL's domain using several resolvers. +// +// The zero value of this structure IS NOT valid and you MUST initialize +// all the fields marked as MANDATORY before using this structure. +type DNSResolvers struct { + // DNSCache is the MANDATORY DNS cache. + DNSCache *DNSCache + + // Domain is the MANDATORY domain to resolve. + Domain string + + // IDGenerator is the MANDATORY atomic int64 to generate task IDs. + IDGenerator *atomicx.Int64 + + // Logger is the MANDATORY logger to use. + Logger model.Logger + + // TestKeys is MANDATORY and contains the TestKeys. + TestKeys *TestKeys + + // URL is the MANDATORY URL we're measuring. + URL *url.URL + + // ZeroTime is the MANDATORY zero time of the measurement. + ZeroTime time.Time + + // WaitGroup is the MANDATORY wait group this task belongs to. + WaitGroup *sync.WaitGroup + + // CookieJar contains the OPTIONAL cookie jar, used for redirects. + CookieJar http.CookieJar + + // Referer contains the OPTIONAL referer, used for redirects. + Referer string + + // Session is the OPTIONAL session. If the session is set, we will use + // it to start the task that issues the control request. This request must + // only be sent during the first iteration. It would be pointless to + // issue such a request for subsequent redirects, because the TH will + // always follow the redirect chain caused by the provided URL. + Session model.ExperimentSession + + // THAddr is the OPTIONAL test helper address. + THAddr string + + // UDPAddress is the OPTIONAL address of the UDP resolver to use. If this + // field is not set we use a default one (e.g., `8.8.8.8:53`). + UDPAddress string +} + +// Start starts this task in a background goroutine. +func (t *DNSResolvers) Start(ctx context.Context) { + t.WaitGroup.Add(1) + go func() { + defer t.WaitGroup.Done() // synchronize with the parent + t.Run(ctx) + }() +} + +// run performs a DNS lookup and returns the looked up addrs +func (t *DNSResolvers) run(parentCtx context.Context) []string { + // create output channels for the lookup + systemOut := make(chan []string) + udpOut := make(chan []string) + httpsOut := make(chan []string) + whoamiSystemV4Out := make(chan []DNSWhoamiInfoEntry) + whoamiUDPv4Out := make(chan []DNSWhoamiInfoEntry) + + // TODO(bassosimone): add opportunistic support for detecting + // whether DNS queries are answered regardless of dest addr by + // sending a few queries to root DNS servers + + udpAddress := t.udpAddress() + + // start asynchronous lookups + go t.lookupHostSystem(parentCtx, systemOut) + go t.lookupHostUDP(parentCtx, udpAddress, udpOut) + go t.lookupHostDNSOverHTTPS(parentCtx, httpsOut) + go t.whoamiSystemV4(parentCtx, whoamiSystemV4Out) + go t.whoamiUDPv4(parentCtx, udpAddress, whoamiUDPv4Out) + + // collect resulting IP addresses (which may be nil/empty lists) + systemAddrs := <-systemOut + udpAddrs := <-udpOut + httpsAddrs := <-httpsOut + + // collect whoami results (which also may be nil/empty) + whoamiSystemV4 := <-whoamiSystemV4Out + whoamiUDPv4 := <-whoamiUDPv4Out + t.TestKeys.WithDNSWhoami(func(di *DNSWhoamiInfo) { + di.SystemV4 = whoamiSystemV4 + di.UDPv4[udpAddress] = whoamiUDPv4 + }) + + // merge the resolved IP addresses + merged := map[string]bool{} + for _, addr := range systemAddrs { + merged[addr] = true + } + for _, addr := range udpAddrs { + merged[addr] = true + } + for _, addr := range httpsAddrs { + merged[addr] = true + } + + // rearrange addresses to have IPv4 first + sorted := []string{} + for addr := range merged { + if v6, err := netxlite.IsIPv6(addr); err == nil && !v6 { + sorted = append(sorted, addr) + } + } + for addr := range merged { + if v6, err := netxlite.IsIPv6(addr); err == nil && v6 { + sorted = append(sorted, addr) + } + } + + // TODO(bassosimone): remove bogons + + return sorted +} + +// Run runs this task in the current goroutine. +func (t *DNSResolvers) Run(parentCtx context.Context) { + var ( + addresses []string + found bool + ) + + // attempt to use the dns cache + addresses, found = t.DNSCache.Get(t.Domain) + + if !found { + // fall back to performing a real dns lookup + addresses = t.run(parentCtx) + + // insert the addresses we just looked us into the cache + t.DNSCache.Set(t.Domain, addresses) + } + + log.Infof("using resolved addrs: %+v", addresses) + + // fan out a number of child async tasks to use the IP addrs + t.startCleartextFlows(parentCtx, addresses) + t.startSecureFlows(parentCtx, addresses) + t.maybeStartControlFlow(parentCtx, addresses) +} + +// whoamiSystemV4 performs a DNS whoami lookup for the system resolver. This function must +// always emit an ouput on the [out] channel to synchronize with the caller func. +func (t *DNSResolvers) whoamiSystemV4(parentCtx context.Context, out chan<- []DNSWhoamiInfoEntry) { + value, _ := DNSWhoamiSingleton.SystemV4(parentCtx) + t.Logger.Infof("DNS whoami for system resolver: %+v", value) + out <- value +} + +// whoamiUDPv4 performs a DNS whoami lookup for the given UDP resolver. This function must +// always emit an ouput on the [out] channel to synchronize with the caller func. +func (t *DNSResolvers) whoamiUDPv4(parentCtx context.Context, udpAddress string, out chan<- []DNSWhoamiInfoEntry) { + value, _ := DNSWhoamiSingleton.UDPv4(parentCtx, udpAddress) + t.Logger.Infof("DNS whoami for %s/udp resolver: %+v", udpAddress, value) + out <- value +} + +// lookupHostSystem performs a DNS lookup using the system resolver. This function must +// always emit an ouput on the [out] channel to synchronize with the caller func. +func (t *DNSResolvers) lookupHostSystem(parentCtx context.Context, out chan<- []string) { + // create context with attached a timeout + const timeout = 4 * time.Second + lookupCtx, lookpCancel := context.WithTimeout(parentCtx, timeout) + defer lookpCancel() + + // create trace's index + index := t.IDGenerator.Add(1) + + // create trace + trace := measurexlite.NewTrace(index, t.ZeroTime) + + // start the operation logger + ol := measurexlite.NewOperationLogger( + t.Logger, "[#%d] lookup %s using system", index, t.Domain, + ) + + // runs the lookup + reso := trace.NewStdlibResolver(t.Logger) + addrs, err := reso.LookupHost(lookupCtx, t.Domain) + t.TestKeys.AppendQueries(trace.DNSLookupsFromRoundTrip()...) + ol.Stop(err) + out <- addrs +} + +// lookupHostUDP performs a DNS lookup using an UDP resolver. This function must always +// emit an ouput on the [out] channel to synchronize with the caller func. +func (t *DNSResolvers) lookupHostUDP(parentCtx context.Context, udpAddress string, out chan<- []string) { + // create context with attached a timeout + const timeout = 4 * time.Second + lookupCtx, lookpCancel := context.WithTimeout(parentCtx, timeout) + defer lookpCancel() + + // create trace's index + index := t.IDGenerator.Add(1) + + // create trace + trace := measurexlite.NewTrace(index, t.ZeroTime) + + // start the operation logger + ol := measurexlite.NewOperationLogger( + t.Logger, "[#%d] lookup %s using %s", index, t.Domain, udpAddress, + ) + + // runs the lookup + dialer := netxlite.NewDialerWithoutResolver(t.Logger) + reso := trace.NewParallelUDPResolver(t.Logger, dialer, udpAddress) + addrs, err := reso.LookupHost(lookupCtx, t.Domain) + + // saves the results making sure we split Do53 queries from other queries + do53, other := t.do53SplitQueries(trace.DNSLookupsFromRoundTrip()) + t.TestKeys.AppendQueries(do53...) + t.TestKeys.WithTestKeysDo53(func(tkd *TestKeysDo53) { + tkd.Queries = append(tkd.Queries, other...) + tkd.NetworkEvents = append(tkd.NetworkEvents, trace.NetworkEvents()...) + }) + + ol.Stop(err) + out <- addrs +} + +// Divides queries generated by Do53 in Do53-proper queries and other queries. +func (t *DNSResolvers) do53SplitQueries( + input []*model.ArchivalDNSLookupResult) (do53, other []*model.ArchivalDNSLookupResult) { + for _, query := range input { + switch query.Engine { + case "udp", "tcp": + do53 = append(do53, query) + default: + other = append(other, query) + } + } + return +} + +// TODO(bassosimone): maybe cycle through a bunch of well known addresses + +// Returns the UDP resolver we should be using by default. +func (t *DNSResolvers) udpAddress() string { + if t.UDPAddress != "" { + return t.UDPAddress + } + return "8.8.4.4:53" +} + +// OpportunisticDNSOverHTTPS allows to perform opportunistic DNS-over-HTTPS +// measurements as part of Web Connectivity. +type OpportunisticDNSOverHTTPS struct { + // interval is the next interval after which to measure. + interval time.Duration + + // mu provides mutual exclusion + mu *sync.Mutex + + // rnd is the random number generator to use. + rnd *rand.Rand + + // t is when we last run an opportunistic measurement. + t time.Time + + // urls contains the urls of known DoH services. + urls []string +} + +// MaybeNextURL returns the next URL to measure, if any. Our aim is to perform +// periodic, opportunistic DoH measurements as part of Web Connectivity. +func (o *OpportunisticDNSOverHTTPS) MaybeNextURL() (string, bool) { + now := time.Now() + o.mu.Lock() + defer o.mu.Unlock() + if o.t.IsZero() || now.Sub(o.t) > o.interval { + o.rnd.Shuffle(len(o.urls), func(i, j int) { + o.urls[i], o.urls[j] = o.urls[j], o.urls[i] + }) + o.t = now + o.interval = time.Duration(20+o.rnd.Uint32()%20) * time.Second + return o.urls[0], true + } + return "", false +} + +// TODO(bassosimone): consider whether factoring out this code +// and storing the state on disk instead of using memory + +// TODO(bassosimone): consider unifying somehow this code and +// the systemresolver code (or maybe just the list of resolvers) + +// OpportunisticDNSOverHTTPSSingleton is the singleton used to keep +// track of the opportunistic DNS-over-HTTPS measurements state. +var OpportunisticDNSOverHTTPSSingleton = &OpportunisticDNSOverHTTPS{ + interval: 0, + mu: &sync.Mutex{}, + rnd: rand.New(rand.NewSource(time.Now().UnixNano())), + t: time.Time{}, + urls: []string{ + "https://mozilla.cloudflare-dns.com/dns-query", + "https://dns.nextdns.io/dns-query", + "https://dns.google/dns-query", + "https://dns.quad9.net/dns-query", + }, +} + +// lookupHostDNSOverHTTPS performs a DNS lookup using a DoH resolver. This function must +// always emit an ouput on the [out] channel to synchronize with the caller func. +func (t *DNSResolvers) lookupHostDNSOverHTTPS(parentCtx context.Context, out chan<- []string) { + // obtain an opportunistic DoH URL + URL, good := OpportunisticDNSOverHTTPSSingleton.MaybeNextURL() + if !good { + // no need to perform opportunistic DoH at this time but we still + // need to fake out a lookup to please our caller + out <- []string{} + return + } + + // create context with attached a timeout + const timeout = 4 * time.Second + lookupCtx, lookpCancel := context.WithTimeout(parentCtx, timeout) + defer lookpCancel() + + // create trace's index + index := t.IDGenerator.Add(1) + + // create trace + trace := measurexlite.NewTrace(index, t.ZeroTime) + + // start the operation logger + ol := measurexlite.NewOperationLogger( + t.Logger, "[#%d] lookup %s using %s", index, t.Domain, URL, + ) + + // runs the lookup + reso := trace.NewParallelDNSOverHTTPSResolver(t.Logger, URL) + addrs, err := reso.LookupHost(lookupCtx, t.Domain) + reso.CloseIdleConnections() + + // save results making sure we properly split DoH queries from other queries + doh, other := t.dohSplitQueries(trace.DNSLookupsFromRoundTrip()) + t.TestKeys.AppendQueries(doh...) + t.TestKeys.WithTestKeysDoH(func(tkdh *TestKeysDoH) { + tkdh.Queries = append(tkdh.Queries, other...) + tkdh.NetworkEvents = append(tkdh.NetworkEvents, trace.NetworkEvents()...) + tkdh.TCPConnect = append(tkdh.TCPConnect, trace.TCPConnects()...) + tkdh.TLSHandshakes = append(tkdh.TLSHandshakes, trace.TLSHandshakes()...) + }) + + ol.Stop(err) + out <- addrs +} + +// Divides queries generated by DoH in DoH-proper queries and other queries. +func (t *DNSResolvers) dohSplitQueries( + input []*model.ArchivalDNSLookupResult) (doh, other []*model.ArchivalDNSLookupResult) { + for _, query := range input { + switch query.Engine { + case "doh": + doh = append(doh, query) + default: + other = append(other, query) + } + } + return +} + +// startCleartextFlows starts a TCP measurement flow for each IP addr. +func (t *DNSResolvers) startCleartextFlows(ctx context.Context, addresses []string) { + sema := make(chan any, 1) + sema <- true // allow a single flow to fetch the HTTP body + t.startCleartextFlowsWithSema(ctx, sema, addresses) +} + +// startCleartextFlowsWithSema implements EndpointMeasurementsStarter. +func (t *DNSResolvers) startCleartextFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) { + if t.URL.Scheme != "http" { + // Do not bother with measuring HTTP when the user + // has asked us to measure an HTTPS URL. + return + } + port := "80" + if urlPort := t.URL.Port(); urlPort != "" { + port = urlPort + } + for _, addr := range addresses { + task := &CleartextFlow{ + Address: net.JoinHostPort(addr, port), + DNSCache: t.DNSCache, + IDGenerator: t.IDGenerator, + Logger: t.Logger, + Sema: sema, + TestKeys: t.TestKeys, + ZeroTime: t.ZeroTime, + WaitGroup: t.WaitGroup, + CookieJar: t.CookieJar, + FollowRedirects: t.URL.Scheme == "http", + HostHeader: t.URL.Host, + Referer: t.Referer, + UDPAddress: t.UDPAddress, + URLPath: t.URL.Path, + URLRawQuery: t.URL.RawQuery, + } + task.Start(ctx) + } +} + +// startSecureFlows starts a TCP+TLS measurement flow for each IP addr. +func (t *DNSResolvers) startSecureFlows(ctx context.Context, addresses []string) { + sema := make(chan any, 1) + if t.URL.Scheme == "https" { + // Allows just a single worker to fetch the response body but do that + // only if the test-lists URL uses "https" as the scheme. Otherwise, just + // validate IPs by performing a TLS handshake. + sema <- true + } + t.startSecureFlowsWithSema(ctx, sema, addresses) +} + +// startSecureFlowsWithSema implements EndpointMeasurementsStarter. +func (t *DNSResolvers) startSecureFlowsWithSema(ctx context.Context, sema <-chan any, addresses []string) { + port := "443" + if urlPort := t.URL.Port(); urlPort != "" { + if t.URL.Scheme != "https" { + // If the URL is like http://example.com:8080/, we don't know + // which would be the correct port where to use HTTPS. + return + } + port = urlPort + } + for _, addr := range addresses { + task := &SecureFlow{ + Address: net.JoinHostPort(addr, port), + DNSCache: t.DNSCache, + IDGenerator: t.IDGenerator, + Logger: t.Logger, + Sema: sema, + TestKeys: t.TestKeys, + ZeroTime: t.ZeroTime, + WaitGroup: t.WaitGroup, + ALPN: []string{"h2", "http/1.1"}, + CookieJar: t.CookieJar, + FollowRedirects: t.URL.Scheme == "https", + SNI: t.URL.Hostname(), + HostHeader: t.URL.Host, + Referer: t.Referer, + UDPAddress: t.UDPAddress, + URLPath: t.URL.Path, + URLRawQuery: t.URL.RawQuery, + } + task.Start(ctx) + } +} + +// maybeStartControlFlow starts the control flow iff .Session and .THAddr are set. +func (t *DNSResolvers) maybeStartControlFlow(ctx context.Context, addresses []string) { + if t.Session != nil && t.THAddr != "" { + ctrl := &Control{ + Addresses: addresses, + ExtraMeasurementsStarter: t, // allows starting follow-up measurement flows + Logger: t.Logger, + TestKeys: t.TestKeys, + Session: t.Session, + THAddr: t.THAddr, + URL: t.URL, + WaitGroup: t.WaitGroup, + } + ctrl.Start(ctx) + } +} diff --git a/internal/experiment/webconnectivity/dnswhoami.go b/internal/experiment/webconnectivity/dnswhoami.go new file mode 100644 index 0000000000..3a0636cacd --- /dev/null +++ b/internal/experiment/webconnectivity/dnswhoami.go @@ -0,0 +1,79 @@ +package webconnectivity + +import ( + "context" + "sync" + "time" + + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// TODO(bassosimone): this code needs refining before we can merge it inside +// master. For one, we already have systemv4 info. Additionally, it would +// be neat to avoid additional AAAA queries. Furthermore, we should also see +// to implement support for IPv6 only clients as well. + +// DNSWhoamiService is a service that performs DNS whoami lookups. +type DNSWhoamiService struct { + // mu provides mutual exclusion + mu *sync.Mutex + + // systemv4 contains systemv4 results + systemv4 []DNSWhoamiInfoEntry + + // udpv4 contains udpv4 results + udpv4 map[string][]DNSWhoamiInfoEntry +} + +// SystemV4 returns the results of querying using the system resolver and IPv4. +func (svc *DNSWhoamiService) SystemV4(ctx context.Context) ([]DNSWhoamiInfoEntry, bool) { + svc.mu.Lock() + defer svc.mu.Unlock() + if len(svc.systemv4) <= 0 { + ctx, cancel := context.WithTimeout(ctx, 4*time.Second) + defer cancel() + reso := netxlite.NewStdlibResolver(model.DiscardLogger) + addrs, err := reso.LookupHost(ctx, "whoami.v4.powerdns.org") + if err != nil || len(addrs) < 1 { + return nil, false + } + svc.systemv4 = []DNSWhoamiInfoEntry{{ + Address: addrs[0], + }} + } + return svc.systemv4, len(svc.systemv4) > 0 +} + +// UDPv4 returns the results of querying a given UDP resolver and IPv4. +func (svc *DNSWhoamiService) UDPv4(ctx context.Context, address string) ([]DNSWhoamiInfoEntry, bool) { + svc.mu.Lock() + defer svc.mu.Unlock() + if len(svc.udpv4[address]) <= 0 { + ctx, cancel := context.WithTimeout(ctx, 4*time.Second) + defer cancel() + dialer := netxlite.NewDialerWithStdlibResolver(model.DiscardLogger) + reso := netxlite.NewParallelUDPResolver(model.DiscardLogger, dialer, address) + // TODO(bassosimone): this should actually only send an A query. Sending an AAAA + // query is _way_ unnecessary since we know that only A is going to work. + addrs, err := reso.LookupHost(ctx, "whoami.v4.powerdns.org") + if err != nil || len(addrs) < 1 { + return nil, false + } + svc.udpv4[address] = []DNSWhoamiInfoEntry{{ + Address: addrs[0], + }} + } + value := svc.udpv4[address] + return value, len(value) > 0 +} + +// TODO(bassosimone): consider factoring this code and keeping state +// on disk rather than on memory. + +// DNSWhoamiSingleton is the DNSWhoamiService singleton. +var DNSWhoamiSingleton = &DNSWhoamiService{ + mu: &sync.Mutex{}, + systemv4: []DNSWhoamiInfoEntry{}, + udpv4: map[string][]DNSWhoamiInfoEntry{}, +} diff --git a/internal/experiment/webconnectivity/doc.go b/internal/experiment/webconnectivity/doc.go new file mode 100644 index 0000000000..0c713d1373 --- /dev/null +++ b/internal/experiment/webconnectivity/doc.go @@ -0,0 +1,7 @@ +// Package webconnectivity implements the web_connectivity experiment. +// +// Spec: https://github.com/ooni/spec/blob/master/nettests/ts-017-web-connectivity.md. +// +// This implementation, in particular, contains extensions over the original model, +// which we document at https://github.com/ooni/probe/issues/2237. +package webconnectivity diff --git a/internal/experiment/webconnectivity/inputparser.go b/internal/experiment/webconnectivity/inputparser.go new file mode 100644 index 0000000000..a8b055e225 --- /dev/null +++ b/internal/experiment/webconnectivity/inputparser.go @@ -0,0 +1,63 @@ +package webconnectivity + +// +// Input parsing +// + +import ( + "errors" + "net" + "net/url" + + "github.com/ooni/probe-cli/v3/internal/runtimex" +) + +// InputParser helps to print the experiment's input. +type InputParser struct { + // List of accepted URL schemes. + AcceptedSchemes []string + + // Whether to allow endpoints in input. + AllowEndpoints bool + + // The default scheme to use if AllowEndpoints == true. + DefaultScheme string +} + +// Parse parses the experiment input and returns the resulting URL. +func (ip *InputParser) Parse(input string) (*url.URL, error) { + // put this check at top-level such that we always see the crash if needed + runtimex.PanicIfTrue( + ip.AllowEndpoints && ip.DefaultScheme == "", + "invalid configuration for InputParser.AllowEndpoints == true", + ) + URL, err := url.Parse(input) + if err != nil { + return ip.maybeAllowEndpoints(URL, err) + } + for _, scheme := range ip.AcceptedSchemes { + if URL.Scheme == scheme { + // TODO: here you may want to perform additional parsing + return URL, nil + } + } + return nil, errors.New("cannot parse input") +} + +// Conditionally allows endpoints when ip.AllowEndpoints is true. +func (ip *InputParser) maybeAllowEndpoints(URL *url.URL, err error) (*url.URL, error) { + runtimex.PanicIfNil(err, "expected to be called with a non-nil error") + if ip.AllowEndpoints && URL.Scheme != "" && URL.Opaque != "" && URL.User == nil && + URL.Host == "" && URL.Path == "" && URL.RawPath == "" && + URL.RawQuery == "" && URL.Fragment == "" && URL.RawFragment == "" { + // See https://go.dev/play/p/Rk5pS_zGY5U + // + // Note that we know that `ip.DefaultScheme != ""` from the above runtime check. + out := &url.URL{ + Scheme: ip.DefaultScheme, + Host: net.JoinHostPort(URL.Scheme, URL.Opaque), + } + return out, nil + } + return nil, err +} diff --git a/internal/experiment/webconnectivity/measurer.go b/internal/experiment/webconnectivity/measurer.go new file mode 100644 index 0000000000..86012fa07b --- /dev/null +++ b/internal/experiment/webconnectivity/measurer.go @@ -0,0 +1,135 @@ +package webconnectivity + +// +// Measurer +// + +import ( + "context" + "errors" + "net/http/cookiejar" + "sync" + + "github.com/ooni/probe-cli/v3/internal/atomicx" + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/model" + "golang.org/x/net/publicsuffix" +) + +// Measurer for the web_connectivity experiment. +type Measurer struct { + // Contains the experiment's config. + Config *Config +} + +// NewExperimentMeasurer creates a new model.ExperimentMeasurer. +func NewExperimentMeasurer(config *Config) model.ExperimentMeasurer { + return &Measurer{ + Config: config, + } +} + +// ExperimentName implements model.ExperimentMeasurer. +func (m *Measurer) ExperimentName() string { + return "web_connectivity" +} + +// ExperimentVersion implements model.ExperimentMeasurer. +func (m *Measurer) ExperimentVersion() string { + return "0.5.0" +} + +// Run implements model.ExperimentMeasurer. +func (m *Measurer) Run(ctx context.Context, sess model.ExperimentSession, + measurement *model.Measurement, callbacks model.ExperimentCallbacks) error { + // Reminder: when this function returns an error, the measurement result + // WILL NOT be submitted to the OONI backend. You SHOULD only return an error + // for fundamental errors (e.g., the input is invalid or missing). + + // honour InputOrQueryBackend + input := measurement.Input + if input == "" { + return errors.New("no input provided") + } + + // convert the input string to a URL + inputParser := &InputParser{ + AcceptedSchemes: []string{ + "http", + "https", + }, + AllowEndpoints: false, + DefaultScheme: "", + } + URL, err := inputParser.Parse(string(measurement.Input)) + if err != nil { + return err + } + + // initialize the experiment's test keys + tk := NewTestKeys() + measurement.TestKeys = tk + + // create variables required to run parallel tasks + idGenerator := &atomicx.Int64{} + wg := &sync.WaitGroup{} + + // create cookiejar + jar, err := cookiejar.New(&cookiejar.Options{ + PublicSuffixList: publicsuffix.List, + }) + if err != nil { + return err + } + + // obtain the test helper's address + testhelpers, _ := sess.GetTestHelpersByName("web-connectivity") + var thAddr string + for _, th := range testhelpers { + if th.Type == "https" { + thAddr = th.Address + measurement.TestHelpers = map[string]any{ + "backend": &th, + } + break + } + } + if thAddr == "" { + sess.Logger().Warnf("continuing without a valid TH address") + tk.SetControlFailure(webconnectivity.ErrNoAvailableTestHelpers) + } + + // start background tasks + resos := &DNSResolvers{ + DNSCache: NewDNSCache(), + Domain: URL.Hostname(), + IDGenerator: idGenerator, + Logger: sess.Logger(), + TestKeys: tk, + URL: URL, + ZeroTime: measurement.MeasurementStartTimeSaved, + WaitGroup: wg, + CookieJar: jar, + Referer: "", + Session: sess, + THAddr: thAddr, + UDPAddress: "", + } + resos.Start(ctx) + + // wait for background tasks to join + wg.Wait() + + // If the context passed to us has been cancelled, we cannot + // trust this experiment's results to be okay. + if err := ctx.Err(); err != nil { + return err + } + + // perform any deferred computation on the test keys + tk.Finalize(sess.Logger()) + + // return whether there was a fundamental failure, which would prevent + // the measurement from being submitted to the OONI collector. + return tk.fundamentalFailure +} diff --git a/internal/experiment/webconnectivity/secureflow.go b/internal/experiment/webconnectivity/secureflow.go new file mode 100644 index 0000000000..dd837d356e --- /dev/null +++ b/internal/experiment/webconnectivity/secureflow.go @@ -0,0 +1,339 @@ +package webconnectivity + +// +// SecureFlow +// +// Generated by `boilerplate' using the https template. +// + +import ( + "context" + "crypto/tls" + "io" + "net" + "net/http" + "net/url" + "sync" + "time" + + "github.com/ooni/probe-cli/v3/internal/atomicx" + "github.com/ooni/probe-cli/v3/internal/measurexlite" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/netxlite" +) + +// Measures HTTPS endpoints. +// +// The zero value of this structure IS NOT valid and you MUST initialize +// all the fields marked as MANDATORY before using this structure. +type SecureFlow struct { + // Address is the MANDATORY address to connect to. + Address string + + // DNSCache is the MANDATORY DNS cache. + DNSCache *DNSCache + + // IDGenerator is the MANDATORY atomic int64 to generate task IDs. + IDGenerator *atomicx.Int64 + + // Logger is the MANDATORY logger to use. + Logger model.Logger + + // Sema is the MANDATORY semaphore to allow just a single + // connection to perform the HTTP transaction. + Sema <-chan any + + // TestKeys is MANDATORY and contains the TestKeys. + TestKeys *TestKeys + + // ZeroTime is the MANDATORY measurement's zero time. + ZeroTime time.Time + + // WaitGroup is the MANDATORY wait group this task belongs to. + WaitGroup *sync.WaitGroup + + // ALPN is the OPTIONAL ALPN to use. + ALPN []string + + // CookieJar contains the OPTIONAL cookie jar, used for redirects. + CookieJar http.CookieJar + + // FollowRedirects is OPTIONAL and instructs this flow + // to follow HTTP redirects (if any). + FollowRedirects bool + + // HostHeader is the OPTIONAL host header to use. + HostHeader string + + // Referer contains the OPTIONAL referer, used for redirects. + Referer string + + // SNI is the OPTIONAL SNI to use. + SNI string + + // UDPAddress is the OPTIONAL address of the UDP resolver to use. If this + // field is not set we use a default one (e.g., `8.8.8.8:53`). + UDPAddress string + + // URLPath is the OPTIONAL URL path. + URLPath string + + // URLRawQuery is the OPTIONAL URL raw query. + URLRawQuery string +} + +// Start starts this task in a background goroutine. +func (t *SecureFlow) Start(ctx context.Context) { + t.WaitGroup.Add(1) + index := t.IDGenerator.Add(1) + go func() { + defer t.WaitGroup.Done() // synchronize with the parent + t.Run(ctx, index) + }() +} + +// Run runs this task in the current goroutine. +func (t *SecureFlow) Run(parentCtx context.Context, index int64) { + // create trace + trace := measurexlite.NewTrace(index, t.ZeroTime) + + // start the operation logger + ol := measurexlite.NewOperationLogger( + t.Logger, "[#%d] GET https://%s using %s", index, t.HostHeader, t.Address, + ) + + // perform the TCP connect + const tcpTimeout = 10 * time.Second + tcpCtx, tcpCancel := context.WithTimeout(parentCtx, tcpTimeout) + defer tcpCancel() + tcpDialer := trace.NewDialerWithoutResolver(t.Logger) + tcpConn, err := tcpDialer.DialContext(tcpCtx, "tcp", t.Address) + t.TestKeys.AppendTCPConnectResults(trace.TCPConnects()...) + if err != nil { + ol.Stop(err) + return + } + defer func() { + t.TestKeys.AppendNetworkEvents(trace.NetworkEvents()...) + tcpConn.Close() + }() + + // perform TLS handshake + tlsSNI, err := t.sni() + if err != nil { + t.TestKeys.SetFundamentalFailure(err) + ol.Stop(err) + return + } + tlsHandshaker := trace.NewTLSHandshakerStdlib(t.Logger) + tlsConfig := &tls.Config{ + NextProtos: t.alpn(), + RootCAs: netxlite.NewDefaultCertPool(), + ServerName: tlsSNI, + } + const tlsTimeout = 10 * time.Second + tlsCtx, tlsCancel := context.WithTimeout(parentCtx, tlsTimeout) + defer tlsCancel() + tlsConn, tlsConnState, err := tlsHandshaker.Handshake(tlsCtx, tcpConn, tlsConfig) + t.TestKeys.AppendTLSHandshakes(trace.TLSHandshakes()...) + if err != nil { + ol.Stop(err) + return + } + defer tlsConn.Close() + + alpn := tlsConnState.NegotiatedProtocol + + // Only allow N flows to _use_ the connection + select { + case <-t.Sema: + default: + ol.Stop(nil) + return + } + + // create HTTP transport + httpTransport := netxlite.NewHTTPTransport( + t.Logger, + netxlite.NewNullDialer(), + // note: netxlite guarantees that here tlsConn is a netxlite.TLSConn + netxlite.NewSingleUseTLSDialer(tlsConn.(netxlite.TLSConn)), + ) + + // create HTTP request + const httpTimeout = 10 * time.Second + httpCtx, httpCancel := context.WithTimeout(parentCtx, httpTimeout) + defer httpCancel() + httpReq, err := t.newHTTPRequest(httpCtx) + if err != nil { + if t.Referer == "" { + // when the referer is empty, the failing URL comes from our backend + // or from the user, so it's a fundamental failure. After that, we + // are dealing with websites provided URLs, so we should not flag a + // fundamental failure, because we want to see the measurement submitted. + t.TestKeys.SetFundamentalFailure(err) + } + ol.Stop(err) + return + } + + // perform HTTP transaction + httpResp, httpRespBody, err := t.httpTransaction( + httpCtx, + "tcp", + t.Address, + alpn, + httpTransport, + httpReq, + trace, + ) + if err != nil { + ol.Stop(err) + return + } + + // if enabled, follow possible redirects + t.maybeFollowRedirects(parentCtx, httpResp) + + // TODO: insert here additional code if needed + _ = httpRespBody + + // completed successfully + ol.Stop(nil) +} + +// alpn returns the user-configured ALPN or a reasonable default +func (t *SecureFlow) alpn() []string { + if len(t.ALPN) > 0 { + return t.ALPN + } + return []string{"h2", "http/1.1"} +} + +// sni returns the user-configured SNI or a reasonable default +func (t *SecureFlow) sni() (string, error) { + if t.SNI != "" { + return t.SNI, nil + } + addr, _, err := net.SplitHostPort(t.Address) + if err != nil { + return "", err + } + return addr, nil +} + +// urlHost computes the host to include into the URL +func (t *SecureFlow) urlHost(scheme string) (string, error) { + addr, port, err := net.SplitHostPort(t.Address) + if err != nil { + t.Logger.Warnf("BUG: net.SplitHostPort failed for %s: %s", t.Address, err.Error()) + return "", err + } + urlHost := t.HostHeader + if urlHost == "" { + urlHost = addr + } + if port == "443" && scheme == "https" { + return urlHost, nil + } + urlHost = net.JoinHostPort(urlHost, port) + return urlHost, nil +} + +// newHTTPRequest creates a new HTTP request. +func (t *SecureFlow) newHTTPRequest(ctx context.Context) (*http.Request, error) { + const urlScheme = "https" + urlHost, err := t.urlHost(urlScheme) + if err != nil { + return nil, err + } + httpURL := &url.URL{ + Scheme: urlScheme, + Host: urlHost, + Path: t.URLPath, + RawQuery: t.URLRawQuery, + } + httpReq, err := http.NewRequestWithContext(ctx, "GET", httpURL.String(), nil) + if err != nil { + return nil, err + } + httpReq.Header.Set("Host", t.HostHeader) + httpReq.Header.Set("Accept", model.HTTPHeaderAccept) + httpReq.Header.Set("Accept-Language", model.HTTPHeaderAcceptLanguage) + httpReq.Header.Set("Referer", t.Referer) + httpReq.Header.Set("User-Agent", model.HTTPHeaderUserAgent) + httpReq.Host = t.HostHeader + if t.CookieJar != nil { + for _, cookie := range t.CookieJar.Cookies(httpURL) { + httpReq.AddCookie(cookie) + } + } + return httpReq, nil +} + +// httpTransaction runs the HTTP transaction and saves the results. +func (t *SecureFlow) httpTransaction(ctx context.Context, network, address, alpn string, + txp model.HTTPTransport, req *http.Request, trace *measurexlite.Trace) (*http.Response, []byte, error) { + const maxbody = 1 << 19 + started := trace.TimeSince(trace.ZeroTime) + resp, err := txp.RoundTrip(req) + var body []byte + if err == nil { + defer resp.Body.Close() + if cookies := resp.Cookies(); t.CookieJar != nil && len(cookies) > 0 { + t.CookieJar.SetCookies(req.URL, cookies) + } + reader := io.LimitReader(resp.Body, maxbody) + body, err = netxlite.ReadAllContext(ctx, reader) + } + finished := trace.TimeSince(trace.ZeroTime) + ev := measurexlite.NewArchivalHTTPRequestResult( + trace.Index, + started, + network, + address, + alpn, + txp.Network(), + req, + resp, + maxbody, + body, + err, + finished, + ) + t.TestKeys.AppendRequests(ev) + return resp, body, err +} + +// maybeFollowRedirects follows redirects if configured and needed +func (t *SecureFlow) maybeFollowRedirects(ctx context.Context, resp *http.Response) { + if !t.FollowRedirects { + return // not configured + } + switch resp.StatusCode { + case 301, 302, 307, 308: + location, err := resp.Location() + if err != nil { + return // broken response from server + } + t.Logger.Infof("redirect to: %s", location.String()) + resolvers := &DNSResolvers{ + CookieJar: t.CookieJar, + DNSCache: t.DNSCache, + Domain: location.Hostname(), + IDGenerator: t.IDGenerator, + Logger: t.Logger, + TestKeys: t.TestKeys, + URL: location, + ZeroTime: t.ZeroTime, + WaitGroup: t.WaitGroup, + Referer: resp.Request.URL.String(), + Session: nil, // no need to issue another control request + THAddr: "", // ditto + UDPAddress: t.UDPAddress, + } + resolvers.Start(ctx) + default: + // no redirect to follow + } +} diff --git a/internal/experiment/webconnectivity/summary.go b/internal/experiment/webconnectivity/summary.go new file mode 100644 index 0000000000..2865c88cff --- /dev/null +++ b/internal/experiment/webconnectivity/summary.go @@ -0,0 +1,23 @@ +package webconnectivity + +// +// Summary +// + +import "github.com/ooni/probe-cli/v3/internal/model" + +// Summary contains the summary results. +// +// Note that this structure is part of the ABI contract with ooniprobe +// therefore we should be careful when changing it. +type SummaryKeys struct { + // TODO: add here additional summary fields. + isAnomaly bool +} + +// GetSummaryKeys implements model.ExperimentMeasurer.GetSummaryKeys. +func (m *Measurer) GetSummaryKeys(measurement *model.Measurement) (any, error) { + // TODO(bassosimone): fill all the SummaryKeys + sk := SummaryKeys{isAnomaly: false} + return sk, nil +} diff --git a/internal/experiment/webconnectivity/testkeys.go b/internal/experiment/webconnectivity/testkeys.go new file mode 100644 index 0000000000..de6bf452ae --- /dev/null +++ b/internal/experiment/webconnectivity/testkeys.go @@ -0,0 +1,291 @@ +package webconnectivity + +// +// TestKeys for web_connectivity. +// +// Note: for historical reasons, we call TestKeys the JSON object +// containing the results produced by OONI experiments. +// + +import ( + "sync" + + "github.com/ooni/probe-cli/v3/internal/engine/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/model" + "github.com/ooni/probe-cli/v3/internal/tracex" +) + +// TestKeys contains the results produced by web_connectivity. +type TestKeys struct { + // NetworkEvents contains network events. + NetworkEvents []*model.ArchivalNetworkEvent `json:"network_events"` + + // DNSWhoami contains results of using the DNS whoami functionality for the + // possibly cleartext resolvers that we're using. + DNSWoami *DNSWhoamiInfo `json:"x_dns_whoami"` + + // DoH contains ancillary observations collected by DoH resolvers. + DoH *TestKeysDoH `json:"x_doh"` + + // Do53 contains ancillary observations collected by Do53 resolvers. + Do53 *TestKeysDo53 `json:"x_do53"` + + // Queries contains DNS queries. + Queries []*model.ArchivalDNSLookupResult `json:"queries"` + + // Requests contains HTTP results. + Requests []*model.ArchivalHTTPRequestResult `json:"requests"` + + // TCPConnect contains TCP connect results. + TCPConnect []*model.ArchivalTCPConnectResult `json:"tcp_connect"` + + // TLSHandshakes contains TLS handshakes results. + TLSHandshakes []*model.ArchivalTLSOrQUICHandshakeResult `json:"tls_handshakes"` + + // ControlRequest is the control request we sent. + ControlRequest *webconnectivity.ControlRequest `json:"x_control_request"` + + // Control contains the TH's response. + Control *webconnectivity.ControlResponse `json:"control"` + + // ControlFailure contains the failure of the control experiment. + ControlFailure *string `json:"control_failure"` + + // DNSFlags contains DNS analysis flags. + DNSFlags int64 `json:"x_dns_flags"` + + // DNSExperimentFailure indicates whether there was a failure in any + // of the DNS experiments we performed. + DNSExperimentFailure *string `json:"dns_experiment_failure"` + + // DNSConsistency indicates whether there is consistency between + // the TH's DNS results and the probe's DNS results. + DNSConsistency string `json:"dns_consistency"` + + // BlockingFlags contains blocking flags. + BlockingFlags int64 `json:"x_blocking_flags"` + + // BodyLength match tells us whether the body length matches. + BodyLengthMatch *bool `json:"body_length_match"` + + // HeadersMatch tells us whether the headers match. + HeadersMatch *bool `json:"headers_match"` + + // StatusCodeMatch tells us whether the status code matches. + StatusCodeMatch *bool `json:"status_code_match"` + + // TitleMatch tells us whether the title matches. + TitleMatch *bool `json:"title_match"` + + // Blocking indicates the reason for blocking. This is notoriously a bad + // type because it can be one of the following values: + // + // - "tcp_ip" + // - "dns" + // - "http-diff" + // - "http-failure" + // - false + // - null + // + // In addition to having a ~bad type, this field has the issue that it + // reduces the reason for blocking to an enum, whereas it's a set of flags, + // hence we introduced the x_blocking_flags field. + Blocking any `json:"blocking"` + + // Accessible indicates whether the resource is accessible. Possible + // values for this field are: nil, true, and false. + Accessible any `json:"accessible"` + + // fundamentalFailure indicates that some fundamental error occurred + // in a background task. A fundamental error is something like a programmer + // such as a failure to parse a URL that was hardcoded in the codebase. When + // this class of errors happens, you certainly don't want to submit the + // resulting measurement to the OONI collector. + fundamentalFailure error + + // mu provides mutual exclusion for accessing the test keys. + mu *sync.Mutex +} + +// DNSWhoamiInfoEntry contains an entry for DNSWhoamiInfo. +type DNSWhoamiInfoEntry struct { + // Address is the IP address + Address string `json:"address"` +} + +// DNSWhoamiInfo contains info about DNS whoami. +type DNSWhoamiInfo struct { + // SystemV4 contains results related to the system resolver using IPv4. + SystemV4 []DNSWhoamiInfoEntry `json:"system_v4"` + + // UDPv4 contains results related to an UDP resolver using IPv4. + UDPv4 map[string][]DNSWhoamiInfoEntry `json:"udp_v4"` +} + +// TestKeysDoH contains ancillary observations collected using DoH (e.g., the +// DNS lookups, TCP connects, TLS handshakes caused by given DoH lookups). +// +// They are on a separate hierarchy to simplify processing. +type TestKeysDoH struct { + // NetworkEvents contains network events. + NetworkEvents []*model.ArchivalNetworkEvent `json:"network_events"` + + // Queries contains DNS queries. + Queries []*model.ArchivalDNSLookupResult `json:"queries"` + + // Requests contains HTTP results. + Requests []*model.ArchivalHTTPRequestResult `json:"requests"` + + // TCPConnect contains TCP connect results. + TCPConnect []*model.ArchivalTCPConnectResult `json:"tcp_connect"` + + // TLSHandshakes contains TLS handshakes results. + TLSHandshakes []*model.ArchivalTLSOrQUICHandshakeResult `json:"tls_handshakes"` +} + +// TestKeysDo53 contains ancillary observations collected using Do53. +// +// They are on a separate hierarchy to simplify processing. +type TestKeysDo53 struct { + // NetworkEvents contains network events. + NetworkEvents []*model.ArchivalNetworkEvent `json:"network_events"` + + // Queries contains DNS queries. + Queries []*model.ArchivalDNSLookupResult `json:"queries"` +} + +// AppendNetworkEvents appends to NetworkEvents. +func (tk *TestKeys) AppendNetworkEvents(v ...*model.ArchivalNetworkEvent) { + tk.mu.Lock() + tk.NetworkEvents = append(tk.NetworkEvents, v...) + tk.mu.Unlock() +} + +// AppendQueries appends to Queries. +func (tk *TestKeys) AppendQueries(v ...*model.ArchivalDNSLookupResult) { + tk.mu.Lock() + tk.Queries = append(tk.Queries, v...) + tk.mu.Unlock() +} + +// AppendRequests appends to Requests. +func (tk *TestKeys) AppendRequests(v ...*model.ArchivalHTTPRequestResult) { + tk.mu.Lock() + // Implementation note: append at the front since the most recent + // request must be at the beginning of the list. + tk.Requests = append(v, tk.Requests...) + tk.mu.Unlock() +} + +// AppendTCPConnectResults appends to TCPConnect. +func (tk *TestKeys) AppendTCPConnectResults(v ...*model.ArchivalTCPConnectResult) { + tk.mu.Lock() + tk.TCPConnect = append(tk.TCPConnect, v...) + tk.mu.Unlock() +} + +// AppendTLSHandshakes appends to TLSHandshakes. +func (tk *TestKeys) AppendTLSHandshakes(v ...*model.ArchivalTLSOrQUICHandshakeResult) { + tk.mu.Lock() + tk.TLSHandshakes = append(tk.TLSHandshakes, v...) + tk.mu.Unlock() +} + +// SetControlRequest sets the value of controlRequest. +func (tk *TestKeys) SetControlRequest(v *webconnectivity.ControlRequest) { + tk.mu.Lock() + tk.ControlRequest = v + tk.mu.Unlock() +} + +// SetControl sets the value of Control. +func (tk *TestKeys) SetControl(v *webconnectivity.ControlResponse) { + tk.mu.Lock() + tk.Control = v + tk.mu.Unlock() +} + +// SetControlFailure sets the value of controlFailure. +func (tk *TestKeys) SetControlFailure(err error) { + tk.mu.Lock() + tk.ControlFailure = tracex.NewFailure(err) + tk.mu.Unlock() +} + +// SetFundamentalFailure sets the value of fundamentalFailure. +func (tk *TestKeys) SetFundamentalFailure(err error) { + tk.mu.Lock() + tk.fundamentalFailure = err + tk.mu.Unlock() +} + +// WithTestKeysDoH calls the given function with the mutex locked passing to +// it as argument the pointer to the DoH field. +func (tk *TestKeys) WithTestKeysDoH(f func(*TestKeysDoH)) { + tk.mu.Lock() + f(tk.DoH) + tk.mu.Unlock() +} + +// WithTestKeysDo53 calls the given function with the mutex locked passing to +// it as argument the pointer to the Do53 field. +func (tk *TestKeys) WithTestKeysDo53(f func(*TestKeysDo53)) { + tk.mu.Lock() + f(tk.Do53) + tk.mu.Unlock() +} + +// WithDNSWhoami calls the given function with the mutex locked passing to +// it as argument the pointer to the DNSWhoami field. +func (tk *TestKeys) WithDNSWhoami(fun func(*DNSWhoamiInfo)) { + tk.mu.Lock() + fun(tk.DNSWoami) + tk.mu.Unlock() +} + +// NewTestKeys creates a new instance of TestKeys. +func NewTestKeys() *TestKeys { + return &TestKeys{ + NetworkEvents: []*model.ArchivalNetworkEvent{}, + DNSWoami: &DNSWhoamiInfo{ + SystemV4: []DNSWhoamiInfoEntry{}, + UDPv4: map[string][]DNSWhoamiInfoEntry{}, + }, + DoH: &TestKeysDoH{ + NetworkEvents: []*model.ArchivalNetworkEvent{}, + Queries: []*model.ArchivalDNSLookupResult{}, + Requests: []*model.ArchivalHTTPRequestResult{}, + TCPConnect: []*model.ArchivalTCPConnectResult{}, + TLSHandshakes: []*model.ArchivalTLSOrQUICHandshakeResult{}, + }, + Do53: &TestKeysDo53{ + NetworkEvents: []*model.ArchivalNetworkEvent{}, + Queries: []*model.ArchivalDNSLookupResult{}, + }, + Queries: []*model.ArchivalDNSLookupResult{}, + Requests: []*model.ArchivalHTTPRequestResult{}, + TCPConnect: []*model.ArchivalTCPConnectResult{}, + TLSHandshakes: []*model.ArchivalTLSOrQUICHandshakeResult{}, + Control: nil, + ControlFailure: nil, + DNSFlags: 0, + DNSExperimentFailure: nil, + DNSConsistency: "", + BlockingFlags: 0, + BodyLengthMatch: nil, + HeadersMatch: nil, + StatusCodeMatch: nil, + TitleMatch: nil, + Blocking: nil, + Accessible: nil, + ControlRequest: nil, + fundamentalFailure: nil, + mu: &sync.Mutex{}, + } +} + +// Finalize performs any delayed computation on the test keys. This function +// must be called from the measurer after all the tasks have completed. +func (tk *TestKeys) Finalize(logger model.Logger) { + tk.analysisToplevel(logger) +} diff --git a/internal/registry/factory.go b/internal/registry/factory.go index b0b441670f..e8be338d0d 100644 --- a/internal/registry/factory.go +++ b/internal/registry/factory.go @@ -215,7 +215,8 @@ func CanonicalizeExperimentName(name string) string { // NewFactory creates a new Factory instance. func NewFactory(name string) (*Factory, error) { - factory := allexperiments[CanonicalizeExperimentName(name)] + name = CanonicalizeExperimentName(name) + factory := allexperiments[name] if factory == nil { return nil, fmt.Errorf("no such experiment: %s", name) } diff --git a/internal/registry/webconnectivityv05.go b/internal/registry/webconnectivityv05.go new file mode 100644 index 0000000000..2697c1e08c --- /dev/null +++ b/internal/registry/webconnectivityv05.go @@ -0,0 +1,27 @@ +package registry + +// +// Registers the `web_connectivity@v0.5' experiment. +// +// See https://github.com/ooni/probe/issues/2237 +// + +import ( + "github.com/ooni/probe-cli/v3/internal/experiment/webconnectivity" + "github.com/ooni/probe-cli/v3/internal/model" +) + +func init() { + // Note: the name inserted into the table is the canonicalized experiment + // name though we advertise using `web_connectivity@v0.5`. + allexperiments["web_connectivity@v_0_5"] = &Factory{ + build: func(config any) model.ExperimentMeasurer { + return webconnectivity.NewExperimentMeasurer( + config.(*webconnectivity.Config), + ) + }, + config: &webconnectivity.Config{}, + interruptible: false, + inputPolicy: model.InputOrQueryBackend, + } +}