Skip to content

Commit

Permalink
cleanup(minipipeline): factor code into utility functions (ooni#1400)
Browse files Browse the repository at this point in the history
While there, make the code slightly more correct and/or compact:

1. do not extract the AS org name
2. only extract the HTTP response if failure is nil
3. only extract the control HTTP response if failure is nil
4. do not include the zero ASN in results

Also, write a script that updates all the minipipeline regression tests.

Part of ooni/probe#2634
  • Loading branch information
bassosimone authored and Murphy-OrangeMud committed Feb 13, 2024
1 parent 412145f commit de99ecf
Show file tree
Hide file tree
Showing 38 changed files with 140 additions and 431 deletions.
2 changes: 1 addition & 1 deletion internal/cmd/minipipeline/testdata/analysis.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
"TCPTransactionsWithUnexpectedTLSHandshakeFailures": {},
"TCPTransactionsWithUnexpectedHTTPFailures": {},
"TCPTransactionsWithUnexplainedUnexpectedFailures": {}
}
}
8 changes: 1 addition & 7 deletions internal/cmd/minipipeline/testdata/observations.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"DNSEngine": "udp",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -46,7 +45,6 @@
"DNSEngine": "doh",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -86,7 +84,6 @@
"DNSEngine": "udp",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressOrg": "Consortium GARR",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -124,7 +121,6 @@
"DNSEngine": "getaddrinfo",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressOrg": "Consortium GARR",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -162,7 +158,6 @@
"DNSEngine": "doh",
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressOrg": "Consortium GARR",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -202,7 +197,6 @@
"DNSEngine": null,
"IPAddress": "130.192.16.171",
"IPAddressASN": 137,
"IPAddressOrg": "Consortium GARR",
"IPAddressBogon": false,
"EndpointTransactionID": 4,
"EndpointProto": "tcp",
Expand Down Expand Up @@ -263,4 +257,4 @@
"ControlHTTPResponseTitle": "Nexa Center for Internet \u0026 Society | Il centro Nexa è un centro di ricerca del Dipartimento di Automatica e Informatica del Politecnico di Torino"
}
}
}
}
105 changes: 26 additions & 79 deletions internal/minipipeline/observation.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"net"
"net/url"
"strconv"
"strings"

"github.com/ooni/probe-cli/v3/internal/geoipx"
"github.com/ooni/probe-cli/v3/internal/measurexlite"
Expand Down Expand Up @@ -107,11 +106,6 @@ type WebObservation struct {
// means that the probe failed to discover the IP address ASN.
IPAddressASN optional.Value[int64]

// IPAddressOrg is the optional organization name associated to this IP adddress
// as discovered by the probe while performing the measurement. When this field is
// optional.None, it means that the probe failed to discover the IP address org.
IPAddressOrg optional.Value[string]

// IPAddressBogon is true if IPAddress is a bogon.
IPAddressBogon optional.Value[bool]

Expand Down Expand Up @@ -284,41 +278,27 @@ func (c *WebObservationsContainer) ingestDNSLookupSuccesses(evs ...*model.Archiv
}

// walk through the answers
for _, answer := range ev.Answers {
// extract the IP address we resolved
var addr string
switch answer.AnswerType {
case "A":
addr = answer.IPv4
case "AAAA":
addr = answer.IPv6
default:
continue
}

utilsForEachIPAddress(ev.Answers, func(ipAddr string) {
// create the record
obs := &WebObservation{
DNSTransactionID: optional.Some(ev.TransactionID),
DNSDomain: optional.Some(ev.Hostname),
DNSLookupFailure: optional.Some(""),
DNSQueryType: optional.Some(ev.QueryType),
DNSEngine: optional.Some(ev.Engine),
IPAddress: optional.Some(addr),
IPAddressBogon: optional.Some(netxlite.IsBogon(addr)),
}
if asn, asOrg, err := geoipx.LookupASN(addr); err == nil {
obs.IPAddressASN = optional.Some(int64(asn))
obs.IPAddressOrg = optional.Some(asOrg)
IPAddress: optional.Some(ipAddr),
IPAddressASN: utilsGeoipxLookupASN(ipAddr),
IPAddressBogon: optional.Some(netxlite.IsBogon(ipAddr)),
}

// add record
c.DNSLookupSuccesses = append(c.DNSLookupSuccesses, obs)

// store the first lookup that resolved this address
if _, found := c.knownIPAddresses[addr]; !found {
c.knownIPAddresses[addr] = obs
if _, found := c.knownIPAddresses[ipAddr]; !found {
c.knownIPAddresses[ipAddr] = obs
}
}
})
}
}

Expand All @@ -331,12 +311,9 @@ func (c *WebObservationsContainer) IngestTCPConnectEvents(evs ...*model.Archival
if !found {
obs = &WebObservation{
IPAddress: optional.Some(ev.IP),
IPAddressASN: utilsGeoipxLookupASN(ev.IP),
IPAddressBogon: optional.Some(netxlite.IsBogon(ev.IP)),
}
if asn, asOrg, err := geoipx.LookupASN(ev.IP); err == nil {
obs.IPAddressASN = optional.Some(int64(asn))
obs.IPAddressOrg = optional.Some(asOrg)
}
}

// clone the record because the same IP address MAY belong
Expand All @@ -350,7 +327,6 @@ func (c *WebObservationsContainer) IngestTCPConnectEvents(evs ...*model.Archival
DNSLookupFailure: obs.DNSLookupFailure,
IPAddress: obs.IPAddress,
IPAddressASN: obs.IPAddressASN,
IPAddressOrg: obs.IPAddressOrg,
IPAddressBogon: obs.IPAddressBogon,
EndpointTransactionID: optional.Some(ev.TransactionID),
EndpointProto: optional.Some("tcp"),
Expand Down Expand Up @@ -390,37 +366,20 @@ func (c *WebObservationsContainer) IngestHTTPRoundTripEvents(evs ...*model.Archi
continue
}

// update the record
// start updating the record
obs.HTTPRequestURL = optional.Some(ev.Request.URL)
obs.HTTPFailure = optional.Some(utilsStringPointerToString(ev.Failure))
obs.HTTPResponseStatusCode = optional.Some(ev.Response.Code)
obs.HTTPResponseBodyLength = optional.Some(int64(len(ev.Response.Body)))
obs.HTTPResponseBodyIsTruncated = optional.Some(ev.Request.BodyIsTruncated)

httpResponseHeadersKeys := make(map[string]bool)
for key := range ev.Response.Headers {
httpResponseHeadersKeys[key] = true
}
obs.HTTPResponseHeadersKeys = optional.Some(httpResponseHeadersKeys)

if value := measurexlite.WebGetTitle(string(ev.Response.Body)); value != "" {
obs.HTTPResponseTitle = optional.Some(value)
}
for key, value := range ev.Response.Headers {
if strings.ToLower(key) == "location" {
obs.HTTPResponseLocation = optional.Some(string(value))
break // only first entry (typically there's just a single entry)
}
// consider the response authoritative only in case of success
if ev.Failure == nil {
obs.HTTPResponseStatusCode = optional.Some(ev.Response.Code)
obs.HTTPResponseBodyLength = optional.Some(int64(len(ev.Response.Body)))
obs.HTTPResponseBodyIsTruncated = optional.Some(ev.Request.BodyIsTruncated)
obs.HTTPResponseHeadersKeys = utilsExtractHTTPHeaderKeys(ev.Response.Headers)
obs.HTTPResponseTitle = optional.Some(measurexlite.WebGetTitle(string(ev.Response.Body)))
obs.HTTPResponseLocation = utilsExtractHTTPLocation(ev.Response.Headers)
obs.HTTPResponseIsFinal = utilsDetermineWhetherHTTPResponseIsFinal(ev.Response.Code)
}

obs.HTTPResponseIsFinal = optional.Some((func() bool {
switch ev.Response.Code / 100 {
case 2, 4, 5:
return true
default:
return false
}
}()))
}
}

Expand Down Expand Up @@ -554,30 +513,18 @@ func (c *WebObservationsContainer) controlXrefTLSFailures(resp *model.THResponse
}

func (c *WebObservationsContainer) controlSetHTTPFinalResponseExpectation(resp *model.THResponse) {
// Implementation note: the TH response does not have a clear semantics for "missing" values
// therefore we are accepting as valid values within the correct range
//
// also note that we add control information to all endpoints and then we check for "final"
// responses and only compare against "final" responses during the analysis
for _, obs := range c.KnownTCPEndpoints {
obs.ControlHTTPFailure = optional.Some(utilsStringPointerToString(resp.HTTPRequest.Failure))
if value := resp.HTTPRequest.StatusCode; value > 0 {
obs.ControlHTTPResponseStatusCode = optional.Some(value)
}
if value := resp.HTTPRequest.BodyLength; value >= 0 {
obs.ControlHTTPResponseBodyLength = optional.Some(value)
}

controlHTTPResponseHeadersKeys := make(map[string]bool)
for key := range resp.HTTPRequest.Headers {
controlHTTPResponseHeadersKeys[key] = true
}
if len(controlHTTPResponseHeadersKeys) > 0 {
obs.ControlHTTPResponseHeadersKeys = optional.Some(controlHTTPResponseHeadersKeys)
// leave everything else nil if there was a failure, like we
// already do when processing the probe events
if resp.HTTPRequest.Failure != nil {
continue
}

if v := resp.HTTPRequest.Title; v != "" {
obs.ControlHTTPResponseTitle = optional.Some(v)
}
obs.ControlHTTPResponseStatusCode = optional.Some(resp.HTTPRequest.StatusCode)
obs.ControlHTTPResponseBodyLength = optional.Some(resp.HTTPRequest.BodyLength)
obs.ControlHTTPResponseHeadersKeys = utilsExtractHTTPHeaderKeys(resp.HTTPRequest.Headers)
obs.ControlHTTPResponseTitle = optional.Some(resp.HTTPRequest.Title)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"DNSEngine": "udp",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -48,7 +47,6 @@
"DNSEngine": "getaddrinfo",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -86,7 +84,6 @@
"DNSEngine": "udp",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -126,7 +123,6 @@
"DNSEngine": null,
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": 3,
"EndpointProto": "tcp",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"DNSEngine": "udp",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -46,7 +45,6 @@
"DNSEngine": "doh",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -86,7 +84,6 @@
"DNSEngine": "getaddrinfo",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -124,7 +121,6 @@
"DNSEngine": "udp",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -162,7 +158,6 @@
"DNSEngine": "doh",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -202,7 +197,6 @@
"DNSEngine": null,
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": 4,
"EndpointProto": "tcp",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"DNSEngine": "udp",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -48,7 +47,6 @@
"DNSEngine": "getaddrinfo",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -86,7 +84,6 @@
"DNSEngine": "udp",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -126,7 +123,6 @@
"DNSEngine": null,
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": 3,
"EndpointProto": "tcp",
Expand Down Expand Up @@ -169,7 +165,6 @@
"DNSEngine": null,
"IPAddress": "93.184.216.34",
"IPAddressASN": 15133,
"IPAddressOrg": "Edgecast Inc.",
"IPAddressBogon": false,
"EndpointTransactionID": 4,
"EndpointProto": "tcp",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"DNSEngine": "udp",
"IPAddress": null,
"IPAddressASN": null,
"IPAddressOrg": null,
"IPAddressBogon": null,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -48,7 +47,6 @@
"DNSEngine": "udp",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -86,7 +84,6 @@
"DNSEngine": "getaddrinfo",
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": null,
"EndpointProto": null,
Expand Down Expand Up @@ -126,7 +123,6 @@
"DNSEngine": null,
"IPAddress": "104.154.89.105",
"IPAddressASN": 396982,
"IPAddressOrg": "Google LLC",
"IPAddressBogon": false,
"EndpointTransactionID": 3,
"EndpointProto": "tcp",
Expand Down
Loading

0 comments on commit de99ecf

Please sign in to comment.