Skip to content

Commit

Permalink
feat: rewrite the pipeline to match ooni/data more closely
Browse files Browse the repository at this point in the history
  • Loading branch information
bassosimone committed Nov 24, 2023
1 parent ff42f3c commit 5ad88d5
Show file tree
Hide file tree
Showing 10 changed files with 566 additions and 11 deletions.
32 changes: 32 additions & 0 deletions internal/cmd/minipipeline/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package main

import (
"os"

"github.com/ooni/probe-cli/v3/internal/minipipeline"
"github.com/ooni/probe-cli/v3/internal/must"
"github.com/ooni/probe-cli/v3/internal/pipeline"
)

// main loads the measurement passed as the first command-line argument,
// decodes it as a pipeline.CanonicalMeasurement, feeds its test keys into a
// fresh web observations container, and writes the JSON-serialized container
// to "db.json" with mode 0600.
//
// When no argument is given, it prints a usage line to stderr and exits with
// status 1.
func main() {
	// Guard against a missing argument: indexing os.Args[1] unconditionally
	// would panic with an unhelpful "index out of range" message.
	if len(os.Args) < 2 {
		os.Stderr.WriteString("usage: minipipeline <measurement.json>\n")
		os.Exit(1)
	}

	rawMeasurement := must.ReadFile(os.Args[1])
	var meas pipeline.CanonicalMeasurement
	must.UnmarshalJSON(rawMeasurement, &meas)

	// Unwrap the test keys once and reuse the result for every call below.
	tk := meas.TestKeys.Unwrap()

	container := minipipeline.NewWebObservationsContainer()
	container.CreateDNSLookupFailures(tk.Queries...)
	container.CreateKnownIPAddresses(tk.Queries...)
	container.CreateKnownTCPEndpoints(tk.TCPConnect...)
	container.NoteTLSHandshakeResults(tk.TLSHandshakes...)
	container.NoteHTTPRoundTripResults(tk.Requests...)
	container.NoteControlResults(tk.XControlRequest.Unwrap(), tk.Control.Unwrap())

	must.WriteFile("db.json", must.MarshalJSON(container), 0600)

	/*
		ax := &pipeline.Analysis{}
		ax.ComputeAllValues(db)
		must.WriteFile("ax.json", must.MarshalJSON(ax), 0600)
	*/
}
2 changes: 1 addition & 1 deletion internal/experiment/webconnectivity/httpanalysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ func HTTPTitleMatch(tk urlgetter.TestKeys, ctrl ControlResponse) (out *bool) {
}
control := ctrl.HTTPRequest.Title
measurementBody := string(response.Body)
measurement := measurexlite.WebGetTitle(measurementBody)
measurement := measurexlite.WebGetTitleString(measurementBody)
if measurement == "" {
return
}
Expand Down
2 changes: 1 addition & 1 deletion internal/experiment/webconnectivitylte/analysishttpdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ func (tk *TestKeys) httpDiffTitleMatch(
}
control := ctrl.Title
measurementBody := string(response.Body)
measurement := measurexlite.WebGetTitle(measurementBody)
measurement := measurexlite.WebGetTitleString(measurementBody)
if control == "" || measurement == "" {
return
}
Expand Down
15 changes: 9 additions & 6 deletions internal/measurexlite/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@ package measurexlite

import "regexp"

// WebGetTitle returns the title or an empty string.
func WebGetTitle(measurementBody string) string {
// MK used {1,128} but we're making it larger here to get longer titles
// e.g. <http://www.isa.gov.il/Pages/default.aspx>'s one
re := regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)
v := re.FindStringSubmatch(measurementBody)
// webTitleRegexp is the regexp used to extract the title of a web page. It
// matches case-insensitively and captures between 1 and 512 characters other
// than '<' enclosed by <title> and </title>.
//
// MK used {1,128}, but we use a larger limit here to capture longer titles,
// e.g., the one served by <http://www.isa.gov.il/Pages/default.aspx>.
var webTitleRegexp = regexp.MustCompile(`(?i)<title>([^<]{1,512})</title>`)

// WebGetTitleString returns the title or an empty string.
func WebGetTitleString(measurementBody string) string {
v := webTitleRegexp.FindStringSubmatch(measurementBody)
if len(v) < 2 {
return ""
}
Expand Down
2 changes: 1 addition & 1 deletion internal/measurexlite/web_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func TestWebGetTitle(t *testing.T) {
}}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotOut := WebGetTitle(tt.args.body)
gotOut := WebGetTitleString(tt.args.body)
if diff := cmp.Diff(tt.wantOut, gotOut); diff != "" {
t.Fatal(diff)
}
Expand Down
3 changes: 3 additions & 0 deletions internal/minipipeline/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package minipipeline implements a minimal data processing pipeline used
// to analyze local measurements collected by OONI Probe.
package minipipeline
Loading

0 comments on commit 5ad88d5

Please sign in to comment.