Skip to content

Commit

Permalink
refactor(scrubber): reorganize tests and expose ScrubBytes (#1320)
Browse files Browse the repository at this point in the history
This set of changes is the continuation of what I started doing in
4a86c54
to revamp out scrubbing capabilities.

Reorganizing tests is pure refactoring. Exposing ScrubBytes is because I
think we can rationalize a bit what we scrub on the fly in the
`internal/model/archival.go` file and we need to indeed start scrubbing
byte arrays.

I'm doing this work as part of ooni/probe#2531
  • Loading branch information
bassosimone authored Sep 28, 2023
1 parent 4a86c54 commit 2db5293
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 124 deletions.
2 changes: 1 addition & 1 deletion internal/experiment/tor/tor.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ func maybeSanitize(input TargetResults, kt keytarget) TargetResults {
// Implementation note: here we are using a strict scrubbing policy where
// we remove all IP _endpoints_, mainly for convenience, because we already
// have a well tested implementation that does that.
data = []byte(scrubber.Scrub(string(data)))
data = []byte(scrubber.ScrubString(string(data)))
var out TargetResults
err = json.Unmarshal(data, &out)
runtimex.PanicOnError(err, "json.Unmarshal should not fail here")
Expand Down
6 changes: 3 additions & 3 deletions internal/logx/scrubber.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ type ScrubberLogger struct {

// Debug scrubs and emits a debug message.
func (sl *ScrubberLogger) Debug(message string) {
sl.Logger.Debug(scrubber.Scrub(message))
sl.Logger.Debug(scrubber.ScrubString(message))
}

// Debugf scrubs, formats, and emits a debug message.
Expand All @@ -28,7 +28,7 @@ func (sl *ScrubberLogger) Debugf(format string, v ...interface{}) {

// Info scrubs and emits an informational message.
func (sl *ScrubberLogger) Info(message string) {
sl.Logger.Info(scrubber.Scrub(message))
sl.Logger.Info(scrubber.ScrubString(message))
}

// Infof scrubs, formats, and emits an informational message.
Expand All @@ -38,7 +38,7 @@ func (sl *ScrubberLogger) Infof(format string, v ...interface{}) {

// Warn scrubs and emits a warning message.
func (sl *ScrubberLogger) Warn(message string) {
sl.Logger.Warn(scrubber.Scrub(message))
sl.Logger.Warn(scrubber.ScrubString(message))
}

// Warnf scrubs, formats, and emits a warning message.
Expand Down
2 changes: 1 addition & 1 deletion internal/netxlite/classify.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ func ClassifyGenericError(err error) string {
}

formatted := fmt.Sprintf("%s: %s", FailureUnknown, err.Error())
return scrubber.Scrub(formatted) // scrub IP addresses in the error
return scrubber.ScrubString(formatted) // scrub IP addresses in the error
}

// classifyWithStringSuffix is a subset of ClassifyGenericError that
Expand Down
10 changes: 5 additions & 5 deletions internal/scrubber/scrubber.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ var scrubberPatterns = []*regexp.Regexp{

var addressRegexp = regexp.MustCompile(addressPattern)

func scrub(b []byte) []byte {
// ScrubBytes scrubs bytes to remove references to IP endpoints.
func ScrubBytes(b []byte) []byte {
scrubbedBytes := b
for _, pattern := range scrubberPatterns {
// this is a workaround since go does not yet support look ahead or look
Expand All @@ -66,8 +67,7 @@ func scrub(b []byte) []byte {
return scrubbedBytes
}

// Scrub sanitizes a string containing an error such that
// any occurrence of IP endpoints is scrubbed.
func Scrub(s string) string {
return string(scrub([]byte(s)))
// ScrubString scrubs a string to remove references to IP endpoints.
func ScrubString(s string) string {
return string(ScrubBytes([]byte(s)))
}
229 changes: 115 additions & 114 deletions internal/scrubber/scrubber_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,121 +39,122 @@ import (
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// ================================================================================

// Test the log scrubber on known problematic log messages
func TestLogScrubberMessages(t *testing.T) {
for _, test := range []struct {
input, expected string
}{
{
"http: TLS handshake error from 129.97.208.23:38310: ",
"http: TLS handshake error from [scrubbed]: ",
},
{
"http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack",
"http2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack",
},
{
//Make sure it doesn't scrub fingerprint
"a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74",
"a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74",
},
{
//try with enclosing parens
"(1:2:3:4:c:d:e:f) {1:2:3:4:c:d:e:f}",
"([scrubbed]) {[scrubbed]}",
},
{
//Make sure it doesn't scrub timestamps
"2019/05/08 15:37:31 starting",
"2019/05/08 15:37:31 starting",
},
{
//Make sure ipv6 addresses where : are encoded as %3A or %3a are scrubbed
"error dialing relay: wss://snowflake.torproject.net/?client_ip=6201%3ac8%3A3004%3A%3A1234",
"error dialing relay: wss://snowflake.torproject.net/?client_ip=[scrubbed]",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3a)
"http2: panic serving [fd00%3a111%3af000%3a777%3a9999%3abbbb%3affff%3adddd]:58344: xxx",
"http2: panic serving [scrubbed]: xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3A)
"http2: panic serving [fd00%3a111%3af000%3a777%3a9999%3abbbb%3affff%3adddd]:58344: xxx",
"http2: panic serving [scrubbed]: xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed, different URL (%3A)
"error dialing relay: wss://snowflake.torproject.net/?client_ip=fd00%3A8888%3Abbbb%3Acccc%3Adddd%3Aeeee%3A2222%3A123 = dial tcp xxx",
"error dialing relay: wss://snowflake.torproject.net/?client_ip=[scrubbed] = dial tcp xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3A), compressed
"http2: panic serving [1%3A2%3A3%3A%3Ad%3Ae%3Af]:55: xxx",
"http2: panic serving [scrubbed]: xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3A), compressed
"error dialing relay: wss://snowflake.torproject.net/?client_ip=1%3A2%3A3%3A%3Ad%3Ae%3Af = dial tcp xxx",
"error dialing relay: wss://snowflake.torproject.net/?client_ip=[scrubbed] = dial tcp xxx",
},
} {
if Scrub(test.input) != test.expected {
t.Error(cmp.Diff(test.input, test.expected))
func TestScrubString(t *testing.T) {
t.Run("for problematic log messages", func(t *testing.T) {
for _, test := range []struct {
input, expected string
}{
{
"http: TLS handshake error from 129.97.208.23:38310: ",
"http: TLS handshake error from [scrubbed]: ",
},
{
"http2: panic serving [2620:101:f000:780:9097:75b1:519f:dbb8]:58344: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack",
"http2: panic serving [scrubbed]: interface conversion: *http2.responseWriter is not http.Hijacker: missing method Hijack",
},
{
//Make sure it doesn't scrub fingerprint
"a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74",
"a=fingerprint:sha-256 33:B6:FA:F6:94:CA:74:61:45:4A:D2:1F:2C:2F:75:8A:D9:EB:23:34:B2:30:E9:1B:2A:A6:A9:E0:44:72:CC:74",
},
{
//try with enclosing parens
"(1:2:3:4:c:d:e:f) {1:2:3:4:c:d:e:f}",
"([scrubbed]) {[scrubbed]}",
},
{
//Make sure it doesn't scrub timestamps
"2019/05/08 15:37:31 starting",
"2019/05/08 15:37:31 starting",
},
{
//Make sure ipv6 addresses where : are encoded as %3A or %3a are scrubbed
"error dialing relay: wss://snowflake.torproject.net/?client_ip=6201%3ac8%3A3004%3A%3A1234",
"error dialing relay: wss://snowflake.torproject.net/?client_ip=[scrubbed]",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3a)
"http2: panic serving [fd00%3a111%3af000%3a777%3a9999%3abbbb%3affff%3adddd]:58344: xxx",
"http2: panic serving [scrubbed]: xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3A)
"http2: panic serving [fd00%3a111%3af000%3a777%3a9999%3abbbb%3affff%3adddd]:58344: xxx",
"http2: panic serving [scrubbed]: xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed, different URL (%3A)
"error dialing relay: wss://snowflake.torproject.net/?client_ip=fd00%3A8888%3Abbbb%3Acccc%3Adddd%3Aeeee%3A2222%3A123 = dial tcp xxx",
"error dialing relay: wss://snowflake.torproject.net/?client_ip=[scrubbed] = dial tcp xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3A), compressed
"http2: panic serving [1%3A2%3A3%3A%3Ad%3Ae%3Af]:55: xxx",
"http2: panic serving [scrubbed]: xxx",
},
{
// make sure url encoded IPv6 IPs get scrubbed (%3A), compressed
"error dialing relay: wss://snowflake.torproject.net/?client_ip=1%3A2%3A3%3A%3Ad%3Ae%3Af = dial tcp xxx",
"error dialing relay: wss://snowflake.torproject.net/?client_ip=[scrubbed] = dial tcp xxx",
},
} {
if ScrubString(test.input) != test.expected {
t.Error(cmp.Diff(test.input, test.expected))
}
}
}
}
})

func TestLogScrubberGoodFormats(t *testing.T) {
for _, addr := range []string{
// IPv4
"1.2.3.4",
"255.255.255.255",
// IPv4 with port
"1.2.3.4:55",
"255.255.255.255:65535",
// IPv6
"1:2:3:4:c:d:e:f",
"1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF",
// IPv6 with brackets
"[1:2:3:4:c:d:e:f]",
"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]",
// IPv6 with brackets and port
"[1:2:3:4:c:d:e:f]:55",
"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]:65535",
// compressed IPv6
"::f",
"::d:e:f",
"1:2:3::",
"1:2:3::d:e:f",
"1:2:3:d:e:f::",
"::1:2:3:d:e:f",
"1111:2222:3333::DDDD:EEEE:FFFF",
// compressed IPv6 with brackets
"[::d:e:f]",
"[1:2:3::]",
"[1:2:3::d:e:f]",
"[1111:2222:3333::DDDD:EEEE:FFFF]",
"[1:2:3:4:5:6::8]",
"[1::7:8]",
// compressed IPv6 with brackets and port
"[1::]:58344",
"[::d:e:f]:55",
"[1:2:3::]:55",
"[1:2:3::d:e:f]:55",
"[1111:2222:3333::DDDD:EEEE:FFFF]:65535",
// IPv4-compatible and IPv4-mapped
"::255.255.255.255",
"::ffff:255.255.255.255",
"[::255.255.255.255]",
"[::ffff:255.255.255.255]",
"[::255.255.255.255]:65535",
"[::ffff:255.255.255.255]:65535",
"[::ffff:0:255.255.255.255]",
"[2001:db8:3:4::192.0.2.33]",
} {
if Scrub(addr) != "[scrubbed]" {
t.Error(cmp.Diff(addr, "[scrubbed]"))
t.Run("for strings containing IP addresses and endpoints", func(t *testing.T) {
for _, addr := range []string{
// IPv4
"1.2.3.4",
"255.255.255.255",
// IPv4 with port
"1.2.3.4:55",
"255.255.255.255:65535",
// IPv6
"1:2:3:4:c:d:e:f",
"1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF",
// IPv6 with brackets
"[1:2:3:4:c:d:e:f]",
"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]",
// IPv6 with brackets and port
"[1:2:3:4:c:d:e:f]:55",
"[1111:2222:3333:4444:CCCC:DDDD:EEEE:FFFF]:65535",
// compressed IPv6
"::f",
"::d:e:f",
"1:2:3::",
"1:2:3::d:e:f",
"1:2:3:d:e:f::",
"::1:2:3:d:e:f",
"1111:2222:3333::DDDD:EEEE:FFFF",
// compressed IPv6 with brackets
"[::d:e:f]",
"[1:2:3::]",
"[1:2:3::d:e:f]",
"[1111:2222:3333::DDDD:EEEE:FFFF]",
"[1:2:3:4:5:6::8]",
"[1::7:8]",
// compressed IPv6 with brackets and port
"[1::]:58344",
"[::d:e:f]:55",
"[1:2:3::]:55",
"[1:2:3::d:e:f]:55",
"[1111:2222:3333::DDDD:EEEE:FFFF]:65535",
// IPv4-compatible and IPv4-mapped
"::255.255.255.255",
"::ffff:255.255.255.255",
"[::255.255.255.255]",
"[::ffff:255.255.255.255]",
"[::255.255.255.255]:65535",
"[::ffff:255.255.255.255]:65535",
"[::ffff:0:255.255.255.255]",
"[2001:db8:3:4::192.0.2.33]",
} {
if ScrubString(addr) != "[scrubbed]" {
t.Error(cmp.Diff(addr, "[scrubbed]"))
}
}
}
})
}

0 comments on commit 2db5293

Please sign in to comment.