Skip to content

Commit

Permalink
chore(enginenetx): more tests and robustness checks (#1314)
Browse files Browse the repository at this point in the history
This diff attempts to improve the code quality of enginenetx by
identifying cases where the code could be made to crash with specially
written input and adds regress tests and checks to avoid these kind of
crashes to happen.

While there, perform a code review and improve comments and naming.

Part of ooni/probe#2531
  • Loading branch information
bassosimone authored Sep 26, 2023
1 parent bc569cb commit eee7e71
Show file tree
Hide file tree
Showing 11 changed files with 514 additions and 106 deletions.
14 changes: 7 additions & 7 deletions internal/enginenetx/beaconspolicy.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,19 @@ func (p *beaconsPolicy) LookupTactics(ctx context.Context, domain, port string)
out := make(chan *httpsDialerTactic)

go func() {
defer close(out)
defer close(out) // tell the parent when we're done
index := 0

// emit beacons related tactics first
// emit beacons related tactics first which are empty if there are
// no beacons for the givend domain and port
for tx := range p.tacticsForDomain(domain, port) {
tx.InitialDelay = happyEyeballsDelay(index)
index += 1
out <- tx
}

// now emit tactics using the DNS
// now fallback to get more tactics (typically here the fallback
// uses the DNS and obtains some extra tactics)
for tx := range p.Fallback.LookupTactics(ctx, domain, port) {
tx.InitialDelay = happyEyeballsDelay(index)
index += 1
Expand All @@ -55,7 +57,7 @@ func (p *beaconsPolicy) tacticsForDomain(domain, port string) <-chan *httpsDiale
out := make(chan *httpsDialerTactic)

go func() {
defer close(out)
defer close(out) // tell the parent when we're done

// we currently only have beacons for api.ooni.io
if domain != "api.ooni.io" {
Expand All @@ -68,9 +70,7 @@ func (p *beaconsPolicy) tacticsForDomain(domain, port string) <-chan *httpsDiale
snis[i], snis[j] = snis[j], snis[i]
})

ipAddrs := p.beaconsAddrs()

for _, ipAddr := range ipAddrs {
for _, ipAddr := range p.beaconsAddrs() {
for _, sni := range snis {
out <- &httpsDialerTactic{
Address: ipAddr,
Expand Down
2 changes: 2 additions & 0 deletions internal/enginenetx/dnspolicy.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func (p *dnsPolicy) LookupTactics(

go func() {
// make sure we close the output channel when done
// so the reader knows that we're done
defer close(out)

// Do not even start the DNS lookup if the context has already been canceled, which
Expand All @@ -54,6 +55,7 @@ func (p *dnsPolicy) LookupTactics(
return
}

// The tactics we generate here have SNI == VerifyHostname == domain
for idx, addr := range addrs {
tactic := &httpsDialerTactic{
Address: addr,
Expand Down
57 changes: 36 additions & 21 deletions internal/enginenetx/httpsdialer.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ func (dt *httpsDialerTactic) String() string {
return string(runtimex.Try1(json.Marshal(dt)))
}

// tacticSummaryKey returns a string summarizing this [httpsDialerTactic] for
// the specific purpose of inserting the struct into a map.
// tacticSummaryKey returns a string summarizing the tactic's features.
//
// The fields used to compute the summary are:
//
Expand All @@ -68,33 +67,45 @@ func (dt *httpsDialerTactic) String() string {
//
// - VerifyHostname
//
// The returned string contains the above fields separated by space.
// The returned string contains the above fields separated by space with
// `sni=` before the SNI and `verify=` before the verify hostname.
//
// We should be careful not to change this format unless we also change the
// format version used by static policies and by the state management.
func (dt *httpsDialerTactic) tacticSummaryKey() string {
return fmt.Sprintf("%v sni=%v verify=%v", net.JoinHostPort(dt.Address, dt.Port), dt.SNI, dt.VerifyHostname)
return fmt.Sprintf(
"%v sni=%v verify=%v",
net.JoinHostPort(dt.Address, dt.Port),
dt.SNI,
dt.VerifyHostname,
)
}

// domainEndpointKey returns the domain's endpoint string key for storing into a map.
// domainEndpointKey returns a string consisting of the domain endpoint only.
//
// We always use the VerifyHostname and the Port to construct the domain endpoint.
func (dt *httpsDialerTactic) domainEndpointKey() string {
return net.JoinHostPort(dt.VerifyHostname, dt.Port)
}

// httpsDialerPolicy describes the policy used by the [*httpsDialer].
// httpsDialerPolicy is a policy used by the [*httpsDialer].
type httpsDialerPolicy interface {
// LookupTactics returns zero or more tactics for the given host and port.
// LookupTactics emits zero or more tactics for the given host and port
// through the returned channel, which is closed when done.
LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic
}

// httpsDialerStatsTracker tracks what happens while dialing TLS connections.
type httpsDialerStatsTracker interface {
// httpsDialerEventsHandler handles events occurring while we try dialing TLS.
type httpsDialerEventsHandler interface {
// These callbacks are invoked during the TLS handshake to inform this
// tactic about events that occurred. A tactic SHOULD keep track of which
// interface about events that occurred. A policy SHOULD keep track of which
// addresses, SNIs, etc. work and return them more frequently.
//
// Callbacks that take an error as argument also take a context as
// argument and MUST check whether the context has been canceled or
// its timeout has expired (i.e., using ctx.Err()) to determine
// whether the operation failed or was merely canceled. In the latter
// case, obviously, the policy MUST NOT consider the tactic failed.
// case, obviously, you MUST NOT consider the tactic failed.
OnStarting(tactic *httpsDialerTactic)
OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error)
OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error)
Expand Down Expand Up @@ -125,7 +136,7 @@ type httpsDialer struct {
rootCAs *x509.CertPool

// stats tracks what happens while dialing.
stats httpsDialerStatsTracker
stats httpsDialerEventsHandler
}

// newHTTPSDialer constructs a new [*httpsDialer] instance.
Expand All @@ -146,7 +157,7 @@ func newHTTPSDialer(
logger model.Logger,
netx *netxlite.Netx,
policy httpsDialerPolicy,
stats httpsDialerStatsTracker,
stats httpsDialerEventsHandler,
) *httpsDialer {
return &httpsDialer{
idGenerator: &atomic.Int64{},
Expand Down Expand Up @@ -196,8 +207,8 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo
ctx, cancel := context.WithCancel(ctx)
defer cancel()

// The emitter will emit tactics and then close the channel when done. We spawn 1+ workers
// that handle tactics in paralellel and posts on the collector channel.
// The emitter will emit tactics and then close the channel when done. We spawn 16 workers
// that handle tactics in parallel and post results on the collector channel.
emitter := hd.policy.LookupTactics(ctx, hostname, port)
collector := make(chan *httpsDialerErrorOrConn)
joiner := make(chan any)
Expand Down Expand Up @@ -252,8 +263,10 @@ func httpsDialerReduceResult(connv []model.TLSConn, errorv []error) (model.TLSCo
}
}

// worker attempts to establish a TLS connection using and emits a single
// [*httpsDialerErrorOrConn] for each tactic.
// worker attempts to establish a TLS connection and emits the result using
// a [*httpsDialerErrorOrConn] for each tactic, until there are no more tactics
// and the reader channel is closed. At which point it posts on joiner to let
// the parent know that this goroutine has done its job.
func (hd *httpsDialer) worker(
ctx context.Context,
joiner chan<- any,
Expand All @@ -270,8 +283,10 @@ func (hd *httpsDialer) worker(
Logger: hd.logger,
}

// perform the actual dial
conn, err := hd.dialTLS(ctx, prefixLogger, t0, tactic)

// send results to the parent
writer <- &httpsDialerErrorOrConn{Conn: conn, Err: err}
}
}
Expand All @@ -283,12 +298,12 @@ func (hd *httpsDialer) dialTLS(
t0 time.Time,
tactic *httpsDialerTactic,
) (model.TLSConn, error) {
// wait for the tactic to be ready to run
// honor happy-eyeballs delays and wait for the tactic to be ready to run
if err := httpsDialerTacticWaitReady(ctx, t0, tactic); err != nil {
return nil, err
}

// tell the tactic that we're starting
// tell the observer that we're starting
hd.stats.OnStarting(tactic)

// create dialer and establish TCP connection
Expand All @@ -306,7 +321,7 @@ func (hd *httpsDialer) dialTLS(

// create TLS configuration
tlsConfig := &tls.Config{
InsecureSkipVerify: true, // Note: we're going to verify at the end of the func
InsecureSkipVerify: true, // Note: we're going to verify at the end of the func!
NextProtos: []string{"h2", "http/1.1"},
RootCAs: hd.rootCAs,
ServerName: tactic.SNI,
Expand Down Expand Up @@ -343,7 +358,7 @@ func (hd *httpsDialer) dialTLS(
return nil, err
}

// make sure the tactic know it worked
// make sure the observer knows it worked
hd.stats.OnSuccess(tactic)

return tlsConn, nil
Expand Down
34 changes: 17 additions & 17 deletions internal/enginenetx/httpsdialer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const (
httpsDialerCancelingContextStatsTrackerOnSuccess
)

// httpsDialerCancelingContextStatsTracker is an [httpsDialerStatsTracker] with a cancel
// httpsDialerCancelingContextStatsTracker is an [httpsDialerEventsHandler] with a cancel
// function that causes the context to be canceled once we start dialing.
//
// This struct helps with testing [httpsDialer] is WAI when the context
Expand All @@ -36,31 +36,31 @@ type httpsDialerCancelingContextStatsTracker struct {
flags int
}

var _ httpsDialerStatsTracker = &httpsDialerCancelingContextStatsTracker{}
var _ httpsDialerEventsHandler = &httpsDialerCancelingContextStatsTracker{}

// OnStarting implements httpsDialerStatsTracker.
// OnStarting implements httpsDialerEventsHandler.
func (st *httpsDialerCancelingContextStatsTracker) OnStarting(tactic *httpsDialerTactic) {
if (st.flags & httpsDialerCancelingContextStatsTrackerOnStarting) != 0 {
st.cancel()
}
}

// OnTCPConnectError implements httpsDialerStatsTracker.
// OnTCPConnectError implements httpsDialerEventsHandler.
func (*httpsDialerCancelingContextStatsTracker) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) {
// nothing
}

// OnTLSHandshakeError implements httpsDialerStatsTracker.
// OnTLSHandshakeError implements httpsDialerEventsHandler.
func (*httpsDialerCancelingContextStatsTracker) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) {
// nothing
}

// OnTLSVerifyError implements httpsDialerStatsTracker.
// OnTLSVerifyError implements httpsDialerEventsHandler.
func (*httpsDialerCancelingContextStatsTracker) OnTLSVerifyError(tactic *httpsDialerTactic, err error) {
// nothing
}

// OnSuccess implements httpsDialerStatsTracker.
// OnSuccess implements httpsDialerEventsHandler.
func (st *httpsDialerCancelingContextStatsTracker) OnSuccess(tactic *httpsDialerTactic) {
if (st.flags & httpsDialerCancelingContextStatsTrackerOnSuccess) != 0 {
st.cancel()
Expand All @@ -78,7 +78,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
short bool

// stats is the stats tracker to use.
stats httpsDialerStatsTracker
stats httpsDialerEventsHandler

// endpoint is the endpoint to connect to consisting of a domain
// name or IP address followed by a TCP port
Expand All @@ -101,7 +101,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "net.SplitHostPort failure",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "www.example.com", // note: here the port is missing
scenario: netemx.InternetScenario,
configureDPI: func(dpi *netem.DPIEngine) {
Expand All @@ -117,7 +117,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "hd.policy.LookupTactics failure",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "www.example.nonexistent:443", // note: the domain does not exist
scenario: netemx.InternetScenario,
configureDPI: func(dpi *netem.DPIEngine) {
Expand All @@ -131,7 +131,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "successful dial with multiple addresses",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "www.example.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Domains: []string{
Expand All @@ -157,7 +157,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with TCP connect errors",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "www.example.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Domains: []string{
Expand Down Expand Up @@ -191,7 +191,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with TLS handshake errors",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "www.example.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Domains: []string{
Expand Down Expand Up @@ -221,7 +221,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with a TLS certificate valid for ANOTHER domain",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "wrong.host.badssl.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Domains: []string{
Expand All @@ -246,7 +246,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with TLS certificate signed by an unknown authority",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "untrusted-root.badssl.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Domains: []string{
Expand All @@ -271,7 +271,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) {
{
name: "with expired TLS certificate",
short: true,
stats: &nullStatsTracker{},
stats: &nullStatsManager{},
endpoint: "expired.badssl.com:443",
scenario: []*netemx.ScenarioDomainAddresses{{
Domains: []string{
Expand Down Expand Up @@ -512,7 +512,7 @@ func TestHTTPSDialerHostNetworkQA(t *testing.T) {
Logger: log.Log,
Resolver: resolver,
},
&nullStatsTracker{},
&nullStatsManager{},
)

URL := runtimex.Try1(url.Parse(server.URL))
Expand Down
Loading

0 comments on commit eee7e71

Please sign in to comment.