Skip to content

Commit

Permalink
feat(enginenetx): prune old entries from stats (#1305)
Browse files Browse the repository at this point in the history
If an entry is older than one week ago, we remove it from the stats.

This helps ensuring the stats cache does not grow too large.

Additionally, this helps ensuring we are not going to try tactics that
worked long time ago and it may be pointless trying now.

Part of ooni/probe#2531
  • Loading branch information
bassosimone authored Sep 26, 2023
1 parent 74e1073 commit 98685c4
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 33 deletions.
16 changes: 8 additions & 8 deletions internal/enginenetx/network_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ func TestNetworkUnit(t *testing.T) {
},
},
stats: &statsManager{
kvStore: &kvstore.Memory{},
logger: model.DiscardLogger,
mu: sync.Mutex{},
root: &statsContainer{},
container: &statsContainer{},
kvStore: &kvstore.Memory{},
logger: model.DiscardLogger,
mu: sync.Mutex{},
},
txp: expected,
}
Expand All @@ -57,10 +57,10 @@ func TestNetworkUnit(t *testing.T) {
netx := &Network{
reso: expected,
stats: &statsManager{
kvStore: &kvstore.Memory{},
logger: model.DiscardLogger,
mu: sync.Mutex{},
root: &statsContainer{},
container: &statsContainer{},
kvStore: &kvstore.Memory{},
logger: model.DiscardLogger,
mu: sync.Mutex{},
},
txp: &mocks.HTTPTransport{
MockCloseIdleConnections: func() {
Expand Down
103 changes: 81 additions & 22 deletions internal/enginenetx/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,29 +51,86 @@ type statsTactic struct {
Tactic *HTTPSDialerTactic
}

func statsCloneMapStringInt64(input map[string]int64) (output map[string]int64) {
for key, value := range input {
if output == nil {
output = make(map[string]int64) // the idea here is to clone a nil map to a nil map
}
output[key] = value
}
return
}

// Clone clones a given [*statsTactic]
func (st *statsTactic) Clone() *statsTactic {
return &statsTactic{
CountStarted: st.CountStarted,
CountTCPConnectError: st.CountTCPConnectError,
CountTCPConnectInterrupt: st.CountTCPConnectInterrupt,
CountTLSHandshakeError: st.CountTLSHandshakeError,
CountTLSHandshakeInterrupt: st.CountTLSHandshakeInterrupt,
CountTLSVerificationError: st.CountTLSVerificationError,
CountSuccess: st.CountSuccess,
HistoTCPConnectError: statsCloneMapStringInt64(st.HistoTCPConnectError),
HistoTLSHandshakeError: statsCloneMapStringInt64(st.HistoTLSHandshakeError),
HistoTLSVerificationError: statsCloneMapStringInt64(st.HistoTLSVerificationError),
LastUpdated: st.LastUpdated,
Tactic: st.Tactic.Clone(),
}
}

// statsDomain contains stats associated with a domain.
type statsDomain struct {
Tactics map[string]*statsTactic
}

// statsDomainRemoveOldEntries returns a copy of a [*statsDomain] with old entries removed.
func statsDomainRemoveOldEntries(input *statsDomain) (output *statsDomain) {
output = &statsDomain{
Tactics: map[string]*statsTactic{},
}
oneWeek := 7 * 24 * time.Hour
now := time.Now()
for summary, tactic := range input.Tactics {
if delta := now.Sub(tactic.LastUpdated); delta > oneWeek {
continue
}
output.Tactics[summary] = tactic.Clone()
}
return
}

// statsContainerVersion is the current version of [statsContainer].
const statsContainerVersion = 2

// statsContainer is the root container for stats.
// statsContainer is the root container for the stats.
//
// The zero value is invalid; construct using [newStatsContainer].
type statsContainer struct {
// Domains maps a domain name to its tactics
// Domains maps a domain name to its tactics.
Domains map[string]*statsDomain

// Version is the version of the container data format.
Version int
}

// Get returns the tactic record for the given [*statsTactic] instance.
// statsDomainRemoveOldEntries returns a copy of a [*statsContainer] with old entries removed.
func statsContainerRemoveOldEntries(input *statsContainer) (output *statsContainer) {
output = newStatsContainer()
for domain, inputStats := range input.Domains {
prunedStats := statsDomainRemoveOldEntries(inputStats)
if len(prunedStats.Tactics) <= 0 {
continue
}
output.Domains[domain] = prunedStats
}
return
}

// GetStatsTacticLocked returns the tactic record for the given [*statsTactic] instance.
//
// At the name implies, this function MUST be called while holding the [*statsManager] mutex.
func (c *statsContainer) GetLocked(tactic *HTTPSDialerTactic) (*statsTactic, bool) {
func (c *statsContainer) GetStatsTacticLocked(tactic *HTTPSDialerTactic) (*statsTactic, bool) {
domainRecord, found := c.Domains[tactic.VerifyHostname]
if !found {
return nil, false
Expand All @@ -82,10 +139,10 @@ func (c *statsContainer) GetLocked(tactic *HTTPSDialerTactic) (*statsTactic, boo
return tacticRecord, found
}

// Set sets the tactic record for the given the given [*statsTactic] instance.
// SetStatsTacticLocked sets the tactic record for the given the given [*statsTactic] instance.
//
// At the name implies, this function MUST be called while holding the [*statsManager] mutex.
func (c *statsContainer) SetLocked(tactic *HTTPSDialerTactic, record *statsTactic) {
func (c *statsContainer) SetStatsTacticLocked(tactic *HTTPSDialerTactic, record *statsTactic) {
domainRecord, found := c.Domains[tactic.VerifyHostname]
if !found {
domainRecord = &statsDomain{
Expand Down Expand Up @@ -117,6 +174,9 @@ func newStatsContainer() *statsContainer {
// The zero value of this structure is not ready to use; please, use the
// [newStatsManager] factory to create a new instance.
type statsManager struct {
// container is the container container for stats
container *statsContainer

// kvStore is the key-value store we're using
kvStore model.KeyValueStore

Expand All @@ -125,9 +185,6 @@ type statsManager struct {

// mu provides mutual exclusion when accessing the stats.
mu sync.Mutex

// root is the root container for stats
root *statsContainer
}

// statsKey is the key used in the key-value store to access the state.
Expand All @@ -136,7 +193,7 @@ const statsKey = "httpsdialerstats.state"
// errStatsContainerWrongVersion means that the stats container document has the wrong version number.
var errStatsContainerWrongVersion = errors.New("wrong stats container version")

// loadStatsContainer loads a state container from the given key-value store.
// loadStatsContainer loads a stats container from the given [model.KeyValueStore].
func loadStatsContainer(kvStore model.KeyValueStore) (*statsContainer, error) {
// load data from the kvstore
data, err := kvStore.Get(statsKey)
Expand All @@ -162,7 +219,9 @@ func loadStatsContainer(kvStore model.KeyValueStore) (*statsContainer, error) {
return nil, err
}

return &container, nil
// make sure we remove old entries
pruned := statsContainerRemoveOldEntries(&container)
return pruned, nil
}

// newStatsManager constructs a new instance of [*statsManager].
Expand All @@ -173,10 +232,10 @@ func newStatsManager(kvStore model.KeyValueStore, logger model.Logger) *statsMan
}

return &statsManager{
root: root,
kvStore: kvStore,
logger: logger,
mu: sync.Mutex{},
container: root,
kvStore: kvStore,
logger: logger,
mu: sync.Mutex{},
}
}

Expand All @@ -189,7 +248,7 @@ func (mt *statsManager) OnStarting(tactic *HTTPSDialerTactic) {
mt.mu.Lock()

// get the record
record, found := mt.root.GetLocked(tactic)
record, found := mt.container.GetStatsTacticLocked(tactic)
if !found {
record = &statsTactic{
CountStarted: 0,
Expand All @@ -205,7 +264,7 @@ func (mt *statsManager) OnStarting(tactic *HTTPSDialerTactic) {
LastUpdated: time.Time{},
Tactic: tactic.Clone(), // avoid storing the original
}
mt.root.SetLocked(tactic, record)
mt.container.SetStatsTacticLocked(tactic, record)
}

// update stats
Expand All @@ -220,7 +279,7 @@ func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *HTTPSDial
mt.mu.Lock()

// get the record
record, found := mt.root.GetLocked(tactic)
record, found := mt.container.GetStatsTacticLocked(tactic)
if !found {
mt.logger.Warnf("HTTPSDialerStatsManager.OnTCPConnectError: not found: %+v", tactic)
return
Expand All @@ -243,7 +302,7 @@ func (mt *statsManager) OnTLSHandshakeError(ctx context.Context, tactic *HTTPSDi
mt.mu.Lock()

// get the record
record, found := mt.root.GetLocked(tactic)
record, found := mt.container.GetStatsTacticLocked(tactic)
if !found {
mt.logger.Warnf("HTTPSDialerStatsManager.OnTLSHandshakeError: not found: %+v", tactic)
return
Expand All @@ -266,7 +325,7 @@ func (mt *statsManager) OnTLSVerifyError(tactic *HTTPSDialerTactic, err error) {
mt.mu.Lock()

// get the record
record, found := mt.root.GetLocked(tactic)
record, found := mt.container.GetStatsTacticLocked(tactic)
if !found {
mt.logger.Warnf("HTTPSDialerStatsManager.OnTLSVerificationError: not found: %+v", tactic)
return
Expand All @@ -285,7 +344,7 @@ func (mt *statsManager) OnSuccess(tactic *HTTPSDialerTactic) {
mt.mu.Lock()

// get the record
record, found := mt.root.GetLocked(tactic)
record, found := mt.container.GetStatsTacticLocked(tactic)
if !found {
mt.logger.Warnf("HTTPSDialerStatsManager.OnSuccess: not found: %+v", tactic)
return
Expand All @@ -305,5 +364,5 @@ func (mt *statsManager) Close() error {
mt.mu.Lock()

// write updated stats into the underlying key-value store
return mt.kvStore.Set(statsKey, runtimex.Try1(json.Marshal(mt.root)))
return mt.kvStore.Set(statsKey, runtimex.Try1(json.Marshal(mt.container)))
}
68 changes: 65 additions & 3 deletions internal/enginenetx/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,10 @@ func TestLoadStatsContainer(t *testing.T) {
expectRoot *statsContainer
}

fourtyFiveMinutesAgo := time.Now().Add(-45 * time.Minute)

twoWeeksAgo := time.Now().Add(-14 * 24 * time.Hour)

cases := []testcase{{
name: "when the key-value store does not contain any data",
input: func() []byte {
Expand All @@ -319,7 +323,7 @@ func TestLoadStatsContainer(t *testing.T) {
expectErr: "httpsdialerstats.state: wrong stats container version: expected=2 got=1",
expectRoot: nil,
}, {
name: "on success",
name: "on success including correct entries pruning",
input: func() []byte {
root := &statsContainer{
Domains: map[string]*statsDomain{
Expand All @@ -340,14 +344,64 @@ func TestLoadStatsContainer(t *testing.T) {
HistoTLSVerificationError: map[string]int64{
"ssl_invalid_hostname": 1,
},
LastUpdated: time.Date(2023, 9, 25, 0, 0, 0, 0, time.UTC),
LastUpdated: fourtyFiveMinutesAgo,
Tactic: &HTTPSDialerTactic{
Endpoint: "162.55.247.208:443",
InitialDelay: 0,
SNI: "www.example.com",
VerifyHostname: "api.ooni.io",
},
},
"162.55.247.208:443 sni=www.example.org verify=api.ooni.io": { // should be skipped b/c it's old
CountStarted: 4,
CountTCPConnectError: 1,
CountTLSHandshakeError: 1,
CountTLSVerificationError: 1,
CountSuccess: 1,
HistoTCPConnectError: map[string]int64{
"connection_refused": 1,
},
HistoTLSHandshakeError: map[string]int64{
"generic_timeout_error": 1,
},
HistoTLSVerificationError: map[string]int64{
"ssl_invalid_hostname": 1,
},
LastUpdated: twoWeeksAgo,
Tactic: &HTTPSDialerTactic{
Endpoint: "162.55.247.208:443",
InitialDelay: 0,
SNI: "www.example.org",
VerifyHostname: "api.ooni.io",
},
},
},
},
"www.kernel.org": { // this whole entry should be skipped because it's too old
Tactics: map[string]*statsTactic{
"162.55.247.208:443 sni=www.example.com verify=www.kernel.org": {
CountStarted: 4,
CountTCPConnectError: 1,
CountTLSHandshakeError: 1,
CountTLSVerificationError: 1,
CountSuccess: 1,
HistoTCPConnectError: map[string]int64{
"connection_refused": 1,
},
HistoTLSHandshakeError: map[string]int64{
"generic_timeout_error": 1,
},
HistoTLSVerificationError: map[string]int64{
"ssl_invalid_hostname": 1,
},
LastUpdated: twoWeeksAgo,
Tactic: &HTTPSDialerTactic{
Endpoint: "162.55.247.208:443",
InitialDelay: 0,
SNI: "www.example.com",
VerifyHostname: "www.kernel.org",
},
},
},
},
},
Expand Down Expand Up @@ -375,7 +429,7 @@ func TestLoadStatsContainer(t *testing.T) {
HistoTLSVerificationError: map[string]int64{
"ssl_invalid_hostname": 1,
},
LastUpdated: time.Date(2023, 9, 25, 0, 0, 0, 0, time.UTC),
LastUpdated: fourtyFiveMinutesAgo,
Tactic: &HTTPSDialerTactic{
Endpoint: "162.55.247.208:443",
InitialDelay: 0,
Expand Down Expand Up @@ -433,6 +487,8 @@ func TestStatsManagerCallbacks(t *testing.T) {
expectRoot *statsContainer
}

fourtyFiveMinutesAgo := time.Now().Add(-45 * time.Minute)

cases := []testcase{

// When TCP connect fails and the reason is a canceled context
Expand All @@ -444,6 +500,8 @@ func TestStatsManagerCallbacks(t *testing.T) {
Tactics: map[string]*statsTactic{
"162.55.247.208:443 sni=www.example.com verify=api.ooni.io": {
CountStarted: 1,
LastUpdated: fourtyFiveMinutesAgo,
Tactic: &HTTPSDialerTactic{}, // only required for cloning
},
},
},
Expand Down Expand Up @@ -472,6 +530,7 @@ func TestStatsManagerCallbacks(t *testing.T) {
"162.55.247.208:443 sni=www.example.com verify=api.ooni.io": {
CountStarted: 1,
CountTCPConnectInterrupt: 1,
Tactic: &HTTPSDialerTactic{},
},
},
},
Expand Down Expand Up @@ -516,6 +575,8 @@ func TestStatsManagerCallbacks(t *testing.T) {
Tactics: map[string]*statsTactic{
"162.55.247.208:443 sni=www.example.com verify=api.ooni.io": {
CountStarted: 1,
LastUpdated: fourtyFiveMinutesAgo,
Tactic: &HTTPSDialerTactic{}, // only for cloning
},
},
},
Expand Down Expand Up @@ -544,6 +605,7 @@ func TestStatsManagerCallbacks(t *testing.T) {
"162.55.247.208:443 sni=www.example.com verify=api.ooni.io": {
CountStarted: 1,
CountTLSHandshakeInterrupt: 1,
Tactic: &HTTPSDialerTactic{},
},
},
},
Expand Down

0 comments on commit 98685c4

Please sign in to comment.