From cfe7308cf22e217ce79eb2d5e263865eb1862f5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7=20Muntaner?= Date: Wed, 6 Nov 2024 15:49:28 +0100 Subject: [PATCH] Change dynamic captcha registration rates calculation (#2684) * Change dynamic captcha registration rates calculation * Fix formatting * Fix test * Fix tests * Fix test * Fix test dynamic captcha --- .../src/storage/registration_rates.rs | 79 +++++++------------ .../tests/integration/http.rs | 6 +- .../identity_register/dynamic_captcha.rs | 10 +-- 3 files changed, 36 insertions(+), 59 deletions(-) diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs index 8ab12da740..ce93d58229 100644 --- a/src/internet_identity/src/storage/registration_rates.rs +++ b/src/internet_identity/src/storage/registration_rates.rs @@ -57,12 +57,25 @@ impl RegistrationRates { self.current_rate_data.push(&now).expect("out of memory"); } + /// Calculates the registration rates for the current and reference intervals along with the threschold. + /// + /// The calculation assumes that the data has been pruned of old timestamps before the rates are calculated. + /// + /// Initially the window for the rate calculation was the difference between now and the oldest point. + /// However, we have accumulated data the last 3 weeks. + /// Therefore, we can assume that we have data for all the interval and use the interval as the window. + /// Otherwise, the rate will be underestimated. pub fn registration_rates(&self) -> Option { let config = dynamic_captcha_config()?; - let now = time(); - let reference_rate_per_second = calculate_registration_rate(now, &self.reference_rate_data); - let current_rate_per_second = calculate_registration_rate(now, &self.current_rate_data); + let reference_rate_per_second = rate_per_second( + self.reference_rate_data.len(), + config.reference_rate_retention_ns, + ); + let current_rate_per_second = rate_per_second( + self.current_rate_data.len(), + config.current_rate_retention_ns, + ); let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier; let rates = NormalizedRegistrationRates { reference_rate_per_second, @@ -87,39 +100,6 @@ impl RegistrationRates { } } -/// Calculates the rate per second of registrations taking into account for how long data has -/// already been collected. Adjusting the window to the actual data collected is important because -/// * rates are underestimated by fixed window calculations -/// * the reference registration rate window is generally longer than the current rate window -/// -/// => this means that the captcha would be triggered prematurely during the period where data has -/// not been collected for the full reference registration rate data retention window. -/// -/// Example: -/// * `data_retention_ns` is 3 weeks -/// * there are currently 3 data points: `[1727768623000000000, 1727855023000000000, 1727941423000000000]` -/// (these are 24h apart each) -/// -/// If the rate was calculated over a 3-week time window, this would be -/// 3 registrations / 1814400 seconds = 0.000001653439153 registrations / second -/// -/// However, because the data is not actually spanning 3 weeks, this underestimates the actual rate. -/// Taking into account that the data is only spanning 3 days we get the following: -/// 3 registrations / 259200 seconds = 0.00001157407407 registrations / second -fn calculate_registration_rate(now: u64, data: &MinHeap) -> f64 { - data - // get the oldest value - .peek() - // calculate the time window length with respect to the current time - .map(|ts| now - ts) - // the value _could_ be 0 if the oldest timestamp was added in the same execution round - .filter(|val| *val != 0) - // use the value to calculate the rate per second - .map(|val| rate_per_second(data.len(), val)) - // if we don't have data, the rate is 0 - .unwrap_or(0.0) -} - fn rate_per_second(count: u64, duration_ns: u64) -> f64 { count as f64 / Duration::from_nanos(duration_ns).as_secs_f64() } @@ -203,13 +183,12 @@ mod test { registration_rates.new_registration(); - // 1 data point -> still 0 rates assert_eq!( registration_rates.registration_rates().unwrap(), NormalizedRegistrationRates { - reference_rate_per_second: 0.0, - current_rate_per_second: 0.0, - captcha_threshold_rate: 0.0, + reference_rate_per_second: 0.001, // 1 / 1000, as per config + current_rate_per_second: 0.01, // 1 / 100, as per config + captcha_threshold_rate: 0.0012, // 20% more than the reference rate, as per config } ); @@ -220,9 +199,9 @@ mod test { assert_eq!( registration_rates.registration_rates().unwrap(), NormalizedRegistrationRates { - reference_rate_per_second: 2.0, - current_rate_per_second: 2.0, - captcha_threshold_rate: 2.4, // 20% more than the reference rate, as per config + reference_rate_per_second: 0.002, // 2 / 1000, as per config + current_rate_per_second: 0.02, // 2 / 100, as per config + captcha_threshold_rate: 0.0024, // 20% more than the reference rate, as per config } ); } @@ -340,9 +319,9 @@ mod test { assert_eq!( registration_rates.registration_rates().unwrap(), NormalizedRegistrationRates { - current_rate_per_second: 100.0, - reference_rate_per_second: 100.0, - captcha_threshold_rate: 120.0, + current_rate_per_second: 1.0, + reference_rate_per_second: 0.1, + captcha_threshold_rate: 0.12, } ); @@ -350,16 +329,14 @@ mod test { TIME.with_borrow_mut(|t| *t += Duration::from_secs(1000).as_nanos() as u64); // Adding a new data point prunes everything except the one data point added now - // -> there are at least 2 data points required to calculate a rate - // -> rates are 0.0 registration_rates.new_registration(); assert_eq!( registration_rates.registration_rates().unwrap(), NormalizedRegistrationRates { - current_rate_per_second: 0.0, - reference_rate_per_second: 0.0, - captcha_threshold_rate: 0.0, + current_rate_per_second: 0.01, + reference_rate_per_second: 0.001, + captcha_threshold_rate: 0.0012, } ); } diff --git a/src/internet_identity/tests/integration/http.rs b/src/internet_identity/tests/integration/http.rs index da2b4503ab..78750172df 100644 --- a/src/internet_identity/tests/integration/http.rs +++ b/src/internet_identity/tests/integration/http.rs @@ -805,19 +805,19 @@ fn should_report_registration_rates() -> Result<(), CallError> { assert_metric_approx( &metrics, "internet_identity_registrations_per_second{type=\"reference_rate\"}", - 1.6, + 0.4, 0.1, ); assert_metric_approx( &metrics, "internet_identity_registrations_per_second{type=\"current_rate\"}", - 1.3, + 2f64, 0.1, ); assert_metric_approx( &metrics, "internet_identity_registrations_per_second{type=\"captcha_threshold_rate\"}", - 1.9, + 0.48, 0.1, ); Ok(()) diff --git a/src/internet_identity/tests/integration/v2_api/identity_register/dynamic_captcha.rs b/src/internet_identity/tests/integration/v2_api/identity_register/dynamic_captcha.rs index 36e02a8728..f64aa14033 100644 --- a/src/internet_identity/tests/integration/v2_api/identity_register/dynamic_captcha.rs +++ b/src/internet_identity/tests/integration/v2_api/identity_register/dynamic_captcha.rs @@ -34,15 +34,15 @@ fn should_require_captcha_above_threshold_rate() { install_ii_canister_with_arg(&env, II_WASM.clone(), arg_with_dynamic_captcha()); let authn_method = test_authn_method(); - // initialize a base rate of one registration every 2 seconds - for _ in 0..10 { + // initialize a base rate of one registration every 4 seconds for 100 seconds (reference rate) + for _ in 0..25 { create_identity_with_authn_method(&env, canister_id, &authn_method); - env.advance_time(Duration::from_secs(2)) + env.advance_time(Duration::from_secs(4)) } // Double the rate of registrations to one per second - // The 20% threshold rate should allow 5 registrations before the captcha kicks in - for i in 0..5 { + // The 20% threshold rate should allow 2 registrations before the captcha kicks in + for i in 0..2 { let flow_principal = test_principal(i); let result = api_v2::identity_registration_start(&env, canister_id, flow_principal) .expect("API call failed")