Skip to content

Commit

Permalink
Change dynamic captcha registration rates calculation (#2684)
Browse files Browse the repository at this point in the history
* Change dynamic captcha registration rates calculation

* Fix formatting

* Fix test

* Fix tests

* Fix test

* Fix test dynamic captcha
  • Loading branch information
lmuntaner authored Nov 6, 2024
1 parent 44aaa1c commit cfe7308
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 59 deletions.
79 changes: 28 additions & 51 deletions src/internet_identity/src/storage/registration_rates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,25 @@ impl<M: Memory> RegistrationRates<M> {
self.current_rate_data.push(&now).expect("out of memory");
}

/// Calculates the registration rates for the current and reference intervals along with the threschold.
///
/// The calculation assumes that the data has been pruned of old timestamps before the rates are calculated.
///
/// Initially the window for the rate calculation was the difference between now and the oldest point.
/// However, we have accumulated data the last 3 weeks.
/// Therefore, we can assume that we have data for all the interval and use the interval as the window.
/// Otherwise, the rate will be underestimated.
pub fn registration_rates(&self) -> Option<NormalizedRegistrationRates> {
let config = dynamic_captcha_config()?;
let now = time();

let reference_rate_per_second = calculate_registration_rate(now, &self.reference_rate_data);
let current_rate_per_second = calculate_registration_rate(now, &self.current_rate_data);
let reference_rate_per_second = rate_per_second(
self.reference_rate_data.len(),
config.reference_rate_retention_ns,
);
let current_rate_per_second = rate_per_second(
self.current_rate_data.len(),
config.current_rate_retention_ns,
);
let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier;
let rates = NormalizedRegistrationRates {
reference_rate_per_second,
Expand All @@ -87,39 +100,6 @@ impl<M: Memory> RegistrationRates<M> {
}
}

/// Calculates the rate per second of registrations taking into account for how long data has
/// already been collected. Adjusting the window to the actual data collected is important because
/// * rates are underestimated by fixed window calculations
/// * the reference registration rate window is generally longer than the current rate window
///
/// => this means that the captcha would be triggered prematurely during the period where data has
/// not been collected for the full reference registration rate data retention window.
///
/// Example:
/// * `data_retention_ns` is 3 weeks
/// * there are currently 3 data points: `[1727768623000000000, 1727855023000000000, 1727941423000000000]`
/// (these are 24h apart each)
///
/// If the rate was calculated over a 3-week time window, this would be
/// 3 registrations / 1814400 seconds = 0.000001653439153 registrations / second
///
/// However, because the data is not actually spanning 3 weeks, this underestimates the actual rate.
/// Taking into account that the data is only spanning 3 days we get the following:
/// 3 registrations / 259200 seconds = 0.00001157407407 registrations / second
fn calculate_registration_rate<M: Memory>(now: u64, data: &MinHeap<Timestamp, M>) -> f64 {
data
// get the oldest value
.peek()
// calculate the time window length with respect to the current time
.map(|ts| now - ts)
// the value _could_ be 0 if the oldest timestamp was added in the same execution round
.filter(|val| *val != 0)
// use the value to calculate the rate per second
.map(|val| rate_per_second(data.len(), val))
// if we don't have data, the rate is 0
.unwrap_or(0.0)
}

fn rate_per_second(count: u64, duration_ns: u64) -> f64 {
count as f64 / Duration::from_nanos(duration_ns).as_secs_f64()
}
Expand Down Expand Up @@ -203,13 +183,12 @@ mod test {

registration_rates.new_registration();

// 1 data point -> still 0 rates
assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
reference_rate_per_second: 0.0,
current_rate_per_second: 0.0,
captcha_threshold_rate: 0.0,
reference_rate_per_second: 0.001, // 1 / 1000, as per config
current_rate_per_second: 0.01, // 1 / 100, as per config
captcha_threshold_rate: 0.0012, // 20% more than the reference rate, as per config
}
);

Expand All @@ -220,9 +199,9 @@ mod test {
assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
reference_rate_per_second: 2.0,
current_rate_per_second: 2.0,
captcha_threshold_rate: 2.4, // 20% more than the reference rate, as per config
reference_rate_per_second: 0.002, // 2 / 1000, as per config
current_rate_per_second: 0.02, // 2 / 100, as per config
captcha_threshold_rate: 0.0024, // 20% more than the reference rate, as per config
}
);
}
Expand Down Expand Up @@ -340,26 +319,24 @@ mod test {
assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
current_rate_per_second: 100.0,
reference_rate_per_second: 100.0,
captcha_threshold_rate: 120.0,
current_rate_per_second: 1.0,
reference_rate_per_second: 0.1,
captcha_threshold_rate: 0.12,
}
);

// move time forward by reference rate time interval
TIME.with_borrow_mut(|t| *t += Duration::from_secs(1000).as_nanos() as u64);

// Adding a new data point prunes everything except the one data point added now
// -> there are at least 2 data points required to calculate a rate
// -> rates are 0.0
registration_rates.new_registration();

assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
current_rate_per_second: 0.0,
reference_rate_per_second: 0.0,
captcha_threshold_rate: 0.0,
current_rate_per_second: 0.01,
reference_rate_per_second: 0.001,
captcha_threshold_rate: 0.0012,
}
);
}
Expand Down
6 changes: 3 additions & 3 deletions src/internet_identity/tests/integration/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -805,19 +805,19 @@ fn should_report_registration_rates() -> Result<(), CallError> {
assert_metric_approx(
&metrics,
"internet_identity_registrations_per_second{type=\"reference_rate\"}",
1.6,
0.4,
0.1,
);
assert_metric_approx(
&metrics,
"internet_identity_registrations_per_second{type=\"current_rate\"}",
1.3,
2f64,
0.1,
);
assert_metric_approx(
&metrics,
"internet_identity_registrations_per_second{type=\"captcha_threshold_rate\"}",
1.9,
0.48,
0.1,
);
Ok(())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ fn should_require_captcha_above_threshold_rate() {
install_ii_canister_with_arg(&env, II_WASM.clone(), arg_with_dynamic_captcha());
let authn_method = test_authn_method();

// initialize a base rate of one registration every 2 seconds
for _ in 0..10 {
// initialize a base rate of one registration every 4 seconds for 100 seconds (reference rate)
for _ in 0..25 {
create_identity_with_authn_method(&env, canister_id, &authn_method);
env.advance_time(Duration::from_secs(2))
env.advance_time(Duration::from_secs(4))
}

// Double the rate of registrations to one per second
// The 20% threshold rate should allow 5 registrations before the captcha kicks in
for i in 0..5 {
// The 20% threshold rate should allow 2 registrations before the captcha kicks in
for i in 0..2 {
let flow_principal = test_principal(i);
let result = api_v2::identity_registration_start(&env, canister_id, flow_principal)
.expect("API call failed")
Expand Down

0 comments on commit cfe7308

Please sign in to comment.