Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change dynamic captcha registration rates calculation #2684

Merged
merged 6 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 28 additions & 51 deletions src/internet_identity/src/storage/registration_rates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,25 @@ impl<M: Memory> RegistrationRates<M> {
self.current_rate_data.push(&now).expect("out of memory");
}

/// Calculates the registration rates for the current and reference intervals along with the threshold.
///
/// The calculation assumes that the data has been pruned of old timestamps before the rates are calculated.
///
/// Initially the window for the rate calculation was the difference between now and the oldest point.
/// However, we have now accumulated data for the last 3 weeks.
/// Therefore, we can assume that we have data for the whole interval and use the interval as the window.
/// If the data did not cover the whole interval, using the full interval as the window would underestimate the rate.
pub fn registration_rates(&self) -> Option<NormalizedRegistrationRates> {
let config = dynamic_captcha_config()?;
let now = time();

let reference_rate_per_second = calculate_registration_rate(now, &self.reference_rate_data);
let current_rate_per_second = calculate_registration_rate(now, &self.current_rate_data);
let reference_rate_per_second = rate_per_second(
self.reference_rate_data.len(),
config.reference_rate_retention_ns,
);
let current_rate_per_second = rate_per_second(
self.current_rate_data.len(),
config.current_rate_retention_ns,
);
let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier;
let rates = NormalizedRegistrationRates {
reference_rate_per_second,
Expand All @@ -87,39 +100,6 @@ impl<M: Memory> RegistrationRates<M> {
}
}

/// Computes the registrations-per-second rate over the time span for which data has actually
/// been collected, i.e. from the oldest stored timestamp up to `now`.
///
/// Sizing the window to the collected data (rather than to the configured retention period)
/// matters because
/// * a fixed window underestimates the rate while the data does not yet span the full window
/// * the reference rate window is generally longer than the current rate window
///
/// => with a fixed window, the captcha would be triggered prematurely for as long as data has
/// not been collected for the full reference rate retention window.
///
/// Example:
/// * `data_retention_ns` is 3 weeks
/// * there are currently 3 data points: `[1727768623000000000, 1727855023000000000, 1727941423000000000]`
///   (these are 24h apart each)
///
/// Calculated over a fixed 3-week window, the rate would be
/// 3 registrations / 1814400 seconds = 0.000001653439153 registrations / second
///
/// Since the data does not actually span 3 weeks, that figure underestimates the real rate.
/// Taking into account that the data is only spanning 3 days we get:
/// 3 registrations / 259200 seconds = 0.00001157407407 registrations / second
///
/// Returns `0.0` when `data` is empty or when the oldest timestamp equals `now`.
fn calculate_registration_rate<M: Memory>(now: u64, data: &MinHeap<Timestamp, M>) -> f64 {
    match data.peek() {
        // no data points -> the rate is 0
        None => 0.0,
        Some(oldest) => {
            // window length measured from the oldest data point to the current time
            let window_ns = now - oldest;
            if window_ns == 0 {
                // the oldest timestamp _could_ have been added in the same execution round
                0.0
            } else {
                rate_per_second(data.len(), window_ns)
            }
        }
    }
}

/// Converts an event count observed over a time window into a per-second rate.
///
/// * `count` - number of events (registrations) observed in the window.
/// * `duration_ns` - length of the window in nanoseconds.
///
/// Returns `count` divided by the window length in seconds. A zero-length window yields `0.0`
/// instead of dividing by zero, which would otherwise produce `inf` (or `NaN` when `count`
/// is also 0) and propagate into the reported rates.
fn rate_per_second(count: u64, duration_ns: u64) -> f64 {
    if duration_ns == 0 {
        // No time has elapsed, so no meaningful rate can be derived.
        return 0.0;
    }
    count as f64 / Duration::from_nanos(duration_ns).as_secs_f64()
}
Expand Down Expand Up @@ -203,13 +183,12 @@ mod test {

registration_rates.new_registration();

// 1 data point -> still 0 rates
assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
reference_rate_per_second: 0.0,
current_rate_per_second: 0.0,
captcha_threshold_rate: 0.0,
reference_rate_per_second: 0.001, // 1 / 1000, as per config
current_rate_per_second: 0.01, // 1 / 100, as per config
captcha_threshold_rate: 0.0012, // 20% more than the reference rate, as per config
}
);

Expand All @@ -220,9 +199,9 @@ mod test {
assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
reference_rate_per_second: 2.0,
current_rate_per_second: 2.0,
captcha_threshold_rate: 2.4, // 20% more than the reference rate, as per config
reference_rate_per_second: 0.002, // 2 / 1000, as per config
current_rate_per_second: 0.02, // 2 / 100, as per config
captcha_threshold_rate: 0.0024, // 20% more than the reference rate, as per config
}
);
}
Expand Down Expand Up @@ -340,26 +319,24 @@ mod test {
assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
current_rate_per_second: 100.0,
reference_rate_per_second: 100.0,
captcha_threshold_rate: 120.0,
current_rate_per_second: 1.0,
reference_rate_per_second: 0.1,
captcha_threshold_rate: 0.12,
}
);

// move time forward by reference rate time interval
TIME.with_borrow_mut(|t| *t += Duration::from_secs(1000).as_nanos() as u64);

// Adding a new data point prunes everything except the one data point added now
// -> there are at least 2 data points required to calculate a rate
// -> rates are 0.0
registration_rates.new_registration();

assert_eq!(
registration_rates.registration_rates().unwrap(),
NormalizedRegistrationRates {
current_rate_per_second: 0.0,
reference_rate_per_second: 0.0,
captcha_threshold_rate: 0.0,
current_rate_per_second: 0.01,
reference_rate_per_second: 0.001,
captcha_threshold_rate: 0.0012,
}
);
}
Expand Down
6 changes: 3 additions & 3 deletions src/internet_identity/tests/integration/http.rs
Original file line number Diff line number Diff line change
Expand Up @@ -805,19 +805,19 @@ fn should_report_registration_rates() -> Result<(), CallError> {
assert_metric_approx(
&metrics,
"internet_identity_registrations_per_second{type=\"reference_rate\"}",
1.6,
0.4,
0.1,
);
assert_metric_approx(
&metrics,
"internet_identity_registrations_per_second{type=\"current_rate\"}",
1.3,
2f64,
0.1,
);
assert_metric_approx(
&metrics,
"internet_identity_registrations_per_second{type=\"captcha_threshold_rate\"}",
1.9,
0.48,
0.1,
);
Ok(())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ fn should_require_captcha_above_threshold_rate() {
install_ii_canister_with_arg(&env, II_WASM.clone(), arg_with_dynamic_captcha());
let authn_method = test_authn_method();

// initialize a base rate of one registration every 2 seconds
for _ in 0..10 {
// initialize a base rate of one registration every 4 seconds for 100 seconds (reference rate)
for _ in 0..25 {
create_identity_with_authn_method(&env, canister_id, &authn_method);
env.advance_time(Duration::from_secs(2))
env.advance_time(Duration::from_secs(4))
}

// Double the rate of registrations to one per second
// The 20% threshold rate should allow 5 registrations before the captcha kicks in
for i in 0..5 {
// The 20% threshold rate should allow 2 registrations before the captcha kicks in
for i in 0..2 {
let flow_principal = test_principal(i);
let result = api_v2::identity_registration_start(&env, canister_id, flow_principal)
.expect("API call failed")
Expand Down