-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
orphaned session protection, funnel service errors to http client
- Loading branch information
Showing
12 changed files
with
389 additions
and
119 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
use crate::MatchmakerState; | ||
use async_nats::jetstream::{self}; | ||
use edgegap::apis::sessions_api::*; | ||
use futures::StreamExt; | ||
use log::*; | ||
|
||
// need an erlang/OTP like supervision tree! | ||
pub async fn delete_session_worker_supervisor( | ||
state: &MatchmakerState, | ||
) -> Result<(), async_nats::Error> { | ||
loop { | ||
let state = state.clone(); | ||
let handle = tokio::spawn(async move { | ||
let res = delete_session_worker(&state).await; | ||
if let Err(e) = res { | ||
error!("delete_session_worker error: {e:?}"); | ||
} | ||
}); | ||
futures::future::join_all([handle]).await; | ||
warn!("delete_session_worker exited, restarting after timeout"); | ||
tokio::time::sleep(std::time::Duration::from_secs(30)).await; | ||
} | ||
Ok(()) | ||
} | ||
|
||
async fn delete_session_worker(state: &MatchmakerState) -> Result<(), async_nats::Error> { | ||
let stream = state.nats.delete_session_stream(); | ||
let consumer = stream | ||
.create_consumer(jetstream::consumer::pull::Config { | ||
durable_name: Some("api-deleter-1".to_string()), | ||
description: Some("Calls edgegap session delete api".to_string()), | ||
ack_policy: jetstream::consumer::AckPolicy::Explicit, | ||
..Default::default() | ||
}) | ||
.await?; | ||
|
||
loop { | ||
let mut messages = consumer.fetch().max_messages(100).messages().await?; | ||
while let Some(Ok(message)) = messages.next().await { | ||
let session_id = String::from_utf8(message.payload.to_vec())?; | ||
match session_delete(state.configuration(), session_id.as_str()).await { | ||
Ok(session_delete_response) => { | ||
info!("session_delete ok: {:?}", session_delete_response); | ||
message.ack().await?; | ||
} | ||
Err(edgegap::apis::Error::ResponseError(resp_content)) => { | ||
match resp_content.status.as_u16() { | ||
404 => { | ||
// session already deleted or never existed. | ||
warn!("session_delete 404: {session_id}"); | ||
message.ack().await?; | ||
} | ||
410 => { | ||
// "instance already terminated" | ||
warn!("session_delete 410 'instance already terminated': {session_id}"); | ||
message.ack().await?; | ||
} | ||
code => { | ||
error!("session_delete error status = {code} for {session_id} {resp_content:?}"); | ||
} | ||
} | ||
} | ||
Err(e) => { | ||
// TODO What to do about junk data on queue that can never be deleted? | ||
error!("unhandled session_delete error {session_id}: {e:?}"); | ||
} | ||
} | ||
} | ||
tokio::time::sleep(std::time::Duration::from_millis(5000)).await; | ||
} | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
/// Detects orphaned edgegap sessions and schedules them for deletion by the API | ||
/// Actual API-delete call happens in the session_delete_worker. | ||
use crate::MatchmakerState; | ||
use ::time::OffsetDateTime; | ||
use async_nats::jetstream::kv::Operation; | ||
use futures::{StreamExt, TryStreamExt}; | ||
use log::*; | ||
use tokio::time::{self, Duration}; | ||
|
||
pub(crate) async fn session_cleanup_supervisor( | ||
orig_state: &MatchmakerState, | ||
) -> Result<(), async_nats::Error> { | ||
let state = orig_state.clone(); | ||
let handle1 = tokio::spawn(async move { | ||
loop { | ||
let _ = session_cleanup_watcher(&state).await; | ||
error!("session_cleanup_watcher exited, restarting"); | ||
} | ||
}); | ||
let state = orig_state.clone(); | ||
let handle2 = tokio::spawn(async move { | ||
loop { | ||
let _ = unclaimed_session_reaper(&state).await; | ||
error!("unclaimed_session_reaper exited, restarting"); | ||
} | ||
}); | ||
futures::future::join_all([handle1, handle2]).await; | ||
Ok(()) | ||
} | ||
|
||
/// Get all the session keys in unclaimed sessions - if any are older than 30 seconds, | ||
/// enqueue them for deletion. | ||
/// Session ids must be removed from unclaimed_sessions once a gameserver connection happens. | ||
async fn unclaimed_session_reaper(state: &MatchmakerState) -> Result<(), async_nats::Error> { | ||
// how often to check for orphaned sessions: | ||
let mut interval = time::interval(Duration::from_millis(5000)); | ||
let kv = state.nats.kv_unclaimed_sessions(); | ||
loop { | ||
interval.tick().await; | ||
let mut keys = kv.keys().await?.boxed(); | ||
while let Some(key) = keys.try_next().await? { | ||
let Ok(Some(entry)) = kv.entry(&key).await else { | ||
continue; | ||
}; | ||
let session_id = String::from_utf8(entry.value.to_vec()) | ||
.expect("Failed to convert session_id to string"); | ||
let age = OffsetDateTime::now_utc() - entry.created; | ||
info!("* Session {session_id} is {age} old"); | ||
if age > Duration::from_secs(30) { | ||
warn!("Unclaimed session {session_id} is older than 30 seconds = {age}"); | ||
// write to delete_sessions work queue and remove from unclaimed_sessions KV | ||
state | ||
.nats | ||
.enqueue_session_delete(session_id.clone()) | ||
.await?; | ||
kv.delete(&key).await?; | ||
} | ||
} | ||
} | ||
Ok(()) | ||
} | ||
|
||
/// Deletes sessions once a gameserver removes the active_sessions KV entry. | ||
/// this is the happy path, where there were no orphans.. | ||
async fn session_cleanup_watcher(state: &MatchmakerState) -> Result<(), async_nats::Error> { | ||
let kv = state.nats.kv_active_connections(); | ||
let mut watcher = kv.watch(">").await?; | ||
while let Some(event) = watcher.next().await { | ||
info!("{event:?}"); | ||
match event { | ||
Ok(event) => { | ||
let session_id = event.key; | ||
if event.operation == Operation::Delete { | ||
info!("active_connection deleted, deleting session {session_id}",); | ||
state | ||
.nats | ||
.enqueue_session_delete(session_id.clone()) | ||
.await?; | ||
} | ||
if event.operation == Operation::Put { | ||
info!("New Session put {session_id}, deleting from unclaimed_sessions "); | ||
// delete this session_id from unclaimed_sessions. | ||
let _ = state | ||
.nats | ||
.kv_unclaimed_sessions() | ||
.delete(session_id.as_str()) | ||
.await; | ||
} | ||
} | ||
Err(e) => { | ||
warn!("KV event error watching for session cleanup: {:?}", e); | ||
} | ||
} | ||
} | ||
Ok(()) | ||
} |
Oops, something went wrong.