Skip to content

Commit

Permalink
Release cache lock after proxy upstream filter, serve stale
Browse files Browse the repository at this point in the history
The cache locks may be held after serving stale, proxy upstream filter,
or revalidate uncacheable resulting in dangling cache locks.
Also only disable cache on final error if cache was not already
disabled, and add DeclinedToUpstream / UpstreamError no cache reasons.
  • Loading branch information
drcaramelsyrup committed Nov 23, 2024
1 parent 1756948 commit e309436
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .bleep
Original file line number Diff line number Diff line change
@@ -1 +1 @@
4cba0f37f5682058c05904a3c5a395919a8ac498
89319e9383d6f99066dfeace750a553d45e1c167
70 changes: 46 additions & 24 deletions pingora-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,12 @@ pub enum NoCacheReason {
///
/// This happens when the cache predictor predicted that this request is not cacheable, but
/// the response turns out to be OK to cache. However, it might be too large to re-enable caching
/// for this request.
/// for this request
Deferred,
/// Due to the proxy upstream filter declining the current request from going upstream
DeclinedToUpstream,
/// Due to the upstream being unreachable or otherwise erroring during proxying
UpstreamError,
/// The writer of the cache lock sees that the request is not cacheable (Could be OriginNotCache)
CacheLockGiveUp,
/// This request waited too long for the writer of the cache lock to finish, so this request will
Expand All @@ -147,6 +151,8 @@ impl NoCacheReason {
StorageError => "StorageError",
InternalError => "InternalError",
Deferred => "Deferred",
DeclinedToUpstream => "DeclinedToUpstream",
UpstreamError => "UpstreamError",
CacheLockGiveUp => "CacheLockGiveUp",
CacheLockTimeout => "CacheLockTimeout",
Custom(s) => s,
Expand Down Expand Up @@ -299,38 +305,52 @@ impl HttpCache {
.is_some()
}

/// Release the cache lock if the current request is a cache writer.
///
/// Generally callers should prefer using `disable` when a cache lock should be released
/// due to an error to clear all cache context. This function is for releasing the cache lock
/// while still keeping the cache around for reading, e.g. when serving stale.
pub fn release_write_lock(&mut self, reason: NoCacheReason) {
use NoCacheReason::*;
if let Some(inner) = self.inner.as_mut() {
let lock = inner.lock.take();
if let Some(Locked::Write(_r)) = lock {
let lock_status = match reason {
// let the next request try to fetch it
InternalError | StorageError | Deferred | UpstreamError => {
LockStatus::TransientError
}
// depends on why the proxy upstream filter declined the request,
// for now still allow next request try to acquire to avoid thundering herd
DeclinedToUpstream => LockStatus::TransientError,
// no need for the lock anymore
OriginNotCache | ResponseTooLarge => LockStatus::GiveUp,
// not sure which LockStatus make sense, we treat it as GiveUp for now
Custom(_) => LockStatus::GiveUp,
// should never happen, NeverEnabled shouldn't hold a lock
NeverEnabled => panic!("NeverEnabled holds a write lock"),
CacheLockGiveUp | CacheLockTimeout => {
panic!("CacheLock* are for cache lock readers only")
}
};
inner
.cache_lock
.unwrap()
.release(inner.key.as_ref().unwrap(), lock_status);
}
}
}

/// Disable caching
pub fn disable(&mut self, reason: NoCacheReason) {
use NoCacheReason::*;
match self.phase {
CachePhase::Disabled(_) => {
// replace reason
self.phase = CachePhase::Disabled(reason);
}
_ => {
self.phase = CachePhase::Disabled(reason);
if let Some(inner) = self.inner.as_mut() {
let lock = inner.lock.take();
if let Some(Locked::Write(_r)) = lock {
let lock_status = match reason {
// let the next request try to fetch it
InternalError | StorageError | Deferred => LockStatus::TransientError,
// no need for the lock anymore
OriginNotCache | ResponseTooLarge => LockStatus::GiveUp,
// not sure which LockStatus make sense, we treat it as GiveUp for now
Custom(_) => LockStatus::GiveUp,
// should never happen, NeverEnabled shouldn't hold a lock
NeverEnabled => panic!("NeverEnabled holds a write lock"),
CacheLockGiveUp | CacheLockTimeout => {
panic!("CacheLock* are for cache lock readers only")
}
};
inner
.cache_lock
.unwrap()
.release(inner.key.as_ref().unwrap(), lock_status);
}
}
self.release_write_lock(reason);
// log initial disable reason
self.inner_mut()
.traces
Expand Down Expand Up @@ -824,6 +844,8 @@ impl HttpCache {
CachePhase::Stale => {
// replace cache meta header
self.inner_mut().meta.as_mut().unwrap().0.header = header;
// upstream request done, release write lock
self.release_write_lock(reason);
}
_ => panic!("wrong phase {:?}", self.phase),
}
Expand Down
2 changes: 1 addition & 1 deletion pingora-cache/src/predictor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ where
// CacheLockGiveUp: the writer will set OriginNotCache (if applicable)
// readers don't need to do it
NeverEnabled | StorageError | InternalError | Deferred | CacheLockGiveUp
| CacheLockTimeout => {
| CacheLockTimeout | DeclinedToUpstream | UpstreamError => {
return None;
}
// Skip certain NoCacheReason::Custom according to user
Expand Down
17 changes: 16 additions & 1 deletion pingora-proxy/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,10 @@ impl<SV> HttpProxy<SV> {
if !proxy_to_upstream {
// The hook can choose to write its own response, but if it doesn't, we respond
// with a generic 502
if session.cache.enabled() {
// drop the cache lock that this request may be holding onto
session.cache.disable(NoCacheReason::DeclinedToUpstream);
}
if session.response_written().is_none() {
match session.write_response_header_ref(&BAD_GATEWAY).await {
Ok(()) => {}
Expand All @@ -557,6 +561,10 @@ impl<SV> HttpProxy<SV> {
/* else continue */
}
Err(e) => {
if session.cache.enabled() {
session.cache.disable(NoCacheReason::InternalError);
}

self.handle_error(
&mut session,
&mut ctx,
Expand Down Expand Up @@ -621,7 +629,14 @@ impl<SV> HttpProxy<SV> {

if let Some(e) = final_error.as_ref() {
// If we have errored and are still holding a cache lock, release it.
session.cache.disable(NoCacheReason::InternalError);
if session.cache.enabled() {
let reason = if *e.esource() == ErrorSource::Upstream {
NoCacheReason::UpstreamError
} else {
NoCacheReason::InternalError
};
session.cache.disable(reason);
}
let status = self.inner.fail_to_proxy(&mut session, e, &mut ctx).await;

// final error will have > 0 status unless downstream connection is dead
Expand Down
17 changes: 16 additions & 1 deletion pingora-proxy/src/proxy_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,18 @@ impl<SV> HttpProxy<SV> {
None,
None,
);
self.inner
if self
.inner
.should_serve_stale(session, ctx, Some(&http_status_error))
{
// no more need to keep the write lock
session
.cache
.release_write_lock(NoCacheReason::UpstreamError);
true
} else {
false
}
} else {
false // not 304, not stale if error status code
}
Expand Down Expand Up @@ -712,6 +722,11 @@ impl<SV> HttpProxy<SV> {
self.inner.request_summary(session, ctx)
);

// no more need to hang onto the cache lock
session
.cache
.release_write_lock(NoCacheReason::UpstreamError);

Some(self.proxy_cache_hit(session, ctx).await)
}

Expand Down
1 change: 1 addition & 0 deletions pingora-proxy/src/proxy_h2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ impl<SV> HttpProxy<SV> {
.cache_http_task(session, &task, ctx, serve_from_cache)
.await
{
session.cache.disable(NoCacheReason::StorageError);
if serve_from_cache.is_miss_body() {
// if the response stream cache body during miss but write fails, it has to
// give up the entire request
Expand Down

0 comments on commit e309436

Please sign in to comment.