diff --git a/.bleep b/.bleep index 1f8104d5..d1591c7e 100644 --- a/.bleep +++ b/.bleep @@ -1 +1 @@ -4cba0f37f5682058c05904a3c5a395919a8ac498 +89319e9383d6f99066dfeace750a553d45e1c167 diff --git a/pingora-cache/src/lib.rs b/pingora-cache/src/lib.rs index 6bebbd31..6ac49a59 100644 --- a/pingora-cache/src/lib.rs +++ b/pingora-cache/src/lib.rs @@ -125,8 +125,12 @@ pub enum NoCacheReason { /// /// This happens when the cache predictor predicted that this request is not cacheable, but /// the response turns out to be OK to cache. However, it might be too large to re-enable caching - /// for this request. + /// for this request Deferred, + /// Due to the proxy upstream filter declining the current request from going upstream + DeclinedToUpstream, + /// Due to the upstream being unreachable or otherwise erroring during proxying + UpstreamError, /// The writer of the cache lock sees that the request is not cacheable (Could be OriginNotCache) CacheLockGiveUp, /// This request waited too long for the writer of the cache lock to finish, so this request will @@ -147,6 +151,8 @@ impl NoCacheReason { StorageError => "StorageError", InternalError => "InternalError", Deferred => "Deferred", + DeclinedToUpstream => "DeclinedToUpstream", + UpstreamError => "UpstreamError", CacheLockGiveUp => "CacheLockGiveUp", CacheLockTimeout => "CacheLockTimeout", Custom(s) => s, @@ -299,9 +305,44 @@ impl HttpCache { .is_some() } + /// Release the cache lock if the current request is a cache writer. + /// + /// Generally callers should prefer using `disable` when a cache lock should be released + /// due to an error to clear all cache context. This function is for releasing the cache lock + /// while still keeping the cache around for reading, e.g. when serving stale. + pub fn release_write_lock(&mut self, reason: NoCacheReason) { + use NoCacheReason::*; + if let Some(inner) = self.inner.as_mut() { + let lock = inner.lock.take(); + if let Some(Locked::Write(_r)) = lock { + let lock_status = match reason { + // let the next request try to fetch it + InternalError | StorageError | Deferred | UpstreamError => { + LockStatus::TransientError + } + // depends on why the proxy upstream filter declined the request, + // for now still allow next request try to acquire to avoid thundering herd + DeclinedToUpstream => LockStatus::TransientError, + // no need for the lock anymore + OriginNotCache | ResponseTooLarge => LockStatus::GiveUp, + // not sure which LockStatus make sense, we treat it as GiveUp for now + Custom(_) => LockStatus::GiveUp, + // should never happen, NeverEnabled shouldn't hold a lock + NeverEnabled => panic!("NeverEnabled holds a write lock"), + CacheLockGiveUp | CacheLockTimeout => { + panic!("CacheLock* are for cache lock readers only") + } + }; + inner + .cache_lock + .unwrap() + .release(inner.key.as_ref().unwrap(), lock_status); + } + } + } + /// Disable caching pub fn disable(&mut self, reason: NoCacheReason) { - use NoCacheReason::*; match self.phase { CachePhase::Disabled(_) => { // replace reason @@ -309,28 +350,7 @@ impl HttpCache { } _ => { self.phase = CachePhase::Disabled(reason); - if let Some(inner) = self.inner.as_mut() { - let lock = inner.lock.take(); - if let Some(Locked::Write(_r)) = lock { - let lock_status = match reason { - // let the next request try to fetch it - InternalError | StorageError | Deferred => LockStatus::TransientError, - // no need for the lock anymore - OriginNotCache | ResponseTooLarge => LockStatus::GiveUp, - // not sure which LockStatus make sense, we treat it as GiveUp for now - Custom(_) => LockStatus::GiveUp, - // should never happen, NeverEnabled shouldn't hold a lock - NeverEnabled => panic!("NeverEnabled holds a write lock"), - CacheLockGiveUp | CacheLockTimeout => { - panic!("CacheLock* are for cache lock readers only") - } - }; - inner - .cache_lock - .unwrap() - .release(inner.key.as_ref().unwrap(), lock_status); - } - } + self.release_write_lock(reason); // log initial disable reason self.inner_mut() .traces @@ -824,6 +844,8 @@ impl HttpCache { CachePhase::Stale => { // replace cache meta header self.inner_mut().meta.as_mut().unwrap().0.header = header; + // upstream request done, release write lock + self.release_write_lock(reason); } _ => panic!("wrong phase {:?}", self.phase), } diff --git a/pingora-cache/src/predictor.rs b/pingora-cache/src/predictor.rs index df8f3743..d131c20d 100644 --- a/pingora-cache/src/predictor.rs +++ b/pingora-cache/src/predictor.rs @@ -120,7 +120,7 @@ where // CacheLockGiveUp: the writer will set OriginNotCache (if applicable) // readers don't need to do it NeverEnabled | StorageError | InternalError | Deferred | CacheLockGiveUp - | CacheLockTimeout => { + | CacheLockTimeout | DeclinedToUpstream | UpstreamError => { return None; } // Skip certain NoCacheReason::Custom according to user diff --git a/pingora-proxy/src/lib.rs b/pingora-proxy/src/lib.rs index f13a1f79..731cdb41 100644 --- a/pingora-proxy/src/lib.rs +++ b/pingora-proxy/src/lib.rs @@ -535,6 +535,10 @@ impl HttpProxy { if !proxy_to_upstream { // The hook can choose to write its own response, but if it doesn't, we respond // with a generic 502 + if session.cache.enabled() { + // drop the cache lock that this request may be holding onto + session.cache.disable(NoCacheReason::DeclinedToUpstream); + } if session.response_written().is_none() { match session.write_response_header_ref(&BAD_GATEWAY).await { Ok(()) => {} @@ -557,6 +561,10 @@ impl HttpProxy { /* else continue */ } Err(e) => { + if session.cache.enabled() { + session.cache.disable(NoCacheReason::InternalError); + } + self.handle_error( &mut session, &mut ctx, @@ -621,7 +629,14 @@ impl HttpProxy { if let Some(e) = final_error.as_ref() { // If we have errored and are still holding a cache lock, release it. - session.cache.disable(NoCacheReason::InternalError); + if session.cache.enabled() { + let reason = if *e.esource() == ErrorSource::Upstream { + NoCacheReason::UpstreamError + } else { + NoCacheReason::InternalError + }; + session.cache.disable(reason); + } let status = self.inner.fail_to_proxy(&mut session, e, &mut ctx).await; // final error will have > 0 status unless downstream connection is dead diff --git a/pingora-proxy/src/proxy_cache.rs b/pingora-proxy/src/proxy_cache.rs index 4369ec8b..8957bc76 100644 --- a/pingora-proxy/src/proxy_cache.rs +++ b/pingora-proxy/src/proxy_cache.rs @@ -667,8 +667,18 @@ impl HttpProxy { None, None, ); - self.inner + if self + .inner .should_serve_stale(session, ctx, Some(&http_status_error)) + { + // no more need to keep the write lock + session + .cache + .release_write_lock(NoCacheReason::UpstreamError); + true + } else { + false + } } else { false // not 304, not stale if error status code } @@ -712,6 +722,11 @@ impl HttpProxy { self.inner.request_summary(session, ctx) ); + // no more need to hang onto the cache lock + session + .cache + .release_write_lock(NoCacheReason::UpstreamError); + Some(self.proxy_cache_hit(session, ctx).await) } diff --git a/pingora-proxy/src/proxy_h2.rs b/pingora-proxy/src/proxy_h2.rs index ba832b24..a27c559b 100644 --- a/pingora-proxy/src/proxy_h2.rs +++ b/pingora-proxy/src/proxy_h2.rs @@ -409,6 +409,7 @@ impl HttpProxy { .cache_http_task(session, &task, ctx, serve_from_cache) .await { + session.cache.disable(NoCacheReason::StorageError); if serve_from_cache.is_miss_body() { // if the response stream cache body during miss but write fails, it has to // give up the entire request