diff --git a/.bleep b/.bleep
index 1f8104d5..d1591c7e 100644
--- a/.bleep
+++ b/.bleep
@@ -1 +1 @@
-4cba0f37f5682058c05904a3c5a395919a8ac498
+89319e9383d6f99066dfeace750a553d45e1c167
diff --git a/pingora-cache/src/lib.rs b/pingora-cache/src/lib.rs
index 6bebbd31..6ac49a59 100644
--- a/pingora-cache/src/lib.rs
+++ b/pingora-cache/src/lib.rs
@@ -125,8 +125,12 @@ pub enum NoCacheReason {
     ///
     /// This happens when the cache predictor predicted that this request is not cacheable, but
     /// the response turns out to be OK to cache. However, it might be too large to re-enable caching
-    /// for this request.
+    /// for this request
     Deferred,
+    /// Due to the proxy upstream filter declining the current request from going upstream
+    DeclinedToUpstream,
+    /// Due to the upstream being unreachable or otherwise erroring during proxying
+    UpstreamError,
     /// The writer of the cache lock sees that the request is not cacheable (Could be OriginNotCache)
     CacheLockGiveUp,
     /// This request waited too long for the writer of the cache lock to finish, so this request will
@@ -147,6 +151,8 @@ impl NoCacheReason {
             StorageError => "StorageError",
             InternalError => "InternalError",
             Deferred => "Deferred",
+            DeclinedToUpstream => "DeclinedToUpstream",
+            UpstreamError => "UpstreamError",
             CacheLockGiveUp => "CacheLockGiveUp",
             CacheLockTimeout => "CacheLockTimeout",
             Custom(s) => s,
@@ -299,9 +305,44 @@ impl HttpCache {
             .is_some()
     }
 
+    /// Release the cache lock if the current request is a cache writer.
+    ///
+    /// Generally callers should prefer using `disable` when a cache lock should be released
+    /// due to an error to clear all cache context. This function is for releasing the cache lock
+    /// while still keeping the cache around for reading, e.g. when serving stale.
+    pub fn release_write_lock(&mut self, reason: NoCacheReason) {
+        use NoCacheReason::*;
+        if let Some(inner) = self.inner.as_mut() {
+            let lock = inner.lock.take();
+            if let Some(Locked::Write(_r)) = lock {
+                let lock_status = match reason {
+                    // let the next request try to fetch it
+                    InternalError | StorageError | Deferred | UpstreamError => {
+                        LockStatus::TransientError
+                    }
+                    // depends on why the proxy upstream filter declined the request,
+                    // for now still allow next request try to acquire to avoid thundering herd
+                    DeclinedToUpstream => LockStatus::TransientError,
+                    // no need for the lock anymore
+                    OriginNotCache | ResponseTooLarge => LockStatus::GiveUp,
+                    // not sure which LockStatus make sense, we treat it as GiveUp for now
+                    Custom(_) => LockStatus::GiveUp,
+                    // should never happen, NeverEnabled shouldn't hold a lock
+                    NeverEnabled => panic!("NeverEnabled holds a write lock"),
+                    CacheLockGiveUp | CacheLockTimeout => {
+                        panic!("CacheLock* are for cache lock readers only")
+                    }
+                };
+                inner
+                    .cache_lock
+                    .unwrap()
+                    .release(inner.key.as_ref().unwrap(), lock_status);
+            }
+        }
+    }
+
     /// Disable caching
     pub fn disable(&mut self, reason: NoCacheReason) {
-        use NoCacheReason::*;
         match self.phase {
             CachePhase::Disabled(_) => {
                 // replace reason
@@ -309,28 +350,7 @@ impl HttpCache {
             }
             _ => {
                 self.phase = CachePhase::Disabled(reason);
-                if let Some(inner) = self.inner.as_mut() {
-                    let lock = inner.lock.take();
-                    if let Some(Locked::Write(_r)) = lock {
-                        let lock_status = match reason {
-                            // let the next request try to fetch it
-                            InternalError | StorageError | Deferred => LockStatus::TransientError,
-                            // no need for the lock anymore
-                            OriginNotCache | ResponseTooLarge => LockStatus::GiveUp,
-                            // not sure which LockStatus make sense, we treat it as GiveUp for now
-                            Custom(_) => LockStatus::GiveUp,
-                            // should never happen, NeverEnabled shouldn't hold a lock
-                            NeverEnabled => panic!("NeverEnabled holds a write lock"),
-                            CacheLockGiveUp | CacheLockTimeout => {
-                                panic!("CacheLock* are for cache lock readers only")
-                            }
-                        };
-                        inner
-                            .cache_lock
-                            .unwrap()
-                            .release(inner.key.as_ref().unwrap(), lock_status);
-                    }
-                }
+                self.release_write_lock(reason);
                 // log initial disable reason
                 self.inner_mut()
                     .traces
@@ -824,6 +844,8 @@ impl HttpCache {
             CachePhase::Stale => {
                 // replace cache meta header
                 self.inner_mut().meta.as_mut().unwrap().0.header = header;
+                // upstream request done, release write lock
+                self.release_write_lock(reason);
             }
             _ => panic!("wrong phase {:?}", self.phase),
         }
diff --git a/pingora-cache/src/predictor.rs b/pingora-cache/src/predictor.rs
index df8f3743..d131c20d 100644
--- a/pingora-cache/src/predictor.rs
+++ b/pingora-cache/src/predictor.rs
@@ -120,7 +120,7 @@ where
             // CacheLockGiveUp: the writer will set OriginNotCache (if applicable)
             // readers don't need to do it
             NeverEnabled | StorageError | InternalError | Deferred | CacheLockGiveUp
-            | CacheLockTimeout => {
+            | CacheLockTimeout | DeclinedToUpstream | UpstreamError => {
                 return None;
             }
             // Skip certain NoCacheReason::Custom according to user
diff --git a/pingora-proxy/src/lib.rs b/pingora-proxy/src/lib.rs
index f13a1f79..731cdb41 100644
--- a/pingora-proxy/src/lib.rs
+++ b/pingora-proxy/src/lib.rs
@@ -535,6 +535,10 @@ impl<SV> HttpProxy<SV> {
                 if !proxy_to_upstream {
                     // The hook can choose to write its own response, but if it doesn't, we respond
                     // with a generic 502
+                    if session.cache.enabled() {
+                        // drop the cache lock that this request may be holding onto
+                        session.cache.disable(NoCacheReason::DeclinedToUpstream);
+                    }
                     if session.response_written().is_none() {
                         match session.write_response_header_ref(&BAD_GATEWAY).await {
                             Ok(()) => {}
@@ -557,6 +561,10 @@ impl<SV> HttpProxy<SV> {
                 /* else continue */
             }
             Err(e) => {
+                if session.cache.enabled() {
+                    session.cache.disable(NoCacheReason::InternalError);
+                }
+
                 self.handle_error(
                     &mut session,
                     &mut ctx,
@@ -621,7 +629,14 @@ impl<SV> HttpProxy<SV> {
 
         if let Some(e) = final_error.as_ref() {
             // If we have errored and are still holding a cache lock, release it.
-            session.cache.disable(NoCacheReason::InternalError);
+            if session.cache.enabled() {
+                let reason = if *e.esource() == ErrorSource::Upstream {
+                    NoCacheReason::UpstreamError
+                } else {
+                    NoCacheReason::InternalError
+                };
+                session.cache.disable(reason);
+            }
             let status = self.inner.fail_to_proxy(&mut session, e, &mut ctx).await;
 
             // final error will have > 0 status unless downstream connection is dead
diff --git a/pingora-proxy/src/proxy_cache.rs b/pingora-proxy/src/proxy_cache.rs
index 4369ec8b..8957bc76 100644
--- a/pingora-proxy/src/proxy_cache.rs
+++ b/pingora-proxy/src/proxy_cache.rs
@@ -667,8 +667,18 @@ impl<SV> HttpProxy<SV> {
                         None,
                         None,
                     );
-                    self.inner
+                    if self
+                        .inner
                         .should_serve_stale(session, ctx, Some(&http_status_error))
+                    {
+                        // no more need to keep the write lock
+                        session
+                            .cache
+                            .release_write_lock(NoCacheReason::UpstreamError);
+                        true
+                    } else {
+                        false
+                    }
                 } else {
                     false // not 304, not stale if error status code
                 }
@@ -712,6 +722,11 @@ impl<SV> HttpProxy<SV> {
             self.inner.request_summary(session, ctx)
         );
 
+        // no more need to hang onto the cache lock
+        session
+            .cache
+            .release_write_lock(NoCacheReason::UpstreamError);
+
         Some(self.proxy_cache_hit(session, ctx).await)
     }
 
diff --git a/pingora-proxy/src/proxy_h2.rs b/pingora-proxy/src/proxy_h2.rs
index ba832b24..a27c559b 100644
--- a/pingora-proxy/src/proxy_h2.rs
+++ b/pingora-proxy/src/proxy_h2.rs
@@ -409,6 +409,7 @@ impl<SV> HttpProxy<SV> {
                     .cache_http_task(session, &task, ctx, serve_from_cache)
                     .await
                 {
+                    session.cache.disable(NoCacheReason::StorageError);
                     if serve_from_cache.is_miss_body() {
                         // if the response stream cache body during miss but write fails, it has to
                         // give up the entire request