Skip to content

Commit

Permalink
wip: refactoring the cache logic
Browse files Browse the repository at this point in the history
  • Loading branch information
junkurihara committed Dec 8, 2023
1 parent 6030beb commit f5197d0
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 22 deletions.
8 changes: 4 additions & 4 deletions rpxy-lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ http3-s2n = [
sticky-cookie = ["base64", "sha2", "chrono"]
native-tls-backend = ["hyper-tls"]
rustls-backend = []
cache = [] #"http-cache-semantics", "lru"]
cache = ["http-cache-semantics", "lru"]
native-roots = [] #"hyper-rustls/native-tokio"]

[dependencies]
Expand Down Expand Up @@ -66,7 +66,7 @@ hyper-tls = { version = "0.6.0", features = ["alpn"], optional = true }

# tls and cert management for server
hot_reload = "0.1.4"
rustls = { version = "0.21.9", default-features = false }
rustls = { version = "0.21.10", default-features = false }
tokio-rustls = { version = "0.24.1", features = ["early-data"] }
webpki = "0.22.4"
x509-parser = "0.15.1"
Expand All @@ -88,8 +88,8 @@ s2n-quic-rustls = { version = "0.32.0", optional = true }
socket2 = { version = "0.5.5", features = ["all"], optional = true }

# # cache
# http-cache-semantics = { path = "../submodules/rusty-http-cache-semantics/", optional = true }
# lru = { version = "0.12.1", optional = true }
http-cache-semantics = { path = "../submodules/rusty-http-cache-semantics/", optional = true }
lru = { version = "0.12.1", optional = true }

# cookie handling for sticky cookie
chrono = { version = "0.4.31", default-features = false, features = [
Expand Down
9 changes: 9 additions & 0 deletions rpxy-lib/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,15 @@ pub enum RpxyError {
#[error("Failed to fetch from upstream: {0}")]
FailedToFetchFromUpstream(String),

// Cache errors,
#[cfg(feature = "cache")]
#[error("Invalid null request and/or response")]
NullRequestOrResponse,

#[cfg(feature = "cache")]
#[error("Failed to write byte buffer")]
FailedToWriteByteBufferForCache,

// Upstream connection setting errors
#[error("Unsupported upstream option")]
UnsupportedUpstreamOption,
Expand Down
161 changes: 161 additions & 0 deletions rpxy-lib/src/forwarder/cache.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
use crate::{error::*, globals::Globals, log::*};
use http::{Request, Response};
use http_cache_semantics::CachePolicy;
use lru::LruCache;
use std::{
path::{Path, PathBuf},
sync::{atomic::AtomicUsize, Arc, Mutex},
};
use tokio::{fs, sync::RwLock};

/* ---------------------------------------------- */
#[derive(Clone, Debug)]
pub struct RpxyCache {
/// Lru cache storing http message caching policy
inner: LruCacheManager,
/// Managing cache file objects through RwLock's lock mechanism for file lock
file_store: FileStore,
/// Async runtime
runtime_handle: tokio::runtime::Handle,
/// Maximum size of each cache file object
max_each_size: usize,
/// Maximum size of cache object on memory
max_each_size_on_memory: usize,
}

impl RpxyCache {
/// Generate cache storage
pub async fn new(globals: &Globals) -> Option<Self> {
if !globals.proxy_config.cache_enabled {
return None;
}
let path = globals.proxy_config.cache_dir.as_ref().unwrap();
let file_store = FileStore::new(path, &globals.runtime_handle).await;
let inner = LruCacheManager::new(globals.proxy_config.cache_max_entry);

let max_each_size = globals.proxy_config.cache_max_each_size;
let mut max_each_size_on_memory = globals.proxy_config.cache_max_each_size_on_memory;
if max_each_size < max_each_size_on_memory {
warn!(
"Maximum size of on memory cache per entry must be smaller than or equal to the maximum of each file cache"
);
max_each_size_on_memory = max_each_size;
}

Some(Self {
file_store,
inner,
runtime_handle: globals.runtime_handle.clone(),
max_each_size,
max_each_size_on_memory,
})
}
}

/* ---------------------------------------------- */
#[derive(Debug, Clone)]
/// Cache file manager outer that is responsible to handle `RwLock`
struct FileStore {
inner: Arc<RwLock<FileStoreInner>>,
}
impl FileStore {
/// Build manager
async fn new(path: impl AsRef<Path>, runtime_handle: &tokio::runtime::Handle) -> Self {
Self {
inner: Arc::new(RwLock::new(FileStoreInner::new(path, runtime_handle).await)),
}
}
}

#[derive(Debug)]
/// Manager inner for cache on file system
struct FileStoreInner {
/// Directory of temporary files
cache_dir: PathBuf,
/// Counter of current cached files
cnt: usize,
/// Async runtime
runtime_handle: tokio::runtime::Handle,
}

impl FileStoreInner {
/// Build new cache file manager.
/// This first creates cache file dir if not exists, and cleans up the file inside the directory.
/// TODO: Persistent cache is really difficult. `sqlite` or something like that is needed.
async fn new(path: impl AsRef<Path>, runtime_handle: &tokio::runtime::Handle) -> Self {
let path_buf = path.as_ref().to_path_buf();
if let Err(e) = fs::remove_dir_all(path).await {
warn!("Failed to clean up the cache dir: {e}");
};
fs::create_dir_all(&path_buf).await.unwrap();
Self {
cache_dir: path_buf.clone(),
cnt: 0,
runtime_handle: runtime_handle.clone(),
}
}
}

/* ---------------------------------------------- */

#[derive(Clone, Debug)]
/// Cache target in hybrid manner of on-memory and file system
pub enum CacheFileOrOnMemory {
/// Pointer to the temporary cache file
File(PathBuf),
/// Cached body itself
OnMemory(Vec<u8>),
}

#[derive(Clone, Debug)]
/// Cache object definition
struct CacheObject {
/// Cache policy to determine if the stored cache can be used as a response to a new incoming request
pub policy: CachePolicy,
/// Cache target: on-memory object or temporary file
pub target: CacheFileOrOnMemory,
/// SHA256 hash of target to strongly bind the cache metadata (this object) and file target
pub hash: Vec<u8>,
}

/* ---------------------------------------------- */
#[derive(Debug, Clone)]
/// Lru cache manager that is responsible to handle `Mutex` as an outer of `LruCache`
struct LruCacheManager {
inner: Arc<Mutex<LruCache<String, CacheObject>>>, // TODO: keyはstring urlでいいのか疑問。全requestに対してcheckすることになりそう
cnt: Arc<AtomicUsize>,
}

impl LruCacheManager {
/// Build LruCache
fn new(cache_max_entry: usize) -> Self {
Self {
inner: Arc::new(Mutex::new(LruCache::new(
std::num::NonZeroUsize::new(cache_max_entry).unwrap(),
))),
cnt: Arc::new(AtomicUsize::default()),
}
}
}

/* ---------------------------------------------- */
pub fn get_policy_if_cacheable<B1, B2>(
req: Option<&Request<B1>>,
res: Option<&Response<B2>>,
) -> RpxyResult<Option<CachePolicy>>
// where
// B1: core::fmt::Debug,
{
// deduce cache policy from req and res
let (Some(req), Some(res)) = (req, res) else {
return Err(RpxyError::NullRequestOrResponse);
};

let new_policy = CachePolicy::new(req, res);
if new_policy.is_storable() {
// debug!("Response is cacheable: {:?}\n{:?}", req, res.headers());
Ok(Some(new_policy))
} else {
Ok(None)
}
}
102 changes: 85 additions & 17 deletions rpxy-lib/src/forwarder/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,20 @@ use crate::{
};
use async_trait::async_trait;
use http::{Request, Response, Version};
use hyper::body::Body;
use hyper::body::{Body, Incoming};
use hyper_util::client::legacy::{
connect::{Connect, HttpConnector},
Client,
};
use std::sync::Arc;

#[cfg(feature = "cache")]
use super::cache::{get_policy_if_cacheable, RpxyCache};
#[cfg(feature = "cache")]
use crate::hyper_ext::body::{full, BoxBody};
#[cfg(feature = "cache")]
use http_body_util::BodyExt;

#[async_trait]
/// Definition of the forwarder that simply forward requests from downstream client to upstream app servers.
pub trait ForwardRequest<B1, B2> {
Expand All @@ -25,27 +32,71 @@ pub trait ForwardRequest<B1, B2> {

/// Forwarder http client struct responsible to cache handling
pub struct Forwarder<C, B> {
// #[cfg(feature = "cache")]
// cache: Option<RpxyCache>,
#[cfg(feature = "cache")]
cache: Option<RpxyCache>,
inner: Client<C, B>,
inner_h2: Client<C, B>, // `h2c` or http/2-only client is defined separately
}

#[async_trait]
impl<C, B1, B2> ForwardRequest<B1, IncomingOr<B2>> for Forwarder<C, B1>
impl<C, B1> ForwardRequest<B1, IncomingOr<BoxBody>> for Forwarder<C, B1>
where
C: Send + Sync + Connect + Clone + 'static,
B1: Body + Send + Sync + Unpin + 'static,
<B1 as Body>::Data: Send,
<B1 as Body>::Error: Into<Box<(dyn std::error::Error + Send + Sync + 'static)>>,
B2: Body,
{
type Error = RpxyError;

async fn request(&self, req: Request<B1>) -> Result<Response<IncomingOr<B2>>, Self::Error> {
async fn request(&self, req: Request<B1>) -> Result<Response<IncomingOr<BoxBody>>, Self::Error> {
// TODO: cache handling
#[cfg(feature = "cache")]
{
let mut synth_req = None;
if self.cache.is_some() {
// if let Some(cached_response) = self.cache.as_ref().unwrap().get(&req).await {
// // if found, return it as response.
// info!("Cache hit - Return from cache");
// return Ok(cached_response);
// };

// Synthetic request copy used just for caching (cannot clone request object...)
synth_req = Some(build_synth_req_for_cache(&req));
}
let res = self.request_directly(req).await;

if self.cache.is_none() {
return res.map(wrap_incoming_body_response::<BoxBody>);
}

// check cacheability and store it if cacheable
let Ok(Some(cache_policy)) = get_policy_if_cacheable(synth_req.as_ref(), res.as_ref().ok()) else {
return res.map(wrap_incoming_body_response::<BoxBody>);
};
let (parts, body) = res.unwrap().into_parts();
let Ok(bytes) = body.collect().await.map(|v| v.to_bytes()) else {
return Err(RpxyError::FailedToWriteByteBufferForCache);
};

// if let Err(cache_err) = self
// .cache
// .as_ref()
// .unwrap()
// .put(synth_req.unwrap().uri(), &bytes, &cache_policy)
// .await
// {
// error!("{:?}", cache_err);
// };

// response with cached body
Ok(Response::from_parts(parts, IncomingOr::Right(full(bytes))))
}

self.request_directly(req).await
// No cache handling
#[cfg(not(feature = "cache"))]
{
self.request_directly(req).await.map(wrap_incoming_body_response::<B2>)
}
}
}

Expand All @@ -56,13 +107,15 @@ where
<B1 as Body>::Data: Send,
<B1 as Body>::Error: Into<Box<(dyn std::error::Error + Send + Sync + 'static)>>,
{
async fn request_directly<B2: Body>(&self, req: Request<B1>) -> RpxyResult<Response<IncomingOr<B2>>> {
async fn request_directly(&self, req: Request<B1>) -> RpxyResult<Response<Incoming>> {
// TODO: This 'match' condition is always evaluated at every 'request' invocation. So, it is inefficient.
// Needs to be reconsidered. Currently, this is a kind of work around.
// This possibly relates to https://github.com/hyperium/hyper/issues/2417.
match req.version() {
Version::HTTP_2 => self.inner_h2.request(req).await, // handles `h2c` requests
_ => self.inner.request(req).await,
}
.map_err(|e| RpxyError::FailedToFetchFromUpstream(e.to_string()))
.map(wrap_incoming_body_response::<B2>)
}
}

Expand Down Expand Up @@ -90,7 +143,9 @@ Please enable native-tls-backend or rustls-backend feature to enable TLS support

Ok(Self {
inner,
inner_h2: inner.clone(),
inner_h2,
#[cfg(feature = "cache")]
cache: RpxyCache::new(_globals).await,
})
}
}
Expand Down Expand Up @@ -130,13 +185,12 @@ where
.http2_only(true)
.build::<_, B1>(connector_h2);

// #[cfg(feature = "cache")]
// {
// let cache = RpxyCache::new(_globals).await;
// Self { inner, inner_h2, cache }
// }
// #[cfg(not(feature = "cache"))]
Ok(Self { inner, inner_h2 })
Ok(Self {
inner,
inner_h2,
#[cfg(feature = "cache")]
cache: RpxyCache::new(_globals).await,
})
}
}

Expand Down Expand Up @@ -172,3 +226,17 @@ where
// let inner_h2 = Client::builder().http2_only(true).build::<_, Body>(connector_h2);
}
}

#[cfg(feature = "cache")]
/// Build synthetic request to cache
fn build_synth_req_for_cache<T>(req: &Request<T>) -> Request<()> {
let mut builder = Request::builder()
.method(req.method())
.uri(req.uri())
.version(req.version());
// TODO: omit extensions. is this approach correct?
for (header_key, header_value) in req.headers() {
builder = builder.header(header_key, header_value);
}
builder.body(()).unwrap()
}
1 change: 1 addition & 0 deletions rpxy-lib/src/forwarder/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod cache;
mod client;

use crate::hyper_ext::body::{IncomingLike, IncomingOr};
Expand Down
2 changes: 1 addition & 1 deletion submodules/rusty-http-cache-semantics

0 comments on commit f5197d0

Please sign in to comment.