diff --git a/Cargo.lock b/Cargo.lock index b0d22d8d9e5b..8a6b345d3f4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5117,6 +5117,7 @@ dependencies = [ "npm-package-json", "package-lock-json-parser", "requirements", + "serde", "serde-jsonlines 0.4.0", "serde_json", "serdeconv", diff --git a/crates/tabby-scheduler/Cargo.toml b/crates/tabby-scheduler/Cargo.toml index c988f5c2f26a..4972e644aee0 100644 --- a/crates/tabby-scheduler/Cargo.toml +++ b/crates/tabby-scheduler/Cargo.toml @@ -37,6 +37,8 @@ npm-package-json = "0.1.3" yarn-lock-parser = "0.7.0" text-splitter = "0.10.0" kv = { version = "0.24.0", features = ["json-value"] } +serde.workspace = true +serde_json.workspace = true [dev-dependencies] temp_testdir = { workspace = true } diff --git a/crates/tabby-scheduler/src/cache.rs b/crates/tabby-scheduler/src/cache.rs index 218e36a43bf7..cc62af762888 100644 --- a/crates/tabby-scheduler/src/cache.rs +++ b/crates/tabby-scheduler/src/cache.rs @@ -5,7 +5,8 @@ use std::{ }; use anyhow::{bail, Context, Result}; -use kv::{Batch, Bucket, Config, Item, Json, Store}; +use kv::{Batch, Bucket, Config, Json, Store}; +use serde::{Deserialize, Serialize}; use tabby_common::{config::RepositoryConfig, languages::get_language_by_ext, SourceFile}; use tracing::{info, warn}; @@ -19,17 +20,46 @@ fn get_git_hash(path: &Path) -> Result { Ok(String::from_utf8(output.stdout)?.trim().to_string()) } -fn compute_source_file_key(path: &Path) -> Result { - if !path.is_file() { - bail!("Path is not a file"); +#[derive(Deserialize, Serialize)] +struct SourceFileKey { + path: PathBuf, + language: String, + git_hash: String, +} + +impl TryFrom<&str> for SourceFileKey { + type Error = serde_json::Error; + + fn try_from(s: &str) -> Result { + serde_json::from_str(s) } +} - let git_hash = get_git_hash(path)?; - let ext = path.extension().context("Failed to get extension")?; - let Some(lang) = get_language_by_ext(ext) else { - bail!("Unknown language for extension {:?}", ext); - }; - Ok(format!("{}-{}", lang.language(), git_hash)) +impl TryFrom<&Path> for SourceFileKey { + type Error = anyhow::Error; + + fn try_from(path: &Path) -> Result { + if !path.is_file() { + bail!("Path is not a file"); + } + + let git_hash = get_git_hash(path)?; + let ext = path.extension().context("Failed to get extension")?; + let Some(lang) = get_language_by_ext(ext) else { + bail!("Unknown language for extension {:?}", ext); + }; + Ok(Self { + path: path.to_owned(), + language: lang.language().to_string(), + git_hash: git_hash.to_string(), + }) + } +} + +impl ToString for SourceFileKey { + fn to_string(&self) -> String { + serde_json::to_string(&self).expect("Failed to serialize SourceFileKey") + } } pub struct CacheStore { @@ -50,7 +80,7 @@ impl CacheStore { config: &RepositoryConfig, path: &Path, ) -> Option { - let key = compute_source_file_key(path).ok()?; + let key: String = SourceFileKey::try_from(path).ok()?.to_string(); let dataset_bucket: Bucket>> = self .store @@ -75,7 +105,7 @@ impl CacheStore { pub fn garbage_collection(&self) { info!("Running garbage collection"); - let bucket = self + let bucket: Bucket> = self .store .bucket(Some(SOURCE_FILE_BUCKET_KEY)) .expect("Could not access dataset bucket"); @@ -88,7 +118,8 @@ impl CacheStore { .iter() .filter_map(|item| { let item = item.expect("Failed to read item"); - if is_item_key_matched(&item) { + let item_key: String = item.key().expect("Failed to get key"); + if is_item_key_matched(&item_key) { num_keep += 1; None } else { @@ -106,21 +137,17 @@ impl CacheStore { } } -fn is_item_key_matched(item: &Item>) -> bool { - let Ok(item_key) = item.key::() else { - return false; - }; - - let Ok(Json(file)) = item.value() else { +fn is_item_key_matched(item_key: &str) -> bool { + let Ok(key) = SourceFileKey::try_from(item_key) else { return false; }; - let filepath = PathBuf::from(file.basedir).join(file.filepath); - let Ok(file_key) = compute_source_file_key(&filepath) else { + let Ok(file_key) = SourceFileKey::try_from(key.path.as_path()) else { return false; }; - file_key == item_key + // If key doesn't match, means file has been removed / modified. + file_key.to_string() == item_key } fn create_source_file(