From c65a9d67ff6bc7b63140c414386324e4bea53495 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Mon, 1 Apr 2024 17:50:23 -0400 Subject: [PATCH 01/17] Begin implementing db for RepositoryMeta --- Cargo.lock | 10 +++++ Cargo.toml | 4 +- crates/tabby-common/src/path.rs | 4 ++ crates/tabby-repositories/.env | 1 + crates/tabby-repositories/Cargo.toml | 14 ++++++ crates/tabby-repositories/schema.sql | 10 +++++ crates/tabby-repositories/schema.sqlite | Bin 0 -> 8192 bytes crates/tabby-repositories/src/lib.rs | 57 ++++++++++++++++++++++++ ee/tabby-db/Cargo.toml | 2 +- 9 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 crates/tabby-repositories/.env create mode 100644 crates/tabby-repositories/Cargo.toml create mode 100644 crates/tabby-repositories/schema.sql create mode 100644 crates/tabby-repositories/schema.sqlite create mode 100644 crates/tabby-repositories/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index dbc02e25d2e6..4f512c5cf045 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4551,6 +4551,16 @@ dependencies = [ "trie-rs", ] +[[package]] +name = "tabby-repositories" +version = "0.10.0-dev.0" +dependencies = [ + "anyhow", + "serde_json", + "sqlx", + "tabby-common", +] + [[package]] name = "tabby-scheduler" version = "0.10.0-dev.0" diff --git a/Cargo.toml b/Cargo.toml index 448ddbadc0dc..26e2d3d3fa60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,12 +6,13 @@ members = [ "crates/tabby-scheduler", "crates/tabby-download", "crates/tabby-inference", + "crates/tabby-repositories", "crates/llama-cpp-bindings", "crates/http-api-bindings", "crates/aim-downloader", "crates/juniper-axum", "ee/tabby-webserver", - "ee/tabby-db", "ee/tabby-db-macros", + "ee/tabby-db", "ee/tabby-db-macros", "crates/tabby-repositories", ] [workspace.package] @@ -21,6 +22,7 @@ authors = ["Meng Zhang"] homepage = "https://github.com/TabbyML/tabby" [workspace.dependencies] +sqlx = { version = "0.7.3", features = ["sqlite", "chrono", "runtime-tokio", "macros"] } lazy_static = "1.4.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1" diff --git a/crates/tabby-common/src/path.rs b/crates/tabby-common/src/path.rs index 823d64f91ce4..25b0f491ca35 100644 --- a/crates/tabby-common/src/path.rs +++ b/crates/tabby-common/src/path.rs @@ -29,6 +29,10 @@ pub fn config_file() -> PathBuf { tabby_root().join("config.toml") } +pub fn repository_meta_db() -> PathBuf { + tabby_root().join("repositories.sqlite") +} + pub fn usage_id_file() -> PathBuf { tabby_root().join("usage_anonymous_id") } diff --git a/crates/tabby-repositories/.env b/crates/tabby-repositories/.env new file mode 100644 index 000000000000..5f4d57ee15eb --- /dev/null +++ b/crates/tabby-repositories/.env @@ -0,0 +1 @@ +DATABASE_URL=sqlite://crates/tabby-repositories/schema.sqlite diff --git a/crates/tabby-repositories/Cargo.toml b/crates/tabby-repositories/Cargo.toml new file mode 100644 index 000000000000..a5960dac3577 --- /dev/null +++ b/crates/tabby-repositories/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "tabby-repositories" +version.workspace = true +edition.workspace = true +authors.workspace = true +homepage.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +sqlx = { workspace = true } +anyhow = { workspace = true } +tabby-common = { path = "../tabby-common" } +serde_json = { workspace = true } diff --git a/crates/tabby-repositories/schema.sql b/crates/tabby-repositories/schema.sql new file mode 100644 index 000000000000..c98c37dd85be --- /dev/null +++ b/crates/tabby-repositories/schema.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS repository_meta; +CREATE TABLE repository_meta( + git_url TEXT NOT NULL, + filepath TEXT NOT NULL, + language TEXT NOT NULL, + max_line_length INTEGER NOT NULL, + avg_line_length REAL NOT NULL, + alphanum_fraction REAL NOT NULL, + tags TEXT NOT NULL +); diff --git a/crates/tabby-repositories/schema.sqlite b/crates/tabby-repositories/schema.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..8ee377f05eb26ee32b8e11219a3b342748ae77e0 GIT binary patch literal 8192 zcmeI#PfNov7zXgP2*SYO&FhONW#Yvz;FKUjnK(1>R^CaP1=2rc=}^4s&9CT}F&(?C z6ufwn=b=d&-nS+EZh3mlY|vEHzT`kx;#j1qxF!-pq`MK9ZIU=2?B2Vj+y9nSTyDOO zVplT$7V$wK009U<00Izz00bZa0SG_<0)H*=ahe>SOeV?ad*G$hUN`mHhT6a7r4HQN zrnhrBRg%>7CX>`Fo{cER*pRp0k&@4fW)E>Jvg~48Dy-8Dht*Kcab;RIdZ X00Izz00bZa0SG_<0uX?}KNk1_qJ(0m literal 0 HcmV?d00001 diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs new file mode 100644 index 000000000000..01ea76052559 --- /dev/null +++ b/crates/tabby-repositories/src/lib.rs @@ -0,0 +1,57 @@ +use anyhow::Result; +use sqlx::sqlite::SqlitePoolOptions; +use sqlx::{query, SqlitePool}; +use sqlx::{sqlite::SqliteConnectOptions, Pool, Sqlite}; +use std::str::FromStr; +use tabby_common::Tag; + +pub struct RepositoryCache { + pool: Pool, +} + +struct RepositoryMetaDAO { + git_url: String, + filepath: String, + language: String, + max_line_length: usize, + avg_line_length: f32, + alphanum_fraction: f32, + tags: String, +} + +impl RepositoryCache { + pub async fn new() -> Result { + let init_query = include_str!("../schema.sql"); + let options = SqliteConnectOptions::new() + .filename(tabby_common::path::repository_meta_db()) + .create_if_missing(true); + let pool = SqlitePool::connect_with(options).await?; + sqlx::query(init_query).execute(&pool).await?; + Ok(RepositoryCache { pool }) + } + + pub async fn clear(&self) -> Result<()> { + query!("DELETE FROM repository_meta") + .execute(&self.pool) + .await?; + Ok(()) + } + + pub async fn add_repository_meta( + &self, + git_url: String, + filepath: String, + language: String, + max_line_length: i64, + avg_line_length: f32, + alphanum_fraction: f32, + tags: Vec, + ) -> Result<()> { + let tags = serde_json::to_string(&tags)?; + query!("INSERT INTO repository_meta (git_url, filepath, language, max_line_length, avg_line_length, alphanum_fraction, tags) + VALUES ($1, $2, $3, $4, $5, $6, $7)", + git_url, filepath, language, max_line_length, avg_line_length, alphanum_fraction, tags + ).execute(&self.pool).await?; + Ok(()) + } +} diff --git a/ee/tabby-db/Cargo.toml b/ee/tabby-db/Cargo.toml index 9f8f288068f7..e6c584281bbe 100644 --- a/ee/tabby-db/Cargo.toml +++ b/ee/tabby-db/Cargo.toml @@ -15,8 +15,8 @@ anyhow.workspace = true chrono = { workspace = true, features = ["serde"] } hash-ids.workspace = true lazy_static.workspace = true +sqlx = { workspace = true } sql_query_builder = { version = "2.1.0", features = ["sqlite"] } -sqlx = { version = "0.7.3", features = ["sqlite", "chrono", "runtime-tokio", "macros"] } tabby-common = { path = "../../crates/tabby-common" } tokio = { workspace = true, features = ["fs"] } uuid.workspace = true From a1304e86b2e3fa2b8c15bfc35997d1da39958e36 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Mon, 1 Apr 2024 17:54:09 -0400 Subject: [PATCH 02/17] Add get_repository_meta --- crates/tabby-repositories/src/lib.rs | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs index 01ea76052559..b724551022dd 100644 --- a/crates/tabby-repositories/src/lib.rs +++ b/crates/tabby-repositories/src/lib.rs @@ -1,6 +1,6 @@ use anyhow::Result; use sqlx::sqlite::SqlitePoolOptions; -use sqlx::{query, SqlitePool}; +use sqlx::{query, query_as, SqlitePool}; use sqlx::{sqlite::SqliteConnectOptions, Pool, Sqlite}; use std::str::FromStr; use tabby_common::Tag; @@ -13,9 +13,9 @@ struct RepositoryMetaDAO { git_url: String, filepath: String, language: String, - max_line_length: usize, - avg_line_length: f32, - alphanum_fraction: f32, + max_line_length: i64, + avg_line_length: f64, + alphanum_fraction: f64, tags: String, } @@ -54,4 +54,18 @@ impl RepositoryCache { ).execute(&self.pool).await?; Ok(()) } + + pub async fn get_repository_meta( + &self, + git_url: String, + filepath: String, + ) -> Result { + // TODO(boxbeam): Conversion from RepositoryMetaDAO to RepositoryMeta / SourceFile to never expose RepositoryMetaDAO + let meta = query_as!( + RepositoryMetaDAO, + "SELECT git_url, filepath, language, max_line_length, avg_line_length, alphanum_fraction, tags FROM repository_meta WHERE git_url = ? AND filepath = ?", + git_url, filepath + ).fetch_one(&self.pool).await?; + Ok(meta) + } } From 0617b10f802babd6745fce03312edd6caa4e3aa9 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 22:01:23 +0000 Subject: [PATCH 03/17] [autofix.ci] apply automated fixes --- crates/tabby-repositories/src/lib.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs index b724551022dd..c05324d2cb37 100644 --- a/crates/tabby-repositories/src/lib.rs +++ b/crates/tabby-repositories/src/lib.rs @@ -1,8 +1,11 @@ + + use anyhow::Result; -use sqlx::sqlite::SqlitePoolOptions; -use sqlx::{query, query_as, SqlitePool}; -use sqlx::{sqlite::SqliteConnectOptions, Pool, Sqlite}; -use std::str::FromStr; +use sqlx::{ + query, query_as, + sqlite::{SqliteConnectOptions}, + Pool, Sqlite, SqlitePool, +}; use tabby_common::Tag; pub struct RepositoryCache { From 11892e8ac9983b28479147c8c88a0c6c1446f072 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 1 Apr 2024 22:08:14 +0000 Subject: [PATCH 04/17] [autofix.ci] apply automated fixes (attempt 2/3) --- crates/tabby-repositories/src/lib.rs | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs index c05324d2cb37..47ba44cb57c1 100644 --- a/crates/tabby-repositories/src/lib.rs +++ b/crates/tabby-repositories/src/lib.rs @@ -1,11 +1,5 @@ - - use anyhow::Result; -use sqlx::{ - query, query_as, - sqlite::{SqliteConnectOptions}, - Pool, Sqlite, SqlitePool, -}; +use sqlx::{query, query_as, sqlite::SqliteConnectOptions, Pool, Sqlite, SqlitePool}; use tabby_common::Tag; pub struct RepositoryCache { From b630c925b8917debb1b5ad57eced8e617e6fdcb7 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Tue, 2 Apr 2024 15:47:19 -0400 Subject: [PATCH 05/17] Rewrite with kv --- Cargo.lock | 103 +++++++++++++++++++++--- Cargo.toml | 2 +- crates/tabby-common/src/path.rs | 2 +- crates/tabby-repositories/.env | 1 - crates/tabby-repositories/Cargo.toml | 3 +- crates/tabby-repositories/schema.sql | 10 --- crates/tabby-repositories/schema.sqlite | Bin 8192 -> 0 bytes crates/tabby-repositories/src/lib.rs | 86 ++++++++------------ ee/tabby-webserver/Cargo.toml | 1 + 9 files changed, 132 insertions(+), 76 deletions(-) delete mode 100644 crates/tabby-repositories/.env delete mode 100644 crates/tabby-repositories/schema.sql delete mode 100644 crates/tabby-repositories/schema.sqlite diff --git a/Cargo.lock b/Cargo.lock index 4f512c5cf045..bf6f68ecea8c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -527,7 +527,7 @@ dependencies = [ "petgraph", "semver", "serde", - "toml", + "toml 0.7.4", "url", ] @@ -930,7 +930,7 @@ dependencies = [ "hashbrown 0.14.3", "lock_api", "once_cell", - "parking_lot_core", + "parking_lot_core 0.9.8", ] [[package]] @@ -1243,6 +1243,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "fs4" version = "0.6.6" @@ -1314,7 +1324,7 @@ checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" dependencies = [ "futures-core", "lock_api", - "parking_lot", + "parking_lot 0.12.1", ] [[package]] @@ -1370,6 +1380,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generator" version = "0.7.4" @@ -2034,6 +2053,20 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "kv" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "620727085ac39ee9650b373fe6d8073a0aee6f99e52a9c72b25f7671078039ab" +dependencies = [ + "pin-project-lite", + "serde", + "serde_json", + "sled", + "thiserror", + "toml 0.5.11", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -2823,6 +2856,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + [[package]] name = "parking_lot" version = "0.12.1" @@ -2830,7 +2874,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core", + "parking_lot_core 0.9.8", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall 0.2.16", + "smallvec", + "winapi", ] [[package]] @@ -3837,7 +3895,7 @@ dependencies = [ "rmp-serde", "serde", "serde_json", - "toml", + "toml 0.7.4", "trackable", ] @@ -3851,7 +3909,7 @@ dependencies = [ "futures", "lazy_static", "log", - "parking_lot", + "parking_lot 0.12.1", "serial_test_derive 2.0.0", ] @@ -3865,7 +3923,7 @@ dependencies = [ "futures", "lazy_static", "log", - "parking_lot", + "parking_lot 0.12.1", "serial_test_derive 3.0.0", ] @@ -3999,6 +4057,22 @@ dependencies = [ "autocfg", ] +[[package]] +name = "sled" +version = "0.34.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" +dependencies = [ + "crc32fast", + "crossbeam-epoch", + "crossbeam-utils", + "fs2", + "fxhash", + "libc", + "log", + "parking_lot 0.11.2", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -4556,8 +4630,9 @@ name = "tabby-repositories" version = "0.10.0-dev.0" dependencies = [ "anyhow", + "kv", + "serde", "serde_json", - "sqlx", "tabby-common", ] @@ -4632,6 +4707,7 @@ dependencies = [ "serial_test 3.0.0", "tabby-common", "tabby-db", + "tabby-repositories", "tarpc", "temp_testdir", "thiserror", @@ -4953,7 +5029,7 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2 0.5.5", @@ -5081,6 +5157,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + [[package]] name = "toml" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index 26e2d3d3fa60..0e6a6c6e6afd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ members = [ "crates/aim-downloader", "crates/juniper-axum", "ee/tabby-webserver", - "ee/tabby-db", "ee/tabby-db-macros", "crates/tabby-repositories", + "ee/tabby-db", "ee/tabby-db-macros", ] [workspace.package] diff --git a/crates/tabby-common/src/path.rs b/crates/tabby-common/src/path.rs index 25b0f491ca35..c7f2eaadbccc 100644 --- a/crates/tabby-common/src/path.rs +++ b/crates/tabby-common/src/path.rs @@ -30,7 +30,7 @@ pub fn config_file() -> PathBuf { } pub fn repository_meta_db() -> PathBuf { - tabby_root().join("repositories.sqlite") + tabby_root().join("repositories.db") } pub fn usage_id_file() -> PathBuf { diff --git a/crates/tabby-repositories/.env b/crates/tabby-repositories/.env deleted file mode 100644 index 5f4d57ee15eb..000000000000 --- a/crates/tabby-repositories/.env +++ /dev/null @@ -1 +0,0 @@ -DATABASE_URL=sqlite://crates/tabby-repositories/schema.sqlite diff --git a/crates/tabby-repositories/Cargo.toml b/crates/tabby-repositories/Cargo.toml index a5960dac3577..0011fc8fd1cb 100644 --- a/crates/tabby-repositories/Cargo.toml +++ b/crates/tabby-repositories/Cargo.toml @@ -8,7 +8,8 @@ homepage.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -sqlx = { workspace = true } anyhow = { workspace = true } tabby-common = { path = "../tabby-common" } serde_json = { workspace = true } +serde = { workspace = true, features = ["derive"] } +kv = { version = "0.24.0", features = ["json-value"] } diff --git a/crates/tabby-repositories/schema.sql b/crates/tabby-repositories/schema.sql deleted file mode 100644 index c98c37dd85be..000000000000 --- a/crates/tabby-repositories/schema.sql +++ /dev/null @@ -1,10 +0,0 @@ -DROP TABLE IF EXISTS repository_meta; -CREATE TABLE repository_meta( - git_url TEXT NOT NULL, - filepath TEXT NOT NULL, - language TEXT NOT NULL, - max_line_length INTEGER NOT NULL, - avg_line_length REAL NOT NULL, - alphanum_fraction REAL NOT NULL, - tags TEXT NOT NULL -); diff --git a/crates/tabby-repositories/schema.sqlite b/crates/tabby-repositories/schema.sqlite deleted file mode 100644 index 8ee377f05eb26ee32b8e11219a3b342748ae77e0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 8192 zcmeI#PfNov7zXgP2*SYO&FhONW#Yvz;FKUjnK(1>R^CaP1=2rc=}^4s&9CT}F&(?C z6ufwn=b=d&-nS+EZh3mlY|vEHzT`kx;#j1qxF!-pq`MK9ZIU=2?B2Vj+y9nSTyDOO zVplT$7V$wK009U<00Izz00bZa0SG_<0)H*=ahe>SOeV?ad*G$hUN`mHhT6a7r4HQN zrnhrBRg%>7CX>`Fo{cER*pRp0k&@4fW)E>Jvg~48Dy-8Dht*Kcab;RIdZ X00Izz00bZa0SG_<0uX?}KNk1_qJ(0m diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs index 47ba44cb57c1..90b14a56a1e1 100644 --- a/crates/tabby-repositories/src/lib.rs +++ b/crates/tabby-repositories/src/lib.rs @@ -1,68 +1,48 @@ -use anyhow::Result; -use sqlx::{query, query_as, sqlite::SqliteConnectOptions, Pool, Sqlite, SqlitePool}; -use tabby_common::Tag; +use anyhow::{anyhow, Result}; +use kv::{Bucket, Config, Json, Store}; +use tabby_common::SourceFile; -pub struct RepositoryCache { - pool: Pool, -} +type RepositoryBucket<'a> = Bucket<'a, String, Json>; -struct RepositoryMetaDAO { - git_url: String, - filepath: String, - language: String, - max_line_length: i64, - avg_line_length: f64, - alphanum_fraction: f64, - tags: String, +pub struct RepositoryCache { + cache: Store, } impl RepositoryCache { - pub async fn new() -> Result { - let init_query = include_str!("../schema.sql"); - let options = SqliteConnectOptions::new() - .filename(tabby_common::path::repository_meta_db()) - .create_if_missing(true); - let pool = SqlitePool::connect_with(options).await?; - sqlx::query(init_query).execute(&pool).await?; - Ok(RepositoryCache { pool }) + pub fn new() -> Result { + let config = Config::new(tabby_common::path::repository_meta_db()); + let store = Store::new(config)?; + Ok(RepositoryCache { cache: store }) + } + + fn bucket(&self) -> Result { + Ok(self.cache.bucket(Some("repositories"))?) } - pub async fn clear(&self) -> Result<()> { - query!("DELETE FROM repository_meta") - .execute(&self.pool) - .await?; + pub fn clear(&self) -> Result<()> { + self.bucket()?.clear()?; Ok(()) } - pub async fn add_repository_meta( - &self, - git_url: String, - filepath: String, - language: String, - max_line_length: i64, - avg_line_length: f32, - alphanum_fraction: f32, - tags: Vec, - ) -> Result<()> { - let tags = serde_json::to_string(&tags)?; - query!("INSERT INTO repository_meta (git_url, filepath, language, max_line_length, avg_line_length, alphanum_fraction, tags) - VALUES ($1, $2, $3, $4, $5, $6, $7)", - git_url, filepath, language, max_line_length, avg_line_length, alphanum_fraction, tags - ).execute(&self.pool).await?; + pub fn add_repository_meta(&self, file: SourceFile) -> Result<()> { + let key = format!("{}:{}", file.git_url, file.filepath); + self.bucket()?.set(&key, &Json(file))?; Ok(()) } - pub async fn get_repository_meta( - &self, - git_url: String, - filepath: String, - ) -> Result { - // TODO(boxbeam): Conversion from RepositoryMetaDAO to RepositoryMeta / SourceFile to never expose RepositoryMetaDAO - let meta = query_as!( - RepositoryMetaDAO, - "SELECT git_url, filepath, language, max_line_length, avg_line_length, alphanum_fraction, tags FROM repository_meta WHERE git_url = ? AND filepath = ?", - git_url, filepath - ).fetch_one(&self.pool).await?; - Ok(meta) + pub fn get_repository_meta(&self, git_url: &str, filepath: &str) -> Result { + let key = format!("{git_url}:{filepath}"); + let Some(Json(val)) = self.bucket()?.get(&key)? else { + return Err(anyhow!("Repository meta not found")); + }; + Ok(val) + } + + pub fn reload(&self) -> Result<()> { + self.clear()?; + for file in SourceFile::all()? { + self.add_repository_meta(file)?; + } + Ok(()) } } diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml index d59047df3ef4..601dcf48ad1e 100644 --- a/ee/tabby-webserver/Cargo.toml +++ b/ee/tabby-webserver/Cargo.toml @@ -33,6 +33,7 @@ serde.workspace = true serde_json.workspace = true tabby-common = { path = "../../crates/tabby-common" } tabby-db = { path = "../../ee/tabby-db" } +tabby-repositories = { path = "../../crates/tabby-repositories" } tarpc = { version = "0.33.0", features = ["serde-transport"] } thiserror.workspace = true tokio = { workspace = true, features = ["fs", "process"] } From 04c421ddbf471f4f8a62ef7548d72bb95c95f850 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 19:53:56 +0000 Subject: [PATCH 06/17] [autofix.ci] apply automated fixes --- Cargo.lock | 3 --- crates/tabby-repositories/Cargo.toml | 2 -- ee/tabby-webserver/Cargo.toml | 1 - 3 files changed, 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bf6f68ecea8c..f5436f9d5977 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4631,8 +4631,6 @@ version = "0.10.0-dev.0" dependencies = [ "anyhow", "kv", - "serde", - "serde_json", "tabby-common", ] @@ -4707,7 +4705,6 @@ dependencies = [ "serial_test 3.0.0", "tabby-common", "tabby-db", - "tabby-repositories", "tarpc", "temp_testdir", "thiserror", diff --git a/crates/tabby-repositories/Cargo.toml b/crates/tabby-repositories/Cargo.toml index 0011fc8fd1cb..5ea10b0254d9 100644 --- a/crates/tabby-repositories/Cargo.toml +++ b/crates/tabby-repositories/Cargo.toml @@ -10,6 +10,4 @@ homepage.workspace = true [dependencies] anyhow = { workspace = true } tabby-common = { path = "../tabby-common" } -serde_json = { workspace = true } -serde = { workspace = true, features = ["derive"] } kv = { version = "0.24.0", features = ["json-value"] } diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml index 601dcf48ad1e..d59047df3ef4 100644 --- a/ee/tabby-webserver/Cargo.toml +++ b/ee/tabby-webserver/Cargo.toml @@ -33,7 +33,6 @@ serde.workspace = true serde_json.workspace = true tabby-common = { path = "../../crates/tabby-common" } tabby-db = { path = "../../ee/tabby-db" } -tabby-repositories = { path = "../../crates/tabby-repositories" } tarpc = { version = "0.33.0", features = ["serde-transport"] } thiserror.workspace = true tokio = { workspace = true, features = ["fs", "process"] } From f56f9e29e063c6230d3410436ccd1cfdacabf7b0 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Wed, 3 Apr 2024 17:11:12 -0400 Subject: [PATCH 07/17] Use scheduler indexing logic for RepositoryCache --- Cargo.lock | 108 +--------- crates/tabby-common/src/lib.rs | 1 + crates/tabby-repositories/Cargo.toml | 25 +++ crates/tabby-repositories/queries/csharp.scm | 19 ++ crates/tabby-repositories/queries/go.scm | 27 +++ crates/tabby-repositories/queries/kotlin.scm | 12 ++ crates/tabby-repositories/queries/rust.scm | 35 ++++ .../tabby-repositories/queries/solidity.scm | 45 +++++ crates/tabby-repositories/queries/tsx.scm | 26 +++ crates/tabby-repositories/src/dataset.rs | 189 ++++++++++++++++++ .../src/dataset/deps/javascript.rs | 133 ++++++++++++ .../src/dataset/deps/mod.rs | 29 +++ .../src/dataset/deps/python.rs | 30 +++ .../src/dataset/deps/rust.rs | 42 ++++ crates/tabby-repositories/src/dataset/tags.rs | 169 ++++++++++++++++ crates/tabby-repositories/src/lib.rs | 19 +- crates/tabby-scheduler/src/dataset.rs | 6 +- ee/tabby-webserver/src/handler.rs | 2 +- .../src/repositories/resolve.rs | 40 +--- 19 files changed, 811 insertions(+), 146 deletions(-) create mode 100644 crates/tabby-repositories/queries/csharp.scm create mode 100644 crates/tabby-repositories/queries/go.scm create mode 100644 crates/tabby-repositories/queries/kotlin.scm create mode 100644 crates/tabby-repositories/queries/rust.scm create mode 100644 crates/tabby-repositories/queries/solidity.scm create mode 100644 crates/tabby-repositories/queries/tsx.scm create mode 100644 crates/tabby-repositories/src/dataset.rs create mode 100644 crates/tabby-repositories/src/dataset/deps/javascript.rs create mode 100644 crates/tabby-repositories/src/dataset/deps/mod.rs create mode 100644 crates/tabby-repositories/src/dataset/deps/python.rs create mode 100644 crates/tabby-repositories/src/dataset/deps/rust.rs create mode 100644 crates/tabby-repositories/src/dataset/tags.rs diff --git a/Cargo.lock b/Cargo.lock index f5436f9d5977..dbc02e25d2e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -527,7 +527,7 @@ dependencies = [ "petgraph", "semver", "serde", - "toml 0.7.4", + "toml", "url", ] @@ -930,7 +930,7 @@ dependencies = [ "hashbrown 0.14.3", "lock_api", "once_cell", - "parking_lot_core 0.9.8", + "parking_lot_core", ] [[package]] @@ -1243,16 +1243,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "fs4" version = "0.6.6" @@ -1324,7 +1314,7 @@ checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" dependencies = [ "futures-core", "lock_api", - "parking_lot 0.12.1", + "parking_lot", ] [[package]] @@ -1380,15 +1370,6 @@ dependencies = [ "slab", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generator" version = "0.7.4" @@ -2053,20 +2034,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "kv" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "620727085ac39ee9650b373fe6d8073a0aee6f99e52a9c72b25f7671078039ab" -dependencies = [ - "pin-project-lite", - "serde", - "serde_json", - "sled", - "thiserror", - "toml 0.5.11", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -2856,17 +2823,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -2874,21 +2830,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.8", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -3895,7 +3837,7 @@ dependencies = [ "rmp-serde", "serde", "serde_json", - "toml 0.7.4", + "toml", "trackable", ] @@ -3909,7 +3851,7 @@ dependencies = [ "futures", "lazy_static", "log", - "parking_lot 0.12.1", + "parking_lot", "serial_test_derive 2.0.0", ] @@ -3923,7 +3865,7 @@ dependencies = [ "futures", "lazy_static", "log", - "parking_lot 0.12.1", + "parking_lot", "serial_test_derive 3.0.0", ] @@ -4057,22 +3999,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot 0.11.2", -] - [[package]] name = "smallvec" version = "1.10.0" @@ -4625,15 +4551,6 @@ dependencies = [ "trie-rs", ] -[[package]] -name = "tabby-repositories" -version = "0.10.0-dev.0" -dependencies = [ - "anyhow", - "kv", - "tabby-common", -] - [[package]] name = "tabby-scheduler" version = "0.10.0-dev.0" @@ -5026,7 +4943,7 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot 0.12.1", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.5.5", @@ -5154,15 +5071,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "toml" version = "0.7.4" diff --git a/crates/tabby-common/src/lib.rs b/crates/tabby-common/src/lib.rs index 0c91be5b7cc1..e0201497c49f 100644 --- a/crates/tabby-common/src/lib.rs +++ b/crates/tabby-common/src/lib.rs @@ -24,6 +24,7 @@ use serde_jsonlines::JsonLinesReader; #[derive(Serialize, Deserialize)] pub struct SourceFile { pub git_url: String, + pub repository_name: String, pub filepath: String, pub content: String, pub language: String, diff --git a/crates/tabby-repositories/Cargo.toml b/crates/tabby-repositories/Cargo.toml index 5ea10b0254d9..0d5d689bfe7f 100644 --- a/crates/tabby-repositories/Cargo.toml +++ b/crates/tabby-repositories/Cargo.toml @@ -10,4 +10,29 @@ homepage.workspace = true [dependencies] anyhow = { workspace = true } tabby-common = { path = "../tabby-common" } +tantivy = { workspace = true } +tracing = { workspace = true } +tree-sitter-tags = "0.20.2" +lazy_static = { workspace = true } +tree-sitter-python = "0.20.2" +tree-sitter-java = "0.20.2" +tree-sitter-kotlin = "0.3.1" +tree-sitter-rust = "0.20.3" +tree-sitter-typescript = "0.20.3" +tree-sitter-go = "0.20.0" +tree-sitter-ruby = "0.20.0" +tree-sitter-c = { git = "https://github.com/tree-sitter/tree-sitter-c/", rev = "212a80f" } +tree-sitter-cpp = { git = "https://github.com/tree-sitter/tree-sitter-cpp", rev = "a714740" } +tree-sitter-c-sharp = "0.20.0" +tree-sitter-solidity = { git = "https://github.com/JoranHonig/tree-sitter-solidity", rev = "b239a95" } +ignore = "0.4.20" +kdam = { version = "0.5.0" } +requirements = "0.3.0" +serdeconv.workspace = true +cargo-lock = { version = "9.0.0", features = ["dependency-tree"] } +tokio-cron-scheduler = { workspace = true } +tokio = { workspace = true, features = ["process"] } +package-lock-json-parser = "0.4.0" +npm-package-json = "0.1.3" +yarn-lock-parser = "0.7.0" kv = { version = "0.24.0", features = ["json-value"] } diff --git a/crates/tabby-repositories/queries/csharp.scm b/crates/tabby-repositories/queries/csharp.scm new file mode 100644 index 000000000000..e49897aa9750 --- /dev/null +++ b/crates/tabby-repositories/queries/csharp.scm @@ -0,0 +1,19 @@ +( + (class_declaration (identifier) @name) @definition.class +) + +( + (struct_declaration (identifier) @name) @definition.struct +) + +( + (method_declaration (identifier) @name) @definition.method +) + +( + (interface_declaration (identifier) @name) @definition.interface +) + +( + (local_function_statement (identifier) @name) @definition.function +) \ No newline at end of file diff --git a/crates/tabby-repositories/queries/go.scm b/crates/tabby-repositories/queries/go.scm new file mode 100644 index 000000000000..d8d52719f72e --- /dev/null +++ b/crates/tabby-repositories/queries/go.scm @@ -0,0 +1,27 @@ +( + (comment)* @doc + . + (function_declaration + name: (identifier) @name) @definition.function + (#strip! @doc "^//\\s*") + (#set-adjacent! @doc @definition.function) +) + +( + (comment)* @doc + . + (method_declaration + name: (field_identifier) @name) @definition.method + (#strip! @doc "^//\\s*") + (#set-adjacent! @doc @definition.method) +) + +(call_expression + function: [ + (identifier) @name + (parenthesized_expression (identifier) @name) + (selector_expression field: (field_identifier) @name) + (parenthesized_expression (selector_expression field: (field_identifier) @name)) + ]) @reference.call + +(type_declaration (type_spec name: (type_identifier) @name)) @definition.type \ No newline at end of file diff --git a/crates/tabby-repositories/queries/kotlin.scm b/crates/tabby-repositories/queries/kotlin.scm new file mode 100644 index 000000000000..cb8c933bb05a --- /dev/null +++ b/crates/tabby-repositories/queries/kotlin.scm @@ -0,0 +1,12 @@ +( + (function_declaration (simple_identifier) @name) @definition.function +) + +( + (class_declaration (type_identifier) @name) @definition.class +) + +( + (object_literal (delegation_specifier) @name) @definition.object +) + diff --git a/crates/tabby-repositories/queries/rust.scm b/crates/tabby-repositories/queries/rust.scm new file mode 100644 index 000000000000..d270566826d1 --- /dev/null +++ b/crates/tabby-repositories/queries/rust.scm @@ -0,0 +1,35 @@ +; ADT definitions + +(struct_item + name: (type_identifier) @name) @definition.class + +(enum_item + name: (type_identifier) @name) @definition.class + +(union_item + name: (type_identifier) @name) @definition.class + +; type aliases + +(type_item + name: (type_identifier) @name) @definition.class + +; method definitions + +(declaration_list + (function_item + name: (identifier) @name) @definition.method) + +; function definitions + +(function_item + name: (identifier) @name) @definition.function + +; trait definitions +(trait_item + name: (type_identifier) @name) @definition.interface + +; macro definitions + +(macro_definition + name: (identifier) @name) @definition.macro \ No newline at end of file diff --git a/crates/tabby-repositories/queries/solidity.scm b/crates/tabby-repositories/queries/solidity.scm new file mode 100644 index 000000000000..aa1bbd35029d --- /dev/null +++ b/crates/tabby-repositories/queries/solidity.scm @@ -0,0 +1,45 @@ +;; Copied from https://github.com/JoranHonig/tree-sitter-solidity/blob/master/queries/tags.scm +;; +;; Method and Function declarations +(contract_declaration (_ + (function_definition + name: (identifier) @name) @definition.method)) + +(source_file + (function_definition + name: (identifier) @name) @definition.function) + +;; Contract, struct, enum and interface declarations +(contract_declaration + name: (identifier) @name) @definition.class + +(interface_declaration + name: (identifier) @name) @definition.interface + +(library_declaration + name: (identifier) @name) @definition.interface + +(struct_declaration name: (identifier) @name) @definition.class +(enum_declaration name: (identifier) @name) @definition.class +(event_definition name: (identifier) @name) @definition.class + +;; Function calls +(call_expression (identifier) @name ) @reference.call + +(call_expression + (member_expression + property: (identifier) @name )) @reference.call + +;; Log emit +(emit_statement name: (identifier) @name) @reference.class + + +;; Inheritance + +(inheritance_specifier + ancestor: (user_defined_type (identifier) @name . )) @reference.class + + +;; Imports ( note that unknown is not standardised ) +(import_directive + import_name: (identifier) @name ) @reference.unknown diff --git a/crates/tabby-repositories/queries/tsx.scm b/crates/tabby-repositories/queries/tsx.scm new file mode 100644 index 000000000000..7a3c729cc850 --- /dev/null +++ b/crates/tabby-repositories/queries/tsx.scm @@ -0,0 +1,26 @@ +(function_declaration + name: (identifier) @name) @definition.function + +(class_declaration + name: (type_identifier) @name) @definition.class + +(interface_declaration + name: (type_identifier) @name) @definition.interface + +(type_alias_declaration + (type_identifier) @name) @definition.type + +;; Top-level arrow function are definitions. +(program + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function))) @definition.function) + +;; Exported top-level arrow function are also definitions. +(program + (export_statement + (lexical_declaration + (variable_declarator + name: (identifier) @name + value: (arrow_function))) @definition.function)) \ No newline at end of file diff --git a/crates/tabby-repositories/src/dataset.rs b/crates/tabby-repositories/src/dataset.rs new file mode 100644 index 000000000000..1a2aab013929 --- /dev/null +++ b/crates/tabby-repositories/src/dataset.rs @@ -0,0 +1,189 @@ +mod deps; +mod tags; + +use std::{ + collections::HashMap, + ffi::OsStr, + fs::read_to_string, + io::{IsTerminal, Write}, +}; + +use anyhow::{anyhow, Result}; +use ignore::{DirEntry, Walk}; +use kdam::BarExt; +use kdam::{tqdm, Bar}; +use lazy_static::lazy_static; +use tabby_common::{config::RepositoryConfig, DependencyFile, SourceFile}; +use tracing::error; +use tree_sitter_tags::TagsContext; + +use crate::RepositoryCache; + +trait RepositoryExt { + fn create_dataset(&self, writer: &mut impl Write) -> Result<()>; +} + +fn index_repository(cache: &RepositoryCache, repository: &RepositoryConfig) -> Result<()> { + let dir = repository.dir(); + + let walk_dir_iter = || { + Walk::new(dir.as_path()) + .filter_map(Result::ok) + .filter(is_source_code) + }; + + let mut pb = std::io::stdout() + .is_terminal() + .then(|| tqdm(walk_dir_iter().count())); + let walk_dir = walk_dir_iter(); + + let mut context = TagsContext::new(); + for entry in walk_dir { + pb.as_mut().map(|b| b.update(1)).transpose()?; + + let relative_path = entry + .path() + .strip_prefix(dir.as_path()) + .expect("Paths always begin with the prefix"); + let language = get_language( + relative_path + .extension() + .ok_or_else(|| anyhow!("Unknown file extension for {relative_path:?}"))?, + ) + .ok_or_else(|| anyhow!("Unknown language for {relative_path:?}"))? + .to_owned(); + match read_to_string(entry.path()) { + Ok(file_content) => { + let file = SourceFile { + git_url: repository.git_url.clone(), + repository_name: repository.name(), + filepath: relative_path.display().to_string(), + max_line_length: metrics::max_line_length(&file_content), + avg_line_length: metrics::avg_line_length(&file_content), + alphanum_fraction: metrics::alphanum_fraction(&file_content), + tags: tags::collect(&mut context, &language, &file_content), + language, + content: file_content, + }; + cache.add_repository_meta(file)?; + } + Err(e) => { + error!("Cannot read {relative_path:?}: {e:?}"); + } + } + } + + Ok(()) +} + +fn get_language(ext: &OsStr) -> Option<&str> { + let ext = ext.to_str().unwrap_or(""); + EXTENSION_LANGUAGE.get(ext).copied() +} + +fn is_source_code(entry: &DirEntry) -> bool { + if entry.file_type().is_some_and(|x| x.is_file()) { + entry.path().extension().and_then(get_language).is_some() + } else { + false + } +} + +pub fn reload_index(cache: &RepositoryCache, config: &[RepositoryConfig]) -> Result<()> { + cache.clear()?; + + let mut deps = DependencyFile::default(); + for repository in config { + deps::collect(repository.dir().as_path(), &mut deps); + index_repository(&cache, &repository)?; + } + + Ok(()) +} + +mod metrics { + pub fn max_line_length(content: &str) -> usize { + content.lines().map(|x| x.len()).max().unwrap_or(0) + } + + pub fn avg_line_length(content: &str) -> f32 { + let mut total = 0; + let mut len = 0; + for x in content.lines() { + len += 1; + total += x.len(); + } + + if len > 0 { + total as f32 / len as f32 + } else { + 0.0 + } + } + + pub fn alphanum_fraction(content: &str) -> f32 { + let num_alphanumn: f32 = content + .chars() + .map(|x| f32::from(u8::from(x.is_alphanumeric()))) + .sum(); + if !content.is_empty() { + num_alphanumn / content.len() as f32 + } else { + 0.0 + } + } +} + +lazy_static! { + static ref LANGUAGE_EXTENSION: HashMap<&'static str, Vec<&'static str>> = { + HashMap::from([ + ("c", vec!["c", "h"]), + ("csharp", vec!["cs"]), + ( + "cpp", + vec!["cpp", "hpp", "c++", "h++", "cc", "hh", "C", "H", "tcc"], + ), + ("css", vec!["css"]), + ("dockerfile", vec!["Dockerfile"]), + ("go", vec!["go"]), + ("haskell", vec!["hs"]), + ("html", vec!["html"]), + ("java", vec!["java"]), + ("kotlin", vec!["kt", "kts"]), + ("julia", vec!["jl"]), + ("lua", vec!["lua"]), + ("makefile", vec!["Makefile"]), + ("markdown", vec!["md", "markdown"]), + ("php", vec!["php", "php3", "php4", "php5", "phps", "phpt"]), + ("perl", vec!["pl", "pm", "pod", "perl"]), + ("powershell", vec!["ps1", "psd1", "psm1"]), + ("python", vec!["py"]), + ("ruby", vec!["rb"]), + ("rust", vec!["rs"]), + ("solidity", vec!["sol"]), + ("sql", vec!["sql"]), + ("scala", vec!["scala"]), + ("shellscript", vec!["sh", "bash", "command", "zsh"]), + ( + "javascript-typescript", + vec!["ts", "mts", "js", "mjs", "jsx", "tsx"], + ), + ("tex", vec!["tex"]), + ("vb", vec!["vb"]), + ]) + }; + static ref EXTENSION_LANGUAGE: HashMap<&'static str, &'static str> = { + let mut map = HashMap::new(); + for (lang, exts) in &*LANGUAGE_EXTENSION { + for ext in exts { + map.insert(*ext, *lang); + } + } + + map + }; +} + +fn tqdm(total: usize) -> Bar { + tqdm!(total = total, ncols = 40, force_refresh = true) +} diff --git a/crates/tabby-repositories/src/dataset/deps/javascript.rs b/crates/tabby-repositories/src/dataset/deps/javascript.rs new file mode 100644 index 000000000000..0d611f5c6b9c --- /dev/null +++ b/crates/tabby-repositories/src/dataset/deps/javascript.rs @@ -0,0 +1,133 @@ +use std::{collections::HashSet, fs::read_to_string, path::Path}; + +use anyhow::Result; +use tabby_common::Package; + +fn process_package_json(path: &Path) -> Result> { + let package_json_file = path.join("package.json"); + let mut package_json_contents = npm_package_json::Package::from_path(package_json_file)?; + let mut package_deps = package_json_contents.dependencies; + package_deps.append(&mut package_json_contents.dev_dependencies); + package_deps.append(&mut package_json_contents.peer_dependencies); + package_deps.append(&mut package_json_contents.optional_dependencies); + + let deps = package_deps + .into_iter() + .map(|(name, version)| Package { + name, + language: String::from("javascript"), + version: Some(version), + }) + .collect(); + + Ok(deps) +} + +pub fn process_package_lock_json(path: &Path) -> Result> { + let package_lock_file = path.join("package-lock.json"); + let lockfile = package_lock_json_parser::parse(read_to_string(package_lock_file)?)?; + let package_json_deps = process_package_json(path)?; + + if let Some(lockfile_packages) = lockfile.packages { + let mut deps = HashSet::new(); + + for package_dep in package_json_deps { + let version = lockfile_packages + .get(&package_dep.name) + .map_or(package_dep.version, |dep| Some(dep.version.to_string())); + + deps.insert(Package { + version, + ..package_dep + }); + } + + Ok(deps.into_iter().collect()) + } else { + Ok(package_json_deps) + } +} + +pub fn process_yarn_lock(path: &Path) -> Result> { + let yarn_lock_file = path.join("yarn.lock"); + let yarn_lock_contents = read_to_string(yarn_lock_file)?; + let lockfile_packages = yarn_lock_parser::parse_str(&yarn_lock_contents)?; + let package_json_deps = process_package_json(path)?; + + let mut deps = HashSet::new(); + + for package_dep in package_json_deps { + let version = lockfile_packages + .binary_search_by(|p| p.name.cmp(&package_dep.name)) + .map_or(package_dep.version, |dep| { + Some(lockfile_packages[dep].version.to_string()) + }); + + deps.insert(Package { + version, + ..package_dep + }); + } + + Ok(deps.into_iter().collect()) +} + +#[cfg(test)] +mod tests { + use std::{env, path::PathBuf}; + + use super::*; + + thread_local! { + static EXPECTED_DEPS: Vec = Vec::from([ + Package { + language: String::from("javascript"), + name: String::from("fsevents"), + version: Some(String::from("2.2.2")), + }, + Package { + language: String::from("javascript"), + name: String::from("react"), + version: Some(String::from("18.2.0")), + }, + Package { + language: String::from("javascript"), + name: String::from("vite"), + version: Some(String::from("5.1.4")), + }, + Package { + language: String::from("javascript"), + name: String::from("zustand"), + version: Some(String::from("4.5.1")), + }, + ]); + } + + #[test] + fn it_parses_top_level_deps_from_package_lock() -> Result<()> { + EXPECTED_DEPS.with(|expected_deps| { + let project_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?); + let test_fixtures_path = project_path.join("testdata"); + let mut deps = process_package_lock_json(test_fixtures_path.as_path())?; + + deps.sort(); + + assert_eq!(expected_deps, &deps); + Ok(()) + }) + } + + #[test] + fn it_parses_top_level_deps_from_yarn_lock() -> Result<()> { + EXPECTED_DEPS.with(|expected_deps| { + let project_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?); + let test_fixtures_path = project_path.join("testdata"); + let mut deps = process_yarn_lock(test_fixtures_path.as_path())?; + + deps.sort(); + + assert_eq!(expected_deps, &deps); + Ok(()) + }) + } +} diff --git a/crates/tabby-repositories/src/dataset/deps/mod.rs b/crates/tabby-repositories/src/dataset/deps/mod.rs new file mode 100644 index 000000000000..3a4fa9ab61b3 --- /dev/null +++ b/crates/tabby-repositories/src/dataset/deps/mod.rs @@ -0,0 +1,29 @@ +mod javascript; +mod python; +mod rust; + +use std::{collections::HashSet, path::Path}; + +use tabby_common::DependencyFile; + +pub fn collect(path: &Path, file: &mut DependencyFile) { + if let Ok(mut deps) = python::process_requirements_txt(path) { + file.direct.append(&mut deps); + } + + if let Ok(mut deps) = rust::process_cargo(path) { + file.direct.append(&mut deps); + } + + if let Ok(mut deps) = javascript::process_package_lock_json(path) { + file.direct.append(&mut deps); + } + + if let Ok(mut deps) = javascript::process_yarn_lock(path) { + file.direct.append(&mut deps); + } + + // Remove duplicates across sources. + let deps = file.direct.clone().into_iter().collect::>(); + file.direct = deps.into_iter().collect(); +} diff --git a/crates/tabby-repositories/src/dataset/deps/python.rs b/crates/tabby-repositories/src/dataset/deps/python.rs new file mode 100644 index 000000000000..a8cdb3d0b95a --- /dev/null +++ b/crates/tabby-repositories/src/dataset/deps/python.rs @@ -0,0 +1,30 @@ +use std::{collections::HashSet, path::Path}; + +use anyhow::Result; +use tabby_common::Package; +use tracing::warn; + +pub fn process_requirements_txt(path: &Path) -> Result> { + let requirements_txt = path.join("requirements.txt"); + let content = std::fs::read_to_string(requirements_txt)?; + + let mut deps = HashSet::new(); + match requirements::parse_str(&content) { + Ok(requirements) => { + for requirement in requirements { + if let Some(name) = requirement.name { + deps.insert(Package { + language: "python".to_owned(), + name, + version: None, // requirements.txt doesn't come with accurate version information. + }); + } + } + } + Err(err) => { + warn!("Failed to parse requirements.txt: {}", err); + } + } + + Ok(deps.into_iter().collect()) +} diff --git a/crates/tabby-repositories/src/dataset/deps/rust.rs b/crates/tabby-repositories/src/dataset/deps/rust.rs new file mode 100644 index 000000000000..19da9d1bfed2 --- /dev/null +++ b/crates/tabby-repositories/src/dataset/deps/rust.rs @@ -0,0 +1,42 @@ +use std::path::Path; + +use anyhow::Result; +use cargo_lock::dependency::graph::EdgeDirection; +use tabby_common::Package; + +fn extract_deps<'a, I>(packages: I) -> Vec +where + I: IntoIterator, +{ + let mut res = packages + .into_iter() + .map(|package| Package { + language: String::from("rust"), + name: package.name.to_string(), + version: Some(package.version.to_string()), + }) + .collect::>() + .into_iter() + .collect::>(); + res.sort_unstable(); + res +} + +pub fn process_cargo(path: &Path) -> Result> { + let cargo_lock_file = path.join("Cargo.lock"); + + let lockfile = cargo_lock::Lockfile::load(cargo_lock_file)?; + + let tree = lockfile.dependency_tree()?; + let graph = tree.graph(); + + let root_pkg_idx = graph + .externals(EdgeDirection::Incoming) + .collect::>(); + let direct_deps_idx = root_pkg_idx + .iter() + .flat_map(|idx| graph.neighbors_directed(*idx, EdgeDirection::Outgoing)) + .collect::>(); + let deps = extract_deps(direct_deps_idx.iter().map(|dep_idx| &graph[*dep_idx])); + Ok(deps) +} diff --git a/crates/tabby-repositories/src/dataset/tags.rs b/crates/tabby-repositories/src/dataset/tags.rs new file mode 100644 index 000000000000..5429e09da7cd --- /dev/null +++ b/crates/tabby-repositories/src/dataset/tags.rs @@ -0,0 +1,169 @@ +use std::collections::HashMap; + +use lazy_static::lazy_static; +use tabby_common::{Point, Tag}; +use tree_sitter_tags::{TagsConfiguration, TagsContext}; + +pub fn collect(context: &mut TagsContext, language: &str, content: &str) -> Vec { + let config = LANGUAGE_TAGS.get(language); + let empty = Vec::new(); + + let Some(config) = config else { + return empty; + }; + + let Ok((tags, has_error)) = context.generate_tags(&config.0, content.as_bytes(), None) else { + return empty; + }; + + if has_error { + return empty; + } + + tags.filter_map(|x| x.ok()) + .map(|x| Tag { + range: x.range, + name_range: x.name_range, + utf16_column_range: x.utf16_column_range, + line_range: x.line_range, + docs: x.docs, + is_definition: x.is_definition, + syntax_type_name: config.0.syntax_type_name(x.syntax_type_id).to_owned(), + span: Point::new(x.span.start.row, x.span.start.column) + ..Point::new(x.span.end.row, x.span.end.column), + }) + .collect() +} + +// Mark TagsConfiguration as thread sync / safe. +struct TagsConfigurationSync(TagsConfiguration); +unsafe impl Send for TagsConfigurationSync {} +unsafe impl Sync for TagsConfigurationSync {} + +lazy_static! { + static ref LANGUAGE_TAGS: HashMap<&'static str, TagsConfigurationSync> = { + HashMap::from([ + ( + "python", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_python::language(), + tree_sitter_python::TAGGING_QUERY, + "", + ) + .unwrap(), + ), + ), + ( + "rust", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_rust::language(), + include_str!("../../queries/rust.scm"), + "", + ) + .unwrap(), + ), + ), + ( + "java", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_java::language(), + tree_sitter_java::TAGGING_QUERY, + "", + ) + .unwrap(), + ), + ), + ( + "kotlin", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_kotlin::language(), + include_str!("../../queries/kotlin.scm"), + "", + ) + .unwrap(), + ), + ), + ( + "javascript-typescript", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_typescript::language_tsx(), + include_str!("../../queries/tsx.scm"), + "", + ) + .unwrap(), + ), + ), + ( + "go", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_go::language(), + include_str!("../../queries/go.scm"), + "", + ) + .unwrap(), + ), + ), + ( + "ruby", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_ruby::language(), + tree_sitter_ruby::TAGGING_QUERY, + "", + ) + .unwrap(), + ), + ), + ( + "c", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_c::language(), + tree_sitter_c::TAGS_QUERY, + "", + ) + .unwrap(), + ), + ), + ( + "cpp", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_cpp::language(), + tree_sitter_cpp::TAGS_QUERY, + "", + ) + .unwrap(), + ), + ), + ( + "csharp", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_c_sharp::language(), + include_str!("../../queries/csharp.scm"), + "", + ) + .unwrap(), + ), + ), + ( + "solidity", + TagsConfigurationSync( + TagsConfiguration::new( + tree_sitter_solidity::language(), + include_str!("../../queries/solidity.scm"), + "", + ) + .unwrap(), + ), + ), + ]) + }; +} diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs index 90b14a56a1e1..0c140bcd18e1 100644 --- a/crates/tabby-repositories/src/lib.rs +++ b/crates/tabby-repositories/src/lib.rs @@ -1,6 +1,9 @@ use anyhow::{anyhow, Result}; +use dataset::reload_index; use kv::{Bucket, Config, Json, Store}; -use tabby_common::SourceFile; +use tabby_common::{config::RepositoryConfig, SourceFile}; + +mod dataset; type RepositoryBucket<'a> = Bucket<'a, String, Json>; @@ -25,24 +28,20 @@ impl RepositoryCache { } pub fn add_repository_meta(&self, file: SourceFile) -> Result<()> { - let key = format!("{}:{}", file.git_url, file.filepath); + let key = format!("{}:{}", file.repository_name, file.filepath); self.bucket()?.set(&key, &Json(file))?; Ok(()) } - pub fn get_repository_meta(&self, git_url: &str, filepath: &str) -> Result { - let key = format!("{git_url}:{filepath}"); + pub fn get_repository_meta(&self, repository_name: &str, filepath: &str) -> Result { + let key = format!("{repository_name}:{filepath}"); let Some(Json(val)) = self.bucket()?.get(&key)? else { return Err(anyhow!("Repository meta not found")); }; Ok(val) } - pub fn reload(&self) -> Result<()> { - self.clear()?; - for file in SourceFile::all()? { - self.add_repository_meta(file)?; - } - Ok(()) + pub fn reload(&self, repositories: &[RepositoryConfig]) -> Result<()> { + reload_index(&self, repositories) } } diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index db746f566a3a..0dc82966adc5 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -13,7 +13,6 @@ use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, F use ignore::{DirEntry, Walk}; use kdam::BarExt; use lazy_static::lazy_static; -use serde_jsonlines::WriteExt; use tabby_common::{ config::RepositoryConfig, path::{dataset_dir, dependency_file}, @@ -62,6 +61,7 @@ impl RepositoryExt for RepositoryConfig { Ok(file_content) => { let source_file = SourceFile { git_url: self.git_url.clone(), + repository_name: self.name(), filepath: relative_path.display().to_string(), max_line_length: metrics::max_line_length(&file_content), avg_line_length: metrics::avg_line_length(&file_content), @@ -120,10 +120,8 @@ pub fn create_dataset(config: &[RepositoryConfig]) -> Result<()> { } mod metrics { - use std::cmp::max; - pub fn max_line_length(content: &str) -> usize { - content.lines().map(|x| x.len()).reduce(max).unwrap_or(0) + content.lines().map(|x| x.len()).max().unwrap_or(0) } pub fn avg_line_length(content: &str) -> f32 { diff --git a/ee/tabby-webserver/src/handler.rs b/ee/tabby-webserver/src/handler.rs index d965b1bf9ab8..b6ff5981b31c 100644 --- a/ee/tabby-webserver/src/handler.rs +++ b/ee/tabby-webserver/src/handler.rs @@ -50,7 +50,7 @@ impl WebserverHandle { create_service_locator(self.logger(), code, self.db.clone(), is_chat_enabled).await; let events = cron::run_cron(ctx.auth(), ctx.job(), ctx.worker(), local_port).await; - let repository_cache = RepositoryCache::new_initialized(ctx.repository(), &events).await; + let repository_cache = RepositoryCache::new_initialized(&events).await; let schema = Arc::new(create_schema()); let rs = Arc::new(repository_cache); diff --git a/ee/tabby-webserver/src/repositories/resolve.rs b/ee/tabby-webserver/src/repositories/resolve.rs index 883e39f25d71..1205f90608f4 100644 --- a/ee/tabby-webserver/src/repositories/resolve.rs +++ b/ee/tabby-webserver/src/repositories/resolve.rs @@ -20,14 +20,10 @@ use tower::ServiceExt; use tower_http::services::ServeDir; use tracing::{debug, error, warn}; -use crate::{ - cron::{CronEvents, StartListener}, - schema::repository::RepositoryService, -}; +use crate::cron::{CronEvents, StartListener}; pub struct RepositoryCache { repository_lookup: RwLock>, - service: Arc, } impl std::fmt::Debug for RepositoryCache { @@ -39,13 +35,9 @@ impl std::fmt::Debug for RepositoryCache { } impl RepositoryCache { - pub async fn new_initialized( - service: Arc, - events: &CronEvents, - ) -> Arc { + pub async fn new_initialized(events: &CronEvents) -> Arc { let cache = RepositoryCache { repository_lookup: Default::default(), - service, }; if let Err(e) = cache.reload().await { error!("Failed to load repositories: {e}"); @@ -56,16 +48,9 @@ impl RepositoryCache { } async fn reload(&self) -> Result<()> { - let new_repositories = self - .service - .list_repositories(None, None, None, None) - .await? - .into_iter() - .map(|repository| RepositoryConfig::new_named(repository.name, repository.git_url)) - .collect(); let mut repository_lookup = self.repository_lookup.write().unwrap(); debug!("Reloading repositoriy metadata..."); - *repository_lookup = load_meta(new_repositories); + *repository_lookup = load_meta(); Ok(()) } @@ -168,26 +153,19 @@ impl From for RepositoryMeta { } } -fn load_meta(repositories: Vec) -> HashMap { +fn load_meta() -> HashMap { let mut dataset = HashMap::new(); - // Construct map of String -> &RepositoryConfig for lookup - let repo_conf = repositories - .iter() - .map(|repo| (repo.git_url.clone(), repo)) - .collect::>(); let Ok(iter) = SourceFile::all() else { return dataset; }; // Source files contain all metadata, read repository metadata from json // (SourceFile can be converted into RepositoryMeta) for file in iter { - if let Some(repo_name) = repo_conf.get(&file.git_url).map(|repo| repo.name()) { - let key = RepositoryKey { - repo_name, - rel_path: file.filepath.clone(), - }; - dataset.insert(key, file.into()); - } + let key = RepositoryKey { + repo_name: file.repository_name.clone(), + rel_path: file.filepath.clone(), + }; + dataset.insert(key, file.into()); } dataset } From 0714b36f56b8fdc0deb62fa41fe84752d936344f Mon Sep 17 00:00:00 2001 From: boxbeam Date: Wed, 3 Apr 2024 17:21:44 -0400 Subject: [PATCH 08/17] Revert change to tabby-scheduler --- crates/tabby-scheduler/src/dataset.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index 0dc82966adc5..db746f566a3a 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -13,6 +13,7 @@ use file_rotate::{compression::Compression, suffix::AppendCount, ContentLimit, F use ignore::{DirEntry, Walk}; use kdam::BarExt; use lazy_static::lazy_static; +use serde_jsonlines::WriteExt; use tabby_common::{ config::RepositoryConfig, path::{dataset_dir, dependency_file}, @@ -61,7 +62,6 @@ impl RepositoryExt for RepositoryConfig { Ok(file_content) => { let source_file = SourceFile { git_url: self.git_url.clone(), - repository_name: self.name(), filepath: relative_path.display().to_string(), max_line_length: metrics::max_line_length(&file_content), avg_line_length: metrics::avg_line_length(&file_content), @@ -120,8 +120,10 @@ pub fn create_dataset(config: &[RepositoryConfig]) -> Result<()> { } mod metrics { + use std::cmp::max; + pub fn max_line_length(content: &str) -> usize { - content.lines().map(|x| x.len()).max().unwrap_or(0) + content.lines().map(|x| x.len()).reduce(max).unwrap_or(0) } pub fn avg_line_length(content: &str) -> f32 { From b89d1cbc1e86e29a7bd4b0741992c9ebc7fd5d0a Mon Sep 17 00:00:00 2001 From: boxbeam Date: Thu, 4 Apr 2024 15:51:57 -0400 Subject: [PATCH 09/17] Rewrite --- Cargo.lock | 1 + Cargo.toml | 1 - crates/tabby-common/src/config.rs | 8 + crates/tabby-common/src/lib.rs | 2 +- crates/tabby-repositories/Cargo.toml | 38 ---- crates/tabby-repositories/queries/csharp.scm | 19 -- crates/tabby-repositories/queries/go.scm | 27 --- crates/tabby-repositories/queries/kotlin.scm | 12 -- crates/tabby-repositories/queries/rust.scm | 35 ---- .../tabby-repositories/queries/solidity.scm | 45 ----- crates/tabby-repositories/queries/tsx.scm | 26 --- crates/tabby-repositories/src/dataset.rs | 189 ------------------ .../src/dataset/deps/javascript.rs | 133 ------------ .../src/dataset/deps/mod.rs | 29 --- .../src/dataset/deps/python.rs | 30 --- .../src/dataset/deps/rust.rs | 42 ---- crates/tabby-repositories/src/dataset/tags.rs | 169 ---------------- crates/tabby-repositories/src/lib.rs | 47 ----- crates/tabby-scheduler/src/dataset.rs | 20 +- crates/tabby-scheduler/src/lib.rs | 8 +- ee/tabby-webserver/Cargo.toml | 1 + ee/tabby-webserver/src/cron/mod.rs | 68 +------ ee/tabby-webserver/src/cron/scheduler.rs | 6 +- ee/tabby-webserver/src/handler.rs | 4 +- ee/tabby-webserver/src/hub/api.rs | 13 +- ee/tabby-webserver/src/repositories/mod.rs | 12 +- .../src/repositories/resolve.rs | 140 ++++++------- 27 files changed, 111 insertions(+), 1014 deletions(-) delete mode 100644 crates/tabby-repositories/Cargo.toml delete mode 100644 crates/tabby-repositories/queries/csharp.scm delete mode 100644 crates/tabby-repositories/queries/go.scm delete mode 100644 crates/tabby-repositories/queries/kotlin.scm delete mode 100644 crates/tabby-repositories/queries/rust.scm delete mode 100644 crates/tabby-repositories/queries/solidity.scm delete mode 100644 crates/tabby-repositories/queries/tsx.scm delete mode 100644 crates/tabby-repositories/src/dataset.rs delete mode 100644 crates/tabby-repositories/src/dataset/deps/javascript.rs delete mode 100644 crates/tabby-repositories/src/dataset/deps/mod.rs delete mode 100644 crates/tabby-repositories/src/dataset/deps/python.rs delete mode 100644 crates/tabby-repositories/src/dataset/deps/rust.rs delete mode 100644 crates/tabby-repositories/src/dataset/tags.rs delete mode 100644 crates/tabby-repositories/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index dbc02e25d2e6..8bcd9eb0558e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4609,6 +4609,7 @@ dependencies = [ "jsonwebtoken", "juniper", "juniper-axum", + "kv", "lazy_static", "lettre", "mime_guess", diff --git a/Cargo.toml b/Cargo.toml index 0e6a6c6e6afd..57527fbb66c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,7 +6,6 @@ members = [ "crates/tabby-scheduler", "crates/tabby-download", "crates/tabby-inference", - "crates/tabby-repositories", "crates/llama-cpp-bindings", "crates/http-api-bindings", "crates/aim-downloader", diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs index d93d1574d0c3..b633d329c833 100644 --- a/crates/tabby-common/src/config.rs +++ b/crates/tabby-common/src/config.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::{ path::repositories_dir, terminal::{HeaderFormat, InfoMessage}, + SourceFile, }; #[derive(Serialize, Deserialize, Default)] @@ -145,6 +146,13 @@ impl Default for ServerConfig { #[async_trait] pub trait RepositoryAccess: Send + Sync { async fn list_repositories(&self) -> Result>; + + fn clear_index(&self) -> Result<()> { + Ok(()) + } + fn write_index(&self, _source_file: SourceFile) -> Result<()> { + Ok(()) + } } pub struct ConfigRepositoryAccess; diff --git a/crates/tabby-common/src/lib.rs b/crates/tabby-common/src/lib.rs index e0201497c49f..6b7cda3e66b5 100644 --- a/crates/tabby-common/src/lib.rs +++ b/crates/tabby-common/src/lib.rs @@ -21,7 +21,7 @@ use path::dataset_dir; use serde::{Deserialize, Serialize}; use serde_jsonlines::JsonLinesReader; -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Clone)] pub struct SourceFile { pub git_url: String, pub repository_name: String, diff --git a/crates/tabby-repositories/Cargo.toml b/crates/tabby-repositories/Cargo.toml deleted file mode 100644 index 0d5d689bfe7f..000000000000 --- a/crates/tabby-repositories/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -[package] -name = "tabby-repositories" -version.workspace = true -edition.workspace = true -authors.workspace = true -homepage.workspace = true - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -anyhow = { workspace = true } -tabby-common = { path = "../tabby-common" } -tantivy = { workspace = true } -tracing = { workspace = true } -tree-sitter-tags = "0.20.2" -lazy_static = { workspace = true } -tree-sitter-python = "0.20.2" -tree-sitter-java = "0.20.2" -tree-sitter-kotlin = "0.3.1" -tree-sitter-rust = "0.20.3" -tree-sitter-typescript = "0.20.3" -tree-sitter-go = "0.20.0" -tree-sitter-ruby = "0.20.0" -tree-sitter-c = { git = "https://github.com/tree-sitter/tree-sitter-c/", rev = "212a80f" } -tree-sitter-cpp = { git = "https://github.com/tree-sitter/tree-sitter-cpp", rev = "a714740" } -tree-sitter-c-sharp = "0.20.0" -tree-sitter-solidity = { git = "https://github.com/JoranHonig/tree-sitter-solidity", rev = "b239a95" } -ignore = "0.4.20" -kdam = { version = "0.5.0" } -requirements = "0.3.0" -serdeconv.workspace = true -cargo-lock = { version = "9.0.0", features = ["dependency-tree"] } -tokio-cron-scheduler = { workspace = true } -tokio = { workspace = true, features = ["process"] } -package-lock-json-parser = "0.4.0" -npm-package-json = "0.1.3" -yarn-lock-parser = "0.7.0" -kv = { version = "0.24.0", features = ["json-value"] } diff --git a/crates/tabby-repositories/queries/csharp.scm b/crates/tabby-repositories/queries/csharp.scm deleted file mode 100644 index e49897aa9750..000000000000 --- a/crates/tabby-repositories/queries/csharp.scm +++ /dev/null @@ -1,19 +0,0 @@ -( - (class_declaration (identifier) @name) @definition.class -) - -( - (struct_declaration (identifier) @name) @definition.struct -) - -( - (method_declaration (identifier) @name) @definition.method -) - -( - (interface_declaration (identifier) @name) @definition.interface -) - -( - (local_function_statement (identifier) @name) @definition.function -) \ No newline at end of file diff --git a/crates/tabby-repositories/queries/go.scm b/crates/tabby-repositories/queries/go.scm deleted file mode 100644 index d8d52719f72e..000000000000 --- a/crates/tabby-repositories/queries/go.scm +++ /dev/null @@ -1,27 +0,0 @@ -( - (comment)* @doc - . - (function_declaration - name: (identifier) @name) @definition.function - (#strip! @doc "^//\\s*") - (#set-adjacent! @doc @definition.function) -) - -( - (comment)* @doc - . - (method_declaration - name: (field_identifier) @name) @definition.method - (#strip! @doc "^//\\s*") - (#set-adjacent! @doc @definition.method) -) - -(call_expression - function: [ - (identifier) @name - (parenthesized_expression (identifier) @name) - (selector_expression field: (field_identifier) @name) - (parenthesized_expression (selector_expression field: (field_identifier) @name)) - ]) @reference.call - -(type_declaration (type_spec name: (type_identifier) @name)) @definition.type \ No newline at end of file diff --git a/crates/tabby-repositories/queries/kotlin.scm b/crates/tabby-repositories/queries/kotlin.scm deleted file mode 100644 index cb8c933bb05a..000000000000 --- a/crates/tabby-repositories/queries/kotlin.scm +++ /dev/null @@ -1,12 +0,0 @@ -( - (function_declaration (simple_identifier) @name) @definition.function -) - -( - (class_declaration (type_identifier) @name) @definition.class -) - -( - (object_literal (delegation_specifier) @name) @definition.object -) - diff --git a/crates/tabby-repositories/queries/rust.scm b/crates/tabby-repositories/queries/rust.scm deleted file mode 100644 index d270566826d1..000000000000 --- a/crates/tabby-repositories/queries/rust.scm +++ /dev/null @@ -1,35 +0,0 @@ -; ADT definitions - -(struct_item - name: (type_identifier) @name) @definition.class - -(enum_item - name: (type_identifier) @name) @definition.class - -(union_item - name: (type_identifier) @name) @definition.class - -; type aliases - -(type_item - name: (type_identifier) @name) @definition.class - -; method definitions - -(declaration_list - (function_item - name: (identifier) @name) @definition.method) - -; function definitions - -(function_item - name: (identifier) @name) @definition.function - -; trait definitions -(trait_item - name: (type_identifier) @name) @definition.interface - -; macro definitions - -(macro_definition - name: (identifier) @name) @definition.macro \ No newline at end of file diff --git a/crates/tabby-repositories/queries/solidity.scm b/crates/tabby-repositories/queries/solidity.scm deleted file mode 100644 index aa1bbd35029d..000000000000 --- a/crates/tabby-repositories/queries/solidity.scm +++ /dev/null @@ -1,45 +0,0 @@ -;; Copied from https://github.com/JoranHonig/tree-sitter-solidity/blob/master/queries/tags.scm -;; -;; Method and Function declarations -(contract_declaration (_ - (function_definition - name: (identifier) @name) @definition.method)) - -(source_file - (function_definition - name: (identifier) @name) @definition.function) - -;; Contract, struct, enum and interface declarations -(contract_declaration - name: (identifier) @name) @definition.class - -(interface_declaration - name: (identifier) @name) @definition.interface - -(library_declaration - name: (identifier) @name) @definition.interface - -(struct_declaration name: (identifier) @name) @definition.class -(enum_declaration name: (identifier) @name) @definition.class -(event_definition name: (identifier) @name) @definition.class - -;; Function calls -(call_expression (identifier) @name ) @reference.call - -(call_expression - (member_expression - property: (identifier) @name )) @reference.call - -;; Log emit -(emit_statement name: (identifier) @name) @reference.class - - -;; Inheritance - -(inheritance_specifier - ancestor: (user_defined_type (identifier) @name . )) @reference.class - - -;; Imports ( note that unknown is not standardised ) -(import_directive - import_name: (identifier) @name ) @reference.unknown diff --git a/crates/tabby-repositories/queries/tsx.scm b/crates/tabby-repositories/queries/tsx.scm deleted file mode 100644 index 7a3c729cc850..000000000000 --- a/crates/tabby-repositories/queries/tsx.scm +++ /dev/null @@ -1,26 +0,0 @@ -(function_declaration - name: (identifier) @name) @definition.function - -(class_declaration - name: (type_identifier) @name) @definition.class - -(interface_declaration - name: (type_identifier) @name) @definition.interface - -(type_alias_declaration - (type_identifier) @name) @definition.type - -;; Top-level arrow function are definitions. -(program - (lexical_declaration - (variable_declarator - name: (identifier) @name - value: (arrow_function))) @definition.function) - -;; Exported top-level arrow function are also definitions. -(program - (export_statement - (lexical_declaration - (variable_declarator - name: (identifier) @name - value: (arrow_function))) @definition.function)) \ No newline at end of file diff --git a/crates/tabby-repositories/src/dataset.rs b/crates/tabby-repositories/src/dataset.rs deleted file mode 100644 index 1a2aab013929..000000000000 --- a/crates/tabby-repositories/src/dataset.rs +++ /dev/null @@ -1,189 +0,0 @@ -mod deps; -mod tags; - -use std::{ - collections::HashMap, - ffi::OsStr, - fs::read_to_string, - io::{IsTerminal, Write}, -}; - -use anyhow::{anyhow, Result}; -use ignore::{DirEntry, Walk}; -use kdam::BarExt; -use kdam::{tqdm, Bar}; -use lazy_static::lazy_static; -use tabby_common::{config::RepositoryConfig, DependencyFile, SourceFile}; -use tracing::error; -use tree_sitter_tags::TagsContext; - -use crate::RepositoryCache; - -trait RepositoryExt { - fn create_dataset(&self, writer: &mut impl Write) -> Result<()>; -} - -fn index_repository(cache: &RepositoryCache, repository: &RepositoryConfig) -> Result<()> { - let dir = repository.dir(); - - let walk_dir_iter = || { - Walk::new(dir.as_path()) - .filter_map(Result::ok) - .filter(is_source_code) - }; - - let mut pb = std::io::stdout() - .is_terminal() - .then(|| tqdm(walk_dir_iter().count())); - let walk_dir = walk_dir_iter(); - - let mut context = TagsContext::new(); - for entry in walk_dir { - pb.as_mut().map(|b| b.update(1)).transpose()?; - - let relative_path = entry - .path() - .strip_prefix(dir.as_path()) - .expect("Paths always begin with the prefix"); - let language = get_language( - relative_path - .extension() - .ok_or_else(|| anyhow!("Unknown file extension for {relative_path:?}"))?, - ) - .ok_or_else(|| anyhow!("Unknown language for {relative_path:?}"))? - .to_owned(); - match read_to_string(entry.path()) { - Ok(file_content) => { - let file = SourceFile { - git_url: repository.git_url.clone(), - repository_name: repository.name(), - filepath: relative_path.display().to_string(), - max_line_length: metrics::max_line_length(&file_content), - avg_line_length: metrics::avg_line_length(&file_content), - alphanum_fraction: metrics::alphanum_fraction(&file_content), - tags: tags::collect(&mut context, &language, &file_content), - language, - content: file_content, - }; - cache.add_repository_meta(file)?; - } - Err(e) => { - error!("Cannot read {relative_path:?}: {e:?}"); - } - } - } - - Ok(()) -} - -fn get_language(ext: &OsStr) -> Option<&str> { - let ext = ext.to_str().unwrap_or(""); - EXTENSION_LANGUAGE.get(ext).copied() -} - -fn is_source_code(entry: &DirEntry) -> bool { - if entry.file_type().is_some_and(|x| x.is_file()) { - entry.path().extension().and_then(get_language).is_some() - } else { - false - } -} - -pub fn reload_index(cache: &RepositoryCache, config: &[RepositoryConfig]) -> Result<()> { - cache.clear()?; - - let mut deps = DependencyFile::default(); - for repository in config { - deps::collect(repository.dir().as_path(), &mut deps); - index_repository(&cache, &repository)?; - } - - Ok(()) -} - -mod metrics { - pub fn max_line_length(content: &str) -> usize { - content.lines().map(|x| x.len()).max().unwrap_or(0) - } - - pub fn avg_line_length(content: &str) -> f32 { - let mut total = 0; - let mut len = 0; - for x in content.lines() { - len += 1; - total += x.len(); - } - - if len > 0 { - total as f32 / len as f32 - } else { - 0.0 - } - } - - pub fn alphanum_fraction(content: &str) -> f32 { - let num_alphanumn: f32 = content - .chars() - .map(|x| f32::from(u8::from(x.is_alphanumeric()))) - .sum(); - if !content.is_empty() { - num_alphanumn / content.len() as f32 - } else { - 0.0 - } - } -} - -lazy_static! { - static ref LANGUAGE_EXTENSION: HashMap<&'static str, Vec<&'static str>> = { - HashMap::from([ - ("c", vec!["c", "h"]), - ("csharp", vec!["cs"]), - ( - "cpp", - vec!["cpp", "hpp", "c++", "h++", "cc", "hh", "C", "H", "tcc"], - ), - ("css", vec!["css"]), - ("dockerfile", vec!["Dockerfile"]), - ("go", vec!["go"]), - ("haskell", vec!["hs"]), - ("html", vec!["html"]), - ("java", vec!["java"]), - ("kotlin", vec!["kt", "kts"]), - ("julia", vec!["jl"]), - ("lua", vec!["lua"]), - ("makefile", vec!["Makefile"]), - ("markdown", vec!["md", "markdown"]), - ("php", vec!["php", "php3", "php4", "php5", "phps", "phpt"]), - ("perl", vec!["pl", "pm", "pod", "perl"]), - ("powershell", vec!["ps1", "psd1", "psm1"]), - ("python", vec!["py"]), - ("ruby", vec!["rb"]), - ("rust", vec!["rs"]), - ("solidity", vec!["sol"]), - ("sql", vec!["sql"]), - ("scala", vec!["scala"]), - ("shellscript", vec!["sh", "bash", "command", "zsh"]), - ( - "javascript-typescript", - vec!["ts", "mts", "js", "mjs", "jsx", "tsx"], - ), - ("tex", vec!["tex"]), - ("vb", vec!["vb"]), - ]) - }; - static ref EXTENSION_LANGUAGE: HashMap<&'static str, &'static str> = { - let mut map = HashMap::new(); - for (lang, exts) in &*LANGUAGE_EXTENSION { - for ext in exts { - map.insert(*ext, *lang); - } - } - - map - }; -} - -fn tqdm(total: usize) -> Bar { - tqdm!(total = total, ncols = 40, force_refresh = true) -} diff --git a/crates/tabby-repositories/src/dataset/deps/javascript.rs b/crates/tabby-repositories/src/dataset/deps/javascript.rs deleted file mode 100644 index 0d611f5c6b9c..000000000000 --- a/crates/tabby-repositories/src/dataset/deps/javascript.rs +++ /dev/null @@ -1,133 +0,0 @@ -use std::{collections::HashSet, fs::read_to_string, path::Path}; - -use anyhow::Result; -use tabby_common::Package; - -fn process_package_json(path: &Path) -> Result> { - let package_json_file = path.join("package.json"); - let mut package_json_contents = npm_package_json::Package::from_path(package_json_file)?; - let mut package_deps = package_json_contents.dependencies; - package_deps.append(&mut package_json_contents.dev_dependencies); - package_deps.append(&mut package_json_contents.peer_dependencies); - package_deps.append(&mut package_json_contents.optional_dependencies); - - let deps = package_deps - .into_iter() - .map(|(name, version)| Package { - name, - language: String::from("javascript"), - version: Some(version), - }) - .collect(); - - Ok(deps) -} - -pub fn process_package_lock_json(path: &Path) -> Result> { - let package_lock_file = path.join("package-lock.json"); - let lockfile = package_lock_json_parser::parse(read_to_string(package_lock_file)?)?; - let package_json_deps = process_package_json(path)?; - - if let Some(lockfile_packages) = lockfile.packages { - let mut deps = HashSet::new(); - - for package_dep in package_json_deps { - let version = lockfile_packages - .get(&package_dep.name) - .map_or(package_dep.version, |dep| Some(dep.version.to_string())); - - deps.insert(Package { - version, - ..package_dep - }); - } - - Ok(deps.into_iter().collect()) - } else { - Ok(package_json_deps) - } -} - -pub fn process_yarn_lock(path: &Path) -> Result> { - let yarn_lock_file = path.join("yarn.lock"); - let yarn_lock_contents = read_to_string(yarn_lock_file)?; - let lockfile_packages = yarn_lock_parser::parse_str(&yarn_lock_contents)?; - let package_json_deps = process_package_json(path)?; - - let mut deps = HashSet::new(); - - for package_dep in package_json_deps { - let version = lockfile_packages - .binary_search_by(|p| p.name.cmp(&package_dep.name)) - .map_or(package_dep.version, |dep| { - Some(lockfile_packages[dep].version.to_string()) - }); - - deps.insert(Package { - version, - ..package_dep - }); - } - - Ok(deps.into_iter().collect()) -} - -#[cfg(test)] -mod tests { - use std::{env, path::PathBuf}; - - use super::*; - - thread_local! { - static EXPECTED_DEPS: Vec = Vec::from([ - Package { - language: String::from("javascript"), - name: String::from("fsevents"), - version: Some(String::from("2.2.2")), - }, - Package { - language: String::from("javascript"), - name: String::from("react"), - version: Some(String::from("18.2.0")), - }, - Package { - language: String::from("javascript"), - name: String::from("vite"), - version: Some(String::from("5.1.4")), - }, - Package { - language: String::from("javascript"), - name: String::from("zustand"), - version: Some(String::from("4.5.1")), - }, - ]); - } - - #[test] - fn it_parses_top_level_deps_from_package_lock() -> Result<()> { - EXPECTED_DEPS.with(|expected_deps| { - let project_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?); - let test_fixtures_path = project_path.join("testdata"); - let mut deps = process_package_lock_json(test_fixtures_path.as_path())?; - - deps.sort(); - - assert_eq!(expected_deps, &deps); - Ok(()) - }) - } - - #[test] - fn it_parses_top_level_deps_from_yarn_lock() -> Result<()> { - EXPECTED_DEPS.with(|expected_deps| { - let project_path = PathBuf::from(env::var("CARGO_MANIFEST_DIR")?); - let test_fixtures_path = project_path.join("testdata"); - let mut deps = process_yarn_lock(test_fixtures_path.as_path())?; - - deps.sort(); - - assert_eq!(expected_deps, &deps); - Ok(()) - }) - } -} diff --git a/crates/tabby-repositories/src/dataset/deps/mod.rs b/crates/tabby-repositories/src/dataset/deps/mod.rs deleted file mode 100644 index 3a4fa9ab61b3..000000000000 --- a/crates/tabby-repositories/src/dataset/deps/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -mod javascript; -mod python; -mod rust; - -use std::{collections::HashSet, path::Path}; - -use tabby_common::DependencyFile; - -pub fn collect(path: &Path, file: &mut DependencyFile) { - if let Ok(mut deps) = python::process_requirements_txt(path) { - file.direct.append(&mut deps); - } - - if let Ok(mut deps) = rust::process_cargo(path) { - file.direct.append(&mut deps); - } - - if let Ok(mut deps) = javascript::process_package_lock_json(path) { - file.direct.append(&mut deps); - } - - if let Ok(mut deps) = javascript::process_yarn_lock(path) { - file.direct.append(&mut deps); - } - - // Remove duplicates across sources. - let deps = file.direct.clone().into_iter().collect::>(); - file.direct = deps.into_iter().collect(); -} diff --git a/crates/tabby-repositories/src/dataset/deps/python.rs b/crates/tabby-repositories/src/dataset/deps/python.rs deleted file mode 100644 index a8cdb3d0b95a..000000000000 --- a/crates/tabby-repositories/src/dataset/deps/python.rs +++ /dev/null @@ -1,30 +0,0 @@ -use std::{collections::HashSet, path::Path}; - -use anyhow::Result; -use tabby_common::Package; -use tracing::warn; - -pub fn process_requirements_txt(path: &Path) -> Result> { - let requirements_txt = path.join("requirements.txt"); - let content = std::fs::read_to_string(requirements_txt)?; - - let mut deps = HashSet::new(); - match requirements::parse_str(&content) { - Ok(requirements) => { - for requirement in requirements { - if let Some(name) = requirement.name { - deps.insert(Package { - language: "python".to_owned(), - name, - version: None, // requirements.txt doesn't come with accurate version information. - }); - } - } - } - Err(err) => { - warn!("Failed to parse requirements.txt: {}", err); - } - } - - Ok(deps.into_iter().collect()) -} diff --git a/crates/tabby-repositories/src/dataset/deps/rust.rs b/crates/tabby-repositories/src/dataset/deps/rust.rs deleted file mode 100644 index 19da9d1bfed2..000000000000 --- a/crates/tabby-repositories/src/dataset/deps/rust.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::path::Path; - -use anyhow::Result; -use cargo_lock::dependency::graph::EdgeDirection; -use tabby_common::Package; - -fn extract_deps<'a, I>(packages: I) -> Vec -where - I: IntoIterator, -{ - let mut res = packages - .into_iter() - .map(|package| Package { - language: String::from("rust"), - name: package.name.to_string(), - version: Some(package.version.to_string()), - }) - .collect::>() - .into_iter() - .collect::>(); - res.sort_unstable(); - res -} - -pub fn process_cargo(path: &Path) -> Result> { - let cargo_lock_file = path.join("Cargo.lock"); - - let lockfile = cargo_lock::Lockfile::load(cargo_lock_file)?; - - let tree = lockfile.dependency_tree()?; - let graph = tree.graph(); - - let root_pkg_idx = graph - .externals(EdgeDirection::Incoming) - .collect::>(); - let direct_deps_idx = root_pkg_idx - .iter() - .flat_map(|idx| graph.neighbors_directed(*idx, EdgeDirection::Outgoing)) - .collect::>(); - let deps = extract_deps(direct_deps_idx.iter().map(|dep_idx| &graph[*dep_idx])); - Ok(deps) -} diff --git a/crates/tabby-repositories/src/dataset/tags.rs b/crates/tabby-repositories/src/dataset/tags.rs deleted file mode 100644 index 5429e09da7cd..000000000000 --- a/crates/tabby-repositories/src/dataset/tags.rs +++ /dev/null @@ -1,169 +0,0 @@ -use std::collections::HashMap; - -use lazy_static::lazy_static; -use tabby_common::{Point, Tag}; -use tree_sitter_tags::{TagsConfiguration, TagsContext}; - -pub fn collect(context: &mut TagsContext, language: &str, content: &str) -> Vec { - let config = LANGUAGE_TAGS.get(language); - let empty = Vec::new(); - - let Some(config) = config else { - return empty; - }; - - let Ok((tags, has_error)) = context.generate_tags(&config.0, content.as_bytes(), None) else { - return empty; - }; - - if has_error { - return empty; - } - - tags.filter_map(|x| x.ok()) - .map(|x| Tag { - range: x.range, - name_range: x.name_range, - utf16_column_range: x.utf16_column_range, - line_range: x.line_range, - docs: x.docs, - is_definition: x.is_definition, - syntax_type_name: config.0.syntax_type_name(x.syntax_type_id).to_owned(), - span: Point::new(x.span.start.row, x.span.start.column) - ..Point::new(x.span.end.row, x.span.end.column), - }) - .collect() -} - -// Mark TagsConfiguration as thread sync / safe. -struct TagsConfigurationSync(TagsConfiguration); -unsafe impl Send for TagsConfigurationSync {} -unsafe impl Sync for TagsConfigurationSync {} - -lazy_static! { - static ref LANGUAGE_TAGS: HashMap<&'static str, TagsConfigurationSync> = { - HashMap::from([ - ( - "python", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_python::language(), - tree_sitter_python::TAGGING_QUERY, - "", - ) - .unwrap(), - ), - ), - ( - "rust", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_rust::language(), - include_str!("../../queries/rust.scm"), - "", - ) - .unwrap(), - ), - ), - ( - "java", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_java::language(), - tree_sitter_java::TAGGING_QUERY, - "", - ) - .unwrap(), - ), - ), - ( - "kotlin", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_kotlin::language(), - include_str!("../../queries/kotlin.scm"), - "", - ) - .unwrap(), - ), - ), - ( - "javascript-typescript", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_typescript::language_tsx(), - include_str!("../../queries/tsx.scm"), - "", - ) - .unwrap(), - ), - ), - ( - "go", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_go::language(), - include_str!("../../queries/go.scm"), - "", - ) - .unwrap(), - ), - ), - ( - "ruby", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_ruby::language(), - tree_sitter_ruby::TAGGING_QUERY, - "", - ) - .unwrap(), - ), - ), - ( - "c", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_c::language(), - tree_sitter_c::TAGS_QUERY, - "", - ) - .unwrap(), - ), - ), - ( - "cpp", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_cpp::language(), - tree_sitter_cpp::TAGS_QUERY, - "", - ) - .unwrap(), - ), - ), - ( - "csharp", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_c_sharp::language(), - include_str!("../../queries/csharp.scm"), - "", - ) - .unwrap(), - ), - ), - ( - "solidity", - TagsConfigurationSync( - TagsConfiguration::new( - tree_sitter_solidity::language(), - include_str!("../../queries/solidity.scm"), - "", - ) - .unwrap(), - ), - ), - ]) - }; -} diff --git a/crates/tabby-repositories/src/lib.rs b/crates/tabby-repositories/src/lib.rs deleted file mode 100644 index 0c140bcd18e1..000000000000 --- a/crates/tabby-repositories/src/lib.rs +++ /dev/null @@ -1,47 +0,0 @@ -use anyhow::{anyhow, Result}; -use dataset::reload_index; -use kv::{Bucket, Config, Json, Store}; -use tabby_common::{config::RepositoryConfig, SourceFile}; - -mod dataset; - -type RepositoryBucket<'a> = Bucket<'a, String, Json>; - -pub struct RepositoryCache { - cache: Store, -} - -impl RepositoryCache { - pub fn new() -> Result { - let config = Config::new(tabby_common::path::repository_meta_db()); - let store = Store::new(config)?; - Ok(RepositoryCache { cache: store }) - } - - fn bucket(&self) -> Result { - Ok(self.cache.bucket(Some("repositories"))?) - } - - pub fn clear(&self) -> Result<()> { - self.bucket()?.clear()?; - Ok(()) - } - - pub fn add_repository_meta(&self, file: SourceFile) -> Result<()> { - let key = format!("{}:{}", file.repository_name, file.filepath); - self.bucket()?.set(&key, &Json(file))?; - Ok(()) - } - - pub fn get_repository_meta(&self, repository_name: &str, filepath: &str) -> Result { - let key = format!("{repository_name}:{filepath}"); - let Some(Json(val)) = self.bucket()?.get(&key)? else { - return Err(anyhow!("Repository meta not found")); - }; - Ok(val) - } - - pub fn reload(&self, repositories: &[RepositoryConfig]) -> Result<()> { - reload_index(&self, repositories) - } -} diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index db746f566a3a..0f3c55851a0a 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -15,7 +15,7 @@ use kdam::BarExt; use lazy_static::lazy_static; use serde_jsonlines::WriteExt; use tabby_common::{ - config::RepositoryConfig, + config::{RepositoryAccess, RepositoryConfig}, path::{dataset_dir, dependency_file}, DependencyFile, SourceFile, }; @@ -25,11 +25,16 @@ use tree_sitter_tags::TagsContext; use crate::utils::tqdm; trait RepositoryExt { - fn create_dataset(&self, writer: &mut impl Write) -> Result<()>; + fn create_dataset(&self, writer: &mut impl Write, access: &impl RepositoryAccess) + -> Result<()>; } impl RepositoryExt for RepositoryConfig { - fn create_dataset(&self, writer: &mut impl Write) -> Result<()> { + fn create_dataset( + &self, + writer: &mut impl Write, + access: &impl RepositoryAccess, + ) -> Result<()> { let dir = self.dir(); let walk_dir_iter = || { @@ -61,6 +66,7 @@ impl RepositoryExt for RepositoryConfig { match read_to_string(entry.path()) { Ok(file_content) => { let source_file = SourceFile { + repository_name: self.name(), git_url: self.git_url.clone(), filepath: relative_path.display().to_string(), max_line_length: metrics::max_line_length(&file_content), @@ -70,7 +76,8 @@ impl RepositoryExt for RepositoryConfig { language, content: file_content, }; - writer.write_json_lines([source_file])?; + writer.write_json_lines([source_file.clone()])?; + access.write_index(source_file)?; } Err(e) => { error!("Cannot read {relative_path:?}: {e:?}"); @@ -95,9 +102,10 @@ fn is_source_code(entry: &DirEntry) -> bool { } } -pub fn create_dataset(config: &[RepositoryConfig]) -> Result<()> { +pub fn create_dataset(config: &[RepositoryConfig], access: &impl RepositoryAccess) -> Result<()> { fs::remove_dir_all(dataset_dir()).ok(); fs::create_dir_all(dataset_dir())?; + access.clear_index()?; let mut writer = FileRotate::new( SourceFile::files_jsonl(), AppendCount::new(usize::max_value()), @@ -110,7 +118,7 @@ pub fn create_dataset(config: &[RepositoryConfig]) -> Result<()> { let mut deps = DependencyFile::default(); for repository in config { deps::collect(repository.dir().as_path(), &mut deps); - repository.create_dataset(&mut writer)?; + repository.create_dataset(&mut writer, access)?; } serdeconv::to_json_file(&deps, dependency_file())?; diff --git a/crates/tabby-scheduler/src/lib.rs b/crates/tabby-scheduler/src/lib.rs index e4b55d38a91f..7b14e9155f7e 100644 --- a/crates/tabby-scheduler/src/lib.rs +++ b/crates/tabby-scheduler/src/lib.rs @@ -15,7 +15,7 @@ use tracing::{error, info, warn}; pub async fn scheduler(now: bool, access: T) -> Result<()> { if now { let repositories = access.list_repositories().await?; - job_sync(&repositories)?; + job_sync(&repositories, &access)?; job_index(&repositories)?; } else { let access = Arc::new(access); @@ -37,7 +37,7 @@ pub async fn scheduler(now: bool, access: T) -> R .list_repositories() .await .expect("Must be able to retrieve repositories for sync"); - if let Err(e) = job_sync(&repositories) { + if let Err(e) = job_sync(&repositories, &*access) { error!("{e}"); } if let Err(e) = job_index(&repositories) { @@ -66,7 +66,7 @@ fn job_index(repositories: &[RepositoryConfig]) -> Result<()> { Ok(()) } -fn job_sync(repositories: &[RepositoryConfig]) -> Result<()> { +fn job_sync(repositories: &[RepositoryConfig], access: &impl RepositoryAccess) -> Result<()> { println!("Syncing {} repositories...", repositories.len()); let ret = repository::sync_repositories(repositories); if let Err(err) = ret { @@ -74,7 +74,7 @@ fn job_sync(repositories: &[RepositoryConfig]) -> Result<()> { } println!("Building dataset..."); - let ret = dataset::create_dataset(repositories); + let ret = dataset::create_dataset(repositories, access); if let Err(err) = ret { return Err(err.context("Failed to build dataset")); } diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml index d59047df3ef4..43a3a312f581 100644 --- a/ee/tabby-webserver/Cargo.toml +++ b/ee/tabby-webserver/Cargo.toml @@ -22,6 +22,7 @@ hyper = { workspace = true, features = ["client"] } jsonwebtoken = "9.1.0" juniper.workspace = true juniper-axum = { path = "../../crates/juniper-axum" } +kv = { version = "0.24.0", features = ["json-value"] } lazy_static.workspace = true lettre = { version = "0.11.3", features = ["tokio1", "tokio1-native-tls"] } mime_guess = "2.0.4" diff --git a/ee/tabby-webserver/src/cron/mod.rs b/ee/tabby-webserver/src/cron/mod.rs index 4992efee823c..554bca068826 100644 --- a/ee/tabby-webserver/src/cron/mod.rs +++ b/ee/tabby-webserver/src/cron/mod.rs @@ -3,45 +3,10 @@ mod scheduler; use std::sync::Arc; -use futures::Future; -use tokio::sync::broadcast::{self, error::RecvError, Receiver}; use tokio_cron_scheduler::{Job, JobScheduler}; use crate::schema::{auth::AuthenticationService, job::JobService, worker::WorkerService}; -pub(crate) struct CronEvents { - pub scheduler_job_succeeded: Receiver<()>, -} - -pub trait StartListener { - fn start_listener(&self, handler: F) - where - F: Fn(E) -> Fut + Send + 'static, - Fut: Future + Send, - E: Clone + Send + 'static; -} - -impl StartListener for Receiver { - fn start_listener(&self, handler: F) - where - F: Fn(E) -> Fut + Send + 'static, - Fut: Future + Send, - E: Clone + Send + 'static, - { - let mut recv = self.resubscribe(); - tokio::spawn(async move { - loop { - let event = match recv.recv().await { - Ok(event) => event, - Err(RecvError::Closed) => break, - Err(_) => continue, - }; - handler(event).await; - } - }); - } -} - async fn new_job_scheduler(jobs: Vec) -> anyhow::Result { let scheduler = JobScheduler::new().await?; for job in jobs { @@ -56,9 +21,8 @@ pub async fn run_cron( job: Arc, worker: Arc, local_port: u16, -) -> CronEvents { +) { let mut jobs = vec![]; - let (send_scheduler_complete, receive_scheduler_complete) = broadcast::channel::<()>(2); let job1 = db::refresh_token_job(auth.clone()) .await @@ -70,7 +34,7 @@ pub async fn run_cron( .expect("failed to create password reset token cleanup job"); jobs.push(job2); - let job3 = scheduler::scheduler_job(job.clone(), worker, send_scheduler_complete, local_port) + let job3 = scheduler::scheduler_job(job.clone(), worker, local_port) .await .expect("failed to create scheduler job"); jobs.push(job3); @@ -83,32 +47,4 @@ pub async fn run_cron( new_job_scheduler(jobs) .await .expect("failed to start job scheduler"); - CronEvents { - scheduler_job_succeeded: receive_scheduler_complete, - } -} - -#[cfg(test)] -mod tests { - use std::time::Duration; - - use tokio::sync::Mutex; - - use super::*; - - #[tokio::test] - async fn test_receiver_events() { - let (send, receive) = broadcast::channel(1); - let counter = Arc::new(Mutex::new(0)); - let clone = counter.clone(); - receive.start_listener(move |_| { - let clone = clone.clone(); - async move { - *clone.lock().await += 1; - } - }); - send.send(()).unwrap(); - tokio::time::sleep(Duration::from_millis(50)).await; - assert_eq!(*counter.lock().await, 1); - } } diff --git a/ee/tabby-webserver/src/cron/scheduler.rs b/ee/tabby-webserver/src/cron/scheduler.rs index 0055878f3a41..c891889022cc 100644 --- a/ee/tabby-webserver/src/cron/scheduler.rs +++ b/ee/tabby-webserver/src/cron/scheduler.rs @@ -1,7 +1,7 @@ use std::{process::Stdio, sync::Arc}; use anyhow::{Context, Result}; -use tokio::{io::AsyncBufReadExt, sync::broadcast}; +use tokio::io::AsyncBufReadExt; use tokio_cron_scheduler::Job; use tracing::{error, info, warn}; @@ -10,7 +10,6 @@ use crate::schema::{job::JobService, worker::WorkerService}; pub async fn scheduler_job( job: Arc, worker: Arc, - events: broadcast::Sender<()>, local_port: u16, ) -> anyhow::Result { let scheduler_mutex = Arc::new(tokio::sync::Mutex::new(())); @@ -19,7 +18,6 @@ pub async fn scheduler_job( let worker = worker.clone(); let job = job.clone(); let scheduler_mutex = scheduler_mutex.clone(); - let events = events.clone(); Box::pin(async move { let Ok(_guard) = scheduler_mutex.try_lock() else { warn!("Scheduler job overlapped, skipping..."); @@ -28,8 +26,6 @@ pub async fn scheduler_job( if let Err(err) = run_scheduler_now(job, worker, local_port).await { error!("Failed to run scheduler job, reason: `{}`", err); - } else { - let _ = events.send(()); } if let Ok(Some(next_tick)) = scheduler.next_tick_for_job(uuid).await { diff --git a/ee/tabby-webserver/src/handler.rs b/ee/tabby-webserver/src/handler.rs index b6ff5981b31c..5df4dc38014a 100644 --- a/ee/tabby-webserver/src/handler.rs +++ b/ee/tabby-webserver/src/handler.rs @@ -48,9 +48,9 @@ impl WebserverHandle { ) -> (Router, Router) { let ctx = create_service_locator(self.logger(), code, self.db.clone(), is_chat_enabled).await; - let events = cron::run_cron(ctx.auth(), ctx.job(), ctx.worker(), local_port).await; + cron::run_cron(ctx.auth(), ctx.job(), ctx.worker(), local_port).await; - let repository_cache = RepositoryCache::new_initialized(&events).await; + let repository_cache = RepositoryCache::new().expect("Failed to create repository index"); let schema = Arc::new(create_schema()); let rs = Arc::new(repository_cache); diff --git a/ee/tabby-webserver/src/hub/api.rs b/ee/tabby-webserver/src/hub/api.rs index 49f48a16fcdc..51a53e534636 100644 --- a/ee/tabby-webserver/src/hub/api.rs +++ b/ee/tabby-webserver/src/hub/api.rs @@ -11,13 +11,14 @@ use tabby_common::{ event::{EventLogger, LogEntry}, }, config::{RepositoryAccess, RepositoryConfig}, + SourceFile, }; use tarpc::context::Context; use tokio_tungstenite::connect_async; use super::websocket::WebSocketTransport; -use crate::schema::worker::Worker; pub use crate::schema::worker::WorkerKind; +use crate::{repositories::RepositoryCache, schema::worker::Worker}; #[tarpc::service] pub trait Hub { @@ -196,4 +197,14 @@ impl RepositoryAccess for SchedulerClient { async fn list_repositories(&self) -> Result> { Ok(self.0.list_repositories(Context::current()).await?) } + + fn clear_index(&self) -> Result<()> { + RepositoryCache::new()?.clear()?; + Ok(()) + } + + fn write_index(&self, source_file: SourceFile) -> Result<()> { + RepositoryCache::new()?.add_repository_meta(source_file)?; + Ok(()) + } } diff --git a/ee/tabby-webserver/src/repositories/mod.rs b/ee/tabby-webserver/src/repositories/mod.rs index 95f8e21d5784..791e3458686a 100644 --- a/ee/tabby-webserver/src/repositories/mod.rs +++ b/ee/tabby-webserver/src/repositories/mod.rs @@ -21,7 +21,7 @@ use crate::{ pub type ResolveState = Arc; -pub fn routes(rs: Arc, auth: Arc) -> Router { +pub fn routes(rs: ResolveState, auth: Arc) -> Router { Router::new() .route("/resolve", routing::get(resolve)) .route("/resolve/", routing::get(resolve)) @@ -42,10 +42,10 @@ async fn not_found() -> StatusCode { #[instrument(skip(repo))] async fn resolve_path( - State(rs): State>, + State(rs): State, Path(repo): Path, ) -> Result { - let Some(conf) = rs.find_repository(repo.name_str()) else { + let Ok(conf) = rs.find_repository(repo.name_str()) else { return Err(StatusCode::NOT_FOUND); }; let root = conf.dir(); @@ -76,16 +76,16 @@ async fn resolve_path( #[instrument(skip(repo))] async fn meta( - State(rs): State>, + State(rs): State, Path(repo): Path, ) -> Result, StatusCode> { let key = repo.dataset_key(); - if let Some(resp) = rs.resolve_meta(&key) { + if let Ok(resp) = rs.resolve_meta(&key) { return Ok(Json(resp)); } Err(StatusCode::NOT_FOUND) } -async fn resolve(State(rs): State>) -> Result { +async fn resolve(State(rs): State) -> Result { rs.resolve_all().map_err(|_| StatusCode::NOT_FOUND) } diff --git a/ee/tabby-webserver/src/repositories/resolve.rs b/ee/tabby-webserver/src/repositories/resolve.rs index 1205f90608f4..0d48b925dfb4 100644 --- a/ee/tabby-webserver/src/repositories/resolve.rs +++ b/ee/tabby-webserver/src/repositories/resolve.rs @@ -1,12 +1,6 @@ -use std::{ - collections::HashMap, - ops::Deref, - path::PathBuf, - str::FromStr, - sync::{Arc, RwLock}, -}; +use std::{path::PathBuf, str::FromStr}; -use anyhow::Result; +use anyhow::{anyhow, Result}; use axum::{ body::boxed, http::{header, Request, Uri}, @@ -14,60 +8,58 @@ use axum::{ Json, }; use hyper::Body; +use kv::{Bucket, Config, Store}; use serde::{Deserialize, Serialize}; use tabby_common::{config::RepositoryConfig, SourceFile, Tag}; use tower::ServiceExt; use tower_http::services::ServeDir; -use tracing::{debug, error, warn}; - -use crate::cron::{CronEvents, StartListener}; pub struct RepositoryCache { - repository_lookup: RwLock>, + cache: Store, } impl std::fmt::Debug for RepositoryCache { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("RepositoryCache") - .field("repository_lookup", &self.repository_lookup) - .finish() + f.debug_struct("RepositoryCache").finish() } } +type RepositoryBucket<'a> = Bucket<'a, String, kv::Json>; impl RepositoryCache { - pub async fn new_initialized(events: &CronEvents) -> Arc { - let cache = RepositoryCache { - repository_lookup: Default::default(), - }; - if let Err(e) = cache.reload().await { - error!("Failed to load repositories: {e}"); - }; - let cache = Arc::new(cache); - cache.start_reload_listener(events); - cache + pub fn new() -> Result { + let config = Config::new(tabby_common::path::repository_meta_db()); + let store = Store::new(config)?; + Ok(RepositoryCache { cache: store }) + } + + fn bucket(&self) -> Result { + Ok(self.cache.bucket(Some("repositories"))?) } - async fn reload(&self) -> Result<()> { - let mut repository_lookup = self.repository_lookup.write().unwrap(); - debug!("Reloading repositoriy metadata..."); - *repository_lookup = load_meta(); + pub fn clear(&self) -> Result<()> { + self.bucket()?.clear()?; Ok(()) } - fn start_reload_listener(self: &Arc, events: &CronEvents) { - let clone = self.clone(); - events.scheduler_job_succeeded.start_listener(move |_| { - let clone = clone.clone(); - async move { - if let Err(e) = clone.reload().await { - warn!("Error when reloading repository cache: {e}"); - }; - } - }); + pub fn add_repository_meta(&self, file: SourceFile) -> Result<()> { + let key = format!("{}:{}", file.repository_name, file.filepath); + self.bucket()?.set(&key, &kv::Json(file))?; + Ok(()) + } + + pub fn get_repository_meta(&self, repository_name: &str, filepath: &str) -> Result { + let key = format!("{repository_name}:{filepath}"); + let Some(kv::Json(val)) = self.bucket()?.get(&key)? else { + return Err(anyhow!("Repository meta not found")); + }; + Ok(val) } - fn repositories(&self) -> impl Deref> + '_ { - self.repository_lookup.read().unwrap() + fn str_to_key(str: &str) -> Option { + str.split_once(':').map(|(name, path)| RepositoryKey { + repo_name: name.to_string(), + rel_path: path.to_string(), + }) } } @@ -153,23 +145,7 @@ impl From for RepositoryMeta { } } -fn load_meta() -> HashMap { - let mut dataset = HashMap::new(); - let Ok(iter) = SourceFile::all() else { - return dataset; - }; - // Source files contain all metadata, read repository metadata from json - // (SourceFile can be converted into RepositoryMeta) - for file in iter { - let key = RepositoryKey { - repo_name: file.repository_name.clone(), - rel_path: file.filepath.clone(), - }; - dataset.insert(key, file.into()); - } - dataset -} - +/// Webserver resolve functions impl RepositoryCache { /// Resolve a directory pub async fn resolve_dir( @@ -235,24 +211,23 @@ impl RepositoryCache { Ok(resp.map(boxed)) } - pub fn resolve_meta(&self, key: &RepositoryKey) -> Option { - if let Some(meta) = self.repositories().get(key) { - return Some(meta.clone()); - } - None + pub fn resolve_meta(&self, key: &RepositoryKey) -> Result { + self.get_repository_meta(&key.repo_name, &key.rel_path) + .map(RepositoryMeta::from) } pub fn resolve_all(&self) -> Result { - let entries: Vec<_> = self - .repository_lookup - .read() - .unwrap() - .keys() - .map(|repo| DirEntry { + let mut entries = vec![]; + for entry in self.bucket()?.iter() { + let key: String = entry?.key()?; + let Some(key) = Self::str_to_key(&key) else { + continue; + }; + entries.push(DirEntry { kind: DirEntryKind::Dir, - basename: repo.repo_name.clone(), + basename: key.repo_name, }) - .collect(); + } let body = Json(ListDir { entries }).into_response(); let resp = Response::builder() @@ -262,15 +237,18 @@ impl RepositoryCache { Ok(resp) } - pub fn find_repository(&self, name: &str) -> Option { - let repository_lookup = self.repository_lookup.read().unwrap(); - let key = repository_lookup - .keys() - .find(|repo| repo.repo_name == name)?; - let value = repository_lookup.get(key)?; - Some(RepositoryConfig::new_named( - key.repo_name.clone(), - value.git_url.clone(), - )) + pub fn find_repository(&self, name: &str) -> Result { + for entry in self.bucket()?.iter() { + let entry = entry?; + let key: String = entry.key()?; + let Some(key) = Self::str_to_key(&key) else { + continue; + }; + if &key.repo_name == name { + let kv::Json(value) = entry.value()?; + return Ok(RepositoryConfig::new_named(key.repo_name, value.git_url)); + } + } + Err(anyhow!("Repository not found")) } } From 795e9de569284dc61d767cd6075dd39bc348c6e1 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 10:58:24 -0400 Subject: [PATCH 10/17] Use rotating cache version --- Cargo.lock | 1 - Cargo.toml | 1 - crates/tabby-common/src/config.rs | 9 +- crates/tabby-common/src/lib.rs | 1 - crates/tabby-common/src/path.rs | 4 - crates/tabby-scheduler/src/dataset.rs | 18 ++-- ee/tabby-db/Cargo.toml | 2 +- ee/tabby-webserver/src/handler.rs | 6 +- ee/tabby-webserver/src/hub/api.rs | 41 +++++++-- ee/tabby-webserver/src/repositories/mod.rs | 10 +-- .../src/repositories/resolve.rs | 85 ++++++++++++++----- ee/tabby-webserver/src/schema/repository.rs | 1 + ee/tabby-webserver/src/service/repository.rs | 5 +- 13 files changed, 131 insertions(+), 53 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8bcd9eb0558e..dbc02e25d2e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4609,7 +4609,6 @@ dependencies = [ "jsonwebtoken", "juniper", "juniper-axum", - "kv", "lazy_static", "lettre", "mime_guess", diff --git a/Cargo.toml b/Cargo.toml index 57527fbb66c4..448ddbadc0dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,6 @@ authors = ["Meng Zhang"] homepage = "https://github.com/TabbyML/tabby" [workspace.dependencies] -sqlx = { version = "0.7.3", features = ["sqlite", "chrono", "runtime-tokio", "macros"] } lazy_static = "1.4.0" serde = { version = "1.0", features = ["derive"] } serde_json = "1" diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs index b633d329c833..0f2f2a815404 100644 --- a/crates/tabby-common/src/config.rs +++ b/crates/tabby-common/src/config.rs @@ -147,12 +147,11 @@ impl Default for ServerConfig { pub trait RepositoryAccess: Send + Sync { async fn list_repositories(&self) -> Result>; - fn clear_index(&self) -> Result<()> { - Ok(()) - } - fn write_index(&self, _source_file: SourceFile) -> Result<()> { - Ok(()) + fn start_snapshot(&self) -> String { + Default::default() } + fn process_file(&self, _version: String, _file: SourceFile) {} + fn finish_snapshot(&self, _version: String) {} } pub struct ConfigRepositoryAccess; diff --git a/crates/tabby-common/src/lib.rs b/crates/tabby-common/src/lib.rs index 6b7cda3e66b5..f8fa66f647a0 100644 --- a/crates/tabby-common/src/lib.rs +++ b/crates/tabby-common/src/lib.rs @@ -24,7 +24,6 @@ use serde_jsonlines::JsonLinesReader; #[derive(Serialize, Deserialize, Clone)] pub struct SourceFile { pub git_url: String, - pub repository_name: String, pub filepath: String, pub content: String, pub language: String, diff --git a/crates/tabby-common/src/path.rs b/crates/tabby-common/src/path.rs index c7f2eaadbccc..823d64f91ce4 100644 --- a/crates/tabby-common/src/path.rs +++ b/crates/tabby-common/src/path.rs @@ -29,10 +29,6 @@ pub fn config_file() -> PathBuf { tabby_root().join("config.toml") } -pub fn repository_meta_db() -> PathBuf { - tabby_root().join("repositories.db") -} - pub fn usage_id_file() -> PathBuf { tabby_root().join("usage_anonymous_id") } diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index 0f3c55851a0a..d66380ef1c65 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -25,8 +25,12 @@ use tree_sitter_tags::TagsContext; use crate::utils::tqdm; trait RepositoryExt { - fn create_dataset(&self, writer: &mut impl Write, access: &impl RepositoryAccess) - -> Result<()>; + fn create_dataset( + &self, + writer: &mut impl Write, + access: &impl RepositoryAccess, + cache_version: String, + ) -> Result<()>; } impl RepositoryExt for RepositoryConfig { @@ -34,6 +38,7 @@ impl RepositoryExt for RepositoryConfig { &self, writer: &mut impl Write, access: &impl RepositoryAccess, + cache_version: String, ) -> Result<()> { let dir = self.dir(); @@ -66,7 +71,6 @@ impl RepositoryExt for RepositoryConfig { match read_to_string(entry.path()) { Ok(file_content) => { let source_file = SourceFile { - repository_name: self.name(), git_url: self.git_url.clone(), filepath: relative_path.display().to_string(), max_line_length: metrics::max_line_length(&file_content), @@ -77,7 +81,7 @@ impl RepositoryExt for RepositoryConfig { content: file_content, }; writer.write_json_lines([source_file.clone()])?; - access.write_index(source_file)?; + access.process_file(cache_version.clone(), source_file); } Err(e) => { error!("Cannot read {relative_path:?}: {e:?}"); @@ -105,7 +109,7 @@ fn is_source_code(entry: &DirEntry) -> bool { pub fn create_dataset(config: &[RepositoryConfig], access: &impl RepositoryAccess) -> Result<()> { fs::remove_dir_all(dataset_dir()).ok(); fs::create_dir_all(dataset_dir())?; - access.clear_index()?; + let mut writer = FileRotate::new( SourceFile::files_jsonl(), AppendCount::new(usize::max_value()), @@ -115,15 +119,17 @@ pub fn create_dataset(config: &[RepositoryConfig], access: &impl RepositoryAcces None, ); + let snapshot_version = access.start_snapshot(); let mut deps = DependencyFile::default(); for repository in config { deps::collect(repository.dir().as_path(), &mut deps); - repository.create_dataset(&mut writer, access)?; + repository.create_dataset(&mut writer, access, snapshot_version.clone())?; } serdeconv::to_json_file(&deps, dependency_file())?; writer.flush()?; + access.finish_snapshot(snapshot_version); Ok(()) } diff --git a/ee/tabby-db/Cargo.toml b/ee/tabby-db/Cargo.toml index e6c584281bbe..a971f0af5912 100644 --- a/ee/tabby-db/Cargo.toml +++ b/ee/tabby-db/Cargo.toml @@ -10,12 +10,12 @@ testutils = [] prod-db = [] [dependencies] +sqlx = { version = "0.7.3", features = ["sqlite", "chrono", "runtime-tokio", "macros"] } tabby-db-macros = { path = "../tabby-db-macros" } anyhow.workspace = true chrono = { workspace = true, features = ["serde"] } hash-ids.workspace = true lazy_static.workspace = true -sqlx = { workspace = true } sql_query_builder = { version = "2.1.0", features = ["sqlite"] } tabby-common = { path = "../../crates/tabby-common" } tokio = { workspace = true, features = ["fs"] } diff --git a/ee/tabby-webserver/src/handler.rs b/ee/tabby-webserver/src/handler.rs index 5df4dc38014a..c831e0871f69 100644 --- a/ee/tabby-webserver/src/handler.rs +++ b/ee/tabby-webserver/src/handler.rs @@ -16,7 +16,7 @@ use tracing::{error, warn}; use crate::{ cron, hub, oauth, - repositories::{self, RepositoryCache}, + repositories::{self, RepositoryCache, RepositoryResolver}, schema::{auth::AuthenticationService, create_schema, Schema, ServiceLocator}, service::{create_service_locator, event_logger::new_event_logger}, ui, @@ -51,9 +51,11 @@ impl WebserverHandle { cron::run_cron(ctx.auth(), ctx.job(), ctx.worker(), local_port).await; let repository_cache = RepositoryCache::new().expect("Failed to create repository index"); + let repository_resolver = + RepositoryResolver::new(repository_cache.clone(), ctx.repository()); let schema = Arc::new(create_schema()); - let rs = Arc::new(repository_cache); + let rs = Arc::new(repository_resolver); let api = api .route( diff --git a/ee/tabby-webserver/src/hub/api.rs b/ee/tabby-webserver/src/hub/api.rs index 51a53e534636..5c480dfa989e 100644 --- a/ee/tabby-webserver/src/hub/api.rs +++ b/ee/tabby-webserver/src/hub/api.rs @@ -15,6 +15,7 @@ use tabby_common::{ }; use tarpc::context::Context; use tokio_tungstenite::connect_async; +use tracing::error; use super::websocket::WebSocketTransport; pub use crate::schema::worker::WorkerKind; @@ -198,13 +199,41 @@ impl RepositoryAccess for SchedulerClient { Ok(self.0.list_repositories(Context::current()).await?) } - fn clear_index(&self) -> Result<()> { - RepositoryCache::new()?.clear()?; - Ok(()) + fn start_snapshot(&self) -> String { + log_errors("start repository snapshot", || { + let cache = RepositoryCache::new()?; + Ok(cache.get_next_version()?) + }) } - fn write_index(&self, source_file: SourceFile) -> Result<()> { - RepositoryCache::new()?.add_repository_meta(source_file)?; - Ok(()) + fn process_file(&self, version: String, file: SourceFile) { + log_errors("process repository file", move || { + let cache = RepositoryCache::new()?; + cache.add_repository_meta(version, file.into())?; + Ok(()) + }); + } + + fn finish_snapshot(&self, version: String) { + log_errors("clear old repository cache data", move || { + let cache = RepositoryCache::new()?; + let old_version = cache.latest_version()?; + cache.set_version(version)?; + cache.clear(old_version)?; + Ok(()) + }); + } +} + +fn log_errors(operation: &'static str, f: impl FnOnce() -> Result) -> T +where + T: Default, +{ + match f() { + Ok(v) => v, + Err(e) => { + error!("Failed to {operation}: {e}"); + Default::default() + } } } diff --git a/ee/tabby-webserver/src/repositories/mod.rs b/ee/tabby-webserver/src/repositories/mod.rs index 791e3458686a..b010f81b0118 100644 --- a/ee/tabby-webserver/src/repositories/mod.rs +++ b/ee/tabby-webserver/src/repositories/mod.rs @@ -10,8 +10,8 @@ use axum::{ response::Response, routing, Json, Router, }; -pub use resolve::RepositoryCache; -use tracing::{instrument, warn}; +pub use resolve::{RepositoryCache, RepositoryResolver}; +use tracing::warn; use crate::{ handler::require_login_middleware, @@ -19,7 +19,7 @@ use crate::{ schema::auth::AuthenticationService, }; -pub type ResolveState = Arc; +pub type ResolveState = Arc; pub fn routes(rs: ResolveState, auth: Arc) -> Router { Router::new() @@ -40,7 +40,6 @@ async fn not_found() -> StatusCode { StatusCode::NOT_FOUND } -#[instrument(skip(repo))] async fn resolve_path( State(rs): State, Path(repo): Path, @@ -74,13 +73,12 @@ async fn resolve_path( } } -#[instrument(skip(repo))] async fn meta( State(rs): State, Path(repo): Path, ) -> Result, StatusCode> { let key = repo.dataset_key(); - if let Ok(resp) = rs.resolve_meta(&key) { + if let Ok(resp) = rs.resolve_meta(&key).await { return Ok(Json(resp)); } Err(StatusCode::NOT_FOUND) diff --git a/ee/tabby-webserver/src/repositories/resolve.rs b/ee/tabby-webserver/src/repositories/resolve.rs index 0d48b925dfb4..d25131cacc45 100644 --- a/ee/tabby-webserver/src/repositories/resolve.rs +++ b/ee/tabby-webserver/src/repositories/resolve.rs @@ -1,4 +1,4 @@ -use std::{path::PathBuf, str::FromStr}; +use std::{path::PathBuf, str::FromStr, sync::Arc}; use anyhow::{anyhow, Result}; use axum::{ @@ -14,6 +14,13 @@ use tabby_common::{config::RepositoryConfig, SourceFile, Tag}; use tower::ServiceExt; use tower_http::services::ServeDir; +use crate::schema::repository::RepositoryService; + +fn repository_meta_db() -> PathBuf { + tabby_common::path::tabby_root().join("repositories.kv") +} + +#[derive(Clone)] pub struct RepositoryCache { cache: Store, } @@ -23,32 +30,58 @@ impl std::fmt::Debug for RepositoryCache { f.debug_struct("RepositoryCache").finish() } } -type RepositoryBucket<'a> = Bucket<'a, String, kv::Json>; +type RepositoryBucket<'a> = Bucket<'a, String, kv::Json>; impl RepositoryCache { pub fn new() -> Result { - let config = Config::new(tabby_common::path::repository_meta_db()); + let config = Config::new(repository_meta_db()); let store = Store::new(config)?; Ok(RepositoryCache { cache: store }) } + pub fn latest_version(&self) -> Result { + let bucket = self.cache.bucket(Some("version"))?; + if !bucket.contains(&"version".to_string())? { + self.set_version(self.get_next_version()?)?; + } + Ok(bucket + .get(&"version".to_string())? + .expect("Cache version must always be set")) + } + + pub fn set_version(&self, version: String) -> Result<()> { + let bucket = self.cache.bucket(Some("version"))?; + bucket.set(&"version".to_string(), &version)?; + Ok(()) + } + + pub fn get_next_version(&self) -> Result { + Ok(self.cache.generate_id()?.to_string()) + } + + fn versioned_bucket(&self, version: String) -> Result { + let bucket_name = format!("repositories_{}", version); + Ok(self.cache.bucket(Some(&bucket_name))?) + } + fn bucket(&self) -> Result { - Ok(self.cache.bucket(Some("repositories"))?) + self.versioned_bucket(self.latest_version()?) } - pub fn clear(&self) -> Result<()> { - self.bucket()?.clear()?; + pub fn clear(&self, version: String) -> Result<()> { + let bucket_name = format!("repositories_{version}"); + self.cache.drop_bucket(&bucket_name)?; Ok(()) } - pub fn add_repository_meta(&self, file: SourceFile) -> Result<()> { - let key = format!("{}:{}", file.repository_name, file.filepath); - self.bucket()?.set(&key, &kv::Json(file))?; + pub fn add_repository_meta(&self, version: String, file: RepositoryMeta) -> Result<()> { + let key = format!("{}:{}", file.git_url, file.filepath); + self.versioned_bucket(version)?.set(&key, &kv::Json(file))?; Ok(()) } - pub fn get_repository_meta(&self, repository_name: &str, filepath: &str) -> Result { - let key = format!("{repository_name}:{filepath}"); + pub fn get_repository_meta(&self, git_url: &str, filepath: &str) -> Result { + let key = format!("{git_url}:{filepath}"); let Some(kv::Json(val)) = self.bucket()?.get(&key)? else { return Err(anyhow!("Repository meta not found")); }; @@ -145,8 +178,16 @@ impl From for RepositoryMeta { } } -/// Webserver resolve functions -impl RepositoryCache { +pub struct RepositoryResolver { + cache: RepositoryCache, + service: Arc, +} + +impl RepositoryResolver { + pub fn new(cache: RepositoryCache, service: Arc) -> Self { + RepositoryResolver { cache, service } + } + /// Resolve a directory pub async fn resolve_dir( &self, @@ -211,16 +252,22 @@ impl RepositoryCache { Ok(resp.map(boxed)) } - pub fn resolve_meta(&self, key: &RepositoryKey) -> Result { - self.get_repository_meta(&key.repo_name, &key.rel_path) + pub async fn resolve_meta(&self, key: &RepositoryKey) -> Result { + let git_url = self + .service + .get_repository_by_name(key.repo_name.clone()) + .await? + .git_url; + self.cache + .get_repository_meta(&git_url, &key.rel_path) .map(RepositoryMeta::from) } pub fn resolve_all(&self) -> Result { let mut entries = vec![]; - for entry in self.bucket()?.iter() { + for entry in self.cache.bucket()?.iter() { let key: String = entry?.key()?; - let Some(key) = Self::str_to_key(&key) else { + let Some(key) = RepositoryCache::str_to_key(&key) else { continue; }; entries.push(DirEntry { @@ -238,10 +285,10 @@ impl RepositoryCache { } pub fn find_repository(&self, name: &str) -> Result { - for entry in self.bucket()?.iter() { + for entry in self.cache.bucket()?.iter() { let entry = entry?; let key: String = entry.key()?; - let Some(key) = Self::str_to_key(&key) else { + let Some(key) = RepositoryCache::str_to_key(&key) else { continue; }; if &key.repo_name == name { diff --git a/ee/tabby-webserver/src/schema/repository.rs b/ee/tabby-webserver/src/schema/repository.rs index a334bf81fe1f..d310edd0407c 100644 --- a/ee/tabby-webserver/src/schema/repository.rs +++ b/ee/tabby-webserver/src/schema/repository.rs @@ -60,6 +60,7 @@ pub trait RepositoryService: Send + Sync { async fn create_repository(&self, name: String, git_url: String) -> Result; async fn delete_repository(&self, id: &ID) -> Result; async fn update_repository(&self, id: &ID, name: String, git_url: String) -> Result; + async fn get_repository_by_name(&self, name: String) -> Result; async fn search_files(&self, name: &str, pattern: &str, top_n: usize) -> Result>; diff --git a/ee/tabby-webserver/src/service/repository.rs b/ee/tabby-webserver/src/service/repository.rs index cd3843d11185..8eb2a753c080 100644 --- a/ee/tabby-webserver/src/service/repository.rs +++ b/ee/tabby-webserver/src/service/repository.rs @@ -66,6 +66,10 @@ impl RepositoryService for DbConn { Ok(matching) } + + async fn get_repository_by_name(&self, name: String) -> Result { + todo!() + } } async fn match_pattern( @@ -110,7 +114,6 @@ async fn match_pattern( #[cfg(test)] mod tests { use tabby_db::DbConn; - use temp_testdir::TempDir; use super::*; From 51d551282728ad3ca35b72fc68a1c94d5774382f Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 11:02:58 -0400 Subject: [PATCH 11/17] Fix conflicts --- Cargo.lock | 102 +++++++++++++++++-- ee/tabby-webserver/src/service/repository.rs | 6 +- 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbc02e25d2e6..6232d5d6981f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -527,7 +527,7 @@ dependencies = [ "petgraph", "semver", "serde", - "toml", + "toml 0.7.4", "url", ] @@ -930,7 +930,7 @@ dependencies = [ "hashbrown 0.14.3", "lock_api", "once_cell", - "parking_lot_core", + "parking_lot_core 0.9.8", ] [[package]] @@ -1243,6 +1243,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "fs4" version = "0.6.6" @@ -1314,7 +1324,7 @@ checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" dependencies = [ "futures-core", "lock_api", - "parking_lot", + "parking_lot 0.12.1", ] [[package]] @@ -1370,6 +1380,15 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generator" version = "0.7.4" @@ -2034,6 +2053,20 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "kv" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "620727085ac39ee9650b373fe6d8073a0aee6f99e52a9c72b25f7671078039ab" +dependencies = [ + "pin-project-lite", + "serde", + "serde_json", + "sled", + "thiserror", + "toml 0.5.11", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -2506,7 +2539,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5262af4c94921c2646c5ac6ff7900c2af9cbb08dc26a797e18130a7019c039d4" dependencies = [ "nucleo-matcher", - "parking_lot", + "parking_lot 0.12.1", "rayon", ] @@ -2823,6 +2856,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core 0.8.6", +] + [[package]] name = "parking_lot" version = "0.12.1" @@ -2830,7 +2874,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core", + "parking_lot_core 0.9.8", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall 0.2.16", + "smallvec", + "winapi", ] [[package]] @@ -3837,7 +3895,7 @@ dependencies = [ "rmp-serde", "serde", "serde_json", - "toml", + "toml 0.7.4", "trackable", ] @@ -3851,7 +3909,7 @@ dependencies = [ "futures", "lazy_static", "log", - "parking_lot", + "parking_lot 0.12.1", "serial_test_derive 2.0.0", ] @@ -3865,7 +3923,7 @@ dependencies = [ "futures", "lazy_static", "log", - "parking_lot", + "parking_lot 0.12.1", "serial_test_derive 3.0.0", ] @@ -3999,6 +4057,22 @@ dependencies = [ "autocfg", ] +[[package]] +name = "sled" +version = "0.34.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" +dependencies = [ + "crc32fast", + "crossbeam-epoch", + "crossbeam-utils", + "fs2", + "fxhash", + "libc", + "log", + "parking_lot 0.11.2", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -4609,6 +4683,7 @@ dependencies = [ "jsonwebtoken", "juniper", "juniper-axum", + "kv", "lazy_static", "lettre", "mime_guess", @@ -4943,7 +5018,7 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot", + "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", "socket2 0.5.5", @@ -5071,6 +5146,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + [[package]] name = "toml" version = "0.7.4" diff --git a/ee/tabby-webserver/src/service/repository.rs b/ee/tabby-webserver/src/service/repository.rs index 8eb2a753c080..bb37e309e056 100644 --- a/ee/tabby-webserver/src/service/repository.rs +++ b/ee/tabby-webserver/src/service/repository.rs @@ -68,7 +68,10 @@ impl RepositoryService for DbConn { } async fn get_repository_by_name(&self, name: String) -> Result { - todo!() + Ok((self as &DbConn) + .get_repository_by_name(&name) + .await + .map(Repository::from)?) } } @@ -114,6 +117,7 @@ async fn match_pattern( #[cfg(test)] mod tests { use tabby_db::DbConn; + use temp_testdir::TempDir; use super::*; From 8ffb2c8b431fac6b0e0ca01831168a9b0c00dba6 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 11:03:50 -0400 Subject: [PATCH 12/17] Revert db cargo.toml --- ee/tabby-db/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/tabby-db/Cargo.toml b/ee/tabby-db/Cargo.toml index a971f0af5912..9f8f288068f7 100644 --- a/ee/tabby-db/Cargo.toml +++ b/ee/tabby-db/Cargo.toml @@ -10,13 +10,13 @@ testutils = [] prod-db = [] [dependencies] -sqlx = { version = "0.7.3", features = ["sqlite", "chrono", "runtime-tokio", "macros"] } tabby-db-macros = { path = "../tabby-db-macros" } anyhow.workspace = true chrono = { workspace = true, features = ["serde"] } hash-ids.workspace = true lazy_static.workspace = true sql_query_builder = { version = "2.1.0", features = ["sqlite"] } +sqlx = { version = "0.7.3", features = ["sqlite", "chrono", "runtime-tokio", "macros"] } tabby-common = { path = "../../crates/tabby-common" } tokio = { workspace = true, features = ["fs"] } uuid.workspace = true From f6dce7302b1c9f71beced8d35640a47ccaf7b0fe Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 11:34:59 -0400 Subject: [PATCH 13/17] Apply suggestions --- crates/tabby-common/src/config.rs | 6 +- crates/tabby-scheduler/src/dataset.rs | 4 +- ee/tabby-webserver/src/hub/api.rs | 78 +++++++++++-------- ee/tabby-webserver/src/lib.rs | 1 + ee/tabby-webserver/src/path.rs | 5 ++ .../src/repositories/resolve.rs | 49 +++++++----- 6 files changed, 87 insertions(+), 56 deletions(-) create mode 100644 ee/tabby-webserver/src/path.rs diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs index 0f2f2a815404..c0581ee905a3 100644 --- a/crates/tabby-common/src/config.rs +++ b/crates/tabby-common/src/config.rs @@ -147,11 +147,11 @@ impl Default for ServerConfig { pub trait RepositoryAccess: Send + Sync { async fn list_repositories(&self) -> Result>; - fn start_snapshot(&self) -> String { + fn start_snapshot(&self) -> u64 { Default::default() } - fn process_file(&self, _version: String, _file: SourceFile) {} - fn finish_snapshot(&self, _version: String) {} + fn process_file(&self, _version: u64, _file: SourceFile) {} + fn finish_snapshot(&self, _version: u64) {} } pub struct ConfigRepositoryAccess; diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index d66380ef1c65..d0e6d7d5d745 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -29,7 +29,7 @@ trait RepositoryExt { &self, writer: &mut impl Write, access: &impl RepositoryAccess, - cache_version: String, + cache_version: u64, ) -> Result<()>; } @@ -38,7 +38,7 @@ impl RepositoryExt for RepositoryConfig { &self, writer: &mut impl Write, access: &impl RepositoryAccess, - cache_version: String, + cache_version: u64, ) -> Result<()> { let dir = self.dir(); diff --git a/ee/tabby-webserver/src/hub/api.rs b/ee/tabby-webserver/src/hub/api.rs index 5c480dfa989e..98519a9cd9cd 100644 --- a/ee/tabby-webserver/src/hub/api.rs +++ b/ee/tabby-webserver/src/hub/api.rs @@ -199,41 +199,57 @@ impl RepositoryAccess for SchedulerClient { Ok(self.0.list_repositories(Context::current()).await?) } - fn start_snapshot(&self) -> String { - log_errors("start repository snapshot", || { - let cache = RepositoryCache::new()?; - Ok(cache.get_next_version()?) - }) + fn start_snapshot(&self) -> u64 { + let cache = match RepositoryCache::new() { + Ok(cache) => cache, + Err(e) => { + error!("Failed to open repository cache: {e}"); + return 0; + } + }; + match cache.get_next_version() { + Ok(v) => v, + Err(e) => { + error!("Failed to get next repository cache version: {e}"); + 0 + } + } } - fn process_file(&self, version: String, file: SourceFile) { - log_errors("process repository file", move || { - let cache = RepositoryCache::new()?; - cache.add_repository_meta(version, file.into())?; - Ok(()) - }); + fn process_file(&self, version: u64, file: SourceFile) { + let cache = match RepositoryCache::new() { + Ok(cache) => cache, + Err(e) => { + error!("Failed to open repository cache: {e}"); + return; + } + }; + let file_path = file.filepath.clone(); + if let Err(e) = cache.add_repository_meta(version, file.into()) { + error!("Failed to write {} to repository cache: {e}", file_path); + }; } - fn finish_snapshot(&self, version: String) { - log_errors("clear old repository cache data", move || { - let cache = RepositoryCache::new()?; - let old_version = cache.latest_version()?; - cache.set_version(version)?; - cache.clear(old_version)?; - Ok(()) - }); - } -} - -fn log_errors(operation: &'static str, f: impl FnOnce() -> Result) -> T -where - T: Default, -{ - match f() { - Ok(v) => v, - Err(e) => { - error!("Failed to {operation}: {e}"); - Default::default() + fn finish_snapshot(&self, version: u64) { + let cache = match RepositoryCache::new() { + Ok(cache) => cache, + Err(e) => { + error!("Failed to open repository cache: {e}"); + return; + } + }; + let old_version = match cache.latest_version() { + Ok(v) => v, + Err(e) => { + error!("Failed to get next repository cache version: {e}"); + return; + } + }; + if let Err(e) = cache + .set_version(version) + .and_then(|_| cache.clear_versions_under(old_version)) + { + error!("Failed to update repository cache version: {e}"); } } } diff --git a/ee/tabby-webserver/src/lib.rs b/ee/tabby-webserver/src/lib.rs index d20b7d053cdc..4cd719aff72b 100644 --- a/ee/tabby-webserver/src/lib.rs +++ b/ee/tabby-webserver/src/lib.rs @@ -4,6 +4,7 @@ mod cron; mod handler; mod hub; mod oauth; +pub(crate) mod path; mod repositories; mod schema; mod service; diff --git a/ee/tabby-webserver/src/path.rs b/ee/tabby-webserver/src/path.rs new file mode 100644 index 000000000000..47ccb4625d4b --- /dev/null +++ b/ee/tabby-webserver/src/path.rs @@ -0,0 +1,5 @@ +use std::path::PathBuf; + +pub fn repository_meta_db() -> PathBuf { + tabby_common::path::tabby_root().join("repositories.kv") +} diff --git a/ee/tabby-webserver/src/repositories/resolve.rs b/ee/tabby-webserver/src/repositories/resolve.rs index d25131cacc45..80c787af8cdb 100644 --- a/ee/tabby-webserver/src/repositories/resolve.rs +++ b/ee/tabby-webserver/src/repositories/resolve.rs @@ -14,11 +14,7 @@ use tabby_common::{config::RepositoryConfig, SourceFile, Tag}; use tower::ServiceExt; use tower_http::services::ServeDir; -use crate::schema::repository::RepositoryService; - -fn repository_meta_db() -> PathBuf { - tabby_common::path::tabby_root().join("repositories.kv") -} +use crate::{path::repository_meta_db, schema::repository::RepositoryService}; #[derive(Clone)] pub struct RepositoryCache { @@ -32,6 +28,9 @@ impl std::fmt::Debug for RepositoryCache { } type RepositoryBucket<'a> = Bucket<'a, String, kv::Json>; +static META_BUCKET: &str = "meta"; +static META_BUCKET_VERSION_KEY: &str = "version"; + impl RepositoryCache { pub fn new() -> Result { let config = Config::new(repository_meta_db()); @@ -39,27 +38,28 @@ impl RepositoryCache { Ok(RepositoryCache { cache: store }) } - pub fn latest_version(&self) -> Result { - let bucket = self.cache.bucket(Some("version"))?; - if !bucket.contains(&"version".to_string())? { + pub fn latest_version(&self) -> Result { + let bucket: Bucket<_, String> = self.cache.bucket(Some(META_BUCKET))?; + if !bucket.contains(&META_BUCKET_VERSION_KEY.to_string())? { self.set_version(self.get_next_version()?)?; } Ok(bucket - .get(&"version".to_string())? - .expect("Cache version must always be set")) + .get(&META_BUCKET_VERSION_KEY.to_string())? + .expect("Cache version must always be set") + .parse()?) } - pub fn set_version(&self, version: String) -> Result<()> { - let bucket = self.cache.bucket(Some("version"))?; - bucket.set(&"version".to_string(), &version)?; + pub fn set_version(&self, version: u64) -> Result<()> { + let bucket = self.cache.bucket(Some(META_BUCKET))?; + bucket.set(&META_BUCKET_VERSION_KEY.to_string(), &version.to_string())?; Ok(()) } - pub fn get_next_version(&self) -> Result { - Ok(self.cache.generate_id()?.to_string()) + pub fn get_next_version(&self) -> Result { + Ok(self.cache.generate_id()?) } - fn versioned_bucket(&self, version: String) -> Result { + fn versioned_bucket(&self, version: u64) -> Result { let bucket_name = format!("repositories_{}", version); Ok(self.cache.bucket(Some(&bucket_name))?) } @@ -68,13 +68,22 @@ impl RepositoryCache { self.versioned_bucket(self.latest_version()?) } - pub fn clear(&self, version: String) -> Result<()> { - let bucket_name = format!("repositories_{version}"); - self.cache.drop_bucket(&bucket_name)?; + pub fn clear_versions_under(&self, old_version: u64) -> Result<()> { + for bucket in self.cache.buckets() { + let Some((_, version)) = bucket.split_once('_') else { + continue; + }; + let Ok(version) = version.parse::() else { + continue; + }; + if version <= old_version { + self.cache.drop_bucket(bucket)?; + } + } Ok(()) } - pub fn add_repository_meta(&self, version: String, file: RepositoryMeta) -> Result<()> { + pub fn add_repository_meta(&self, version: u64, file: RepositoryMeta) -> Result<()> { let key = format!("{}:{}", file.git_url, file.filepath); self.versioned_bucket(version)?.set(&key, &kv::Json(file))?; Ok(()) From bcf69ca4a1ccd364211ef66cabf715254e7ea9b9 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 11:40:34 -0400 Subject: [PATCH 14/17] Apply suggestions --- crates/tabby-common/src/config.rs | 3 --- crates/tabby-scheduler/src/dataset.rs | 11 +++++++---- ee/tabby-webserver/src/hub/api.rs | 17 ----------------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs index c0581ee905a3..907a2bf1f472 100644 --- a/crates/tabby-common/src/config.rs +++ b/crates/tabby-common/src/config.rs @@ -147,9 +147,6 @@ impl Default for ServerConfig { pub trait RepositoryAccess: Send + Sync { async fn list_repositories(&self) -> Result>; - fn start_snapshot(&self) -> u64 { - Default::default() - } fn process_file(&self, _version: u64, _file: SourceFile) {} fn finish_snapshot(&self, _version: u64) {} } diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index d0e6d7d5d745..576ce8cf0a75 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -29,7 +29,7 @@ trait RepositoryExt { &self, writer: &mut impl Write, access: &impl RepositoryAccess, - cache_version: u64, + snapshot_version: u64, ) -> Result<()>; } @@ -38,7 +38,7 @@ impl RepositoryExt for RepositoryConfig { &self, writer: &mut impl Write, access: &impl RepositoryAccess, - cache_version: u64, + snapshot_version: u64, ) -> Result<()> { let dir = self.dir(); @@ -81,7 +81,7 @@ impl RepositoryExt for RepositoryConfig { content: file_content, }; writer.write_json_lines([source_file.clone()])?; - access.process_file(cache_version.clone(), source_file); + access.process_file(snapshot_version.clone(), source_file); } Err(e) => { error!("Cannot read {relative_path:?}: {e:?}"); @@ -119,7 +119,10 @@ pub fn create_dataset(config: &[RepositoryConfig], access: &impl RepositoryAcces None, ); - let snapshot_version = access.start_snapshot(); + let snapshot_version = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("Failed to read system clock") + .as_millis() as u64; let mut deps = DependencyFile::default(); for repository in config { deps::collect(repository.dir().as_path(), &mut deps); diff --git a/ee/tabby-webserver/src/hub/api.rs b/ee/tabby-webserver/src/hub/api.rs index 98519a9cd9cd..15920a722994 100644 --- a/ee/tabby-webserver/src/hub/api.rs +++ b/ee/tabby-webserver/src/hub/api.rs @@ -199,23 +199,6 @@ impl RepositoryAccess for SchedulerClient { Ok(self.0.list_repositories(Context::current()).await?) } - fn start_snapshot(&self) -> u64 { - let cache = match RepositoryCache::new() { - Ok(cache) => cache, - Err(e) => { - error!("Failed to open repository cache: {e}"); - return 0; - } - }; - match cache.get_next_version() { - Ok(v) => v, - Err(e) => { - error!("Failed to get next repository cache version: {e}"); - 0 - } - } - } - fn process_file(&self, version: u64, file: SourceFile) { let cache = match RepositoryCache::new() { Ok(cache) => cache, From 443b281f5dcc76579d45c5142c7ac4ff07859fa0 Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 11:48:25 -0400 Subject: [PATCH 15/17] Readd start_snapshot --- crates/tabby-common/src/config.rs | 1 + crates/tabby-scheduler/src/dataset.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/crates/tabby-common/src/config.rs b/crates/tabby-common/src/config.rs index 907a2bf1f472..1c116d666625 100644 --- a/crates/tabby-common/src/config.rs +++ b/crates/tabby-common/src/config.rs @@ -147,6 +147,7 @@ impl Default for ServerConfig { pub trait RepositoryAccess: Send + Sync { async fn list_repositories(&self) -> Result>; + fn start_snapshot(&self, _version: u64) {} fn process_file(&self, _version: u64, _file: SourceFile) {} fn finish_snapshot(&self, _version: u64) {} } diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index 576ce8cf0a75..689291325ce4 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -123,6 +123,7 @@ pub fn create_dataset(config: &[RepositoryConfig], access: &impl RepositoryAcces .duration_since(std::time::UNIX_EPOCH) .expect("Failed to read system clock") .as_millis() as u64; + access.start_snapshot(snapshot_version); let mut deps = DependencyFile::default(); for repository in config { deps::collect(repository.dir().as_path(), &mut deps); From 323a3c746d385ba4833bd98a1c965579b7da594e Mon Sep 17 00:00:00 2001 From: boxbeam Date: Fri, 5 Apr 2024 12:01:16 -0400 Subject: [PATCH 16/17] Make update_latest_version --- ee/tabby-webserver/src/hub/api.rs | 12 +----------- ee/tabby-webserver/src/path.rs | 2 +- ee/tabby-webserver/src/repositories/resolve.rs | 7 ++++--- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/ee/tabby-webserver/src/hub/api.rs b/ee/tabby-webserver/src/hub/api.rs index 15920a722994..6a1f9cf7aaa8 100644 --- a/ee/tabby-webserver/src/hub/api.rs +++ b/ee/tabby-webserver/src/hub/api.rs @@ -221,17 +221,7 @@ impl RepositoryAccess for SchedulerClient { return; } }; - let old_version = match cache.latest_version() { - Ok(v) => v, - Err(e) => { - error!("Failed to get next repository cache version: {e}"); - return; - } - }; - if let Err(e) = cache - .set_version(version) - .and_then(|_| cache.clear_versions_under(old_version)) - { + if let Err(e) = cache.update_latest_version(version) { error!("Failed to update repository cache version: {e}"); } } diff --git a/ee/tabby-webserver/src/path.rs b/ee/tabby-webserver/src/path.rs index 47ccb4625d4b..bef7bc978350 100644 --- a/ee/tabby-webserver/src/path.rs +++ b/ee/tabby-webserver/src/path.rs @@ -1,5 +1,5 @@ use std::path::PathBuf; pub fn repository_meta_db() -> PathBuf { - tabby_common::path::tabby_root().join("repositories.kv") + tabby_common::path::tabby_root().join("ee/repositories.kv") } diff --git a/ee/tabby-webserver/src/repositories/resolve.rs b/ee/tabby-webserver/src/repositories/resolve.rs index 80c787af8cdb..0b761494b074 100644 --- a/ee/tabby-webserver/src/repositories/resolve.rs +++ b/ee/tabby-webserver/src/repositories/resolve.rs @@ -41,7 +41,7 @@ impl RepositoryCache { pub fn latest_version(&self) -> Result { let bucket: Bucket<_, String> = self.cache.bucket(Some(META_BUCKET))?; if !bucket.contains(&META_BUCKET_VERSION_KEY.to_string())? { - self.set_version(self.get_next_version()?)?; + self.update_latest_version(self.get_next_version()?)?; } Ok(bucket .get(&META_BUCKET_VERSION_KEY.to_string())? @@ -49,9 +49,10 @@ impl RepositoryCache { .parse()?) } - pub fn set_version(&self, version: u64) -> Result<()> { + pub fn update_latest_version(&self, version: u64) -> Result<()> { let bucket = self.cache.bucket(Some(META_BUCKET))?; bucket.set(&META_BUCKET_VERSION_KEY.to_string(), &version.to_string())?; + self.clear_versions_under(version)?; Ok(()) } @@ -76,7 +77,7 @@ impl RepositoryCache { let Ok(version) = version.parse::() else { continue; }; - if version <= old_version { + if version < old_version { self.cache.drop_bucket(bucket)?; } } From cd197004303e59b8ebedb9543b96697b7cb8e1b8 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Fri, 5 Apr 2024 16:09:19 +0000 Subject: [PATCH 17/17] [autofix.ci] apply automated fixes --- crates/tabby-scheduler/src/dataset.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/tabby-scheduler/src/dataset.rs b/crates/tabby-scheduler/src/dataset.rs index 689291325ce4..4dc77fb6bf7d 100644 --- a/crates/tabby-scheduler/src/dataset.rs +++ b/crates/tabby-scheduler/src/dataset.rs @@ -81,7 +81,7 @@ impl RepositoryExt for RepositoryConfig { content: file_content, }; writer.write_json_lines([source_file.clone()])?; - access.process_file(snapshot_version.clone(), source_file); + access.process_file(snapshot_version, source_file); } Err(e) => { error!("Cannot read {relative_path:?}: {e:?}"); @@ -127,7 +127,7 @@ pub fn create_dataset(config: &[RepositoryConfig], access: &impl RepositoryAcces let mut deps = DependencyFile::default(); for repository in config { deps::collect(repository.dir().as_path(), &mut deps); - repository.create_dataset(&mut writer, access, snapshot_version.clone())?; + repository.create_dataset(&mut writer, access, snapshot_version)?; } serdeconv::to_json_file(&deps, dependency_file())?;