From 0cbf842524438dea84c39c842003ec2d4592b7e9 Mon Sep 17 00:00:00 2001 From: Jordan Doyle Date: Sun, 14 Jan 2024 21:37:42 +0000 Subject: [PATCH] Migrate to RocksDB --- Cargo.lock | 323 +++++++++++++++++++----------- Cargo.toml | 5 +- README.md | 6 +- doc/man/rgit.1.md | 6 +- src/database/indexer.rs | 73 ++++--- src/database/schema/commit.rs | 179 ++++++++++++----- src/database/schema/mod.rs | 3 +- src/database/schema/prefixes.rs | 58 +----- src/database/schema/repository.rs | 168 +++++++++------- src/database/schema/tag.rs | 115 +++++++---- src/git.rs | 2 +- src/main.rs | 93 ++++++--- src/methods/index.rs | 6 +- src/methods/repo/log.rs | 76 +++---- src/methods/repo/mod.rs | 4 +- src/methods/repo/refs.rs | 47 +++-- src/methods/repo/summary.rs | 54 ++--- 17 files changed, 725 insertions(+), 493 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1251242..91ec96e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -309,6 +309,27 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.65.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +dependencies = [ + "bitflags 1.3.2", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.48", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -373,12 +394,6 @@ version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.5.0" @@ -391,6 +406,17 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc" +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "camino" version = "1.1.6" @@ -432,12 +458,32 @@ dependencies = [ "libc", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.4.13" @@ -800,7 +846,7 @@ checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "windows-sys 0.52.0", ] @@ -829,16 +875,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "futures" version = "0.3.30" @@ -928,15 +964,6 @@ dependencies = [ "slab", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -994,7 +1021,7 @@ dependencies = [ "aho-corasick", "bstr", "log", - "regex-automata", + "regex-automata 0.4.3", "regex-syntax 0.8.2", ] @@ -1159,15 +1186,6 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - [[package]] name = "itoa" version = "1.0.10" @@ -1198,6 +1216,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "libc" version = "0.2.151" @@ -1218,12 +1242,38 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "libm" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +[[package]] +name = "librocksdb-sys" +version = "0.11.0+8.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3386f101bcb4bd252d8e9d2fb41ec3b0862a15a62b478c355b2982efa469e3e" +dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "glob", + "libc", + "libz-sys", + "lz4-sys", + "zstd-sys", +] + [[package]] name = "libssh2-sys" version = "0.3.0" @@ -1287,6 +1337,16 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "lz4-sys" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d27b317e207b10f69f5e75494119e391a96f48861ae870d1da6edac98ca900" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "mach2" version = "0.4.2" @@ -1296,6 +1356,15 @@ dependencies = [ "libc", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "matchit" version = "0.7.3" @@ -1369,7 +1438,7 @@ dependencies = [ "crossbeam-utils", "futures-util", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "quanta", "rustc_version", "skeptic", @@ -1532,17 +1601,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" -[[package]] -name = "parking_lot" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" -dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -1550,21 +1608,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" dependencies = [ "lock_api", - "parking_lot_core 0.9.9", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -1575,7 +1619,7 @@ checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", + "redox_syscall", "smallvec", "windows-targets 0.48.5", ] @@ -1595,6 +1639,12 @@ dependencies = [ "std_prelude", ] +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1659,6 +1709,22 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "prettyplease" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +dependencies = [ + "proc-macro2", + "syn 2.0.48", +] + [[package]] name = "proc-macro2" version = "1.0.75" @@ -1714,19 +1780,40 @@ dependencies = [ ] [[package]] -name = "raw-cpuid" -version = "10.7.0" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "bitflags 1.3.2", + "libc", + "rand_chacha", + "rand_core", ] [[package]] -name = "redox_syscall" -version = "0.2.16" +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" dependencies = [ "bitflags 1.3.2", ] @@ -1748,10 +1835,19 @@ checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" dependencies = [ "aho-corasick", "memchr", - "regex-automata", + "regex-automata 0.4.3", "regex-syntax 0.8.2", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", +] + [[package]] name = "regex-automata" version = "0.4.3" @@ -1763,6 +1859,12 @@ dependencies = [ "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.7.5" @@ -1807,13 +1909,14 @@ dependencies = [ "moka", "nom", "once_cell", - "parking_lot 0.12.1", + "parking_lot", "path-clean", + "rand", + "rocksdb", "rsass", "rust-ini", "serde", "sha2", - "sled", "syntect", "tar", "time", @@ -1832,6 +1935,16 @@ dependencies = [ "yoke", ] +[[package]] +name = "rocksdb" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb6f170a4041d50a0ce04b0d2e14916d6ca863ea2e422689a5b694395d299ffe" +dependencies = [ + "libc", + "librocksdb-sys", +] + [[package]] name = "rsass" version = "0.28.8" @@ -1865,6 +1978,12 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.4.0" @@ -2032,6 +2151,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" +[[package]] +name = "shlex" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -2065,23 +2190,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "sled" -version = "0.34.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f96b4737c2ce5987354855aed3797279def4ebf734436c6aa4552cf8e169935" -dependencies = [ - "crc32fast", - "crossbeam-epoch", - "crossbeam-utils", - "fs2", - "fxhash", - "libc", - "log", - "parking_lot 0.11.2", - "zstd", -] - [[package]] name = "slug" version = "0.1.5" @@ -2213,7 +2321,7 @@ checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.4.1", + "redox_syscall", "rustix", "windows-sys 0.52.0", ] @@ -2328,7 +2436,7 @@ dependencies = [ "libc", "mio", "num_cpus", - "parking_lot 0.12.1", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2", @@ -2467,10 +2575,14 @@ version = "0.3.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" dependencies = [ + "matchers", "nu-ansi-term 0.46.0", + "once_cell", + "regex", "sharded-slab", "smallvec", "thread_local", + "tracing", "tracing-core", "tracing-log", ] @@ -2992,31 +3104,12 @@ dependencies = [ "synstructure", ] -[[package]] -name = "zstd" -version = "0.9.2+zstd.1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2390ea1bf6c038c39674f22d95f0564725fc06034a47129179810b2fc58caa54" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "4.1.3+zstd.1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e99d81b99fb3c2c2c794e3fe56c305c63d5173a16a46b5850b07c935ffc7db79" -dependencies = [ - "libc", - "zstd-sys", -] - [[package]] name = "zstd-sys" -version = "1.6.2+zstd.1.5.1" +version = "2.0.9+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2daf2f248d9ea44454bfcb2516534e8b8ad2fc91bf818a1885495fc42bc8ac9f" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" dependencies = [ "cc", - "libc", + "pkg-config", ] diff --git a/Cargo.toml b/Cargo.toml index 736fca7..567fcfd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ parking_lot = "0.12" serde = { version = "1.0", features = ["derive", "rc"] } sha2 = "0.10" syntect = "5" -sled = { version = "0.34", features = ["compression"] } +rocksdb = "0.21" tar = "0.4" flate2 = "1.0" time = { version = "0.3", features = ["serde"] } @@ -45,11 +45,12 @@ tower-service = "0.3" tower-layer = "0.3" tower-http = { version = "0.4.4", features = ["cors"] } tracing = "0.1" -tracing-subscriber = "0.3" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } unix_mode = "0.1" uuid = { version = "1.6", features = ["v4"] } httparse = "1.7" yoke = { version = "0.7.1", features = ["derive"] } +rand = "0.8.5" [build-dependencies] anyhow = "1.0" diff --git a/README.md b/README.md index 493b812..b6d3227 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [See it in action!](https://git.inept.dev/) -A gitweb/cgit-like interface for the modern age. Written in Rust using Axum, git2, Askama and Sled. +A gitweb/cgit-like interface for the modern age. Written in Rust using Axum, git2, Askama and RocksDB. Includes a dark mode for late night committing. @@ -35,7 +35,7 @@ Includes a dark mode for late night committing. ## Features - **Efficient Metadata Storage** - [Sled][] is used to store all metadata about a repository, including commits, branches, and tags. Metadata is reindexed, and the reindex interval is configurable (default: every 5 minutes), resulting in up to 97% faster load times for large repositories. + [RocksDB][] is used to store all metadata about a repository, including commits, branches, and tags. Metadata is reindexed, and the reindex interval is configurable (default: every 5 minutes), resulting in up to 97% faster load times for large repositories. - **On-Demand Loading** Files, trees, and diffs are loaded using [git2][] directly upon request. A small in-memory cache is included for rendered READMEs and diffs, enhancing performance. @@ -43,7 +43,7 @@ Includes a dark mode for late night committing. - **Dark Mode Support** Enjoy a dark mode for late-night committing, providing a visually comfortable experience during extended coding sessions. -[Sled]: https://github.com/spacejam/sled +[RocksDB]: https://github.com/facebook/rocksdb [git2]: https://github.com/rust-lang/git2-rs ## Getting Started diff --git a/doc/man/rgit.1.md b/doc/man/rgit.1.md index 29eca92..4b5b657 100644 --- a/doc/man/rgit.1.md +++ b/doc/man/rgit.1.md @@ -15,7 +15,7 @@ SYNOPSIS DESCRIPTION =========== -A gitweb/cgit-like interface for the modern age. Written in Rust using Axum, git2, Askama, and Sled. +A gitweb/cgit-like interface for the modern age. Written in Rust using Axum, git2, Askama, and RocksDB. _bind_address_ @@ -47,9 +47,9 @@ OPTIONS **-d** _path_, **\--db-store** _path_ -: Path to a directory in which the Sled database should be stored, will be created if it doesn't already exist. +: Path to a directory in which the RocksDB database should be stored, will be created if it doesn't already exist. - The Sled database is very quick to generate, so this can be pointed to temporary storage. (Required) + The RocksDB database is very quick to generate, so this can be pointed to temporary storage. (Required) Example: diff --git a/src/database/indexer.rs b/src/database/indexer.rs index 3cf751d..92d9532 100644 --- a/src/database/indexer.rs +++ b/src/database/indexer.rs @@ -4,6 +4,7 @@ use std::{ ffi::OsStr, fmt::Debug, path::{Path, PathBuf}, + sync::Arc, }; use anyhow::Context; @@ -14,20 +15,19 @@ use tracing::{error, info, info_span, instrument, warn}; use crate::database::schema::{ commit::Commit, - prefixes::TreePrefix, repository::{Repository, RepositoryId}, tag::{Tag, TagTree}, }; -pub fn run(scan_path: &Path, db: &sled::Db) { +pub fn run(scan_path: &Path, db: &Arc) { let span = info_span!("index_update"); let _entered = span.enter(); info!("Starting index update"); update_repository_metadata(scan_path, db); - update_repository_reflog(scan_path, db); - update_repository_tags(scan_path, db); + update_repository_reflog(scan_path, db.clone()); + update_repository_tags(scan_path, db.clone()); info!("Flushing to disk"); @@ -39,7 +39,7 @@ pub fn run(scan_path: &Path, db: &sled::Db) { } #[instrument(skip(db))] -fn update_repository_metadata(scan_path: &Path, db: &sled::Db) { +fn update_repository_metadata(scan_path: &Path, db: &rocksdb::DB) { let mut discovered = Vec::new(); discover_repositories(scan_path, &mut discovered); @@ -49,7 +49,7 @@ fn update_repository_metadata(scan_path: &Path, db: &sled::Db) { }; let id = match Repository::open(db, relative) { - Ok(v) => v.map_or_else(|| RepositoryId::new(db), |v| v.get().id), + Ok(v) => v.map_or_else(RepositoryId::new, |v| v.get().id), Err(error) => { // maybe we could nuke it ourselves, but we need to instantly trigger // a reindex and we could enter into an infinite loop if there's a bug @@ -75,7 +75,7 @@ fn update_repository_metadata(scan_path: &Path, db: &sled::Db) { } }; - Repository { + let res = Repository { id, name, description, @@ -88,6 +88,10 @@ fn update_repository_metadata(scan_path: &Path, db: &sled::Db) { .map(Cow::Owned), } .insert(db, relative); + + if let Err(error) = res { + warn!(%error, "Failed to insert repository"); + } } } @@ -116,8 +120,8 @@ fn find_last_committed_time(repo: &git2::Repository) -> Result) { + let repos = match Repository::fetch_all(&db) { Ok(v) => v, Err(error) => { error!(%error, "Failed to read repository index to update reflog, consider deleting database directory"); @@ -126,7 +130,7 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) { }; for (relative_path, db_repository) in repos { - let Some(git_repository) = open_repo(scan_path, &relative_path, db_repository.get(), db) + let Some(git_repository) = open_repo(scan_path, &relative_path, db_repository.get(), &db) else { continue; }; @@ -139,6 +143,8 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) { } }; + let mut valid_references = Vec::new(); + for reference in references.filter_map(Result::ok) { let reference_name = String::from_utf8_lossy(reference.name_bytes()); if !reference_name.starts_with("refs/heads/") @@ -147,18 +153,24 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) { continue; } + valid_references.push(reference_name.to_string()); + if let Err(error) = branch_index_update( &reference, &reference_name, &relative_path, db_repository.get(), - db, + db.clone(), &git_repository, false, ) { error!(%error, "Failed to update reflog for {relative_path}@{reference_name}"); } } + + if let Err(error) = db_repository.get().replace_heads(&db, &valid_references) { + error!(%error, "Failed to update heads"); + } } } @@ -168,20 +180,21 @@ fn branch_index_update( reference_name: &str, relative_path: &str, db_repository: &Repository<'_>, - db: &sled::Db, + db: Arc, git_repository: &git2::Repository, force_reindex: bool, ) -> Result<(), anyhow::Error> { info!("Refreshing indexes"); + let commit_tree = db_repository.commit_tree(db.clone(), reference_name); + if force_reindex { - db.drop_tree(TreePrefix::commit_id(db_repository.id, reference_name))?; + commit_tree.drop_commits()?; } let commit = reference.peel_to_commit()?; - let commit_tree = db_repository.commit_tree(db, reference_name)?; - let latest_indexed = if let Some(latest_indexed) = commit_tree.fetch_latest_one() { + let latest_indexed = if let Some(latest_indexed) = commit_tree.fetch_latest_one()? { if commit.id().as_bytes() == &*latest_indexed.get().hash { info!("No commits since last index"); return Ok(()); @@ -196,7 +209,7 @@ fn branch_index_update( revwalk.set_sorting(Sort::REVERSE)?; revwalk.push_ref(reference_name)?; - let tree_len = commit_tree.len(); + let tree_len = commit_tree.len()?; let mut seen = false; let mut i = 0; for rev in revwalk { @@ -220,7 +233,7 @@ fn branch_index_update( let author = commit.author(); let committer = commit.committer(); - Commit::new(&commit, &author, &committer).insert(&commit_tree, tree_len + i); + Commit::new(&commit, &author, &committer).insert(&commit_tree, tree_len + i)?; i += 1; } @@ -238,12 +251,14 @@ fn branch_index_update( ); } + commit_tree.update_counter(tree_len + i)?; + Ok(()) } #[instrument(skip(db))] -fn update_repository_tags(scan_path: &Path, db: &sled::Db) { - let repos = match Repository::fetch_all(db) { +fn update_repository_tags(scan_path: &Path, db: Arc) { + let repos = match Repository::fetch_all(&db) { Ok(v) => v, Err(error) => { error!(%error, "Failed to read repository index to update tags, consider deleting database directory"); @@ -252,13 +267,17 @@ fn update_repository_tags(scan_path: &Path, db: &sled::Db) { }; for (relative_path, db_repository) in repos { - let Some(git_repository) = open_repo(scan_path, &relative_path, db_repository.get(), db) + let Some(git_repository) = open_repo(scan_path, &relative_path, db_repository.get(), &db) else { continue; }; - if let Err(error) = tag_index_scan(&relative_path, db_repository.get(), db, &git_repository) - { + if let Err(error) = tag_index_scan( + &relative_path, + db_repository.get(), + db.clone(), + &git_repository, + ) { error!(%error, "Failed to update tags for {relative_path}"); } } @@ -268,12 +287,10 @@ fn update_repository_tags(scan_path: &Path, db: &sled::Db) { fn tag_index_scan( relative_path: &str, db_repository: &Repository<'_>, - db: &sled::Db, + db: Arc, git_repository: &git2::Repository, ) -> Result<(), anyhow::Error> { - let tag_tree = db_repository - .tag_tree(db) - .context("Failed to read tag index tree")?; + let tag_tree = db_repository.tag_tree(db); let git_tags: HashSet<_> = git_repository .references() @@ -282,7 +299,7 @@ fn tag_index_scan( .filter(|v| v.name_bytes().starts_with(b"refs/tags/")) .map(|v| String::from_utf8_lossy(v.name_bytes()).into_owned()) .collect(); - let indexed_tags: HashSet = tag_tree.list().into_iter().collect(); + let indexed_tags: HashSet = tag_tree.list()?.into_iter().collect(); // insert any git tags that are missing from the index for tag_name in git_tags.difference(&indexed_tags) { @@ -330,7 +347,7 @@ fn open_repo + Debug>( scan_path: &Path, relative_path: P, db_repository: &Repository<'_>, - db: &sled::Db, + db: &rocksdb::DB, ) -> Option { match git2::Repository::open(scan_path.join(relative_path.as_ref())) { Ok(v) => Some(v), diff --git a/src/database/schema/commit.rs b/src/database/schema/commit.rs index a608155..5d9587d 100644 --- a/src/database/schema/commit.rs +++ b/src/database/schema/commit.rs @@ -1,12 +1,18 @@ -use std::{borrow::Cow, ops::Deref}; +use std::{borrow::Cow, ops::Deref, sync::Arc}; +use anyhow::Context; use git2::{Oid, Signature}; +use rocksdb::{IteratorMode, ReadOptions}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use sled::IVec; use time::OffsetDateTime; +use tracing::debug; use yoke::{Yoke, Yokeable}; -use crate::database::schema::Yoked; +use crate::database::schema::{ + prefixes::{COMMIT_COUNT_FAMILY, COMMIT_FAMILY}, + repository::RepositoryId, + Yoked, +}; #[derive(Serialize, Deserialize, Debug, Yokeable)] pub struct Commit<'a> { @@ -38,10 +44,8 @@ impl<'a> Commit<'a> { } } - pub fn insert(&self, batch: &CommitTree, id: usize) { - batch - .insert(id.to_be_bytes(), bincode::serialize(self).unwrap()) - .unwrap(); + pub fn insert(&self, batch: &CommitTree, id: u64) -> anyhow::Result<()> { + batch.insert(id, self) } } @@ -104,66 +108,137 @@ impl<'a> From<&'a git2::Signature<'_>> for Author<'a> { } } -pub struct CommitTree(sled::Tree); - -impl Deref for CommitTree { - type Target = sled::Tree; - - fn deref(&self) -> &Self::Target { - &self.0 - } +pub struct CommitTree { + db: Arc, + pub prefix: Box<[u8]>, } pub type YokedCommit = Yoked>; impl CommitTree { - pub(super) fn new(tree: sled::Tree) -> Self { - Self(tree) + pub(super) fn new(db: Arc, repository: RepositoryId, reference: &str) -> Self { + let mut prefix = Vec::with_capacity(std::mem::size_of::() + reference.len() + 1); + prefix.extend_from_slice(&repository.to_be_bytes()); + prefix.extend_from_slice(reference.as_bytes()); + prefix.push(b'\0'); + + Self { + db, + prefix: prefix.into_boxed_slice(), + } + } + + pub fn drop_commits(&self) -> anyhow::Result<()> { + let mut to = self.prefix.clone(); + *to.last_mut().unwrap() += 1; + + let commit_cf = self + .db + .cf_handle(COMMIT_FAMILY) + .context("commit column family missing")?; + self.db.delete_range_cf(commit_cf, &self.prefix, &to)?; + + let commit_count_cf = self + .db + .cf_handle(COMMIT_COUNT_FAMILY) + .context("missing column family")?; + self.db.delete_cf(commit_count_cf, &self.prefix)?; + + Ok(()) } - pub fn fetch_latest_one(&self) -> Option { - self.last().unwrap().map(|(_, value)| { - // internally value is an Arc so it should already be stablederef but because - // of reasons unbeknownst to me, sled has its own Arc implementation so we need - // to box the value as well to get a stablederef... - let value = Box::new(value); + pub fn update_counter(&self, count: u64) -> anyhow::Result<()> { + let cf = self + .db + .cf_handle(COMMIT_COUNT_FAMILY) + .context("missing column family")?; + + self.db.put_cf(cf, &self.prefix, count.to_be_bytes())?; - Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(data)).unwrap() - }) + Ok(()) } - pub async fn fetch_latest(&self, amount: usize, offset: usize) -> Vec { - let latest_key = if let Some((latest_key, _)) = self.last().unwrap() { - let mut latest_key_bytes = [0; std::mem::size_of::()]; - latest_key_bytes.copy_from_slice(&latest_key); - usize::from_be_bytes(latest_key_bytes) - } else { - return vec![]; + pub fn len(&self) -> anyhow::Result { + let cf = self + .db + .cf_handle(COMMIT_COUNT_FAMILY) + .context("missing column family")?; + + let Some(res) = self.db.get_pinned_cf(cf, &self.prefix)? else { + return Ok(0); }; - let end = latest_key.saturating_sub(offset); - let start = end.saturating_sub(amount - 1); - let iter = self.range(start.to_be_bytes()..=end.to_be_bytes()); + let mut out = [0_u8; std::mem::size_of::()]; + out.copy_from_slice(&res); + Ok(u64::from_be_bytes(out)) + } + + fn insert(&self, id: u64, commit: &Commit<'_>) -> anyhow::Result<()> { + let cf = self + .db + .cf_handle(COMMIT_FAMILY) + .context("missing column family")?; + + let mut key = self.prefix.to_vec(); + key.extend_from_slice(&id.to_be_bytes()); - tokio::task::spawn_blocking(move || { - iter.rev() - .map(|res| { - let (_, value) = res?; + self.db.put_cf(cf, key, bincode::serialize(commit)?)?; - // internally value is an Arc so it should already be stablederef but because - // of reasons unbeknownst to me, sled has its own Arc implementation so we need - // to box the value as well to get a stablederef... - let value = Box::new(value); + Ok(()) + } + + pub fn fetch_latest_one(&self) -> Result, anyhow::Error> { + let cf = self + .db + .cf_handle(COMMIT_FAMILY) + .context("missing column family")?; + + self.db + .prefix_iterator_cf(cf, &self.prefix) + .next() + .transpose() + .context("Failed to instantiate iterator")? + .map(|(_, value)| Yoke::try_attach_to_cart(value, |data| bincode::deserialize(data))) + .transpose() + .context("Failed to decode commit") + } - Ok( - Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(data)) - .unwrap(), - ) + pub fn fetch_latest( + &self, + amount: u64, + offset: u64, + ) -> Result, anyhow::Error> { + let cf = self + .db + .cf_handle(COMMIT_FAMILY) + .context("missing column family")?; + + let latest_commit_id = self.len()?; + debug!("Searching from latest commit {latest_commit_id}"); + + let mut start_key = self.prefix.to_vec(); + start_key.extend_from_slice( + &latest_commit_id + .saturating_sub(offset) + .saturating_sub(amount) + .to_be_bytes(), + ); + + let mut end_key = self.prefix.to_vec(); + end_key.extend_from_slice(&(latest_commit_id.saturating_sub(offset)).to_be_bytes()); + + let mut opts = ReadOptions::default(); + opts.set_iterate_range(start_key.as_slice()..end_key.as_slice()); + + opts.set_prefix_same_as_start(true); + + self.db + .iterator_cf_opt(cf, opts, IteratorMode::End) + .map(|v| { + Yoke::try_attach_to_cart(v.context("failed to read commit")?.1, |data| { + bincode::deserialize(data).context("failed to deserialize") }) - .collect::, sled::Error>>() - .unwrap() - }) - .await - .unwrap() + }) + .collect::, anyhow::Error>>() } } diff --git a/src/database/schema/mod.rs b/src/database/schema/mod.rs index d977149..e3da120 100644 --- a/src/database/schema/mod.rs +++ b/src/database/schema/mod.rs @@ -1,6 +1,5 @@ #![allow(clippy::module_name_repetitions)] -use sled::IVec; use yoke::Yoke; pub mod commit; @@ -8,6 +7,6 @@ pub mod prefixes; pub mod repository; pub mod tag; -pub type Yoked = Yoke>; +pub type Yoked = Yoke>; pub const SCHEMA_VERSION: &str = "1"; diff --git a/src/database/schema/prefixes.rs b/src/database/schema/prefixes.rs index 2646915..299364b 100644 --- a/src/database/schema/prefixes.rs +++ b/src/database/schema/prefixes.rs @@ -1,53 +1,5 @@ -use std::path::Path; - -use crate::database::schema::repository::RepositoryId; - -#[repr(u8)] -pub enum TreePrefix { - Repository = 0, - SchemaVersion = 1, - Commit = 100, - Tag = 101, -} - -impl TreePrefix { - pub fn repository_id>(path: T) -> Vec { - let path = path.as_ref().to_string_lossy(); - let path_bytes = path.as_bytes(); - - let mut prefixed = Vec::with_capacity(path_bytes.len() + std::mem::size_of::()); - prefixed.push(Self::Repository as u8); - prefixed.extend_from_slice(path_bytes); - - prefixed - } - - pub fn commit_id>(repository: RepositoryId, reference: T) -> Vec { - let reference = reference.as_ref(); - - let mut prefixed = Vec::with_capacity( - reference.len() - + std::mem::size_of::() - + std::mem::size_of::(), - ); - prefixed.push(TreePrefix::Commit as u8); - prefixed.extend_from_slice(&repository.to_ne_bytes()); - prefixed.extend_from_slice(reference); - - prefixed - } - - pub fn tag_id(repository: RepositoryId) -> Vec { - let mut prefixed = Vec::with_capacity( - std::mem::size_of::() + std::mem::size_of::(), - ); - prefixed.push(TreePrefix::Tag as u8); - prefixed.extend_from_slice(&repository.to_ne_bytes()); - - prefixed - } - - pub fn schema_version() -> &'static [u8] { - &[TreePrefix::SchemaVersion as u8] - } -} +pub const COMMIT_FAMILY: &str = "commit"; +pub const COMMIT_COUNT_FAMILY: &str = "commit_count"; +pub const REPOSITORY_FAMILY: &str = "repository"; +pub const TAG_FAMILY: &str = "tag"; +pub const REFERENCE_FAMILY: &str = "repository_refs"; diff --git a/src/database/schema/repository.rs b/src/database/schema/repository.rs index 42b0f62..763fc15 100644 --- a/src/database/schema/repository.rs +++ b/src/database/schema/repository.rs @@ -1,17 +1,22 @@ -use std::{borrow::Cow, collections::BTreeMap, ops::Deref, path::Path}; +use std::{borrow::Cow, collections::BTreeMap, ops::Deref, path::Path, sync::Arc}; use anyhow::{Context, Result}; -use nom::AsBytes; +use rand::random; +use rocksdb::IteratorMode; use serde::{Deserialize, Serialize}; -use sled::IVec; use time::OffsetDateTime; use yoke::{Yoke, Yokeable}; -use crate::database::schema::{commit::CommitTree, prefixes::TreePrefix, tag::TagTree, Yoked}; +use crate::database::schema::{ + commit::CommitTree, + prefixes::{COMMIT_FAMILY, REFERENCE_FAMILY, REPOSITORY_FAMILY, TAG_FAMILY}, + tag::TagTree, + Yoked, +}; #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Hash, Yokeable)] pub struct Repository<'a> { - /// The ID of the repository, as stored in `sled` + /// The ID of the repository, as stored in `RocksDB` pub id: RepositoryId, /// The "clean name" of the repository (ie. `hello-world.git`) #[serde(borrow)] @@ -33,96 +38,117 @@ pub struct Repository<'a> { pub type YokedRepository = Yoked>; impl Repository<'_> { - pub fn exists>(database: &sled::Db, path: P) -> bool { - database - .contains_key(TreePrefix::repository_id(path)) - .unwrap_or_default() + pub fn exists>(database: &rocksdb::DB, path: P) -> Result { + let cf = database + .cf_handle(REPOSITORY_FAMILY) + .context("repository column family missing")?; + let path = path.as_ref().to_str().context("invalid path")?; + + Ok(database.get_pinned_cf(cf, path)?.is_some()) } - pub fn fetch_all(database: &sled::Db) -> Result> { + pub fn fetch_all(database: &rocksdb::DB) -> Result> { + let cf = database + .cf_handle(REPOSITORY_FAMILY) + .context("repository column family missing")?; + database - .scan_prefix([TreePrefix::Repository as u8]) + .full_iterator_cf(cf, IteratorMode::Start) .filter_map(Result::ok) .map(|(key, value)| { - // strip the prefix we've just scanned for - let key = String::from_utf8_lossy(&key[1..]).to_string(); - - // internally value is an Arc so it should already be stablederef but because - // of reasons unbeknownst to me, sled has its own Arc implementation so we need - // to box the value as well to get a stablederef... - let value = Box::new(value); - - let value = - Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(data))?; + let key = String::from_utf8(key.into_vec()).context("invalid repo name")?; + let value = Yoke::try_attach_to_cart(value, |data| bincode::deserialize(data))?; Ok((key, value)) }) .collect() } - pub fn insert>(&self, database: &sled::Db, path: P) { - database - .insert( - TreePrefix::repository_id(path), - bincode::serialize(self).unwrap(), - ) - .unwrap(); - } + pub fn insert>(&self, database: &rocksdb::DB, path: P) -> Result<()> { + let cf = database + .cf_handle(REPOSITORY_FAMILY) + .context("repository column family missing")?; + let path = path.as_ref().to_str().context("invalid path")?; + + database.put_cf(cf, path, bincode::serialize(self)?)?; - pub fn delete>(&self, database: &sled::Db, path: P) -> Result<()> { - for reference in self.heads(database) { - database.drop_tree(TreePrefix::commit_id(self.id, &reference))?; - } + Ok(()) + } - database.drop_tree(TreePrefix::tag_id(self.id))?; - database.remove(TreePrefix::repository_id(path))?; + pub fn delete>(&self, database: &rocksdb::DB, path: P) -> Result<()> { + let start_id = self.id.to_be_bytes(); + let mut end_id = self.id.to_be_bytes(); + *end_id.last_mut().unwrap() += 1; + + // delete commits + let commit_cf = database + .cf_handle(COMMIT_FAMILY) + .context("commit column family missing")?; + database.delete_range_cf(commit_cf, start_id, end_id)?; + + // delete tags + let tag_cf = database + .cf_handle(TAG_FAMILY) + .context("tag column family missing")?; + database.delete_range_cf(tag_cf, start_id, end_id)?; + + // delete self + let repo_cf = database + .cf_handle(REPOSITORY_FAMILY) + .context("repository column family missing")?; + let path = path.as_ref().to_str().context("invalid path")?; + database.delete_cf(repo_cf, path)?; Ok(()) } - pub fn open>(database: &sled::Db, path: P) -> Result> { - database - .get(TreePrefix::repository_id(path)) - .context("Failed to open indexed repository")? - .map(|value| { - // internally value is an Arc so it should already be stablederef but because - // of reasons unbeknownst to me, sled has its own Arc implementation so we need - // to box the value as well to get a stablederef... - let value = Box::new(value); - - Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(data)) - .context("Failed to deserialise indexed repository") - }) - .transpose() + pub fn open>( + database: &rocksdb::DB, + path: P, + ) -> Result> { + let cf = database + .cf_handle(REPOSITORY_FAMILY) + .context("repository column family missing")?; + + let path = path.as_ref().to_str().context("invalid path")?; + let Some(value) = database.get_cf(cf, path)? else { + return Ok(None); + }; + + Yoke::try_attach_to_cart(value.into_boxed_slice(), |data| bincode::deserialize(data)) + .map(Some) + .context("Failed to open repository") } - pub fn commit_tree(&self, database: &sled::Db, reference: &str) -> Result { - let tree = database - .open_tree(TreePrefix::commit_id(self.id, reference)) - .context("Failed to open commit tree")?; + pub fn commit_tree(&self, database: Arc, reference: &str) -> CommitTree { + CommitTree::new(database, self.id, reference) + } - Ok(CommitTree::new(tree)) + pub fn tag_tree(&self, database: Arc) -> TagTree { + TagTree::new(database, self.id) } - pub fn tag_tree(&self, database: &sled::Db) -> Result { - let tree = database - .open_tree(TreePrefix::tag_id(self.id)) - .context("Failed to open tag tree")?; + pub fn replace_heads(&self, database: &rocksdb::DB, new_heads: &[String]) -> Result<()> { + let cf = database + .cf_handle(REFERENCE_FAMILY) + .context("missing reference column family")?; - Ok(TagTree::new(tree)) + database.put_cf(cf, self.id.to_be_bytes(), bincode::serialize(new_heads)?)?; + + Ok(()) } - pub fn heads(&self, database: &sled::Db) -> Vec { - let prefix = TreePrefix::commit_id(self.id, ""); + pub fn heads(&self, database: &rocksdb::DB) -> Result, Box<[u8]>>> { + let cf = database + .cf_handle(REFERENCE_FAMILY) + .context("missing reference column family")?; - database - .tree_names() - .into_iter() - .filter_map(|v| { - v.strip_prefix(prefix.as_bytes()) - .map(|v| String::from_utf8_lossy(v).into_owned()) - }) - .collect() + let Some(bytes) = database.get_cf(cf, self.id.to_be_bytes())? else { + return Ok(Yoke::attach_to_cart(Box::default(), |_| vec![])); + }; + + Yoke::try_attach_to_cart(Box::from(bytes), |bytes| bincode::deserialize(bytes)) + .context("failed to deserialize heads") } } @@ -130,8 +156,8 @@ impl Repository<'_> { pub struct RepositoryId(pub(super) u64); impl RepositoryId { - pub fn new(db: &sled::Db) -> Self { - Self(db.generate_id().unwrap()) + pub fn new() -> Self { + Self(random()) } } diff --git a/src/database/schema/tag.rs b/src/database/schema/tag.rs index 0456020..d3fd4d2 100644 --- a/src/database/schema/tag.rs +++ b/src/database/schema/tag.rs @@ -1,11 +1,13 @@ -use std::{collections::HashSet, ops::Deref}; +use std::{collections::HashSet, sync::Arc}; +use anyhow::Context; use git2::Signature; use serde::{Deserialize, Serialize}; -use sled::IVec; use yoke::{Yoke, Yokeable}; -use crate::database::schema::{commit::Author, Yoked}; +use crate::database::schema::{ + commit::Author, prefixes::TAG_FAMILY, repository::RepositoryId, Yoked, +}; #[derive(Serialize, Deserialize, Debug, Yokeable)] pub struct Tag<'a> { @@ -21,65 +23,90 @@ impl<'a> Tag<'a> { } pub fn insert(&self, batch: &TagTree, name: &str) -> Result<(), anyhow::Error> { - batch.insert(name.as_bytes(), bincode::serialize(self)?)?; - - Ok(()) + batch.insert(name, self) } } -pub struct TagTree(sled::Tree); - -impl Deref for TagTree { - type Target = sled::Tree; - - fn deref(&self) -> &Self::Target { - &self.0 - } +pub struct TagTree { + db: Arc, + prefix: RepositoryId, } pub type YokedTag = Yoked>; impl TagTree { - pub(super) fn new(tree: sled::Tree) -> Self { - Self(tree) + pub(super) fn new(db: Arc, prefix: RepositoryId) -> Self { + Self { db, prefix } } - pub fn remove(&self, name: &str) -> Result { - self.0.remove(name).map(|v| v.is_some()) + pub fn insert(&self, name: &str, value: &Tag<'_>) -> anyhow::Result<()> { + let cf = self + .db + .cf_handle(TAG_FAMILY) + .context("missing tag column family")?; + + let mut db_name = self.prefix.to_be_bytes().to_vec(); + db_name.extend_from_slice(name.as_ref()); + + self.db.put_cf(cf, db_name, bincode::serialize(value)?)?; + + Ok(()) } - pub fn list(&self) -> HashSet { - self.iter() - .keys() + pub fn remove(&self, name: &str) -> anyhow::Result<()> { + let cf = self + .db + .cf_handle(TAG_FAMILY) + .context("missing tag column family")?; + + let mut db_name = self.prefix.to_be_bytes().to_vec(); + db_name.extend_from_slice(name.as_ref()); + self.db.delete_cf(cf, db_name)?; + + Ok(()) + } + + pub fn list(&self) -> anyhow::Result> { + let cf = self + .db + .cf_handle(TAG_FAMILY) + .context("missing tag column family")?; + + Ok(self + .db + .prefix_iterator_cf(cf, self.prefix.to_be_bytes()) .filter_map(Result::ok) - .map(|v| String::from_utf8_lossy(&v).into_owned()) - .collect() + .filter_map(|(k, _)| { + Some( + String::from_utf8_lossy(k.strip_prefix(&self.prefix.to_be_bytes())?) + .to_string(), + ) + }) + .collect()) } - pub fn fetch_all(&self) -> Vec<(String, YokedTag)> { - let mut res = self - .iter() - .map(|res| { - let (name, value) = res?; + pub fn fetch_all(&self) -> anyhow::Result> { + let cf = self + .db + .cf_handle(TAG_FAMILY) + .context("missing tag column family")?; - let name = String::from_utf8_lossy(&name) - .strip_prefix("refs/tags/") - .unwrap() + let mut res = self + .db + .prefix_iterator_cf(cf, self.prefix.to_be_bytes()) + .filter_map(Result::ok) + .filter_map(|(name, value)| { + let name = String::from_utf8_lossy(name.strip_prefix(&self.prefix.to_be_bytes())?) + .strip_prefix("refs/tags/")? .to_string(); - // internally value is an Arc so it should already be stablederef but because - // of reasons unbeknownst to me, sled has its own Arc implementation so we need - // to box the value as well to get a stablederef... - let value = Box::new(value); - - Ok(( - name, - Yoke::try_attach_to_cart(value, |data: &IVec| bincode::deserialize(data)) - .unwrap(), - )) + Some((name, value)) + }) + .map(|(name, value)| { + let value = Yoke::try_attach_to_cart(value, |data| bincode::deserialize(data))?; + Ok((name, value)) }) - .collect::, sled::Error>>() - .unwrap(); + .collect::>>()?; res.sort_unstable_by(|a, b| { let a_tagger = a.1.get().tagger.as_ref().map(|v| v.time); @@ -87,6 +114,6 @@ impl TagTree { b_tagger.cmp(&a_tagger) }); - res + Ok(res) } } diff --git a/src/git.rs b/src/git.rs index 8c2981d..1fc0379 100644 --- a/src/git.rs +++ b/src/git.rs @@ -110,7 +110,7 @@ impl OpenRepository { .peel_to_tree() .context("Couldn't find tree for reference")? } else { - let head = repo.head()?; + let head = repo.head().context("Failed to find HEAD")?; head.peel_to_tree() .context("Couldn't find tree from HEAD")? }; diff --git a/src/main.rs b/src/main.rs index eea8226..3792da1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,10 +22,11 @@ use axum::{ }; use bat::assets::HighlightingAssets; use clap::Parser; -use database::schema::{prefixes::TreePrefix, SCHEMA_VERSION}; +use database::schema::SCHEMA_VERSION; +use nom::AsBytes; use once_cell::sync::{Lazy, OnceCell}; +use rocksdb::{Options, SliceTransform}; use sha2::{digest::FixedOutput, Digest}; -use sled::Db; use syntect::html::ClassStyle; use tokio::{ signal::unix::{signal, SignalKind}, @@ -34,8 +35,15 @@ use tokio::{ use tower_http::cors::CorsLayer; use tower_layer::layer_fn; use tracing::{error, info, instrument, warn}; - -use crate::{git::Git, layers::logger::LoggingMiddleware}; +use tracing_subscriber::EnvFilter; + +use crate::{ + database::schema::prefixes::{ + COMMIT_COUNT_FAMILY, COMMIT_FAMILY, REFERENCE_FAMILY, REPOSITORY_FAMILY, TAG_FAMILY, + }, + git::Git, + layers::logger::LoggingMiddleware, +}; mod database; mod git; @@ -54,9 +62,9 @@ static DARK_HIGHLIGHT_CSS_HASH: OnceCell> = OnceCell::new(); #[derive(Parser, Debug)] #[clap(author, version, about)] pub struct Args { - /// Path to a directory in which the Sled database should be stored, will be created if it doesn't already exist + /// Path to a directory in which the RocksDB database should be stored, will be created if it doesn't already exist /// - /// The Sled database is very quick to generate, so this can be pointed to temporary storage + /// The RocksDB database is very quick to generate, so this can be pointed to temporary storage #[clap(short, long, value_parser)] db_store: PathBuf, /// The socket address to bind to (eg. 0.0.0.0:3333) @@ -101,7 +109,7 @@ impl FromStr for RefreshInterval { async fn main() -> Result<(), anyhow::Error> { let args: Args = Args::parse(); - let subscriber = tracing_subscriber::fmt(); + let subscriber = tracing_subscriber::fmt().with_env_filter(EnvFilter::from_default_env()); #[cfg(debug_assertions)] let subscriber = subscriber.pretty(); subscriber.init(); @@ -198,35 +206,62 @@ async fn main() -> Result<(), anyhow::Error> { } } -fn open_db(args: &Args) -> Result { - let db = sled::Config::default() - .use_compression(true) - .path(&args.db_store) - .open() - .context("Failed to open database")?; - - let needs_schema_regen = match db.get(TreePrefix::schema_version())? { - Some(v) if v != SCHEMA_VERSION.as_bytes() => Some(Some(v)), - Some(_) => None, - None => Some(None), - }; +fn open_db(args: &Args) -> Result, anyhow::Error> { + loop { + let mut db_options = Options::default(); + db_options.create_missing_column_families(true); + db_options.create_if_missing(true); + + let mut commit_family_options = Options::default(); + commit_family_options.set_prefix_extractor(SliceTransform::create( + "commit_prefix", + |input| input.split(|&c| c == b'\0').next().unwrap_or(input), + None, + )); + + let mut tag_family_options = Options::default(); + tag_family_options.set_prefix_extractor(SliceTransform::create_fixed_prefix( + std::mem::size_of::(), + )); // repository id prefix + + let db = rocksdb::DB::open_cf_with_opts( + &db_options, + &args.db_store, + vec![ + (COMMIT_FAMILY, commit_family_options), + (REPOSITORY_FAMILY, Options::default()), + (TAG_FAMILY, tag_family_options), + (REFERENCE_FAMILY, Options::default()), + (COMMIT_COUNT_FAMILY, Options::default()), + ], + )?; + + let needs_schema_regen = match db.get("schema_version")? { + Some(v) if v.as_bytes() != SCHEMA_VERSION.as_bytes() => Some(Some(v)), + Some(_) => None, + None => { + db.put("schema_version", SCHEMA_VERSION)?; + None + } + }; - if let Some(version) = needs_schema_regen { - let old_version = version - .as_deref() - .map_or(Cow::Borrowed("unknown"), String::from_utf8_lossy); + if let Some(version) = needs_schema_regen { + let old_version = version + .as_deref() + .map_or(Cow::Borrowed("unknown"), String::from_utf8_lossy); - warn!("Clearing outdated database ({old_version} != {SCHEMA_VERSION})"); + warn!("Clearing outdated database ({old_version} != {SCHEMA_VERSION})"); - db.clear()?; - db.insert(TreePrefix::schema_version(), SCHEMA_VERSION)?; + drop(db); + rocksdb::DB::destroy(&Options::default(), &args.db_store)?; + } else { + break Ok(Arc::new(db)); + } } - - Ok(db) } async fn run_indexer( - db: Db, + db: Arc, scan_path: PathBuf, refresh_interval: RefreshInterval, ) -> Result<(), tokio::task::JoinError> { diff --git a/src/methods/index.rs b/src/methods/index.rs index 747ee22..40a8b8c 100644 --- a/src/methods/index.rs +++ b/src/methods/index.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::{collections::BTreeMap, sync::Arc}; use anyhow::Context; use askama::Template; @@ -13,7 +13,9 @@ pub struct View<'a> { pub repositories: BTreeMap, Vec<&'a Repository<'a>>>, } -pub async fn handle(Extension(db): Extension) -> Result { +pub async fn handle( + Extension(db): Extension>, +) -> Result { let mut repositories: BTreeMap, Vec<&Repository<'_>>> = BTreeMap::new(); let fetched = tokio::task::spawn_blocking(move || Repository::fetch_all(&db)) diff --git a/src/methods/repo/log.rs b/src/methods/repo/log.rs index e43a190..80f4b1e 100644 --- a/src/methods/repo/log.rs +++ b/src/methods/repo/log.rs @@ -1,3 +1,5 @@ +use std::sync::Arc; + use anyhow::Context; use askama::Template; use axum::{extract::Query, response::Response, Extension}; @@ -16,7 +18,7 @@ use crate::{ #[derive(Deserialize)] pub struct UriQuery { #[serde(rename = "ofs")] - offset: Option, + offset: Option, #[serde(rename = "h")] branch: Option, } @@ -26,51 +28,55 @@ pub struct UriQuery { pub struct View<'a> { repo: Repository, commits: Vec<&'a crate::database::schema::commit::Commit<'a>>, - next_offset: Option, + next_offset: Option, branch: Option, } pub async fn handle( Extension(repo): Extension, - Extension(db): Extension, + Extension(db): Extension>, Query(query): Query, ) -> Result { - let offset = query.offset.unwrap_or(0); - - let repository = crate::database::schema::repository::Repository::open(&db, &*repo)? - .context("Repository does not exist")?; - let mut commits = - get_branch_commits(&repository, &db, query.branch.as_deref(), 101, offset).await?; - - let next_offset = if commits.len() == 101 { - commits.pop(); - Some(offset + 100) - } else { - None - }; - - let commits = commits.iter().map(Yoke::get).collect(); - - Ok(into_response(&View { - repo, - commits, - next_offset, - branch: query.branch, - })) + tokio::task::spawn_blocking(move || { + let offset = query.offset.unwrap_or(0); + + let repository = crate::database::schema::repository::Repository::open(&db, &*repo)? + .context("Repository does not exist")?; + let mut commits = + get_branch_commits(&repository, &db, query.branch.as_deref(), 101, offset)?; + + let next_offset = if commits.len() == 101 { + commits.pop(); + Some(offset + 100) + } else { + None + }; + + let commits = commits.iter().map(Yoke::get).collect(); + + Ok(into_response(&View { + repo, + commits, + next_offset, + branch: query.branch, + })) + }) + .await + .context("Failed to attach to tokio task")? } -pub async fn get_branch_commits( +pub fn get_branch_commits( repository: &YokedRepository, - database: &sled::Db, + database: &Arc, branch: Option<&str>, - amount: usize, - offset: usize, + amount: u64, + offset: u64, ) -> Result> { if let Some(reference) = branch { let commit_tree = repository .get() - .commit_tree(database, &format!("refs/heads/{reference}"))?; - let commit_tree = commit_tree.fetch_latest(amount, offset).await; + .commit_tree(database.clone(), &format!("refs/heads/{reference}")); + let commit_tree = commit_tree.fetch_latest(amount, offset)?; if !commit_tree.is_empty() { return Ok(commit_tree); @@ -78,8 +84,8 @@ pub async fn get_branch_commits( let tag_tree = repository .get() - .commit_tree(database, &format!("refs/tags/{reference}"))?; - let tag_tree = tag_tree.fetch_latest(amount, offset).await; + .commit_tree(database.clone(), &format!("refs/tags/{reference}")); + let tag_tree = tag_tree.fetch_latest(amount, offset)?; return Ok(tag_tree); } @@ -91,8 +97,8 @@ pub async fn get_branch_commits( .into_iter() .chain(DEFAULT_BRANCHES.into_iter()) { - let commit_tree = repository.get().commit_tree(database, branch)?; - let commits = commit_tree.fetch_latest(amount, offset).await; + let commit_tree = repository.get().commit_tree(database.clone(), branch); + let commits = commit_tree.fetch_latest(amount, offset)?; if !commits.is_empty() { return Ok(commits); diff --git a/src/methods/repo/mod.rs b/src/methods/repo/mod.rs index 8bf8040..6280480 100644 --- a/src/methods/repo/mod.rs +++ b/src/methods/repo/mod.rs @@ -125,10 +125,10 @@ where let db = request .extensions() - .get::() + .get::>() .expect("db extension missing"); if path.as_os_str().is_empty() - || !crate::database::schema::repository::Repository::exists(db, &uri) + || !crate::database::schema::repository::Repository::exists(db, &uri).unwrap_or_default() { return RepositoryNotFound.into_response(); } diff --git a/src/methods/repo/refs.rs b/src/methods/repo/refs.rs index a085504..660065b 100644 --- a/src/methods/repo/refs.rs +++ b/src/methods/repo/refs.rs @@ -20,33 +20,32 @@ pub struct View { branch: Option>, } -#[allow(clippy::unused_async)] pub async fn handle( Extension(repo): Extension, - Extension(db): Extension, + Extension(db): Extension>, ) -> Result { - let repository = crate::database::schema::repository::Repository::open(&db, &*repo)? - .context("Repository does not exist")?; + tokio::task::spawn_blocking(move || { + let repository = crate::database::schema::repository::Repository::open(&db, &*repo)? + .context("Repository does not exist")?; + + let mut heads = BTreeMap::new(); + for head in repository.get().heads(&db)?.get() { + let commit_tree = repository.get().commit_tree(db.clone(), head); + let name = head.strip_prefix("refs/heads/"); + + if let (Some(name), Some(commit)) = (name, commit_tree.fetch_latest_one()?) { + heads.insert(name.to_string(), commit); + } + } - let mut heads = BTreeMap::new(); - for head in repository.get().heads(&db) { - let commit_tree = repository.get().commit_tree(&db, &head)?; - let name = head.strip_prefix("refs/heads/"); + let tags = repository.get().tag_tree(db).fetch_all()?; - if let (Some(name), Some(commit)) = (name, commit_tree.fetch_latest_one()) { - heads.insert(name.to_string(), commit); - } - } - - let tags = repository - .get() - .tag_tree(&db) - .context("Failed to fetch indexed tags")? - .fetch_all(); - - Ok(into_response(&View { - repo, - refs: Refs { heads, tags }, - branch: None, - })) + Ok(into_response(&View { + repo, + refs: Refs { heads, tags }, + branch: None, + })) + }) + .await + .context("Failed to attach to tokio task")? } diff --git a/src/methods/repo/summary.rs b/src/methods/repo/summary.rs index 0d4bb07..3d6d5b4 100644 --- a/src/methods/repo/summary.rs +++ b/src/methods/repo/summary.rs @@ -25,40 +25,40 @@ pub struct View<'a> { pub async fn handle( Extension(repo): Extension, - Extension(db): Extension, + Extension(db): Extension>, ) -> Result { - let repository = crate::database::schema::repository::Repository::open(&db, &*repo)? - .context("Repository does not exist")?; - let commits = get_default_branch_commits(&repository, &db).await?; - let commit_list = commits.iter().map(Yoke::get).collect(); + tokio::task::spawn_blocking(move || { + let repository = crate::database::schema::repository::Repository::open(&db, &*repo)? + .context("Repository does not exist")?; + let commits = get_default_branch_commits(&repository, &db)?; + let commit_list = commits.iter().map(Yoke::get).collect(); - let mut heads = BTreeMap::new(); - for head in repository.get().heads(&db) { - let commit_tree = repository.get().commit_tree(&db, &head)?; - let name = head.strip_prefix("refs/heads/"); + let mut heads = BTreeMap::new(); + for head in repository.get().heads(&db)?.get() { + let commit_tree = repository.get().commit_tree(db.clone(), head); + let name = head.strip_prefix("refs/heads/"); - if let (Some(name), Some(commit)) = (name, commit_tree.fetch_latest_one()) { - heads.insert(name.to_string(), commit); + if let (Some(name), Some(commit)) = (name, commit_tree.fetch_latest_one()?) { + heads.insert(name.to_string(), commit); + } } - } - let tags = repository - .get() - .tag_tree(&db) - .context("Failed to fetch indexed tags")? - .fetch_all(); + let tags = repository.get().tag_tree(db).fetch_all()?; - Ok(into_response(&View { - repo, - refs: Refs { heads, tags }, - commit_list, - branch: None, - })) + Ok(into_response(&View { + repo, + refs: Refs { heads, tags }, + commit_list, + branch: None, + })) + }) + .await + .context("Failed to attach to tokio task")? } -pub async fn get_default_branch_commits( +pub fn get_default_branch_commits( repository: &YokedRepository, - database: &sled::Db, + database: &Arc, ) -> Result> { for branch in repository .get() @@ -67,8 +67,8 @@ pub async fn get_default_branch_commits( .into_iter() .chain(DEFAULT_BRANCHES.into_iter()) { - let commit_tree = repository.get().commit_tree(database, branch)?; - let commits = commit_tree.fetch_latest(11, 0).await; + let commit_tree = repository.get().commit_tree(database.clone(), branch); + let commits = commit_tree.fetch_latest(11, 0)?; if !commits.is_empty() { return Ok(commits);