Skip to content

Commit

Permalink
Index tags to sled and read them from views
Browse files Browse the repository at this point in the history
  • Loading branch information
w4 committed Jul 22, 2022
1 parent b4c4b87 commit c220112
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 112 deletions.
66 changes: 65 additions & 1 deletion src/database/indexer.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
use git2::Sort;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use time::OffsetDateTime;
use tracing::{info, info_span};

use crate::database::schema::{
commit::Commit,
repository::{Repository, RepositoryId},
tag::Tag,
};

/// Runs one complete index update against `db`.
///
/// Calls `update_repository_metadata`, `update_repository_reflog` and
/// `update_repository_tags` in that order for the scan root, then flushes the
/// database before returning. The whole pass is wrapped in an `index_update`
/// tracing span.
///
/// # Panics
///
/// Panics if flushing the database fails.
pub fn run(db: &sled::Db) {
    // NOTE(review): the scan root is a hard-coded developer path; it should
    // eventually come from configuration.
    const SCAN_ROOT: &str = "/Users/jordan/Code/test-git";

    let index_span = info_span!("index_update");
    let _span_guard = index_span.enter();

    info!("Starting index update");

    let scan_path = Path::new(SCAN_ROOT);
    update_repository_metadata(scan_path, db);
    update_repository_reflog(scan_path, db);
    update_repository_tags(scan_path, db);

    info!("Flushing to disk");
    db.flush().unwrap();

    info!("Finished index update");
}

fn update_repository_metadata(scan_path: &Path, db: &sled::Db) {
Expand Down Expand Up @@ -52,7 +66,7 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) {
}

let span = info_span!(
"index_update",
"branch_index_update",
reference = reference_name.as_ref(),
repository = relative_path
);
Expand Down Expand Up @@ -98,6 +112,56 @@ fn update_repository_reflog(scan_path: &Path, db: &sled::Db) {
}
}

/// Synchronises every indexed repository's tag tree with the annotated tags on disk.
///
/// For each repository returned by `Repository::fetch_all`, the git repository at
/// `scan_path/<relative_path>` is opened and its `refs/tags/*` references are
/// compared with the names stored in the repository's tag tree:
///
/// * tags present in git but missing from the index are inserted together with
///   their tagger;
/// * names present in the index that are no longer backed by an annotated tag
///   in git are removed.
///
/// Only references that peel to a tag object count as git tags. Lightweight
/// tags are never inserted, so treating them as "present" would keep a stale
/// index entry alive when an annotated tag is replaced by a lightweight one,
/// and would re-probe every lightweight tag on each run.
///
/// References that fail to load while iterating are skipped (best effort).
///
/// # Panics
///
/// Panics if the repository list cannot be fetched, a repository or its tag
/// tree cannot be opened, the reference list cannot be read, or a tag name
/// collected from git can no longer be resolved.
fn update_repository_tags(scan_path: &Path, db: &sled::Db) {
    for (relative_path, db_repository) in Repository::fetch_all(db).unwrap() {
        let git_repository = git2::Repository::open(scan_path.join(&relative_path)).unwrap();

        let tag_tree = db_repository.get().tag_tree(db).unwrap();

        let git_tags: HashSet<String> = git_repository
            .references()
            .unwrap()
            .filter_map(Result::ok)
            .filter(|v| v.name_bytes().starts_with(b"refs/tags/"))
            // only annotated tags are indexed, so only they count as present
            .filter(|v| v.peel_to_tag().is_ok())
            .map(|v| String::from_utf8_lossy(v.name_bytes()).into_owned())
            .collect();
        // `list` already returns a `HashSet<String>`, no need to re-collect it
        let indexed_tags = tag_tree.list();

        // insert any git tags that are missing from the index
        for tag_name in git_tags.difference(&indexed_tags) {
            let span = info_span!(
                "tag_index_update",
                reference = tag_name,
                repository = relative_path
            );
            let _entered = span.enter();

            let reference = git_repository.find_reference(tag_name).unwrap();

            if let Ok(tag) = reference.peel_to_tag() {
                info!("Inserting newly discovered tag to index");

                Tag::new(tag.tagger().as_ref()).insert(&tag_tree, tag_name);
            }
        }

        // remove any indexed tags that no longer exist in git as annotated tags
        for tag_name in indexed_tags.difference(&git_tags) {
            let span = info_span!(
                "tag_index_update",
                reference = tag_name,
                repository = relative_path
            );
            let _entered = span.enter();

            info!("Removing stale tag from index");

            tag_tree.remove(tag_name);
        }
    }
}

/// Returns `full_path` with the leading `relative_to` components removed.
///
/// The result borrows from `full_path`.
///
/// # Panics
///
/// Panics if `full_path` does not start with `relative_to`.
fn get_relative_path<'a>(relative_to: &Path, full_path: &'a Path) -> &'a Path {
    let stripped = full_path.strip_prefix(relative_to);
    stripped.unwrap()
}
Expand Down
2 changes: 2 additions & 0 deletions src/database/schema/commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ impl<'a, 'de: 'a> Deserialize<'de> for CommitHash<'a> {

/// A person's name and e-mail address together with a timestamp.
///
/// `name` and `email` are `Cow`s marked `#[serde(borrow)]`, which lets serde
/// borrow them from the input buffer during deserialisation instead of
/// allocating new strings.
#[derive(Serialize, Deserialize, Debug)]
pub struct Author<'a> {
#[serde(borrow)]
pub name: Cow<'a, str>,
#[serde(borrow)]
pub email: Cow<'a, str>,
// NOTE(review): presumably the time of the signature this author was built from — confirm.
pub time: OffsetDateTime,
}
Expand Down
1 change: 1 addition & 0 deletions src/database/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@ use yoke::Yoke;
pub mod commit;
pub mod prefixes;
pub mod repository;
pub mod tag;

/// A `Yoke` holding a `T` whose cart is a boxed sled `IVec`.
pub type Yoked<T> = Yoke<T, Box<IVec>>;
12 changes: 11 additions & 1 deletion src/database/schema/prefixes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::path::Path;
/// One-byte discriminators used as the first byte of sled tree names.
///
/// Each variant has exactly one discriminant value; `Tag` replaces the former
/// placeholder `_Tag` (two variants sharing the value 101 do not compile).
pub enum TreePrefix {
    Repository = 0,
    Commit = 100,
    Tag = 101,
}

impl TreePrefix {
Expand Down Expand Up @@ -34,4 +34,14 @@ impl TreePrefix {

prefixed
}

/// Builds the name of the sled tree that stores the tags of `repository`.
///
/// The name is the `TreePrefix::Tag` byte followed by the bytes returned by
/// `repository.to_ne_bytes()`.
// NOTE(review): `to_ne_bytes` suggests native-endian encoding, which would make
// tree names differ between architectures — confirm this is intended.
pub fn tag_id(repository: RepositoryId) -> Vec<u8> {
    let capacity = std::mem::size_of::<TreePrefix>() + std::mem::size_of::<RepositoryId>();

    let mut tree_name = Vec::with_capacity(capacity);
    tree_name.push(TreePrefix::Tag as u8);
    tree_name.extend_from_slice(&repository.to_ne_bytes());
    tree_name
}
}
23 changes: 23 additions & 0 deletions src/database/schema/repository.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::database::schema::commit::CommitTree;
use crate::database::schema::prefixes::TreePrefix;
use crate::database::schema::tag::TagTree;
use crate::database::schema::Yoked;
use anyhow::{Context, Result};
use nom::AsBytes;
use serde::{Deserialize, Serialize};
use sled::IVec;
use std::borrow::Cow;
Expand Down Expand Up @@ -85,6 +87,27 @@ impl Repository<'_> {

Ok(CommitTree::new(tree))
}

/// Opens the sled tree that holds this repository's tags.
///
/// # Errors
///
/// Returns an error with the context "Failed to open tag tree" if sled cannot
/// open the tree.
pub fn tag_tree(&self, database: &sled::Db) -> Result<TagTree> {
    let tree_name = TreePrefix::tag_id(self.id);

    database
        .open_tree(tree_name)
        .map(TagTree::new)
        .context("Failed to open tag tree")
}

/// Lists the reference names for which this repository has a commit tree.
///
/// Every tree name in `database` that starts with the bytes of
/// `TreePrefix::commit_id(self.id, "")` is kept; the remainder after that
/// prefix is returned as a (lossily decoded) UTF-8 string.
pub fn heads(&self, database: &sled::Db) -> Vec<String> {
    let prefix = TreePrefix::commit_id(self.id, "");

    let mut heads = Vec::new();
    for tree_name in database.tree_names() {
        if let Some(reference) = tree_name.strip_prefix(prefix.as_bytes()) {
            heads.push(String::from_utf8_lossy(reference).into_owned());
        }
    }
    heads
}
}

#[derive(Serialize, Deserialize, Debug, Copy, Clone, PartialEq, Eq, Hash)]
Expand Down
92 changes: 92 additions & 0 deletions src/database/schema/tag.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
use crate::database::schema::commit::Author;
use crate::database::schema::Yoked;
use git2::Signature;
use serde::{Deserialize, Serialize};
use sled::IVec;
use std::collections::HashSet;
use std::ops::Deref;
use yoke::{Yoke, Yokeable};

/// The data stored in the index for a single tag.
///
/// Derives `Yokeable` so a deserialised value can borrow from the buffer it
/// was read from.
#[derive(Serialize, Deserialize, Debug, Yokeable)]
pub struct Tag<'a> {
// The tag's tagger, or `None` when no tagger was supplied at construction.
#[serde(borrow)]
pub tagger: Option<Author<'a>>,
}

impl<'a> Tag<'a> {
    /// Creates a tag record from an optional git signature.
    ///
    /// The signature, when present, is converted into an `Author` that borrows
    /// from it.
    pub fn new(tagger: Option<&'a Signature<'_>>) -> Self {
        let tagger = tagger.map(|signature| signature.into());

        Self { tagger }
    }

    /// Serialises this tag with bincode and stores it in `batch` under `name`.
    ///
    /// Any value previously stored under the same key is overwritten.
    ///
    /// # Panics
    ///
    /// Panics if serialisation or the sled insert fails.
    pub fn insert(&self, batch: &TagTree, name: &str) {
        let encoded = bincode::serialize(self).unwrap();

        batch.insert(name.as_bytes(), encoded).unwrap();
    }
}

/// Newtype around the `sled::Tree` in which tags are stored, keyed by tag name.
pub struct TagTree(sled::Tree);

impl Deref for TagTree {
type Target = sled::Tree;

fn deref(&self) -> &Self::Target {
&self.0
}
}

/// A deserialised `Tag` held together with the buffer it was deserialised from.
pub type YokedTag = Yoked<Tag<'static>>;

impl TagTree {
    /// Wraps an already opened sled tree.
    pub(super) fn new(tree: sled::Tree) -> Self {
        Self(tree)
    }

    /// Deletes the entry stored under `name`.
    ///
    /// Returns `true` if an entry existed and was removed.
    ///
    /// # Panics
    ///
    /// Panics if sled reports an error.
    pub fn remove(&self, name: &str) -> bool {
        let previous = self.0.remove(name).unwrap();
        previous.is_some()
    }

    /// Returns the set of all keys in the tree, decoded lossily as UTF-8.
    ///
    /// Keys that cannot be read are skipped.
    pub fn list(&self) -> HashSet<String> {
        let mut names = HashSet::new();

        for key in self.iter().keys() {
            if let Ok(key) = key {
                names.insert(String::from_utf8_lossy(&key).into_owned());
            }
        }

        names
    }

    /// Loads every tag in the tree, sorted by tagger time in descending order.
    ///
    /// Each returned name is the stored key with its `refs/tags/` prefix
    /// removed. Tags without a tagger sort after all tags that have one.
    ///
    /// # Panics
    ///
    /// Panics if an entry cannot be read, a key does not start with
    /// `refs/tags/`, or a value cannot be deserialised.
    pub fn fetch_all(&self) -> Vec<(String, YokedTag)> {
        let mut tags: Vec<(String, YokedTag)> = Vec::new();

        for entry in self.iter() {
            let (key, value) = entry.unwrap();

            let full_name = String::from_utf8_lossy(&key);
            let short_name = full_name.strip_prefix("refs/tags/").unwrap().to_string();

            // The yoke's cart must be a stable-deref pointer; the raw `IVec`
            // is not accepted as one, so it is boxed first.
            let cart = Box::new(value);
            let tag: YokedTag =
                Yoke::try_attach_to_cart(cart, |data: &IVec| bincode::deserialize(data)).unwrap();

            tags.push((short_name, tag));
        }

        // `Reverse` flips the natural ordering: newest tagger time first, `None` last.
        tags.sort_unstable_by_key(|(_, tag)| {
            std::cmp::Reverse(tag.get().tagger.as_ref().map(|tagger| tagger.time))
        });

        tags
    }
}
73 changes: 0 additions & 73 deletions src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ use tracing::instrument;
/// Shared state for git access: caches plus the syntax set.
pub struct Git {
// Cache of `Commit` values keyed by object id.
commits: Cache<Oid, Arc<Commit>>,
// Cache of README lookups keyed by path; the cached value is optional.
readme_cache: Cache<PathBuf, Option<(ReadmeFormat, Arc<str>)>>,
// Cache of a repository's collected branches and tags keyed by path.
refs: Cache<PathBuf, Arc<Refs>>,
// NOTE(review): presumably the syntect syntax definitions used for highlighting — confirm.
syntax_set: SyntaxSet,
}

Expand All @@ -36,10 +35,6 @@ impl Git {
.time_to_live(Duration::from_secs(10))
.max_capacity(100)
.build(),
refs: Cache::builder()
.time_to_live(Duration::from_secs(10))
.max_capacity(100)
.build(),
syntax_set,
}
}
Expand Down Expand Up @@ -202,56 +197,6 @@ impl OpenRepository {
.context("Failed to join Tokio task")?
}

/// Returns the repository's branches and annotated tags, served from the `refs` cache.
///
/// The cache is keyed by `self.cache_key`. On a miss, all references are walked on a
/// blocking task while the repository lock is held:
///
/// * branches are peeled to their commit and recorded with their shorthand name;
/// * tag references that peel to a tag object are recorded with their tagger,
///   other tag references are skipped.
///
/// Branches are sorted by committer time and tags by tagger time, both in descending
/// order; tags without a tagger sort last.
///
/// # Errors
///
/// Fails if the reference list cannot be read, a reference fails to load, a branch
/// cannot be peeled to a commit, a commit or tagger cannot be converted, or the
/// blocking task cannot be joined.
#[instrument(skip(self))]
pub async fn refs(self: Arc<Self>) -> Result<Arc<Refs>, Arc<anyhow::Error>> {
let git = self.git.clone();

git.refs
.try_get_with(self.cache_key.clone(), async move {
// git2 calls are blocking, so run them off the async executor
tokio::task::spawn_blocking(move || {
let repo = self.repo.lock();

let ref_iter = repo.references().context("Couldn't get list of references for repository")?;

let mut built_refs = Refs::default();

for ref_ in ref_iter {
let ref_ = ref_?;

if ref_.is_branch() {
let commit = ref_.peel_to_commit().context("Reference is apparently a branch but I couldn't get to the HEAD of it")?;

built_refs.branch.push(Branch {
name: String::from_utf8_lossy(ref_.shorthand_bytes()).into_owned(),
commit: commit.try_into()?,
});
} else if ref_.is_tag() {
// tag references that do not peel to a tag object are skipped
if let Ok(tag) = ref_.peel_to_tag() {
built_refs.tag.push(Tag {
name: String::from_utf8_lossy(ref_.shorthand_bytes()).into_owned(),
tagger: tag.tagger().map(TryInto::try_into).transpose()?,
});
}
}
}

// operands are swapped in both comparators to sort in descending order
built_refs.branch.sort_unstable_by(|one, two| {
two.commit.committer.time.cmp(&one.commit.committer.time)
});
built_refs.tag.sort_unstable_by(|one, two| {
let one_tagger = one.tagger.as_ref().map(|v| v.time);
let two_tagger = two.tagger.as_ref().map(|v| v.time);
two_tagger.cmp(&one_tagger)
});

Ok(Arc::new(built_refs))
})
.await
.context("Failed to join Tokio task")?
})
.await
}

#[instrument(skip(self))]
pub async fn readme(
self: Arc<Self>,
Expand Down Expand Up @@ -411,12 +356,6 @@ pub struct FileWithContent {
pub content: String,
}

/// The branches and tags collected from a repository's references.
#[derive(Debug, Default)]
pub struct Refs {
pub branch: Vec<Branch>,
pub tag: Vec<Tag>,
}

#[derive(Debug)]
pub struct Branch {
pub name: String,
Expand All @@ -442,17 +381,10 @@ pub struct DetailedTag {
pub tagged_object: Option<TaggedObject>,
}

/// A tag's name together with its optional tagger.
#[derive(Debug)]
pub struct Tag {
pub name: String,
pub tagger: Option<CommitUser>,
}

/// Owned name, e-mail address and timestamp of a git signature.
#[derive(Debug)]
pub struct CommitUser {
name: String,
email: String,
// NOTE(review): presumably the lowercase hex MD5 digest of the e-mail bytes — confirm against the constructor.
email_md5: String,
time: OffsetDateTime,
}

Expand All @@ -463,7 +395,6 @@ impl TryFrom<Signature<'_>> for CommitUser {
Ok(CommitUser {
name: String::from_utf8_lossy(v.name_bytes()).into_owned(),
email: String::from_utf8_lossy(v.email_bytes()).into_owned(),
email_md5: format!("{:x}", md5::compute(v.email_bytes())),
time: OffsetDateTime::from_unix_timestamp(v.when().seconds())?,
})
}
Expand All @@ -478,10 +409,6 @@ impl CommitUser {
&self.email
}

pub fn email_md5(&self) -> &str {
&self.email_md5
}

pub fn time(&self) -> OffsetDateTime {
self.time
}
Expand Down
Loading

0 comments on commit c220112

Please sign in to comment.