Skip to content

Commit

Permalink
test: add code index builder test for failed chunk count
Browse files Browse the repository at this point in the history
Signed-off-by: Wei Zhang <[email protected]>
  • Loading branch information
zwpaper committed Nov 28, 2024
1 parent 62b1e86 commit e87ec84
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 7 deletions.
8 changes: 4 additions & 4 deletions crates/tabby-index/src/code/intelligence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ mod metrics {
}

#[cfg(test)]
mod tests {
pub mod tests {
use std::path::PathBuf;

use serial_test::file_serial;
Expand All @@ -258,17 +258,17 @@ mod tests {

use super::*;

fn get_tabby_root() -> PathBuf {
pub fn get_tabby_root() -> PathBuf {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("testdata");
path
}

fn get_repository_config() -> CodeRepository {
pub fn get_repository_config() -> CodeRepository {
CodeRepository::new("https://github.com/TabbyML/tabby", &config_index_to_id(0))
}

fn get_rust_source_file() -> PathBuf {
pub fn get_rust_source_file() -> PathBuf {
let mut path = get_tabby_root();
path.push("repositories");
path.push("https_github.com_TabbyML_tabby");
Expand Down
2 changes: 1 addition & 1 deletion crates/tabby-index/src/code/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ async fn build_binarize_embedding_tokens(embedding: Arc<dyn Embedding>, body: &s
tokens
}

fn create_code_builder(embedding: Option<Arc<dyn Embedding>>) -> TantivyDocBuilder<SourceCode> {
pub fn create_code_builder(embedding: Option<Arc<dyn Embedding>>) -> TantivyDocBuilder<SourceCode> {
let builder = CodeBuilder::new(embedding);
TantivyDocBuilder::new(corpus::CODE, builder)
}
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,63 @@ mod builder_tests {

use super::mock_embedding::MockEmbedding;
use crate::{
indexer::TantivyDocBuilder,
code::{
create_code_builder,
intelligence::{
tests::{get_repository_config, get_rust_source_file, get_tabby_root},
CodeIntelligence,
},
},
indexer::{Indexer, TantivyDocBuilder, ToIndexId},
structured_doc::{
public::{StructuredDoc, StructuredDocFields, StructuredDocIssueFields},
StructuredDocBuilder,
},
};

/// Builds the code index for the sample Rust source file using a mock
/// embedding constructed from an empty vector, then checks that the final
/// document reports every chunk as failed.
///
/// The test temporarily redirects the tabby root to the crate's test data
/// and puts the original root back once all assertions have passed.
#[test]
#[file_serial(set_tabby_root)]
fn test_builder_code_empty_embedding() {
    // Remember the current root so it can be restored at the end of the test.
    let previous_root = tabby_common::path::tabby_root();
    tabby_common::path::set_tabby_root(get_tabby_root());

    let mock_embedding = Arc::new(MockEmbedding::new(vec![]));
    let builder = Arc::new(create_code_builder(Some(mock_embedding)));

    let repository = get_repository_config();
    let source_file = get_rust_source_file();
    let code = CodeIntelligence::compute_source_file(&repository, &source_file).unwrap();
    let index_id = code.to_index_id();

    // First runtime: build the document id and the stream of pending documents.
    let (built_id, doc_stream) = tokio::runtime::Runtime::new()
        .unwrap()
        .block_on(async { builder.build(code).await });
    assert_eq!(built_id, index_id.id);

    // Drive at least 32 stream items concurrently, or twice the available
    // parallelism when that is larger.
    let concurrency = (std::thread::available_parallelism().unwrap().get() * 2).max(32);

    // Second runtime: drain the stream into a vector of results.
    let results = tokio::runtime::Runtime::new()
        .unwrap()
        .block_on(async {
            doc_stream
                .buffer_unordered(concurrency)
                .collect::<Vec<_>>()
                .await
        });

    assert_eq!(results.len(), 4);

    // The last element is the document itself; the three before it are the
    // chunks, which are expected to fail because no embedding is provided.
    let document = results
        .last()
        .unwrap()
        .as_ref()
        .unwrap()
        .as_ref()
        .unwrap();

    let schema = IndexSchema::instance();
    let failed_chunks = document
        .get_first(schema.field_failed_chunks_count)
        .and_then(|value| value.as_u64())
        .unwrap();
    assert_eq!(failed_chunks, 3);

    tabby_common::path::set_tabby_root(previous_root);
}

/// Test that the indexer returns the document and none itself
/// when the embedding is empty
#[test]
Expand Down
2 changes: 1 addition & 1 deletion crates/tabby-index/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use indexer::{IndexAttributeBuilder, Indexer};
mod structured_doc;

#[cfg(test)]
mod structured_doc_tests;
mod indexer_tests;

pub mod public {
use indexer::IndexGarbageCollector;
Expand Down

0 comments on commit e87ec84

Please sign in to comment.