Skip to content

Commit

Permalink
chore: commit should be in doc.attribute
Browse files Browse the repository at this point in the history
Signed-off-by: Wei Zhang <[email protected]>
  • Loading branch information
zwpaper committed Dec 18, 2024
1 parent b754142 commit 700151c
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 17 deletions.
3 changes: 2 additions & 1 deletion crates/tabby-common/src/index/code/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ use super::{corpus, IndexSchema};
use crate::api::code::CodeSearchQuery;

pub mod fields {
pub const ATTRIBUTE_COMMIT: &str = "commit";

pub const CHUNK_GIT_URL: &str = "chunk_git_url";
pub const CHUNK_COMMIT: &str = "chunk_commit";
pub const CHUNK_FILEPATH: &str = "chunk_filepath";
pub const CHUNK_LANGUAGE: &str = "chunk_language";
pub const CHUNK_BODY: &str = "chunk_body";
Expand Down
2 changes: 1 addition & 1 deletion crates/tabby-index/src/code/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ fn require_updates(indexer: Arc<Indexer>, id: &str) -> bool {

/// Returns the negation of `indexer.has_attribute_field` for the commit
/// field of the document identified by `id`, i.e. `true` when that document
/// does not carry the commit attribute.
fn should_backfill(indexer: Arc<Indexer>, id: &str) -> bool {
// v0.23.0 added the commit field to the code document.
!indexer.has_attribute_field(id, code::fields::CHUNK_COMMIT)
!indexer.has_attribute_field(id, code::fields::ATTRIBUTE_COMMIT)
}

Check warning on line 158 in crates/tabby-index/src/code/index.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby-index/src/code/index.rs#L155-L158

Added lines #L155 - L158 were not covered by tests

fn is_valid_file(file: &SourceCode) -> bool {
Expand Down
7 changes: 4 additions & 3 deletions crates/tabby-index/src/code/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ impl CodeBuilder {

#[async_trait]
impl IndexAttributeBuilder<SourceCode> for CodeBuilder {
/// Builds the document-level attributes for a source file as a JSON value.
async fn build_attributes(&self, _source_code: &SourceCode) -> serde_json::Value {
json!({})
async fn build_attributes(&self, source_code: &SourceCode) -> serde_json::Value {
json!({
// The commit of the source file is stored once per document, under the
// document-level commit key.
code::fields::ATTRIBUTE_COMMIT: source_code.commit,
})
}

async fn build_chunk_attributes<'a>(
Expand Down Expand Up @@ -102,7 +104,6 @@ impl IndexAttributeBuilder<SourceCode> for CodeBuilder {
let attributes = json!({
code::fields::CHUNK_FILEPATH: source_code.filepath,
code::fields::CHUNK_GIT_URL: source_code.git_url,
code::fields::CHUNK_COMMIT: source_code.commit,
code::fields::CHUNK_LANGUAGE: source_code.language,
code::fields::CHUNK_BODY: body,
code::fields::CHUNK_START_LINE: start_line,
Expand Down
40 changes: 28 additions & 12 deletions crates/tabby/src/services/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use tabby_common::{
index::{
self,
code::{self, tokenize_code},
IndexSchema,
corpus, IndexSchema,
},
};
use tabby_inference::Embedding;
Expand Down Expand Up @@ -76,16 +76,37 @@ impl CodeSearchImpl {
.await?
};

Ok(merge_code_responses_by_rank(
&params,
docs_from_embedding,
docs_from_bm25,
))
let mut merged_codes =
merge_code_responses_by_rank(&params, docs_from_embedding, docs_from_bm25);
add_doc_attribute(reader, &mut merged_codes).await;

Check warning on line 81 in crates/tabby/src/services/code.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby/src/services/code.rs#L79-L81

Added lines #L79 - L81 were not covered by tests

Ok(merged_codes)

Check warning on line 83 in crates/tabby/src/services/code.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby/src/services/code.rs#L83

Added line #L83 was not covered by tests
}
}

// NOTE(review): presumably the smoothing constant used when merging the
// embedding and BM25 result lists by rank — confirm against
// `merge_code_responses_by_rank`.
const RANK_CONSTANT: f32 = 60.0;

async fn add_doc_attribute(reader: &IndexReader, searched_code: &mut CodeSearchResponse) {
let schema = IndexSchema::instance();
for hit in searched_code.hits.iter_mut() {
let query = schema.doc_query(corpus::CODE, &hit.doc.file_id);
let doc = reader
.searcher()
.search(&query, &TopDocs::with_limit(1))
.unwrap();
if doc.len() == 0 {
continue;
}
let doc = reader.searcher().doc(doc[0].1).unwrap();
hit.doc.commit = get_json_text_field_optional(
&doc,
schema.field_attributes,
code::fields::ATTRIBUTE_COMMIT,
)
.map(|s| s.to_owned());
}
}

Check warning on line 108 in crates/tabby/src/services/code.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby/src/services/code.rs#L89-L108

Added lines #L89 - L108 were not covered by tests

fn merge_code_responses_by_rank(
params: &CodeSearchParams,
embedding_resp: Vec<(f32, TantivyDocument)>,
Expand Down Expand Up @@ -187,12 +208,7 @@ fn create_hit(scores: CodeSearchScores, doc: TantivyDocument) -> CodeSearchHit {
.to_owned(),
// commit is introduced in v0.23, but it is also a required field
// so we need to handle the case where it's not present
commit: get_json_text_field_optional(
&doc,
schema.field_chunk_attributes,
code::fields::CHUNK_COMMIT,
)
.map(|s| s.to_owned()),
commit: None,

Check warning on line 211 in crates/tabby/src/services/code.rs

View check run for this annotation

Codecov / codecov/patch

crates/tabby/src/services/code.rs#L209-L211

Added lines #L209 - L211 were not covered by tests
language: get_json_text_field(
&doc,
schema.field_chunk_attributes,
Expand Down

0 comments on commit 700151c

Please sign in to comment.