Skip to content

Commit

Permalink
refactor: restructure code search (#2102)
Browse files Browse the repository at this point in the history
* refactor: restructure search

1. Remove dataset- and deps-related functionality.
2. Add a document search index and APIs.

* update
  • Loading branch information
wsxiaoys authored May 13, 2024
1 parent fe23649 commit bf37574
Show file tree
Hide file tree
Showing 26 changed files with 274 additions and 644 deletions.
118 changes: 1 addition & 117 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions crates/tabby-common/src/api/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@ use thiserror::Error;
use utoipa::ToSchema;

#[derive(Default, Serialize, Deserialize, Debug, ToSchema)]
pub struct SearchResponse {
pub struct CodeSearchResponse {
pub num_hits: usize,
pub hits: Vec<Hit>,
pub hits: Vec<CodeSearchHit>,
}

#[derive(Serialize, Deserialize, Debug, ToSchema)]
pub struct Hit {
pub struct CodeSearchHit {
pub score: f32,
pub doc: HitDocument,
pub doc: CodeSearchDocument,
pub id: u32,
}

#[derive(Serialize, Deserialize, Debug, ToSchema)]
pub struct HitDocument {
pub struct CodeSearchDocument {
pub body: String,
pub filepath: String,
pub git_url: String,
Expand Down Expand Up @@ -46,7 +46,7 @@ pub trait CodeSearch: Send + Sync {
q: &str,
limit: usize,
offset: usize,
) -> Result<SearchResponse, CodeSearchError>;
) -> Result<CodeSearchResponse, CodeSearchError>;

async fn search_in_language(
&self,
Expand All @@ -55,5 +55,5 @@ pub trait CodeSearch: Send + Sync {
tokens: &[String],
limit: usize,
offset: usize,
) -> Result<SearchResponse, CodeSearchError>;
) -> Result<CodeSearchResponse, CodeSearchError>;
}
49 changes: 49 additions & 0 deletions crates/tabby-common/src/api/doc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use utoipa::ToSchema;

/// Successful result of a document search (the `Ok` type of `DocSearch::search`).
///
/// Derives `Default`, which yields `num_hits == 0` and an empty `hits` vector.
#[derive(Default, Serialize, Deserialize, Debug, ToSchema)]
pub struct DocSearchResponse {
// NOTE(review): presumably the total number of matches, which may differ from
// `hits.len()` when results are paged — confirm against the implementor.
pub num_hits: usize,
/// Hits carried by this response.
pub hits: Vec<DocSearchHit>,
}

/// One entry of `DocSearchResponse::hits`: a document together with its score and id.
#[derive(Serialize, Deserialize, Debug, ToSchema)]
pub struct DocSearchHit {
// NOTE(review): presumably the relevance score assigned by the search backend;
// its scale and ordering are not visible here — confirm.
pub score: f32,
/// The document payload (title, link, snippet) for this hit.
pub doc: DocSearchDocument,
// NOTE(review): the meaning of `id` is not established in this file (possibly an
// index-internal document id) — confirm before relying on its stability.
pub id: u32,
}

/// Document payload carried by a `DocSearchHit`.
///
/// All three fields are owned strings.
#[derive(Serialize, Deserialize, Debug, ToSchema)]
pub struct DocSearchDocument {
/// Document title.
pub title: String,
// NOTE(review): presumably a URL pointing at the source document — confirm.
pub link: String,
// NOTE(review): presumably an excerpt of the document text — confirm.
pub snippet: String,
}

/// Error type returned by `DocSearch::search`.
///
/// The `#[from]` attributes generate `From` conversions, so the wrapped error
/// types can be propagated with `?`.
#[derive(Error, Debug)]
pub enum DocSearchError {
/// Displayed as "index not ready".
// NOTE(review): presumably returned while the index cannot be queried yet — confirm
// against the implementor.
#[error("index not ready")]
NotReady,

/// A tantivy query-parser error, reported transparently (the inner error's message is used).
#[error(transparent)]
QueryParserError(#[from] tantivy::query::QueryParserError),

/// A general tantivy error, reported transparently.
#[error(transparent)]
TantivyError(#[from] tantivy::TantivyError),

/// Any other failure, carried as an `anyhow::Error` and reported transparently.
#[error(transparent)]
Other(#[from] anyhow::Error),
}

/// Asynchronous document search interface.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait DocSearch: Send + Sync {
/// Searches for `q` and returns a `DocSearchResponse`, or a `DocSearchError` on failure.
// NOTE(review): `limit` and `offset` presumably page the results (maximum number of
// hits to return / number of hits to skip) — confirm against the implementor.
async fn search(
&self,
q: &str,
limit: usize,
offset: usize,
) -> Result<DocSearchResponse, DocSearchError>;
}
1 change: 1 addition & 0 deletions crates/tabby-common/src/api/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod code;
pub mod doc;
pub mod event;
pub mod server_setting;

Expand Down
File renamed without changes.
40 changes: 40 additions & 0 deletions crates/tabby-common/src/index/doc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use tantivy::schema::{Field, Schema, STORED, STRING};

/// The tantivy schema for document search, bundled with handles to each of its fields.
///
/// Built by `DocSearchSchema::new`, which registers the fields under the names
/// `embedding_token`, `title`, `link` and `snippet`.
pub struct DocSearchSchema {
/// The built tantivy schema containing all fields below.
pub schema: Schema,
/// Binarized embedding tokens with the following mapping:
/// * [-1, 0] -> 0
/// * (0, 1] -> 1
pub field_embedding_token: Field,

/// Handle to the `title` text field (registered with `STORED`).
pub field_title: Field,
/// Handle to the `link` text field (registered with `STORED`).
pub field_link: Field,
/// Handle to the `snippet` text field (registered with `STORED`).
pub field_snippet: Field,
}

impl DocSearchSchema {
    /// Builds the document search schema and records a handle to every field.
    ///
    /// Fields are registered in this order: `embedding_token` (with `STRING`
    /// options), then `title`, `link` and `snippet` (each with `STORED` options).
    pub fn new() -> Self {
        let mut schema_builder = Schema::builder();

        // Registration order determines the field layout of the schema, so it is
        // kept fixed: embedding token first, then the three stored text fields.
        let embedding_token = schema_builder.add_text_field("embedding_token", STRING);
        let title = schema_builder.add_text_field("title", STORED);
        let link = schema_builder.add_text_field("link", STORED);
        let snippet = schema_builder.add_text_field("snippet", STORED);

        Self {
            // All fields are registered above, so the builder can be consumed here.
            schema: schema_builder.build(),
            field_embedding_token: embedding_token,
            field_title: title,
            field_link: link,
            field_snippet: snippet,
        }
    }
}

impl Default for DocSearchSchema {
fn default() -> Self {
Self::new()
}
}
5 changes: 5 additions & 0 deletions crates/tabby-common/src/index/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mod code;
pub use code::{register_tokenizers, CodeSearchSchema};

mod doc;
pub use doc::DocSearchSchema;
Loading

0 comments on commit bf37574

Please sign in to comment.