Skip to content

Commit

Permalink
feat(document): add web document for preset / custom docs (#2898)
Browse files Browse the repository at this point in the history
* add web document change

Signed-off-by: xxs-wallace <[email protected]>

* [autofix.ci] apply automated fixes

* add to schema.sql

Signed-off-by: xxs-wallace <[email protected]>

* move to new file

Signed-off-by: xxs-wallace <[email protected]>

* update schema.sql

* update

* commit suggestion

Signed-off-by: xxs-wallace <[email protected]>

* refactor(document): add web document service api (#2904)

* add api

Signed-off-by: xxs-wallace <[email protected]>

* fmt

Signed-off-by: xxs-wallace <[email protected]>

* refactor some name

Signed-off-by: xxs-wallace <[email protected]>

* fix generate example

Signed-off-by: xxs-wallace <[email protected]>

* fmt

Signed-off-by: xxs-wallace <[email protected]>

* rename

Signed-off-by: xxs-wallace <[email protected]>

* fix method

Signed-off-by: xxs-wallace <[email protected]>

* add updated at field

Signed-off-by: xxs-wallace <[email protected]>

* add regex

Signed-off-by: xxs-wallace <[email protected]>

* Update ee/tabby-schema/src/schema/web_documents.rs

* fix doc

Signed-off-by: xxs-wallace <[email protected]>

* add test

Signed-off-by: xxs-wallace <[email protected]>

* fix space

Signed-off-by: xxs-wallace <[email protected]>

---------

Signed-off-by: xxs-wallace <[email protected]>
Co-authored-by: Meng Zhang <[email protected]>

* [autofix.ci] apply automated fixes

* [autofix.ci] apply automated fixes (attempt 2/3)

* feat(document): support crawl preset document (#2907)

* finish api

Signed-off-by: xxs-wallace <[email protected]>

* fix delete by id

Signed-off-by: xxs-wallace <[email protected]>

* fix active

Signed-off-by: xxs-wallace <[email protected]>

* fix name

Signed-off-by: xxs-wallace <[email protected]>

* fix api

Signed-off-by: xxs-wallace <[email protected]>

* add ut

Signed-off-by: xxs-wallace <[email protected]>

---------

Signed-off-by: xxs-wallace <[email protected]>

* add more test

Signed-off-by: xxs-wallace <[email protected]>

---------

Signed-off-by: xxs-wallace <[email protected]>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Meng Zhang <[email protected]>
  • Loading branch information
3 people authored Aug 24, 2024
1 parent d2962ed commit 593f1a6
Show file tree
Hide file tree
Showing 14 changed files with 1,049 additions and 2 deletions.
1 change: 1 addition & 0 deletions ee/tabby-db/migrations/0036_web_document.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Down migration for 0036_web_document: removes the web_documents table created by the matching up migration.
DROP TABLE web_documents;
10 changes: 10 additions & 0 deletions ee/tabby-db/migrations/0036_web_document.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- Web documents: one row per document, identified by a unique name and a unique url.
-- is_preset distinguishes preset rows (TRUE) from all others (default FALSE).
-- created_at / updated_at both default to the insertion time.
CREATE TABLE web_documents(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
name VARCHAR(255) NOT NULL,
url TEXT NOT NULL,
is_preset BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMP NOT NULL DEFAULT(DATETIME('now')),
updated_at TIMESTAMP NOT NULL DEFAULT(DATETIME('now')),
CONSTRAINT idx_name UNIQUE(name),
CONSTRAINT idx_url UNIQUE(url)
);
Binary file modified ee/tabby-db/schema.sqlite
Binary file not shown.
10 changes: 10 additions & 0 deletions ee/tabby-db/schema/schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,13 @@ CREATE TABLE thread_messages(
updated_at TIMESTAMP NOT NULL DEFAULT(DATETIME('now')),
FOREIGN KEY(thread_id) REFERENCES threads(id) ON DELETE CASCADE
);
CREATE TABLE web_documents(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
name VARCHAR(255) NOT NULL,
url TEXT NOT NULL,
is_preset BOOLEAN NOT NULL DEFAULT FALSE,
created_at TIMESTAMP NOT NULL DEFAULT(DATETIME('now')),
updated_at TIMESTAMP NOT NULL DEFAULT(DATETIME('now')),
CONSTRAINT idx_name UNIQUE(name),
CONSTRAINT idx_url UNIQUE(url)
);
2 changes: 2 additions & 0 deletions ee/tabby-db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use user_completions::UserCompletionDailyStatsDAO;
pub use user_events::UserEventDAO;
pub use users::UserDAO;
pub use web_crawler::WebCrawlerUrlDAO;
pub use web_documents::WebDocumentDAO;

pub mod cache;
mod email_setting;
Expand All @@ -41,6 +42,7 @@ mod user_completions;
mod user_events;
mod users;
mod web_crawler;
mod web_documents;

use anyhow::Result;
use sql_query_builder as sql;
Expand Down
77 changes: 77 additions & 0 deletions ee/tabby-db/src/web_documents.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
use anyhow::{anyhow, Result};
use chrono::{DateTime, Utc};
use sqlx::{prelude::FromRow, query};
use tabby_db_macros::query_paged_as;

use crate::DbConn;

/// A row of the `web_documents` table.
// NOTE(review): `allow(unused)` presumably silences warnings for fields that are not read
// anywhere yet — confirm whether it is still required.
#[allow(unused)]
#[derive(FromRow)]
pub struct WebDocumentDAO {
/// Primary key of the row.
pub id: i64,
/// Name of the document.
pub name: String,
/// URL of the document.
pub url: String,
/// Value of the `is_preset` column for this row.
pub is_preset: bool,
/// Value of the `created_at` column.
pub created_at: DateTime<Utc>,
/// Value of the `updated_at` column.
pub updated_at: DateTime<Utc>,
}

impl DbConn {
    /// Returns one page of `web_documents` rows whose `is_preset` column equals `is_preset`.
    ///
    /// `limit`, `skip_id` and `backwards` are forwarded unchanged to `query_paged_as!`,
    /// which builds the paginated query.
    ///
    /// # Errors
    /// Returns an error if the query fails.
    pub async fn list_web_documents(
        &self,
        limit: Option<usize>,
        skip_id: Option<i32>,
        backwards: bool,
        is_preset: bool,
    ) -> Result<Vec<WebDocumentDAO>> {
        // `is_preset` is a `bool`, so it renders as `true` / `false`; no caller-supplied
        // text is interpolated into the SQL condition.
        let condition = Some(format!("is_preset={}", is_preset));

        let documents = query_paged_as!(
            WebDocumentDAO,
            "web_documents",
            ["id", "name", "url", "is_preset", "created_at" as "created_at!: DateTime<Utc>", "updated_at" as "updated_at!: DateTime<Utc>"],
            limit,
            skip_id,
            backwards,
            condition
        ).fetch_all(&self.pool)
        .await?;

        Ok(documents)
    }

    /// Inserts a new row into `web_documents` and returns its row id.
    ///
    /// # Errors
    /// Returns an error if the insert fails.
    pub async fn create_web_document(
        &self,
        name: String,
        url: String,
        is_preset: bool,
    ) -> Result<i64> {
        let res = query!(
            "INSERT INTO web_documents(name, url, is_preset) VALUES (?,?,?);",
            name,
            url,
            is_preset
        )
        .execute(&self.pool)
        .await?;

        Ok(res.last_insert_rowid())
    }

    /// Deactivates a preset web document by deleting its row.
    ///
    /// Only rows flagged `is_preset` are eligible, so passing the name of a non-preset
    /// document never deletes that document.
    ///
    /// # Errors
    /// Returns an error if the query fails or if no preset row named `name` was deleted.
    pub async fn deactivate_preset_web_document(&self, name: String) -> Result<()> {
        // Restrict the delete to preset rows: matching on `name` alone would also remove
        // a non-preset document that happens to carry the same name.
        let res = query!(
            "DELETE FROM web_documents WHERE name = ? AND is_preset = TRUE;",
            name
        )
        .execute(&self.pool)
        .await?;
        if res.rows_affected() != 1 {
            return Err(anyhow!("No preset web document to deactivate"));
        }
        Ok(())
    }

    /// Deletes the row with the given `id`.
    ///
    /// Succeeds even when no row has that id; the affected-row count is not inspected.
    ///
    /// # Errors
    /// Returns an error if the query fails.
    pub async fn delete_web_document(&self, id: i64) -> Result<()> {
        query!("DELETE FROM web_documents WHERE id = ?;", id)
            .execute(&self.pool)
            .await?;
        Ok(())
    }
}
53 changes: 53 additions & 0 deletions ee/tabby-schema/graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ input CodeSearchParamsOverrideInput {
numToScore: Int
}

input CreateCustomDocumentInput {
name: String!
url: String!
}

input CreateIntegrationInput {
displayName: String!
accessToken: String!
Expand Down Expand Up @@ -188,6 +193,11 @@ input SecuritySettingInput {
disableClientSideTelemetry: Boolean!
}

input SetPresetDocumentActiveInput {
name: String!
active: Boolean!
}

input ThreadRunDebugOptionsInput {
codeSearchParamsOverride: CodeSearchParamsOverrideInput = null
}
Expand Down Expand Up @@ -238,6 +248,25 @@ type CompletionStats {
selects: Int!
}

type CustomDocumentConnection {
edges: [CustomDocumentEdge!]!
pageInfo: PageInfo!
}

type CustomDocumentEdge {
node: CustomWebDocument!
cursor: String!
}

type CustomWebDocument {
url: String!
name: String!
id: ID!
createdAt: DateTime!
updatedAt: DateTime!
jobInfo: JobInfo!
}

type DiskUsage {
filepath: [String!]!
"Size in kilobytes."
Expand Down Expand Up @@ -490,6 +519,9 @@ type Mutation {
deleteWebCrawlerUrl(id: ID!): Boolean!
"Delete pair of user message and bot response in a thread."
deleteThreadMessagePair(threadId: ID!, userMessageId: ID!, assistantMessageId: ID!): Boolean!
createCustomDocument(input: CreateCustomDocumentInput!): ID!
deleteCustomDocument(id: ID!): Boolean!
setPresetDocumentActive(input: SetPresetDocumentActiveInput!): Boolean!
}

type NetworkSetting {
Expand All @@ -510,6 +542,25 @@ type PageInfo {
endCursor: String
}

type PresetDocumentConnection {
edges: [PresetDocumentEdge!]!
pageInfo: PageInfo!
}

type PresetDocumentEdge {
node: PresetWebDocument!
cursor: String!
}

type PresetWebDocument {
id: ID!
name: String!
"`updated_at` is only filled when the preset is active."
updatedAt: DateTime
"`job_info` is only filled when the preset is active."
jobInfo: JobInfo
}

type ProvidedRepository {
id: ID!
integrationId: ID!
Expand Down Expand Up @@ -581,6 +632,8 @@ type Query {
Thread is public within an instance, so no need to check for ownership.
"""
threadMessages(threadId: ID!, after: String, before: String, first: Int, last: Int): MessageConnection!
customWebDocuments(after: String, before: String, first: Int, last: Int): CustomDocumentConnection!
presetWebDocuments(after: String, before: String, first: Int, last: Int, isActive: Boolean!): PresetDocumentConnection!
}

type RefreshTokenResponse {
Expand Down
21 changes: 21 additions & 0 deletions ee/tabby-schema/src/schema/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ lazy_static! {
pub static ref REPOSITORY_NAME_REGEX: Regex = Regex::new("^[a-zA-Z][\\w.-]+$").unwrap();
pub static ref USERNAME_REGEX: Regex =
Regex::new(r"^[^0-9±!@£$%^&*_+§¡€#¢¶•ªº«\\/<>?:;|=.,]{2,20}$").unwrap();
pub static ref WEB_DOCUMENT_NAME_REGEX: Regex =
Regex::new(r"^[A-Za-z][A-Za-z0-9]*(\ [A-Za-z0-9]+)*$").unwrap();
}

#[cfg(test)]
Expand Down Expand Up @@ -40,4 +42,23 @@ mod tests {
assert_eq!(result, expected, "Failed for name: {}", name);
}
}

#[test]
fn test_web_document_name_regex() {
    // Each entry is (candidate name, whether WEB_DOCUMENT_NAME_REGEX should accept it).
    // The regex requires a leading ASCII letter followed by ASCII alphanumeric words
    // separated by exactly one space.
    let test_cases = vec![
        ("John", true),       // ASCII letters only
        ("Müller", false),    // contains a non-ASCII letter
        ("abc123", true),
        ("Abc 123", true),    // words separated by a single space
        (" abc 123", false),  // leading space
        ("abc123*", false),
        ("abc123_", false),
        ("abc  123", false),  // two consecutive spaces
    ];

    for (name, expected) in test_cases {
        let result = WEB_DOCUMENT_NAME_REGEX.is_match(name);
        assert_eq!(result, expected, "Failed for name: {}", name);
    }
}
}
78 changes: 78 additions & 0 deletions ee/tabby-schema/src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ pub mod setting;
pub mod thread;
pub mod user_event;
pub mod web_crawler;
pub mod web_documents;
pub mod worker;

use std::sync::Arc;
Expand Down Expand Up @@ -51,10 +52,12 @@ use self::{
},
user_event::{UserEvent, UserEventService},
web_crawler::{CreateWebCrawlerUrlInput, WebCrawlerService, WebCrawlerUrl},
web_documents::{CreateCustomDocumentInput, CustomWebDocument, WebDocumentService},
};
use crate::{
env,
juniper::relay::{self, query_async, Connection},
web_documents::{PresetWebDocument, SetPresetDocumentActiveInput},
};

pub trait ServiceLocator: Send + Sync {
Expand All @@ -71,6 +74,7 @@ pub trait ServiceLocator: Send + Sync {
fn analytic(&self) -> Arc<dyn AnalyticService>;
fn user_event(&self) -> Arc<dyn UserEventService>;
fn web_crawler(&self) -> Arc<dyn WebCrawlerService>;
fn web_documents(&self) -> Arc<dyn WebDocumentService>;
fn thread(&self) -> Arc<dyn ThreadService>;
}

Expand Down Expand Up @@ -580,6 +584,50 @@ impl Query {
)
.await
}

// Relay-style paginated listing of custom web documents.
// The cursor arguments are handed to `query_async`, which invokes the closure to fetch
// the page from the web document service.
// NOTE(review): no access check is performed before querying — confirm this is intended.
async fn custom_web_documents(
    ctx: &Context,
    after: Option<String>,
    before: Option<String>,
    first: Option<i32>,
    last: Option<i32>,
) -> Result<Connection<CustomWebDocument>> {
    query_async(
        after,
        before,
        first,
        last,
        |after, before, first, last| async move {
            let documents = ctx.locator.web_documents();
            documents
                .list_custom_web_documents(after, before, first, last)
                .await
        },
    )
    .await
}
// Relay-style paginated listing of preset web documents.
// `is_active` is passed through to the web document service along with the cursor
// arguments supplied by `query_async`.
// NOTE(review): no access check is performed before querying — confirm this is intended.
async fn preset_web_documents(
    ctx: &Context,
    after: Option<String>,
    before: Option<String>,
    first: Option<i32>,
    last: Option<i32>,
    is_active: bool,
) -> Result<Connection<PresetWebDocument>> {
    query_async(
        after,
        before,
        first,
        last,
        |after, before, first, last| async move {
            let documents = ctx.locator.web_documents();
            documents
                .list_preset_web_documents(after, before, first, last, is_active)
                .await
        },
    )
    .await
}
}

#[derive(GraphQLObject)]
Expand Down Expand Up @@ -959,6 +1007,36 @@ impl Mutation {
.await?;
Ok(true)
}

// Validates the input, then asks the web document service to create a custom document
// and returns the id it reports.
// NOTE(review): no access check is performed before mutating — confirm this is intended.
async fn create_custom_document(ctx: &Context, input: CreateCustomDocumentInput) -> Result<ID> {
    input.validate()?;

    let documents = ctx.locator.web_documents();
    let id = documents
        .create_custom_web_document(input.name, input.url)
        .await?;

    Ok(id)
}

// Asks the web document service to delete the custom document with the given id.
// Returns `true` whenever the service call succeeds.
// NOTE(review): no access check is performed before mutating — confirm this is intended.
async fn delete_custom_document(ctx: &Context, id: ID) -> Result<bool> {
    let documents = ctx.locator.web_documents();
    documents.delete_custom_web_document(id).await?;
    Ok(true)
}

// Validates the input, then asks the web document service to set the active state of the
// named preset document. Returns `true` whenever the service call succeeds.
// NOTE(review): no access check is performed before mutating — confirm this is intended.
async fn set_preset_document_active(
    ctx: &Context,
    input: SetPresetDocumentActiveInput,
) -> Result<bool> {
    input.validate()?;

    let documents = ctx.locator.web_documents();
    documents
        .set_preset_web_documents_active(input.name, input.active)
        .await?;

    Ok(true)
}
}

async fn check_analytic_access(ctx: &Context, users: &[ID]) -> Result<(), CoreError> {
Expand Down
Loading

0 comments on commit 593f1a6

Please sign in to comment.