Skip to content

Commit

Permalink
chore: return pull when pre sync
Browse files Browse the repository at this point in the history
Signed-off-by: Wei Zhang <[email protected]>
  • Loading branch information
zwpaper committed Dec 11, 2024
1 parent fbc7d66 commit 7c6771b
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 35 deletions.
4 changes: 2 additions & 2 deletions crates/tabby-index/src/indexer_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ mod structured_doc_tests {
let updated_at = chrono::Utc::now();
let res = tokio::runtime::Runtime::new().unwrap().block_on(async {
let updated = indexer
.presync(StructuredDocState {
.presync(&StructuredDocState {
id: doc.id().to_string(),
updated_at,
deleted: false,
Expand Down Expand Up @@ -118,7 +118,7 @@ mod structured_doc_tests {
let updated_at = chrono::Utc::now();
let res = tokio::runtime::Runtime::new().unwrap().block_on(async {
let updated = indexer
.presync(StructuredDocState {
.presync(&StructuredDocState {
id: doc.id().to_string(),
updated_at,
deleted: false,
Expand Down
2 changes: 0 additions & 2 deletions crates/tabby-index/src/structured_doc/public.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ pub struct StructuredDocState {
// For instance, a closed pull request will be marked as deleted,
// prompting the indexer to remove it from the index.
pub deleted: bool,

pub raw: Option<serde_json::Value>,
}

pub struct StructuredDocIndexer {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::sync::Arc;

use anyhow::Result;
use async_stream::stream;
use chrono::{DateTime, Utc};
use futures::{stream::BoxStream, StreamExt};
Expand Down Expand Up @@ -153,7 +154,7 @@ impl SchedulerGithubGitlabJob {
let mut num_updated = 0;

let index = StructuredDocIndexer::new(embedding);
while let Some((id, state)) = pull_state_stream.next().await {
while let Some((pull, state)) = pull_state_stream.next().await {
count += 1;
if count % 100 == 0 {
logkit::info!(
Expand All @@ -167,21 +168,9 @@ impl SchedulerGithubGitlabJob {
continue;
}

if state.raw.as_ref().is_none() {
logkit::warn!("Pull {} has no raw data", id);
continue;
}
let pull_doc = fetch_pull_structured_doc(integration, repository, pull).await?;

let pull = get_github_pull_doc(
&repository.source_id(),
state.raw.unwrap(),
integration.api_base(),
&repository.display_name,
&integration.access_token,
)
.await?;

index.sync(pull).await;
index.sync(pull_doc).await;
num_updated += 1;
}
logkit::info!(
Expand Down Expand Up @@ -279,10 +268,11 @@ async fn fetch_all_issues(

Ok(s)
}

async fn fetch_all_pull_states(
integration: &Integration,
repository: &ProvidedRepository,
) -> tabby_schema::Result<BoxStream<'static, (u64, StructuredDocState)>> {
) -> tabby_schema::Result<BoxStream<'static, (pulls::Pull, StructuredDocState)>> {
match &integration.kind {
IntegrationKind::Github | IntegrationKind::GithubSelfHosted => Ok(list_github_pull_states(
integration.api_base(),
Expand All @@ -296,3 +286,22 @@ async fn fetch_all_pull_states(
)),
}
}

async fn fetch_pull_structured_doc(
integration: &Integration,
repository: &ProvidedRepository,
pull: pulls::Pull,
) -> Result<StructuredDoc> {
match pull {
pulls::Pull::GitHub(pull) => {
get_github_pull_doc(
&repository.source_id(),
pull,
integration.api_base(),
&repository.display_name,
&integration.access_token,
)
.await
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ pub async fn list_github_issues(
id: doc.id().to_string(),
updated_at: issue.updated_at,
deleted: false,
raw: None,
}, doc);
}

Expand Down Expand Up @@ -138,7 +137,6 @@ pub async fn list_gitlab_issues(
id: doc.id().to_string(),
updated_at: issue.updated_at,
deleted: false,
raw: None,
}, doc);
}
};
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use anyhow::{anyhow, Result};
use async_stream::stream;
use futures::Stream;
use octocrab::models::pulls::PullRequest;
use octocrab::{models::IssueState, Octocrab};
use serde_json::json;
use tabby_index::public::{
StructuredDoc, StructuredDocFields, StructuredDocPullDocumentFields, StructuredDocState,
};
Expand All @@ -20,11 +20,15 @@ fn pull_id(pull: &octocrab::models::pulls::PullRequest) -> String {
.unwrap_or_else(|| pull.url.clone())
}

pub enum Pull {
GitHub(PullRequest),
}

pub async fn list_github_pull_states(
api_base: &str,
full_name: &str,
access_token: &str,
) -> Result<impl Stream<Item = (u64, StructuredDocState)>> {
) -> Result<impl Stream<Item = (Pull, StructuredDocState)>> {
let octocrab = Octocrab::builder()
.personal_token(access_token.to_string())
.base_uri(api_base)?
Expand Down Expand Up @@ -57,25 +61,24 @@ pub async fn list_github_pull_states(

for pull in response.items {
let id = pull_id(&pull);
let updated_at = pull.updated_at.unwrap_or_else(chrono::Utc::now);

// skip closed but not merged pulls
if let Some(state) = &pull.state {
if *state == IssueState::Closed && pull.merged_at.is_none() {
yield (pull.number, StructuredDocState{
yield (Pull::GitHub(pull), StructuredDocState{
id,
updated_at: pull.updated_at.unwrap(),
updated_at,
deleted: true,
raw: None,
});
continue;
}
}

yield (pull.number, StructuredDocState{
yield (Pull::GitHub(pull), StructuredDocState{
id,
updated_at: pull.updated_at.unwrap_or_else(chrono::Utc::now),
updated_at,
deleted: false,
raw: Some(json!(pull)),
});
}

Expand All @@ -91,14 +94,11 @@ pub async fn list_github_pull_states(

pub async fn get_github_pull_doc(
source_id: &str,
raw: serde_json::Value,
pull: PullRequest,
api_base: &str,
full_name: &str,
access_token: &str,
) -> Result<StructuredDoc> {
let pull: octocrab::models::pulls::PullRequest =
serde_json::from_value(raw).map_err(|e| anyhow!("Failed to parse pull request: {}", e))?;

let octocrab = Octocrab::builder()
.personal_token(access_token.to_string())
.base_uri(api_base)?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ impl WebCrawlerJob {
id: source_doc.id().to_string(),
updated_at: Utc::now(),
deleted: false,
raw: None,
})
.await
{
Expand Down

0 comments on commit 7c6771b

Please sign in to comment.