diff --git a/Cargo.lock b/Cargo.lock index 2afaf9b..8e99cbb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -158,7 +158,7 @@ dependencies = [ [[package]] name = "doclytics" -version = "0.1.0" +version = "1.1.4-rc.9" dependencies = [ "lazy_static", "ollama-rs", @@ -891,18 +891,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.201" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "780f1cebed1629e4753a1a38a3c72d30b97ec044f0aef68cb26650a3c5cf363c" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.201" +version = "1.0.203" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e405930b9796f1c00bee880d03fc7e0bb4b9a11afc776885ffe84320da2865" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 7ae6171..b7ea7b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "doclytics" -version = "0.1.0" +version = "1.1.4-rc.9" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/llm_api.rs b/src/llm_api.rs index b6239dd..d76d9c4 100644 --- a/src/llm_api.rs +++ b/src/llm_api.rs @@ -13,5 +13,14 @@ pub async fn generate_response( let res = ollama .generate(GenerationRequest::new(model.clone(), prompt)) .await; - res.map_err(|e| e.into()) // Map the Err variant to a Box + match res { + Ok(res) => { + slog_scope::debug!("Response from ollama:\n {}", res.response); + Ok(res) + }, + Err(e) => { + slog_scope::error!("{}", e); + Err(e.into()) + } + } } \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 39bc860..4f1256e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,9 +5,8 @@ mod logger; use ollama_rs::{ Ollama, }; -use substring::Substring; -use reqwest::{Client, }; +use reqwest::{Client}; use std::result::Result; //function that fetches data from the endpoint @@ -18,22 +17,23 @@ use serde_json::{Value}; use std::env; use crate::llm_api::generate_response; use crate::paperless::{get_data_from_paperless, query_custom_fields, update_document_fields}; +use substring::Substring; #[derive(Serialize, Deserialize, Debug, Clone)] struct Document { id: u32, correspondent: Option, - document_type: Option, + document_type: Option, storage_path: Option, title: String, content: String, created: String, - created_date: String, + created_date: Option, modified: String, added: String, archive_serial_number: Option, original_file_name: String, - archived_file_name: String, + archived_file_name: Option, owner: Option, notes: Vec, tags: Vec, @@ -49,6 +49,7 @@ struct Response { all: Vec, results: Vec, } + #[derive(Serialize, Deserialize, Debug, Clone)] struct CustomField { value: Option, @@ -85,7 +86,7 @@ fn init_ollama_client(host: &str, port: u16, secure_endpoint: bool) -> Ollama { // Refactor the main process into a function for better readability async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_url: &str, filter: &str) -> Result<(), Box> { - let prompt_base= env::var("BASE_PROMPT").unwrap_or_else(|_| "Please extract metadata\ + let prompt_base = env::var("BASE_PROMPT").unwrap_or_else(|_| "Please extract metadata\ from the provided document and return it in JSON format.\ The fields I need are:\ title,topic,sender,recipient,urgency(with value either n/a or low or medium or high),\ @@ -101,21 +102,38 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u match get_data_from_paperless(&client, &base_url, filter).await { Ok(data) => { for document in data { + slog_scope::trace!("Document Content: {}", document.content); slog_scope::info!("Generate Response with LLM {}", "model"); - let res = generate_response(ollama, &model.to_string(), &prompt_base.to_string(), &document).await?; - if let Some(json_str) = extract_json_object(&res.response) { - match serde_json::from_str(&json_str) { - Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url).await?, - Err(e) => { - slog_scope::error!("Error parsing llm response json {}", e.to_string()); - slog_scope::debug!("JSON String was: {}",&json_str); - }, + slog_scope::debug!("with Prompt: {}", prompt_base); + + match generate_response(ollama, &model.to_string(), &prompt_base.to_string(), &document).await { + Ok(res) => { + // Log the response from the generate_response call + slog_scope::debug!("LLM Response: {}", res.response); + + match extract_json_object(&res.response) { + Ok(json_str) => { + // Log successful JSON extraction + slog_scope::debug!("Extracted JSON Object: {}", json_str); + + match serde_json::from_str(&json_str) { + Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url).await?, + Err(e) => { + slog_scope::error!("Error parsing llm response json {}", e.to_string()); + slog_scope::debug!("JSON String was: {}", &json_str); + } + } + } + Err(e) => slog_scope::error!("{}", e), + } + }, + Err(e) => { + slog_scope::error!("Error generating llm response: {}", e); + continue; } - } else { - slog_scope::error!("No JSON object found in the response{}", "!"); } } - } + }, Err(e) => slog_scope::error!("Error while interacting with paperless: {}", e), } Ok(()) @@ -124,7 +142,7 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u #[tokio::main] async fn main() -> Result<(), Box> { logger::init(); // Initializes the global logger - slog_scope::info!("Application started {}", "!"); + slog_scope::info!("Application started, version: {}", env!("CARGO_PKG_VERSION")); let token = env::var("PAPERLESS_TOKEN").expect("PAPERLESS_TOKEN is not set in .env file"); let base_url = env::var("PAPERLESS_BASE_URL").expect("PAPERLESS_BASE_URL is not set in .env file"); let client = init_paperless_client(&token); @@ -146,8 +164,7 @@ async fn main() -> Result<(), Box> { process_documents(&client, &ollama, &model, &base_url, default_filter.as_str()).await } -fn extract_json_object(input: &str) -> Option { - slog_scope::debug!("Input: {}", input); +fn extract_json_object(input: &str) -> Result { let mut brace_count = 0; let mut json_start = None; let mut json_end = None; @@ -155,17 +172,16 @@ fn extract_json_object(input: &str) -> Option { for (i, c) in input.chars().enumerate() { match c { '{' | '[' => { - brace_count += 1; - if json_start.is_none() { + if brace_count == 0 { json_start = Some(i); } + brace_count += 1; } '}' | ']' => { - if brace_count > 0 { - brace_count -= 1; - if brace_count == 0 { - json_end = Some(i); // Include the closing brace - } + brace_count -= 1; + if brace_count == 0 { + json_end = Some(i); + break; // Found the complete JSON object } } _ => {} @@ -174,8 +190,11 @@ fn extract_json_object(input: &str) -> Option { if let (Some(start), Some(end)) = (json_start, json_end) { slog_scope::debug!("{}", input.substring(start, end + 1)); - Some(input.substring(start, end + 1).to_string()) // Use end with equal sign + Ok(input.substring(start, end + 1).to_string()) } else { - None + let error_msg = "No JSON object found in the response!".to_string(); + slog_scope::debug!("{}", error_msg); + Err(error_msg) } } + diff --git a/src/paperless.rs b/src/paperless.rs index 4b92db9..4624ed4 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::fmt; use reqwest::Client; use serde::de::StdError; use serde_json::Value; @@ -8,7 +9,7 @@ pub async fn get_data_from_paperless( client: &Client, url: &str, filter: &str, -) -> std::result::Result, Box> { +) -> Result, Box> { // Read token from environment //Define filter string let filter = filter; @@ -20,7 +21,7 @@ pub async fn get_data_from_paperless( match response_result { Ok(data) => { let body = data.text().await?; - slog_scope::debug!("Response from server while fetching documents: {}", body); + slog_scope::trace!("Response from server while fetching documents: {}", body); // Remove the "Document content: " prefix let json = body.trim_start_matches("Document content: "); @@ -32,19 +33,19 @@ pub async fn get_data_from_paperless( //let error_part = value.pointer("/results/0").unwrap(); //println!("Error part: {}", error_part); // Parse the JSON string into the Response struct - let data: std::result::Result, _> = serde_json::from_str(json); + let data: Result, _> = serde_json::from_str(json); match data { Ok(data) => { slog_scope::info!("Successfully retrieved {} Documents", data.results.len()); Ok(data.results) - }, + } Err(e) => { let column = e.column(); let start = (column as isize - 30).max(0) as usize; let end = (column + 30).min(json.len()); slog_scope::error!("Error while creating json of document response from paperless {}", e); slog_scope::error!("Error at column {}: {}", column, &json[start..end]); - slog_scope::debug!("Error occured in json {}", &json); + slog_scope::trace!("Error occured in json {}", &json); Err(e.into()) // Remove the semicolon here } } @@ -55,95 +56,118 @@ pub async fn get_data_from_paperless( } } } - pub async fn query_custom_fields( - client: &Client, - base_url: &str, - ) -> std::result::Result, Box> { - slog_scope::info!("Fetching custom fields from paperless at {}", base_url); - let res = client - .get(format!("{}/api/custom_fields/", base_url)) - .send() - .await?; - let response_result = res.error_for_status(); - match response_result { - Ok(data) => { - let body = data.text().await?; - slog_scope::debug!("Response from server while fetching documents: {}", body); +pub async fn query_custom_fields( + client: &Client, + base_url: &str, +) -> Result, Box> { + slog_scope::info!("Fetching custom fields from paperless at {}", base_url); + let res = client + .get(format!("{}/api/custom_fields/", base_url)) + .send() + .await?; + + let response_result = res.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::debug!("Response from server while fetching documents: {}", body); - // Remove the "Document content: " prefix - let json = body.trim_start_matches("Field: "); - let data: std::result::Result, _> = serde_json::from_str(json); - match data { - Ok(data) => { - slog_scope::info!("Fields: {:?}", data.results); - Ok(data.results) - } - Err(e) => { - let column = e.column(); - let start = (column as isize - 30).max(0) as usize; - let end = (column + 30).min(json.len()); - slog_scope::error!("Error occured parsing custom fields: {}", e); - slog_scope::error!("Error at column {}: {}", column, &json[start..end]); - slog_scope::debug!("Error occured in json {}", &json); - Err(e.into()) - } + // Remove the "Document content: " prefix + let json = body.trim_start_matches("Field: "); + let data: Result, _> = serde_json::from_str(json); + match data { + Ok(data) => { + slog_scope::info!("Fields: {:?}", data.results); + Ok(data.results) + } + Err(e) => { + let column = e.column(); + let start = (column as isize - 30).max(0) as usize; + let end = (column + 30).min(json.len()); + slog_scope::error!("Error occured parsing custom fields: {}", e); + slog_scope::error!("Error at column {}: {}", column, &json[start..end]); + slog_scope::debug!("Error occured in json {}", &json); + Err(e.into()) } } - Err(e) => { - slog_scope::error!("Error retrieving custom fields: {}", e); - Err(e.into()) - } + } + Err(e) => { + slog_scope::error!("Error retrieving custom fields: {}", e); + Err(e.into()) } } +} - pub async fn update_document_fields( - client: &Client, - document_id: u32, - fields: &Vec, - metadata: &HashMap>, - base_url: &str, - ) -> std::result::Result<(), Box> { - let mut custom_fields = Vec::new(); +pub async fn update_document_fields( + client: &Client, + document_id: u32, + fields: &Vec, + metadata: &HashMap>, + base_url: &str, +) -> Result<(), Box> { + let mut custom_fields = Vec::new(); - for (key, value) in metadata { - if key == "title" { - continue; - } - if let Some(field) = fields.iter().find(|&f| f.name == *key) { - let custom_field = CustomField { - field: field.id.clone(), - value: value.as_ref().cloned(), - }; - custom_fields.push(custom_field); - } - } - // Add the tagged field, to indicate that the document has been processed - let custom_field = CustomField { - field: 1, +// Use `if let` to conditionally execute code if the 'tagged' field is found. + if let Some(field) = fields.iter().find(|&f| f.name == "tagged") { + let tagged_field = CustomField { + field: field.id, value: Some(serde_json::json!(true)), }; - custom_fields.push(custom_field); - let mut payload = serde_json::Map::new(); - payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields)); - if let Some(value) = metadata.get("title").and_then(|v| v.as_ref().and_then(|v| v.as_str())) { - payload.insert("title".to_string(), serde_json::json!(value)); + // Add this tagged_field to your custom_fields collection or use it as needed. + custom_fields.push(tagged_field); + + // Continue with your logic, such as preparing the payload and sending the request. + } else { + // Handle the case where tagged_field_id is None, which means the "tagged" field wasn't found. + slog_scope::error!("{} field not found in the provided fields.", "'tagged'"); + return Err(Box::new(fmt::Error::default())); // Use a standard library error type like fmt::Error. + + } + for (key, value) in metadata { + if key == "title" { + continue; } - let url = format!("{}/api/documents/{}/", base_url, document_id); - slog_scope::info!("Updating document with ID: {}", document_id); - let res = client.patch(&url).json(&payload).send().await?; - let response_result = res.error_for_status(); - match response_result { - Ok(data) => { - let body = data.text().await?; - slog_scope::debug!("{}", body); - slog_scope::info!("Document with ID: {} successfully updated", document_id); - Ok(()) - } - Err(e) => { - slog_scope::error!("Error while updating document fields: {}", e); - Err(e.into()) - } + + if let Some(field) = fields.iter().find(|&f| f.name == *key) { + let custom_field = CustomField { + field: field.id.clone(), + value: value.as_ref().cloned(), + }; + custom_fields.push(custom_field); + } + } +// Check if tagged_field_id has a value and then proceed. + + let mut payload = serde_json::Map::new(); + + payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields)); + if let Some(value) = metadata.get("title").and_then(|v| v.as_ref().and_then(|v| v.as_str())) { + payload.insert("title".to_string(), serde_json::json!(value)); + } + if payload.is_empty() { + slog_scope::warn!("{}", "payload is empty, not updating fields"); + return Err(Box::new(fmt::Error::default())); // Use a standard library error type like fmt::Error. + } + let url = format!("{}/api/documents/{}/", base_url, document_id); + slog_scope::info!("Updating document with ID: {}", document_id); + slog_scope::debug!("Request Payload: {}", ""); + for (key, value) in &payload { + slog_scope::debug!("{}: {}", key, value); + } + let res = client.patch(&url).json(&payload).send().await?; + let response_result = res.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::trace!("{}", body); + slog_scope::info!("Document with ID: {} successfully updated", document_id); + Ok(()) + } + Err(e) => { + slog_scope::error!("Error while updating document fields: {}", e); + Err(e.into()) } - } \ No newline at end of file + } +} \ No newline at end of file