From 544768525114f481af2e3f18b6b9e00d821e818b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Sat, 27 Jul 2024 12:12:03 +0200 Subject: [PATCH 01/32] feat: add modules for paperless tags, document types, and correspondents Introduced new modules for handling paperless tags, document types, and correspondents, including basic structures and functions for creating and retrieving data. Also added utility functions for response parsing and error handling. --- src/error.rs | 33 +++++++++++++++++++++++ src/main.rs | 5 ++++ src/paperless_correspondents.rs | 25 ++++++++++++++++++ src/paperless_documenttypes.rs | 46 +++++++++++++++++++++++++++++++++ src/paperless_tags.rs | 26 +++++++++++++++++++ src/util.rs | 39 ++++++++++++++++++++++++++++ 6 files changed, 174 insertions(+) create mode 100644 src/error.rs create mode 100644 src/paperless_correspondents.rs create mode 100644 src/paperless_documenttypes.rs create mode 100644 src/paperless_tags.rs create mode 100644 src/util.rs diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..b900cfa --- /dev/null +++ b/src/error.rs @@ -0,0 +1,33 @@ +use std::fmt; + +#[derive(Debug)] +pub enum ResponseError { + Io(std::io::Error), + ParseBody(std::num::ParseIntError), + RequestError(std::io::Error), + Other(String), +} + +// Step 2: Implement std::fmt::Display +impl fmt::Display for ResponseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ResponseError::Io(ref err) => write!(f, "IO error: {}", err), + ResponseError::ParseBody(ref err) => write!(f, "Parse error: {}", err), + ResponseError::RequestError(ref err) => write!(f, "Parse error: {}", err), + ResponseError::Other(ref err) => write!(f, "Other error: {}", err), + } + } +} + +impl std::error::Error for ResponseError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match *self { + ResponseError::Io(ref err) => Some(err), + ResponseError::ParseBody(ref err) => Some(err), + ResponseError::RequestError(ref err) => Some(err), + ResponseError::Other(_) => None, + } + } +} + diff --git a/src/main.rs b/src/main.rs index c45a7b9..529f363 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,11 @@ mod llm_api; mod paperless; mod logger; +mod paperless_tags; +mod paperless_documenttypes; +mod paperless_correspondents; +mod util; +mod error; use ollama_rs::{ Ollama, diff --git a/src/paperless_correspondents.rs b/src/paperless_correspondents.rs new file mode 100644 index 0000000..1ccf415 --- /dev/null +++ b/src/paperless_correspondents.rs @@ -0,0 +1,25 @@ +use reqwest::Client; +use crate::paperless::CreateField; +struct DocumentType { + id: u32, + slug: String, + name: String, + matching_algorithm: u8 +} +pub fn create_correspondent( + correspondent_name: &str, + client: &Client, + base_url: &str, +) { + +} + + +pub fn get_correspondents( + client: &Client, + base_url: &str, +) { + + let url = format!("{}/api/correspondents/", base_url); + let res= client.get(url).send(); +} diff --git a/src/paperless_documenttypes.rs b/src/paperless_documenttypes.rs new file mode 100644 index 0000000..3d2bb26 --- /dev/null +++ b/src/paperless_documenttypes.rs @@ -0,0 +1,46 @@ +use reqwest::Client; + + + +struct DocumentType { + id: u32, + slug: String, + name: String, + matching_algorithm: u8 +} +pub fn create_document_type( + document_types: &str, + client: &Client, + base_url: &str, +) { + + + +} + + +pub fn get_document_types( + client: &Client, + base_url: &str, +) { + + let url = format!("{}/api/document_types/", base_url); + let res= 
client.get(url).send(); + let body = match { + Ok(data) => { + + }, + Err(e) => { + slog_scope::error!("Error getting document types: {}", {}) + } + }; +} + + + +pub fn determine_if_type_exists( + client: &Client, + base_url: &str, +) { + +} \ No newline at end of file diff --git a/src/paperless_tags.rs b/src/paperless_tags.rs new file mode 100644 index 0000000..e46ab35 --- /dev/null +++ b/src/paperless_tags.rs @@ -0,0 +1,26 @@ +use reqwest::Client; + + +struct Tags { + id: u32, + slug: String, + name: String, + matching_algorithm: u8 +} +pub fn create_tag( + correspondent_name: &str, + client: &Client, + base_url: &str, +) { + +} + + +pub fn get_tags( + client: &Client, + base_url: &str, +) { + + let url = format!("{}/api/tags/", base_url); + let res= client.get(url).send(); +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..cbe732c --- /dev/null +++ b/src/util.rs @@ -0,0 +1,39 @@ +use reqwest::Response; +use serde::Deserialize; +use crate::error::ResponseError; +use crate::{Response as PaperlessResponse}; + + +pub async fn parse_response(response: Response) -> Result where T: Deserialize<'static> { + + let response_result = response.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::trace!("Response from server while fetching documents: {}", body); + + let json = body.trim_start_matches("Document content: "); + + let data: Result, _> = serde_json::from_str(json); + match data { + Ok(data) => { + slog_scope::info!("Successfully retrieved {} Documents", data.results.len()); + Ok(data.results) + } + Err(e) => { + let column = e.column(); + let start = (column as isize - 30).max(0) as usize; + let end = (column + 30).min(json.len()); + slog_scope::error!("Error while creating json of document response from paperless {}", e); + slog_scope::error!("Error at column {}: {}", column, &json[start..end]); + slog_scope::trace!("Error occured in json {}", &json); + Err(e.into()) // Remove the semicolon here + } + } + } + Err(e) => { + slog_scope::error!("Error while fetching documents from paperless: {}",e); + Err(e.into()) + } + } +} \ No newline at end of file From 6396551917d9fee7c644dc81b71157898740d78d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Thu, 19 Sep 2024 21:46:45 +0200 Subject: [PATCH 02/32] feat: refactor file structure and improve LLM integration Refactor the structure of Paperless-related modules by merging functionalities and removing redundant files. Improved LLM response handling by separating the prompt construction and response data extraction into distinct functions, leading to more modular and maintainable code. 
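A condensed view of the call shape this refactor introduces, taken from the main.rs and llm_api.rs hunks below (full signatures and error handling appear in the diff itself):

    // Per-document work is delegated to a single helper...
    generate_response_and_extract_data(
        ollama, &model, &prompt_base, client, &fields, base_url, mode, &document,
    ).await;

    // ...which builds the prompt itself and hands a plain String to the LLM layer,
    // so generate_response no longer needs to know about Document at all.
    let prompt = format!("{} {}", prompt_base, document.content);
    let res = generate_response(ollama, &model.to_string(), prompt).await;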
--- src/llm_api.rs | 4 +- src/main.rs | 56 +++++----- src/paperless.rs | 184 +++++++++++++++++++++++++++++++- src/paperless_correspondents.rs | 25 ----- src/paperless_defaultfields.rs | 66 ++++++++++++ src/paperless_documenttypes.rs | 46 -------- src/paperless_tags.rs | 26 ----- 7 files changed, 278 insertions(+), 129 deletions(-) delete mode 100644 src/paperless_correspondents.rs create mode 100644 src/paperless_defaultfields.rs delete mode 100644 src/paperless_documenttypes.rs delete mode 100644 src/paperless_tags.rs diff --git a/src/llm_api.rs b/src/llm_api.rs index d76d9c4..09dfa99 100644 --- a/src/llm_api.rs +++ b/src/llm_api.rs @@ -6,10 +6,8 @@ use crate::Document; pub async fn generate_response( ollama: &Ollama, model: &String, - prompt_base: &String, - document: &Document, + prompt: String, ) -> std::result::Result> { - let prompt = format!("{} {}", document.content, prompt_base); let res = ollama .generate(GenerationRequest::new(model.clone(), prompt)) .await; diff --git a/src/main.rs b/src/main.rs index 529f363..6494eba 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,7 @@ mod llm_api; mod paperless; mod logger; -mod paperless_tags; -mod paperless_documenttypes; -mod paperless_correspondents; +mod paperless_defaultfields; mod util; mod error; @@ -129,7 +127,7 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u Analyze the document to find the values for these fields and format the response as a \ JSON object. Use the most likely answer for each field. \ The response should contain only JSON data where the key and values are all in simple string \ - format(no nested object) for direct parsing by another program. So now additional text or \ + format(no nested object) for direct parsing by another program. 
So no additional text or \ explanation, no introtext, the answer should start and end with curly brackets \ delimiting the json object ".to_string() }; @@ -166,38 +164,44 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u } async fn process_documents_batch(documents: &Vec, ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: &Vec, base_url: &str, mode: Mode) -> Result<(), Box> { + Ok(for document in documents { slog_scope::trace!("Document Content: {}", document.content); slog_scope::info!("Generate Response with LLM {}", "model"); slog_scope::debug!("with Prompt: {}", prompt_base); - match generate_response(ollama, &model.to_string(), &prompt_base.to_string(), &document).await { - Ok(res) => { - // Log the response from the generate_response call - slog_scope::debug!("LLM Response: {}", res.response); - - match extract_json_object(&res.response) { - Ok(json_str) => { - // Log successful JSON extraction - slog_scope::debug!("Extracted JSON Object: {}", json_str); - - match serde_json::from_str(&json_str) { - Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?, - Err(e) => { - slog_scope::error!("Error parsing llm response json {}", e.to_string()); - slog_scope::debug!("JSON String was: {}", &json_str); - } + generate_response_and_extract_data(ollama, &model, &prompt_base, client, &fields, base_url, mode, &document).await; + }) +} + +async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: &Vec, base_url: &str, mode: Mode, document: &Document) { + let prompt = format!("{} {}", prompt_base, document.content); + + match generate_response(ollama, &model.to_string(), prompt).await { + Ok(res) => { + // Log the response from the generate_response call + slog_scope::debug!("LLM Response: {}", res.response); + + match extract_json_object(&res.response) { + Ok(json_str) => { + // Log successful JSON extraction + slog_scope::debug!("Extracted JSON Object: {}", json_str); + + match serde_json::from_str(&json_str) { + Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?, + Err(e) => { + slog_scope::error!("Error parsing llm response json {}", e.to_string()); + slog_scope::debug!("JSON String was: {}", &json_str); } } - Err(e) => slog_scope::error!("{}", e), } - } - Err(e) => { - slog_scope::error!("Error generating llm response: {}", e); - continue; + Err(e) => slog_scope::error!("{}", e), } } - }) + Err(e) => { + slog_scope::error!("Error generating llm response: {}", e); + } + } } #[tokio::main] diff --git a/src/paperless.rs b/src/paperless.rs index 4fc919a..161523e 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -1,11 +1,38 @@ use std::collections::HashMap; use std::fmt; +use std::fmt::Debug; use reqwest::Client; -use serde::de::StdError; +use serde::de::{DeserializeOwned, StdError}; use serde_json::{Map, Value}; use crate::{CustomField, Document, Field, Mode, Response}; use serde::{Deserialize, Serialize}; +#[derive(Clone, Copy)] +pub enum PaperlessDefaultFieldType { + Tag, + DocumentType, + Correspondent, +} + +impl PaperlessDefaultFieldType { + fn to_string(self) -> &'static str { + match self { + PaperlessDefaultFieldType::Tag => "tags", + PaperlessDefaultFieldType::DocumentType => "document_types", + PaperlessDefaultFieldType::Correspondent => "correspondents", + } + } +} + +#[derive(Serialize, Deserialize, Debug, Clone)] +struct DefaultField { + #[serde(skip_serializing_if = 
"Option::is_none")] // Skip `id` if it's None + id: Option, + slug: String, + name: String, + matching_algorithm: u8, +} + pub async fn get_data_from_paperless( client: &Client, url: &str, @@ -34,7 +61,7 @@ pub async fn get_data_from_paperless( //let error_part = value.pointer("/results/0").unwrap(); //println!("Error part: {}", error_part); // Parse the JSON string into the Response struct - return parse_document_response(json); + parse_document_response(json) } Err(e) => { slog_scope::error!("Error while fetching documents from paperless: {}",e); @@ -68,7 +95,7 @@ pub async fn get_next_data_from_paperless(client: &Client, //let error_part = value.pointer("/results/0").unwrap(); //println!("Error part: {}", error_part); // Parse the JSON string into the Response struct - return parse_document_response(json); + parse_document_response(json) } Err(e) => { slog_scope::error!("Error while fetching documents from paperless: {}",e); @@ -139,6 +166,51 @@ pub async fn query_custom_fields( } } +pub async fn get_default_fields( + client: &Client, + base_url: &str, + endpoint: PaperlessDefaultFieldType, +) -> Result, Box> +where + T: DeserializeOwned + Debug, +{ + slog_scope::info!("Fetching custom fields from paperless at {}", base_url); + let res = client + .get(format!("{}/api/{}/", base_url, endpoint.to_string())) + .send() + .await?; + + let response_result = res.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::debug!("Response from server while fetching documents: {}", body); + + // Remove the "Field: " prefix if necessary + let json = body.trim_start_matches("Field: "); + let data: Result, _> = serde_json::from_str(json); + match data { + Ok(data) => { + slog_scope::info!("{}: {:?}", endpoint.to_string(), data.results); + Ok(data.results) + } + Err(e) => { + let column = e.column(); + let start = (column as isize - 30).max(0) as usize; + let end = (column + 30).min(json.len()); + slog_scope::error!("Error occurred parsing custom fields: {}", e); + slog_scope::error!("Error at column {}: {}", column, &json[start..end]); + slog_scope::debug!("Error occurred in json {}", &json); + Err(e.into()) + } + } + } + Err(e) => { + slog_scope::error!("Error retrieving custom fields: {}", e); + Err(e.into()) + } + } +} pub async fn update_document_fields( client: &Client, document_id: u32, @@ -230,6 +302,77 @@ pub async fn update_document_fields( } } +/// This function update the default fields like tags, correspondents and document_types in paperless +/// it is checked if a field exists on the server and if not, it is created +/// +pub async fn update_document_default_fields( + client: &Client, + document_id: u32, + fields: &Vec, + data: Vec, + base_url: &str, + endpoint: PaperlessDefaultFieldType, + mode: Mode, +) -> Result<(), Box> { + let mut default_field_ids = Vec::new(); + + for value in data { + + if let Some(field) = fields.iter().find(|&f| f.name == *value) { + let default_field_id = field.id; + default_field_ids.push(default_field_id); + } else { + if matches!(mode, Mode::Create) { + slog_scope::info!("Creating {}: {}", endpoint.to_string(), value); + let create_field = DefaultField { + id: None, + name: value.clone(), + slug: value.clone(), + matching_algorithm: 6, + }; + match create_default_field(client, &create_field, base_url, endpoint).await + { + Ok(new_field) => { + default_field_ids.push(new_field.id) + } + Err(e) => { + slog_scope::error!("Error: {} creating custom field: {}, skipping...",e, value) + } + } + } + } + } + + let 
mut payload = serde_json::Map::new(); + payload.insert(endpoint.to_string().to_string(), serde_json::json!(default_field_ids)); + + if payload.is_empty() { + slog_scope::warn!("{}", "payload is empty, not updating fields"); + return Err(Box::new(fmt::Error::default())); // Use a standard library error type like fmt::Error. + } + let url = format!("{}/api/documents/{}/", base_url, document_id); + slog_scope::info!("Updating document with ID: {}", document_id); + slog_scope::debug!("Request Payload: {}", map_to_string(&payload)); + + for (key, value) in &payload { + slog_scope::debug!("{}: {}", key, value); + } + let res = client.patch(&url).json(&payload).send().await?; + let response_result = res.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::trace!("{}", body); + slog_scope::info!("Document with ID: {} successfully updated", document_id); + Ok(()) + } + Err(e) => { + slog_scope::error!("Error while updating document fields: {}", e); + Err(e.into()) + } + } +} + fn convert_field_to_custom_field(value: &Option, field: &Field) -> CustomField { let custom_field = CustomField { field: field.id.clone(), @@ -279,6 +422,41 @@ pub async fn create_custom_field( } } } +pub async fn create_default_field( + client: &Client, + field: &DefaultField, + base_url: &str, + endpoint: PaperlessDefaultFieldType, +) -> Result> { + // Define the URL for creating a custom field + let url = format!("{}/api/{}/", base_url, endpoint.to_string()); + + + // Send the request to create the custom field + let res = client.post(&url).json(&field).send().await?; + let response_result = res.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::trace!("{}", body); + let field: Result, _> = serde_json::from_str(&body); + match field { + Ok(field) => { + Ok(field.results[0].clone()) // TODO: improve + } + Err(e) => { + slog_scope::debug!("Creating field response: {}", body); + slog_scope::error!("Error parsing response from new field: {}", e); + Err(e.into()) + } + } + } + Err(e) => { + slog_scope::error!("Error creating custom field: {}", e); + Err(e.into()) + } + } +} fn map_to_string(map: &Map) -> String { map.iter() .map(|(key, value)| format!("{}: {}", key, value)) diff --git a/src/paperless_correspondents.rs b/src/paperless_correspondents.rs deleted file mode 100644 index 1ccf415..0000000 --- a/src/paperless_correspondents.rs +++ /dev/null @@ -1,25 +0,0 @@ -use reqwest::Client; -use crate::paperless::CreateField; -struct DocumentType { - id: u32, - slug: String, - name: String, - matching_algorithm: u8 -} -pub fn create_correspondent( - correspondent_name: &str, - client: &Client, - base_url: &str, -) { - -} - - -pub fn get_correspondents( - client: &Client, - base_url: &str, -) { - - let url = format!("{}/api/correspondents/", base_url); - let res= client.get(url).send(); -} diff --git a/src/paperless_defaultfields.rs b/src/paperless_defaultfields.rs new file mode 100644 index 0000000..c9fd148 --- /dev/null +++ b/src/paperless_defaultfields.rs @@ -0,0 +1,66 @@ +use ollama_rs::Ollama; +use reqwest::Client; +use serde::de::StdError; +use crate::{extract_json_object, Document, Field, Mode}; +use crate::llm_api::generate_response; +use crate::paperless::{get_default_fields, update_document_fields, PaperlessDefaultFieldType}; + +const ANSWER_INSTRUCTION: String = "The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. 
The document is:".to_string(); +async fn construct_document_type_prompt(client: &Client, base_url: &str) -> Result> { + let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::DocumentType).await?; + let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one: ", document_types); + Ok(base_prompt) +} + + +async fn construct_tag_prompt(client: &Client, base_url: &str) -> Result> { + let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Tag).await?; + let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one: ", document_types); + Ok(base_prompt) +} +async fn construct_correspondent_prompt(client: &Client, base_url: &str) -> Result> { + let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Correspondent).await?; + let base_prompt = format!("Determine possible correspondents from this document from the following available correspondents: {:?}, if none of these fit the document, create a new one. The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. The document is: ", document_types); + Ok(base_prompt) +} + + + + + + +async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: &Vec, base_url: &str, mode: Mode, document: &Document) { + let prompt = format!("{} {}", prompt_base, document.content); + + match generate_response(ollama, &model.to_string(), prompt).await { + Ok(res) => { + // Log the response from the generate_response call + slog_scope::debug!("LLM Response: {}", res.response); + + match extract_json_object(&res.response) { + Ok(json_str) => { + // Log successful JSON extraction + slog_scope::debug!("Extracted JSON Object: {}", json_str); + + match serde_json::from_str(&json_str) { + Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?, + Err(e) => { + slog_scope::error!("Error parsing llm response json {}", e.to_string()); + slog_scope::debug!("JSON String was: {}", &json_str); + } + } + } + Err(e) => slog_scope::error!("{}", e), + } + } + Err(e) => { + slog_scope::error!("Error generating llm response: {}", e); + } + } +} +pub fn determine_if_type_exists( + client: &Client, + base_url: &str, +) { + //TODO: +} \ No newline at end of file diff --git a/src/paperless_documenttypes.rs b/src/paperless_documenttypes.rs deleted file mode 100644 index 3d2bb26..0000000 --- a/src/paperless_documenttypes.rs +++ /dev/null @@ -1,46 +0,0 @@ -use reqwest::Client; - - - -struct DocumentType { - id: u32, - slug: String, - name: String, - matching_algorithm: u8 -} -pub fn create_document_type( - document_types: &str, - client: &Client, - base_url: &str, -) { - - - -} - - -pub fn get_document_types( - client: &Client, - base_url: &str, -) { - - let url = format!("{}/api/document_types/", base_url); - let res= client.get(url).send(); - let body = match { - Ok(data) => { - - }, - Err(e) => { - slog_scope::error!("Error getting document types: {}", {}) - } - }; -} - - - -pub fn determine_if_type_exists( - client: &Client, - base_url: &str, -) { - -} \ No newline at end of file diff --git a/src/paperless_tags.rs b/src/paperless_tags.rs deleted file mode 100644 index e46ab35..0000000 --- a/src/paperless_tags.rs +++ /dev/null @@ -1,26 
+0,0 @@ -use reqwest::Client; - - -struct Tags { - id: u32, - slug: String, - name: String, - matching_algorithm: u8 -} -pub fn create_tag( - correspondent_name: &str, - client: &Client, - base_url: &str, -) { - -} - - -pub fn get_tags( - client: &Client, - base_url: &str, -) { - - let url = format!("{}/api/tags/", base_url); - let res= client.get(url).send(); -} From da463cb2ca8be38076808e7e0befcfd427c30d75 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 22:37:33 +0000 Subject: [PATCH 03/32] fix(deps): update rust crate reqwest to v0.12.8 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0d325bb..2b5deb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -871,9 +871,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.7" +version = "0.12.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" +checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" dependencies = [ "base64 0.22.0", "bytes", From e1dd59f39bb510e1bc56b430bf041c7f063a891d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Fri, 4 Oct 2024 22:30:27 +0200 Subject: [PATCH 04/32] fix: error handling and flow of filling default fields --- src/main.rs | 2 +- src/paperless.rs | 25 ++++++-------- src/paperless_defaultfields.rs | 61 ++++++++++++++++++++-------------- 3 files changed, 48 insertions(+), 40 deletions(-) diff --git a/src/main.rs b/src/main.rs index 6494eba..82f9aa5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -188,7 +188,7 @@ async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt slog_scope::debug!("Extracted JSON Object: {}", json_str); match serde_json::from_str(&json_str) { - Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?, + Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await.unwrap_or_default(), //TODO: Fix unwrap Err(e) => { slog_scope::error!("Error parsing llm response json {}", e.to_string()); slog_scope::debug!("JSON String was: {}", &json_str); diff --git a/src/paperless.rs b/src/paperless.rs index 161523e..77fee1b 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -25,7 +25,7 @@ impl PaperlessDefaultFieldType { } #[derive(Serialize, Deserialize, Debug, Clone)] -struct DefaultField { +pub struct DefaultField { #[serde(skip_serializing_if = "Option::is_none")] // Skip `id` if it's None id: Option, slug: String, @@ -166,13 +166,11 @@ pub async fn query_custom_fields( } } -pub async fn get_default_fields( +pub async fn get_default_fields( client: &Client, base_url: &str, endpoint: PaperlessDefaultFieldType, -) -> Result, Box> -where - T: DeserializeOwned + Debug, +) -> Result, Box> { slog_scope::info!("Fetching custom fields from paperless at {}", base_url); let res = client @@ -188,7 +186,7 @@ where // Remove the "Field: " prefix if necessary let json = body.trim_start_matches("Field: "); - let data: Result, _> = serde_json::from_str(json); + let data: Result, _> = serde_json::from_str(json); match data { Ok(data) => { slog_scope::info!("{}: {:?}", endpoint.to_string(), data.results); @@ -313,7 +311,7 @@ pub async fn update_document_default_fields( base_url: &str, endpoint: PaperlessDefaultFieldType, mode: Mode, -) -> Result<(), Box> { +) -> Option> { let mut default_field_ids = Vec::new(); for value in data { @@ -348,7 
+346,7 @@ pub async fn update_document_default_fields( if payload.is_empty() { slog_scope::warn!("{}", "payload is empty, not updating fields"); - return Err(Box::new(fmt::Error::default())); // Use a standard library error type like fmt::Error. + return None } let url = format!("{}/api/documents/{}/", base_url, document_id); slog_scope::info!("Updating document with ID: {}", document_id); @@ -357,18 +355,17 @@ pub async fn update_document_default_fields( for (key, value) in &payload { slog_scope::debug!("{}: {}", key, value); } - let res = client.patch(&url).json(&payload).send().await?; - let response_result = res.error_for_status(); + let res = client.patch(&url).json(&payload).send().await; + let response_result = res; match response_result { Ok(data) => { - let body = data.text().await?; - slog_scope::trace!("{}", body); + let body = data.text().await; slog_scope::info!("Document with ID: {} successfully updated", document_id); - Ok(()) + None } Err(e) => { slog_scope::error!("Error while updating document fields: {}", e); - Err(e.into()) + Some(Box::new(e)) } } } diff --git a/src/paperless_defaultfields.rs b/src/paperless_defaultfields.rs index c9fd148..cf97e16 100644 --- a/src/paperless_defaultfields.rs +++ b/src/paperless_defaultfields.rs @@ -3,58 +3,69 @@ use reqwest::Client; use serde::de::StdError; use crate::{extract_json_object, Document, Field, Mode}; use crate::llm_api::generate_response; -use crate::paperless::{get_default_fields, update_document_fields, PaperlessDefaultFieldType}; +use crate::paperless::{get_default_fields, update_document_default_fields, update_document_fields, DefaultField, PaperlessDefaultFieldType}; const ANSWER_INSTRUCTION: String = "The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. The document is:".to_string(); async fn construct_document_type_prompt(client: &Client, base_url: &str) -> Result> { - let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::DocumentType).await?; + let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::DocumentType).await; let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one: ", document_types); Ok(base_prompt) } async fn construct_tag_prompt(client: &Client, base_url: &str) -> Result> { - let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Tag).await?; + let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Tag).await; let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one: ", document_types); Ok(base_prompt) } async fn construct_correspondent_prompt(client: &Client, base_url: &str) -> Result> { - let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Correspondent).await?; + let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Correspondent).await; let base_prompt = format!("Determine possible correspondents from this document from the following available correspondents: {:?}, if none of these fit the document, create a new one. The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. 
The document is: ", document_types); Ok(base_prompt) } +async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: Vec, base_url: &str, document: &Document, mode: Mode, field_type: PaperlessDefaultFieldType) -> Option> { + let prompt = match field_type { + PaperlessDefaultFieldType::Correspondent => construct_correspondent_prompt(client, base_url).await, + PaperlessDefaultFieldType::Tag => construct_tag_prompt(client, base_url).await, + PaperlessDefaultFieldType::DocumentType => construct_document_type_prompt(client, base_url).await, + }; + match prompt { + Ok(prompt) => { + match generate_response(ollama, &model.to_string(), prompt).await { + Ok(res) => { + // Log the response from the generate_response call + slog_scope::debug!("LLM Response: {}", res.response); + match extract_json_object(&res.response) { + Ok(json_str) => { + // Log successful JSON extraction + slog_scope::debug!("Extracted JSON Object: {}", json_str); - - -async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: &Vec, base_url: &str, mode: Mode, document: &Document) { - let prompt = format!("{} {}", prompt_base, document.content); - - match generate_response(ollama, &model.to_string(), prompt).await { - Ok(res) => { - // Log the response from the generate_response call - slog_scope::debug!("LLM Response: {}", res.response); - - match extract_json_object(&res.response) { - Ok(json_str) => { - // Log successful JSON extraction - slog_scope::debug!("Extracted JSON Object: {}", json_str); - - match serde_json::from_str(&json_str) { - Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?, + match serde_json::from_str(&json_str) { + Ok(json) => update_document_default_fields(client, document.id, &fields, json, base_url, field_type, mode).await, + Err(e) => { + slog_scope::error!("Error parsing llm response json {}", e.to_string()); + slog_scope::debug!("JSON String was: {}", & json_str); + Some(Box::new(e)) + } + } + } Err(e) => { - slog_scope::error!("Error parsing llm response json {}", e.to_string()); - slog_scope::debug!("JSON String was: {}", &json_str); + slog_scope::error ! ("{}", e); + None } } } - Err(e) => slog_scope::error!("{}", e), + Err(e) => { + slog_scope::error ! 
("Error generating llm response: {}", e); + None + } } } Err(e) => { - slog_scope::error!("Error generating llm response: {}", e); + Some(e) } } } From 42f43ca8564475f1c0074a0234255183e0683c72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Sat, 5 Oct 2024 10:08:33 +0200 Subject: [PATCH 05/32] fix: change prompt to generate more accurate results --- src/main.rs | 10 ++++++++- src/paperless.rs | 4 ++-- src/paperless_defaultfields.rs | 13 ++++++------ src/util.rs | 39 ---------------------------------- 4 files changed, 18 insertions(+), 48 deletions(-) diff --git a/src/main.rs b/src/main.rs index 82f9aa5..3c0d3a8 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,8 +19,9 @@ use serde::{Deserialize, Serialize}; use serde_json::{Value}; use std::env; use crate::llm_api::generate_response; -use crate::paperless::{get_data_from_paperless, get_next_data_from_paperless, query_custom_fields, update_document_fields}; +use crate::paperless::{get_data_from_paperless, get_default_fields, get_next_data_from_paperless, query_custom_fields, update_document_fields, PaperlessDefaultFieldType}; use substring::Substring; +use crate::paperless_defaultfields::extract_default_fields; #[derive(Serialize, Deserialize, Debug, Clone)] struct Document { @@ -171,6 +172,13 @@ async fn process_documents_batch(documents: &Vec, ollama: &Ollama, mod slog_scope::debug!("with Prompt: {}", prompt_base); generate_response_and_extract_data(ollama, &model, &prompt_base, client, &fields, base_url, mode, &document).await; + let default_fields = get_default_fields(client, base_url, PaperlessDefaultFieldType::Tag).await; + match default_fields { + Ok(default_fields) => { + extract_default_fields(ollama, &model, &prompt_base, client, default_fields, base_url, &document, mode, PaperlessDefaultFieldType::Tag).await; + } + Err(e) => slog_scope::error!("Error while interacting with paperless: {}", e), + } }) } diff --git a/src/paperless.rs b/src/paperless.rs index 77fee1b..afe14d6 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -436,10 +436,10 @@ pub async fn create_default_field( Ok(data) => { let body = data.text().await?; slog_scope::trace!("{}", body); - let field: Result, _> = serde_json::from_str(&body); + let field: Result = serde_json::from_str(&body); match field { Ok(field) => { - Ok(field.results[0].clone()) // TODO: improve + Ok(field) // TODO: improve } Err(e) => { slog_scope::debug!("Creating field response: {}", body); diff --git a/src/paperless_defaultfields.rs b/src/paperless_defaultfields.rs index cf97e16..be3f659 100644 --- a/src/paperless_defaultfields.rs +++ b/src/paperless_defaultfields.rs @@ -5,27 +5,27 @@ use crate::{extract_json_object, Document, Field, Mode}; use crate::llm_api::generate_response; use crate::paperless::{get_default_fields, update_document_default_fields, update_document_fields, DefaultField, PaperlessDefaultFieldType}; -const ANSWER_INSTRUCTION: String = "The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. The document is:".to_string(); + const ANSWER_INSTRUCTION: &'static str = "The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. 
The document is: "; async fn construct_document_type_prompt(client: &Client, base_url: &str) -> Result> { let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::DocumentType).await; - let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one: ", document_types); + let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one. ", document_types); Ok(base_prompt) } async fn construct_tag_prompt(client: &Client, base_url: &str) -> Result> { let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Tag).await; - let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one: ", document_types); + let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one. ", document_types); Ok(base_prompt) } async fn construct_correspondent_prompt(client: &Client, base_url: &str) -> Result> { let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Correspondent).await; - let base_prompt = format!("Determine possible correspondents from this document from the following available correspondents: {:?}, if none of these fit the document, create a new one. The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. The document is: ", document_types); + let base_prompt = format!("Determine possible correspondents from this document from the following available correspondents: {:?}, if none of these fit the document, create a new one. The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. 
", document_types); Ok(base_prompt) } -async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: Vec, base_url: &str, document: &Document, mode: Mode, field_type: PaperlessDefaultFieldType) -> Option> { +pub async fn extract_default_fields(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: Vec, base_url: &str, document: &Document, mode: Mode, field_type: PaperlessDefaultFieldType) -> Option> { let prompt = match field_type { PaperlessDefaultFieldType::Correspondent => construct_correspondent_prompt(client, base_url).await, PaperlessDefaultFieldType::Tag => construct_tag_prompt(client, base_url).await, @@ -33,7 +33,8 @@ async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt }; match prompt { Ok(prompt) => { - match generate_response(ollama, &model.to_string(), prompt).await { + let prompt_with_document = prompt + &*ANSWER_INSTRUCTION + &*document.content; + match generate_response(ollama, &model.to_string(), prompt_with_document).await { Ok(res) => { // Log the response from the generate_response call slog_scope::debug!("LLM Response: {}", res.response); diff --git a/src/util.rs b/src/util.rs index cbe732c..e69de29 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,39 +0,0 @@ -use reqwest::Response; -use serde::Deserialize; -use crate::error::ResponseError; -use crate::{Response as PaperlessResponse}; - - -pub async fn parse_response(response: Response) -> Result where T: Deserialize<'static> { - - let response_result = response.error_for_status(); - match response_result { - Ok(data) => { - let body = data.text().await?; - slog_scope::trace!("Response from server while fetching documents: {}", body); - - let json = body.trim_start_matches("Document content: "); - - let data: Result, _> = serde_json::from_str(json); - match data { - Ok(data) => { - slog_scope::info!("Successfully retrieved {} Documents", data.results.len()); - Ok(data.results) - } - Err(e) => { - let column = e.column(); - let start = (column as isize - 30).max(0) as usize; - let end = (column + 30).min(json.len()); - slog_scope::error!("Error while creating json of document response from paperless {}", e); - slog_scope::error!("Error at column {}: {}", column, &json[start..end]); - slog_scope::trace!("Error occured in json {}", &json); - Err(e.into()) // Remove the semicolon here - } - } - } - Err(e) => { - slog_scope::error!("Error while fetching documents from paperless: {}",e); - Err(e.into()) - } - } -} \ No newline at end of file From fae6c092d723cb72d3738147fb4fbd21c03b5710 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 01:08:05 +0000 Subject: [PATCH 06/32] fix(deps): update rust crate serde_json to v1.0.129 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2b5deb9..292c1f3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1058,9 +1058,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.128" +version = "1.0.129" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" +checksum = "6dbcf9b78a125ee667ae19388837dd12294b858d101fdd393cb9d5501ef09eb2" dependencies = [ "itoa", "memchr", From b4489e20ad90c9f4ef3a120826576ad973d6ea0b Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 18 Oct 2024 18:32:04 +0000 Subject: [PATCH 
07/32] fix(deps): update rust crate serde_json to v1.0.130 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 292c1f3..6f4e8cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1058,9 +1058,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.129" +version = "1.0.130" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dbcf9b78a125ee667ae19388837dd12294b858d101fdd393cb9d5501ef09eb2" +checksum = "610f75ff4a8e3cb29b85da56eabdd1bff5b06739059a4b8e2967fef32e5d9944" dependencies = [ "itoa", "memchr", From a4d69ab94dd08a9873898e4672e45d2e046c104a Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 01:12:30 +0000 Subject: [PATCH 08/32] fix(deps): update rust crate serde_json to v1.0.131 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6f4e8cc..ce41e8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1058,9 +1058,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.130" +version = "1.0.131" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "610f75ff4a8e3cb29b85da56eabdd1bff5b06739059a4b8e2967fef32e5d9944" +checksum = "67d42a0bd4ac281beff598909bb56a86acaf979b84483e1c79c10dcaf98f8cf3" dependencies = [ "itoa", "memchr", From 4967af019b549867095c0a3938b581aae29a8171 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 19 Oct 2024 19:33:11 +0000 Subject: [PATCH 09/32] fix(deps): update rust crate serde_json to v1.0.132 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ce41e8e..9d061cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1058,9 +1058,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.131" +version = "1.0.132" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67d42a0bd4ac281beff598909bb56a86acaf979b84483e1c79c10dcaf98f8cf3" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" dependencies = [ "itoa", "memchr", From c62a851b64270aabb09289ea83f1ffca07a559e9 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 08:42:58 +0000 Subject: [PATCH 10/32] fix(deps): update rust crate serde to v1.0.211 --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9d061cc..598cce7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -833,9 +833,9 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" dependencies = [ "unicode-ident", ] @@ -1038,18 +1038,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.210" +version = "1.0.211" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "1ac55e59090389fb9f0dd9e0f3c09615afed1d19094284d0b200441f13550793" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.211" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "54be4f245ce16bc58d57ef2716271d0d4519e0f6defa147f6e081005bcb278ff" dependencies = [ "proc-macro2", "quote", @@ -1202,9 +1202,9 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "syn" -version = "2.0.50" +version = "2.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" dependencies = [ "proc-macro2", "quote", From 759a25380c0206cab443536d7e65ad58a0482ca5 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 09:50:50 +0000 Subject: [PATCH 11/32] chore(deps): update all minor updates --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 598cce7..a326365 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1349,9 +1349,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "145f3413504347a2be84393cc8a7d2fb4d863b375909ea59f2158261aa258bbb" dependencies = [ "backtrace", "bytes", From 2563c0c569812202e0e685ecb4e83cab16c207f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 22 Oct 2024 12:45:31 +0200 Subject: [PATCH 12/32] feat!: add support for creating default fields controlled by modes BREAKING-CHANGE: mode values have changed, value 0 no means NoAnalyze prompting doclytics to do nothing for this field type, use value 1 for the previous behaviour instead. --- README.md | 29 +++++++++++++----------- src/llm_api.rs | 1 - src/main.rs | 41 ++++++++++++++++++++++++++++------ src/paperless.rs | 3 ++- src/paperless_defaultfields.rs | 2 +- src/util.rs | 3 +++ 6 files changed, 56 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 2fd1369..ebda439 100644 --- a/README.md +++ b/README.md @@ -46,19 +46,22 @@ With these prerequisites met, you are now ready to proceed with the installation The application requires setting environment variables for its configuration. Below is a table describing each environment variable, indicating whether it is required or optional, its default value (if any), and a brief description: -| Environment Variable | Required | Default Value | Description | -|--------------------------|---------|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | -| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. 
| -| `PAPERLESS_FILTER` | NO | "NOT tagged=true" | Filter string that filters the documents to be fetched from paperless | -| `LANGUAGE` | No | "EN" | Allow to use translated base prompts (Support: EN, DE) | -| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | -| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. | -| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | -| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | -| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.
Should contain the custom fields in paperless that you want doclytics. | -| `LOG_LEVEL` | No | INFO | Log level | -| `MODE` | No | 0 | :warning: **Experimental**: Mode of operation.
0 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 1 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. | +| Environment Variable | Required | Default Value | Description | +|---------------------------|---------|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | +| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. | +| `PAPERLESS_FILTER` | NO | "NOT tagged=true" | Filter string that filters the documents to be fetched from paperless | +| `LANGUAGE` | No | "EN" | Allow to use translated base prompts (Support: EN, DE) | +| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | +| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. | +| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | +| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | +| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.
Should contain the custom fields in paperless that you want doclytics. | +| `LOG_LEVEL` | No | INFO | Log level | +| `MODE` | No | 0 | :warning: **Experimental**: Mode of operation.
0 = NoAnalyze(Doclytics does nothing for this field type), 1 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 2 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. | +| `DOCLYTICS_TAGS` | No | 0 | :warning: **Experimental**: Mode of operation.
0 = NoAnalyze(Doclytics does nothing for this field type), 1 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 2 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. | +| `DOCLYTICS_DOCTYPE` | No | 0 | :warning: **Experimental**: Mode of operation.
0 = NoAnalyze(Doclytics does nothing for this field type), 1 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 2 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. | +| `DOCLYTICS_CORRESPONDENT` | No | 0 | :warning: **Experimental**: Mode of operation.
0 = NoAnalyze(Doclytics does nothing for this field type), 1 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 2 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. | diff --git a/src/llm_api.rs b/src/llm_api.rs index 09dfa99..4539177 100644 --- a/src/llm_api.rs +++ b/src/llm_api.rs @@ -1,7 +1,6 @@ use ollama_rs::generation::completion::GenerationResponse; use ollama_rs::generation::completion::request::GenerationRequest; use ollama_rs::Ollama; -use crate::Document; pub async fn generate_response( ollama: &Ollama, diff --git a/src/main.rs b/src/main.rs index 3c0d3a8..955488b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,14 +69,16 @@ struct Field { #[derive(Clone, Copy)] enum Mode { + NoAnalyze, Create, NoCreate, } impl Mode { fn from_int(value: i32) -> Self { match value { - 1 => Mode::Create, - 0 => Mode::NoCreate, + 2 => Mode::Create, + 1 => Mode::NoCreate, + 0 => Mode::NoAnalyze, _ => Mode::NoCreate, } } @@ -165,7 +167,10 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u } async fn process_documents_batch(documents: &Vec, ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: &Vec, base_url: &str, mode: Mode) -> Result<(), Box> { - + let tag_mode = create_mode_from_env("DOCLYTICS_TAGS"); + let doctype_mode = create_mode_from_env("DOCLYTICS_DOCTYPE"); + let correspondent_mode = create_mode_from_env("DOCLYTICS_CORRESPONDENT"); + Ok(for document in documents { slog_scope::trace!("Document Content: {}", document.content); slog_scope::info!("Generate Response with LLM {}", "model"); @@ -175,7 +180,26 @@ async fn process_documents_batch(documents: &Vec, ollama: &Ollama, mod let default_fields = get_default_fields(client, base_url, PaperlessDefaultFieldType::Tag).await; match default_fields { Ok(default_fields) => { - extract_default_fields(ollama, &model, &prompt_base, client, default_fields, base_url, &document, mode, PaperlessDefaultFieldType::Tag).await; + match tag_mode { + Mode::NoAnalyze => (), + _ => + if let Some(err) = extract_default_fields(ollama, &model, &prompt_base, client, &default_fields, base_url, &document, tag_mode, PaperlessDefaultFieldType::Tag).await { + return Err(err); + } + } + match doctype_mode { + Mode::NoAnalyze => (), + _ => + if let Some(err) = extract_default_fields(ollama, &model, &prompt_base, client, &default_fields, base_url, &document, doctype_mode, PaperlessDefaultFieldType::DocumentType).await { + return Err(err); + } + } + match correspondent_mode { + Mode::NoAnalyze => (), + _ => if let Some(err) = extract_default_fields(ollama, &model, &prompt_base, client, &default_fields, base_url, &document, correspondent_mode, PaperlessDefaultFieldType::Correspondent).await { + return Err(err); + } + } } Err(e) => slog_scope::error!("Error while interacting with paperless: {}", e), } @@ -211,7 +235,6 @@ async fn generate_response_and_extract_data(ollama: &Ollama, model: &str, prompt } } } - #[tokio::main] async fn main() -> Result<(), Box> { logger::init(); // Initializes the global logger @@ -271,7 +294,11 @@ fn extract_json_object(input: &str) -> Result { } } - +fn create_mode_from_env(env_key: &str) -> Mode { + let mode_env = env::var(env_key).unwrap_or_else(|_| "1".to_string()); + let mode_int = mode_env.parse::().unwrap_or(1); + Mode::from_int(mode_int) +} #[cfg(test)] mod tests { use super::*; @@ -293,4 +320,4 @@ mod tests { let 
empty_json_str = "No JSON object or array here"; assert!(extract_json_object(empty_json_str).is_err()); } -} \ No newline at end of file +} diff --git a/src/paperless.rs b/src/paperless.rs index afe14d6..bcf94ca 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -6,6 +6,7 @@ use serde::de::{DeserializeOwned, StdError}; use serde_json::{Map, Value}; use crate::{CustomField, Document, Field, Mode, Response}; use serde::{Deserialize, Serialize}; +use crate::util::normalize_string; #[derive(Clone, Copy)] pub enum PaperlessDefaultFieldType { @@ -316,7 +317,7 @@ pub async fn update_document_default_fields( for value in data { - if let Some(field) = fields.iter().find(|&f| f.name == *value) { + if let Some(field) = fields.iter().find(|&f| normalize_string(&*f.name) == normalize_string(&*value)) { let default_field_id = field.id; default_field_ids.push(default_field_id); } else { diff --git a/src/paperless_defaultfields.rs b/src/paperless_defaultfields.rs index be3f659..5abc6c3 100644 --- a/src/paperless_defaultfields.rs +++ b/src/paperless_defaultfields.rs @@ -25,7 +25,7 @@ async fn construct_correspondent_prompt(client: &Client, base_url: &str) -> Resu } -pub async fn extract_default_fields(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: Vec, base_url: &str, document: &Document, mode: Mode, field_type: PaperlessDefaultFieldType) -> Option> { +pub async fn extract_default_fields(ollama: &Ollama, model: &str, prompt_base: &String, client: &Client, fields: &Vec, base_url: &str, document: &Document, mode: Mode, field_type: PaperlessDefaultFieldType) -> Option> { let prompt = match field_type { PaperlessDefaultFieldType::Correspondent => construct_correspondent_prompt(client, base_url).await, PaperlessDefaultFieldType::Tag => construct_tag_prompt(client, base_url).await, diff --git a/src/util.rs b/src/util.rs index e69de29..c676f43 100644 --- a/src/util.rs +++ b/src/util.rs @@ -0,0 +1,3 @@ +pub fn normalize_string(s: &str) -> String { + s.replace("-", "").replace("_", "").to_lowercase() +} \ No newline at end of file From e5577e8f3709a6f013687432093cc7a0f6307eac Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 11:18:46 +0000 Subject: [PATCH 13/32] chore(deps): update rust docker tag to v1.82 --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c8a439a..741f42d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:1.81 +FROM rust:1.82 WORKDIR /usr/doclytics COPY . . 
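For reference, the per-field-type switches introduced in PATCH 12 can be exercised in isolation; the sketch below lifts Mode::from_int and create_mode_from_env from that diff unchanged (the standalone main and the Debug derive are illustrative additions, not part of the patch):

    use std::env;

    // 0 = NoAnalyze (skip this field type), 1 = NoCreate (only reuse values that
    // already exist in Paperless), 2 = Create (create missing tags, document
    // types, and correspondents); unknown values fall back to NoCreate.
    #[derive(Clone, Copy, Debug)]
    enum Mode {
        NoAnalyze,
        Create,
        NoCreate,
    }

    impl Mode {
        fn from_int(value: i32) -> Self {
            match value {
                2 => Mode::Create,
                1 => Mode::NoCreate,
                0 => Mode::NoAnalyze,
                _ => Mode::NoCreate,
            }
        }
    }

    // Defaults to "1" (NoCreate) when the variable is unset or unparsable,
    // matching the behaviour in the patch.
    fn create_mode_from_env(env_key: &str) -> Mode {
        let mode_env = env::var(env_key).unwrap_or_else(|_| "1".to_string());
        let mode_int = mode_env.parse::<i32>().unwrap_or(1);
        Mode::from_int(mode_int)
    }

    fn main() {
        let tag_mode = create_mode_from_env("DOCLYTICS_TAGS");
        let doctype_mode = create_mode_from_env("DOCLYTICS_DOCTYPE");
        let correspondent_mode = create_mode_from_env("DOCLYTICS_CORRESPONDENT");
        println!("{:?} / {:?} / {:?}", tag_mode, doctype_mode, correspondent_mode);
    }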
From 28680b4d608db9b29eb6d9748df7862463184369 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 18:29:50 +0000 Subject: [PATCH 14/32] fix(deps): update rust crate serde to v1.0.212 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a326365..64ce259 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1038,18 +1038,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.211" +version = "1.0.212" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ac55e59090389fb9f0dd9e0f3c09615afed1d19094284d0b200441f13550793" +checksum = "ccd4055b7e3937a5c2595e974f5bf1715a23919a595a04b5ad959bdbbb61ab04" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.211" +version = "1.0.212" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54be4f245ce16bc58d57ef2716271d0d4519e0f6defa147f6e081005bcb278ff" +checksum = "726adf8349784fb68a42e6466f49362ae039d9c5333cc6eb131f4d6f94bb9126" dependencies = [ "proc-macro2", "quote", From 4f92be2845516c275c45bfe3a2aa66ea95c74a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 22 Oct 2024 20:55:24 +0200 Subject: [PATCH 15/32] fix: error handling and logging --- src/main.rs | 6 +++--- src/paperless_defaultfields.rs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 955488b..08ffc2a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -184,20 +184,20 @@ async fn process_documents_batch(documents: &Vec, ollama: &Ollama, mod Mode::NoAnalyze => (), _ => if let Some(err) = extract_default_fields(ollama, &model, &prompt_base, client, &default_fields, base_url, &document, tag_mode, PaperlessDefaultFieldType::Tag).await { - return Err(err); + slog_scope::error!("Error while getting tags: {:?}", err); } } match doctype_mode { Mode::NoAnalyze => (), _ => if let Some(err) = extract_default_fields(ollama, &model, &prompt_base, client, &default_fields, base_url, &document, doctype_mode, PaperlessDefaultFieldType::DocumentType).await { - return Err(err); + slog_scope::error!("Error while getting doctype: {:?}", err); } } match correspondent_mode { Mode::NoAnalyze => (), _ => if let Some(err) = extract_default_fields(ollama, &model, &prompt_base, client, &default_fields, base_url, &document, correspondent_mode, PaperlessDefaultFieldType::Correspondent).await { - return Err(err); + slog_scope::error!("Error while getting correspondents: {:?}", err); } } } diff --git a/src/paperless_defaultfields.rs b/src/paperless_defaultfields.rs index 5abc6c3..a010d58 100644 --- a/src/paperless_defaultfields.rs +++ b/src/paperless_defaultfields.rs @@ -5,7 +5,7 @@ use crate::{extract_json_object, Document, Field, Mode}; use crate::llm_api::generate_response; use crate::paperless::{get_default_fields, update_document_default_fields, update_document_fields, DefaultField, PaperlessDefaultFieldType}; - const ANSWER_INSTRUCTION: &'static str = "The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. The document is: "; + const ANSWER_INSTRUCTION: &'static str = "The result should be a only a non-nested one dimensional json array of correctly quoted strings and nothing else. The answer should start and end with the square bracket. 
The document is: "; async fn construct_document_type_prompt(client: &Client, base_url: &str) -> Result> { let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::DocumentType).await; let base_prompt = format!("Determine the type of this document from the following available document types: {:?}, if none of these fit the document, create a new one. ", document_types); From 7804eade53dfbf66e1190163e90b62253ddd8b71 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 18:59:18 +0000 Subject: [PATCH 16/32] fix(deps): update rust crate serde to v1.0.213 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 64ce259..b028a85 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1038,18 +1038,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.212" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccd4055b7e3937a5c2595e974f5bf1715a23919a595a04b5ad959bdbbb61ab04" +checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.212" +version = "1.0.213" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "726adf8349784fb68a42e6466f49362ae039d9c5333cc6eb131f4d6f94bb9126" +checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" dependencies = [ "proc-macro2", "quote", From 30f21420a8ab1afb5b140202095b4df022e08efb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 07:58:12 +0200 Subject: [PATCH 17/32] feat: add support for creating default fields like tags, correspondents and document types controlled by modes. The results can be a bit inconsistent with the current query so adjust your filter to exclude documents you already have tags for to be safe. BREAKING CHANGE: mode values have changed, value 0 no means NoAnalyze prompting doclytics to do nothing for this field type, use value 1 for the previous behaviour instead. --- src/paperless_defaultfields.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paperless_defaultfields.rs b/src/paperless_defaultfields.rs index a010d58..a29a93d 100644 --- a/src/paperless_defaultfields.rs +++ b/src/paperless_defaultfields.rs @@ -20,7 +20,7 @@ async fn construct_tag_prompt(client: &Client, base_url: &str) -> Result Result> { let document_types = get_default_fields(client, base_url, PaperlessDefaultFieldType::Correspondent).await; - let base_prompt = format!("Determine possible correspondents from this document from the following available correspondents: {:?}, if none of these fit the document, create a new one. The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. ", document_types); + let base_prompt = format!("Determine possible correspondents from this document from the following available correspondents: {:?}, if none of these fit the document, create a maximum of one new one. The result should be a only a json array of string and nothing else. The answer should start and end with the square bracket. 
", document_types); Ok(base_prompt) } From cf48f8c74b023d4cc4a8988e8d91de746b23cbcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 12:35:24 +0200 Subject: [PATCH 18/32] fix: set correct version during the build phase to be later logged --- .github/workflows/release-prod.yml | 6 ++++++ .github/workflows/release.yml | 7 +++++++ Cargo.lock | 2 +- Cargo.toml | 1 - 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index ad9c7ae..a5eadf2 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -30,6 +30,12 @@ jobs: target: ${{ matrix.target }} profile: minimal override: true + - name: Install cargo-edit + run: | + cargo install cargo-edit + - name: Update Cargo Version + env: + VERSION: ${{ github.head_ref || github.ref_name }} - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f0a14fd..f2726a1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -30,6 +30,13 @@ jobs: target: ${{ matrix.target }} profile: minimal override: true + - name: Install cargo-edit + run: | + cargo install cargo-edit + - name: Update Cargo Version + env: + VERSION: ${{ github.head_ref || github.ref_name }} + run: cargo set-version ${{ env.VERSION }} - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact diff --git a/Cargo.lock b/Cargo.lock index b028a85..884f1fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -220,7 +220,7 @@ dependencies = [ [[package]] name = "doclytics" -version = "1.1.4-rc.9" +version = "0.0.0" dependencies = [ "chrono", "lazy_static", diff --git a/Cargo.toml b/Cargo.toml index c16904c..dc0d0a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,5 @@ [package] name = "doclytics" -version = "1.1.4-rc.9" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 5e4807928eed0c4d2c207a7cacd281a877f98326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 12:47:02 +0200 Subject: [PATCH 19/32] fix: set correct version during the build phase to be later logged --- Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Cargo.toml b/Cargo.toml index dc0d0a2..ebb7a65 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,6 @@ [package] name = "doclytics" +version = "0.0.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html From 0badd03a3698e2840cbd75092e6d6469add64f19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:09:04 +0200 Subject: [PATCH 20/32] fix: version workflow --- .github/workflows/release-prod.yml | 1 + .releaserc.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index a5eadf2..5e6f9a7 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -36,6 +36,7 @@ jobs: - name: Update Cargo Version env: VERSION: ${{ github.head_ref || github.ref_name }} + run: cargo set-version ${{ env.VERSION }} - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact diff --git a/.releaserc.yml b/.releaserc.yml index cdc479d..b166fb8 100644 --- a/.releaserc.yml +++ b/.releaserc.yml @@ -18,3 +18,4 @@ branches: - name: "development" 
prerelease: "rc" channel: "false" +tagFormat: "${version}" From 2806ce0bedf6ec6119e3398833fc215b11013366 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 23 Oct 2024 11:17:00 +0000 Subject: [PATCH 21/32] chore(deps): update actions/upload-artifact action to v4 --- .github/workflows/push.yml | 2 +- .github/workflows/release-prod.yml | 2 +- .github/workflows/release.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 82e3622..532e422 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -41,7 +41,7 @@ jobs: - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} path: | diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index 5e6f9a7..0544005 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -40,7 +40,7 @@ jobs: - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} path: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f2726a1..fc37466 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -40,7 +40,7 @@ jobs: - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} TAG_NAME: ${{ github.ref }} # Assumes the tag name is the same as the ref. Adjust if necessary. From 461459bf24589bf59b575d2de502ad88bca5e99d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:27:28 +0200 Subject: [PATCH 22/32] fix: version workflow --- .github/workflows/release-prod.yml | 4 ++++ .github/workflows/release.yml | 6 +++++- Dockerfile | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index 5e6f9a7..07c32f6 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -93,6 +93,8 @@ jobs: username: ${{github.actor}} password: ${{secrets.GITHUB_TOKEN}} - name: Build and push + env: + VERSION: ${{ github.head_ref || github.ref_name }} uses: docker/build-push-action@v6 with: context: . @@ -100,6 +102,8 @@ jobs: tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} platforms: linux/amd64,linux/arm64 - name: Build and push to GitHub Container Registry + env: + VERSION: ${{ github.head_ref || github.ref_name }} uses: docker/build-push-action@v6 with: context: . diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index f2726a1..325f215 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -71,6 +71,8 @@ jobs: password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Build and push uses: docker/build-push-action@v6 + env: + VERSION: ${{ github.head_ref || github.ref_name }} with: context: . 
push: true @@ -83,9 +85,11 @@ jobs: username: ${{github.actor}} password: ${{secrets.GITHUB_TOKEN}} - name: Build and push to GitHub Container Registry + env: + VERSION: ${{ github.head_ref || github.ref_name }} uses: docker/build-push-action@v6 with: context: . push: true - tags: ghcr.io/b-urb/doclytics:${{ env.BRANCH_NAME }} + tags: ghcr.io/b-urb/doclytics:v${{ env.BRANCH_NAME }} platforms: linux/amd64,linux/arm64 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 741f42d..7f086a5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,11 @@ FROM rust:1.82 WORKDIR /usr/doclytics COPY . . +ENV VERSION +RUN cargo install cargo edit +RUN cargo set-version ${{ env.VERSION }} RUN cargo install --path . + CMD ["doclytics"] \ No newline at end of file From ad6db4766425ed011ff50480ff2f58f1cb506bc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:31:41 +0200 Subject: [PATCH 23/32] chore: switch to actively maintained rust action --- .github/workflows/push.yml | 4 +--- .github/workflows/release-prod.yml | 4 +--- .github/workflows/release.yml | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 532e422..c4b6077 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -32,12 +32,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable target: ${{ matrix.target }} - profile: minimal - override: true - name: Build run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index beb8c7e..d437a22 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -24,12 +24,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable target: ${{ matrix.target }} - profile: minimal - override: true - name: Install cargo-edit run: | cargo install cargo-edit diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index caeba5c..8dfb621 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,12 +24,10 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install Rust - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: toolchain: stable target: ${{ matrix.target }} - profile: minimal - override: true - name: Install cargo-edit run: | cargo install cargo-edit From 922444c06ccf87304f5730d362cfcc521457e631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:40:11 +0200 Subject: [PATCH 24/32] fix: dockerfile argument --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 7f086a5..ee0fca3 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,9 +2,9 @@ FROM rust:1.82 WORKDIR /usr/doclytics COPY . . -ENV VERSION +ARG VERSION RUN cargo install cargo edit -RUN cargo set-version ${{ env.VERSION }} +RUN cargo set-version ${VERSION} RUN cargo install --path . 
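Patches 18 through 24 all serve one goal: write the real release version into Cargo.toml at build time (cargo set-version with the branch or tag name passed in as VERSION, both in the workflows and as a Docker build ARG) so the binary can later report which version it is. The fragment below sketches how such a value typically surfaces in Rust via Cargo's compile-time CARGO_PKG_VERSION variable; that doclytics logs its version exactly this way is an assumption the commit messages imply but the patches do not show.

// Illustrative sketch, not project code: the version that `cargo set-version`
// wrote into Cargo.toml before the build is exposed by Cargo at compile time.
const VERSION: &str = env!("CARGO_PKG_VERSION");

fn main() {
    // With the workflow changes above, this reflects the release tag or branch
    // passed in as VERSION rather than the 0.0.0 placeholder kept in Cargo.toml.
    println!("doclytics {}", VERSION);
}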
From a6d87430012ba34825a60555ca76452f85fffadc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:47:44 +0200 Subject: [PATCH 25/32] fix: only build docker once --- .github/workflows/push.yml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index c4b6077..b84e5b1 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -47,6 +47,9 @@ jobs: build-docker: needs: build runs-on: ubuntu-latest + strategy: + matrix: + registry: ['ghcr.io/b-urb/doclytics', '${{ secrets.DOCKERHUB_USERNAME }}/doclytics'] permissions: packages: write contents: read @@ -63,13 +66,6 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Build and push - uses: docker/build-push-action@v6 - with: - context: . - push: true - tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} - platforms: linux/amd64,linux/arm64 - name: 'Login to GitHub Container Registry' uses: docker/login-action@v3 with: @@ -81,7 +77,7 @@ jobs: with: context: . push: true - tags: ghcr.io/b-urb/doclytics:${{ env.BRANCH_NAME }} + tags: ${{ matrix.registry }}:${{ env.BRANCH_NAME }} platforms: linux/amd64,linux/arm64 release: runs-on: ubuntu-latest From fe1851f2f7d4c0fd41e6d792cfcc4e4c57fe8667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:50:27 +0200 Subject: [PATCH 26/32] fix: only build docker once --- .github/workflows/push.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index b84e5b1..fbf1f40 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -49,7 +49,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - registry: ['ghcr.io/b-urb/doclytics', '${{ secrets.DOCKERHUB_USERNAME }}/doclytics'] + registry: ['ghcr.io/b-urb/doclytics', 'bjoern5urban/doclytics'] permissions: packages: write contents: read From 05bf7c43d255d9e8506fe1324a259eff4011e535 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 13:54:58 +0200 Subject: [PATCH 27/32] fix: install cargo-edit --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index ee0fca3..d956d1d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ FROM rust:1.82 WORKDIR /usr/doclytics COPY . . ARG VERSION -RUN cargo install cargo edit +RUN cargo install cargo-edit RUN cargo set-version ${VERSION} RUN cargo install --path . From e1a7f68ff6792688392e56e19371b050589ee8c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 14:03:04 +0200 Subject: [PATCH 28/32] fix: only build docker once --- .github/workflows/push.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index fbf1f40..7da86ca 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -47,9 +47,6 @@ jobs: build-docker: needs: build runs-on: ubuntu-latest - strategy: - matrix: - registry: ['ghcr.io/b-urb/doclytics', 'bjoern5urban/doclytics'] permissions: packages: write contents: read @@ -77,7 +74,10 @@ jobs: with: context: . 
push: true - tags: ${{ matrix.registry }}:${{ env.BRANCH_NAME }} + tags: + - ghcr.io/b-urb/doclytics:${{ env.BRANCH_NAME }} + - bjoern5urban/doclytics:${{ env.BRANCH_NAME }} + platforms: linux/amd64,linux/arm64 release: runs-on: ubuntu-latest From a593269786ccaa88f66fcdb12c558765b011c3e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 14:08:20 +0200 Subject: [PATCH 29/32] fix: only build docker once --- .github/workflows/push.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 7da86ca..32c846f 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -74,9 +74,9 @@ jobs: with: context: . push: true - tags: - - ghcr.io/b-urb/doclytics:${{ env.BRANCH_NAME }} - - bjoern5urban/doclytics:${{ env.BRANCH_NAME }} + tags: | + ghcr.io/b-urb/doclytics:${{ env.BRANCH_NAME }} + bjoern5urban/doclytics:${{ env.BRANCH_NAME }} platforms: linux/amd64,linux/arm64 release: From d58cca105b8caa73c7b5f9a291167cbc8531739c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 23 Oct 2024 14:56:07 +0200 Subject: [PATCH 30/32] fix: pipeline versioning scheme --- .github/workflows/release-prod.yml | 2 ++ .github/workflows/release.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index d437a22..4ee0164 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -3,8 +3,10 @@ on: push: branches: - 'v[0-9]+.[0-9]+.[0-9]+' + - '[0-9]+.[0-9]+.[0-9]+' tags: - 'v[0-9]+.[0-9]+.[0-9]+' + - '[0-9]+.[0-9]+.[0-9]+' env: CI: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8dfb621..271f549 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -3,8 +3,10 @@ on: push: branches: - 'v[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' + - '[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' tags: - 'v[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' + - '[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' env: CI: true From c6f120d79403d68faaeeb4781db7b3af9464a3ae Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 19:54:12 +0000 Subject: [PATCH 31/32] fix(deps): update rust crate reqwest to v0.12.9 --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 884f1fd..33304f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -871,9 +871,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.0", "bytes", From b48623719160ac0202d6e313c8ae558a74efa2df Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 28 Oct 2024 19:54:19 +0000 Subject: [PATCH 32/32] fix(deps): update rust crate serde to v1.0.214 --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33304f5..755076e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1038,18 +1038,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea7893ff5e2466df8d720bb615088341b295f849602c6956047f8f80f0e9bc1" +checksum = 
"f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.213" +version = "1.0.214" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e85ad2009c50b58e87caa8cd6dac16bdf511bbfb7af6c33df902396aa480fa5" +checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766" dependencies = [ "proc-macro2", "quote",