Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/development' into development
Browse files Browse the repository at this point in the history
# Conflicts:
#	.github/workflows/push.yml
#	.github/workflows/release-prod.yml
#	.github/workflows/release.yml
  • Loading branch information
Björn Urban committed Jun 16, 2024
2 parents 1da43eb + 5a78de7 commit dcb1923
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 27 deletions.
97 changes: 91 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ edition = "2021"

[dependencies]
tokio = { version = "1.37.0", features = ["full"] }
ollama-rs = "0.1.9"
ollama-rs = "0.2.0"
reqwest = {version = "0.12.4", features = ["json"]}
serde_json = "1.0.116"
serde = "1.0.200"
Expand All @@ -19,4 +19,5 @@ slog-json = "2.4"
slog-scope = "4.4"
slog-stdlog = "4.1"
lazy_static = "1.4"
chrono = "0.4.38"

2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM rust:1.78
FROM rust:1.79

WORKDIR /usr/doclytics
COPY . .
Expand Down
27 changes: 16 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,22 @@ With these prerequisites met, you are now ready to proceed with the installation

The application requires setting environment variables for its configuration. Below is a table describing each environment variable, indicating whether it is required or optional, its default value (if any), and a brief description:

| Environment Variable | Required | Default Value | Description |
|--------------------------|---------|----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------|
| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. |
| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. |
| `PAPERLESS_FILTER` | NO | "NOT tagged=true" | Filter string that filters the documents to be fetched from paperless |
| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. |
| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. |
| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. |
| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. |
| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.<br/> Should contain the custom fields in paperless that you want doclytics. |
| `LOG_LEVEL` | No | INFO | Log level |
| Environment Variable | Required | Default Value | Description |
|--------------------------|---------|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. |
| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. |
| `PAPERLESS_FILTER` | No | "NOT tagged=true" | Filter string that selects which documents are fetched from Paperless. |
| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. |
| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. |
| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. |
| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. |
| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.<br/> Should contain the custom fields in Paperless that you want doclytics to fill in. |
| `LOG_LEVEL` | No | INFO | Log level |
| `MODE` | No | 0 | :warning: **Experimental**: Mode of operation. <br/> 0 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 1 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). At the moment, all fields are created with type "Text". Once the feature is stable, the type will be inferred. |






Make sure to set the required environment variables (`PAPERLESS_TOKEN` and `PAPERLESS_BASE_URL`) before running the application. Optional variables have default values and will use those defaults if not explicitly set.
Expand Down
20 changes: 19 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ struct Field {
data_type: String,
}

/// Controls what happens when the model returns a key that has no matching
/// custom field in Paperless.
#[derive(Clone, Copy)]
enum Mode {
    /// Missing custom fields are created automatically.
    Create,
    /// Missing custom fields are not created.
    NoCreate,
}

impl Mode {
    /// Maps an integer setting onto a [`Mode`].
    ///
    /// `1` selects [`Mode::Create`]; every other value, including `0`,
    /// selects [`Mode::NoCreate`].
    fn from_int(value: i32) -> Self {
        if value == 1 {
            Mode::Create
        } else {
            Mode::NoCreate
        }
    }
}


// Initialize the HTTP client with Paperless API token and base URL
fn init_paperless_client(token: &str) -> Client {
Expand Down Expand Up @@ -98,6 +113,9 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u
explanation, no introtext, the answer should start and end with curly brackets \
delimiting the json object ".to_string()
);
let mode_env = env::var("MODE").unwrap_or_else(|_| "0".to_string());
let mode_int = mode_env.parse::<i32>().unwrap_or(0);
let mode = Mode::from_int(mode_int);
let fields = query_custom_fields(client, base_url).await?;
match get_data_from_paperless(&client, &base_url, filter).await {
Ok(data) => {
Expand All @@ -117,7 +135,7 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u
slog_scope::debug!("Extracted JSON Object: {}", json_str);

match serde_json::from_str(&json_str) {
Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url).await?,
Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?,
Err(e) => {
slog_scope::error!("Error parsing llm response json {}", e.to_string());
slog_scope::debug!("JSON String was: {}", &json_str);
Expand Down
85 changes: 78 additions & 7 deletions src/paperless.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use std::fmt;
use reqwest::Client;
use serde::de::StdError;
use serde_json::Value;
use crate::{CustomField, Document, Field, Response};
use crate::{CustomField, Document, Field, Mode, Response};
use serde::{Deserialize, Serialize};

pub async fn get_data_from_paperless(
client: &Client,
Expand Down Expand Up @@ -105,6 +106,7 @@ pub async fn update_document_fields(
fields: &Vec<Field>,
metadata: &HashMap<String, Option<Value>>,
base_url: &str,
mode: Mode,
) -> Result<(), Box<dyn std::error::Error>> {
let mut custom_fields = Vec::new();

Expand All @@ -131,15 +133,32 @@ pub async fn update_document_fields(
}

if let Some(field) = fields.iter().find(|&f| f.name == *key) {
let custom_field = CustomField {
field: field.id.clone(),
value: value.as_ref().cloned(),
};
let custom_field = convert_field_to_custom_field(value, field);
custom_fields.push(custom_field);
}
else {
if matches!(mode, Mode::Create) {
slog_scope::info!("Creating field: {}", key);
let create_field = CreateField {
name: key.clone(),
data_type: "Text".to_string(),
default_value: None,
};
match create_custom_field(client, &create_field, base_url).await
{
Ok(new_field) => {
let custom_field = convert_field_to_custom_field(value, &new_field);
custom_fields.push(custom_field)
},
Err(e) => {
slog_scope::error!("Error: {} creating custom field: {}, skipping...",e, key)
}
}
}
}
}
// Check if tagged_field_id has a value and then proceed.

let mut payload = serde_json::Map::new();

payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields));
Expand Down Expand Up @@ -170,4 +189,56 @@ pub async fn update_document_fields(
Err(e.into())
}
}
}
}

/// Builds the [`CustomField`] entry that assigns `value` to `field`.
///
/// The entry's `field` is a clone of `field.id` and its `value` is a clone of
/// `value`, so a `None` input stays `None`.
fn convert_field_to_custom_field(value: &Option<Value>, field: &Field) -> CustomField {
    CustomField {
        field: field.id.clone(),
        value: value.clone(),
    }
}

/// Request body for creating a custom field; serialized to JSON and posted by
/// `create_custom_field`.
#[derive(Serialize, Deserialize, Debug)]
pub struct CreateField {
    /// Name of the custom field to create.
    name: String,
    /// Default value for the new field; `None` when no default is supplied.
    default_value: Option<String>,
    /// Data type of the new field, e.g. `"Text"`.
    data_type: String,
}

/// Creates a custom field by POSTing `field` as JSON to
/// `{base_url}/api/custom_fields/` and returns the field parsed from the reply.
///
/// # Errors
///
/// Returns an error when
/// - the request cannot be sent or the body cannot be read,
/// - the server answers with an error status,
/// - the body cannot be parsed as `Response<Field>`, or
/// - the parsed response contains no field (previously this case panicked on
///   an out-of-bounds index).
pub async fn create_custom_field(
    client: &Client,
    field: &CreateField,
    base_url: &str,
) -> Result<Field, Box<dyn std::error::Error>> {
    // Define the URL for creating a custom field
    let url = format!("{}/api/custom_fields/", base_url);

    // Send the request to create the custom field
    let res = client.post(&url).json(field).send().await?;
    let response = match res.error_for_status() {
        Ok(response) => response,
        Err(e) => {
            slog_scope::error!("Error creating custom field: {}", e);
            return Err(e.into());
        }
    };

    let body = response.text().await?;
    slog_scope::trace!("{}", body);

    // NOTE(review): the body is parsed as a list-style `Response<Field>` and the
    // first entry is used. Confirm against the Paperless API whether the create
    // endpoint really wraps the new field in a list or returns it directly.
    let parsed: Response<Field> = match serde_json::from_str(&body) {
        Ok(parsed) => parsed,
        Err(e) => {
            slog_scope::debug!("Creating field response: {}", body);
            slog_scope::error!("Error parsing response from new field: {}", e);
            return Err(e.into());
        }
    };

    // An empty result list is reported as an error instead of panicking, so the
    // caller can log it and skip this field.
    match parsed.results.first() {
        Some(created) => Ok(created.clone()),
        None => {
            slog_scope::debug!("Creating field response: {}", body);
            Err(format!(
                "response for newly created custom field '{}' contained no field",
                field.name
            )
            .into())
        }
    }
}


0 comments on commit dcb1923

Please sign in to comment.