From 20880030ba6588757fe96552b77f5cc9611d1290 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 23 Feb 2024 12:34:40 +0000 Subject: [PATCH 01/18] fix(deps): update all non-major dependencies --- Cargo.lock | 49 +++++++++++++++++++++++++++++++++---------------- Cargo.toml | 8 ++++---- 2 files changed, 37 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd5bdae..b264d80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -465,9 +465,9 @@ dependencies = [ [[package]] name = "ollama-rs" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb57402b9ed5e0c239459eafba74c28306687354bf6876a6c9e9ce7de39867f" +checksum = "8d4afa0fc3b4baa5804364e94580210bed42caa6513e26ca2f4aca3e482578a1" dependencies = [ "reqwest", "serde", @@ -573,18 +573,18 @@ checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" [[package]] name = "proc-macro2" -version = "1.0.70" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.33" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] @@ -600,9 +600,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.23" +version = "0.11.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41" +checksum = "c6920094eb85afde5e4a138be3f2de8bbdf28000f0029e72c45025a56b042251" dependencies = [ "base64", "bytes", @@ -622,9 +622,11 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", + "sync_wrapper", "system-configuration", "tokio", "tokio-native-tls", @@ -655,6 +657,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64", +] + [[package]] name = "ryu" version = "1.0.16" @@ -701,18 +712,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.193" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.193" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", @@ -721,9 +732,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = 
"c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -787,15 +798,21 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.41" +version = "2.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269" +checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "system-configuration" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index cdfda2d..bad3b38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,9 +7,9 @@ edition = "2021" [dependencies] tokio = { version = "1", features = ["full"] } -ollama-rs = "0.1.5" -reqwest = {version = "0.11.23", features = ["json"]} -serde_json = "1.0.108" -serde = "1.0.193" +ollama-rs = "0.1.6" +reqwest = {version = "0.11.24", features = ["json"]} +serde_json = "1.0.114" +serde = "1.0.197" substring = "1.4.5" From c3ea0794abfd3527f5a8c1733e1c680b44635dae Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 24 Feb 2024 13:39:14 +0000 Subject: [PATCH 02/18] fix(deps): update rust crate ollama-rs to 0.1.7 --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b264d80..9053cec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -465,9 +465,9 @@ dependencies = [ [[package]] name = "ollama-rs" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d4afa0fc3b4baa5804364e94580210bed42caa6513e26ca2f4aca3e482578a1" +checksum = "6ee48e21359d1a897e180d612319de5e348a9247629aefc1f73147d78b4b859c" dependencies = [ "reqwest", "serde", diff --git a/Cargo.toml b/Cargo.toml index bad3b38..d6913c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" [dependencies] tokio = { version = "1", features = ["full"] } -ollama-rs = "0.1.6" +ollama-rs = "0.1.7" reqwest = {version = "0.11.24", features = ["json"]} serde_json = "1.0.114" serde = "1.0.197" From ff4c01d81cda673b1f779a955108e4fc87623c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 09:23:06 +0100 Subject: [PATCH 03/18] feat(deployment): add pipeline for building binary and docker image related to #9 --- .dockerignore | 37 ++++++++++++ .github/workflows/push.yml | 91 +++++++++++++++++++++++++++++ .github/workflows/release-prod.yml | 93 ++++++++++++++++++++++++++++++ .github/workflows/release.yml | 92 +++++++++++++++++++++++++++++ .releaserc.yml | 20 +++++++ Dockerfile | 8 +++ 6 files changed, 341 insertions(+) create mode 100644 .dockerignore create mode 100644 .github/workflows/push.yml create mode 100644 .github/workflows/release-prod.yml create mode 100644 .github/workflows/release.yml create mode 100644 .releaserc.yml create mode 100644 Dockerfile diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..6f8778b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,37 @@ +# Ignore git and version control +.git +.gitignore +.svn +.cvsignore + +# Ignore build and log files +build/ +logs/ +tmp/ + +# Ignore testing and benchmarking artifacts +tests/ +benchmarks/ + +# Ignore Dockerfile and .dockerignore to reduce context size 
+Dockerfile +.dockerignore + +# Ignore dependency directories (language and framework-specific) +node_modules/ +vendor/ +.env + +# Ignore IDE and editor directories +.idea/ +*.swp +*.swo +.vscode/ + +# Ignore user-specific files +.DS_Store +*.log + +# Ignore sensitive files +secrets.yml +config/credentials.yml.enc diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml new file mode 100644 index 0000000..b7d38e6 --- /dev/null +++ b/.github/workflows/push.yml @@ -0,0 +1,91 @@ +name: CI + +on: + push: + branches: + - master + - development + pull_request: + branches: + - '*' + +env: + CI: true + +permissions: + contents: read +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + - os: macos-latest + target: x86_64-apple-darwin + - os: windows-latest + target: x86_64-pc-windows-msvc + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: ${{ matrix.target }} + profile: minimal + override: true + - name: Build + run: cargo build --release --target ${{ matrix.target }} + build-docker: + needs: build + runs-on: ubuntu-latest + if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Download build artifact + uses: actions/download-artifact@v3 + with: + name: production-artifacts + - name: Set up Docker Build + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} + platforms: linux/amd64,linux/arm64 + release: + runs-on: ubuntu-latest + permissions: + contents: write + issues: write + pull-requests: write + if: false #${{ github.ref }} == 'master' || ${{ github.ref }} == 'development' + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + persist-credentials: false + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: 20 + - name: Install dependencies + run: npm install -g semantic-release @saithodev/semantic-release-backmerge @semantic-release/github @semantic-release/exec + - name: Release + env: + GITHUB_TOKEN: ${{ secrets.PAT }} + run: npx semantic-release --debug diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml new file mode 100644 index 0000000..b8557c9 --- /dev/null +++ b/.github/workflows/release-prod.yml @@ -0,0 +1,93 @@ +name: Deploy Release +on: + push: + branches: + - 'v[0-9]+.[0-9]+.[0-9]+' + tags: + - 'v[0-9]+.[0-9]+.[0-9]+' +env: + CI: true + +jobs: + build: + runs-on: ubuntu-latest + environment: production + strategy: + matrix: + go-version: [ '1.22' ] + node-version: [ 18 ] + goarch: [ 'amd64', 'arm64' ] # Define architectures here + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup Go ${{ matrix.go-version }} + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + - name: Cache node modules + uses: actions/cache@v4 + with: + path: ~/.npm + key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-node- + - name: Install dependencies + working-directory: frontend + run: npm ci + - name: Build + working-directory: frontend + run: npm run build + - name: Install ARM64 cross-compiler + run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu + - name: Install dependencies + working-directory: backend + run: go get ./... + - name: Set up CC for cross-compilation + if: matrix.goarch == 'arm64' + run: echo "CC=aarch64-linux-gnu-gcc" >> $GITHUB_ENV + - name: Build + working-directory: backend + run: | + GOOS=linux GOARCH=${{ matrix.goarch }} CGO_ENABLED=1 go build -o build/doclytics-${{ matrix.goarch }} ./cmd/doclytics + #- name: Test with the Go CLI + # run: go test + - name: Archive production artifacts + uses: actions/upload-artifact@v3 + with: + name: production-artifacts + path: | + backend/ + frontend/public + build-docker: + needs: build + runs-on: ubuntu-latest + if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Download build artifact + uses: actions/download-artifact@v3 + with: + name: production-artifacts + - name: Set up Docker Build + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} + platforms: linux/amd64,linux/arm64 + diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..d7f511c --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,92 @@ +name: Deploy Release +on: + push: + branches: + - 'v[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' + tags: + - 'v[0-9]+.[0-9]+.[0-9]+-rc.[0-9]+' +env: + CI: true + +jobs: + build: + runs-on: ubuntu-latest + environment: development + strategy: + matrix: + go-version: [ '1.22' ] + node-version: [ 18 ] + goarch: [ 'amd64', 'arm64' ] # Define architectures here + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Setup Go ${{ matrix.go-version }} + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ matrix.node-version }} + - name: Cache node modules + uses: actions/cache@v4 + with: + path: ~/.npm + key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} + restore-keys: | + ${{ runner.os }}-node- + - name: Install dependencies + working-directory: frontend + run: npm ci + - name: Build + working-directory: frontend + run: npm run build + - name: Install ARM64 cross-compiler + run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu + - name: Install dependencies + working-directory: backend + run: go get ./... + - name: Set up CC for cross-compilation + if: matrix.goarch == 'arm64' + run: echo "CC=aarch64-linux-gnu-gcc" >> $GITHUB_ENV + - name: Build + working-directory: backend + run: | + GOOS=linux GOARCH=${{ matrix.goarch }} CGO_ENABLED=1 go build -o build/doclytics-${{ matrix.goarch }} ./cmd/doclytics + #- name: Test with the Go CLI + # run: go test + - name: Archive production artifacts + uses: actions/upload-artifact@v3 + with: + name: production-artifacts + path: | + backend/ + frontend/public + build-docker: + needs: build + runs-on: ubuntu-latest + if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: Download build artifact + uses: actions/download-artifact@v3 + with: + name: production-artifacts + - name: Set up Docker Build + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Registry + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Build and push + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} + platforms: linux/amd64,linux/arm64 \ No newline at end of file diff --git a/.releaserc.yml b/.releaserc.yml new file mode 100644 index 0000000..cdc479d --- /dev/null +++ b/.releaserc.yml @@ -0,0 +1,20 @@ +plugins: + - "@semantic-release/commit-analyzer" + - "@semantic-release/release-notes-generator" + - - "@saithodev/semantic-release-backmerge" + - backmergeBranches: + - from: master + to: development + clearWorkspace: true + backmergeStrategy: merge + fastForwardMode: no-ff + message: "chore(release): Preparations for next release [skip ci]" + - - "@semantic-release/exec" + - verifyReleaseCmd: "echo ${nextRelease.version} > VERSION.txt" + - "@semantic-release/github" +branches: + - "+([0-9])?(.{+([0-9]),x}).x" + - "master" + - name: "development" + prerelease: "rc" + channel: "false" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8566d90 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM rust:1.67 + +WORKDIR /usr/doclytics +COPY . . + +RUN cargo install --path . + +CMD ["doclytics"] \ No newline at end of file From a228a7108934d8c02775e2226c09a9e92df26bd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 09:23:54 +0100 Subject: [PATCH 04/18] feat(deployment): remove wrong build step --- .github/workflows/release-prod.yml | 53 ----------------------------- .github/workflows/release.yml | 54 +----------------------------- 2 files changed, 1 insertion(+), 106 deletions(-) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index b8557c9..3a861ae 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -9,59 +9,6 @@ env: CI: true jobs: - build: - runs-on: ubuntu-latest - environment: production - strategy: - matrix: - go-version: [ '1.22' ] - node-version: [ 18 ] - goarch: [ 'amd64', 'arm64' ] # Define architectures here - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go-version }} - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ matrix.node-version }} - - name: Cache node modules - uses: actions/cache@v4 - with: - path: ~/.npm - key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} - restore-keys: | - ${{ runner.os }}-node- - - name: Install dependencies - working-directory: frontend - run: npm ci - - name: Build - working-directory: frontend - run: npm run build - - name: Install ARM64 cross-compiler - run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu - - name: Install dependencies - working-directory: backend - run: go get ./... 
- - name: Set up CC for cross-compilation - if: matrix.goarch == 'arm64' - run: echo "CC=aarch64-linux-gnu-gcc" >> $GITHUB_ENV - - name: Build - working-directory: backend - run: | - GOOS=linux GOARCH=${{ matrix.goarch }} CGO_ENABLED=1 go build -o build/doclytics-${{ matrix.goarch }} ./cmd/doclytics - #- name: Test with the Go CLI - # run: go test - - name: Archive production artifacts - uses: actions/upload-artifact@v3 - with: - name: production-artifacts - path: | - backend/ - frontend/public build-docker: needs: build runs-on: ubuntu-latest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d7f511c..0b30263 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,59 +9,7 @@ env: CI: true jobs: - build: - runs-on: ubuntu-latest - environment: development - strategy: - matrix: - go-version: [ '1.22' ] - node-version: [ 18 ] - goarch: [ 'amd64', 'arm64' ] # Define architectures here - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: Setup Go ${{ matrix.go-version }} - uses: actions/setup-go@v5 - with: - go-version: ${{ matrix.go-version }} - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ matrix.node-version }} - - name: Cache node modules - uses: actions/cache@v4 - with: - path: ~/.npm - key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} - restore-keys: | - ${{ runner.os }}-node- - - name: Install dependencies - working-directory: frontend - run: npm ci - - name: Build - working-directory: frontend - run: npm run build - - name: Install ARM64 cross-compiler - run: sudo apt-get update && sudo apt-get install -y gcc-aarch64-linux-gnu - - name: Install dependencies - working-directory: backend - run: go get ./... - - name: Set up CC for cross-compilation - if: matrix.goarch == 'arm64' - run: echo "CC=aarch64-linux-gnu-gcc" >> $GITHUB_ENV - - name: Build - working-directory: backend - run: | - GOOS=linux GOARCH=${{ matrix.goarch }} CGO_ENABLED=1 go build -o build/doclytics-${{ matrix.goarch }} ./cmd/doclytics - #- name: Test with the Go CLI - # run: go test - - name: Archive production artifacts - uses: actions/upload-artifact@v3 - with: - name: production-artifacts - path: | - backend/ - frontend/public + build-docker: needs: build runs-on: ubuntu-latest From e36483315c9e0cee172210468ac7a8ae1c88cdc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 09:33:15 +0100 Subject: [PATCH 05/18] fix(deployment): update rust docker image and attempt to fix workflow, update env variables --- .github/workflows/push.yml | 4 ---- Dockerfile | 2 +- src/main.rs | 4 ++-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index b7d38e6..03bde8b 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -47,10 +47,6 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download build artifact - uses: actions/download-artifact@v3 - with: - name: production-artifacts - name: Set up Docker Build uses: docker/setup-buildx-action@v3 diff --git a/Dockerfile b/Dockerfile index 8566d90..de33f58 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM rust:1.67 +FROM rust:1.76 WORKDIR /usr/doclytics COPY . . 
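The subject of this patch also mentions updated environment variables; the hunk that follows renames them to `PAPERLESS_BASE_URL` and `PAPERLESS_TOKEN`. Purely as an illustration of reading that configuration up front with explicit error messages — the `PaperlessConfig` type and `from_env` helper below are assumptions for this sketch and are not part of the patch series —

```rust
use std::env;

/// Hypothetical helper; the patches themselves read these variables inline in main().
struct PaperlessConfig {
    base_url: String,
    token: String,
}

impl PaperlessConfig {
    /// Fail early with a readable error if a required variable is missing.
    fn from_env() -> Result<Self, env::VarError> {
        Ok(Self {
            base_url: env::var("PAPERLESS_BASE_URL")?,
            token: env::var("PAPERLESS_TOKEN")?,
        })
    }
}

fn main() {
    match PaperlessConfig::from_env() {
        Ok(cfg) => println!("Using Paperless instance at {}", cfg.base_url),
        Err(e) => eprintln!("Missing configuration: {e}"),
    }
}
```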
diff --git a/src/main.rs b/src/main.rs index 6d5220b..1723d79 100644 --- a/src/main.rs +++ b/src/main.rs @@ -135,9 +135,9 @@ async fn generate_response( } #[tokio::main] async fn main() -> Result<(), Box> { - let base_url = std::env::var("BASE_URL").unwrap(); + let base_url = std::env::var("PAPERLESS_BASE_URL").unwrap(); - let token = env::var("TOKEN").expect("TOKEN is not set in .env file"); + let token = env::var("PAPERLESS_TOKEN").expect("TOKEN is not set in .env file"); // Create HeaderMap and add Authorization header let mut headers = HeaderMap::new(); let header_value = HeaderValue::from_str(&format!("Token {}", token)).unwrap(); From 3d7ebf634f0574faa58ab5d590108768cbc584c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 09:40:37 +0100 Subject: [PATCH 06/18] fix(structure): split code into different modules --- src/llm_api.rs | 17 ++++++++++ src/main.rs | 85 +++++------------------------------------------- src/paperless.rs | 63 +++++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 76 deletions(-) create mode 100644 src/llm_api.rs create mode 100644 src/paperless.rs diff --git a/src/llm_api.rs b/src/llm_api.rs new file mode 100644 index 0000000..b6239dd --- /dev/null +++ b/src/llm_api.rs @@ -0,0 +1,17 @@ +use ollama_rs::generation::completion::GenerationResponse; +use ollama_rs::generation::completion::request::GenerationRequest; +use ollama_rs::Ollama; +use crate::Document; + +pub async fn generate_response( + ollama: &Ollama, + model: &String, + prompt_base: &String, + document: &Document, +) -> std::result::Result> { + let prompt = format!("{} {}", document.content, prompt_base); + let res = ollama + .generate(GenerationRequest::new(model.clone(), prompt)) + .await; + res.map_err(|e| e.into()) // Map the Err variant to a Box +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 1723d79..1abf916 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,21 +1,25 @@ +mod llm_api; +mod paperless; + use ollama_rs::{ - generation::completion::{request::GenerationRequest, GenerationContext, GenerationResponse}, Ollama, }; use substring::Substring; -use reqwest::{Client, Error}; +use reqwest::{Client, }; use std::result::Result; -use tokio::io::stdout; -use tokio::runtime::Runtime; + //function that fetches data from the endpoint //write function that queries a rest endpoint for a given url use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION}; use serde::{Deserialize, Serialize}; -use serde_json::{Value, Map}; +use serde_json::{Value}; use std::collections::HashMap; use std::env; use std::error::Error as StdError; +use crate::llm_api::generate_response; +use crate::paperless::{get_data_from_paperless, query_custom_fields}; + #[derive(Serialize, Deserialize, Debug, Clone)] struct Document { id: u32, @@ -59,80 +63,9 @@ struct Field { data_type: String, } -async fn get_data_from_paperless( - client: &Client, - url: &str, -) -> std::result::Result, Box> { - // Read token from environment - let token = env::var("TOKEN").expect("TOKEN is not set in .env file"); - - //Define filter string - let filter = "NOT tagged=true".to_string(); - - let response = client.get(format!("{}/api/documents/?query={}", url, filter)).send().await?; - let body = response.text().await?; - // Remove the "Document content: " prefix - let json = body.trim_start_matches("Document content: "); - //println!("{}",json); - // Parse the JSON string into a generic JSON structure - //let value: serde_json::Value = serde_json::from_str(json).unwrap(); - 
// Print the part of the JSON structure that's causing the error - //let error_part = value.pointer("/results/0").unwrap(); - //println!("Error part: {}", error_part); - // Parse the JSON string into the Response struct - let data: std::result::Result, _> = serde_json::from_str(json); - match data { - Ok(data) => Ok(data.results), - Err(e) => { - let column = e.column(); - let start = (column as isize - 30).max(0) as usize; - let end = (column + 30).min(json.len()); - println!("Error at column {}: {}", column, &json[start..end]); - Err(e.into()) // Remove the semicolon here - } - } -} -async fn query_custom_fields( - client: &Client, - base_url: &str, -) -> std::result::Result, Box> { - let res = client - .get(format!("{}/api/custom_fields/", base_url)) - .send() - .await?; - let body = res.text().await?; - // Remove the "Document content: " prefix - let json = body.trim_start_matches("Field: "); - let data: std::result::Result, _> = serde_json::from_str(json); - match data { - Ok(data) => { - println!("Fields: {:?}", data.results); - Ok(data.results) - }, - Err(e) => { - let column = e.column(); - let start = (column as isize - 30).max(0) as usize; - let end = (column + 30).min(json.len()); - println!("Error at column {}: {}", column, &json[start..end]); - Err(e.into()) // Remove the semicolon here - } - } -} -async fn generate_response( - ollama: &Ollama, - model: &String, - prompt_base: &String, - document: &Document, -) -> std::result::Result> { - let prompt = format!("{} {}", document.content, prompt_base); - let res = ollama - .generate(GenerationRequest::new(model.clone(), prompt)) - .await; - res.map_err(|e| e.into()) // Map the Err variant to a Box -} #[tokio::main] async fn main() -> Result<(), Box> { let base_url = std::env::var("PAPERLESS_BASE_URL").unwrap(); diff --git a/src/paperless.rs b/src/paperless.rs new file mode 100644 index 0000000..143fa1b --- /dev/null +++ b/src/paperless.rs @@ -0,0 +1,63 @@ +use reqwest::Client; +use serde::de::StdError; +use crate::{Document, Field, Response}; + +pub async fn get_data_from_paperless( + client: &Client, + url: &str, +) -> std::result::Result, Box> { + // Read token from environment + //Define filter string + let filter = "NOT tagged=true".to_string(); + + let response = client.get(format!("{}/api/documents/?query={}", url, filter)).send().await?; + let body = response.text().await?; + + // Remove the "Document content: " prefix + let json = body.trim_start_matches("Document content: "); + //println!("{}",json); + // Parse the JSON string into a generic JSON structure + //let value: serde_json::Value = serde_json::from_str(json).unwrap(); + + // Print the part of the JSON structure that's causing the error + //let error_part = value.pointer("/results/0").unwrap(); + //println!("Error part: {}", error_part); + // Parse the JSON string into the Response struct + let data: std::result::Result, _> = serde_json::from_str(json); + match data { + Ok(data) => Ok(data.results), + Err(e) => { + let column = e.column(); + let start = (column as isize - 30).max(0) as usize; + let end = (column + 30).min(json.len()); + println!("Error at column {}: {}", column, &json[start..end]); + Err(e.into()) // Remove the semicolon here + } + } +} +pub async fn query_custom_fields( + client: &Client, + base_url: &str, +) -> std::result::Result, Box> { + let res = client + .get(format!("{}/api/custom_fields/", base_url)) + .send() + .await?; + let body = res.text().await?; + // Remove the "Document content: " prefix + let json = 
body.trim_start_matches("Field: "); + let data: std::result::Result, _> = serde_json::from_str(json); + match data { + Ok(data) => { + println!("Fields: {:?}", data.results); + Ok(data.results) + }, + Err(e) => { + let column = e.column(); + let start = (column as isize - 30).max(0) as usize; + let end = (column + 30).min(json.len()); + println!("Error at column {}: {}", column, &json[start..end]); + Err(e.into()) // Remove the semicolon here + } + } +} \ No newline at end of file From 2dea2ec078ebc09e69ea97a0e9d2640305d43794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 13:12:48 +0100 Subject: [PATCH 07/18] chore(docs): update readme with docker setup instructions --- README.md | 84 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 71 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 56e1ccb..c2a06e4 100644 --- a/README.md +++ b/README.md @@ -2,37 +2,95 @@ ## Description -This is a Rust project that interacts with the paperless-ngx API to fetch and update documents. The goal is to utilize local llms, in this case ollama, to generate metadata for the documents in your paperless document library. -It uses the `reqwest` library to make HTTP requests and `serde_json` for JSON serialization and deserialization, as well as ollama_rs +Doclytics is a straightforward Rust-based tool that integrates with the paperless-ngx API to fetch and update document metadata. It primarily leverages a local language model, ollama, to extract and generate metadata for documents stored in a Paperless document library. The tool uses `reqwest` for making HTTP requests and `serde_json` for handling JSON data, ensuring seamless communication with the Paperless API and efficient data processing. +By interfacing directly with ollama, Doclytics automates the extraction of specified metadata from documents, utilizing the local LLM's capabilities to analyze document content and produce the required metadata in a JSON format. This metadata is then used to update the respective documents in the Paperless library, aiming to improve document organization and retrievability without overly complex processes or configurations. + +## Prerequisites + +Before setting up and running Doclytics, ensure you have the following prerequisites installed and configured: + +1. **Rust Environment:** Doclyics is built with Rust, so you need to have Rust installed on your machine. If you haven't already installed Rust, follow the instructions on the [official Rust website](https://www.rust-lang.org/tools/install). + +2. **Paperless-ngx Instance:** You should have a running instance of Paperless-ngx, as Doclytics interacts with its API. Ensure that your Paperless-ngx instance is accessible and that you have the necessary permissions to interact with it. + +3. **Ollama Setup:** + - **Ollama Installation:** Ensure that you have ollama installed and properly configured in your environment. Follow the installation instructions provided in the [ollama repository](https://github.com/ollama/ollama) to set up ollama on your system. + - **Starting Ollama:** Once installed, you need to start the ollama service using the command `ollama serve`. This command initializes the ollama server, allowing Doclytics to communicate with it for processing documents. + - **Model Selection:** + - For processing documents with ollama, the `llama2-7b` model can be used, but it may have difficulties returning outputs as plain JSON. 
+ - Alternatively, the `llama2-13b` parameter model is known to work reasonably well with Doclytics, offering a good balance between performance and output quality. It's essential to choose a model that aligns with your requirements for metadata extraction accuracy and format. + +4. **Docker (Optional):** If you prefer to run Doclytics within a Docker container, ensure Docker is installed and running on your machine. This approach is beneficial for maintaining a consistent environment and simplifying deployment. + +5. **Git:** To clone the Doclytics repository, you'll need Git installed on your machine. Git will allow you to obtain the latest version of the project and stay updated with any changes. + +With these prerequisites met, you are now ready to proceed with the installation and configuration of Doclytics to enhance your document management workflow with advanced metadata extraction capabilities. ## Setup -1. Install Rust: Follow the instructions on the [official Rust website](https://www.rust-lang.org/tools/install) to install Rust on your machine. +1. **Install Rust:** Follow the installation guide on the [official Rust website](https://www.rust-lang.org/tools/install) to set up Rust on your system. -2. Clone the repository: `git clone https://github.com/yourusername/yourrepository.git` +2. **Clone the Repository:** Use the command `git clone https://github.com/B-Urb/doclytics.git` to clone the project repository. -3. Navigate to the project directory: `cd doclytics` +3. **Navigate to the Project Directory:** Change into the project's directory with `cd doclytics`. -4. Build the project: `cargo build` +4. **Build the Project:** Compile the project using `cargo build`. -5. Run the project: `cargo run` +5. **Run the Project:** Start the application with `cargo run`. ## Environment Variables -This project uses the following environment variables: +The project requires setting certain environment variables for its operation: + +- `PAPERLESS_BASE_URL`: The base URL for your Paperless instance. +- `PAPERLESS_TOKEN`: The API token used for authenticating against the Paperless API. + +These should be defined in a `.env` file located at the root of your project directory. + +## Docker Integration + +To facilitate easier deployment and environment management, Doclytics provides a Docker container setup. + +### Docker Image + +The Docker image for Doclytics can be found on Docker Hub: `bjoern5urban/doclytics:latest`. + +### Docker Compose + +Here's an example `docker-compose.yml` snippet for setting up Doclytics: + +```yaml +version: '3' +services: + doclytics: + image: bjoern5urban/doclytics:latest + environment: + PAPERLESS_BASE_URL: http://your-paperless-instance + PAPERLESS_TOKEN: yourapitoken + volumes: + - ./data:/app/data +``` + +This configuration mounts a local directory (`./data`) to the `/app/data` directory inside the container, allowing persistent data storage. + +### Docker Run + +Alternatively, you can use `docker run` to start the container: -- `TOKEN`: This is used for API authentication. +```bash +docker run -e PAPERLESS_BASE_URL=http://your-paperless-instance -e PAPERLESS_TOKEN=yourapitoken bjoern5urban/doclytics:latest +``` -Set these in a `.env` file in the root of your project. +Ensure to replace `http://your-paperless-instance` and `yourapitoken` with your actual Paperless instance URL and API token. ## Usage -This project is currently set up to fetch and update documents from an API. 
The main function queries custom fields from the API, generates a request to the Ollama service, and updates the document fields based on the response. +Doclytics is designed to automate the enrichment of document metadata using language model insights. After setup, it fetches documents from the configured Paperless instance, processes them through Ollama to generate metadata, and updates the document entries in Paperless with this new metadata. ## Contributing -Pull requests are welcome. For major changes, please open an issue first to discuss what you would like to change. +Contributions are encouraged! If you're interested in enhancing Doclytics, please fork the repository, create a feature branch, and submit a pull request. For substantial changes or enhancements, opening an issue for discussion is recommended. ## License -[MIT](https://choosealicense.com/licenses/mit/) +Doclytics is released under the [MIT License](https://choosealicense.com/licenses/mit/). \ No newline at end of file From 92e70c3a8d1be2b8e4a564372651f7184a044995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 14:30:53 +0100 Subject: [PATCH 08/18] chore(docs): update readme with docker setup instructions --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index c2a06e4..bd82d55 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,8 @@ Before setting up and running Doclytics, ensure you have the following prerequis 1. **Rust Environment:** Doclyics is built with Rust, so you need to have Rust installed on your machine. If you haven't already installed Rust, follow the instructions on the [official Rust website](https://www.rust-lang.org/tools/install). 2. **Paperless-ngx Instance:** You should have a running instance of Paperless-ngx, as Doclytics interacts with its API. Ensure that your Paperless-ngx instance is accessible and that you have the necessary permissions to interact with it. + 1. Create a custom field called `tagged` of type `Boolean` in custom fields. Furthermore, create an arbitrary number of fields, you want Doclytics to extract metadata for. + 2. Get Your API-Token from clicking on "Your Username" -> My Profile -> API Auth Token 3. **Ollama Setup:** - **Ollama Installation:** Ensure that you have ollama installed and properly configured in your environment. Follow the installation instructions provided in the [ollama repository](https://github.com/ollama/ollama) to set up ollama on your system. 
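The README changes above list the metadata fields the model is prompted to return (title, topic, sender, recipient, urgency, date_received, category) as one flat JSON object, which is then written back into the matching Paperless custom fields. As a small sketch of that shape — the sample values below are invented for illustration, not taken from a real model response — parsing such an answer into a string-keyed map, roughly mirroring how the patches hand parsed metadata to the update call, could look like this:

```rust
use std::collections::HashMap;
use serde_json::Value;

fn main() -> Result<(), serde_json::Error> {
    // Invented example of the flat JSON object the prompt asks the model for.
    let answer = r#"{
        "title": "Electricity bill March",
        "topic": "utilities",
        "sender": "City Power Co",
        "recipient": "Jane Doe",
        "urgency": "low",
        "date_received": "2024-03-04",
        "category": "invoice"
    }"#;

    // A flat map of optional values keeps the structure simple: no nesting,
    // one entry per custom field.
    let metadata: HashMap<String, Option<Value>> = serde_json::from_str(answer)?;
    for (field, value) in &metadata {
        println!("{field}: {value:?}");
    }
    Ok(())
}
```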
From 1132a3600e7b3d6d54f5a42a2051e7def15bdb1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 14:33:13 +0100 Subject: [PATCH 09/18] refactor: optimize env variable reading and split code into more readable functions initialize clients for ollama and paperless in seperate methods --- src/main.rs | 125 +++++++++++++++++------------------------------ src/paperless.rs | 44 ++++++++++++++++- 2 files changed, 88 insertions(+), 81 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1abf916..0bb188a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,11 +14,9 @@ use std::result::Result; use reqwest::header::{HeaderMap, HeaderValue, AUTHORIZATION}; use serde::{Deserialize, Serialize}; use serde_json::{Value}; -use std::collections::HashMap; use std::env; -use std::error::Error as StdError; use crate::llm_api::generate_response; -use crate::paperless::{get_data_from_paperless, query_custom_fields}; +use crate::paperless::{get_data_from_paperless, query_custom_fields, update_document_fields}; #[derive(Serialize, Deserialize, Debug, Clone)] struct Document { @@ -64,54 +62,41 @@ struct Field { } - - -#[tokio::main] -async fn main() -> Result<(), Box> { - let base_url = std::env::var("PAPERLESS_BASE_URL").unwrap(); - - let token = env::var("PAPERLESS_TOKEN").expect("TOKEN is not set in .env file"); - // Create HeaderMap and add Authorization header +// Initialize the HTTP client with Paperless API token and base URL +fn init_paperless_client(token: &str) -> Client { let mut headers = HeaderMap::new(); - let header_value = HeaderValue::from_str(&format!("Token {}", token)).unwrap(); + let header_value = HeaderValue::from_str(&format!("Token {}", token)) + .expect("Invalid header value for TOKEN"); headers.insert(AUTHORIZATION, header_value); - let client = Client::builder().default_headers(headers).build().unwrap(); - // Create a Client with the default headers - let ollama = Ollama::new("http://localhost".to_string(), 11434); - //let model = "mistral:latest".to_string(); - let model = "llama2:13b".to_string(); - let prompt_base = "Please extract metadata from the provided document and return it in JSON format. The fields I need are: title,topic,sender,recipient,urgency(with value either n/a or low or medium or high),date_received,category. Analyze the document to find the values for these fields and format the response as a JSON object. Use the most likely answer for each field. The response should contain only JSON data where the key and values are all in simple string format(no nested object) for direct parsing by another program. 
So now additional text or explanation, no introtext, the answer should start and end with curly brackets delimiting the json object ".to_string(); - - let fields = query_custom_fields(&client, &base_url).await?; - //let res = ollama.generate(GenerationRequest::new(model, prompt)).await; + Client::builder() + .default_headers(headers) + .build() + .expect("Failed to build client") +} - // if let Ok(res) = res { - // println!("{}", res.response); - // } +// Initialize Ollama client +fn init_ollama_client(host: &str, port: u16, secure_endpoint: bool) -> Ollama { + let protocol = if secure_endpoint { "https" } else { "http" }; + let ollama_base_url = format!("{}://{}", protocol, host); + Ollama::new(ollama_base_url, port) +} - // Query data from paperless-ngx endpoint +// Refactor the main process into a function for better readability +async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_url: &str) -> Result<(), Box> { + let prompt_base = "Please extract metadata from the provided document and return it in JSON format. The fields I need are: title,topic,sender,recipient,urgency(with value either n/a or low or medium or high),date_received,category. Analyze the document to find the values for these fields and format the response as a JSON object. Use the most likely answer for each field. The response should contain only JSON data where the key and values are all in simple string format(no nested object) for direct parsing by another program. So now additional text or explanation, no introtext, the answer should start and end with curly brackets delimiting the json object ".to_string(); + let fields = query_custom_fields(client, base_url).await?; match get_data_from_paperless(&client, &base_url).await { Ok(data) => { for document in data { - let res = generate_response(&ollama, &model, &prompt_base, &document).await; - if let Ok(res) = res { - println!("Response: {}", res.response); - if let Some(json_str) = extract_json_object(&res.response) { - println!("JSON: {}", json_str); - let parsed_json = serde_json::from_str(&json_str); - match parsed_json { - Ok(json) => { - update_document_fields(&client, document.id, &fields, &json, &base_url).await; - // Use the parsed JSON here - } - Err(e) => { - eprintln!("Error parsing JSON: {}", e); - } - } - } else { - eprintln!("No JSON object found in the response"); + let res = generate_response(ollama, &model.to_string(), &prompt_base.to_string(), &document).await?; + if let Some(json_str) = extract_json_object(&res.response) { + match serde_json::from_str(&json_str) { + Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url).await?, + Err(e) => eprintln!("Error parsing JSON: {}", e), } + } else { + eprintln!("No JSON object found in the response"); } } } @@ -120,44 +105,24 @@ async fn main() -> Result<(), Box> { Ok(()) } -async fn update_document_fields( - client: &Client, - document_id: u32, - fields: &Vec, - metadata: &HashMap>, - base_url: &str -) -> std::result::Result<(), Box> { - let mut custom_fields = Vec::new(); - - for (key, value) in metadata { - if key == "title" { - continue; - } - if let Some(field) = fields.iter().find(|&f| f.name == *key) { - let custom_field = CustomField { - field: field.id.clone(), - value: value.as_ref().cloned(), - }; - custom_fields.push(custom_field); - } - } - // Add the tagged field, to indicate that the document has been processed - let custom_field = CustomField { - field: 1, - value: Some(serde_json::json!(true)), - }; - custom_fields.push(custom_field); - 
let mut payload = serde_json::Map::new(); - - payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields)); - if let Some(value) = metadata.get("title").and_then(|v| v.as_ref().and_then(|v| v.as_str())) { - payload.insert("title".to_string(), serde_json::json!(value)); - } - let url = format!("{}/api/documents/{}/", base_url, document_id); - let res = client.patch(&url).json(&payload).send().await?; - let body = res.text().await?; - println!("{}", body); - Ok(()) +#[tokio::main] +async fn main() -> Result<(), Box> { + let token = env::var("PAPERLESS_TOKEN").expect("PAPERLESS_TOKEN is not set in .env file"); + let base_url = env::var("PAPERLESS_BASE_URL").expect("PAPERLESS_BASE_URL is not set in .env file"); + let client = init_paperless_client(&token); + + let ollama_host = env::var("OLLAMA_HOST").unwrap_or_else(|_| "localhost".to_string()); + let ollama_port = env::var("OLLAMA_PORT") + .unwrap_or_else(|_| "11434".to_string()) + .parse::().unwrap_or(11434); + let ollama_secure_endpoint = env::var("OLLAMA_SECURE_ENDPOINT") + .unwrap_or_else(|_| "false".to_string()) + .parse::().unwrap_or(false); + + let ollama = init_ollama_client(&ollama_host, ollama_port, ollama_secure_endpoint); + let model = env::var("OLLAMA_MODEL").unwrap_or_else(|_| "llama2:13b".to_string()); + + process_documents(&client, &ollama, &model, &base_url).await } fn extract_json_object(input: &str) -> Option { diff --git a/src/paperless.rs b/src/paperless.rs index 143fa1b..1cf4fbb 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -1,6 +1,8 @@ +use std::collections::HashMap; use reqwest::Client; use serde::de::StdError; -use crate::{Document, Field, Response}; +use serde_json::Value; +use crate::{CustomField, Document, Field, Response}; pub async fn get_data_from_paperless( client: &Client, @@ -60,4 +62,44 @@ pub async fn query_custom_fields( Err(e.into()) // Remove the semicolon here } } +} + +pub async fn update_document_fields( + client: &Client, + document_id: u32, + fields: &Vec, + metadata: &HashMap>, + base_url: &str +) -> std::result::Result<(), Box> { + let mut custom_fields = Vec::new(); + + for (key, value) in metadata { + if key == "title" { + continue; + } + if let Some(field) = fields.iter().find(|&f| f.name == *key) { + let custom_field = CustomField { + field: field.id.clone(), + value: value.as_ref().cloned(), + }; + custom_fields.push(custom_field); + } + } + // Add the tagged field, to indicate that the document has been processed + let custom_field = CustomField { + field: 1, + value: Some(serde_json::json!(true)), + }; + custom_fields.push(custom_field); + let mut payload = serde_json::Map::new(); + + payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields)); + if let Some(value) = metadata.get("title").and_then(|v| v.as_ref().and_then(|v| v.as_str())) { + payload.insert("title".to_string(), serde_json::json!(value)); + } + let url = format!("{}/api/documents/{}/", base_url, document_id); + let res = client.patch(&url).json(&payload).send().await?; + let body = res.text().await?; + println!("{}", body); + Ok(()) } \ No newline at end of file From 1f6b1ad54555b4a774d2d52e09177b21eec72379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 14:37:23 +0100 Subject: [PATCH 10/18] docs: update README with new configuration section --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index bd82d55..634947f 100644 --- a/README.md +++ b/README.md @@ -42,12 +42,19 @@ 
With these prerequisites met, you are now ready to proceed with the installation ## Environment Variables -The project requires setting certain environment variables for its operation: - -- `PAPERLESS_BASE_URL`: The base URL for your Paperless instance. -- `PAPERLESS_TOKEN`: The API token used for authenticating against the Paperless API. - -These should be defined in a `.env` file located at the root of your project directory. +The application requires setting environment variables for its configuration. Below is a table describing each environment variable, indicating whether it is required or optional, its default value (if any), and a brief description: + +| Environment Variable | Required | Default Value | Description | +|----------------------------|----------|-----------------|--------------------------------------------------------------| +| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | +| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. | +| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | +| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. | +| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | +| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | + +Make sure to set the required environment variables (`PAPERLESS_TOKEN` and `PAPERLESS_BASE_URL`) before running the application. Optional variables have default values and will use those defaults if not explicitly set. +For Development these should be defined in a `.env` file located at the root of your project directory. ## Docker Integration From 3d8a3f756403aef4f82f632fc1db26379e42deff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 15:37:46 +0100 Subject: [PATCH 11/18] docs: update README with new configuration section --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 634947f..00b5b3a 100644 --- a/README.md +++ b/README.md @@ -87,10 +87,16 @@ This configuration mounts a local directory (`./data`) to the `/app/data` direct Alternatively, you can use `docker run` to start the container: ```bash -docker run -e PAPERLESS_BASE_URL=http://your-paperless-instance -e PAPERLESS_TOKEN=yourapitoken bjoern5urban/doclytics:latest +docker run --network="host" -e PAPERLESS_BASE_URL=http://your-paperless-instance -e PAPERLESS_TOKEN=yourapitoken bjoern5urban/doclytics:latest ``` - Ensure to replace `http://your-paperless-instance` and `yourapitoken` with your actual Paperless instance URL and API token. +> [!IMPORTANT] +> When using `ollama serve` on the host system, please ensure you set the `OLLAMA_HOST` environment variable to `host.docker.internal`. This setting is crucial for proper communication between your containerized application and the Ollama service running on the host. 
+ +```bash +export OLLAMA_HOST=host.docker.internal + + ## Usage From 6f5303570987d29d04ed72e71c29709320277657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 17:02:46 +0100 Subject: [PATCH 12/18] docs: update README with new configuration section --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 00b5b3a..3313a85 100644 --- a/README.md +++ b/README.md @@ -93,10 +93,6 @@ Ensure to replace `http://your-paperless-instance` and `yourapitoken` with your > [!IMPORTANT] > When using `ollama serve` on the host system, please ensure you set the `OLLAMA_HOST` environment variable to `host.docker.internal`. This setting is crucial for proper communication between your containerized application and the Ollama service running on the host. -```bash -export OLLAMA_HOST=host.docker.internal - - ## Usage From 7222506da2bd186c6367b9aa05b7abfcf405b9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 18:38:37 +0100 Subject: [PATCH 13/18] feat: add possibility to specify prompt via env variable --- example/example.prompt | 10 ++++++++++ src/main.rs | 14 +++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 example/example.prompt diff --git a/example/example.prompt b/example/example.prompt new file mode 100644 index 0000000..c092cf2 --- /dev/null +++ b/example/example.prompt @@ -0,0 +1,10 @@ +Please extract metadata from the provided document and return it in JSON format. +The fields I need are: +title,topic,sender,recipient,urgency(with value either n/a or low or medium or high), +date_received,category. +Analyze the document to find the values for these fields and format the response as a +JSON object. Use the most likely answer for each field. +The response should contain only JSON data where the key and values are all in simple string +format(no nested object) for direct parsing by another program. So now additional text or +explanation, no introtext, the answer should start and end with curly brackets +delimiting the json object \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 0bb188a..d047b1d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -84,7 +84,17 @@ fn init_ollama_client(host: &str, port: u16, secure_endpoint: bool) -> Ollama { // Refactor the main process into a function for better readability async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_url: &str) -> Result<(), Box> { - let prompt_base = "Please extract metadata from the provided document and return it in JSON format. The fields I need are: title,topic,sender,recipient,urgency(with value either n/a or low or medium or high),date_received,category. Analyze the document to find the values for these fields and format the response as a JSON object. Use the most likely answer for each field. The response should contain only JSON data where the key and values are all in simple string format(no nested object) for direct parsing by another program. 
So now additional text or explanation, no introtext, the answer should start and end with curly brackets delimiting the json object ".to_string(); + let prompt_base= env::var("BASE_PROMPT").unwrap_or_else(|_| "Please extract metadata from the provided document and return it in JSON format.\ + The fields I need are:\ + title,topic,sender,recipient,urgency(with value either n/a or low or medium or high),\ + date_received,category.\ + Analyze the document to find the values for these fields and format the response as a \ + JSON object. Use the most likely answer for each field. \ + The response should contain only JSON data where the key and values are all in simple string \ + format(no nested object) for direct parsing by another program. So now additional text or \ + explanation, no introtext, the answer should start and end with curly brackets \ + delimiting the json object ".to_string() + ); let fields = query_custom_fields(client, base_url).await?; match get_data_from_paperless(&client, &base_url).await { Ok(data) => { @@ -120,8 +130,10 @@ async fn main() -> Result<(), Box> { .parse::().unwrap_or(false); let ollama = init_ollama_client(&ollama_host, ollama_port, ollama_secure_endpoint); + let model = env::var("OLLAMA_MODEL").unwrap_or_else(|_| "llama2:13b".to_string()); + process_documents(&client, &ollama, &model, &base_url).await } From bd67a9a40ac58e834fce8344d178ef31ec9f2f1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 18:38:48 +0100 Subject: [PATCH 14/18] docs: add possibility to specify prompt via env variable --- README.md | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3313a85..0ec529d 100644 --- a/README.md +++ b/README.md @@ -44,14 +44,16 @@ With these prerequisites met, you are now ready to proceed with the installation The application requires setting environment variables for its configuration. Below is a table describing each environment variable, indicating whether it is required or optional, its default value (if any), and a brief description: -| Environment Variable | Required | Default Value | Description | -|----------------------------|----------|-----------------|--------------------------------------------------------------| -| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | -| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. | -| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | -| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. | -| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | -| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | +| Environment Variable | Required | Default Value | Description | +|--------------------------|----------|------------------------------|---------------------------------------------------------------------------------------------------------------------------------| +| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | +| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. | +| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | +| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. 
| +| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | +| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | +| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.
From c9f7d6e289cc7d3895d93e798a4f0b5663f4d278 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Urban?=
Date: Tue, 27 Feb 2024 18:39:05 +0100
Subject: [PATCH 15/18] ci: enable release and asset upload

---
 .github/workflows/push.yml         | 11 ++++-
 .github/workflows/release-prod.yml | 40 ++++++++++++++++
 .github/workflows/release.yml      | 76 ++++++++++++++++++++++++++++--
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml
index 03bde8b..32e5dbf 100644
--- a/.github/workflows/push.yml
+++ b/.github/workflows/push.yml
@@ -17,6 +17,8 @@ permissions:
 jobs:
   build:
     runs-on: ${{ matrix.os }}
+    env:
+      BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
@@ -38,10 +40,15 @@ jobs:
           override: true
       - name: Build
         run: cargo build --release --target ${{ matrix.target }}
+      - name: Upload artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }}
+          path: |
+            ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }}
   build-docker:
     needs: build
     runs-on: ubuntu-latest
-    if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development'
     env:
       BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
     steps:
@@ -68,7 +75,7 @@ jobs:
       contents: write
       issues: write
       pull-requests: write
-    if: false #${{ github.ref }} == 'master' || ${{ github.ref }} == 'development'
+    if: ${{ github.ref }} == 'master' || ${{ github.ref }} == 'development'
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml
index 3a861ae..d32e0c6 100644
--- a/.github/workflows/release-prod.yml
+++ b/.github/workflows/release-prod.yml
@@ -9,6 +9,46 @@ env:
   CI: true
 
 jobs:
+  build:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-latest]
+        include:
+          - os: ubuntu-latest
+            target: x86_64-unknown-linux-gnu
+          - os: macos-latest
+            target: x86_64-apple-darwin
+          - os: windows-latest
+            target: x86_64-pc-windows-msvc
+    steps:
+      - uses: 
actions/checkout@v4 + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: ${{ matrix.target }} + profile: minimal + override: true + - name: Build + run: cargo build --release --target ${{ matrix.target }} + - name: Upload artifact + uses: actions/upload-artifact@v3 + with: + name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} + path: | + ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} + - name: Upload Release Asset + uses: actions/upload-release-asset@v1 + if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} + asset_name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} + asset_content_type: application/octet-stream} build-docker: needs: build runs-on: ubuntu-latest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0b30263..b55556f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -9,9 +9,47 @@ env: CI: true jobs: - + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ ubuntu-latest, macos-latest, windows-latest ] + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + - os: macos-latest + target: x86_64-apple-darwin + - os: windows-latest + target: x86_64-pc-windows-msvc + steps: + - uses: actions/checkout@v4 + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + target: ${{ matrix.target }} + profile: minimal + override: true + - name: Build + run: cargo build --release --target ${{ matrix.target }} + - name: Upload artifact + uses: actions/upload-artifact@v3 + with: + name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} + path: | + ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} + - name: Upload Release Asset + uses: actions/upload-release-asset@v1 + if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + with: + upload_url: ${{ github.event.release.upload_url }} + asset_path: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} + asset_name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} + asset_content_type: application/octet-stream build-docker: - needs: build runs-on: ubuntu-latest if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' env: @@ -37,4 +75,36 @@ jobs: context: . 
push: true tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} - platforms: linux/amd64,linux/arm64 \ No newline at end of file + platforms: linux/amd64,linux/arm64 + - name: Upload macOS Binary + uses: actions/upload-release-asset@v1 + if: steps.semantic-release.outputs.release_created == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.semantic-release.outputs.release_upload_url }} + asset_path: ./target/x86_64-apple-darwin/release/your-binary + asset_name: your-binary-macos + asset_content_type: application/octet-stream + + - name: Upload Windows Binary + uses: actions/upload-release-asset@v1 + if: steps.semantic-release.outputs.release_created == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.semantic-release.outputs.release_upload_url }} + asset_path: ./target/x86_64-pc-windows-msvc/release/your-binary.exe + asset_name: your-binary-windows.exe + asset_content_type: application/octet-stream + + - name: Upload Linux Binary + uses: actions/upload-release-asset@v1 + if: steps.semantic-release.outputs.release_created == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + upload_url: ${{ steps.semantic-release.outputs.release_upload_url }} + asset_path: ./target/x86_64-unknown-linux-gnu/release/your-binary + asset_name: your-binary-linux + asset_content_type: application/octet-stream From 98605eba23c66959a8a0f6ef9b612ae569049c46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 19:47:03 +0100 Subject: [PATCH 16/18] ci: fix asset upload --- .github/workflows/release.yml | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b55556f..acdedd2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,17 +38,34 @@ jobs: name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} path: | ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} - - name: Upload Release Asset - uses: actions/upload-release-asset@v1 - if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + - name: Get Release Upload URL + id: get_upload_url env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - with: - upload_url: ${{ github.event.release.upload_url }} - asset_path: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} - asset_name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} - asset_content_type: application/octet-stream + TAG_NAME: ${{ github.ref }} # Assumes the tag name is the same as the ref. Adjust if necessary. 
+ run: | + release_id=$(curl -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/${{ github.repository }}/releases/tags/${TAG_NAME} \ + | jq '.id') + + upload_url=$(curl -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/${{ github.repository }}/releases/${release_id} \ + | jq -r .upload_url) + + echo "::set-output name=upload_url::${upload_url}" + - name: Upload Asset + env: + GITHUB_TOKEN: ${{ secrets.PAT }} + UPLOAD_URL: ${{ steps.create_release.outputs.upload_url }} # Assuming this is set by a previous step + ASSET_PATH: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} + ASSET_NAME: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} + run: | + curl \ + -X POST \ + -H "Authorization: token $GITHUB_TOKEN" \ + -H "Content-Type: application/zip" \ + --data-binary @$ASSET_PATH \ + "${UPLOAD_URL}?name=${ASSET_NAME}&label=${ASSET_NAME}" build-docker: runs-on: ubuntu-latest if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' From ed8b8ad971a042746d494bf95f172bb6f6bd9a8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Tue, 27 Feb 2024 20:01:40 +0100 Subject: [PATCH 17/18] ci: fix asset upload fix: line length in prompt fix: ci release upload fix: ci use correct shell for windows runner --- .github/workflows/release-prod.yml | 28 +++++++++---- .github/workflows/release.yml | 67 +++--------------------------- src/main.rs | 3 +- 3 files changed, 27 insertions(+), 71 deletions(-) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index d32e0c6..de06011 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -38,17 +38,27 @@ jobs: name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} path: | ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} - - name: Upload Release Asset - uses: actions/upload-release-asset@v1 - if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' + - name: Get Release Upload URL + id: get_upload_url env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.PAT }} BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - with: - upload_url: ${{ github.event.release.upload_url }} - asset_path: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} - asset_name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} - asset_content_type: application/octet-stream} + shell: bash + run: | + upload_url=$(curl -H "Authorization: token $GITHUB_TOKEN" \ + https://api.github.com/repos/${{ github.repository }}/releases/tags/${BRANCH_NAME} \ + | jq -r '.upload_url' | sed 's/{?name,label}//') + echo "UPLOAD_URL=${upload_url}" >> $GITHUB_OUTPUT + - name: Upload Asset + shell: bash + env: + GITHUB_TOKEN: ${{ secrets.PAT }} + ASSET_PATH: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} + ASSET_NAME: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} + UPLOAD_URL: ${{ steps.get_upload_url.outputs.UPLOAD_URL }} + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + run: | + curl -X POST -H "Authorization: token $GITHUB_TOKEN" -H "Content-Type: application/zip" --data-binary @$ASSET_PATH "${UPLOAD_URL}?name=${ASSET_NAME}&label=${ASSET_NAME}" build-docker: needs: build runs-on: ubuntu-latest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index acdedd2..01b369e 100644 --- 
a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -34,38 +34,15 @@ jobs: run: cargo build --release --target ${{ matrix.target }} - name: Upload artifact uses: actions/upload-artifact@v3 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG_NAME: ${{ github.ref }} # Assumes the tag name is the same as the ref. Adjust if necessary. + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} with: name: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} path: | ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} - - name: Get Release Upload URL - id: get_upload_url - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - TAG_NAME: ${{ github.ref }} # Assumes the tag name is the same as the ref. Adjust if necessary. - run: | - release_id=$(curl -H "Authorization: token $GITHUB_TOKEN" \ - https://api.github.com/repos/${{ github.repository }}/releases/tags/${TAG_NAME} \ - | jq '.id') - - upload_url=$(curl -H "Authorization: token $GITHUB_TOKEN" \ - https://api.github.com/repos/${{ github.repository }}/releases/${release_id} \ - | jq -r .upload_url) - - echo "::set-output name=upload_url::${upload_url}" - - name: Upload Asset - env: - GITHUB_TOKEN: ${{ secrets.PAT }} - UPLOAD_URL: ${{ steps.create_release.outputs.upload_url }} # Assuming this is set by a previous step - ASSET_PATH: ./target/${{ matrix.target }}/release/doclytics${{ matrix.os == 'windows-latest' && '.exe' || '' }} - ASSET_NAME: doclytics-${{ env.BRANCH_NAME }}-${{ matrix.os }} - run: | - curl \ - -X POST \ - -H "Authorization: token $GITHUB_TOKEN" \ - -H "Content-Type: application/zip" \ - --data-binary @$ASSET_PATH \ - "${UPLOAD_URL}?name=${ASSET_NAME}&label=${ASSET_NAME}" + build-docker: runs-on: ubuntu-latest if: ${{ github.ref }} != 'master' && ${{ github.ref }} != 'development' @@ -92,36 +69,4 @@ jobs: context: . 
push: true tags: ${{ secrets.DOCKERHUB_USERNAME }}/doclytics:${{ env.BRANCH_NAME }} - platforms: linux/amd64,linux/arm64 - - name: Upload macOS Binary - uses: actions/upload-release-asset@v1 - if: steps.semantic-release.outputs.release_created == 'true' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.semantic-release.outputs.release_upload_url }} - asset_path: ./target/x86_64-apple-darwin/release/your-binary - asset_name: your-binary-macos - asset_content_type: application/octet-stream - - - name: Upload Windows Binary - uses: actions/upload-release-asset@v1 - if: steps.semantic-release.outputs.release_created == 'true' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.semantic-release.outputs.release_upload_url }} - asset_path: ./target/x86_64-pc-windows-msvc/release/your-binary.exe - asset_name: your-binary-windows.exe - asset_content_type: application/octet-stream - - - name: Upload Linux Binary - uses: actions/upload-release-asset@v1 - if: steps.semantic-release.outputs.release_created == 'true' - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.semantic-release.outputs.release_upload_url }} - asset_path: ./target/x86_64-unknown-linux-gnu/release/your-binary - asset_name: your-binary-linux - asset_content_type: application/octet-stream + platforms: linux/amd64,linux/arm64 \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index d047b1d..21a7a0b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -84,7 +84,8 @@ fn init_ollama_client(host: &str, port: u16, secure_endpoint: bool) -> Ollama { // Refactor the main process into a function for better readability async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_url: &str) -> Result<(), Box> { - let prompt_base= env::var("BASE_PROMPT").unwrap_or_else(|_| "Please extract metadata from the provided document and return it in JSON format.\ + let prompt_base= env::var("BASE_PROMPT").unwrap_or_else(|_| "Please extract metadata\ + from the provided document and return it in JSON format.\ The fields I need are:\ title,topic,sender,recipient,urgency(with value either n/a or low or medium or high),\ date_received,category.\ From 9a13db50feb3e07f9dd4d7a652062123285c5f73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Urban?= Date: Wed, 28 Feb 2024 11:52:56 +0100 Subject: [PATCH 18/18] ci: fix errors in build-docker --- .github/workflows/release-prod.yml | 4 ---- .github/workflows/release.yml | 4 ---- 2 files changed, 8 deletions(-) diff --git a/.github/workflows/release-prod.yml b/.github/workflows/release-prod.yml index de06011..c2f5067 100644 --- a/.github/workflows/release-prod.yml +++ b/.github/workflows/release-prod.yml @@ -68,10 +68,6 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download build artifact - uses: actions/download-artifact@v3 - with: - name: production-artifacts - name: Set up Docker Build uses: docker/setup-buildx-action@v3 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 01b369e..4eb48f6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -51,10 +51,6 @@ jobs: steps: - name: Checkout code uses: actions/checkout@v4 - - name: Download build artifact - uses: actions/download-artifact@v3 - with: - name: production-artifacts - name: Set up Docker Build uses: docker/setup-buildx-action@v3