Skip to content

Commit

Permalink
Release latest fixes to production (#425)
Browse files Browse the repository at this point in the history
* Remove whitespace from URL to reduce number of unnecessary errors when copying URLs

* Only prompt for review for actual changes

* Differentiate log messages when creating vs when updating a company

* Only saveToAPI if we detected meaningful changes. This removes excessive and confusing approval requests.

* Bugfix: previously unsaved companies will now be saved

* remove console logs

* feat: Simplify environment configuration to use single .env file

* refactor: Remove cross-env and update npm scripts

* refactor: Remove DOTENV_CONFIG_PATH and simplify dev scripts

* Update package.json

* refactor: Remove dotenv import from index and startWorkers

* feat: Add dotenv/config import to all config files for consistent environment loading

* fix: add dotenv/config in all config files

* feat: Add frontend configuration with separate config file

* feat: Add missing configuration files with zod validation

* fix: Remove duplicate content in config files

* fix: dotenv-cli and remove duplicate code

* fix: update for consistency

* Import dotenv/config in import script

* Consolidate chroma config

* fix discord flow

* fix prompt and message link

---------

Co-authored-by: Hugo Björk <[email protected]>
Co-authored-by: Christian Landgren (aider) <[email protected]>
  • Loading branch information
3 people authored Dec 10, 2024
1 parent 2aeeabf commit 26819d9
Show file tree
Hide file tree
Showing 27 changed files with 154 additions and 152 deletions.
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ DISCORD_CHANNEL_ID=

# NLM ingestor is used to parse PDF files, look at the README.md
NLM_INGESTOR_URL=http://0.0.0.0:5001

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Ensure you have Node.js version 22.0.0 or higher installed. You will also need D

### Setting up environment variables

Make a copy of the file `.env.example` and name it `.env.development`. Fill it in using the instructions in the file.
Make a copy of the file `.env.example` and name it `.env`. Fill it in using the instructions in the file.

### Installing dependencies

Expand Down
56 changes: 1 addition & 55 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 6 additions & 8 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
"start": "node --import tsx src/index.ts",
"migrate": "npx prisma migrate deploy",
"workers": "node --import tsx src/startWorkers.ts",
"dev-workers": "cross-env DOTENV_CONFIG_PATH=./.env.development node --import tsx --watch src/startWorkers.ts",
"dev-board": "cross-env DOTENV_CONFIG_PATH=./.env.development node --import tsx --watch src/index.ts",
"dev-workers": "node --import tsx --watch src/startWorkers.ts",
"dev-board": "node --import tsx --watch src/index.ts",
"dev": "concurrently \"npm run dev-board\" \"npm run dev-workers\"",
"import": "node --import=tsx --env-file=.env.development scripts/import-spreadsheet-companies.ts",
"test": "DOTENV_CONFIG_PATH=./.env.test jest",
"prisma": "dotenv -e .env.development -- prisma",
"reset": "node --import tsx --env-file .env.development scripts/dev-reset.ts"
"import": "node --import=tsx scripts/import-spreadsheet-companies.ts",
"test": "jest",
"prisma": "prisma",
"reset": "node --import tsx scripts/dev-reset.ts"
},
"author": "Christian Landgren, William Ryder, Samuel Plumppu mfl",
"license": "MIT License",
Expand All @@ -31,7 +31,6 @@
"cors": "^2.8.5",
"discord.js": "^14.16.3",
"dotenv": "^16.4.5",
"dotenv-cli": "^7.4.4",
"express": "^5.0.1",
"jest": "^29.7.0",
"openai": "^4.73.1",
Expand All @@ -48,7 +47,6 @@
"@types/express": "^5.0.0",
"@types/node": "^22.8.4",
"concurrently": "^9.1.0",
"cross-env": "^7.0.3",
"deepl-node": "^1.15.0",
"exceljs": "^4.4.0",
"jest": "^29.7.0",
Expand Down
1 change: 1 addition & 0 deletions scripts/import-spreadsheet-companies.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import 'dotenv/config'
import ExcelJS from 'exceljs'
import { resolve } from 'path'
import { z } from 'zod'
Expand Down
1 change: 0 additions & 1 deletion src/config.ts

This file was deleted.

8 changes: 8 additions & 0 deletions src/config/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ const envSchema = z.object({
* NOTE: This is only relevant during import with alex data, and then we switch to proper auth tokens.
*/
API_TOKENS: z.string().transform((tokens) => tokens.split(',')),
FRONTEND_URL: z
.string()
.default(
process.env.NODE_ENV === 'development'
? 'http://localhost:4321'
: 'https://beta.klimatkollen.se'
),
API_BASE_URL: z.string().default('http://localhost:3000/api'),
PORT: z.coerce.number().default(3000),
})
Expand All @@ -14,6 +21,7 @@ const env = envSchema.parse(process.env)

// Pull the validated values out of the parsed environment first, so the
// exported object below only maps them to their public config names.
const { API_TOKENS, FRONTEND_URL, API_BASE_URL, PORT } = env

/**
 * API configuration derived from the validated environment:
 * - tokens: API_TOKENS split on commas into a list
 * - frontendURL: FRONTEND_URL (the default depends on NODE_ENV)
 * - baseURL: API_BASE_URL
 * - port: PORT coerced to a number
 */
export default {
  tokens: API_TOKENS,
  frontendURL: FRONTEND_URL,
  baseURL: API_BASE_URL,
  port: PORT,
}
3 changes: 3 additions & 0 deletions src/config/chromadb.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import 'dotenv/config'
import { z } from 'zod'

/**
 * Schema for the ChromaDB-related environment variables.
 *
 * - CHROMA_HOST: address of the Chroma server (defaults to a local instance).
 * - CHROMA_TOKEN: optional credentials token.
 * - CHUNK_SIZE: chunk length used when slicing report text (defaults to 2000).
 */
const envSchema = z.object({
  CHROMA_HOST: z.string().default('http://127.0.0.1:8000'),
  CHROMA_TOKEN: z.string().optional(),
  // Values read from process.env are always strings, so a plain z.number()
  // would reject every CHUNK_SIZE that is actually set in the environment.
  // Coerce it (like PORT in the API config) and require a positive integer so
  // a zero or negative size fails at startup instead of at chunking time.
  CHUNK_SIZE: z.coerce.number().int().positive().default(2000),
})

// Validate once at module load; throws if the environment is invalid.
const env = envSchema.parse(process.env)
Expand All @@ -15,4 +17,5 @@ export default {
credentials: env.CHROMA_TOKEN,
}
: undefined,
chunkSize: env.CHUNK_SIZE,
}
1 change: 1 addition & 0 deletions src/config/discord.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import 'dotenv/config'
import { z } from 'zod'

const envSchema = z.object({
Expand Down
1 change: 1 addition & 0 deletions src/config/nlmIngestor.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import 'dotenv/config'
import { z } from 'zod'

const envSchema = z.object({
Expand Down
1 change: 1 addition & 0 deletions src/config/openai.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import 'dotenv/config'
import { z } from 'zod'

const envSchema = z.object({
Expand Down
1 change: 1 addition & 0 deletions src/config/redis.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import 'dotenv/config'
import { z } from 'zod'

const envSchema = z.object({
Expand Down
2 changes: 1 addition & 1 deletion src/discord/commands/pdfs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export default {
nlmParsePDF.queue.add(
'download ' + url.slice(-20),
{
url,
url: url.trim(),
threadId: thread.id,
},
{
Expand Down
2 changes: 1 addition & 1 deletion src/discord/interactions/approve.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ class ApproveJob extends DiscordJob {
export default {
async execute(interaction: ButtonInteraction, job: ApproveJob) {
await job.updateData({ ...job.data, approved: true })

job.log(`Approving company edit: ${job.data.wikidata.node}`)

await interaction.reply({
content: `Tack för din granskning, ${interaction?.user?.username}!`,
})
Expand Down
1 change: 0 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import 'dotenv/config'
import express from 'express'

import queue from './queue'
Expand Down
6 changes: 4 additions & 2 deletions src/lib/DiscordWorker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ export class DiscordJob extends Job {
sendMessage: (
msg: string | { files?: any[]; content?: string; components?: any[] }
) => Promise<any>
editMessage: (msg: string) => Promise<any>
editMessage: (
msg: string | { files?: any[]; content?: string; components?: any[] }
) => Promise<any>
setThreadName: (name: string) => Promise<any>
sendTyping: () => Promise<any>
getChildrenEntries: () => Promise<any>
Expand Down Expand Up @@ -57,7 +59,7 @@ function addCustomMethods(job: DiscordJob) {
return discord.sendTyping(job.data)
}

job.editMessage = async (msg) => {
job.editMessage = async (msg: any) => {
if (!message && job.data.messageId) {
const { channelId, threadId, messageId } = job.data
message = await discord.findMessage({
Expand Down
34 changes: 27 additions & 7 deletions src/lib/saveUtils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import apiConfig from '../config/api'
import { getReportingPeriodDates } from './reportingPeriodDates'

export function formatAsReportingPeriods(
Expand Down Expand Up @@ -56,14 +57,21 @@ const recursiveOmit = <T extends Object>(
}

const askDiff = async (before: any, after: any) => {
if (!before || !after) return 'NO_CHANGES'
if (!after) return 'NO_CHANGES'
return await askPrompt(
`What is changed between these two json values? Please respond in clear text with markdown formatting.
The purpose is to let an editor approve the changes or suggest changes in Discord.
Be as brief as possible. Never be technical - meaning no comments about structure changes, fields renames etc.
Focus only on the actual values that have changed.
When handling years and ambiguous dates, always use the last year in the period (e.g. startDate: 2020 - endDate: 2021 should be referred to as 2021).
NEVER REPEAT UNCHANGED VALUES OR UNCHANGED YEARS! If nothing important has changed, just write "NO_CHANGES".`,
`What is changed between these two json values? If the before value is missing that means the company did not exist previously and everything is a change (No need to mention that just start with something like: "Here is fresh data for you to approve:" and describe the new additions..
Please respond clearly and concisely in text with markdown formatting:
- Use simple, reader-friendly language to explain the changes.
- When a report or data is added for a specific year, mention it as: "Added a report for [year]."
- Do not mention technical details like structure changes or metadata.
- Avoid repeating unchanged values or years.
- If nothing important has changed, simply write: "NO_CHANGES."
When handling years or date ranges, always refer to the last year in the range (e.g., startDate: 2020 - endDate: 2021 should be referred to as 2021).
Summarize the changes and avoid unnecessary repetition.`,

JSON.stringify({
before: recursiveOmit(structuredClone(before), new Set(['metadata'])),
after: recursiveOmit(structuredClone(after), new Set(['metadata'])),
Expand All @@ -85,3 +93,15 @@ export async function diffChanges<T>({
const requiresApproval = Boolean(existingCompany) || hasChanges
return { diff: hasChanges ? diff : '', requiresApproval }
}

/**
 * Builds the frontend URL of a company page.
 *
 * The company name is turned into a slug: it is lowercased, "å"/"ä" become
 * "a" and "ö" becomes "o", every remaining character outside a-z and 0-9 is
 * replaced by "-", runs of "-" are collapsed into one, and a leading or
 * trailing "-" is dropped. The Wikidata id is appended after the slug.
 *
 * @param name - Company name to slugify.
 * @param wikidataId - Wikidata id appended to the slug.
 * @returns `<frontendURL>/foretag/<slug>-<wikidataId>`
 */
export function getCompanyURL(name: string, wikidataId: string) {
  const lowerCased = name.toLowerCase()
  const asciiVowels = lowerCased.replace(/[åä]/g, 'a').replace(/[ö]/g, 'o')
  const dashed = asciiVowels.replace(/[^a-z0-9]/g, '-')
  const slug = dashed.replace(/-+/g, '-').replace(/^-|-$/g, '')

  return `${apiConfig.frontendURL}/foretag/${slug}-${wikidataId}`
}
9 changes: 4 additions & 5 deletions src/lib/vectordb.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { ChromaClient, OpenAIEmbeddingFunction } from 'chromadb'

import chromadb from '../config/chromadb'
import config from '../config/chromadb'
import openai from '../config/openai'
import { CHUNK_SIZE } from '../config'

const client = new ChromaClient(chromadb)
const client = new ChromaClient(config)
const embedder = new OpenAIEmbeddingFunction(openai)

const collection = await client.getOrCreateCollection({
Expand Down Expand Up @@ -46,8 +45,8 @@ async function addReport(url: string, markdown: string) {
const documentChunks: { chunk: string; paragraph: string }[] = []

mergedParagraphs.forEach((paragraph) => {
for (let i = 0; i < paragraph.length; i += CHUNK_SIZE - overlapSize) {
const chunk = paragraph.slice(i, i + CHUNK_SIZE).trim()
for (let i = 0; i < paragraph.length; i += config.chunkSize - overlapSize) {
const chunk = paragraph.slice(i, i + config.chunkSize).trim()
if (chunk.length > 0) {
documentChunks.push({ chunk, paragraph })
}
Expand Down
1 change: 0 additions & 1 deletion src/startWorkers.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import 'dotenv/config'
import discord from './discord'
import { workers } from './workers'

Expand Down
8 changes: 8 additions & 0 deletions src/workers/checkDB.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { FlowProducer } from 'bullmq'
import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
import { apiFetch } from '../lib/api'
import redis from '../config/redis'
import { getCompanyURL } from '../lib/saveUtils'

export class CheckDBJob extends DiscordJob {
declare data: DiscordJob['data'] & {
Expand Down Expand Up @@ -52,6 +53,13 @@ const checkDB = new DiscordWorker('checkDB', async (job: CheckDBJob) => {
metadata,
}
await apiFetch(`/companies`, { body })

await job.sendMessage(
`✅ Företaget har skapats! Se resultatet här: ${getCompanyURL(
companyName,
wikidataId
)}`
)
}

const { scope12, scope3, biogenic, industry, economy, goals, initiatives } =
Expand Down
23 changes: 13 additions & 10 deletions src/workers/diffGoals.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@ const diffGoals = new DiscordWorker<DiffGoalsJob>('diffGoals', async (job) => {

job.log('Diff:' + diff)

await saveToAPI.queue.add(companyName + ' goals', {
...job.data,
body,
diff,
requiresApproval,
apiSubEndpoint: 'goals',

// Remove duplicated job data that should be part of the body from now on
goals: undefined,
})
// Only save if we detected any meaningful changes
if (diff) {
await saveToAPI.queue.add(companyName + ' goals', {
...job.data,
body,
diff,
requiresApproval,
apiSubEndpoint: 'goals',

// Remove duplicated job data that should be part of the body from now on
goals: undefined,
})
}

return { body, diff, requiresApproval }
})
Expand Down
Loading

0 comments on commit 26819d9

Please sign in to comment.