Skip to content

Commit

Permalink
Merge pull request #388 from Klimatbyran/staging
Browse files: browse the repository at this point in the history
Publish to prod
  • Loading branch information
Greenheart authored Dec 4, 2024
2 parents 32f9973 + 83e998d commit 043ecef
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 6 deletions.
18 changes: 14 additions & 4 deletions src/discord/commands/pdfs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,20 @@ export default {
})

thread.send(`PDF i kö: ${url}`)
nlmParsePDF.queue.add('download ' + url.slice(-20), {
url,
threadId: thread.id,
})
nlmParsePDF.queue.add(
'download ' + url.slice(-20),
{
url,
threadId: thread.id,
},
{
backoff: {
type: 'fixed',
delay: 60_000,
},
attempts: 10,
}
)
})
} catch (error) {
console.error('Pdfs: error', error)
Expand Down
22 changes: 21 additions & 1 deletion src/lib/vectordb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,29 @@ async function addReport(url: string, markdown: string) {
.map((p) => p.trim())
.filter((p) => p.length > 0)

let prefix = ''
const mergedParagraphs: string[] = []

// Combine standalone headers (titles without body) with the next paragraph that has a body.
for (let i = 0; i < paragraphs.length; i++) {
const current = paragraphs[i]
const hasBody = current.split('\n').length > 1

if (!hasBody) {
prefix += (prefix ? '\n' : '') + current
} else {
mergedParagraphs.push((prefix ? prefix + '\n' : '') + current)
prefix = ''
}
}

if (prefix) {
mergedParagraphs.push(prefix)
}

const documentChunks: { chunk: string; paragraph: string }[] = []

paragraphs.forEach((paragraph) => {
mergedParagraphs.forEach((paragraph) => {
for (let i = 0; i < paragraph.length; i += CHUNK_SIZE - overlapSize) {
const chunk = paragraph.slice(i, i + CHUNK_SIZE).trim()
if (chunk.length > 0) {
Expand Down
2 changes: 1 addition & 1 deletion src/prompts/followUp/scope12.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ NEVER CALCULATE ANY EMISSIONS. ONLY REPORT THE DATA AS IT IS IN THE PDF. If you
Example - feel free to add more fields and relevant data:
{
"scope12": [{
"year": 2021,
"year": 2023,
"scope1": {
"total": 12.3
},
Expand Down
11 changes: 11 additions & 0 deletions src/workers/nlmParsePDF.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,17 @@ const nlmParsePDF = new DiscordWorker(
let json
try {
json = await extractJsonFromPdf(pdf)
} catch (err) {
if (job.attemptsMade < (job.opts?.attempts || 10)) {
job.editMessage(
`❌ Fel vid tolkning av PDF: ${err.message}. Försöker igen om en stund...`
)
} else {
job.editMessage(
`❌ Fel vid tolkning av PDF: ${err.message}. Ger upp...`
)
}
throw new Error('Failed to parse PDF, retrying in one minute...')
} finally {
clearInterval(interval)
}
Expand Down

0 comments on commit 043ecef

Please sign in to comment.