Skip to content

Commit

Permalink
feat: Support analysis of big text. Add retries if 0 words were found…
Browse files Browse the repository at this point in the history
… from text.
  • Loading branch information
evgenius1424 committed Jun 4, 2024
1 parent 6b84ad4 commit dda31af
Show file tree
Hide file tree
Showing 11 changed files with 373 additions and 271 deletions.
47 changes: 42 additions & 5 deletions apps/learnbefore-bff/api/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ import { startExpress } from "../src/start-express"
import { getWords } from "../src/get-words"
import { Message, User, Word } from "../types"
import { ClerkExpressRequireAuth } from "@clerk/clerk-sdk-node"
import { Store } from "../src/store" // Adjust the import path as necessary
import { Store } from "../src/store"
import { waitFor } from "../src/waitFor"
import { splitText } from "../src/splitText"

declare global {
namespace Express {
Expand Down Expand Up @@ -73,10 +75,17 @@ app.get("/api/words", ClerkExpressRequireAuth({}), async (req, res) => {
res.write(`data: ${JSON.stringify(message)}\n\n`)
res.flushHeaders()

for await (const word of getWords(openai, text)) {
words.push(word)
res.write(`data: ${JSON.stringify(word)}\n\n`)
res.flushHeaders()
for (const chunk of splitText(text, 6000)) {
for await (const word of getWordsRetryable(openai, chunk)) {
const isDuplicate = words.find((w) =>
equalsIgnoringCase(w.word, word.word),
)
if (!isDuplicate) {
words.push(word)
res.write(`data: ${JSON.stringify(word)}\n\n`)
res.flushHeaders()
}
}
}

try {
Expand Down Expand Up @@ -104,3 +113,31 @@ async function getUser(userId: string) {
// Allow-list handed to Clerk as its `authorizedParties` option; presumably the
// origins permitted to call this API — confirm against Clerk's documentation.
const authorizedParties = ["http://localhost:3000", "https://learnbefore.com"]

// Install Clerk's require-auth middleware on the app, configured with the list above.
app.use(ClerkExpressRequireAuth({ authorizedParties }))

// TODO: we do not actually need to retry when the text is too big or on other
// API-related issues.
/**
 * Streams the words produced by `getWords` for `text`, retrying the whole call
 * when an attempt finishes without yielding a single word.
 *
 * The first attempt is followed by up to `maxRetries` retries, with a short
 * pause between consecutive attempts. As soon as an attempt yields at least one
 * word, its words are passed through and the generator completes.
 *
 * Errors thrown by `getWords` are not caught here and propagate to the consumer.
 *
 * @param openai - Client forwarded unchanged to `getWords`.
 * @param text - Text forwarded unchanged to `getWords`.
 * @param maxRetries - Number of retries allowed after the first attempt.
 * @throws Error when every attempt completes without yielding a word.
 */
async function* getWordsRetryable(
  openai: OpenAI,
  text: string,
  maxRetries = 3,
): AsyncIterableIterator<Word> {
  const retryDelayMs = 200

  for (let attempt = 0; ; attempt++) {
    let emitted = false
    for await (const word of getWords(openai, text)) {
      emitted = true
      yield word
    }
    if (emitted) {
      return
    }
    // Out of retries: fail right away instead of sleeping once more first.
    if (attempt >= maxRetries) {
      break
    }
    // Pause only when another attempt is actually going to follow.
    await waitFor(retryDelayMs)
  }

  throw new Error("Maximum retries exceeded without finding any words.")
}
8 changes: 4 additions & 4 deletions apps/learnbefore-bff/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"scripts": {
"build": "npx tsc",
"preview": "node dist/api/chat.js",
"dev": "nodemon api/chat.ts"
"dev": "NODE_OPTIONS='--max-http-header-size=512000' nodemon api/chat.ts"
},
"dependencies": {
"@clerk/clerk-sdk-node": "^5.0.9",
Expand All @@ -16,15 +16,15 @@
"dotenv": "^16.4.5",
"express": "^4.19.2",
"mongodb": "^6.7.0",
"openai": "^4.47.2",
"openai": "^4.47.3",
"vitest": "^1.6.0",
"zod": "^3.23.8"
},
"devDependencies": {
"@types/cors": "^2.8.17",
"@types/express": "^4.17.21",
"@types/node": "^20.12.13",
"nodemon": "^3.1.2",
"@types/node": "^20.14.0",
"nodemon": "^3.1.3",
"ts-node": "^10.9.2",
"typescript": "^5.4.5"
}
Expand Down
3 changes: 3 additions & 0 deletions apps/learnbefore-bff/src/equalsIgnoringCase.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Reports whether two strings are equal when letter case is ignored.
 *
 * Compares with `sensitivity: "accent"`, so "Word" equals "word" while accented
 * and unaccented letters stay distinct ("resume" is not "résumé"). The
 * comparison uses the runtime's default locale.
 *
 * NOTE(review): this function is not exported in the visible code — confirm
 * how callers bring it into scope.
 *
 * @param text - First string to compare.
 * @param other - Second string to compare.
 * @returns `true` when the strings differ at most by letter case.
 */
function equalsIgnoringCase(text: string, other: string): boolean {
  return text.localeCompare(other, undefined, { sensitivity: "accent" }) === 0
}
23 changes: 23 additions & 0 deletions apps/learnbefore-bff/src/splitText.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/**
 * Splits `text` into consecutive chunks, breaking only at sentence boundaries.
 *
 * A sentence is a run of characters together with the `.`, `!` or `?`
 * characters that end it; trailing text without a terminator counts as a final
 * sentence. Sentences are packed greedily, so each chunk holds as many whole
 * sentences as fit in `chunkSize` characters. No characters are added or
 * removed: joining the returned chunks reproduces `text` exactly.
 *
 * A single sentence longer than `chunkSize` is never cut; it is returned as a
 * chunk of its own that exceeds the limit.
 *
 * @param text - Text to split.
 * @param chunkSize - Maximum number of characters per chunk.
 * @returns The chunks in order; an empty array for empty `text`.
 */
export const splitText = (text: string, chunkSize: number = 3000): string[] => {
  // First alternative: optional body plus its terminator(s), so punctuation
  // stays attached to its sentence. Second alternative: a trailing fragment
  // that has no terminator at all.
  const sentences = text.match(/[^.!?]*[.!?]+|[^.!?]+/g) ?? []
  const chunks: string[] = []
  let currentChunk = ""

  for (const sentence of sentences) {
    if (currentChunk.length + sentence.length <= chunkSize) {
      currentChunk += sentence
      continue
    }
    // The sentence does not fit: close the current chunk (if any) and start a new one.
    if (currentChunk.length > 0) chunks.push(currentChunk)
    currentChunk = sentence
  }

  if (currentChunk.length > 0) chunks.push(currentChunk)

  return chunks
}
3 changes: 3 additions & 0 deletions apps/learnbefore-bff/src/waitFor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/**
 * Returns a promise that settles once a timer of `ms` milliseconds has fired.
 *
 * @param ms - Delay passed to `setTimeout`, in milliseconds.
 */
export function waitFor(ms: number) {
  return new Promise((done) => {
    setTimeout(done, ms)
  })
}
10 changes: 10 additions & 0 deletions apps/learnbefore-bff/test/split-text.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { expect, test } from "vitest"
import { splitText } from "../src/splitText"

// Any two of these sentences together exceed the 25-character limit, so each
// one must land in its own chunk.
test("splitText", () => {
  const chunks = splitText(
    "This is sentence one. This is sentence two. This is sentence three.",
    25,
  )

  expect(chunks).toHaveLength(3)
})
4 changes: 2 additions & 2 deletions apps/learnbefore/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
"version": "0.0.0",
"type": "module",
"scripts": {
"dev": "vite",
"dev": "NODE_OPTIONS='--max-http-header-size=512000' vite",
"build": "tsc && vite build",
"lint": "eslint . --ext ts,tsx --report-unused-disable-directives --max-warnings 0",
"preview": "vite preview"
"preview": "NODE_OPTIONS='--max-http-header-size=512000' vite preview"
},
"dependencies": {
"@clerk/clerk-react": "^5.2.2",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
"devDependencies": {
"@repo/eslint-config": "workspace:*",
"@repo/typescript-config": "workspace:*",
"prettier": "^3.2.5",
"prettier": "^3.3.0",
"turbo": "^1.13.3"
},
"volta": {
Expand Down
2 changes: 1 addition & 1 deletion packages/types/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"@repo/typescript-config": "workspace:*",
"@turbo/gen": "^1.13.3",
"@types/eslint": "^8.56.10",
"@types/node": "^20.12.13",
"@types/node": "^20.14.0",
"eslint": "^8.57.0",
"typescript": "^5.4.5"
}
Expand Down
4 changes: 2 additions & 2 deletions packages/ui/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
"@repo/typescript-config": "workspace:*",
"@turbo/gen": "^1.13.3",
"@types/eslint": "^8.56.10",
"@types/node": "^20.12.13",
"@types/node": "^20.14.0",
"@types/react": "^18.3.3",
"@types/react-dom": "^18.3.0",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.1",
"eslint": "^8.57.0",
"lucide-react": "^0.381.0",
"lucide-react": "^0.383.0",
"react": "^18.3.1",
"tailwind-merge": "^2.3.0",
"tailwindcss": "^3.4.3",
Expand Down
Loading

0 comments on commit dda31af

Please sign in to comment.