diff --git a/db/schema.sql b/db/schema.sql
index 4e59850..21fcf04 100644
--- a/db/schema.sql
+++ b/db/schema.sql
@@ -1,3 +1,5 @@
+CREATE EXTENSION IF NOT EXISTS vector;
+
 CREATE OR REPLACE FUNCTION update_updated_at_column()
     RETURNS TRIGGER
     AS $$
@@ -38,7 +40,8 @@ CREATE TABLE articles(
     is_published boolean DEFAULT FALSE, -- REMOVE after translation
     is_published_on_social_media boolean DEFAULT FALSE, -- REMOVE after translation
     category text, -- REMOVE after translation
-    page_views integer DEFAULT 0 -- REMOVE after translation
+    page_views integer DEFAULT 0, -- REMOVE after translation
+    embedding VECTOR(1536)
 );
 
 CREATE TABLE article_translations(
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index c50a209..ff58963 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -2,7 +2,7 @@ version: "3.9"
 services:
   db:
     container_name: nyheter_db
-    image: postgres:15.4
+    image: ankane/pgvector:v0.5.1
     restart: always
     environment:
       TZ: "Europe/Stockholm"
diff --git a/worker/src/3-openai.ts b/worker/src/3-openai.ts
index fb0304f..f349daf 100644
--- a/worker/src/3-openai.ts
+++ b/worker/src/3-openai.ts
@@ -2,6 +2,7 @@ import slugify from 'slugify';
 
 import { db } from './utils/db';
 import {
+  articleNewsValue,
   generateArticle,
   generateTranslation,
   textIsRelatedToSweden,
@@ -24,9 +25,8 @@ import { translate } from './utils/helpers';
       throw new Error('article.transcribedText is null');
     }
 
-    console.log('check if the source information is related to sweden');
-
     // check if the article is related to sweden or not
+    console.log('Check if the source information is related to sweden');
     let isRelatedToSweden = false;
     try {
       isRelatedToSweden = await textIsRelatedToSweden(
@@ -50,7 +50,26 @@ import { translate } from './utils/helpers';
       continue;
     }
 
-    console.log('generate article...');
+    // check what's the news value of the article
+    console.log('Check the articles news value');
+    let newsValue = -1;
+    try {
+      newsValue = await articleNewsValue(article.transcribedText as string);
+    } catch (e) {
+      console.error('error: ', e);
+      continue;
+    }
+
+    console.log('newsValue: ', newsValue);
+
+    if (newsValue < 7) {
+      console.log(
+        `skipping this article because it's news value is too low: ${newsValue}`,
+      );
+      continue;
+    }
+
+    console.log('Generate article...');
     let generatedArticle = null;
 
     try {
diff --git a/worker/src/utils/openai.ts b/worker/src/utils/openai.ts
index df57a1a..90a60ef 100644
--- a/worker/src/utils/openai.ts
+++ b/worker/src/utils/openai.ts
@@ -27,6 +27,20 @@ export const FUNCTIONS = {
       required: ['isRelatedToSweden'],
     },
   },
+  classifyNewsValue: {
+    name: 'classifyNewsValue',
+    description: 'Classify the news value of a news article',
+    parameters: {
+      type: 'object',
+      properties: {
+        newsValue: {
+          type: 'number',
+          description: 'The news value of the news article',
+        },
+      },
+      required: ['newsValue'],
+    },
+  },
   getNewsArticleInformation: {
     name: 'getNewsArticleInformation',
     description: 'Gets information about the news article',
@@ -130,7 +144,7 @@ export async function textIsRelatedToSweden(text: string): Promise {
     function_call: {
       name: FUNCTIONS.informationIsRelatedToSweden.name,
     },
-    model: 'gpt-3.5-turbo',
+    model: 'gpt-3.5-turbo-1106',
     temperature: 0.7,
     max_tokens: 500,
   });
@@ -141,6 +155,46 @@ export async function textIsRelatedToSweden(text: string): Promise {
   return bodyObject.isRelatedToSweden;
 }
 
+/**
+ * Asks the model to rate the news value of `text` on a scale from 0 to 10.
+ *
+ * @param text - The article text to rate.
+ * @returns The `newsValue` number reported by the model.
+ * @throws If the parsed function-call arguments lack a numeric `newsValue`.
+ */
+export async function articleNewsValue(text: string): Promise<number> {
+  const bodyContent = `ARTICLE:\n${text}\nEND OF ARTICLE.\n Help me determine the news value of the article above from a scale of 0 to 10 where 10 means the article has the highest news value possible.`;
+
+  const openAiBodyResponse = await openai.chat.completions.create({
+    messages: [
+      {
+        role: 'system',
+        content: GPT_PROMPT_ASSISTANT,
+      },
+      {
+        role: 'user',
+        content: bodyContent,
+      },
+    ],
+    functions: [FUNCTIONS.classifyNewsValue],
+    function_call: {
+      name: FUNCTIONS.classifyNewsValue.name,
+    },
+    model: 'gpt-3.5-turbo-1106',
+    temperature: 0.7,
+    max_tokens: 500,
+  });
+
+  const body = openAiBodyResponse.choices[0].message?.function_call?.arguments;
+
+  // Validate instead of trusting the model output: `undefined < 7` is false,
+  // so a missing newsValue would slip past a "value below threshold" check.
+  const { newsValue } = z
+    .object({ newsValue: z.number() })
+    .parse(parseOpenAiJson(body as string));
+  return newsValue;
+}
+
 export async function generateArticle(transcribedText: string) {
   // body
   const bodyContent = `INFORMATION: ${removeLastSentence(
@@ -173,13 +227,8 @@ export async function generateArticle(transcribedText: string) {
   const jsonString = openAiBodyResponse.choices[0].message?.function_call
     ?.arguments as string;
 
-  console.log(openAiBodyResponse.choices[0].message);
-  console.log(jsonString);
-
   const resJson = parseOpenAiJson(jsonString);
 
-  console.log({ resJson });
-
   const articleResponseSchema = z.object({
     body: z.string(),
     headline: z.string(),
@@ -243,9 +292,6 @@ END OF ARTICLE`;
   const jsonString = openAiBodyResponse.choices[0].message?.function_call
     ?.arguments as string;
 
-  console.log(openAiBodyResponse.choices[0].message);
-  console.log(jsonString);
-
   // const sanitizedJsonString = jsonString.replace(/\t/g, '\\t');
 
   const resJson = parseOpenAiJson(jsonString);