From 5700f4908d2049f047433e5ad3661e9bc3a43372 Mon Sep 17 00:00:00 2001 From: mithandir Date: Tue, 3 Sep 2024 19:50:13 +0200 Subject: [PATCH] Shorten Youtube subtitles to 32k token window --- .../ch/climbd/newsfeed/controller/MlController.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/ch/climbd/newsfeed/controller/MlController.java b/src/main/java/ch/climbd/newsfeed/controller/MlController.java index 75c7bcb..0aa6fcd 100644 --- a/src/main/java/ch/climbd/newsfeed/controller/MlController.java +++ b/src/main/java/ch/climbd/newsfeed/controller/MlController.java @@ -86,12 +86,17 @@ private void processYoutubeTranscription(NewsEntry item) { var content = TranscriptFormatters.textFormatter().format(fragments); LOG.info("Transcript found for video: {}", item.getTitle()); - item.setSummary(chatClient.prompt() + // 32k token limit + if (content.length() > 100000) { + content = content.substring(0, 100000); + } + + var summary = chatClient.prompt() .system("You are a news reporter that summarizes news articles") .user("Create a summary of the following youtube subtitles: \n\n" + content) .call() - .content()); - LOG.debug("Summary: {}", item.getSummary()); + .content(); + item.setContent(summary); mongo.update(item); LOG.info("Summarized the article: {}", item.getTitle());