Skip to content

Commit

Permalink
Shorten Youtube subtitles to 32k token window
Browse files: browse the repository at this point in the history
  • Loading branch information
mithandir committed Sep 3, 2024
1 parent 941b528 commit 5700f49
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions src/main/java/ch/climbd/newsfeed/controller/MlController.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,17 @@ private void processYoutubeTranscription(NewsEntry item) {
var content = TranscriptFormatters.textFormatter().format(fragments);
LOG.info("Transcript found for video: {}", item.getTitle());

item.setSummary(chatClient.prompt()
// 32k token limit: cap the transcript at 100,000 characters before it is added to the prompt
if (content.length() > 100000) {
content = content.substring(0, 100000);
}

var summary = chatClient.prompt()
.system("You are a news reporter that summarizes news articles")
.user("Create a summary of the following youtube subtitles: \n\n" + content)
.call()
.content());
LOG.debug("Summary: {}", item.getSummary());
.content();
item.setContent(summary);
mongo.update(item);
LOG.info("Summarized the article: {}", item.getTitle());

Expand Down

0 comments on commit 5700f49

Please sign in to comment.