From d30a9a177592ad0c68622a6e859eaa0b9b648390 Mon Sep 17 00:00:00 2001 From: mithandir Date: Wed, 26 Jun 2024 09:33:54 +0200 Subject: [PATCH] Process the last 48h of posts to re-try summarization --- src/main/java/ch/climbd/newsfeed/controller/MlController.java | 2 +- .../java/ch/climbd/newsfeed/controller/MongoController.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/ch/climbd/newsfeed/controller/MlController.java b/src/main/java/ch/climbd/newsfeed/controller/MlController.java index e727b9b..6f4a87e 100644 --- a/src/main/java/ch/climbd/newsfeed/controller/MlController.java +++ b/src/main/java/ch/climbd/newsfeed/controller/MlController.java @@ -27,7 +27,7 @@ public class MlController { @PostConstruct public void fixQueueAfterRestart() { - var todaysNews = mongo.findAllPostedToday(); + var todaysNews = mongo.findLast100PostsPostedInTheLast48h(); todaysNews.stream() .filter(news -> news.getSummary() == null || news.getSummary().isBlank()) .filter(news -> news.getContent() != null && !news.getContent().isBlank() && news.getContent().length() > 1000) diff --git a/src/main/java/ch/climbd/newsfeed/controller/MongoController.java b/src/main/java/ch/climbd/newsfeed/controller/MongoController.java index 35556c6..f280a04 100644 --- a/src/main/java/ch/climbd/newsfeed/controller/MongoController.java +++ b/src/main/java/ch/climbd/newsfeed/controller/MongoController.java @@ -113,9 +113,9 @@ public List findAllFilterdBySite(String host) { return template.find(query, NewsEntry.class); } - public List findAllPostedToday() { + public List findLast100PostsPostedInTheLast48h() { Query query = new Query(); - query.addCriteria(Criteria.where("publishedAt").gte(ZonedDateTime.now().truncatedTo(ChronoUnit.DAYS).toInstant())); + query.addCriteria(Criteria.where("publishedAt").gte(ZonedDateTime.now().minusDays(2).toInstant())); query.with(Sort.by(Sort.Direction.DESC, "publishedAt")); query.limit(100); query.maxTimeMsec(1000);