diff --git a/api/src/main/java/run/halo/app/content/ExcerptGenerator.java b/api/src/main/java/run/halo/app/content/ExcerptGenerator.java new file mode 100644 index 0000000000..d25b6814fa --- /dev/null +++ b/api/src/main/java/run/halo/app/content/ExcerptGenerator.java @@ -0,0 +1,44 @@ +package run.halo.app.content; + +import java.util.Set; +import lombok.Data; +import lombok.experimental.Accessors; +import org.pf4j.ExtensionPoint; +import reactor.core.publisher.Mono; + +/** + * Extension point for generating the excerpt of a piece of content. + */ +public interface ExcerptGenerator extends ExtensionPoint { + + /** + * Generates an excerpt from the given context. + * + * @param context raw and rendered content plus hints such as keywords and max length + * @return a {@link Mono} emitting the generated excerpt + */ + Mono<String> generate(ExcerptGenerator.Context context); + + /** + * Input of the excerpt generation. + */ + @Data + @Accessors(chain = true) + class Context { + private String raw; + /** + * html content. + */ + private String content; + + private String rawType; + /** + * Keywords in the content that help make the excerpt generation more accurate. + */ + private Set<String> keywords; + /** + * Max length of the generated excerpt. + */ + private int maxLength; + } +} diff --git a/application/src/main/java/run/halo/app/core/extension/reconciler/PostReconciler.java b/application/src/main/java/run/halo/app/core/extension/reconciler/PostReconciler.java index 55f8f28b79..29d546944f 100644 --- a/application/src/main/java/run/halo/app/core/extension/reconciler/PostReconciler.java +++ b/application/src/main/java/run/halo/app/core/extension/reconciler/PostReconciler.java @@ -10,6 +10,7 @@ import static run.halo.app.extension.MetadataUtil.nullSafeAnnotations; import static run.halo.app.extension.MetadataUtil.nullSafeLabels; import static run.halo.app.extension.index.query.QueryFactory.equal; +import static run.halo.app.extension.index.query.QueryFactory.in; import com.google.common.hash.Hashing; import java.time.Duration; @@ -20,7 +21,9 @@ import java.util.Objects; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import lombok.AllArgsConstructor; +import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.StringUtils; import 
org.jsoup.Jsoup; @@ -28,8 +31,10 @@ import org.springframework.context.ApplicationEventPublisher; import org.springframework.data.domain.Sort; import org.springframework.stereotype.Component; +import reactor.core.publisher.Mono; import run.halo.app.content.CategoryService; import run.halo.app.content.ContentWrapper; +import run.halo.app.content.ExcerptGenerator; import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.PostService; import run.halo.app.content.comment.CommentService; @@ -39,6 +44,7 @@ import run.halo.app.core.extension.content.Post.PostPhase; import run.halo.app.core.extension.content.Post.VisibleEnum; import run.halo.app.core.extension.content.Snapshot; +import run.halo.app.core.extension.content.Tag; import run.halo.app.core.extension.notification.Subscription; import run.halo.app.event.post.PostDeletedEvent; import run.halo.app.event.post.PostPublishedEvent; @@ -62,6 +68,7 @@ import run.halo.app.metrics.CounterService; import run.halo.app.metrics.MeterUtils; import run.halo.app.notification.NotificationCenter; +import run.halo.app.plugin.extensionpoint.ExtensionGetter; /** *

<p>Reconciler for {@link Post}.</p>

@@ -75,6 +82,7 @@ * @author guqing * @since 2.0.0 */ +@Slf4j @AllArgsConstructor @Component public class PostReconciler implements Reconciler<Reconciler.Request> { @@ -85,6 +93,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> { private final CounterService counterService; private final CommentService commentService; private final CategoryService categoryService; + private final ExtensionGetter extensionGetter; private final ApplicationEventPublisher eventPublisher; private final NotificationCenter notificationCenter; @@ -155,14 +164,7 @@ public Result reconcile(Request request) { } var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true); if (isAutoGenerate) { - Optional<ContentWrapper> contentWrapper = - postService.getContent(post.getSpec().getReleaseSnapshot(), - post.getSpec().getBaseSnapshot()) - .blockOptional(); - if (contentWrapper.isPresent()) { - String contentRevised = contentWrapper.get().getContent(); - status.setExcerpt(getExcerpt(contentRevised)); - } + status.setExcerpt(getExcerpt(post)); } else { status.setExcerpt(excerpt.getRaw()); } @@ -375,11 +377,70 @@ private void cleanUpResources(Post post) { .block(); } - private String getExcerpt(String htmlContent) { - String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500); - String text = Jsoup.parse(shortHtmlContent).text(); - // TODO The default capture 150 words as excerpt - return StringUtils.substring(text, 0, 150); + /** + * Generates the excerpt from the released content of the post by the enabled + * {@link ExcerptGenerator}, falling back to {@link DefaultExcerptGenerator}. + * + * @return the generated excerpt, or an empty string if there is no content or it fails + */ + private String getExcerpt(Post post) { + Optional<ContentWrapper> contentWrapper = + postService.getContent(post.getSpec().getReleaseSnapshot(), + post.getSpec().getBaseSnapshot()) + .blockOptional(); + if (contentWrapper.isEmpty()) { + return StringUtils.EMPTY; + } + var content = contentWrapper.get(); + var tags = listTagDisplayNames(post); + + var keywords = new HashSet<>(tags); + keywords.add(post.getSpec().getTitle()); + + var context = new ExcerptGenerator.Context() + .setRaw(content.getRaw()) + .setContent(content.getContent()) + .setRawType(content.getRawType()) + .setKeywords(keywords) + 
.setMaxLength(160); + return extensionGetter.getEnabledExtension(ExcerptGenerator.class) + .defaultIfEmpty(new DefaultExcerptGenerator()) + .flatMap(generator -> generator.generate(context)) + .onErrorResume(Throwable.class, e -> { + log.error("Failed to generate excerpt for post [{}]", + post.getMetadata().getName(), e); + return Mono.empty(); + }) + .blockOptional() + .orElse(StringUtils.EMPTY); + } + + /** + * Lists the display names of the tags referenced by the post. + */ + private Set<String> listTagDisplayNames(Post post) { + return Optional.ofNullable(post.getSpec().getTags()) + .map(tags -> client.listAll(Tag.class, ListOptions.builder() + .fieldQuery(in("metadata.name", tags)) + .build(), Sort.unsorted()) + ) + .stream() + .flatMap(List::stream) + .map(tag -> tag.getSpec().getDisplayName()) + .collect(Collectors.toSet()); + } + + /** + * Fallback generator that takes the first 150 characters of the text extracted from + * the first 500 characters of the html content; {@code maxLength} is not consulted. + */ + static class DefaultExcerptGenerator implements ExcerptGenerator { + @Override + public Mono<String> generate(Context context) { + String shortHtmlContent = StringUtils.substring(context.getContent(), 0, 500); + String text = Jsoup.parse(shortHtmlContent).text(); + return Mono.just(StringUtils.substring(text, 0, 150)); + } } List<Snapshot> listSnapshots(Ref ref) { diff --git a/application/src/main/java/run/halo/app/core/extension/reconciler/SinglePageReconciler.java b/application/src/main/java/run/halo/app/core/extension/reconciler/SinglePageReconciler.java index 08bd37a26c..b803b6df5b 100644 --- a/application/src/main/java/run/halo/app/core/extension/reconciler/SinglePageReconciler.java +++ b/application/src/main/java/run/halo/app/core/extension/reconciler/SinglePageReconciler.java @@ -17,6 +17,9 @@ import org.springframework.data.domain.Sort; import org.springframework.stereotype.Component; import org.springframework.util.Assert; +import reactor.core.publisher.Mono; +import run.halo.app.content.ContentWrapper; +import run.halo.app.content.ExcerptGenerator; import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.SinglePageService; import run.halo.app.content.comment.CommentService; @@ -43,6 +46,7 @@ 
import run.halo.app.metrics.CounterService; import run.halo.app.metrics.MeterUtils; import run.halo.app.notification.NotificationCenter; +import run.halo.app.plugin.extensionpoint.ExtensionGetter; /** *

<p>Reconciler for {@link SinglePage}.</p>

@@ -65,6 +69,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> { private final SinglePageService singlePageService; private final CounterService counterService; private final CommentService commentService; + private final ExtensionGetter extensionGetter; private final ExternalUrlSupplier externalUrlSupplier; @@ -318,12 +323,7 @@ private void reconcileStatus(String name) { } if (excerpt.getAutoGenerate()) { - singlePageService.getContent(spec.getHeadSnapshot(), spec.getBaseSnapshot()) - .blockOptional() - .ifPresent(content -> { - String contentRevised = content.getContent(); - status.setExcerpt(getExcerpt(contentRevised)); - }); + status.setExcerpt(getExcerpt(singlePage)); } else { status.setExcerpt(excerpt.getRaw()); } @@ -363,11 +363,50 @@ private void reconcileStatus(String name) { }); } - private String getExcerpt(String htmlContent) { - String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500); - String text = Jsoup.parse(shortHtmlContent).text(); - // TODO The default capture 150 words as excerpt - return StringUtils.substring(text, 0, 150); + /** + * Generates the excerpt from the released content of the single page by the enabled + * {@link ExcerptGenerator}, falling back to {@link DefaultExcerptGenerator}. + * + * @return the generated excerpt, or an empty string if there is no content or it fails + */ + private String getExcerpt(SinglePage singlePage) { + Optional<ContentWrapper> contentWrapper = + singlePageService.getContent(singlePage.getSpec().getReleaseSnapshot(), + singlePage.getSpec().getBaseSnapshot()) + .blockOptional(); + if (contentWrapper.isEmpty()) { + return StringUtils.EMPTY; + } + var content = contentWrapper.get(); + var context = new ExcerptGenerator.Context() + .setRaw(content.getRaw()) + .setContent(content.getContent()) + .setRawType(content.getRawType()) + .setKeywords(Set.of()) + .setMaxLength(160); + return extensionGetter.getEnabledExtension(ExcerptGenerator.class) + .defaultIfEmpty(new DefaultExcerptGenerator()) + .flatMap(generator -> generator.generate(context)) + .onErrorResume(Throwable.class, e -> { + log.error("Failed to generate excerpt for single page [{}]", + singlePage.getMetadata().getName(), e); + return Mono.empty(); + }) + .blockOptional() + .orElse(StringUtils.EMPTY); + } + + /** + * Fallback generator that takes the first 150 characters of the text extracted from + * the first 500 characters of the html content; {@code maxLength} is not consulted. + */ + 
static class DefaultExcerptGenerator implements ExcerptGenerator { + @Override + public Mono<String> generate(Context context) { + String shortHtmlContent = StringUtils.substring(context.getContent(), 0, 500); + String text = Jsoup.parse(shortHtmlContent).text(); + return Mono.just(StringUtils.substring(text, 0, 150)); + } } private boolean isDeleted(SinglePage singlePage) { diff --git a/application/src/main/resources/extensions/extensionpoint-definitions.yaml b/application/src/main/resources/extensions/extensionpoint-definitions.yaml index 1a042a3a13..2e098119c2 100644 --- a/application/src/main/resources/extensions/extensionpoint-definitions.yaml +++ b/application/src/main/resources/extensions/extensionpoint-definitions.yaml @@ -87,4 +87,13 @@ spec: displayName: 页脚标签内容处理器 type: MULTI_INSTANCE description: "提供用于扩展 标签内容的扩展方式。" - \ No newline at end of file +--- +apiVersion: plugin.halo.run/v1alpha1 +kind: ExtensionPointDefinition +metadata: + name: excerpt-generator +spec: + className: run.halo.app.content.ExcerptGenerator + displayName: 摘要生成器 + type: SINGLETON + description: "提供自动生成摘要的方式扩展,如使用算法提取或使用 AI 生成。" diff --git a/application/src/test/java/run/halo/app/core/extension/reconciler/PostReconcilerTest.java b/application/src/test/java/run/halo/app/core/extension/reconciler/PostReconcilerTest.java index 005ca8b68d..b701523537 100644 --- a/application/src/test/java/run/halo/app/core/extension/reconciler/PostReconcilerTest.java +++ b/application/src/test/java/run/halo/app/core/extension/reconciler/PostReconcilerTest.java @@ -25,6 +25,7 @@ import org.springframework.context.ApplicationEventPublisher; import reactor.core.publisher.Mono; import run.halo.app.content.ContentWrapper; +import run.halo.app.content.ExcerptGenerator; import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.PostService; import run.halo.app.content.TestPost; @@ -36,6 +37,7 @@ import run.halo.app.extension.ExtensionClient; import run.halo.app.extension.controller.Reconciler; 
import run.halo.app.notification.NotificationCenter; +import run.halo.app.plugin.extensionpoint.ExtensionGetter; /** * Tests for {@link PostReconciler}. @@ -61,6 +63,9 @@ class PostReconcilerTest { @Mock private NotificationCenter notificationCenter; + @Mock + private ExtensionGetter extensionGetter; + @InjectMocks private PostReconciler postReconciler; @@ -96,7 +101,7 @@ void reconcile() { verify(postPermalinkPolicy, times(1)).permalink(any()); Post value = captor.getValue(); - assertThat(value.getStatus().getExcerpt()).isNull(); + assertThat(value.getStatus().getExcerpt()).isEmpty(); assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan")); } @@ -126,6 +131,9 @@ void reconcileExcerpt() { Snapshot snapshotV1 = TestPost.snapshotV1(); snapshotV1.getSpec().setContributors(Set.of("guqing")); + when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class))) + .thenReturn(Mono.empty()); + when(client.listAll(eq(Snapshot.class), any(), any())) .thenReturn(List.of(snapshotV1, snapshotV2)); @@ -162,6 +170,9 @@ void reconcileLastModifyTimeWhenPostIsPublished() { when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot()))) .thenReturn(Optional.of(snapshotV2)); + when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class))) + .thenReturn(Mono.empty()); + when(client.listAll(eq(Snapshot.class), any(), any())) .thenReturn(List.of()); @@ -191,6 +202,9 @@ void reconcileLastModifyTimeWhenPostIsNotPublished() { .rawType("markdown") .build())); + when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class))) + .thenReturn(Mono.empty()); + when(client.listAll(eq(Snapshot.class), any(), any())) .thenReturn(List.of()); diff --git a/application/src/test/java/run/halo/app/core/extension/reconciler/SinglePageReconcilerTest.java b/application/src/test/java/run/halo/app/core/extension/reconciler/SinglePageReconcilerTest.java index e5dc4d4c9f..486efadadc 100644 --- 
a/application/src/test/java/run/halo/app/core/extension/reconciler/SinglePageReconcilerTest.java +++ b/application/src/test/java/run/halo/app/core/extension/reconciler/SinglePageReconcilerTest.java @@ -26,6 +26,7 @@ import org.springframework.context.ApplicationContext; import reactor.core.publisher.Mono; import run.halo.app.content.ContentWrapper; +import run.halo.app.content.ExcerptGenerator; import run.halo.app.content.NotificationReasonConst; import run.halo.app.content.SinglePageService; import run.halo.app.content.TestPost; @@ -39,6 +40,7 @@ import run.halo.app.infra.ExternalUrlSupplier; import run.halo.app.metrics.CounterService; import run.halo.app.notification.NotificationCenter; +import run.halo.app.plugin.extensionpoint.ExtensionGetter; /** * Tests for {@link SinglePageReconciler}. @@ -66,6 +68,9 @@ class SinglePageReconcilerTest { @Mock NotificationCenter notificationCenter; + @Mock + ExtensionGetter extensionGetter; + @InjectMocks private SinglePageReconciler singlePageReconciler; @@ -79,9 +84,10 @@ void reconcile() { String name = "page-A"; SinglePage page = pageV1(); page.getSpec().setHeadSnapshot("page-A-head-snapshot"); + page.getSpec().setReleaseSnapshot(page.getSpec().getHeadSnapshot()); when(client.fetch(eq(SinglePage.class), eq(name))) .thenReturn(Optional.of(page)); - when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()), + when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()), eq(page.getSpec().getBaseSnapshot()))) .thenReturn(Mono.just(ContentWrapper.builder() .snapshotName(page.getSpec().getHeadSnapshot()) @@ -99,6 +105,9 @@ void reconcile() { .thenReturn(List.of(snapshotV1, snapshotV2)); when(externalUrlSupplier.get()).thenReturn(URI.create("")); + when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class))) + .thenReturn(Mono.empty()); + ArgumentCaptor captor = ArgumentCaptor.forClass(SinglePage.class); singlePageReconciler.reconcile(new Reconciler.Request(name)); @@ -141,7 +150,7 @@ void 
reconcileLastModifyTimeWhenPageIsPublished() { page.getSpec().setReleaseSnapshot("page-fake-released-snapshot"); when(client.fetch(eq(SinglePage.class), eq(name))) .thenReturn(Optional.of(page)); - when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()), + when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()), eq(page.getSpec().getBaseSnapshot()))) .thenReturn(Mono.just(ContentWrapper.builder() .snapshotName(page.getSpec().getHeadSnapshot()) @@ -156,6 +165,9 @@ void reconcileLastModifyTimeWhenPageIsPublished() { when(client.fetch(eq(Snapshot.class), eq(page.getSpec().getReleaseSnapshot()))) .thenReturn(Optional.of(snapshotV2)); + when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class))) + .thenReturn(Mono.empty()); + when(client.listAll(eq(Snapshot.class), any(), any())) .thenReturn(List.of()); @@ -176,7 +188,7 @@ void reconcileLastModifyTimeWhenPageIsNotPublished() { page.getSpec().setPublish(false); when(client.fetch(eq(SinglePage.class), eq(name))) .thenReturn(Optional.of(page)); - when(singlePageService.getContent(eq(page.getSpec().getHeadSnapshot()), + when(singlePageService.getContent(eq(page.getSpec().getReleaseSnapshot()), eq(page.getSpec().getBaseSnapshot()))) .thenReturn(Mono.just(ContentWrapper.builder() .snapshotName(page.getSpec().getHeadSnapshot()) @@ -186,6 +198,9 @@ void reconcileLastModifyTimeWhenPageIsNotPublished() { .build()) ); + when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class))) + .thenReturn(Mono.empty()); + when(client.listAll(eq(Snapshot.class), any(), any())) .thenReturn(List.of());