Skip to content

Commit

Permalink
feat: add extension point for excerpt generation (#6348)
Browse files Browse the repository at this point in the history
#### What type of PR is this?
/kind feature
/area core
/milestone 2.18.x

#### What this PR does / why we need it:
新增文章摘要生成扩展点用于扩展自动生成摘要的方式

#### Does this PR introduce a user-facing change?
```release-note
新增文章摘要生成扩展点用于扩展自动生成摘要的方式
```
  • Loading branch information
guqing authored Jul 31, 2024
1 parent 0110438 commit 39ff455
Show file tree
Hide file tree
Showing 6 changed files with 176 additions and 29 deletions.
32 changes: 32 additions & 0 deletions api/src/main/java/run/halo/app/content/ExcerptGenerator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package run.halo.app.content;

import java.util.Set;
import lombok.Data;
import lombok.experimental.Accessors;
import org.pf4j.ExtensionPoint;
import reactor.core.publisher.Mono;

/**
 * Extension point for generating an excerpt from a piece of content.
 *
 * <p>Implementations receive a {@link Context} describing the content and return the
 * excerpt asynchronously.</p>
 */
public interface ExcerptGenerator extends ExtensionPoint {

/**
 * Generates an excerpt for the content described by the given context.
 *
 * @param context input describing the content to summarize
 * @return a {@link Mono} emitting the generated excerpt text
 */
Mono<String> generate(ExcerptGenerator.Context context);

/**
 * Input passed to {@link #generate(Context)}.
 *
 * <p>Lombok generates the accessors; because of {@code chain = true} every setter
 * returns this instance so calls can be chained.</p>
 */
@Data
@Accessors(chain = true)
class Context {
// NOTE(review): presumably the original, unrendered source of the content, in the
// format named by rawType - confirm against the callers.
private String raw;
/**
* The content in HTML form.
*/
private String content;

// NOTE(review): presumably the format identifier of raw (e.g. "markdown") - confirm.
private String rawType;
/**
* Keywords related to the content, given as hints to make the generated excerpt
* more accurate.
*/
private Set<String> keywords;
/**
* Max length of the generated excerpt.
*/
private int maxLength;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import static run.halo.app.extension.MetadataUtil.nullSafeAnnotations;
import static run.halo.app.extension.MetadataUtil.nullSafeLabels;
import static run.halo.app.extension.index.query.QueryFactory.equal;
import static run.halo.app.extension.index.query.QueryFactory.in;

import com.google.common.hash.Hashing;
import java.time.Duration;
Expand All @@ -20,16 +21,20 @@
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import reactor.core.publisher.Mono;
import run.halo.app.content.CategoryService;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService;
import run.halo.app.content.comment.CommentService;
Expand All @@ -39,6 +44,7 @@
import run.halo.app.core.extension.content.Post.PostPhase;
import run.halo.app.core.extension.content.Post.VisibleEnum;
import run.halo.app.core.extension.content.Snapshot;
import run.halo.app.core.extension.content.Tag;
import run.halo.app.core.extension.notification.Subscription;
import run.halo.app.event.post.PostDeletedEvent;
import run.halo.app.event.post.PostPublishedEvent;
Expand All @@ -62,6 +68,7 @@
import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* <p>Reconciler for {@link Post}.</p>
Expand All @@ -75,6 +82,7 @@
* @author guqing
* @since 2.0.0
*/
@Slf4j
@AllArgsConstructor
@Component
public class PostReconciler implements Reconciler<Reconciler.Request> {
Expand All @@ -85,6 +93,7 @@ public class PostReconciler implements Reconciler<Reconciler.Request> {
private final CounterService counterService;
private final CommentService commentService;
private final CategoryService categoryService;
private final ExtensionGetter extensionGetter;

private final ApplicationEventPublisher eventPublisher;
private final NotificationCenter notificationCenter;
Expand Down Expand Up @@ -155,14 +164,7 @@ public Result reconcile(Request request) {
}
var isAutoGenerate = defaultIfNull(excerpt.getAutoGenerate(), true);
if (isAutoGenerate) {
Optional<ContentWrapper> contentWrapper =
postService.getContent(post.getSpec().getReleaseSnapshot(),
post.getSpec().getBaseSnapshot())
.blockOptional();
if (contentWrapper.isPresent()) {
String contentRevised = contentWrapper.get().getContent();
status.setExcerpt(getExcerpt(contentRevised));
}
status.setExcerpt(getExcerpt(post));
} else {
status.setExcerpt(excerpt.getRaw());
}
Expand Down Expand Up @@ -375,11 +377,57 @@ private void cleanUpResources(Post post) {
.block();
}

private String getExcerpt(String htmlContent) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt
return StringUtils.substring(text, 0, 150);
/**
 * Produces the auto-generated excerpt for the given post.
 *
 * <p>The content is resolved from the post's release and base snapshots. The enabled
 * {@link ExcerptGenerator} extension is used when one is available, otherwise
 * {@link DefaultExcerptGenerator} is used.</p>
 *
 * @param post the post whose excerpt should be generated
 * @return the generated excerpt, or an empty string when no content could be resolved,
 *     the generator emitted nothing, or generation failed (the failure is logged)
 */
private String getExcerpt(Post post) {
    var spec = post.getSpec();
    ContentWrapper released =
        postService.getContent(spec.getReleaseSnapshot(), spec.getBaseSnapshot())
            .blockOptional()
            .orElse(null);
    if (released == null) {
        return StringUtils.EMPTY;
    }

    // Tag display names plus the post title are handed to the generator as keyword hints.
    Set<String> hints = new HashSet<>(listTagDisplayNames(post));
    hints.add(spec.getTitle());

    var generationContext = new ExcerptGenerator.Context();
    generationContext.setRaw(released.getRaw());
    generationContext.setContent(released.getContent());
    generationContext.setRawType(released.getRawType());
    generationContext.setKeywords(hints);
    generationContext.setMaxLength(160);

    Optional<String> generated = extensionGetter.getEnabledExtension(ExcerptGenerator.class)
        // No enabled extension: fall back to the built-in generator.
        .defaultIfEmpty(new DefaultExcerptGenerator())
        .flatMap(generator -> generator.generate(generationContext))
        // A failing generator must not break reconciliation; log and yield nothing.
        .onErrorResume(Throwable.class, error -> {
            log.error("Failed to generate excerpt for post [{}]",
                post.getMetadata().getName(), error);
            return Mono.empty();
        })
        .blockOptional();
    return generated.orElse(StringUtils.EMPTY);
}

/**
 * Collects the display names of the tags referenced by the given post.
 *
 * <p>The tags are looked up by matching {@code metadata.name} against the names in the
 * post spec; no lookup is performed when the spec has no tag list.</p>
 *
 * @param post the post whose tags should be resolved
 * @return the display names of the matched tags, empty when the post has no tag list
 */
private Set<String> listTagDisplayNames(Post post) {
    var referencedTags = Optional.ofNullable(post.getSpec().getTags())
        .map(tagNames -> {
            var options = ListOptions.builder()
                .fieldQuery(in("metadata.name", tagNames))
                .build();
            return client.listAll(Tag.class, options, Sort.unsorted());
        })
        .orElse(List.of());
    return referencedTags.stream()
        .map(tag -> tag.getSpec().getDisplayName())
        .collect(Collectors.toSet());
}

/**
 * Built-in excerpt generator used when no {@link ExcerptGenerator} extension is enabled.
 *
 * <p>It only reads the HTML content of the context; the other context fields are not
 * consulted.</p>
 */
static class DefaultExcerptGenerator implements ExcerptGenerator {
    /**
     * Takes the first 500 characters of the HTML content, extracts their text with
     * Jsoup and returns the first 150 characters of that text.
     *
     * @param context the generation input; only its HTML content is used
     * @return a {@link Mono} emitting the excerpt
     */
    @Override
    public Mono<String> generate(Context context) {
        // Only a prefix of the HTML is parsed.
        var htmlPrefix = StringUtils.substring(context.getContent(), 0, 500);
        var plainText = Jsoup.parse(htmlPrefix).text();
        var excerpt = StringUtils.substring(plainText, 0, 150);
        return Mono.just(excerpt);
    }
}

List<Snapshot> listSnapshots(Ref ref) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
import org.springframework.data.domain.Sort;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.SinglePageService;
import run.halo.app.content.comment.CommentService;
Expand All @@ -43,6 +46,7 @@
import run.halo.app.metrics.CounterService;
import run.halo.app.metrics.MeterUtils;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* <p>Reconciler for {@link SinglePage}.</p>
Expand All @@ -65,6 +69,7 @@ public class SinglePageReconciler implements Reconciler<Reconciler.Request> {
private final SinglePageService singlePageService;
private final CounterService counterService;
private final CommentService commentService;
private final ExtensionGetter extensionGetter;

private final ExternalUrlSupplier externalUrlSupplier;

Expand Down Expand Up @@ -318,12 +323,7 @@ private void reconcileStatus(String name) {
}

if (excerpt.getAutoGenerate()) {
singlePageService.getContent(spec.getHeadSnapshot(), spec.getBaseSnapshot())
.blockOptional()
.ifPresent(content -> {
String contentRevised = content.getContent();
status.setExcerpt(getExcerpt(contentRevised));
});
status.setExcerpt(getExcerpt(singlePage));
} else {
status.setExcerpt(excerpt.getRaw());
}
Expand Down Expand Up @@ -363,11 +363,40 @@ private void reconcileStatus(String name) {
});
}

private String getExcerpt(String htmlContent) {
String shortHtmlContent = StringUtils.substring(htmlContent, 0, 500);
String text = Jsoup.parse(shortHtmlContent).text();
// TODO The default capture 150 words as excerpt
return StringUtils.substring(text, 0, 150);
/**
 * Produces the auto-generated excerpt for the given single page.
 *
 * <p>The content is resolved from the page's release and base snapshots. The enabled
 * {@link ExcerptGenerator} extension is used when one is available, otherwise
 * {@link DefaultExcerptGenerator} is used.</p>
 *
 * @param singlePage the page whose excerpt should be generated
 * @return the generated excerpt, or an empty string when no content could be resolved,
 *     the generator emitted nothing, or generation failed (the failure is logged)
 */
private String getExcerpt(SinglePage singlePage) {
    // NOTE(review): the inline code this method replaced read the head snapshot
    // (spec.getHeadSnapshot()); confirm that the release snapshot is intended here.
    Optional<ContentWrapper> contentWrapper =
        singlePageService.getContent(singlePage.getSpec().getReleaseSnapshot(),
                singlePage.getSpec().getBaseSnapshot())
            .blockOptional();
    if (contentWrapper.isEmpty()) {
        return StringUtils.EMPTY;
    }
    var content = contentWrapper.get();
    var context = new ExcerptGenerator.Context()
        .setRaw(content.getRaw())
        .setContent(content.getContent())
        // Must be setRawType: a second setRaw(...) here would overwrite the raw content
        // with the raw type and leave rawType unset.
        .setRawType(content.getRawType())
        // Single pages pass no keyword hints.
        .setKeywords(Set.of())
        .setMaxLength(160);
    return extensionGetter.getEnabledExtension(ExcerptGenerator.class)
        // No enabled extension: fall back to the built-in generator.
        .defaultIfEmpty(new DefaultExcerptGenerator())
        .flatMap(generator -> generator.generate(context))
        // A failing generator must not break reconciliation; log and yield nothing.
        .onErrorResume(Throwable.class, e -> {
            log.error("Failed to generate excerpt for single page [{}]",
                singlePage.getMetadata().getName(), e);
            return Mono.empty();
        })
        .blockOptional()
        .orElse(StringUtils.EMPTY);
}

/**
 * Built-in excerpt generator used when no {@link ExcerptGenerator} extension is enabled.
 *
 * <p>Only the HTML content of the context is read; the remaining context fields are
 * not consulted.</p>
 */
static class DefaultExcerptGenerator implements ExcerptGenerator {
    /**
     * Extracts the text of the first 500 characters of the HTML content and returns
     * its first 150 characters.
     *
     * @param context the generation input; only its HTML content is used
     * @return a {@link Mono} emitting the excerpt
     */
    @Override
    public Mono<String> generate(Context context) {
        // Only a prefix of the HTML is parsed.
        final String truncatedHtml = StringUtils.substring(context.getContent(), 0, 500);
        return Mono.just(StringUtils.substring(Jsoup.parse(truncatedHtml).text(), 0, 150));
    }
}

private boolean isDeleted(SinglePage singlePage) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,13 @@ spec:
displayName: 页脚标签内容处理器
type: MULTI_INSTANCE
description: "提供用于扩展 <halo:footer/> 标签内容的扩展方式。"

---
# Declares the extension point definition for excerpt generation.
apiVersion: plugin.halo.run/v1alpha1
kind: ExtensionPointDefinition
metadata:
name: excerpt-generator
spec:
# Fully qualified name of the extension point interface.
className: run.halo.app.content.ExcerptGenerator
# Display name; in English: "Excerpt generator".
displayName: 摘要生成器
# NOTE(review): SINGLETON presumably means at most one implementation is in effect at a
# time - confirm against the extension point documentation.
type: SINGLETON
# In English: "Provides a way to extend automatic excerpt generation, e.g. extracting
# with an algorithm or generating with AI."
description: "提供自动生成摘要的方式扩展,如使用算法提取或使用 AI 生成。"
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.springframework.context.ApplicationEventPublisher;
import reactor.core.publisher.Mono;
import run.halo.app.content.ContentWrapper;
import run.halo.app.content.ExcerptGenerator;
import run.halo.app.content.NotificationReasonConst;
import run.halo.app.content.PostService;
import run.halo.app.content.TestPost;
Expand All @@ -36,6 +37,7 @@
import run.halo.app.extension.ExtensionClient;
import run.halo.app.extension.controller.Reconciler;
import run.halo.app.notification.NotificationCenter;
import run.halo.app.plugin.extensionpoint.ExtensionGetter;

/**
* Tests for {@link PostReconciler}.
Expand All @@ -61,6 +63,9 @@ class PostReconcilerTest {
@Mock
private NotificationCenter notificationCenter;

@Mock
private ExtensionGetter extensionGetter;

@InjectMocks
private PostReconciler postReconciler;

Expand Down Expand Up @@ -96,7 +101,7 @@ void reconcile() {
verify(postPermalinkPolicy, times(1)).permalink(any());

Post value = captor.getValue();
assertThat(value.getStatus().getExcerpt()).isNull();
assertThat(value.getStatus().getExcerpt()).isEmpty();
assertThat(value.getStatus().getContributors()).isEqualTo(List.of("guqing", "zhangsan"));
}

Expand Down Expand Up @@ -126,6 +131,9 @@ void reconcileExcerpt() {
Snapshot snapshotV1 = TestPost.snapshotV1();
snapshotV1.getSpec().setContributors(Set.of("guqing"));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of(snapshotV1, snapshotV2));

Expand Down Expand Up @@ -162,6 +170,9 @@ void reconcileLastModifyTimeWhenPostIsPublished() {
when(client.fetch(eq(Snapshot.class), eq(post.getSpec().getReleaseSnapshot())))
.thenReturn(Optional.of(snapshotV2));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

Expand Down Expand Up @@ -191,6 +202,9 @@ void reconcileLastModifyTimeWhenPostIsNotPublished() {
.rawType("markdown")
.build()));

when(extensionGetter.getEnabledExtension(eq(ExcerptGenerator.class)))
.thenReturn(Mono.empty());

when(client.listAll(eq(Snapshot.class), any(), any()))
.thenReturn(List.of());

Expand Down
Loading

0 comments on commit 39ff455

Please sign in to comment.