Skip to content

Commit

Permalink
fix: invalid bytes in character encoding (#46)
Browse files Browse the repository at this point in the history
### What does this PR do?
修复内容中包含非法的 XML 字符导致 RSS 格式异常的问题

```release-note
修复内容中包含非法的 XML 字符导致 RSS 格式异常的问题
```
  • Loading branch information
guqing authored Dec 16, 2024
1 parent f6b461d commit 80b1922
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
9 changes: 7 additions & 2 deletions app/src/main/java/run/halo/feed/RssXmlBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Optional;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.dom4j.Document;
Expand Down Expand Up @@ -146,9 +147,13 @@ private void createItemElementsToChannel(Element channel, List<RSS2.Item> items)

private void createItemElementToChannel(Element channel, RSS2.Item item) {
Element itemElement = channel.addElement("item");
itemElement.addElement("title").addCDATA(item.getTitle());
itemElement.addElement("title")
.addCDATA(XmlCharUtils.removeInvalidXmlChar(item.getTitle()));
itemElement.addElement("link").addText(item.getLink());
var description = getDescriptionWithTelemetry(item);

var description = Optional.of(getDescriptionWithTelemetry(item))
.map(XmlCharUtils::removeInvalidXmlChar)
.orElseThrow();
itemElement.addElement("description").addCDATA(description);
itemElement.addElement("guid")
.addAttribute("isPermaLink", "false")
Expand Down
55 changes: 55 additions & 0 deletions app/src/test/java/run/halo/feed/RSS2Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import java.time.Instant;
import java.util.Arrays;
import java.util.Collections;
import org.junit.jupiter.api.Test;

class RSS2Test {
Expand Down Expand Up @@ -130,4 +131,58 @@ void extractRssTagsTest() {
""".formatted(lastBuildDate);
assertThat(rssXml).isEqualToIgnoringWhitespace(expected);
}

/**
 * Verifies that characters which are not legal in XML 1.0 are removed from an
 * item's title and description before they are written as CDATA.
 *
 * <p>The illegal character (U+0008, backspace) is written as an explicit
 * {@code \b} escape rather than as a raw control character, so that it stays
 * visible in the source and cannot be silently dropped by editors or
 * copy-and-paste. Without the escape the input would be identical to the
 * expected output and the test would pass even if no sanitising happened.
 */
@Test
void invalidCharTest() {
    // U+0008 is outside the XML 1.0 "Char" production, so it must not reach the output.
    var item = RSS2.Item.builder()
        .title("title\b1")
        .description("""
            <p>并且会保留处理后的图片\b以供后面的访问。</p>
            """)
        .link("link1")
        .pubDate(Instant.EPOCH)
        .guid("guid1")
        .build();
    var rss = RSS2.builder()
        .title("title")
        .description("description")
        .link("link")
        .items(Collections.singletonList(item))
        .build();

    var instant = Instant.now();
    var rssXml = new RssXmlBuilder()
        .withRss2(rss)
        .withGenerator("Halo")
        .withLastBuildDate(instant)
        .toXmlString();

    var lastBuildDate = RssXmlBuilder.instantToString(instant);
    // The expected document contains the same text with the backspace characters removed.
    // language=xml
    var expected = """
        <?xml version="1.0" encoding="UTF-8"?>
        <rss xmlns:media="http://search.yahoo.com/mrss/" version="2.0">
            <channel>
                <title>title</title>
                <link>link</link>
                <description>description</description>
                <generator>Halo</generator>
                <language>zh-cn</language>
                <lastBuildDate>%s</lastBuildDate>
                <item>
                    <title>
                        <![CDATA[title1]]>
                    </title>
                    <link>link1</link>
                    <description>
                        <![CDATA[<p>并且会保留处理后的图片以供后面的访问。</p>]]>
                    </description>
                    <guid isPermaLink="false">guid1</guid>
                    <pubDate>Thu, 1 Jan 1970 00:00:00 GMT</pubDate>
                </item>
            </channel>
        </rss>
        """.formatted(lastBuildDate);
    // Whitespace is ignored, so only non-whitespace differences (such as a stray U+0008) fail.
    assertThat(rssXml).isEqualToIgnoringWhitespace(expected);
}
}

0 comments on commit 80b1922

Please sign in to comment.