From fa95355872f86075b6c481a8532e8a2f62618449 Mon Sep 17 00:00:00 2001 From: Takagi <1103069291@qq.com> Date: Fri, 14 Jun 2024 12:20:37 +0800 Subject: [PATCH] fix: compressed web pages cannot be parsed normally (#4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #### What type of PR is this? /kind bug #### What this PR does / why we need it: 当网页使用 `gzip` 压缩时，响应内容会被解析为乱码，导致解析失败。 #### How to test it? 测试请求网站 https://www.bilibili.com/video/BV1Vu4m1M7Nr/ ，确认解析不再报错。 #### Which issue(s) this PR fixes: Fixes #3 #### Does this PR introduce a user-facing change? ```release-note 解决被压缩的网站无法解析的问题 ``` --- .../java/run/halo/editor/hyperlink/HttpClientFactory.java | 3 ++- .../editor/hyperlink/handler/HyperLinkDefaultParser.java | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/main/java/run/halo/editor/hyperlink/HttpClientFactory.java b/src/main/java/run/halo/editor/hyperlink/HttpClientFactory.java index 908b8f7..57f2ada 100644 --- a/src/main/java/run/halo/editor/hyperlink/HttpClientFactory.java +++ b/src/main/java/run/halo/editor/hyperlink/HttpClientFactory.java @@ -47,7 +47,8 @@ private static boolean isProxy(ProxyConfig proxyConfig, String host) { private static HttpClient getHttpClient() { return HttpClient.create() - .responseTimeout(Duration.ofSeconds(10)); + .responseTimeout(Duration.ofSeconds(10)) + .compress(true); } record ProxyConfig(String host, int port, List hosts) { diff --git a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java index 1981f3c..0ecabd4 100644 --- a/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java +++ b/src/main/java/run/halo/editor/hyperlink/handler/HyperLinkDefaultParser.java @@ -6,6 +6,7 @@ import org.jsoup.nodes.Document; import org.jsoup.parser.Parser; import org.jsoup.select.Elements; +import org.springframework.util.CollectionUtils; import 
org.springframework.util.StringUtils; import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO; @@ -23,8 +24,11 @@ public HyperLinkBaseDTO parse(String htmlContent) { Elements meta = parse.getElementsByTag("meta"); parserMetas(meta, hyperLinkBaseDTO); - var title = parse.getElementsByTag("title").get(0).text(); - hyperLinkBaseDTO.setTitle(title); + var titles = parse.getElementsByTag("title"); + if (!CollectionUtils.isEmpty(titles)) { + var title = titles.get(0).text(); + hyperLinkBaseDTO.setTitle(title); + } Elements links = parse.getElementsByTag("link"); parserLinks(links, hyperLinkBaseDTO);