Skip to content

Commit

Permalink
feat: add qq music and bilibili parser (#16)
Browse files Browse the repository at this point in the history
支持解析QQ音乐和哔哩哔哩的分享链接

![image](https://github.com/user-attachments/assets/2bf50ea0-c1f3-4eee-9770-c341d08ba260)

由于原 parser 功能比较单一,所以对代码进行了重构,把请求的逻辑放到了 parser 里面

```release-note
支持解析QQ音乐和哔哩哔哩的分享链接
```
  • Loading branch information
Aziteee authored Nov 7, 2024
1 parent dd6d67b commit 80396cd
Show file tree
Hide file tree
Showing 8 changed files with 289 additions and 99 deletions.
37 changes: 0 additions & 37 deletions src/main/java/run/halo/editor/hyperlink/HyperLinkRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,43 +21,6 @@
@RequiredArgsConstructor
public class HyperLinkRequest {

private final HttpClientFactory clientFactory;

/**
 * Fetches the document behind {@code linkURI} and returns its text together with the URL that
 * was recorded for it.
 *
 * <p>The request is sent with a browser-like {@code User-Agent}, a {@code Referer} built from
 * the link's scheme and host, and {@code Accept: text/html}. Redirects are followed; every
 * redirect request that carries a non-blank resource URL overwrites the recorded URL, so the
 * response reports the last one seen (or the original link when none was recorded).
 *
 * @param linkURI the link to fetch
 * @return a {@code Mono} emitting the body and recorded URL, or an empty {@code Mono} when the
 *     body is empty
 */
public Mono<HyperLinkResponse> getHyperLinkDetail(URI linkURI) {
    // Seeded with the requested URL; updated by the redirect callback below.
    AtomicReference<String> resourceUrl = new AtomicReference<>(linkURI.toString());
    return clientFactory.createHttpClientBuilder(linkURI.getHost())
        .map(httpClient -> httpClient.followRedirect(true, (clientRequest) -> {
            if (StringUtils.hasText(clientRequest.resourceUrl())) {
                resourceUrl.set(clientRequest.resourceUrl());
            }
        }))
        .map(httpClient -> WebClient.builder()
            .clientConnector(new ReactorClientHttpConnector(httpClient))
            .build())
        .flatMap(webClient -> DataBufferUtils.join(
                webClient.get()
                    .uri(linkURI)
                    .accept(MediaType.TEXT_HTML)
                    .headers(httpHeaders -> {
                        httpHeaders.set(HttpHeaders.USER_AGENT,
                            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
                                + "like Gecko) Chrome/58.0.3029.110 Safari/537.3");
                        httpHeaders.set(HttpHeaders.REFERER,
                            linkURI.getScheme() + "://" + linkURI.getHost());
                    })
                    .retrieve()
                    .bodyToFlux(DataBuffer.class))
            // Decode only after all chunks are joined, in order: decoding each chunk separately
            // corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
            .map(dataBuffer -> {
                try {
                    return dataBuffer.toString(StandardCharsets.UTF_8);
                } finally {
                    DataBufferUtils.release(dataBuffer);
                }
            })
            .filter(htmlContent -> !htmlContent.isEmpty())
            .map(htmlContent -> new HyperLinkResponse(htmlContent, resourceUrl.get()))
        );
}

/**
 * Immutable pair describing a fetched link.
 *
 * @param htmlContent the text of the fetched document
 * @param url the URL recorded for that document
 */
public record HyperLinkResponse(String htmlContent, String url) {
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
package run.halo.editor.hyperlink.handler;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpHeaders;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.core.publisher.Mono;
import run.halo.editor.hyperlink.HttpClientFactory;
import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO;
import java.net.URI;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

@RequiredArgsConstructor
public class HyperLinkBilibiliParser implements HyperLinkParser<HyperLinkBaseDTO> {

private final HttpClientFactory clientFactory;
private final ObjectMapper objectMapper;

public Mono<HyperLinkBaseDTO> parse(URI linkURI) {
return getHyperLinkDetail(linkURI)
.map(item -> {
var hyperLinkDTO = new HyperLinkBaseDTO();
try {
JsonNode root = objectMapper.readTree(item);
JsonNode data = root.path("data");

hyperLinkDTO.setUrl(linkURI.toString());
hyperLinkDTO.setTitle(data.path("title").asText());
hyperLinkDTO.setImage(data.path("pic").asText());
hyperLinkDTO.setDescription("UP主:" + data.path("owner").path("name").asText());
} catch (JsonProcessingException e) {
throw new RuntimeException(e);
}
return hyperLinkDTO;
});
}

public Mono<String> getHyperLinkDetail(URI linkURI) {
String api = "https://api.bilibili.com/x/web-interface/view?" + getQueryParam(linkURI);
return clientFactory.createHttpClientBuilder(linkURI.getHost())
.map(httpClient -> WebClient.builder()
.clientConnector(new ReactorClientHttpConnector(httpClient))
.build())
.flatMap(webClient -> webClient.get()
.uri(api)
.headers(httpHeaders -> {
httpHeaders.set(HttpHeaders.CONTENT_TYPE, "application/json");
})
.retrieve()
.bodyToMono(String.class));
}

public String getQueryParam(URI linkURI) {
Pattern pattern = Pattern.compile("video/([a-zA-Z0-9]+)");
Matcher matcher = pattern.matcher(linkURI.toString());
if (!matcher.find()) {
throw new RuntimeException("id not found");
}
String id = matcher.group(1);
System.out.println(id);
if (id.chars().allMatch(Character::isDigit)) {
return "aid=" + id;
} else {
return "bvid=" + id;
}
}
}
Original file line number Diff line number Diff line change
@@ -1,77 +1,147 @@
package run.halo.editor.hyperlink.handler;

import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;
import lombok.RequiredArgsConstructor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;
import org.springframework.core.io.buffer.DataBuffer;
import org.springframework.core.io.buffer.DataBufferUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.web.server.ServerWebInputException;
import reactor.core.publisher.Mono;
import run.halo.app.infra.utils.PathUtils;
import run.halo.editor.hyperlink.HttpClientFactory;
import run.halo.editor.hyperlink.HyperLinkRequest;
import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO;

/**
* @author LIlGG
*/
@RequiredArgsConstructor
public class HyperLinkDefaultParser implements HyperLinkParser<HyperLinkBaseDTO> {

private final HttpClientFactory clientFactory;

/**
 * Builds a {@link HyperLinkBaseDTO} from the HTML of a page: meta tags, the {@code <title>}
 * element and {@code <link>} elements are read via {@code parserMetas} and {@code parserLinks}.
 *
 * <p>NOTE(review): this span appears to interleave two revisions of the method - a synchronous
 * {@code parse(String htmlContent)} and a reactive {@code parse(URI linkURI)} - so statements
 * operating on {@code hyperLinkBaseDTO} and on {@code hyperLinkDTO} alternate and the span does
 * not read as a single method body. Confirm against the real file before editing.
 */
@SuppressWarnings("checkstyle:MissingSwitchDefault")
@Override
public HyperLinkBaseDTO parse(String htmlContent) {
var hyperLinkBaseDTO = new HyperLinkBaseDTO();
Document parse = Jsoup.parse(htmlContent, Parser.htmlParser());
public Mono<HyperLinkBaseDTO> parse(URI linkURI) {
// An empty result from getHyperLinkDetail is turned into a ServerWebInputException.
return getHyperLinkDetail(linkURI)
.switchIfEmpty(
Mono.error(new ServerWebInputException("this website is not supported.")))
.map(item -> {
// URL reported with the response; used below as the base for relative references.
var actualURI = URI.create(item.url());

Elements meta = parse.getElementsByTag("meta");
parserMetas(meta, hyperLinkBaseDTO);
var hyperLinkDTO = new HyperLinkBaseDTO();
Document parse = Jsoup.parse(item.htmlContent(), Parser.htmlParser());

var titles = parse.getElementsByTag("title");
if (!CollectionUtils.isEmpty(titles)) {
var title = titles.get(0).text();
hyperLinkBaseDTO.setTitle(title);
}
Elements meta = parse.getElementsByTag("meta");
parserMetas(meta, hyperLinkDTO);

// The text of the first <title> element is set as the title after the meta tags were applied.
var titles = parse.getElementsByTag("title");
if (!CollectionUtils.isEmpty(titles)) {
var title = titles.get(0).text();
hyperLinkDTO.setTitle(title);
}

Elements links = parse.getElementsByTag("link");
parserLinks(links, hyperLinkBaseDTO);
Elements links = parse.getElementsByTag("link");
parserLinks(links, hyperLinkDTO);

return hyperLinkBaseDTO;
// A non-blank icon that is not an absolute URI is resolved against actualURI.
if (org.apache.commons.lang3.StringUtils.isNotBlank(hyperLinkDTO.getIcon())
&& !PathUtils.isAbsoluteUri(hyperLinkDTO.getIcon())) {
hyperLinkDTO.setIcon(actualURI.resolve(hyperLinkDTO.getIcon()).toString());
}
// Same treatment for the image reference.
if (org.apache.commons.lang3.StringUtils.isNotBlank(hyperLinkDTO.getImage())
&& !PathUtils.isAbsoluteUri(hyperLinkDTO.getImage())) {
hyperLinkDTO.setImage(actualURI.resolve(hyperLinkDTO.getImage()).toString());
}
// When the DTO has no URL yet, actualURI is used.
if (org.apache.commons.lang3.StringUtils.isBlank(hyperLinkDTO.getUrl())) {
hyperLinkDTO.setUrl(actualURI.toString());
}
return hyperLinkDTO;
});
}

/**
 * Fetches the document behind {@code linkURI} and returns its text together with the URL that
 * was recorded for it.
 *
 * <p>The request is sent with a browser-like {@code User-Agent}, a {@code Referer} built from
 * the link's scheme and host, and {@code Accept: text/html}. Redirects are followed; every
 * redirect request that carries a non-blank resource URL overwrites the recorded URL, so the
 * response reports the last one seen (or the original link when none was recorded).
 *
 * @param linkURI the link to fetch
 * @return a {@code Mono} emitting the body and recorded URL, or an empty {@code Mono} when the
 *     body is empty
 */
public Mono<HyperLinkRequest.HyperLinkResponse> getHyperLinkDetail(URI linkURI) {
    // Seeded with the requested URL; updated by the redirect callback below.
    AtomicReference<String> resourceUrl = new AtomicReference<>(linkURI.toString());
    return clientFactory.createHttpClientBuilder(linkURI.getHost())
        .map(httpClient -> httpClient.followRedirect(true, (clientRequest) -> {
            if (StringUtils.hasText(clientRequest.resourceUrl())) {
                resourceUrl.set(clientRequest.resourceUrl());
            }
        }))
        .map(httpClient -> WebClient.builder()
            .clientConnector(new ReactorClientHttpConnector(httpClient))
            .build())
        .flatMap(webClient -> DataBufferUtils.join(
                webClient.get()
                    .uri(linkURI)
                    .accept(MediaType.TEXT_HTML)
                    .headers(httpHeaders -> {
                        httpHeaders.set(HttpHeaders.USER_AGENT,
                            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
                                + "like Gecko) Chrome/58.0.3029.110 Safari/537.3");
                        httpHeaders.set(HttpHeaders.REFERER,
                            linkURI.getScheme() + "://" + linkURI.getHost());
                    })
                    .retrieve()
                    .bodyToFlux(DataBuffer.class))
            // Decode only after all chunks are joined, in order: decoding each chunk separately
            // corrupts any multi-byte UTF-8 character that straddles a chunk boundary.
            .map(dataBuffer -> {
                try {
                    return dataBuffer.toString(StandardCharsets.UTF_8);
                } finally {
                    DataBufferUtils.release(dataBuffer);
                }
            })
            .filter(htmlContent -> !htmlContent.isEmpty())
            .map(htmlContent ->
                new HyperLinkRequest.HyperLinkResponse(htmlContent, resourceUrl.get())));
}

/**
 * Picks the page icon from the given {@code <link>} elements.
 *
 * <p>Only elements with a {@code rel} attribute are considered. The attribute is split on
 * spaces, and when the tokens include {@code icon} and the DTO has no icon text yet, the
 * element's {@code href} becomes the icon - so an icon that is already set is not overwritten.
 *
 * <p>NOTE(review): the {@code forEach} body appears twice in this view, which looks like the
 * before/after of a re-indentation rather than two separate calls - confirm against the file.
 *
 * @param links the {@code <link>} elements of the document
 * @param hyperLinkBaseDTO the DTO whose icon is filled in
 */
private void parserLinks(Elements links, HyperLinkBaseDTO hyperLinkBaseDTO) {
links.stream().filter(element -> element.hasAttr("rel"))
.forEach(element -> {
String rel = element.attr("rel");
List<String> rels = Arrays.asList(rel.split(" "));
if (rels.contains("icon") && !StringUtils.hasText(hyperLinkBaseDTO.getIcon())) {
hyperLinkBaseDTO.setIcon(element.attr("href"));
}
});
.forEach(element -> {
String rel = element.attr("rel");
List<String> rels = Arrays.asList(rel.split(" "));
if (rels.contains("icon") && !StringUtils.hasText(hyperLinkBaseDTO.getIcon())) {
hyperLinkBaseDTO.setIcon(element.attr("href"));
}
});
}

/**
 * Copies Open Graph metadata from the given {@code <meta>} elements onto the DTO.
 *
 * <p>For elements with a {@code property} attribute, the {@code content} value of
 * {@code og:title}, {@code og:description}, {@code og:image} and {@code og:url} is written to
 * the title, description, image and url respectively; other properties are ignored. If the
 * description still has no text afterwards, the {@code content} of elements whose {@code name}
 * attribute equals {@code description} is used instead.
 *
 * <p>NOTE(review): each {@code forEach} body appears twice in this view, which looks like the
 * before/after of a re-indentation rather than repeated calls - confirm against the file.
 *
 * @param metas the {@code <meta>} elements of the document
 * @param hyperLinkBaseDTO the DTO to populate
 */
private void parserMetas(Elements metas, HyperLinkBaseDTO hyperLinkBaseDTO) {
metas.stream().filter(element -> element.hasAttr("property"))
.forEach(element -> {
String property = element.attr("property");
String content = element.attr("content");
switch (property) {
case "og:title" -> hyperLinkBaseDTO.setTitle(content);
case "og:description" -> hyperLinkBaseDTO.setDescription(content);
case "og:image" -> hyperLinkBaseDTO.setImage(content);
case "og:url" -> hyperLinkBaseDTO.setUrl(content);
default -> {
.forEach(element -> {
String property = element.attr("property");
String content = element.attr("content");
switch (property) {
case "og:title" -> hyperLinkBaseDTO.setTitle(content);
case "og:description" -> hyperLinkBaseDTO.setDescription(content);
case "og:image" -> hyperLinkBaseDTO.setImage(content);
case "og:url" -> hyperLinkBaseDTO.setUrl(content);
default -> {

}
}
}
});
});

// Fallback: use <meta name="description"> only when no description text was set above.
if (!StringUtils.hasText(hyperLinkBaseDTO.getDescription())) {
metas.stream().filter(element -> element.hasAttr("name"))
.forEach(element -> {
String name = element.attr("name");
String content = element.attr("content");
if ("description".equals(name)) {
hyperLinkBaseDTO.setDescription(content);
}
});
.forEach(element -> {
String name = element.attr("name");
String content = element.attr("content");
if ("description".equals(name)) {
hyperLinkBaseDTO.setDescription(content);
}
});
}
}
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package run.halo.editor.hyperlink.handler;

import reactor.core.publisher.Mono;
import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO;
import java.net.URI;

/**
 * Contract for producing a {@link HyperLinkBaseDTO} (or a subtype) that describes a link.
 *
 * @param <T> the DTO type produced by the parser
 * @author LIlGG
 */
public interface HyperLinkParser<T extends HyperLinkBaseDTO> {

// Synchronous form taking HTML text.
// NOTE(review): this and the URI-based signature below look like the old and new version of
// the same method shown together; confirm which one the file actually declares.
T parse(String htmlContent);
// Reactive form taking the link itself and emitting the DTO through a Mono.
Mono<T> parse(URI linkURI);
}
Original file line number Diff line number Diff line change
@@ -1,17 +1,34 @@
package run.halo.editor.hyperlink.handler;

import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import org.springframework.context.ApplicationContext;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import run.halo.editor.hyperlink.HttpClientFactory;
import run.halo.editor.hyperlink.dto.HyperLinkBaseDTO;

/**
 * Spring configuration that registers the available {@link HyperLinkParser} implementations as
 * beans: the default parser, the QQ Music parser and the Bilibili parser. All of them receive
 * the injected {@link HttpClientFactory}.
 *
 * @author LIlGG
 */
@Configuration
@RequiredArgsConstructor
public class HyperLinkParserConfiguration {

private final HttpClientFactory httpClientFactory;

// NOTE(review): two return statements appear here - a no-arg construction followed by one that
// passes httpClientFactory. They look like the old and new line of the same statement; confirm
// against the file.
@Bean
public HyperLinkParser<HyperLinkBaseDTO> defaultParser() {
return new HyperLinkDefaultParser();
return new HyperLinkDefaultParser(httpClientFactory);
}

// Each JSON-based parser below is given its own newly created ObjectMapper.
@Bean
public HyperLinkParser<HyperLinkBaseDTO> qqMusicParser() {
return new HyperLinkQQMusicParser(httpClientFactory, new ObjectMapper());
}

@Bean
public HyperLinkParser<HyperLinkBaseDTO> bilibiliParser() {
return new HyperLinkBilibiliParser(httpClientFactory, new ObjectMapper());
}
}
Loading

0 comments on commit 80396cd

Please sign in to comment.