Skip to content

Commit

Permalink
Merge pull request #102 from jamebal/develop
Browse files Browse the repository at this point in the history
perf: 优化文本文件编码格式
  • Loading branch information
jamebal authored Jun 19, 2024
2 parents c50c9ae + 057c332 commit 181adda
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 13 deletions.
6 changes: 6 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@
<version>0.2.2</version>
</dependency>

<dependency>
<groupId>com.github.albfernandez</groupId>
<artifactId>juniversalchardet</artifactId>
<version>2.5.0</version>
</dependency>

<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.bson.Document;
import org.bson.conversions.Bson;
import org.jetbrains.annotations.NotNull;
import org.mozilla.universalchardet.ReaderFactory;
import org.springframework.beans.BeanUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Sort;
Expand All @@ -54,7 +55,9 @@

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -499,7 +502,7 @@ public Optional<FileDocument> getById(String id, Boolean content) {
if (Files.exists(filepath)) {
File file = filepath.toFile();
Charset charset = MyFileUtils.getFileCharset(file);
fileDocument.setDecoder(charset.toString());
fileDocument.setDecoder(charset.name());
if (BooleanUtil.isTrue(content)) {
fileDocument.setContentText(FileUtil.readString(file, charset));
}
Expand Down Expand Up @@ -542,7 +545,7 @@ public FileDocument previewTextByPath(String filePath, String username) throws C
throw new CommonException(ExceptionType.FILE_NOT_FIND);
}
FileDocument fileDocument = new FileDocument();
fileDocument.setDecoder(MyFileUtils.getFileCharset(file).toString());
fileDocument.setDecoder(MyFileUtils.getFileCharset(file).name());
Path path1 = path.subpath(0, path.getNameCount() - 1);
int rootCount = Paths.get(fileProperties.getRootDir(), username).getNameCount();
int path1Count = path1.getNameCount();
Expand All @@ -569,10 +572,7 @@ public StreamingResponseBody previewTextByPathStream(String filePath, String use
@NotNull
private static StreamingResponseBody getStreamingResponseBody(File file) {
return outputStream -> {
Charset charset = MyFileUtils.getFileCharset(file);
try (InputStream inputStream = FileUtil.getInputStream(file);
InputStreamReader inputStreamReader = new InputStreamReader(inputStream, charset);
BufferedReader bufferedReader = new BufferedReader(inputStreamReader)) {
try (BufferedReader bufferedReader = ReaderFactory.createBufferedReader(file)) {
// 判断file是否为log文件
boolean logFile = file.length() > 0 && FileTypeUtil.getType(file).equals("log");
String line;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import cn.hutool.core.date.DatePattern;
import cn.hutool.core.date.DateUtil;
import cn.hutool.core.date.LocalDateTimeUtil;
import cn.hutool.core.io.CharsetDetector;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.util.URLUtil;
Expand All @@ -30,6 +29,7 @@
import org.bson.Document;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.mozilla.universalchardet.UniversalDetector;
import org.springframework.core.io.InputStreamResource;
import org.springframework.data.domain.Sort;
import org.springframework.data.mongodb.core.MongoTemplate;
Expand Down Expand Up @@ -242,9 +242,9 @@ public FileDocument getFileById(String gridFSId) {
/**
 * Detects the character encoding of a GridFS file's content.
 *
 * <p>Detection is best-effort: UTF-8 is returned when the detector reports no encoding,
 * when the reported name is not a charset this JVM can resolve, or when reading the
 * content fails.
 *
 * @param gridFSFile the GridFS file whose content is inspected
 * @return the detected charset, or UTF-8 as the fallback
 */
private Charset getCharset(GridFSFile gridFSFile) {
    try (InputStream inputStream = getInputStream(gridFSFile)) {
        String charsetName = UniversalDetector.detectCharset(inputStream);
        // The detector may report no result; check explicitly instead of letting
        // Charset.forName(null) throw and relying on the catch below.
        if (charsetName != null && !charsetName.trim().isEmpty()) {
            return Charset.forName(charsetName);
        }
    } catch (Exception e) {
        // Deliberately broad: an unreadable stream or an unsupported charset name must
        // not break the caller, so fall back to UTF-8 — but leave a trace in the log.
        log.warn("Charset detection failed, falling back to UTF-8: {}", e.getMessage(), e);
    }
    return StandardCharsets.UTF_8;
}
Expand Down
12 changes: 10 additions & 2 deletions src/main/java/com/jmal/clouddisk/util/MyFileUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import cn.hutool.core.io.CharsetDetector;
import cn.hutool.core.io.FileTypeUtil;
import cn.hutool.core.util.StrUtil;
import lombok.extern.slf4j.Slf4j;
import org.mozilla.universalchardet.UniversalDetector;

import java.io.File;
import java.nio.charset.Charset;
Expand All @@ -14,6 +17,7 @@
* @Description 文件工具类
* @Date 2020-06-16 16:24
*/
@Slf4j
public class MyFileUtils {

public static List<String> hasContentTypes = Arrays.asList("pdf", "ppt", "pptx", "doc", "docx", "drawio", "mind");
Expand All @@ -28,8 +32,12 @@ private MyFileUtils(){
* @return 字符编码
*/
public static Charset getFileCharset(File file) {
    // Best-effort detection: any failure (unreadable file, no detection result,
    // charset name unknown to this JVM) falls back to UTF-8 rather than propagating.
    try {
        String charsetName = UniversalDetector.detectCharset(file);
        if (StrUtil.isBlank(charsetName)) {
            // The detector could not decide on an encoding.
            return StandardCharsets.UTF_8;
        }
        return Charset.forName(charsetName);
    } catch (Exception e) {
        // Keep the fallback, but record why detection failed instead of hiding it.
        log.warn("Charset detection failed for {}, falling back to UTF-8: {}", file, e.getMessage(), e);
        return StandardCharsets.UTF_8;
    }
}

public static boolean checkNoCacheFile(File file) {
Expand Down

0 comments on commit 181adda

Please sign in to comment.