Skip to content

Commit

Permalink
Merge pull request #193 from jamebal/develop
Browse files Browse the repository at this point in the history
chore: 优化dev环境下设置tesseract的lib路径
  • Loading branch information
jamebal authored Dec 4, 2024
2 parents ea68dfa + 93904c4 commit 0d7259a
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 14 deletions.
12 changes: 4 additions & 8 deletions src/main/java/com/jmal/clouddisk/ClouddiskApplication.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package com.jmal.clouddisk;

import cn.hutool.core.io.FileUtil;
import cn.hutool.crypto.SecureUtil;
import com.jmal.clouddisk.util.TesseractUtil;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.scheduling.annotation.EnableScheduling;

import java.nio.file.Path;
import java.nio.file.Paths;

/**
* ClouddiskApplication
*
Expand All @@ -18,6 +16,7 @@
@SpringBootApplication
@EnableCaching
@EnableScheduling
@Slf4j
public class ClouddiskApplication {

public static void main(String[] args) {
Expand All @@ -27,10 +26,7 @@ public static void main(String[] args) {
application.setAllowCircularReferences(true);

// dev环境下设置tesseract的lib路径
Path tesseractLibPath = Paths.get("/opt/homebrew/Cellar/tesseract/5.3.4_1/lib");
if (FileUtil.exist(tesseractLibPath.toFile())) {
System.setProperty("jna.library.path", tesseractLibPath.toString());
}
TesseractUtil.setTesseractLibPath();

application.run(args);
}
Expand Down
75 changes: 75 additions & 0 deletions src/main/java/com/jmal/clouddisk/util/TesseractUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.jmal.clouddisk.util;

import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Optional;
import java.util.stream.Stream;

/**
 * Helper for the dev environment that points JNA at a Homebrew-installed
 * Tesseract native library.
 *
 * <p>It scans the Tesseract Cellar directory for version sub-directories,
 * picks the one with the highest version name and, if that directory contains
 * a {@code lib} sub-directory, publishes it through the
 * {@code jna.library.path} system property.
 */
@Slf4j
public class TesseractUtil {

    /** Directory whose sub-directories are treated as installed Tesseract versions. */
    private static final String TESSERACT_CELLAR_PATH = "/opt/homebrew/Cellar/tesseract";

    /**
     * Sets the {@code jna.library.path} system property to the {@code lib}
     * directory of the highest-versioned Tesseract installation found.
     *
     * <p>When no such directory is found the property is left untouched and a
     * warning is logged. This method never throws because of filesystem errors.
     */
    public static void setTesseractLibPath() {
        String libPath = findTesseractLibPath();
        if (libPath != null) {
            System.setProperty("jna.library.path", libPath);
            log.info("Set Tesseract lib path: {}", libPath);
        } else {
            log.warn("Tesseract lib path not found");
        }
    }

    /**
     * Locates the {@code lib} directory of the highest-versioned installation
     * under {@link #TESSERACT_CELLAR_PATH}.
     *
     * @return the path of the {@code lib} directory as a string, or
     *         {@code null} if the base directory is missing, contains no
     *         version directory, the newest version has no {@code lib}
     *         directory, or the directory could not be read
     */
    private static String findTesseractLibPath() {
        Path baseDir = Paths.get(TESSERACT_CELLAR_PATH);

        // Nothing to do when the base path is absent or is not a directory.
        if (!Files.isDirectory(baseDir)) {
            return null;
        }

        // Files.list keeps a directory handle open until the stream is closed,
        // so it must be used inside try-with-resources.
        try (Stream<Path> versionDirs = Files.list(baseDir)) {
            return versionDirs
                    .filter(Files::isDirectory)
                    // Pick the directory with the highest version name.
                    .max((p1, p2) -> compareVersions(
                            p1.getFileName().toString(),
                            p2.getFileName().toString()))
                    .map(versionDir -> versionDir.resolve("lib"))
                    .filter(Files::exists)
                    .map(Path::toString)
                    .orElse(null);
        } catch (IOException | UncheckedIOException e) {
            // Errors raised while iterating the directory lazily surface as
            // UncheckedIOException; treat them like a failed listing.
            log.error("Failed to find Tesseract lib path", e);
            return null;
        }
    }

    /**
     * Compares two version strings segment by segment.
     *
     * <p>Both strings are split on {@code '.'} and {@code '_'}. Segments at
     * the same position are compared numerically when both parse as
     * {@code int}, otherwise lexicographically as strings. If all shared
     * segments are equal, the version with more segments is considered
     * greater.
     *
     * @param v1 first version string
     * @param v2 second version string
     * @return a negative value, zero, or a positive value when {@code v1} is
     *         lower than, equal to, or higher than {@code v2}
     */
    private static int compareVersions(String v1, String v2) {
        String[] parts1 = v1.split("[._]");
        String[] parts2 = v2.split("[._]");

        int length = Math.min(parts1.length, parts2.length);
        for (int i = 0; i < length; i++) {
            int comp;
            try {
                // Integer.compare avoids the overflow that subtraction can cause.
                comp = Integer.compare(Integer.parseInt(parts1[i]), Integer.parseInt(parts2[i]));
            } catch (NumberFormatException e) {
                // Fall back to a string comparison when a segment is not an int.
                comp = parts1[i].compareTo(parts2[i]);
            }
            if (comp != 0) {
                return comp;
            }
        }
        return Integer.compare(parts1.length, parts2.length);
    }
}
8 changes: 2 additions & 6 deletions src/test/java/com/jmal/clouddisk/lucene/ReadPDFTest.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
package com.jmal.clouddisk.lucene;

import com.jmal.clouddisk.util.TesseractUtil;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;

import static org.junit.jupiter.api.Assertions.*;

Expand All @@ -20,10 +19,7 @@ public class ReadPDFTest {
public void testImagePDF() {

// dev环境下设置tesseract的lib路径
Path tesseractLibPath = Paths.get("/opt/homebrew/Cellar/tesseract/5.3.4_1/lib");
if (tesseractLibPath.toFile().exists()) {
System.setProperty("jna.library.path", tesseractLibPath.toString());
}
TesseractUtil.setTesseractLibPath();

File file = new File("/Users/jmal/Downloads/1hyflld.pdf");
assertNotNull(file, "File should not be null");
Expand Down

0 comments on commit 0d7259a

Please sign in to comment.