Skip to content

Commit

Permalink
优化书名匹配效果
Browse files Browse the repository at this point in the history
  • Loading branch information
jianyun8023 committed Dec 13, 2024
1 parent d0cd5af commit 595381b
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 43 deletions.
51 changes: 8 additions & 43 deletions cmd/clname.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,18 @@ package cmd

import (
"fmt"
"github.com/jianyun8023/bookimporter/pkg/util"
"github.com/kapmahc/epub"
"github.com/spf13/cobra"
"os"
"os/exec"
"path"
"path/filepath"
"regexp"
"strings"
)

// c holds the flag values for the clname command.
var c = &ClnameConfig{
ReNameReg: regexp.MustCompile(`(?m)(\s?[((【][^))】((【册卷套版]{4,}[))】])`),
}
var c = &ClnameConfig{}

// clnameCmd cleans up the titles of EPUB books.
var clnameCmd = &cobra.Command{
Expand All @@ -26,7 +24,7 @@ var clnameCmd = &cobra.Command{

ValidateConfig(c)

if IsDir(c.Path) {
if util.IsDir(c.Path) {
m, _ := filepath.Glob(path.Join(c.Path, "*.epub"))
for _, val := range m {
// fmt.Println(val)
Expand All @@ -52,11 +50,11 @@ var clnameCmd = &cobra.Command{
}

func ValidateConfig(c *ClnameConfig) {
if !Exists(c.Path) {
if !util.Exists(c.Path) {
fmt.Println("文件路径不存在,请检查")
os.Exit(1)
}
if IsFile(c.Path) && !strings.HasSuffix(c.Path, ".epub") {
if util.IsFile(c.Path) && !strings.HasSuffix(c.Path, ".epub") {
fmt.Println("文件格式不存在,请检查")
os.Exit(1)
}
Expand All @@ -69,7 +67,7 @@ func init() {
"尝试运行")
clnameCmd.Flags().BoolVarP(&c.Skip, "skip", "j", false,
"跳过无法解析的书籍")
clnameCmd.Flags().BoolVarP(&c.Debug, "debug", "d", false, "The number of download threads.")
clnameCmd.Flags().BoolVarP(&c.Debug, "debug", "d", false, "调试模式")
}

func ParseEpub(file string, c *ClnameConfig) error {
Expand All @@ -81,13 +79,8 @@ func ParseEpub(file string, c *ClnameConfig) error {
return fmt.Errorf("无法获得书籍标题")
}
title := book.Opf.Metadata.Title[0]

if len(c.ReNameReg.FindAllString(title, -1)) == 0 {
return nil
}
newTitle := c.ReNameReg.ReplaceAllString(title, "")
newTitle = strings.TrimSpace(strings.ReplaceAll(newTitle, "\"", " "))
if len(newTitle) == 0 {
newTitle := util.CleanTitle(title)
if title == newTitle {
return nil
}

Expand Down Expand Up @@ -117,32 +110,4 @@ type ClnameConfig struct {
DoTry bool
Debug bool
Skip bool

ReNameReg *regexp.Regexp
}

// Exists reports whether path refers to an existing file or directory.
//
// Any os.Stat failure (missing path, permission error, ...) is reported as
// false. os.Stat never fails with an "already exists" error, so a nil error
// is the only positive signal available here.
func Exists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}

// IsDir reports whether path refers to a directory. It returns false when
// the path cannot be stat'ed.
func IsDir(path string) bool {
	if info, statErr := os.Stat(path); statErr == nil {
		return info.IsDir()
	}
	return false
}

// IsFile reports whether path refers to an existing entry that is not a
// directory.
//
// A path that cannot be stat'ed (for example because it does not exist) is
// not a file, so false is returned instead of treating "not a directory" as
// "is a file".
func IsFile(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
19 changes: 19 additions & 0 deletions pkg/util/cleanname.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package util

import (
"regexp"
"strings"
)

var (
	// ReNameReg matches one bracketed segment, opened by a parenthesis or
	// "【" and closed by a parenthesis or "】", together with an optional
	// single whitespace character in front of it. The segment must contain at
	// least four characters, none of which may be a bracket or one of 册, 卷,
	// 套, so short notes such as edition numbers and volume/set markers are
	// not matched.
	ReNameReg = regexp.MustCompile(`(?m)(\s?[((【][^))】((【册卷套]{4,}[))】])`)
)

// CleanTitle removes every segment matched by ReNameReg from title, replaces
// double quotes with spaces and trims the surrounding whitespace.
//
// The title is returned unchanged when nothing matches. It is also returned
// unchanged when the cleanup would leave nothing behind (the whole title was
// a single bracketed segment), so callers never receive an empty title for a
// non-empty input.
func CleanTitle(title string) string {
	// MatchString answers the yes/no question without collecting all matches.
	if !ReNameReg.MatchString(title) {
		return title
	}
	stripped := ReNameReg.ReplaceAllString(title, "")
	cleaned := strings.TrimSpace(strings.ReplaceAll(stripped, "\"", " "))
	if cleaned == "" {
		// Nothing usable is left; keep the original rather than an empty name.
		return title
	}
	return cleaned
}
32 changes: 32 additions & 0 deletions pkg/util/cleanname_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package util

import (
"testing"
)

// TestCleanTitle runs CleanTitle over a table of real-world book titles and
// checks each cleaned result against the expected title.
func TestCleanTitle(t *testing.T) {
	// titleCase pairs a raw title with the title expected after cleaning.
	type titleCase struct {
		raw  string
		want string
	}

	table := []titleCase{
		{"他的秘密【有些秘密注定要永远保守下去,除非你做好了失去一切的准备。《大小谎言》作者、澳洲小说天后莫里亚蒂成名作。被译介为121种版本!出版后盘踞《纽约时报》畅销榜近150周。】", "他的秘密"},
		{"体坛周报(2024年第91期)", "体坛周报"},
		{"历史的裂变:中国历史上的十三场政变(畅销书《大唐兴亡三百年》作者王觉仁力作,用小说笔法,讲述中华五千年历史上的13场知名政变,聚焦那些封建王朝中皇权的非正常更迭,还原权力争斗下最真实的人性。)", "历史的裂变:中国历史上的十三场政变"},
		{"具象之力(世界科幻大师丛书)", "具象之力"},
		{"幸运儿:晚清留美幼童的故事 (他们是大文豪马克・吐温的朋友。他们曾目睹一个神话般的时代。他们曾亲身经历近代中国的风云激荡;他们的命运,离奇而曲折;他们的故事,美丽而忧伤。他们有一个永远的名字:“留美幼童”。)", "幸运儿:晚清留美幼童的故事"},
		{"武英殿本四库全书总目·上(1-30册)【电子版独家上线!国家图书馆倾情贡献!豆瓣9.6!】", "武英殿本四库全书总目·上(1-30册)"},
		{"\"当代中国人文大系“精选(套装共35册)【人大出版社积累多年,集合当代名家著作,收罗中西方政治、哲学、历史研究精粹!】", "当代中国人文大系“精选(套装共35册)"},
		{"版式设计法则", "版式设计法则"},
		{"成功企业这样管理(套装12册)", "成功企业这样管理(套装12册)"},
		{"深入理解Java虚拟机:JVM高级特性与最佳实践(第3版)", "深入理解Java虚拟机:JVM高级特性与最佳实践(第3版)"},
		{"(第9版)公务员录用考试华图名家讲义系列教材:申论万能宝典", "(第9版)公务员录用考试华图名家讲义系列教材:申论万能宝典"},
		{"第二座山(第一座山是构建自我、定义自我,其意义在于获取;第二座山是摆脱自我、舍弃自我,其意义在于奉献。《纽约时报》畅销书作者戴维·布鲁克斯全新作品,以新的诠释为人类生命的意义提出省思。)", "第二座山"},
	}

	for i := range table {
		tc := table[i]
		if got := CleanTitle(tc.raw); got != tc.want {
			t.Errorf("CleanTitle(%q) = %q; want %q", tc.raw, got, tc.want)
		}
	}
}
29 changes: 29 additions & 0 deletions pkg/util/filetool.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package util

import "os"

// Exists reports whether path refers to an existing file or directory.
//
// Any os.Stat failure (missing path, permission error, ...) is reported as
// false. os.Stat never fails with an "already exists" error, so a nil error
// is the only positive signal available here.
func Exists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}

// IsDir reports whether path refers to a directory. It returns false when
// the path cannot be stat'ed.
func IsDir(path string) bool {
	if info, statErr := os.Stat(path); statErr == nil {
		return info.IsDir()
	}
	return false
}

// IsFile reports whether path refers to an existing entry that is not a
// directory.
//
// A path that cannot be stat'ed (for example because it does not exist) is
// not a file, so false is returned instead of treating "not a directory" as
// "is a file".
func IsFile(path string) bool {
	info, err := os.Stat(path)
	if err != nil {
		return false
	}
	return !info.IsDir()
}

0 comments on commit 595381b

Please sign in to comment.