Skip to content

Commit

Permalink
update ci
Browse files Browse the repository at this point in the history
  • Loading branch information
quyuan committed Jul 3, 2024
1 parent 0ce98a0 commit 40ac58e
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 8 deletions.
43 changes: 43 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This workflow installs the Python dependencies when a requirements file changed in the latest commit, then runs the benchmark script (tools/benchmark.py).
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: magicdoc
# Triggers: pushes and pull requests targeting main (changes limited to
# cmds/ or Markdown files are ignored), plus manual dispatch.
on:
  push:
    branches:
      - "main"
    paths-ignore:
      - "cmds/**"
      - "**.md"
  pull_request:
    branches:
      - "main"
    paths-ignore:
      - "cmds/**"
      - "**.md"
  workflow_dispatch:
jobs:
  pdf-test:
    # Runs on the runner carrying the `doc` label (presumably a self-hosted
    # machine whose Python environment persists between runs - confirm).
    runs-on: doc
    timeout-minutes: 180
    strategy:
      fail-fast: true

    steps:
      - name: magicdoc
        uses: actions/checkout@v3
        with:
          # Two commits are fetched so that HEAD~1 exists for the diff below.
          fetch-depth: 2
      - name: check-requirements
        run: |
          # Reinstall dependencies only when one of the requirements files
          # was touched between HEAD~1 and HEAD.
          changed_files=$(git diff --name-only HEAD~1 HEAD)
          # Quoted so the list is printed one path per line and is not
          # subject to word splitting or glob expansion.
          echo "$changed_files"
          if [[ $changed_files =~ "requirements.txt" ]] || [[ $changed_files =~ "requirements-qa.txt" ]]; then
            pip install -r requirements.txt
            pip install -r requirements-qa.txt
          fi
      - name: get-benchmark-result
        run: |
          echo "start test"
          cd tools && python benchmark.py
45 changes: 45 additions & 0 deletions .github/workflows/cli.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# This workflow installs the Python dependencies when a requirements file changed in the latest commit, then runs the CLI tests (test/test_cli/test_cli.py) with pytest.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python

name: magicdoc
# Triggers: pushes and pull requests targeting main (changes limited to
# cmds/ or Markdown files are ignored), plus manual dispatch.
on:
  push:
    branches:
      - "main"
    paths-ignore:
      - "cmds/**"
      - "**.md"
  pull_request:
    branches:
      - "main"
    paths-ignore:
      - "cmds/**"
      - "**.md"
  workflow_dispatch:
jobs:
  cli-test:
    # Runs on the runner carrying the `doc` label (presumably a self-hosted
    # machine whose Python environment persists between runs - confirm).
    runs-on: doc
    timeout-minutes: 40
    strategy:
      fail-fast: true

    steps:
      - name: magicdoc cli
        uses: actions/checkout@v3
        with:
          # Two commits are fetched so that HEAD~1 exists for the diff below.
          fetch-depth: 2

      - name: check-requirements
        run: |
          # Reinstall dependencies only when one of the requirements files
          # was touched between HEAD~1 and HEAD.
          changed_files=$(git diff --name-only HEAD~1 HEAD)
          # Quoted so the list is printed one path per line and is not
          # subject to word splitting or glob expansion.
          echo "$changed_files"
          if [[ $changed_files =~ "requirements.txt" ]] || [[ $changed_files =~ "requirements-qa.txt" ]]; then
            pip install -r requirements.txt
            pip install -r requirements-qa.txt
          fi
      - name: test_cli
        run: |
          # Quoted so a workspace path containing spaces does not break cd.
          echo "$GITHUB_WORKSPACE"
          cd "$GITHUB_WORKSPACE" && pytest -s -v test/test_cli/test_cli.py
14 changes: 14 additions & 0 deletions requirements-qa.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
Levenshtein
nltk
rapidfuzz
# NOTE(review): `statistics` has been part of the Python standard library since 3.4; the PyPI package of the same name looks like a backport - confirm it is still needed.
statistics
openxlab # install opendatalab
pandas
numpy
matplotlib
seaborn
scipy
scikit-learn
tqdm
htmltabletomd
pypandoc
Empty file added test/test_cli/conf/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions test/test_cli/conf/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import os
# Repository root used by the CLI tests. GitHub Actions exports it as
# GITHUB_WORKSPACE; when the variable is unset or empty (e.g. a local run),
# fall back to the current working directory so that importing this module
# does not fail with `TypeError` on `None + str`.
_workspace = os.environ.get('GITHUB_WORKSPACE') or os.getcwd()

conf = {
    # Root directory of the checked-out code.
    "code_path": _workspace,
    # Directory under the root for conversion results.
    "pdf_res_path": _workspace + "/pdf_res",
}
Binary file added test/test_cli/datas/datas/test01.doc
Binary file not shown.
Binary file added test/test_cli/datas/datas/test02.docx
Binary file not shown.
12 changes: 12 additions & 0 deletions test/test_cli/datas/datas/test03.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>这是个标题</title>
</head>
<body>
<h1>这是一个简单的 HTML 页面</h1>
<p>Hello World!</p>
</body>
</html>
Binary file added test/test_cli/datas/datas/test05.ppt
Binary file not shown.
Binary file added test/test_cli/datas/datas/test06.pptx
Binary file not shown.
17 changes: 9 additions & 8 deletions test/test_cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import os
from lib import common
import logging
from conf import conf

code_path = "magic_doc" # 假设代码路径已经在配置文件中设置
output_path = "magic_doc/datas_new" # 输出路径
code_path = conf.conf["code_path"]
output_path = conf.conf["pdf_res_path"]

class TestDocConversion:

Expand All @@ -13,7 +14,7 @@ def test_convert_doc_to_md(self):
将DOC文件转换为Markdown
"""
file_path = os.path.join(code_path, "datas/test01.doc")
cmd = f"python {code_path}/cli.py --file-path {file_path} --output {output_path}"
cmd = f"python {code_path}/magic_doc/cli.py --file-path {file_path} --output {output_path}"
logging.info(cmd)
common.check_shell(cmd)
# 这里可以添加更多的检查函数来验证转换结果
Expand All @@ -23,7 +24,7 @@ def test_convert_docx_to_md(self):
将DOCX文件转换为Markdown
"""
file_path = os.path.join(code_path, "datas/test02.docx")
cmd = f"python {code_path}/cli.py --file-path {file_path} --output {output_path}"
cmd = f"python {code_path}/magic_doc/cli.py --file-path {file_path} --output {output_path}"
logging.info(cmd)
common.check_shell(cmd)
# 这里可以添加更多的检查函数来验证转换结果
Expand All @@ -33,7 +34,7 @@ def test_convert_html_to_md(self):
将HTML文件转换为Markdown
"""
file_path = os.path.join(code_path, "datas/test03.html")
cmd = f"python {code_path}/cli.py --file-path {file_path} --output {output_path}"
cmd = f"python {code_path}/magic_doc/cli.py --file-path {file_path} --output {output_path}"
logging.info(cmd)
common.check_shell(cmd)
# 这里可以添加更多的检查函数来验证转换结果
Expand All @@ -43,7 +44,7 @@ def test_convert_pdf_to_md(self):
将PDF文件转换为Markdown
"""
file_path = os.path.join(code_path, "datas/test04.pdf")
cmd = f"python {code_path}/cli.py --file-path {file_path} --output {output_path}"
cmd = f"python {code_path}/magic_doc/cli.py --file-path {file_path} --output {output_path}"
logging.info(cmd)
common.check_shell(cmd)
# 这里可以添加更多的检查函数来验证转换结果
Expand All @@ -53,7 +54,7 @@ def test_convert_ppt_to_md(self):
将PPT文件转换为Markdown
"""
file_path = os.path.join(code_path, "datas/test05.ppt")
cmd = f"python {code_path}/cli.py --file-path {file_path} --output {output_path}"
cmd = f"python {code_path}/magic_doc/cli.py --file-path {file_path} --output {output_path}"
logging.info(cmd)
common.check_shell(cmd)
# 这里可以添加更多的检查函数来验证转换结果
Expand All @@ -63,7 +64,7 @@ def test_convert_pptx_to_md(self):
将PPTX文件转换为Markdown
"""
file_path = os.path.join(code_path, "datas/test06.pptx")
cmd = f"python {code_path}/cli.py --file-path {file_path} --output {output_path}"
cmd = f"python {code_path}/magic_doc/cli.py --file-path {file_path} --output {output_path}"
logging.info(cmd)
common.check_shell(cmd)
# 这里可以添加更多的检查函数来验证转换结果
Expand Down

0 comments on commit 40ac58e

Please sign in to comment.