Skip to content

Commit

Permalink
format
Browse files (browse the repository at this point in the history)
  • Loading branch information
myhloli committed Jun 20, 2024
1 parent 8efd3de commit c4fc4d5
Showing 1 changed file with 4 additions and 7 deletions.
11 changes: 4 additions & 7 deletions magic_pdf/cli/magicpdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -83,9 +83,7 @@ def do_parse(
):

local_image_dir, local_md_dir = prepare_env(pdf_file_name, parse_method)
image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter(
local_md_dir
)
image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter(local_md_dir)
image_dir = (os.path.basename(local_image_dir),)

if parse_method == "auto":
Expand All @@ -96,7 +94,7 @@ def do_parse(
elif parse_method == "ocr":
pipe = OCRPipe(pdf_bytes, model_list, image_writer, is_debug=True)
else:
print("unknow parse method")
print("unknown parse method")
sys.exit(1)

pipe.pipe_classify()
Expand All @@ -115,8 +113,7 @@ def do_parse(
# write_to_csv(r"D:\project\20231108code-clean\linshixuqiu\pdf_dev\新模型\新建文件夹\luanma.csv",
# [pdf_file_name, pipe.pdf_mid_data['not_common_character_rate'], pipe.pdf_mid_data['not_printable_rate']])

md_content = pipe.pipe_mk_markdown(image_dir, drop_mode=DropMode.NONE)

md_content = pipe.pipe_mk_markdown(str(image_dir), drop_mode=DropMode.NONE)
if f_dump_md:
"""写markdown"""
md_writer.write(
Expand Down Expand Up @@ -148,8 +145,8 @@ def do_parse(
path=f"{pdf_file_name}_origin.pdf",
mode=AbsReaderWriter.MODE_BIN,
)
content_list = pipe.pipe_mk_uni_format(image_dir, drop_mode=DropMode.NONE)

content_list = pipe.pipe_mk_uni_format(str(image_dir), drop_mode=DropMode.NONE)
if f_dump_content_list:
"""写content_list"""
md_writer.write(
Expand Down

0 comments on commit c4fc4d5

Please sign in to comment.