-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathzhihu2markdown.py
30 lines (25 loc) · 1.08 KB
/
zhihu2markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from lib.transformer import *
import argparse
import re
import os
# Matches a zhihu column article URL and captures its numeric ID.  The dots
# are escaped so they only match literal dots, and the capture is limited to
# digits so a trailing slash or query string is not swallowed into the ID.
ARTICLE_URL_PATTERN = re.compile(r'https://zhuanlan\.zhihu\.com/p/(\d+)')


def parse_article_id(article_url):
    """Return the numeric article ID found in *article_url*.

    Args:
        article_url: URL such as ``https://zhuanlan.zhihu.com/p/1234567``.

    Returns:
        The ID as a string of digits, or ``None`` when the URL does not
        match ``ARTICLE_URL_PATTERN``.
    """
    matched = ARTICLE_URL_PATTERN.search(article_url)
    return matched.group(1) if matched else None


def main(argv=None):
    """Parse the command line and write the article as a Markdown file.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Exits through ``parser.error`` (usage message, exit status 2) when the
    article URL is not recognised.
    """
    parser = argparse.ArgumentParser(description='Transform zhihu article to Markdown format')
    parser.add_argument('article_url', help='URL of zhihu article')
    parser.add_argument('-o', '--output', help='path of output markdown file', default='./a.md')
    parser.add_argument('-i', '--image_dir', help='If present, download image to the image dir path')
    parser.add_argument('-a', '--user_agent', help='User agent')
    args = parser.parse_args(argv)

    config = Config()
    if args.image_dir:
        # Giving an image directory is what switches image download on.
        config.download_image = True
        config.asset_path = args.image_dir
    if args.user_agent:
        config.user_agent = args.user_agent

    article_id = parse_article_id(args.article_url)
    if article_id is None:
        # Previously a non-matching URL crashed with AttributeError on the
        # missing match object, and the fallback raised a plain string,
        # which is itself a TypeError.  Report a normal usage error instead.
        parser.error("Article URL not match. Must like: https://zhuanlan.zhihu.com/p/1234567")

    article = Article(article_id, config)
    output_path = os.path.expanduser(args.output)
    # Explicit UTF-8 so the result does not depend on the platform's default
    # encoding, which may be unable to represent the article text.
    with open(output_path, 'w', encoding='utf-8') as output_file:
        output_file.write(article.markdown)


if __name__ == '__main__':
    main()