This repository has been archived by the owner on Aug 18, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanki_to_obsidian.py
228 lines (155 loc) · 5.72 KB
/
anki_to_obsidian.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#!/usr/bin/env python3
"""
Convert Anki cards from an .apkg deck into Obsidian compatible markdown files.

The notes are read from the sqlite collection stored inside the .apkg
archive, converted from HTML to Markdown and written as one .md file per
note into an ``export`` directory located next to this script.
"""
__author__ = "Dennis Irrgang"
__version__ = "0.1.0"
__license__ = "AGPL-3.0"
from sqlite3.dbapi2 import Cursor
from zipfile import ZipFile
import sqlite3
import re
import os
import shutil
import sys
import argparse
import markdownify as md
from pathvalidate._filename import sanitize_filename
def open_apkg(file: str) -> Cursor:
    """Unpack an .apkg archive and query the notes of its sqlite database.

    The archive is extracted into the relative directory ``tmp`` and the
    database ``tmp/collection.anki21`` is opened from there.

    Parameters:
        file (str): File path of the .apkg file.

    Returns:
        Cursor: sqlite cursor on which the query for the ``tags``, ``flds``
        and ``sfld`` columns of the ``notes`` table has been executed.
    """
    with ZipFile(file, 'r') as archive:
        archive.extractall('tmp')
    connection = sqlite3.connect("tmp/collection.anki21")
    # Connection.execute creates a cursor, runs the statement on it and
    # hands that cursor back.
    return connection.execute("SELECT tags, flds, sfld FROM notes")
def remove_cloze(text: str) -> str:
    """Removes Anki's Cloze indicators ({{c1::...}}) and keeps the hidden text.

    Only complete cloze deletions are rewritten; any other text, including
    text that merely starts with ``{{c``, is returned unchanged. An optional
    hint (``{{c1::text::hint}}``) is dropped together with the markers, and a
    cloze deletion may span several lines.

    Parameters:
        text (str): String with the to-be-removed Cloze indicators.

    Returns:
        str: Cloze-free string.
    """
    # Group 1 is the hidden text. Everything after the first "::" inside the
    # cloze is the hint and is discarded. DOTALL lets a deletion contain line
    # breaks.
    cloze_regex = re.compile(r'{{c\d+::(.*?)(?:::.*?)?}}', re.DOTALL)
    # Substituting the match with its own group leaves unmatched text alone,
    # so nothing is cut off from pieces that only look like a cloze.
    return cloze_regex.sub(r'\1', text)
def convert_mathjax(text: str) -> str:
    r"""Converts the Mathjax tokens from Anki to Markdown appropriate $

    Every complete ``\( ... \)`` group on a single line becomes ``$...$``
    with the surrounding whitespace of the formula removed. Text without a
    complete group, e.g. an opening ``\(`` that is never closed, is left
    untouched.

    Parameters:
        text (str): String containing Anki style inline Mathjax groups.

    Returns:
        str: String with the inline Mathjax groups delimited by ``$``.
    """
    mjregex = re.compile(r'\\\((.*?)\\\)')
    # A replacement function is used instead of a template because the
    # formula has to be stripped. Only real matches are rewritten, so
    # unmatched text is never truncated.
    return mjregex.sub(lambda match: '$' + match.group(1).strip() + '$', text)
def sanitize_html(text: str) -> str:
    '''Sanitizes incoming HTML to avoid certain characters that break markdown
    or otherwise do not work with it.

    Parameters:
        text (str): Field content of a note; the fields are expected to be
            separated by the unit separator character (0x1f).

    Returns:
        str: HTML without the first field and without separators, with
        non-breaking spaces normalised and line breaks added around blocks.
    '''
    # Removes duplicate title in the beginning of every file: everything up
    # to and including the first separator is dropped. partition() also copes
    # with an empty or multi-line first field; text without any separator is
    # kept as it is.
    _, separator, remainder = text.partition('\x1f')
    result = remainder if separator else text
    # The remaining separators carry no meaning in the exported file.
    result = result.replace('\x1f', '')
    # Non-breaking spaces (entity or literal character) become plain spaces.
    result = result.replace('&nbsp;', ' ').replace('\xa0', ' ')
    # NOTE(review): a former substitution used an empty pattern here, which
    # inserts a line break between every single character; it was dropped.
    # Restore it with its intended pattern if that pattern is known.
    # Anki sometimes mixes the order of HTML tokens which can break during the
    # markdown conversion stage
    result = result.replace('<br></b>', '</b><br>')
    result = result.replace('<b><br>', '<br><b>')
    # CSS formatting normally ensures adequate spacing between <div> and
    # previous content, this is not the case in Markdown format, hence we need
    # to add some linebreaks on our own
    result = result.replace('<div>', '<br><div>')
    result = result.replace('</div>', '</div><br>')
    result = result.replace('</dd></dl>', '</dd></dl><br>')
    return result
def transform_format(text: str) -> str:
    '''Turns the HTML code of an Anki note into Markdown.

    The text is sanitized, converted with markdownify (ATX headings, links
    stripped) and afterwards freed from cloze markers and Anki style Mathjax
    delimiters.
    '''
    markdown = md.markdownify(
        sanitize_html(text), heading_style="ATX", strip=['a'])
    # Escaping artifact that breaks Mathjax
    unescaped = re.sub(r'\\_', r'_', markdown)
    return convert_mathjax(remove_cloze(unescaped))
def save_file(name: str, content: str, tags: str) -> None:
    """Stores the file in the export directory next to this script.

    The export directory is created when it does not exist yet. Colons in the
    name are replaced by " -" before the name is sanitized and ".md" is
    appended.

    Parameters:
        name (str): Name of the file
        content (str): Content to be written into the file body.
        tags (str): Whitespace separated tags to be added in the #tag format
            at the end of the content.

    Returns:
        None
    """
    # abspath guards against a relative __file__, whose empty dirname would
    # otherwise turn the target into "/export" at the filesystem root.
    dirname = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'export')
    os.makedirs(dirname, exist_ok=True)
    filename = sanitize_filename(re.sub(r':', r' -', name).strip()+'.md')
    filename = os.path.join(dirname, filename)
    # The context manager closes (and thereby flushes) the file even when a
    # write fails.
    with open(filename, mode="w", encoding='utf-8') as file:
        file.write(content)
        file.write('\n')
        # split() already yields tags free of surrounding whitespace.
        file.write(''.join('#' + tag + ' ' for tag in tags.split()))
def cleanup() -> None:
    '''Deletes the ./tmp/ directory together with everything inside it.

    A failure to delete is not raised; the reason is printed instead.
    '''
    tmp_dir = './tmp/'
    try:
        shutil.rmtree(tmp_dir)
    except OSError as failure:
        reason = failure.strerror
        print(f"Error: {tmp_dir} : {reason}")
def init_argparse() -> argparse.ArgumentParser:
    '''Builds the argument parser of the command line interface.

    The parser knows the -v/--version switch and collects any number of
    positional arguments in ``files``.
    '''
    cli = argparse.ArgumentParser(
        usage="%(prog)s [OPTION] [FILE]...",
        description="Convert Anki cards from an .apkg deck into Obsidian compatible markdown files.",
    )
    version_text = f"{cli.prog} {__version__}"
    cli.add_argument("-v", "--version", action="version", version=version_text)
    cli.add_argument('files', nargs='*')
    return cli
def convert(file) -> None:
    '''Converts apkg file and exports the .md files into ./export

    Parameters:
        file: File path of the .apkg file to convert.

    Returns:
        None
    '''
    # Opened outside the try block: when the archive cannot be opened there
    # is no connection to close and nothing was extracted yet.
    cur = open_apkg(file)
    try:
        # Column order follows the query issued by open_apkg.
        for tags, body, title in cur.fetchall():
            save_file(title, transform_format(body), tags)
    finally:
        # Release the database file before its directory is deleted, and
        # delete the directory even when a note fails to convert.
        cur.connection.close()
        cleanup()
def main() -> None:
    """Entry point: converts every .apkg file named on the command line.

    The argument "-" is skipped. A path that does not exist or that is a
    directory is reported on standard error and the remaining files are
    still processed.

    Parameters:
        None (passed through CLI, see argparse/help)

    Returns:
        None
    """
    arguments = init_argparse().parse_args()
    for path in (entry for entry in arguments.files if entry != "-"):
        try:
            convert(path)
        except (FileNotFoundError, IsADirectoryError) as error:
            print(f"{sys.argv[0]}: {path}: {error.strerror}", file=sys.stderr)
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()