-
Notifications
You must be signed in to change notification settings - Fork 1
/
bibtex.py
218 lines (192 loc) · 8.15 KB
/
bibtex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
""" Class for handling BibTex interactions. """
from collections import defaultdict
from tempfile import NamedTemporaryFile as tempFile
import os
import re
# HTML formatting for LaTex commands
_HTML = {
'emph': r'<font style="font-style:italic;font-weight:bold;">',
'textit': r'<font style="font-style:italic;">',
'textbf': r'<font style="font-weight:bold;">',
'textsc': r'<font style="font-variant:small-caps;">',
}
class BibTex(object):
""" Handles BibTex calls and postprocessing of results. """
def __init__(self, **format_dct):
"""
Adjust bst file based on dct dictionary.
dct should hold formatting directives.
"""
ddct = defaultdict(str)
ddct.update(format_dct)
bst = tempFile(delete=False, suffix='.bst')
self.name = os.path.splitext(bst.name)[0]
self.shortname = os.path.split(self.name)[1]
self.path = os.path.split(bst.name)[0]
self.fdict = ddct
with open(self.name + '.tex', 'w') as f:
print >>f, r"""
\documentclass{{article}}
\begin{{document}}
\nocite{{*}}
\bibliographystyle{{{0}}}
\bibliography{{{0}}}
\end{{document}}""".format(self.shortname)
path = os.path.dirname(os.path.realpath(__file__))
with open(path + '/default.bst', 'r') as f:
template = f.read()
if ddct["query"]:
template = re.sub(r"\{f.~\}\{vv~\}\{ll\}\{, jj\}", "{ll}", template)
self.reverse = False
else:
template = self.adjustBst(ddct, format_dct, template)
print >>bst, template
bst.close()
def adjustBst(self, ddct, format_dct, template):
""" Adjust bst file to the format given by format_dct. """
# put style commands in template
for key, value in format_dct.items():
m = re.match('(.*)Style', key)
if not m or value is None or value == "None":
continue
key = m.group(1)
if not re.search(r'(?s)---' + key + 'style---', template):
continue
start = re.sub(r'\W', '', value)
end = ""
if start:
if ddct["html"]:
# these will be replaced with HTML code later
# BibTex would not like the HTML code
end = "end_html_" + start + "_end}"
start = r"\\" + start + "{start_html_" + start + "_start"
else:
end = '}'
start = r'\\' + start + '{'
template = re.sub("(?s)---"+key+"style---", start, template)
template = re.sub("(?s)---"+key+"styleend---", end, template)
# remove remaining styled places
template = re.sub(r"(?si)---\w+?style---", "", template)
template = re.sub(r"(?si)---\w+?styleend---", "", template)
# sorting
if 'name' in ddct["sortBy"]:
template = re.sub(r"presort3", r"presort2", template)
self.reverse = False
else:
self.reverse = 'oldest' not in ddct['sortBy']
# generate bibitem
if ddct["genBibitems"]:
bibitem = ddct["bibitemStyle"]
# uncomment bibitem
template = template.replace(r"% ", "")
if "{id}" in bibitem:
# {id}
template = template.replace(r"%%id", "")
else:
# {initialsyear}
template = template.replace(r"%%initials", "")
if '[' in bibitem:
# [initialsyear]
template = template.replace(r"%%[]", "")
return template
def run(self, bibstr):
"""Execute BibTex with entries from bibstr, and retrieve the results."""
with open(self.name + '.bib', 'w') as f:
print >>f, bibstr
try:
os.system('cd {}; pdflatex -interaction=batchmode {} >/dev/null'
.format(self.path, self.shortname))
os.system('cd {}; bibtex {} >/dev/null'
.format(self.path, self.shortname))
with open(self.name + '.bbl', 'r') as f:
data = f.read()
self.cleanup('blg', 'aux', 'bib', 'log', 'bbl')
except:
self.cleanup('blg', 'aux', 'bib', 'log', 'bbl')
return ""
# postprocess data
data = re.sub(r"^[\n\r\s]*", "", data)
data = re.sub(r"(?<=[^\n])\n", " ", data)
data = re.sub(r" +", " ", data)
data = re.sub(r"\n", "\n\n", data)
if self.reverse:
data = '\n\n'.join(data.split('\n\n')[::-1])
# put HTML code
for key, value in _HTML.items():
data = re.sub("(?s)start_html_" + key + "_start", value, data)
data = re.sub("(?s)end_html_" + key + "_end", "</font>", data)
# handle conditional numbers
data = "\n\n".join([self.removeNumbers(d) for d in data.split("\n\n")])
data = re.sub(r"[A-Z]{2,3}_END", "}", data) \
.replace("MR_START", r"\mref{MR").replace("AR_START", r"\arxiv{") \
.replace("ZBL_START", r"\zbl{").replace("DOI_START", r"\doi{") \
.replace("URL_START", r"\url{")
# cleanup data
data = re.sub(r"(?s)start_html_(\w+)_start", "", data)
data = re.sub(r"(?s)end_html_(\w+)_end", "", data)
data = re.sub(r"(?s)\\cprime", r"$'$", data)
data = re.sub(r"(?s)\\(bold|Bbb)", r"\bf", data)
data = re.sub(r"\{([A-Z])\}", r"\1", data)
return data
def removeNumbers(self, data):
""" Remove unwanted numbers. """
d = self.fdict
p = d["type"] # search or batch?
# MR and Zbl
mrzbl = {'Both': [True, True], 'MR#': [True, False],
'Zbl#': [False, True], 'Neither': [False, False]}
try:
# absolute
keep = mrzbl[d[p+"MRZbl"]]
if not keep[0]:
data = re.sub(r'(?si)MR_START.*?MR_END', '', data)
if not keep[1]:
data = re.sub(r'(?si)ZBL_START.*?ZBL_END', '', data)
except:
# conditional
if re.search("MR_START", data) and 'Zbl# if' in d[p+"MRZbl"]:
data = re.sub(r'(?si)ZBL_START.*?ZBL_END', '', data)
if re.search("ZBL_START", data) and 'MR# if' in d[p+"MRZbl"]:
data = re.sub(r'(?si)ZBL_START.*?ZBL_END', '', data)
# arxiv
if d[p+"Arxiv"] == 'Exclude':
data = re.sub(r'(?si)AR_START.*?AR_END', '', data)
# doi/link
something = not not re.search(r'(?si)(MR|ZBL|AR)_START', data)
mrorzbl = not not re.search(r'(?si)(MR|ZBL)_START', data)
doi = d[p+"IncludeDOIURL"]
if doi == 'Exclude' or ('arXiv' in doi and something) or \
('Only' in doi and 'arXiv' not in doi and mrorzbl):
# remove doi and link
data = re.sub(r'(?si)DOI_START.*?DOI_END', '', data)
data = re.sub(r'(?si)URL_START.*?URL_END', '', data)
# doi or link?
doi = d[p+"DOIURL"]
if doi == 'URL':
data = re.sub(r'(?si)DOI_START.*?DOI_END', '', data)
elif doi == 'DOI#' or ('different' in doi and re.search(
r"(?si)URL_START[^\s\n]*?dx\.doi[^\s\n]*?URL_END", data)):
data = re.sub(r'(?si)(URL_START.*?URL_END)', '', data)
elif 'as' in doi:
doi = re.search(r'(?si)DOI_START(.*?)DOI_END', data)
if doi and doi.group(1):
# DOI exists, but should look like URL
data = re.sub(r'(?si)(URL_START).*?(URL_END)',
r'\1http://dx.doi.org/{}\2'.format(doi.group(1)),
data)
# remove DOI
data = re.sub(r'(?si)DOI_START.*?DOI_END', '', data)
return data
def cleanup(self, *args):
""" Remove temporary files. """
for l in args:
try:
os.unlink(self.name + '.' + l)
except:
pass
def __del__(self):
""" Remove remaining temporary files. """
try:
self.cleanup('tex', 'bst')
except:
pass