-
Notifications
You must be signed in to change notification settings - Fork 0
/
gsch.py
executable file
·196 lines (151 loc) · 6.39 KB
/
gsch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#!/usr/bin/env python3
### Imports ###
import argparse
import sys
import re
from datetime import datetime
from scidownl import scihub_download
from scholarly import scholarly
# from fuzzy_match import algorithims
# import bibtexparser
class NotFoundError(Exception):
    """Raised when a publication search yields no acceptable result.

    Attributes:
        message: Human-readable description of what could not be found.
    """

    def __init__(self, message):
        # Initialise the base class explicitly so ``args`` / ``str(exc)`` are
        # populated by design rather than as a side effect of ``__new__``.
        super().__init__(message)
        self.message = message
def query_yes_no(question, default="yes"):
    """Prompt for a yes/no answer on stdout/stdin and return it as a bool.

    "question" is the text shown to the user.
    "default" is the answer assumed when the user just presses <Enter>.
        It must be "yes" (the default), "no" or None (meaning the user
        has to type an explicit answer).

    Returns True for "yes" and False for "no". The prompt repeats until
    a recognised answer is entered.

    Raises ValueError when "default" is none of the accepted values.
    """
    answers = {"yes": True, "y": True, "ye": True,
               "no": False, "n": False}
    hints = {None: " [y/n] ", "yes": " [Y/n] ", "no": " [y/N] "}

    if default not in (None, "yes", "no"):
        raise ValueError("invalid default answer: '%s'" % default)
    prompt = hints[default]

    while True:
        sys.stdout.write(question + prompt)
        reply = input().lower()
        # An empty reply selects the default, if there is one.
        if reply == '' and default is not None:
            return answers[default]
        if reply in answers:
            return answers[reply]
        sys.stdout.write("Please respond with 'yes' or 'no' "
                         "(or 'y' or 'n').\n")
def query_bib_title(bibtex: dict) -> bool:
    """Show the entry's "title" value as a yes/no prompt and return the answer."""
    title = bibtex["title"]
    return query_yes_no(title)
### ======================================================
# Scholarly get bibtex
def get_bibtex_for_pubs(pubs: str, manual=None) -> tuple:
    """Search via ``scholarly`` and return the first accepted hit.

    Args:
        pubs: Free-text search string passed to ``scholarly.search_pubs``.
        manual: When true, each hit's title is shown through
            ``query_bib_title`` and only a confirmed hit is returned; when
            false, the first hit is returned. ``None`` (the default) falls
            back to the ``-m`` flag on the module-level ``args`` namespace,
            or to ``False`` when that global does not exist (e.g. the
            module was imported rather than run as a script).

    Returns:
        A ``(url, bibtex)`` tuple: the hit's ``pub_url`` (empty string when
        the hit carries none) and its BibTeX text from ``scholarly.bibtex``.

    Raises:
        NotFoundError: The search produced no hit, or the user rejected
            every hit in manual mode.
    """
    if manual is None:
        # ``args`` is only created under ``__main__``; look it up defensively
        # so the function also works when called from another module.
        manual = bool(getattr(globals().get("args"), "m", False))

    for result in scholarly.search_pubs(pubs):
        if manual and not query_bib_title(result["bib"]):
            continue
        # NOTE(review): 'pub_url' does not appear to be guaranteed on every
        # result, so avoid a KeyError and hand back an empty URL instead.
        return result.get("pub_url", ""), scholarly.bibtex(result)

    raise NotFoundError(f"Can't find {pubs}")
def add_url_to_bib(bib, url):
    r"""Wrap the title of a BibTeX entry in a LaTeX ``\href`` pointing at *url*.

    Each line whose whitespace-stripped form starts with ``title={`` is
    rewritten from ``title={Some Title}`` to
    ``title={\href{url}{Some Title}}``. Only the outermost pair of braces on
    the line is wrapped, so braces nested inside the title are left alone.
    A title line with no closing brace on the same line is left unchanged
    because it cannot be wrapped safely.

    Args:
        bib: BibTeX text, one field per line.
        url: Link target inserted into the ``\href``.

    Returns:
        The BibTeX text with the title line(s) rewritten.
    """
    biblines = bib.split('\n')
    for i, line in enumerate(biblines):
        # Ignore all whitespace so "title = {" and "title={" both match.
        line_strip = re.sub(r"\s+", "", line, flags=re.UNICODE)
        if not re.search(r'^title={', line_strip):
            continue
        start = line.index('{')
        end = line.rfind('}')
        if end < start:
            continue
        biblines[i] = (
            line[:start + 1]
            + '\\href{' + url + '}{'
            + line[start + 1:end]
            + '}}'
            + line[end + 1:]
        )
    return '\n'.join(biblines)
def write_to_file(bibtex, url='', fname='out.bib'):
    """Append one record to *fname*: the URL line, the BibTeX text, a newline.

    The file is opened in append mode, so earlier records are kept. A
    "Saving to ..." notice is printed before writing.
    """
    print(f'Saving to {fname}\n')
    with open(fname, "a") as handle:
        handle.writelines([url + '\n' + bibtex, '\n'])
def download_one_paper(url, fname, folder='gsch-pdf'):
    """Download one paper through ``scihub_download`` into ``./<folder>/<fname>``.

    Args:
        url: Identifier handed to ``scihub_download`` as the paper keyword.
        fname: Output file name; spaces are replaced with hyphens.
        folder: Directory (relative to the current working directory) that
            receives the download.
    """
    # str.replace returns a new string; the result must be re-bound or the
    # spaces would silently remain in the output path.
    fname = fname.replace(' ', '-')
    out = f"./{folder}/{fname}"
    # NOTE(review): a URL is passed with paper_type="pmid"; kept as-is, but
    # confirm against scidownl which paper_type suits publication URLs.
    scihub_download(url, paper_type="pmid", out=out)
def make_filename(bib):
    """Build a file-name stem of the form ``<year>_<sanitised-title>`` from BibTeX.

    The ``title`` and ``pub_year`` fields are read line by line; a plain
    ``year`` field is used when ``pub_year`` is absent. Every run of
    characters other than ASCII letters and digits in the title becomes one
    hyphen. A missing field is reported on stdout and treated as an empty
    string. When neither a year nor a title is found, the current wall-clock
    time (``HH-MM-SS``) is returned so the caller still gets a usable name.

    Args:
        bib: BibTeX text, one field per line.

    Returns:
        The generated file-name stem (no extension).
    """
    # Start from empty values so a missing field cannot leave a name unbound.
    title = ''
    pub_year = ''
    year = ''
    for line in bib.split('\n'):
        # Ignore all whitespace so "title = {" and "title={" both match.
        line_strip = re.sub(r"\s+", "", line, flags=re.UNICODE)
        if re.search(r'^title={', line_strip):
            # Text after the last "{" up to the next "}" (innermost group).
            title = line.split('{')[-1].split('}')[0]
        elif re.search(r'^pub_year={', line_strip):
            pub_year = line.split('{')[1].split('}')[0]
        elif re.search(r'^year={', line_strip):
            year = line.split('{')[1].split('}')[0]

    pub_year = pub_year or year
    if not title:
        print("'title' was not found in bib")
    if not pub_year:
        print("'pub_year' was not found in bib")

    if not (pub_year or title):
        return datetime.now().strftime('%H-%M-%S')
    return pub_year + '_' + re.sub('[^A-Za-z0-9]+', '-', title)
# def search_pubs_in_bib(bib: str, pubs:str) -> str:
# """Returns matched bibtex ID"""
# with open(bib) as bibtex_file:
# bib_database = bibtexparser.load(bibtex_file)
# def match_algo(titlez, key) -> float:
# if regex.search('(%s){e<=1}' % key, title, flags=regex.IGNORECASE):
# return 0.8
# return algorithims.trigram(title, key)
# match_threshold = 0.7
# results : List[Tuple[float, dict]] = []
# for entry in bib_database.entries:
# if 'title' in entry:
# score = match_algo(entry['title'], pubs)
# match_result = (score, entry)
# results.append(match_result)
# results.sort(key=lambda x: x[0], reverse=True)
# for result in results:
# if result[0] >= match_threshold and query_bib_title(result[1]):
# return result[1]['ID']
# raise NotFoundError(f"no match for 'l{pubs} in {bib}")
# def prepend_to_bib(new_entry: str, bibfile: str):
# with open(bibfile) as bibtex_file:
# bib_database = bibtexparser.load(bibtex_file)
# new_database = bibtexparser.loads(new_entry)
# bib_database.entries.insert(0, new_database.entries[0])
# with open(bibfile, 'w') as bibtex_file:
# bibtexparser.dump(bib_database, bibtex_file)
if __name__ == "__main__":
    # Usage examples shown verbatim at the end of --help (hence the
    # RawDescriptionHelpFormatter below).
    example_text = """Examples:
./gsch.py 'Steep-Slope Hysteresis-Free Negative-Capacitance 2D Transistors'
./gsch.py -pdf 'Steep-Slope Hysteresis-Free Negative-Capacitance 2D Transistors' 'Del Rio Castillo A E et al 2018 High-yield production of 2D crystals by wet-jet milling Mater' 'Karagiannidis P G et al 2017 Microfluidization of graphite and formulation of graphene-based conductive inks ACS Nano'
"""
    description = "script for batch download of bib files and corresponding pdfs"
    parser = argparse.ArgumentParser(description=description,
                                     epilog=example_text,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('pubs', type=str, nargs='+', help="publication titles separated with space")
    parser.add_argument('-m', action='store_true', help="manually accept search result")
    parser.add_argument('-pdf', action='store_true', help="download pdf of the article via scihub")
    # ``args`` stays a module-level global: get_bibtex_for_pubs reads ``args.m``.
    args = parser.parse_args()

    not_found = []
    for pub in args.pubs:
        print(f"# Searching key words: {pub}")
        try:
            url, bibtex = get_bibtex_for_pubs(pub)
        except NotFoundError as exc:
            # One missing publication must not abort the rest of the batch.
            print(f"Error: {exc.message}", file=sys.stderr)
            not_found.append(pub)
            continue
        print(url, '\n', bibtex)
        write_to_file(bibtex, url=url, fname='lit.bib')
        if args.pdf:
            print(f"## Downloading: {pub}")
            try:
                fname = make_filename(bibtex)
                download_one_paper(url, fname, folder='gsch-pdf')
            except Exception as exc:
                # Best-effort download: report the reason and carry on, but
                # let KeyboardInterrupt / SystemExit propagate.
                print(f"Error: SciHub could not download {pub} ({exc})")

    if not_found:
        # Keep a non-zero exit status when any lookup failed.
        sys.exit(1)