Commit 587556c: close #44 - v1.2.1 is out

ferru97 committed Oct 29, 2021 (1 parent: 8a3c5df)
Showing 7 changed files with 39 additions and 33 deletions.
54 changes: 27 additions & 27 deletions PyPaperBot/Crossref.py
```diff
@@ -42,41 +42,41 @@ def getPapersInfo(papers, scholar_search_link, restrict, scholar_results):
     papers_return = []
     num = 1
     for paper in papers:
-        while num <= scholar_results:
-            title = paper['title']
-            queries = {'query.bibliographic': title.lower(),'sort':'relevance',"select":"DOI,title,deposited,author,short-container-title"}
+        #while num <= scholar_results:
+        title = paper['title']
+        queries = {'query.bibliographic': title.lower(),'sort':'relevance',"select":"DOI,title,deposited,author,short-container-title"}
 
-            print("Searching paper {} of {} on Crossref...".format(num,scholar_results))
-            num += 1
+        print("Searching paper {} of {} on Crossref...".format(num,len(papers)))
+        num += 1
 
-            found_timestamp = 0
-            paper_found = Paper(title,paper['link'],scholar_search_link, paper['cites'], paper['link_pdf'], paper['year'], paper['authors'])
-            while True:
-                try:
-                    for el in iterate_publications_as_json(max_results=30, queries=queries):
+        found_timestamp = 0
+        paper_found = Paper(title,paper['link'],scholar_search_link, paper['cites'], paper['link_pdf'], paper['year'], paper['authors'])
+        while True:
+            try:
+                for el in iterate_publications_as_json(max_results=30, queries=queries):
 
-                        el_date = 0
-                        if "deposited" in el and "timestamp" in el["deposited"]:
-                            el_date = int(el["deposited"]["timestamp"])
+                    el_date = 0
+                    if "deposited" in el and "timestamp" in el["deposited"]:
+                        el_date = int(el["deposited"]["timestamp"])
 
-                        if (paper_found.DOI==None or el_date>found_timestamp) and "title" in el and similarStrings(title.lower() ,el["title"][0].lower())>0.75:
-                            found_timestamp = el_date
+                    if (paper_found.DOI==None or el_date>found_timestamp) and "title" in el and similarStrings(title.lower() ,el["title"][0].lower())>0.75:
+                        found_timestamp = el_date
 
-                            if "DOI" in el:
-                                paper_found.DOI = el["DOI"].strip().lower()
-                            if "short-container-title" in el and len(el["short-container-title"])>0:
-                                paper_found.jurnal = el["short-container-title"][0]
+                        if "DOI" in el:
+                            paper_found.DOI = el["DOI"].strip().lower()
+                        if "short-container-title" in el and len(el["short-container-title"])>0:
+                            paper_found.jurnal = el["short-container-title"][0]
 
-                            if restrict==None or restrict!=1:
-                                paper_found.setBibtex(getBibtex(paper_found.DOI))
+                        if restrict==None or restrict!=1:
+                            paper_found.setBibtex(getBibtex(paper_found.DOI))
 
-                    break
-                except ConnectionError as e:
-                    print("Wait 10 seconds and try again...")
-                    time.sleep(10)
+                break
+            except ConnectionError as e:
+                print("Wait 10 seconds and try again...")
+                time.sleep(10)
 
-            papers_return.append(paper_found)
+        papers_return.append(paper_found)
 
-            time.sleep(random.randint(1,10))
+        time.sleep(random.randint(1,10))
 
     return papers_return
```
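The lookup above leans on `iterate_publications_as_json` from the `crossref-commons` package. As a rough, self-contained sketch of the same query-and-pick pattern (the title is a placeholder, and the 0.75 title-similarity gate is omitted for brevity):

```python
# A minimal sketch of the Crossref lookup pattern used above.
# Assumes the crossref-commons package; the title is a placeholder query.
from crossref_commons.iteration import iterate_publications_as_json

title = "a placeholder paper title"
queries = {'query.bibliographic': title.lower(),
           'sort': 'relevance',
           'select': "DOI,title,deposited,author,short-container-title"}

best_doi, best_timestamp = None, 0
for el in iterate_publications_as_json(max_results=30, queries=queries):
    # Keep the most recently deposited candidate, mirroring the loop above
    ts = int(el.get("deposited", {}).get("timestamp", 0))
    if "DOI" in el and (best_doi is None or ts > best_timestamp):
        best_doi, best_timestamp = el["DOI"].strip().lower(), ts

print(best_doi)
```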
4 changes: 2 additions & 2 deletions PyPaperBot/Downloader.py
```diff
@@ -43,7 +43,7 @@ def saveFile(file_name,content, paper,dwn_source):
     paper.downloaded = True
     paper.downloadedFrom = dwn_source
 
-def downloadPapers(papers, dwnl_dir, num_limit, scholar_results, SciHub_URL=None):
+def downloadPapers(papers, dwnl_dir, num_limit, SciHub_URL=None):
     def URLjoin(*args):
         return "/".join(map(lambda x: str(x).rstrip('/'), args))
 
@@ -56,7 +56,7 @@ def URLjoin(*args):
     paper_files = []
     for p in papers:
         if p.canBeDownloaded() and (num_limit==None or num_downloaded<num_limit):
-            print("Download {} of {} -> {}".format(paper_number, scholar_results, p.title))
+            print("Download {} of {} -> {}".format(paper_number, len(papers), p.title))
             paper_number += 1
 
             pdf_dir = getSaveDir(dwnl_dir, p.getFileName())
```
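Dropping `scholar_results` from the signature means call sites must change too; a hypothetical updated call could look like:

```python
# Hypothetical call site for the new downloadPapers signature;
# progress is now reported against len(papers) inside the function.
papers = []  # placeholder: Paper objects from getPapersInfo
downloadPapers(papers, "./papers", num_limit=None, SciHub_URL=None)
```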
3 changes: 3 additions & 0 deletions PyPaperBot/Scholar.py
```diff
@@ -34,6 +34,9 @@ def scholar_requests(scholar_pages, url, restrict, scholar_results=10):
             break
 
         papers = schoolarParser(html)
+        if len(papers)>scholar_results:
+            papers = papers[0:scholar_results]
+
         print("\nGoogle Scholar page {} : {} papers found".format(i,scholar_results))
 
         if(len(papers)>0):
```
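The added slice is what actually enforces the `--scholar-results` cap per page; a minimal sketch with hypothetical values:

```python
# Minimal sketch of the new per-page cap (values are hypothetical).
papers = ["p1", "p2", "p3", "p4"]   # results parsed from one Scholar page
scholar_results = 2
if len(papers) > scholar_results:
    papers = papers[0:scholar_results]
print(papers)   # ['p1', 'p2'] -> only the first scholar_results entries are kept
```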
2 changes: 1 addition & 1 deletion PyPaperBot/__init__.py
```diff
@@ -1 +1 @@
-__version__= "1.2"
+__version__= "1.2.1"
```
3 changes: 2 additions & 1 deletion PyPaperBot/__main__.py
```diff
@@ -66,7 +66,7 @@ def main():
     parser.add_argument('--journal-filter', default=None, type=str ,help='CSV file path of the journal filter (More info on github)')
     parser.add_argument('--restrict', default=None, type=int ,choices=[0,1], help='0:Download only Bibtex - 1:Down load only papers PDF')
     parser.add_argument('--scihub-mirror', default=None, type=str, help='Mirror for downloading papers from sci-hub. If not set, it is selected automatically')
-    parser.add_argument('--scholar-results', default=10, type=int, choices=[1,2,3,4,5,6,7,8,9,10], help='Downloads the first x results in a scholar page(max=10)')
+    parser.add_argument('--scholar-results', default=10, type=int, choices=[1,2,3,4,5,6,7,8,9,10], help='Downloads the first x results for each scholar page(default/max=10)')
     parser.add_argument('--proxy', nargs='+', default=[], help='Use proxychains, provide a seperated list of proxies to use.Please specify the argument al the end')
     args = parser.parse_args()
 
@@ -146,3 +146,4 @@ def main():
 
 if __name__ == "__main__":
     main()
+    print("""Work completed!\nIf you like this project, you can offer me a cup of coffee at --> https://www.paypal.com/paypalme/ferru97 <-- :)\n""")
```
2 changes: 2 additions & 0 deletions README.md
````diff
@@ -24,6 +24,8 @@ Use `pip` to install from pypi:
 pip install PyPaperBot
 ```
 
+If on windows you get an error saying *error: Microsoft Visual C++ 14.0 is required..* try to install [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/it/visual-cpp-build-tools/) or [Visual Studio](https://visualstudio.microsoft.com/it/downloads/)
+
 ### For Termux users
 
 Since numpy cannot be directly installed....
````
4 changes: 2 additions & 2 deletions setup.py
```diff
@@ -6,15 +6,15 @@
 setuptools.setup(
     name = 'PyPaperBot',
     packages = setuptools.find_packages(),
-    version = '1.2',
+    version = '1.2.1',
     license='MIT',
     description = 'PyPaperBot is a Python tool for downloading scientific papers using Google Scholar, Crossref, and SciHub.',
     long_description=long_description,
     long_description_content_type="text/markdown",
     author = 'Vito Ferrulli',
     author_email = '[email protected]',
     url = 'https://github.com/ferru97/PyPaperBot',
-    download_url = 'https://github.com/ferru97/PyPaperBot/archive/v1.2.tar.gz',
+    download_url = 'https://github.com/ferru97/PyPaperBot/archive/v1.2.1.tar.gz',
     keywords = ['download-papers','google-scholar', 'scihub', 'scholar', 'crossref', 'papers'],
     install_requires=[
         'astroid>=2.4.2,<=2.5',
```
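Once the 1.2.1 release is on PyPI, users pick it up with a plain upgrade:

```
pip install --upgrade PyPaperBot
```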
