Skip to content

Commit

Permalink
Add PubMed fetching rule
Browse files Browse the repository at this point in the history
  • Loading branch information
dspinellis committed Feb 1, 2024
1 parent 51d2c85 commit 4af0610
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 0 deletions.
1 change: 1 addition & 0 deletions examples/common/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Crossref-April-2022
.depend
.depend.all
ORCID_2022_10_summaries.tar.gz
pubmed-data
reports
rolap
ror-v1.17.1-2022-12-16.zip
Expand Down
4 changes: 4 additions & 0 deletions examples/common/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ CROSSREF_DIR?=../common/Crossref-April-2022
USPTO_DIR?=../common/uspto-data
ORCID_SUMMARIES?=../common/ORCID_2022_10_summaries.tar.gz
ROR?=../common/ror-v1.17.1-2022-12-16.zip
# Must name the directory that fetch-pubmed.sh creates (pubmed-data);
# otherwise the $(PUBMED_DIR) target never comes into existence
PUBMED_DIR?=../common/pubmed-data

V?=1
TIME?=time
Expand All @@ -26,6 +27,9 @@ $(CROSSREF_DIR):
# Run the USPTO fetch script from ../common when the data directory is missing
$(USPTO_DIR):
	cd ../common && ./fetch-uspto.sh

# Run the PubMed fetch script from ../common when the data directory is missing
$(PUBMED_DIR):
	cd ../common && ./fetch-pubmed.sh

# Download the ORCID summaries archive.
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page; writing to a temporary name first keeps a failed or
# interrupted transfer from leaving a file that looks like the target.
$(ORCID_SUMMARIES):
	curl --fail -L https://orcid.figshare.com/ndownloader/files/37635374 >$@.tmp
	mv -f $@.tmp $@

Expand Down
22 changes: 22 additions & 0 deletions examples/common/fetch-pubmed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/sh
#
# Fetch baseline PubMed bibliographic data
#
# Creates the directory pubmed-data under the current directory and
# downloads into it every baseline file from number 1 up to the last
# one named in the server's README.txt.
# Files already present are skipped, so an interrupted run can be resumed.
#

# Fail on command errors and unset variables
set -eu

# No trailing slash: names are appended as $BASE/name
BASE=https://ftp.ncbi.nlm.nih.gov/pubmed/baseline

# File name prefix shared by all files of this baseline release
PREFIX=pubmed24n

# -p: do not fail when the directory remains from an earlier run
mkdir -p pubmed-data
cd pubmed-data

# Obtain last baseline file from README.txt
# (the greedy leading .* makes sed keep the last file number on the line)
last=$(curl --silent --show-error --fail "$BASE/README.txt" |
  sed -n "/${PREFIX}0001/s/.*${PREFIX}\([^.]*\)\.xml.*/\1/p")

# A failed download or an unexpected README layout yields an empty or
# non-numeric value; stop rather than let seq run with a bogus bound
case $last in
  ''|*[!0-9]*)
    echo "$0: unable to determine the last baseline file number" >&2
    exit 1
    ;;
esac

for n in $(seq 1 "$last") ; do
  file_name=$(printf '%s%04d.xml.gz' "$PREFIX" "$n")

  # Skip files completely fetched by an earlier run
  if [ -f "$file_name" ] ; then
    continue
  fi

  # Download under a temporary name so that a failed or interrupted
  # transfer never leaves a truncated file under the final name;
  # --fail turns HTTP errors into a non-zero exit, which set -e acts on
  curl --silent --show-error --fail "$BASE/$file_name" >"$file_name.part"
  mv "$file_name.part" "$file_name"
done

0 comments on commit 4af0610

Please sign in to comment.