-
Notifications
You must be signed in to change notification settings - Fork 6
/
doi2bibtex
executable file
·150 lines (137 loc) · 4.02 KB
/
doi2bibtex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/bin/sh
# doi2bibtex - Get BibTeX from DOI
#
# Authors: Felix Ospald <[email protected]>
# Gerd Wachsmuth <[email protected]>
# Version: 2017.1
# Licence: GPL v3
# maybe interesting (uses javascript):
# http://www.doi2bib.org/#/doi
# Issue help message if necessary: with no argument, print a short usage
# summary and exit with status 1.
if [ "$#" -eq 0 ]; then
  # Strip the directory part with a parameter expansion. Unlike the unquoted
  # $(basename $0) this also works when the script path contains blanks, and
  # it saves two subshells.
  prog=${0##*/}
  echo "Usage: $prog DOI"
  echo "or: $prog http://dx.doi.org/DOI"
  echo "will try and retrieve the BibTeX file for the DOI given."
  exit 1
fi
# Extract the DOI from the first argument, which may be a bare DOI or a URL
# that contains one.
# Earlier pattern, from https://github.com/regexhq/doi-regex/blob/master/index.js
# DOI=$(echo $1 | grep '10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![%"#? ])\S)+' -P -o )
# Current pattern, from
# https://www.crossref.org/blog/dois-and-matching-regular-expressions/
# printf with a quoted argument passes the text through unchanged (no word
# splitting, globbing or echo escape handling); grep -E replaces the
# obsolescent egrep; head keeps a single match so the URL stays on one line.
DOI=$(printf '%s\n' "$1" \
  | grep -E -o '10[.][0-9]{4,9}/[-._;()/:a-zA-Z0-9<>]+' \
  | head -n 1)
if [ -z "$DOI" ]; then
  echo "${0##*/}: no DOI found in '$1'" >&2
  exit 1
fi
# Ask the DOI resolver for BibTeX through content negotiation.
#   -f  fail on HTTP errors instead of capturing the error page as BibTeX
#   -sS no progress meter, but still report errors on stderr
#   -L  follow the redirect from doi.org to the registration agency
if ! BIBTEX=$(curl -fsSL -H "Accept: application/x-bibtex" "https://doi.org/$DOI"); then
  echo "${0##*/}: could not retrieve BibTeX for DOI $DOI" >&2
  exit 1
fi
if [ -z "$BIBTEX" ]; then
  echo "${0##*/}: empty response for DOI $DOI" >&2
  exit 1
fi
# env echo "$BIBTEX"
# PROG1: first awk pass over the fetched record. It prints the entry header in
# upper case with blanks removed and filters out a fixed set of fields.
# (No apostrophes may be used inside the program text: it is single-quoted.)
PROG1='
BEGIN {
  # Match field names regardless of case.
  IGNORECASE = 1
  # Non-zero while the lines of an unwanted field are being skipped.
  dropping = 0
}

# Entry header: remove all blanks and print it in upper case.
/^\s*@/ {
  header = $0
  gsub(" ", "", header)
  print toupper(header)
  next
}

# First line of a field that should not appear in the output.
/^\s*(MRNUMBER|MRREVIEWER|MRCLASS|CODEN|URL\s*=\s*{https?:\/\/(dx\.)?doi\.org)/ {
  dropping = 1
  next
}

# First line of any other field: stop skipping.
/^\s*[A-Z]+\s*=/ {
  dropping = 0
}

# Print everything that is not being skipped; the closing brace of the entry
# is printed in any case.
!dropping || /^}$/ {
  print
}
'
# PROG2: gawk program (it uses gensub) that replaces the citation key of the
# entry with <last names of the authors or editors><year>.
#
# Rules, in the order they appear:
#  * Entry header ("@TYPE{key,"): keep the text up to the opening brace in
#    "head", reset the per-entry state and skip the line, so the original key
#    is discarded. IGNORECASE is switched on here, i.e. only once a header
#    line has matched.
#  * Closing "}" line: print head, then one cleaned-up last name per name in
#    "authors" (names are split on " and "), then the year and a comma, then
#    the collected body.
#  * Field lines ("NAME ="): AUTHOR fills "authors"; EDITOR does so only while
#    no AUTHOR field has been read; YEAR stores four characters starting at
#    the first regex match. Only the first 100 characters after the first
#    opening brace of an AUTHOR/EDITOR line are kept.
#  * Lines without "=" are appended to "authors" while an AUTHOR/EDITOR field
#    is active (continuation lines).
#  * Every line except the header is appended to "body". The field rules do
#    not end with "next", so field lines reach this rule as well.
#
# Last-name clean-up: the part before ", " is used ("lastname, firstname"),
# otherwise the last blank-separated word; {\"x} becomes xe, {\ss} becomes ss,
# other braced accent commands are reduced to their letter, apostrophes and a
# trailing "}," are removed, and each word is capitalized and concatenated.
#
# NOTE(review): substr(..., 0, n) passes start index 0 although awk strings
#   are 1-based; this presumably relies on gawk treating 0 like 1 - confirm.
# NOTE(review): the year regex "[[0-9]]*" looks like a typo for "[0-9]+";
#   confirm the intent before changing it.
# NOTE(review): "authors" is not cleared in the header rule, unlike the other
#   state variables.
PROG2='
/@[A-Z]*{[A-Za-z0-9._]*,/{
IGNORECASE = 1;
# Save the header and init some variables.
head=substr($0,0,match($0,"{"));
body="";
authors_active=0;
have_read_authors=0;
year=0;
next;
}
/^}$/{
# This is the end of the entry. Print everything!
body=body $0;
printf "%s",head;
# Create a nice label (authors or editors and year) for the entry.
count=split(authors,author_array," +and +");
for( i=1; i<=count; i++ ) {
# First, try whether the names are given as "lastname, firstname"
idx=match(author_array[i],", ");
lastname=substr(author_array[i],0,idx-1);
if( idx == 0 ) {
# Now, try for "firstname lastname"
idx=match(author_array[i]," [^ ]*$");
lastname=substr(author_array[i],idx+1,length(author_array[i]));
}
lastname=gensub("{\\\\\"([a-zA-Z])}","\\1e","g", lastname); # test for umlauts
lastname=gensub("{\\\\ss}","ss","g", lastname); # test for eszett
lastname=gensub("{[^{}]*([a-zA-Z])}","\\1","g", lastname); # test for accents (this is needed two times, e.g. for "{\v{c}}")
lastname=gensub("{[^{}]*([a-zA-Z])}","\\1","g", lastname);
lastname=gensub("'\''","","g", lastname); # Remove apostroph
lastname=gensub("},$","","g", lastname); # Remove closing }
word_count = split( lastname, lastname_array, " "); # Split lastname into words and capitalize each of them
lastname = "";
for( j=1; j<= word_count; j++ ) { # Fuer Juan Carlos ;)
lastname = lastname toupper(substr(lastname_array[j],1,1)) substr(lastname_array[j],2,1000);
}
printf "%s",lastname;
};
print year ",";
print body;
next;
}
/ *[A-Z]* =/{
if( toupper($1) == "AUTHOR" ) {
# If we already have read the editors, we overwrite them.
authors=substr($0,match($0,"{")+1,100);
authors_active=1;
have_read_authors=1;
}
else if( (toupper($1) == "EDITOR") && !have_read_authors){
# If we already have read the authors, we skip the editors.
authors=substr($0,match($0,"{")+1,100);
authors_active=1;
}
else {
authors_active=0;
if(toupper($1) == "YEAR"){
year=substr($0,match($0,"[[0-9]]*"),4)
}
};
}
/^[^=]*$/{
if(authors_active){
authors=authors $0;
}
}
//{
body=body $0 "\n";
next;
}'
# Post-process the fetched record in $BIBTEX and print the final entry.
#
# printf '%s\n' is used instead of echo throughout: the echo builtin of some
# /bin/sh implementations (dash, for instance) expands backslash sequences,
# which would corrupt LaTeX commands such as \textendash in the record.
#
# NOTE: the sed replacements below insert line breaks with "\n", which is a
# GNU sed extension.

# Remove month field from output
BIBTEX=$(printf '%s\n' "$BIBTEX" | sed -E 's/month *= *[a-z]{3} *, *//')
# Also if it is the last entry
BIBTEX=$(printf '%s\n' "$BIBTEX" | sed -E 's/, *month *= *[a-z]{3} *//')

# A record delivered on a single line is split into one field per line so
# that the line-oriented awk programs below can process it.
line_count=$(printf '%s\n' "$BIBTEX" | wc -l)
if [ "$line_count" -lt 2 ]; then
  # Insert line breaks: after the entry key, after every "}," and before the
  # closing brace.
  BIBTEX=$(printf '%s\n' "$BIBTEX" \
    | sed 's/,/&\n/' \
    | sed 's/},/&\n/g' \
    | sed 's/ *}$/\n}/')
fi

# Fix "=": exactly one blank on each side of the first "=" of every line.
BIBTEX=$(printf '%s\n' "$BIBTEX" | sed 's/ *= */ = /')

# Both programs are run with gawk: PROG1 depends on the gawk-only IGNORECASE
# variable, and PROG2 needs gawk anyway for gensub.
printf '%s\n' "$BIBTEX" | gawk "$PROG1" | gawk "$PROG2"
# add a newline
echo