-
Notifications
You must be signed in to change notification settings - Fork 6
/
SIAMbook
executable file
·85 lines (66 loc) · 2.67 KB
/
SIAMbook
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/bin/bash
# Download the chapters of a SIAM online book and join them into one PDF.
#
# Call this script like
#   SIAMbook http://epubs.siam.org/ebooks/siam/classics_in_applied_mathematics/cl31

# Parse the arguments: exactly one (the URL of the book page) is required.
if [[ $# -ne 1 ]]; then
  # Usage text goes to stderr and the exit status is non-zero, so a caller
  # can tell a misuse apart from a successful run.
  {
    echo "Usage: $(basename "$0") URL-to-SIAM-content"
    echo "Example: $(basename "$0") http://epubs.siam.org/ebooks/siam/classics_in_applied_mathematics/cl31"
    echo " $(basename "$0") http://epubs.siam.org/doi/book/10.1137/1.9781611973655"
  } >&2
  exit 1
fi
# First (and only) command line argument is the URL of the online resource
URL=$1
# Use everything after the last "/" as an identifier. Trailing slashes are
# dropped first, so ".../cl31/" yields "cl31" instead of an empty string.
ID=$URL
while [[ $ID == */ ]]; do
  ID=${ID%/}
done
ID=${ID##*/}
# An empty identifier means the URL could not be parsed. "." and ".." are
# rejected as well because $ID is used as a directory name below /tmp.
if [[ -z $ID || $ID == . || $ID == .. ]]; then
  echo "$(basename "$0"): Failed to parse URL." >&2
  exit 1
fi
echo "Looking up $URL (identifier $ID)"
# Remember the current directory
pushd .
# Make a fresh temporary directory. ${ID:?} aborts the script when ID is
# empty, so the recursive delete can never be pointed at /tmp itself.
echo "Making directory /tmp/$ID"
rm -rf -- "/tmp/${ID:?}"
# Stop here if the directory cannot be created or entered; otherwise all
# following downloads and renames would happen in the caller's directory.
if ! mkdir -- "/tmp/$ID" || ! cd -- "/tmp/$ID"; then
  echo "$(basename "$0"): Cannot create or enter /tmp/$ID." >&2
  exit 1
fi
# Get the source (the HTML page is saved under the file name "source")
echo "$(basename "$0"): Getting the HTML source"
if ! wget --user-agent="Mozilla/5.0" "$URL" -O source; then
  echo "$(basename "$0"): Failed to download $URL." >&2
  exit 1
fi
# Parse the source to retrieve an ordered list of download URLs.
# Every line containing "PDF" is split on double quotes and the 16th piece is
# appended to the site prefix (presumably it is the chapter's site-relative
# link; this depends on the page layout).
awk '/PDF/ { split($0, list, "\""); print "http://epubs.siam.org" list[16] }' source > list
# Without any link there is nothing to download, so stop with an error.
if [[ ! -s list ]]; then
  echo "$(basename "$0"): No chapter links found in the HTML source." >&2
  exit 1
fi
# Retrieve the individual files from SIAM; wget's messages go to "logfile",
# which is inspected below and by the later renaming step.
echo "$(basename "$0"): Getting the individual chapter files"
wget --user-agent="Mozilla/5.0" -i list -o logfile
# If resource was secured, print an error message and exit with a failure status
if grep --silent resource-secured logfile; then
  echo "$(basename "$0"): Resource seems to be secured." >&2
  exit 1
fi
# Create a list of files to concatenate: extract the file name from every
# "Saving to:" line of the wget log (several quote styles are accepted).
# See http://superuser.com/questions/270256/how-to-make-grep-p-foo-b-r-display-only-whats-in-the-brackets-do-i-need
perl -ne '/^Saving to: (‘|\`|“)(.+?)(’|'\''|”)$/ and print "$2\n";' < logfile > download_list
# qpdf is confused by the name of the downloaded files.
# Therefore, we rename them to 1.pdf, 2.pdf, ... in the order they were saved.
j=0
rm -f concat_list
# Read the list line by line so that names containing spaces or glob
# characters are kept as a single file name.
while IFS= read -r i; do
  j=$((j + 1))
  # A chapter that cannot be renamed would be missing from the book: give up.
  if ! mv -- "$i" "$j.pdf"; then
    echo "$(basename "$0"): Could not rename downloaded file $i." >&2
    exit 1
  fi
  echo "$j.pdf" >> concat_list
done < download_list
# Concatenate the pdf files to a book
echo "$(basename "$0"): Concatenating files"
# concat_list holds one file name per line; collecting them in an array keeps
# every name a single argument to qpdf.
pages=()
while IFS= read -r page; do
  pages+=("$page")
done < concat_list
if [[ ${#pages[@]} -eq 0 ]]; then
  echo "$(basename "$0"): No chapter files to concatenate." >&2
  exit 1
fi
qpdf --empty "$ID.pdf" --pages "${pages[@]}" --
qpdf_status=$?
# qpdf reports 3 when it only issued warnings; the output file is still
# written in that case, so only other non-zero codes are treated as failure.
if [[ $qpdf_status -ne 0 && $qpdf_status -ne 3 ]]; then
  echo "$(basename "$0"): qpdf failed (exit status $qpdf_status)." >&2
  exit 1
fi
# Fetch the title: the text of the first single-line <h2>...</h2> element in
# the HTML source, with spaces replaced by underscores. The replacement is
# done separately from the match so that a title without any space is kept
# (chaining it with "and" would drop such a title).
TITLE=$(perl -ne 'if (/<h2>(.*)<\/h2>/) { ($t = $1) =~ tr/ /_/; print "$t\n"; last }' source)
# Fetch the authors: for every ContribStored link inside a "contrib" div, take
# the text before the first URL-encoded comma (%2C) and append "_".
AUTHORS=$(grep -oP '<div class="contrib">.*?</div>' source \
  | grep -o 'ContribStored[^<]*</a' \
  | perl -ne '/ContribStored=(.*?)%2C/ and print "$1_";')
# Build the file name ($AUTHORS already ends in a single _, so authors and
# title end up separated by a double underscore).
BOOKTITLE=${AUTHORS}_${TITLE}_BOOK
# A "/" would be taken as a directory separator by mv; replace it.
BOOKTITLE=${BOOKTITLE//\//_}
# Rename the pdf
if ! mv -- "$ID.pdf" "$BOOKTITLE.pdf"; then
  echo "$(basename "$0"): Could not rename $ID.pdf to $BOOKTITLE.pdf." >&2
  exit 1
fi
# Go back to the directory that was saved on the directory stack earlier
popd
# Tell the user where the finished book can be found
printf '%s\n' "The book has been compiled and now resides in /tmp/$ID/$BOOKTITLE.pdf"