diff --git a/Makefile b/Makefile
index 7a2514e..cbf9b07 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,8 @@ NORMAL_MARK := $(BUILD_DIR)/normal.mark
 TEX_FILES := $(wildcard $(LATEX_DIR)/*.tex)
 SVG_FILES := $(patsubst %.tex,%.svg,$(subst $(LATEX_DIR),$(IMAGES_DIR),$(TEX_FILES)))
 GIF_FILES := $(patsubst %.tex,%.gif,$(subst $(LATEX_DIR),$(IMAGES_DIR),$(TEX_FILES)))
-CONTENT := $(shell find src -type f -not -name .DS_Store) src/mimetype
+# CONTENT: every file under src except .DS_Store, *.tex, *.sty and *book-Z-H-4.html, plus src/mimetype.
+CONTENT := $(shell find src -type f -not -name .DS_Store -not -name \*.tex -not -name \*.sty -not -name \*book-Z-H-4.html) src/mimetype
 XML := $(shell find src -type f -name \*html)
 
 
@@ -44,6 +45,7 @@ check:
 	xmllint --noout $(XML)
 
 $(BUILD_DIR)/huge/%_cropped.pdf: $(BUILD_DIR)/huge/ $(LATEX_DIR)/%.tex
+	@echo "Building huge $@"
 	sed 's/\\sicpsize}{\\fontsize{16}{18}/\\sicpsize}{\\fontsize{200}{220}/' < $(LATEX_DIR)/sicpstyle.sty > $(LATEX_DIR)/sicpstyle2.sty
 	mv $(LATEX_DIR)/sicpstyle2.sty $(LATEX_DIR)/sicpstyle.sty
 	cd $(LATEX_DIR) && pdflatex -output-dir ./build/huge/ ./$*.tex
@@ -58,14 +60,16 @@ $(IMAGES_DIR)/%.svg: $(BUILD_DIR)/huge/%.pbm
 
 
 $(BUILD_DIR)/%_cropped.pdf: $(BUILD_DIR) $(LATEX_DIR)/%.tex
+	@echo "Building regular"
 	sed 's/\\sicpsize}{\\fontsize{200}{220}/\\sicpsize}{\\fontsize{16}{18}/' < $(LATEX_DIR)/sicpstyle.sty > $(LATEX_DIR)/sicpstyle2.sty
 	mv $(LATEX_DIR)/sicpstyle2.sty $(LATEX_DIR)/sicpstyle.sty
 	cd $(LATEX_DIR) && pdflatex -output-dir ./build ./$*.tex
 	pdfcrop --clip $(BUILD_DIR)/$*.pdf $(BUILD_DIR)/$*_cropped.pdf
 	rm -f $(NORMAL_MARK)
 
-$(IMAGES_DIR)/%.gif: $(BUILD_DIR)/%_cropped.pdf
-	convert $(BUILD_DIR)/$*_cropped.pdf $@
+# $(IMAGES_DIR)/%.gif: $(BUILD_DIR)/%_cropped.pdf
+# 	@echo "Converting $@"
+# 	convert $(BUILD_DIR)/$*_cropped.pdf $@
 
 $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)
@@ -79,4 +83,4 @@ svg: $(SVG_FILES)
 gif: $(GIF_FILES)
 
 clean:
-	rm -rf sicp.epub $(BUILD_DIR) $(SVG_FILES) $(GIF_FILES) src/mimetype
+	rm -rf sicp.epub $(BUILD_DIR) $(SVG_FILES) src/mimetype
diff --git a/error_counts.txt 
b/error_counts.txt new file mode 100644 index 0000000..3de7daf --- /dev/null +++ b/error_counts.txt @@ -0,0 +1,16 @@ + 103 element "p" not allowed here; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 101 element "div" not allowed here; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 94 element "caption" not allowed here; expected the element end-tag or element "tr" + 50 text not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 39 text not allowed here; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 35 element "u" not allowed anywhere; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 35 element "a" not allowed here; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", 
"ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 28 element "em" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 10 element "font" not allowed anywhere; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 10 element "a" not allowed here; expected element "li" + 8 element "blockquote" incomplete; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 7 element "tt" not allowed here; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 5 element "a" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 4 element "tt" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 3 element "p" not allowed here; expected element "li" + 2 element "sub" not allowed 
here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg")
diff --git a/fetch_images.sh b/fetch_images.sh
new file mode 100755
index 0000000..c89d406
--- /dev/null
+++ b/fetch_images.sh
@@ -0,0 +1,13 @@
+#! /bin/bash
+
+SRC_DIR="src/OEBPS/"
+IMAGE_SRC="http://mitpress.mit.edu/sicp/full-text/book/"
+
+for img in `grep "media-type=\"image/" ${SRC_DIR}content.opf | sed "s/.*href=\"\([^\"]*\)\".*/\1/"`; do
+  destination_file="${SRC_DIR}${img}"
+  if [ ! -e "${destination_file}" ]; then
+    image_file=`echo "${img}" | sed "s/^images\/\(.*\)/\1/"`
+    echo ${destination_file}
+    curl -s -o "${destination_file}" "${IMAGE_SRC}${image_file}"
+  fi
+done
diff --git a/fix_toc.py b/fix_toc.py
new file mode 100644
index 0000000..c92fad1
--- /dev/null
+++ b/fix_toc.py
@@ -0,0 +1,127 @@
+from collections import defaultdict, Counter
+from HTMLParser import HTMLParser
+import os.path
+import re
+import sys
+from urlparse import urldefrag
+from xml.etree import ElementTree
+
+ncx_namespace = 'http://www.daisy.org/z3986/2005/ncx/'
+
+ElementTree.register_namespace('', ncx_namespace)
+
+toc_file = sys.argv[1]
+content_dir, _ = os.path.split(toc_file)
+
+# found_sections[src_file][title]: number of headings with that title already
+# resolved in that file, so repeated titles map to successive headings.
+found_sections = defaultdict(Counter)
+
+
+def _tag_name(name):
+    """Return `name` prefixed with the NCX namespace as '{namespace}name'."""
+    return '{{{0}}}{1}'.format(ncx_namespace, name)
+
+
+def _section_id(section_number):
+    """Return the `__sec_`-prefixed anchor id for a section number string."""
+    return '__sec_' + section_number
+
+
+def subsection_source(content_src, subsection_number):
+    """Return `content_src` with the anchor of `subsection_number` as its fragment."""
+    # Built from the argument only, not from the module-level `subsection_match`.
+    return '#'.join([content_src, _section_id(subsection_number)])
+
+
+class SectionFinder(HTMLParser):
+    """Locate the `<a id=...>` seen before the heading titled `section_title`.
+
+    After `feed()`, `new_src` is `src_file#anchor-id`, or None when no matching heading was found.
+    """
+
+    def __init__(self, src_file, section_title):
+        HTMLParser.__init__(self)
+        self.last_id = None
+        self.new_src = None
+        self.consume_text = False
+        self.current_text = ''
+        self.section_title = section_title
+        self.src_file = src_file
+        # Which occurrence of this title (0-based) this finder should resolve.
+        self.section_title_index = found_sections[src_file][section_title]
+        self.found_sections = 0
+
+    def build_src(self, anchor_id):
+        """Return `src_file#anchor_id`."""
+        return '#'.join([self.src_file, anchor_id])
+
+    def handle_starttag(self, tag, attrs):
+        attrs = dict(attrs)
+        if tag == 'a' and 'id' in attrs:
+            self.last_id = attrs['id']
+        elif self.last_id is not None and tag.startswith('h'):
+            # NOTE: matches every tag whose name starts with 'h', not only h1-h6.
+            self.consume_text = True
+
+    def handle_endtag(self, tag):
+        if self.last_id and tag.startswith('h'):
+            if self.current_text == self.section_title:
+                if self.found_sections == self.section_title_index:
+                    self.new_src = self.build_src(self.last_id)
+                    found_sections[self.src_file][self.section_title] += 1
+
+                self.found_sections += 1
+
+            self.last_id = None
+            self.consume_text = False
+            self.current_text = ''
+
+    def handle_data(self, data):
+        if self.consume_text:
+            self.current_text += data
+
+
+def find_content_source(section_title, content_src):
+    """Return `content_src#anchor-id` for the heading `section_title`, or None."""
+    source_path = os.path.join(content_dir, content_src)
+    section_finder = SectionFinder(content_src, section_title)
+
+    with open(source_path) as source_file:
+        section_finder.feed(source_file.read())
+
+    return section_finder.new_src
+
+
+doc = ElementTree.parse(toc_file)
+
+for nav_point in doc.iter(_tag_name('navPoint')):
+    # A navPoint without a navLabel is skipped rather than raising AttributeError.
+    label_node = nav_point.find(_tag_name('navLabel'))
+    text_node = label_node.find(_tag_name('text')) if label_node is not None else None
+    content_node = nav_point.find(_tag_name('content'))
+
+    if text_node is None or content_node is None:
+        continue
+
+    old_src = content_node.get('src')
+    content_src, fragment = urldefrag(old_src)
+
+    if not fragment:
+        continue
+
+    subsection_match = re.match(r'^([0-9.]+)', text_node.text)
+    if subsection_match:
+        new_src = subsection_source(content_src, subsection_match.group(1))
+    else:
+        new_src = find_content_source(text_node.text, content_src)
+
+    if new_src is None:
+        print 'Cannot find reference for {0} in {1}'.format(text_node.text, content_src)
+        continue
+
+    print old_src, '->', new_src
+
+    content_node.set('src', new_src)
+
+doc.write(toc_file + '-new', xml_declaration=True, encoding='UTF-8') diff --git a/src/OEBPS/book-Z-H-1.html b/src/OEBPS/book-Z-H-1.html index 5a9cf74..7a2c09d 100644 --- a/src/OEBPS/book-Z-H-1.html +++ b/src/OEBPS/book-Z-H-1.html @@ -5,15 +5,15 @@ (c) Dorai Sitaram, http://www.cs.rice.edu/~dorai/tex2page --> - + Structure and Interpretation of Computer Programs - +

Structure and Interpretation
of Computer Programs

-
second edition 
+
second edition 

Harold Abelson and Gerald Jay Sussman
with Julie Sussman 

diff --git a/src/OEBPS/book-Z-H-10.html b/src/OEBPS/book-Z-H-10.html index 5aefee3..2fb7150 100644 --- a/src/OEBPS/book-Z-H-10.html +++ b/src/OEBPS/book-Z-H-10.html @@ -5,7 +5,7 @@ (c) Dorai Sitaram, http://www.cs.rice.edu/~dorai/tex2page --> - + Structure and Interpretation of Computer Programs @@ -13,11 +13,11 @@ - -

1.1  The Elements of Programming

+ +

1.1  The Elements of Programming

- + A powerful programming language is more than just a means for instructing a computer to perform tasks. The language also serves as a framework within which we organize our ideas about processes. Thus, @@ -26,32 +26,32 @@

1.1  The Elements of Programming more complex ideas. Every powerful language has three mechanisms for accomplishing this:

-