diff --git a/Makefile b/Makefile index 7a2514e..cbf9b07 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ NORMAL_MARK := $(BUILD_DIR)/normal.mark TEX_FILES := $(wildcard $(LATEX_DIR)/*.tex) SVG_FILES := $(patsubst %.tex,%.svg,$(subst $(LATEX_DIR),$(IMAGES_DIR),$(TEX_FILES))) GIF_FILES := $(patsubst %.tex,%.gif,$(subst $(LATEX_DIR),$(IMAGES_DIR),$(TEX_FILES))) -CONTENT := $(shell find src -type f -not -name .DS_Store) src/mimetype +CONTENT := $(shell find src -type f -not -name .DS_Store -not -name \*.tex -not -name \*.sty -not -name \*book-Z-H-4.html) src/mimetype XML := $(shell find src -type f -name \*html) @@ -44,6 +44,7 @@ check: xmllint --noout $(XML) $(BUILD_DIR)/huge/%_cropped.pdf: $(BUILD_DIR)/huge/ $(LATEX_DIR)/%.tex + @echo "Building huge $@" sed 's/\\sicpsize}{\\fontsize{16}{18}/\\sicpsize}{\\fontsize{200}{220}/' < $(LATEX_DIR)/sicpstyle.sty > $(LATEX_DIR)/sicpstyle2.sty mv $(LATEX_DIR)/sicpstyle2.sty $(LATEX_DIR)/sicpstyle.sty cd $(LATEX_DIR) && pdflatex -output-dir ./build/huge/ ./$*.tex @@ -58,14 +59,18 @@ $(IMAGES_DIR)/%.svg: $(BUILD_DIR)/huge/%.pbm $(BUILD_DIR)/%_cropped.pdf: $(BUILD_DIR) $(LATEX_DIR)/%.tex + @echo "Building regular" + @echo "$(CONTENT)" + false sed 's/\\sicpsize}{\\fontsize{200}{220}/\\sicpsize}{\\fontsize{16}{18}/' < $(LATEX_DIR)/sicpstyle.sty > $(LATEX_DIR)/sicpstyle2.sty mv $(LATEX_DIR)/sicpstyle2.sty $(LATEX_DIR)/sicpstyle.sty cd $(LATEX_DIR) && pdflatex -output-dir ./build ./$*.tex pdfcrop --clip $(BUILD_DIR)/$*.pdf $(BUILD_DIR)/$*_cropped.pdf rm -f $(NORMAL_MARK) -$(IMAGES_DIR)/%.gif: $(BUILD_DIR)/%_cropped.pdf - convert $(BUILD_DIR)/$*_cropped.pdf $@ +# $(IMAGES_DIR)/%.gif: $(BUILD_DIR)/%_cropped.pdf +# @echo "Converting $@" +# convert $(BUILD_DIR)/$*_cropped.pdf $@ $(BUILD_DIR): mkdir -p $(BUILD_DIR) @@ -79,4 +84,4 @@ svg: $(SVG_FILES) gif: $(GIF_FILES) clean: - rm -rf sicp.epub $(BUILD_DIR) $(SVG_FILES) $(GIF_FILES) src/mimetype + rm -rf sicp.epub $(BUILD_DIR) $(SVG_FILES) src/mimetype diff --git a/error_counts.txt b/error_counts.txt new file mode 100644 index 0000000..3de7daf --- /dev/null +++ b/error_counts.txt @@ -0,0 +1,16 @@ + 103 element "p" not allowed here; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 101 element "div" not allowed here; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 94 element "caption" not allowed here; expected the element end-tag or element "tr" + 50 text not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 39 text not allowed here; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 35 element "u" not allowed anywhere; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 35 element "a" not allowed here; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 28 element "em" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 10 element "font" not allowed anywhere; expected the element end-tag, text or element "a", "abbr", "acronym", "applet", "b", "bdo", "big", "br", "cite", "code", "del", "dfn", "em", "i", "iframe", "img", "ins", "kbd", "map", "noscript", "ns:svg", "object", "q", "samp", "script", "small", "span", "strong", "sub", "sup", "tt" or "var" (with xmlns:ns="http://www.w3.org/2000/svg") + 10 element "a" not allowed here; expected element "li" + 8 element "blockquote" incomplete; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 7 element "tt" not allowed here; expected element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 5 element "a" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 4 element "tt" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") + 3 element "p" not allowed here; expected element "li" + 2 element "sub" not allowed here; expected the element end-tag or element "address", "blockquote", "del", "div", "dl", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "ins", "noscript", "ns:svg", "ol", "p", "pre", "script", "table" or "ul" (with xmlns:ns="http://www.w3.org/2000/svg") diff --git a/fetch_images.sh b/fetch_images.sh new file mode 100755 index 0000000..c89d406 --- /dev/null +++ b/fetch_images.sh @@ -0,0 +1,13 @@ +#! /bin/bash + +SRC_DIR="src/OEBPS/" +IMAGE_SRC="http://mitpress.mit.edu/sicp/full-text/book/" + +for img in `grep "media-type=\"image/" ${SRC_DIR}content.opf | sed "s/.*href=\"\([^\"]*\)\".*/\1/"`; do + destination_file="${SRC_DIR}${img}" + if [ ! -e "${destination_file}" ]; then + image_file=`echo "${img}" | sed "s/^images\/\(.*\)/\1/"` + echo ${destination_file} + curl -s -o "${destination_file}" "${IMAGE_SRC}${image_file}" + fi +done diff --git a/fix_toc.py b/fix_toc.py new file mode 100644 index 0000000..c92fad1 --- /dev/null +++ b/fix_toc.py @@ -0,0 +1,114 @@ +from collections import defaultdict, Counter +from HTMLParser import HTMLParser +import os.path +import re +import sys +from urlparse import urldefrag +from xml.etree import ElementTree + +ncx_namespace = 'http://www.daisy.org/z3986/2005/ncx/' + +ElementTree.register_namespace('', ncx_namespace) + +toc_file = sys.argv[1] +content_dir, _ = os.path.split(toc_file) + +found_sections = defaultdict(Counter) + + +def _tag_name(name): + return '{{{0}}}{1}'.format(ncx_namespace, name) + + +def _section_id(section_number): + return '__sec_' + section_number + + +def subsection_source(content_src, subsection_number): + section_number = subsection_match.group(1) + section_id = _section_id(section_number) + + new_src = '#'.join([content_src, section_id]) + return new_src + + +class SectionFinder(HTMLParser): + def __init__(self, src_file, section_title): + HTMLParser.__init__(self) + self.last_id = None + self.new_src = None + self.consume_text = False + self.current_text = '' + self.section_title = section_title + self.src_file = src_file + self.section_title_index = found_sections[src_file][section_title] + self.found_sections = 0 + + def build_src(self, anchor_id): + return '#'.join([self.src_file, anchor_id]) + + def handle_starttag(self, tag, attrs): + attrs = dict(attrs) + if tag == 'a' and 'id' in attrs: + self.last_id = attrs['id'] + elif self.last_id is not None and tag.startswith('h'): + self.consume_text = True + + def handle_endtag(self, tag): + if self.last_id and tag.startswith('h'): + if self.current_text == self.section_title: + if self.found_sections == self.section_title_index: + self.new_src = self.build_src(self.last_id) + found_sections[self.src_file][self.section_title] += 1 + + self.found_sections += 1 + + self.last_id = None + self.consume_text = False + self.current_text = '' + + def handle_data(self, data): + if self.consume_text: + self.current_text += data + + +def find_content_source(section_title, content_src): + source_path = os.path.join(content_dir, content_src) + section_finder = SectionFinder(content_src, section_title) + + with open(source_path) as source_file: + section_finder.feed(source_file.read()) + + return section_finder.new_src + + +doc = ElementTree.parse(toc_file) + +for nav_point in doc.iter(_tag_name('navPoint')): + text_node = nav_point.find(_tag_name('navLabel')).find(_tag_name('text')) + content_node = nav_point.find(_tag_name('content')) + + if text_node is None or content_node is None: + continue + + old_src = content_node.get('src') + content_src, fragment = urldefrag(old_src) + + if not fragment: + continue + + subsection_match = re.match(r'^([0-9.]+)', text_node.text) + if subsection_match: + new_src = subsection_source(content_src, subsection_match.group(1)) + else: + new_src = find_content_source(text_node.text, content_src) + + if new_src is None: + print 'Cannot find reference for {0} in {1}'.format(text_node.text, content_src) + continue + + print old_src, '->', new_src + + content_node.set('src', new_src) + +doc.write(toc_file + '-new', xml_declaration=True, encoding='UTF-8') diff --git a/src/OEBPS/book-Z-H-1.html b/src/OEBPS/book-Z-H-1.html index 5a9cf74..7a2c09d 100644 --- a/src/OEBPS/book-Z-H-1.html +++ b/src/OEBPS/book-Z-H-1.html @@ -5,15 +5,15 @@ (c) Dorai Sitaram, http://www.cs.rice.edu/~dorai/tex2page -->
- +Harold Abelson and Gerald Jay Sussman
with Julie Sussman
- + A powerful programming language is more than just a means for instructing a computer to perform tasks. The language also serves as a framework within which we organize our ideas about processes. Thus, @@ -26,32 +26,32 @@
-
-In programming, we deal with two kinds of elements: procedures and -data. (Later we will discover that they are really not so distinct.) +In programming, we deal with two kinds of elements: procedures and +data. (Later we will discover that they are really not so distinct.) Informally, data is “stuff” that we want to manipulate, and procedures are descriptions of the rules for manipulating the data. Thus, any powerful programming language should be able to describe primitive data and primitive procedures and should have methods for combining and abstracting procedures and data.
-In this chapter we will deal only with simple numerical data so that -we can focus on the rules for building procedures.4 In later chapters we will see that +In this chapter we will deal only with simple numerical data so that +we can focus on the rules for building procedures.4 In later chapters we will see that these same rules allow us to build procedures to manipulate compound data as well.
@@ -61,38 +61,38 @@
-One kind of primitive expression you might type is a number. (More +One kind of primitive expression you might type is a number. (More precisely, the expression that you type consists of the numerals that represent the number in base 10.) If you present Lisp with a number
486
-the interpreter will respond by printing5
+the interpreter will respond by printing5
486
-Expressions representing numbers may be combined with an expression -representing a primitive procedure (such as + or *) to form a +Expressions representing numbers may be combined with an expression +representing a primitive procedure (such as + or *) to form a compound expression that represents the application of the procedure to those numbers. For example:
-
(+ 137 349)
486
(- 1000 334)
666
-(* 5 99)
495
(/ 10 5)
2
+
(+ 137 349)
486
(- 1000 334)
666
+(* 5 99)
495
(/ 10 5)
2
(+ 2.7 10)
12.7
-Expressions such as these, formed by delimiting a list of expressions -within parentheses in order to denote procedure application, +Expressions such as these, formed by delimiting a list of expressions +within parentheses in order to denote procedure application, are called combinations. The leftmost -element in the list is called the operator, and the other -elements are called operands. The value of a combination is +element in the list is called the operator, and the other +elements are called operands. The value of a combination is obtained by applying the procedure specified by the operator to the -arguments that are the values of the operands.
+arguments that are the values of the operands.
The convention of placing the operator to the left of the operands is -known as prefix notation, and it may be somewhat confusing at +known as prefix notation, and it may be somewhat confusing at first because it departs significantly from the customary mathematical convention. Prefix notation has several advantages, however. One of -them is that it can accommodate procedures that may take an arbitrary +them is that it can accommodate procedures that may take an arbitrary number of arguments, as in the following examples:
(+ 21 35 12 7)
75
@@ -101,7 +101,7 @@ 1.1 The Elements of Programming
element and the entire combination is delimited by the
parentheses.
-A second advantage of prefix notation is that it extends in a +A second advantage of prefix notation is that it extends in a straightforward way to allow combinations to be nested, that is, to have combinations whose elements are themselves combinations:
@@ -123,32 +123,32 @@
-following a formatting convention known as pretty-printing, in +following a formatting convention known as pretty-printing, in which each long combination is written so that the operands are aligned vertically. The resulting indentations display clearly the -structure of the expression.6
+structure of the expression.6
Even with complex expressions, the interpreter always operates in the same basic cycle: It reads an expression from the terminal, evaluates the expression, and prints the result. This mode of operation is often expressed by saying that the -interpreter runs in a read-eval-print loop. +interpreter runs in a read-eval-print loop. Observe in particular that it is not necessary to explicitly -instruct the interpreter to print the value of the expression.7
+instruct the interpreter to print the value of the expression.7
A critical aspect of a programming language is the means it provides -for using names to refer to computational objects. We say that the -name identifies a variable whose value is the object.
+for using names to refer to computational objects. We say that the +name identifies a variable whose value is the object.
In the Scheme dialect of Lisp, we -name things with define. Typing
+name things with define. Typing
(define size 2)
causes the interpreter to associate the value 2 with the -name size.8 +name size.8 Once the name size has been associated with the number 2, we can refer to the value 2 by name:
@@ -163,7 +163,7 @@
-Define is our language's +Define is our language's simplest means of abstraction, for it allows us to use simple names to refer to the results of compound operations, such as the circumference computed above. @@ -175,23 +175,23 @@
It should be clear that the possibility of associating values with symbols and later retrieving them means that the interpreter must maintain some sort of memory that keeps track of the name-object -pairs. This memory is called the environment (more precisely -the global environment, since we will see later that a +pairs. This memory is called the environment (more precisely +the global environment, since we will see later that a computation may involve a number of different -environments).9
+environments).9
- + One of our goals in this chapter is to isolate issues about thinking procedurally. As a case in point, let us consider that, in evaluating combinations, the interpreter is itself following a procedure.
@@ -211,23 +211,23 @@
+itself.10
-Notice how succinctly the idea of recursion can be used to express +Notice how succinctly the idea of recursion can be used to express what, in the case of a deeply nested combination, would otherwise be viewed as a rather complicated process. For example, evaluating
(* (+ 2 (* 4 6))
(+ 3 5 7))
requires that the evaluation rule be applied to four different -combinations. We can obtain a picture of this process by representing -the combination in the form of a tree, as shown in -figure 1.1. Each combination is represented by a -node with branches corresponding to the operator and the +combinations. We can obtain a picture of this process by representing +the combination in the form of a tree, as shown in +figure 1.1. Each combination is represented by a +node with branches corresponding to the operator and the operands of the combination stemming from it. -The terminal nodes (that is, nodes with +The terminal nodes (that is, nodes with no branches stemming from them) represent either operators or numbers. Viewing evaluation in terms of the tree, we can imagine that the values of the operands percolate upward, starting from the terminal @@ -235,9 +235,9 @@
+of process known as tree accumulation.