diff --git a/src/linearize/Makefile b/src/linearize/Makefile new file mode 100644 index 0000000..279c731 --- /dev/null +++ b/src/linearize/Makefile @@ -0,0 +1,340 @@ +###################################################################### +## ## +## A standard Makefile that should pretty much work for all local ## +## projects including those with hierarchical directory structure. ## +## ## +## For your average project it should only be necessary to add ## +## SOURCES and TARGETS. ## +## You can find out dependencies with make .depend ## +## They are currently not automatically included into the Makefile ## +## ## +###################################################################### + +###################################################################### +## SDAG specific stuff +## Dealing with special environment variables +###################################################################### + + +OCAML_DIR=$(SDAG_OCAML_DIR) +DOC_PREFIX=$(SDAG_DOC_PREFIX) +PRE_LATEX_DOCDIR=$(SDAG_LATEX_DOCDIR) +PRE_HTML_DOCDIR=$(SDAG_HTML_DOCDIR) + + +LATEX_GEN=odoc_sdag_latex.cmo +HTML_GEN=odoc_sdag_html.cmo + +## No more SDAG beyond this point! +###################################################################### + +###################################################################### +## +## Here come the actual changes to customise the Makefile +## for your particular project. +## +###################################################################### +# All Source Files, ordered with respect to their dependencies: + +SOURCES= jsonfio.ml \ + synt.ml \ + util.ml \ + preprocessor.ml \ + linearize.ml + +# All Target Files, ordered with respect to their priority: + +TARGETS=linearizer.ml + +# All Packages needed for making sources and targets + +PACKAGES=json-wheel + +# A flag specifying if documentation is in short or long format +SHORTDOC=false + +# The following packages are not always necessary. +# If you are sure you don't need them, comment them out and +# you will get slightly smaller binaries. +PACKAGES+=unix str# Leave that comment to avoid unwanted space!!! + +###################################################################### +### +### No more changes beyond this point. +### If you need to change anything, please notify V.Sorge@cs.bham.ac.uk +### + +## Standard Executables +OCAMLC=ocamlc +OCAMLOPT=ocamlopt +OCAMLDEP=ocamldep +OCAMLDOC=ocamldoc +OCAMLLEX=ocamllex +OCAMLYACC=ocamlyacc +OCAMLFIND=ocamlfind +OCAMLFC=$(OCAMLFIND) $(OCAMLC) +OCAMLFOPT=$(OCAMLFIND) $(OCAMLOPT) + +## Assigning Special Directories + +SRCDIR=./ +LIBDIR=$(OCAML_DIR)/lib +DOCDIR=$(OCAML_DIR)/doc +BINDIR=$(OCAML_DIR)/bin + +SOURCES:=$(addprefix $(SRCDIR),$(SOURCES)) +TARGETS:=$(addprefix $(SRCDIR),$(TARGETS)) + +ifneq ($(PRE_LATEX_DOCDIR),) + LATEX_DOCDIR=$(PRE_LATEX_DOCDIR) + LATEX_DOC_RM_COMMAND=rm -f $(LATEX_DOCDIR)/* +else + ifneq ($(DOC_PREFIX),) + LATEX_DOCDIR=$(DOC_PREFIX)/latex + else + LATEX_DOCDIR=latex + endif + LATEX_DOC_RM_COMMAND=rm -rf $(LATEX_DOCDIR) +endif + +ifneq ($(PRE_HTML_DOCDIR),) + HTML_DOCDIR=$(PRE_HTML_DOCDIR) + HTML_DOC_RM_COMMAND=rm -f $(HTML_DOCDIR)/* +else + ifneq ($(DOC_PREFIX),) + HTML_DOCDIR=$(DOC_PREFIX)/html + else + HTML_DOCDIR=html + endif + HTML_DOC_RM_COMMAND=rm -rf $(HTML_DOCDIR) +endif + +## The stuff from here on should stay pretty much the same for every project, +## unless we are using subdirectories. That should be tested in more detail. +## + +## Main objects for byte code +MAIN_OBJS= $(foreach source,$(SOURCES),$(addsuffix .cmi,$(basename $(filter %.mli,$(source))))$(addsuffix .cmo,$(basename $(filter %.ml %.mll %.mly,$(source))))) +## Main objects for native code +MAIN_OBJS_OPT=$(foreach source,$(SOURCES),$(addsuffix .cmi,$(basename $(filter %.mli,$(source))))$(addsuffix .cmx,$(basename $(filter %.ml %.mll %.mly,$(source))))) + + +## This lets us handle hierarchical subdirectory structures if the code is not flat. +## Needs some testing though... +INCLUDEDIRS=$(sort $(dir $(SOURCES) $(TARGETS))) +INCLUDES=$(addprefix -I ,$(INCLUDEDIRS)) $(addprefix -I ,$(LIBDIR)) + +DOCINCLUDEDIRS=$(sort $(foreach package,$(PACKAGES),$(shell export OCAMLPATH=$(LIBDIR);$(OCAMLFIND) query $(package)))) +DOCINCLUDES=$(addprefix -I , $(DOCINCLUDEDIRS)) + + +## Flags +# PREPROCESS=-pp 'camlp4o -I . $(addsuffix .cmo,$(basename $(EXTENSIONS)))' # Preprocessing not used + +comma:= , +empty:= +space:= $(empty) $(empty) + +OCAMLFINDFLAGS=-package $(subst $(space),$(comma),$(PACKAGES)) + +OCAMLFLAGS=$(OCAMLFINDFLAGS) $(INCLUDES) # -custom unix.cma str.cma # $(PREPROCESS) # add other options for ocamlc here +OCAMLOPTFLAGS=$(OCAMLFINDFLAGS) $(INCLUDES) # -ccopt -static #unix.cmxa str.cmxa # $(PREPROCESS) # add other options for ocamlopt here + +## Docu Flags for our special generators +ifeq ($(SHORTDOC),true) + SHORTFLAG = -short +endif + +OCAMLHTMLDOCFLAGS=$(DOCINCLUDES) $(INCLUDES) $(SHORTFLAG) -i $(DOCDIR) -g $(HTML_GEN) -d $(HTML_DOCDIR) -hide Pervasives +OCAMLLATEXDOCFLAGS=$(DOCINCLUDES) $(INCLUDES) $(SHORTFLAG) -i $(DOCDIR) -g $(LATEX_GEN) -o $(LATEX_DOCDIR)/ocamldoc.tex -sepfiles -hide Pervasives + +# Common rules +.SUFFIXES: .ml .mli .cmo .cmi .cmx .mll .mly .cma .cmxa .o .a + +.ml.cmo: + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) $(OCAMLFLAGS) -c $< + +.mli.cmi: + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) $(OCAMLFLAGS) -c $< + +.ml.cmx: + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) $(OCAMLOPTFLAGS) -c $< + +.mll.cmo: + $(OCAMLLEX) $< + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) $(OCAMLFLAGS) -c $(addsuffix .ml,$(basename $<)) + +.mll.cmx: + $(OCAMLLEX) $< + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) $(OCAMLOPTFLAGS) -c $(addsuffix .ml,$(basename $<)) + +.mly.cmo: $(addsuffix .cmo,$(basename $(filter %.mll,$(SOURCES)))) + $(OCAMLYACC) $< + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) $(OCAMLFLAGS) -c $(addsuffix .mli,$(basename $<)) + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) $(OCAMLFLAGS) -c $(addsuffix .ml,$(basename $<)) + +.mly.cmx: $(addsuffix .cmo,$(basename $(filter %.mll,$(SOURCES)))) + $(OCAMLYACC) $< + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) $(OCAMLOPTFLAGS) -c $(addsuffix .mli,$(basename $<)) + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) $(OCAMLOPTFLAGS) -c $(addsuffix .ml,$(basename $<)) +.mly.cmi: $(addsuffix .cmo,$(basename $<)) + +.cmo.cma: + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) -a -o $(addsuffix .cma,$(basename $<)) $< + +.cmx.cmxa: + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) -a -o $(addsuffix .cmxa,$(basename $<)) $< + +.o.a: + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) -a -o $(addsuffix .a,$(basename $<)) $< + + +# Build + +## Byte Code +$(basename $(notdir $(TARGETS))): $(MAIN_OBJS) $(addsuffix .cmo,$(basename $(filter %.ml,$(TARGETS)))) + @echo Linking $@ + export OCAMLPATH=$(LIBDIR); $(OCAMLFC) -o $@ $(OCAMLFLAGS) $(MAIN_OBJS) $(addsuffix .cmo,$@) -linkpkg + +## Native Code +$(addsuffix .opt,$(basename $(notdir $(TARGETS)))): $(MAIN_OBJS_OPT) $(addsuffix .cmx,$(basename $(filter %$(subst .opt,,$@).ml,$(TARGETS)))) + @echo Linking $@ + export OCAMLPATH=$(LIBDIR); $(OCAMLFOPT) -o $@ $(OCAMLOPTFLAGS) $(MAIN_OBJS_OPT) $(addsuffix .cmx,$(basename $@)) -linkpkg + + +# Making the Main Targets + +.PHONY: all opt +all: + make clean_byte + make all_targets + +opt: + make clean_native + make opt_targets + +all_targets: $(basename $(notdir $(TARGETS))) + +opt_targets: $(addsuffix .opt, $(basename $(notdir $(TARGETS)))) + + +# Just making sources for libraries + +.PHONY: lib opt_lib +lib: $(MAIN_OBJS) $(addsuffix .cmo,$(basename $(filter %.ml,$(TARGETS)))) $(addsuffix .cma,$(basename $(filter %.ml,$(SOURCES) $(TARGETS)))) + +opt_lib: $(MAIN_OBJS_OPT) $(addsuffix .cmx,$(basename $(filter %$(subst .opt,,$@).ml,$(TARGETS)))) $(addsuffix .cmxa,$(basename $(filter %.ml,$(SOURCES) $(TARGETS)))) + +# Installing binaries and libraries + +.PHONY: install opt_install install_lib uninstall uninstall_lib +install: all_targets + @mkdir -p $(BINDIR) + @for i in $(basename $(TARGETS)); do \ + echo Installing $$i ; \ + (cp $$i $(BINDIR)) \ + done + +opt_install: opt_targets + @mkdir -p $(BINDIR) + @for i in $(addsuffix .opt, $(basename $(TARGETS))); do \ + echo Installing $$i ; \ + (cp $$i $(BINDIR)) \ + done + +##install_lib: lib opt_lib $(addsuffix ._lib, $(INCLUDEDIRS)) +install_lib: lib opt_lib $(addprefix $(LIBDIR)/,$(filter-out ./,$(INCLUDEDIRS))) lib_local + +## install_sub_lib +$(addprefix $(LIBDIR)/,$(filter-out ./,$(INCLUDEDIRS))): $(addsuffix META,$(filter-out ./,$(INCLUDEDIRS))) + @echo $(if $(wildcard $(addsuffix META,$@)),\ + $(shell echo "Library $(notdir $(subst /META,,$(addsuffix META,$@))) already installed! Uninstall old library first."),\ + $(shell $(OCAMLFIND) install -destdir $(LIBDIR) $(notdir $@) $(notdir $@)/META $(notdir $@)/*.cm[ioax] $(notdir $@)/*.cmxa $(notdir $@)/*.[oa] $(notdir $@)/*.ml*)) + +## install_single_lib +lib_local: META $(addprefix $(LIBDIR)/,$(notdir $(sort $(basename $(wildcard ./*.cma ./*.cmax ./*.a))))) + +$(addprefix $(LIBDIR)/,$(notdir $(sort $(basename $(wildcard ./*.cma ./*.cmax ./*.a))))): + @for i in $(notdir $(sort $(basename $(wildcard ./*.cma ./*.cmax ./*.a)))); do \ + echo Installing $$i ; \ + ($(OCAMLFIND) install -destdir $(LIBDIR) $$i META $$i.cm* $$i.[oa] $$i.ml*) \ + done + +## building the META files (if necessary). +## Observe that META files will NOT be removed by uninstall or clean! +$(addsuffix META,$(filter-out ./,$(INCLUDEDIRS))): + @echo Making $@ + @echo 'description = "'$(subst /META,,$@)' Library"' > $@ + @echo 'version = "[Unknown]"' >> $@ + @echo 'archive(byte) = "'$(notdir $(wildcard $(dir $@)*.cma))'"' >> $@ + @echo 'archive(native) = "'$(notdir $(wildcard $(dir $@)*.cmxa))'"' >> $@ + + +META: + @echo 'description = "'$(basename $(notdir $(wildcard ./*.cma)))' Library"' > ./META + @echo 'version = "[Unknown]"' >> ./META + @echo 'archive(byte) = "'$(notdir $(wildcard ./*.cma))'"' >> ./META + @echo 'archive(native) = "'$(notdir $(wildcard ./*.cmxa))'"' >> ./META + + +uninstall: + rm -f $(addprefix $(BINDIR)/,$(basename $(notdir $(TARGETS)))) + rm -f $(addsuffix .opt,$(addprefix $(BINDIR)/,$(basename $(notdir $(TARGETS))))) + +uninstall_lib: + @for i in $(notdir $(realpath $(filter-out ./,$(INCLUDEDIRS)))) $(notdir $(basename $(wildcard ./*.ml))); do \ + ($(OCAMLFIND) remove -destdir $(LIBDIR) $$i) \ + done + +# Documentation generation + +.PHONY: doc html_doc latex_doc +doc: $(HTML_GEN) $(LATEX_GEN) doc_html doc_latex + +doc_html: $(MAIN_OBJS) + mkdir -p $(HTML_DOCDIR) + $(OCAMLDOC) $(OCAMLHTMLDOCFLAGS) $(filter %.ml %.mli,$(SOURCES)) $(filter %.ml %.mli,$(TARGETS)) + +doc_latex: $(MAIN_OBJS) + mkdir -p $(LATEX_DOCDIR) + $(OCAMLDOC) $(OCAMLLATEXDOCFLAGS) $(filter %.ml %.mli,$(SOURCES)) $(filter %.ml %.mli,$(TARGETS)) + +$(LATEX_GEN): $(DOCDIR)/$(LATEX_GEN) + cd $(DOCDIR); make lib + +$(HTML_GEN): + cd $(DOCDIR); make lib + +# Clean up + +.PHONY: clean clean_objs clean_byte clean_native clean_depend clean_doc distclean +clean: distclean clean_doc uninstall + +distclean: clean_byte clean_native clean_depend + +clean_objs: + rm -f $(addsuffix *.cm[ioxa],$(INCLUDEDIRS)) + rm -f $(addsuffix *.cmxa,$(INCLUDEDIRS)) + rm -f $(addsuffix *.[ao],$(INCLUDEDIRS)) + rm -f $(addsuffix .ml,$(basename $(filter %.mll,$(SOURCES)))) + rm -f $(addsuffix .ml,$(basename $(filter %.mly,$(SOURCES)))) + rm -f $(addsuffix .mli,$(basename $(filter %.mly,$(SOURCES)))) + +clean_byte: clean_objs + rm -f $(basename $(notdir $(TARGETS))) + +clean_native: clean_objs + rm -f $(addsuffix .opt,$(basename $(notdir $(TARGETS)))) + +clean_depend: + rm -f .depend + +clean_doc: + $(LATEX_DOC_RM_COMMAND) + $(HTML_DOC_RM_COMMAND) + +# Dependencies +.depend: + $(OCAMLFIND) $(OCAMLDEP) $(OCAMLFINDFLAGS) $(INCLUDES) $(addsuffix *.ml,$(INCLUDEDIRS)) $(addsuffix *.mli,$(INCLUDEDIRS))> .depend + +# include .depend diff --git a/src/linearize/README.md b/src/linearize/README.md new file mode 100644 index 0000000..db443a8 --- /dev/null +++ b/src/linearize/README.md @@ -0,0 +1,16 @@ +To compile + + $ make + +To run on a directory + + $ ./linearizer -d ../samples/sample1/001/ + +To run on a single file + + $ ./linearizer -f ../samples/sample1/001/001.jsonf + + + $ ./linearizer -help + +Displays the different options diff --git a/src/linearize/jsonfio.ml b/src/linearize/jsonfio.ml new file mode 100644 index 0000000..61d06d9 --- /dev/null +++ b/src/linearize/jsonfio.ml @@ -0,0 +1,296 @@ +(* + * JsonCharClip.ml + * + * Made by (Alan Sexton) + * Login + * + *) + + +open Json_type + + + +(** + + Module to encapsulate loading from and saving to a .jsonf file. Jsonf files + contains the low level character and line information extracted from a clip + of a PDF file and with the true bounding boxes of the characters, lines etc + attached. + @edited: 06-DEC-2009 + @author: Alan P. Sexton +*) +module JsonfIO = struct + + (** A single character as extracted from a PDF file *) + type pdfChar = { + c: string; (* Name of character *) + bx: int; (* Basepoint x *) + by: int; (* Basepoint y *) + font: string; (* Name of font *) + scale: float; (* Scaling factor to be used in rendering this character *) + } + + (** A line as extracted from a PDF file *) + type line = { + sx: int; (* Start point x *) + sy: int; (* Start point y *) + lw: int; (* Width (thickness) of line *) + ex: int; (* End point x *) + ey: int; (* End point y *) + } + + (** A true bounding box of an object found in a PDF file as discovered by + rendering the PDF file and analysing the image *) + type bBox = { + x: int; (* Horizontal coordinate *) + y: int; (* Vertical coordinate *) + w: int; (* Width *) + h: int (* Height *) + } + + (** An element in a PDF file can be a line or a character *) + type element = + | Line of line (* line element *) + | PDFChar of pdfChar (* pdfChar element *) + + (** A symbol is a list of elements (usually touching each other if more than + one) with the true bounding box of the whole group of elements. We also + include the list of bounding boxes of each individual glyph discovered in the + rendered image of the PDF page that coresponds to each element or part + thereof in the symbol *) + type symbol = { + bbox: bBox; (* The bounding of the entire symbol *) + glyphs: bBox list; (* The list of bounding boxes of all glyphs in this symbol *) + elements: element list; (* The list of elements in this symbol *) + } + + (** The full information about a clip of a PDF file. *) + type clip = { + srcPDF: string; (* Name of the original PDF file that this clip was taken from *) + page: int; (* Page number in PDF file of this clip *) + pageHeight: int; (* Height of Page *) + pageWidth: int; (* Width of Page *) + clipX: int; (* Horizontal coordinate of this clip *) + clipY: int; (* Vertical coordinate of this clip *) + clipWidth: int; (* Width of this clip *) + clipHeight: int; (* Height of this clip *) + clipImage: string; (* File name of the tif image of this clip *) + symbols: symbol list; (* List of symbols found in this clip *) + } + + + + (** + Jasonf to Ocaml translation of a pdfChar + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let j2oPdfChar v = + let tbl = Browse.make_table (Browse.objekt v) in + { + c = Browse.string (Browse.field tbl "c") ; + bx = Browse.int (Browse.field tbl "bx") ; + by = Browse.int (Browse.field tbl "by") ; + font = Browse.string (Browse.field tbl "font") ; + scale = Browse.float (Browse.field tbl "scale") ; + } + + (** + Jasonf to Ocaml translation of a line + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let j2oLine v = + let tbl = Browse.make_table (Browse.objekt v) in + { + sx = Browse.int (Browse.field tbl "sx") ; + sy = Browse.int (Browse.field tbl "sy") ; + lw = Browse.int (Browse.field tbl "lw") ; + ex = Browse.int (Browse.field tbl "ex") ; + ey = Browse.int (Browse.field tbl "ey") ; + } + + (** + Jasonf to Ocaml translation of a bounding box + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let j2oBBox v = + let tbl = Browse.make_table (Browse.objekt v) in + { + x = Browse.int (Browse.field tbl "x") ; + y = Browse.int (Browse.field tbl "y") ; + w = Browse.int (Browse.field tbl "w") ; + h = Browse.int (Browse.field tbl "h") + } + + (** + Jasonf to Ocaml translation of an element (note that this is a sum type) + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let j2oElement v = + let lst = Browse.array v in + let len = List.length lst in + match Browse.string (List.hd lst) with + | "line" when len = 2 + -> Line (j2oLine (List.nth lst 1)) + | "pdfChar" when len = 2 + -> PDFChar (j2oPdfChar (List.nth lst 1)) + | _ -> raise (Json_error ("Invalid entry in json input: " ^ + (Json_io.string_of_json v))) + + (** + Jasonf to Ocaml translation of a symbol + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let j2oSymbol v = + let tbl = Browse.make_table (Browse.objekt v) in + { + bbox = j2oBBox (Browse.field tbl "bbox") ; + glyphs = Browse.list j2oBBox (Browse.field tbl "glyphs") ; + elements = Browse.list j2oElement (Browse.field tbl "elements") ; + } + + (** + Jasonf to Ocaml translation of a clip + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let j2oClip jsonClip = + let clipTbl = Browse.make_table (Browse.objekt jsonClip) in + { + srcPDF = Browse.string (Browse.field clipTbl "srcPDF") ; + page = Browse.int (Browse.field clipTbl "page") ; + pageWidth = Browse.int (Browse.field clipTbl "pageWidth") ; + pageHeight = Browse.int (Browse.field clipTbl "pageHeight") ; + clipX = Browse.int (Browse.field clipTbl "clipX") ; + clipY = Browse.int (Browse.field clipTbl "clipY") ; + clipWidth = Browse.int (Browse.field clipTbl "clipWidth") ; + clipHeight = Browse.int (Browse.field clipTbl "clipHeight") ; + clipImage = Browse.string (Browse.field clipTbl "clipImage") ; + symbols = Browse.list j2oSymbol (Browse.field clipTbl "symbols") ; + } + + (** + Given the filename of a jsonf file, read it in and translate it into a clip. + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let loadClip file = + let jsonClip = Json_io.load_json file in + j2oClip jsonClip + + (** + Ocaml to Jasonf translation of a pdfChar + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let o2jPdfChar o = + Object [ + "c", String o.c ; + "bx", Int o.bx ; + "by", Int o.by ; + "font", String o.font; + "scale", Float o.scale ; + ] + + (** + Ocaml to Jasonf translation of a line + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let o2jLine o = + Object [ + "sx" , Int o.sx ; + "sy" , Int o.sy ; + "lw" , Int o.lw ; + "ex" , Int o.ex ; + "ey" , Int o.ey ; + ] + + (** + Ocaml to Jasonf translation of a bounding box + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let o2jBBox o = + Object [ + "x", Int o.x; + "y", Int o.y; + "w", Int o.w; + "h", Int o.h + ] + + (** + Ocaml to Jasonf translation of an element + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let o2jElement = function + | Line(lin) -> Build.array [ Build.string "line"; o2jLine lin ] + | PDFChar(pc) -> Build.array [ Build.string "pdfChar"; o2jPdfChar pc ] + + (** + Ocaml to Jasonf translation of a symbol + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let o2jSymbol o = + Object [ + "bbox", o2jBBox o.bbox ; + "glyphs", Build.list o2jBBox o.glyphs; + "elements", Build.list o2jElement o.elements; + ] + + (** + Ocaml to Jasonf translation of a clip + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let o2jClip clip = + Object [ + "srcPDF" , String clip.srcPDF ; + "page" , Int clip.page ; + "pageWidth" , Int clip.pageWidth ; + "pageHeight" , Int clip.pageHeight ; + "clipX" , Int clip.clipX ; + "clipY" , Int clip.clipY ; + "clipWidth" , Int clip.clipWidth ; + "clipHeight" , Int clip.clipHeight ; + "clipImage" , String clip.clipImage ; + "symbols" , Build.list o2jSymbol clip.symbols + ] + + (** + Given the filename of a jsonf file and a clip, translate the clip into + jsonf format and write it to the file. + @edited: 06-DEC-2009 + @author: Alan P. Sexton + *) + let saveClip file clip = + let jsonClip = o2jClip clip in + Json_io.save_json file jsonClip + + let printClip clip = + let jsonClip = o2jClip clip in + print_string (Json_io.string_of_json jsonClip) + + let getSymbols file = + (loadClip file).symbols + +end + + +(* For testing only: *) + +(* let x = JsonfIO.loadClip "tst.jsonf" *) + +(* let _= JsonfIO.saveClip "tstSave.jsonf" x *) + +(* let y = JsonfIO.loadClip "tstSave.jsonf" *) + +(* let _= JsonfIO.saveClip "tstSave2.jsonf" y *) + diff --git a/src/linearize/linearize.ml b/src/linearize/linearize.ml new file mode 100644 index 0000000..2e3a874 --- /dev/null +++ b/src/linearize/linearize.ml @@ -0,0 +1,821 @@ +open Synt;; + +let bbstring = ref "" +let verbose = ref false + + + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: symbol + @effects: Updates BBstring + @output: + *) +let updateBB symbol = + bbstring := !bbstring^("<"^symbol.name^" , " + ^symbol.font^" , " + ^(string_of_float symbol.size)^ "> " + ^(string_of_int symbol.x1)^" " + ^(string_of_int symbol.x3)^" " + ^(string_of_int symbol.y1)^" " + ^(string_of_int symbol.y3)^"\n"); +;; + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts above t, bounded by x + *) +let rec getBoundedAbove t synts above = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y3 <= iTree.ttop) + && (hd.x1 >= iTree.tleft) + && (hd.x3 <= iTree.tright)) -> getBoundedAbove t tl (hd::above) + | _,hd::tl -> getBoundedAbove t tl above + | _,[] -> above + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts below t, bounded by x + *) +let rec getBoundedBelow t synts below = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y1 >= iTree.tbot) + && (hd.x1 >= iTree.tleft) + && (hd.x3 <= iTree.tright)) -> getBoundedBelow t tl (hd::below) + | _,hd::tl -> getBoundedBelow t tl below + | _,[] -> below + +(** + @edited: 22-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts bounded by t +*) +let rec getRootBody t synts body = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y1 >= iTree.ttop) + && (hd.y3 <= iTree.tbot) + && (hd.x3 <= iTree.tright)) -> getRootBody t tl (hd::body) + | _,hd::tl -> getRootBody t tl body + | _,[] -> body + + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts subing t, + *)(* +let rec getSub t synts sub = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y1 > iTree.ttop) + && (hd.y2 > iTree.tbase) + && (hd.y3 >= iTree.tbot) + && (hd.x1 < ((getRight t sub iTree.tright) + 20) )) + -> getSub t tl (hd::sub) + | _,hd::tl -> (*getSub t tl*) sub + | _,[] -> sub + *) +let rec getSub t synts sub = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when hd.y2 = iTree.tbase -> sub + | ITree iTree,hd::tl when ((hd.y1 > iTree.ttop) + && (((hd.y2 > iTree.tbase)&& (hd.y3 >= iTree.tbot))||(hd.y1 > ((iTree.ttop+iTree.tbot))/2)) + && (hd.x1 < ((getRight t sub iTree.tright) + 20) )) + -> getSub t tl (hd::sub) + | _,hd::tl -> sub + | _,[] -> sub + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts supering t, + *)(* +let rec getSuper t synts super = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y3 < iTree.tbot) + && ((hd.y2 < iTree.tbase)||(hd.y2 < ((iTree.ttop+iTree.tbot))/2)) + && (hd.y1 <= iTree.ttop) + && (hd.x1 < ((getRight t super iTree.tright) + 20) )) + -> getSuper t tl (hd::super) + | _,hd::tl -> (*getSuper t tl*) super + | _,[] -> super + *) +let rec getSuper t synts super = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when hd.y2 = iTree.tbase -> super + | ITree iTree,hd::tl when ((hd.y3 < iTree.tbot) + && (((hd.y2 < iTree.tbase)&& (hd.y1 <= iTree.ttop))||(hd.y2 < ((iTree.ttop+iTree.tbot))/2)) + && (hd.x1 < ((getRight t super iTree.tright) + 20) )) + -> getSuper t tl (hd::super) + | _,hd::tl -> super + | _,[] -> super + + +(** + @edited: 17-OCT-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts supering and subbing t + *) +let rec getSupSub t synts super sub = + match t,synts with + | Null,_ -> ([],[]) + | ITree iTree,hd::tl when (* ((hd.y3 < iTree.tbot) + && ((hd.y2 < iTree.tbase)||(hd.y2 < ((iTree.ttop+iTree.tbot))/2)) + && (hd.y1 <= iTree.ttop) + && (hd.x1 < ((getRight t super iTree.tright) + 20) )) *) + ((hd.y3 < iTree.tbot) + && (((hd.y2 < iTree.tbase)&& (hd.y1 <= iTree.ttop))||(hd.y2 < ((iTree.ttop+iTree.tbot))/2)) + && (hd.x1 < ((getRight t super iTree.tright) + 20) )) + -> getSupSub t tl (hd::super) sub + | ITree iTree,hd::tl when (* ((hd.y1 > iTree.ttop) + && (hd.y2 > iTree.tbase) + && (hd.y3 >= iTree.tbot) + && (hd.x1 < ((getRight t sub iTree.tright) + 20) )) *) + ((hd.y1 > iTree.ttop) + && (((hd.y2 > iTree.tbase)&& (hd.y3 >= iTree.tbot))||(hd.y1 > ((iTree.ttop+iTree.tbot))/2)) + && (hd.x1 < ((getRight t sub iTree.tright) + 20) )) + -> getSupSub t tl super (hd::sub) + | _,hd::tl -> (*getSuper t tl*) (super,sub) + | _,[] -> (super,sub) +;; + + + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts lower limiting t, + *) +let rec getBottomLimit t synts below = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y1 > iTree.tbot) + && (hd.x1 < ((getRight t below iTree.tright) + 10) )) + -> getBottomLimit t tl (hd::below) + | _,hd::tl -> getBottomLimit t tl below + | _,[] -> below + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts top limiting t, + *) +let rec getTopLimit t synts top = + match t,synts with + | Null,_ -> [] + | ITree iTree,hd::tl when ((hd.y3 < iTree.ttop) + && (hd.x1 < ((getRight t top iTree.tright) + 10) )) + -> getTopLimit t tl (hd::top) + | _,hd::tl -> getTopLimit t tl top + | _,[] -> top + + +(** + @edited: 25-JUL-2012 + @author: Josef Baker + @input: tree, List of synts + @effects: + @output: all synts overlapping tree horizontally + *) +let rec getRecursiveXOverlap t synts overlap = + match t,synts with + | Null, _ -> [] + | _,[] -> overlap + | ITree iTree, hd::tl when hd.x1 <= ((getRight t overlap iTree.tright)+10) -> getRecursiveXOverlap t tl (hd::overlap) + | _,_ -> overlap + + +(** Doesn't work!!!!!!!!!! + @edited: 25-JUL-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let getLimits t synts = + let lines = getLines (getRecursiveXOverlap t synts []) in + + match t,lines with + | ITree iTree, ln1::ln2::[] (*Tree top ln1 base ln2 bottom*) + when iTree.tbot < getTop Null ln1 (List.hd ln1).y1 -> (1,lines) + | ITree iTree, ln1::ln2::[] (*Tree bottom ln1 top ln2 bottom*) + when iTree.ttop > getBot Null ln2 (List.hd ln2).y3 -> (2,lines) + | ITree iTree, ln1::ln2::[] (*Tree base ln1 top ln2 bottom*) + when (iTree.ttop > getBot Null ln1 (List.hd ln1).y3) + && (iTree.tbot < getTop Null ln2 (List.hd ln2).y1) -> (3,lines) + | ITree iTree, ln1::ln2::ln3::[] (*Tree ln1 top ln2 base ln3 bottom*) + when verticallyOverlaps t ln1 -> (4,lines) + | ITree iTree, ln1::ln2::ln3::[] (*ln1 top tree ln2 base ln3 bottom*) + when verticallyOverlaps t ln2 -> (5,lines) + | ITree iTree, ln1::ln2::ln3::[] (*ln1 top ln2 tree base ln3 bottom*) + when verticallyOverlaps t ln3 -> (6,lines) + | _,_-> (0,[]) + +(** + @edited: 31-JUL-2012 + @author: Josef Baker + @input: tree synts + @effects: + @output: returns matrix lines + *) +let getMatrixLines fence synts = + let contained = getSyntsUpto fence synts [] in + getMatLines (sortSynt contained) +;; + +(** Aux method of getMatrixCols*) +let rec getColsAux synts right col cols = + match synts with + hd::tl when hd.x1<=(right+40) -> getColsAux tl (max right hd.x3) (hd::col) cols + | hd::tl -> getColsAux tl hd.x3 [hd] ((List.rev col)::cols) + | _ -> List.rev (col::cols) +;; + +(** + @edited: 02-AUG-2012 + @author: Josef Baker + @input: list of synts + @effects: + @output: list of columns + *) +let getMatrixCols synts = + let synts = sortSynt synts in + match synts with + h::t -> getColsAux t h.x3 [h] [] + | _ -> [] +;; + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: list of synts + @effects: + @output: true if they form division + *) +let isDivision synts = + let hd = List.hd synts in + if hd.name = "line" then( + let t = makeLeaf hd in + match (getBoundedAbove t synts []),(getBoundedBelow t synts []) with + | [],_ | _,[] -> false + | _,_ -> true + ) + else false + +(** + @edited: 20-JUL-2012 + @author: Josef Baker + @input: list of synts + @effects: + @output: true if they form a root + *) +let isRoot synts = + match synts with + hd::tl when ((List.length (getRootBody (makeLeaf hd) tl [])) >0) -> ( + try (Str.search_forward (Str.regexp "radical") hd.name 0; + true) + with Not_found -> false) + | _ ->false + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if they form a subscript + *) +let isSub t synts = + match t,synts with + | ITree iTree, hd::tl when (List.length (getSub t synts [])>0)-> true + | _,_ -> false + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if they form a superscript + *) +let isSuper t synts = + match t,synts with + | ITree iTree, hd::tl when (List.length (getSuper t synts [])>0)-> true + | _,_ -> false + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if they form a super-sub-script + *) +let isSupSub t synts = + match t,synts with + | ITree iTree, hd::tl-> ( match getSupSub t synts [] [] with + [],_ -> false + | _,[] -> false + | _,_ -> true) + | _,_ -> false + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if hd is a bottom limit of t + *) +let hasBottomLimit t synts = + match t,synts with + | ITree iTree, hd::tl when (hd.y1 > iTree.tbot) && + (hd.y3-hd.y1) < (iTree.tbot-iTree.ttop) && + (hd.x1 < iTree.tright)-> true + | _,_ -> false + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if t is a bottom limit of hd + *) +let isBottomLimit t synts = +(* match t,synts with + | ITree iTree, hd::tl when (hd.y3 < iTree.ttop) && + (hd.y3-hd.y1) > (iTree.tbot-iTree.ttop) && + (iTree.tleft < hd.x3)-> true + | _,_ -> false +*) +let lines = getLines (getRecursiveXOverlap t synts []) in + match t,lines with + | ITree iTree, ln1::[] (*Tree top ln1 base*) + when iTree.ttop > getBot Null ln1 (List.hd ln1).y3 && + (((List.hd ln1).y3-((List.hd ln1).y1)) > (iTree.tbot-iTree.ttop)) -> true + | ITree iTree, ln1::ln2::[] (*Tree ln1 top ln2 base*) + when verticallyOverlaps t ln2 && + (((List.hd ln1).y3-((List.hd ln1).y1)) > (iTree.tbot-iTree.ttop))-> true + | _,_-> false + + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if hd is a top limit of t + *) +let hasTopLimit t synts = + match t,synts with + | ITree iTree, hd::tl when (hd.y3 < iTree.ttop) && + (hd.y3-hd.y1) < (iTree.tbot-iTree.ttop) && + (hd.x1 < iTree.tright)-> true + | _,_ -> false + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, list of synts + @effects: + @output: true if t is a top limit of hd + *) +let isTopLimit t synts = + (* match t,synts with + | ITree iTree, hd::tl when (hd.y1 > iTree.tbot) && + (hd.y3-hd.y1) > (iTree.tbot-iTree.ttop) && + (iTree.tleft < hd.x3)-> true + | _,_ -> false + *) + +let lines = getLines (getRecursiveXOverlap t synts []) in + match t,lines with + | ITree iTree, ln1::[] (*Tree top ln1 base*) + when (iTree.tbot < getTop Null ln1 (List.hd ln1).y1) && + (((List.hd ln1).y3-((List.hd ln1).y1)) > (iTree.tbot-iTree.ttop)) -> true + | ITree iTree, ln1::ln2::[] (*Tree ln1 top ln2 base*) + when verticallyOverlaps t ln1 && + (((List.hd ln2).y3-((List.hd ln2).y1)) > (iTree.tbot-iTree.ttop))-> true + | _,_-> false + +(** + @edited: 25-JUL-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let hasLimits t synts tstring= + + let lines = getLines (getRecursiveXOverlap t synts []) in + match t,lines with + | ITree iTree, ln1::ln2::[] (*Tree top ln1 base ln2 bottom*) + when iTree.tbot < getTop Null ln1 (List.hd ln1).y1 -> true + | ITree iTree, ln1::ln2::[] (*Tree bottom ln1 top ln2 bottom*) + when iTree.ttop > getBot Null ln2 (List.hd ln2).y3 -> true + | ITree iTree, ln1::ln2::[] (*Tree base ln1 top ln2 bottom*) + when (iTree.ttop > getBot Null ln1 (List.hd ln1).y3) + && (iTree.tbot < getTop Null ln2 (List.hd ln2).y1) -> true + | ITree iTree, ln1::ln2::ln3::[] (*Tree ln1 top ln2 base ln3 bottom*) + when verticallyOverlaps t ln1 -> true + | ITree iTree, ln1::ln2::ln3::[] (*ln1 top tree ln2 base ln3 bottom*) + when verticallyOverlaps t ln2 -> true + | ITree iTree, ln1::ln2::ln3::[] (*ln1 top ln2 tree base ln3 bottom*) + when verticallyOverlaps t ln3 -> true + | _,_-> false + + +;; + +(** + @edited: 31-JUL-2012 + @author: Josef Baker + @input: tree synts + @effects: + @output: true if tree forms beginning of a matrix + *) +let isMatrix t synts = + match t,synts with + Null,hd::tl -> ( + if (hasNextFence (makeLeaf hd) tl) then ( + let fence = getNextFence (makeLeaf hd) tl in + let contained = getSyntsUpto fence tl [] in + let lines = getMatLines contained in + if (List.length lines) > 1 then true + else false) + else false) + | _,_->false +;; + +(** + @edited: 31-JUL-2012 + @author: Josef Baker + @input: tree synts + @effects: + @output: true if tree forms beginning of a case + *) +let isCase t synts = + match t,synts with + Null,hd::tl -> ( + if (isRemainderVerticallyBound hd tl) then + ( let lines = getMatLines tl in + if (List.length lines) > 1 then true + else false) + else false) + | _,_->false +;; + +let printRule name synts tstring = + if (false) then ( + print_newline (); + print_endline ("*"^name^"*"); + print_endline tstring; + Util.printSyntList (synts); + print_string "**"; + print_newline ();); + () + ;; + +let rec linearise t synts tstring linearised= + + + match t,synts with + (*Nothing left to process*) + | Null,[] -> linearised + (*No more input symbols*) + | ITree iTree,[] -> linearised^tstring + (*Division*) + | _,hd::tl when isDivision synts -> ( + printRule "div" synts tstring; + + updateBB hd; + let numerator = sortSynt (getBoundedAbove (makeLeaf hd) tl []) in + let denominator = sortSynt (getBoundedBelow (makeLeaf hd) tl []) in + let fraction = (numerator@denominator@[hd]) in + linearise (ITree {tleft=(getLeft t fraction hd.x1); + tright=(getRight t fraction hd.x3); + ttop=(getTop t fraction hd.y1); + tbot=(getBot t fraction hd.y3); + tbase=hd.y2;}) + (sortSynt (remove fraction synts)) + (tstring^(getSpace t synts)^" frac("^(linearise Null numerator "" "")^")("^ + (linearise Null denominator "" "")^")") linearised + + ) + (*Root*) (*Still need to sort out index and itree*) + | _,hd::tl when isRoot synts -> ( + printRule "root" synts tstring; + updateBB hd; + let body = sortSynt (getRootBody (makeLeaf hd) tl []) in + let root = body@[hd] in + linearise (ITree {tleft=(getLeft t root hd.x1); + tright=(getRight t root hd.x3); + ttop=(getTop t root hd.y1); + tbot=(getBot t root hd.y3); + tbase=hd.y2;}) + (sortSynt (remove root synts)) + (tstring^(getSpace t synts)^" functor("^(lineariseSynt hd)^")( arg("^ + (linearise Null body "" "")^"))") linearised + ) + (*Matrix*) + | _, hd::tl when isMatrix t synts -> ( + printRule "matrix" synts tstring; + updateBB hd; + let openFence = hd in + let closeFence = getNextFence (makeLeaf hd) tl in + let lines = getMatrixLines closeFence tl in + let matrix = (List.concat lines)@[openFence]@[closeFence] in + let matrixCol = ref "" in + + let processCol col = + let col = ("col("^(linearise Null (sortSynt col) "" "")^")") in + matrixCol :=!matrixCol^col + in + + let matrixStr = ref ("matrix("^(lineariseSynt hd)^")(") in + + let processMatrix row = + let cols = getMatrixCols (sortSynt row) in + List.iter processCol cols; + let row = ("row("^(!matrixCol)^")") in + matrixStr :=!matrixStr^row; + matrixCol := ""; + in + + List.iter processMatrix lines; + let matString = !matrixStr^")("^(lineariseSynt closeFence)^") " in + updateBB closeFence; + + linearise (ITree {tleft=(getLeft t matrix hd.x1); + tright=(getRight t matrix hd.x3); + ttop=(getTop t matrix hd.y1); + tbot=(getBot t matrix hd.y3); + tbase=hd.y2;}) + (sortSynt (remove matrix synts)) + (tstring^(getSpace t synts)^matString) linearised + + + + ) + (*Case*) + | _, hd::tl when isCase t synts -> ( + printRule "case" synts tstring; + + updateBB hd; + let openFence = hd in + let lines = getMatLines tl in + let case = (List.concat lines)@[openFence] in + + let caseRef = ref ("cases("^(lineariseSynt hd)^")( ") in + + let processRow row = + let row = ("line ( alignat "^(linearise Null (sortSynt row) "" "")^")") in + caseRef :=!caseRef^row + in + + List.iter processRow (lines); + + caseRef :=!caseRef^")" ; + + linearise (ITree {tleft=(getLeft t case hd.x1); + tright=(getRight t case hd.x3); + ttop=(getTop t case hd.y1); + tbot=(getBot t case hd.y3); + tbase=hd.y2;}) + (sortSynt (remove case synts)) + (tstring^(getSpace t synts)^(!caseRef)) linearised + ) + + + (*Limits*) + | ITree iTree,hd::tl when hasLimits t synts tstring ->( + + printRule "limits" synts tstring; + + let limits = getLimits t synts in + match limits with + | (1, base::bot::[]) -> ((*Tree top ln1 base ln2 bottom*) + + linearise (ITree {tleft=(getLeft t (base@bot) iTree.tleft); + tright=(getRight t (base@bot) iTree.tright); + ttop=(getTop t (base@bot) iTree.ttop); + tbot=(getBot t (base@bot) iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove (base@bot) synts)) + (" lim("^(linearise Null (sortSynt base) "" "")^")("^(tstring)^")("^(linearise Null (sortSynt bot) "" "")^")") + linearised + ) + | (2, top::base::[]) -> ((*Tree bottom ln1 top ln2 bottom*) + + linearise (ITree {tleft=(getLeft t (base@top) iTree.tleft); + tright=(getRight t (base@top) iTree.tright); + ttop=(getTop t (base@top) iTree.ttop); + tbot=(getBot t (base@top) iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove (base@top) synts)) + (" lim("^(linearise Null (sortSynt base) "" "")^")("^(linearise Null (sortSynt top) "" "")^")("^(tstring)^")") + linearised + ) + | (3, top::bot::[]) -> ((*Tree base ln1 top ln2 bottom*) + + linearise (ITree {tleft=(getLeft t (bot@top) iTree.tleft); + tright=(getRight t (bot@top) iTree.tright); + ttop=(getTop t (bot@top) iTree.ttop); + tbot=(getBot t (bot@top) iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove (bot@top) synts)) + (" lim("^(tstring)^")("^(linearise Null (sortSynt top) "" "")^")("^(linearise Null (sortSynt bot) "" "")^")") + linearised + ) + | (4, top::base::bot::[]) -> ((*Tree ln1 top ln2 base ln3 bottom*) + + linearise (ITree {tleft=(getLeft t (bot@top@base) iTree.tleft); + tright=(getRight t (bot@top@base) iTree.tright); + ttop=(getTop t (bot@top@base) iTree.ttop); + tbot=(getBot t (bot@top@base) iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove (bot@top@base) synts)) + (" lim("^(linearise Null (sortSynt base) "" "")^")("^ + (linearise t (sortSynt top) tstring "")^")("^ + (linearise Null (sortSynt bot) "" "")^")") + linearised + ) + | (5, top::base::bot::[]) -> ((*ln1 top tree ln2 base ln3 bottom*) + + linearise (ITree {tleft=(getLeft t (bot@top@base) iTree.tleft); + tright=(getRight t (bot@top@base) iTree.tright); + ttop=(getTop t (bot@top@base) iTree.ttop); + tbot=(getBot t (bot@top@base) iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove (bot@top@base) synts)) + (" lim("^(linearise t (sortSynt base) tstring "")^")("^ + (linearise Null (sortSynt top) "" "")^")("^ + (linearise Null (sortSynt bot) "" "")^")") + linearised + ) + | (6, top::base::bot::[]) -> ((*ln1 top ln2 tree base ln3 bottom*) + + linearise (ITree {tleft=(getLeft t (bot@top@base) iTree.tleft); + tright=(getRight t (bot@top@base) iTree.tright); + ttop=(getTop t (bot@top@base) iTree.ttop); + tbot=(getBot t (bot@top@base) iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove (bot@top@base) synts)) + (" lim("^(linearise Null (sortSynt base) "" "")^")("^ + (linearise Null (sortSynt top) "" "")^")("^ + (linearise t (sortSynt bot) tstring "")^")") + linearised + ) + ) + + + (*Bottom limit- when tree is base*) + | ITree iTree,hd::tl when hasBottomLimit t synts ->( + printRule "under" synts tstring; + let under = sortSynt (getBottomLimit t synts []) in + let under = fixUnderbar under in + linearise (ITree {tleft=(getLeft t under hd.x1); + tright=(getRight t under hd.x3); + ttop=(getTop t under hd.y1); + tbot=(getBot t under hd.y3); + tbase=iTree.tbase;}) + (sortSynt (remove under synts)) + (" under("^tstring^")("^(linearise Null under "" "")^")") + linearised + ) + (*Bottom limit- when tree is limit*) + | ITree iTree,hd::tl when isBottomLimit t synts ->( + printRule "under-tree" synts tstring; + let under = sortSynt (getBottomLimit (makeLeaf hd) tl []) in + let tstring = fixUnderbarString tstring in + linearise (ITree {tleft=(getLeft t under hd.x1); + tright=(getRight t under hd.x3); + ttop=(getTop t under hd.y1); + tbot=(getBot t under hd.y3); + tbase=iTree.tbase;}) + (sortSynt (remove (hd::under) synts)) + (" under("^(linearise Null [hd] "" "")^")("^(linearise t under tstring "")^")") + linearised + ) + (*Top limit- when tree is base*) + | ITree iTree,hd::tl when hasTopLimit t synts ->( + printRule "over" synts tstring; + let limit = sortSynt (getTopLimit t synts []) in + let limit = fixOverbar limit in + linearise (ITree {tleft=(getLeft t limit hd.x1); + tright=(getRight t limit hd.x3); + ttop=(getTop t limit hd.y1); + tbot=(getBot t limit hd.y3); + tbase=iTree.tbase;}) + (sortSynt (remove limit synts)) + (" over("^tstring^")("^(linearise Null limit "" "")^")") + linearised + ) + (*Top limit- when tree is limit*) + | ITree iTree,hd::tl when isTopLimit t synts ->( + printRule "over-tree" synts tstring; + let limit = sortSynt (getTopLimit (makeLeaf hd) tl []) in + let tstring = fixOverbarString tstring in + linearise (ITree {tleft=(getLeft t limit hd.x1); + tright=(getRight t limit hd.x3); + ttop=(getTop t limit hd.y1); + tbot=(getBot t limit hd.y3); + tbase=iTree.tbase;}) + (sortSynt (remove (hd::limit) synts)) + (" over("^(linearise Null [hd] "" "")^")("^(linearise t limit tstring "")^")") + linearised + ) + (*Super-sub-script*) + | ITree iTree,hd::tl when isSupSub t synts ->( + printRule "supsub" synts tstring; + match getSupSub t synts [] [] with + sup,sub -> ( + let superscript = sortSynt sup in + let subscript = sortSynt sub in + let supsub = superscript@subscript in + linearise (ITree {tleft=(getLeft t supsub iTree.tleft); + tright=(getRight t supsub iTree.tright); + ttop=(getTop t supsub iTree.ttop); + tbot=(getBot t supsub iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove supsub synts)) + (" supsub("^tstring^")("^(linearise Null superscript "" "")^")("^(linearise Null subscript "" "")^")") + linearised + ) + ) + (*Super-script*) + | ITree iTree,hd::tl when isSuper t synts ->( + printRule "sup" synts tstring; + let superscript = sortSynt (getSuper t synts []) in + linearise (ITree {tleft=(getLeft t superscript iTree.tleft); + tright=(getRight t superscript iTree.tright); + ttop=(getTop t superscript iTree.ttop); + tbot=(getBot t superscript iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove superscript synts)) + (" sup("^tstring^")("^(linearise Null superscript "" "")^")") linearised + ) + (*Sub-script*) + | ITree iTree,hd::tl when isSub t synts ->( + printRule "sub" synts tstring; + let subscript = sortSynt (getSub t synts []) in + linearise (ITree {tleft=(getLeft t subscript iTree.tleft); + tright=(getRight t subscript iTree.tright); + ttop=(getTop t subscript iTree.ttop); + tbot=(getBot t subscript iTree.tbot); + tbase=iTree.tbase;}) + (sortSynt (remove subscript synts)) + (" sub("^tstring^")("^(linearise Null subscript "" "")^")") linearised + ) + (*A leaf node*) + | Null,hd::tl -> ( + printRule "leaf" synts tstring; + updateBB hd; + linearise (makeLeaf hd) tl (lineariseSynt hd) linearised + ) + (*Linearize*) + | ITree iTree,_ -> + printRule "lin" synts tstring; + (linearise Null synts "" (linearised^tstring^(getSpace t synts)) + ) +;; + +let lineariseLine synts bbChan= + (* Util.printSyntList (sortSynt synts);*) + match synts with + h::t -> ( + let x = string_of_int (getLeft Null synts h.x1) in + let y = string_of_int (getTop Null synts h.y1) in + let w = string_of_int ((getRight Null synts h.x3) - (getLeft Null synts h.x1)) in + let h = string_of_int ((getBot Null synts h.y3) -(getTop Null synts h.y1)) in + + bbstring := ""; + let line = linearise Null (sortSynt synts) "" (x^" "^y^" "^w^" "^h^" ") in + output_string bbChan (!bbstring); + line) + | _ -> "" + +;; +(*linearize (ITree {tleft=1; tright=1; ttop=1; tbot=1; tbase=1; +tstring="Q"})*) diff --git a/src/linearize/linearizer.ml b/src/linearize/linearizer.ml new file mode 100644 index 0000000..c316ac6 --- /dev/null +++ b/src/linearize/linearizer.ml @@ -0,0 +1,167 @@ +open Jsonfio;; +open Preprocessor;; +open Linearize;; + +let indir = ref "" +let infile = ref "" +let extension = ref "lin" +let bbox = ref true +let sdout = ref false +let rest = ref false + +let pages = ref 0 +let lines = ref 0 + + +let rec fileInt count = Printf.sprintf "%03d.jsonf" count + +let rec dirInt count = Printf.sprintf "%03d/" count + +let linearizeFile file = + + let symbols = JsonfIO.getSymbols file in + let synts = Preprocessor.preprocess symbols in + + + let outBB = ((String.sub file 0 ((String.length file)-5))^"bb") in + let outBBCh = open_out outBB in + let outStr = ((Linearize.lineariseLine synts outBBCh)^"\n") in + close_out outBBCh; + + + let outFile = ((String.sub file 0 ((String.length file)-5))^(!extension)) in + let outCh = open_out outFile in + output_string outCh outStr; + close_out outCh; + if (!sdout) then( + print_newline (); + print_string outStr; + print_newline ();) + +;; + +let rec linearizeDir dir count = + + try ( + + let file = dir^(fileInt count) in +(*print_newline (); +print_string file;*) +linearizeFile file; +lines := (!lines +1); +linearizeDir dir (count+1); + + ) + with + error -> () +;; + +let rec linearizeDirs dir count = + + try ( + + let file = dir^"/"^(dirInt count) in + (* print_string file;*) + if (Sys.file_exists file) then( + linearizeDir file 0; +pages := (!pages +1); + linearizeDirs dir (count+1) + ) + else () + + ) + with + error -> () +;; + + +let usage = "usage: " ^ Sys.argv.(0) ^ " [-d string] [-e string] [-f string] +[-b] [-s]" + +let speclist = [ + ("-d", Arg.Set_string indir, ": Name of the input directory."); + ("-f", Arg.Set_string infile, ": Name of the input file (obsolete)!"); + ("-e", Arg.Set_string extension, ": Output file extension. Default is "^(!extension)^"."); + ("-b", Arg.Clear bbox, ": Sets BBox file off"); + ("-s", Arg.Set sdout, ": Outputs linearised file to stdout"); +] + +let linearizer () = + Arg.parse + speclist + (* (fun x -> raise (Arg.Bad ("Bad argument : " ^ x))) *) + (fun f -> rest := true; + try linearizeFile f with +error ->()) +(* | Failure(s) -> print_endline ("Error `Failure("^s^")' in file "^f) + | _ -> print_endline ("Error in "^f)) *) + usage; + + match !indir,!infile with + | "","" when !rest -> exit 0 + | "","" -> print_string "Either input directory or input file must be specified\n"; exit(0) + | "",file -> linearizeFile file + | dir,"" -> (linearizeDirs !indir 0; +(*Testing to see if anything has been prodeuces, and if more than 3 lines per page are being extracted*) + if (!lines>0)&& (!pages>0)&&((!lines) / (!pages) >3) then ( + let logFile = ((!indir)^"/lin.log") in + let outCh = open_out logFile in + output_string outCh (("Lines: "^(string_of_int (!lines)))^("\nPages: "^(string_of_int (!pages)))); + close_out outCh;)) + + | _,_ -> print_string "Specify either an input directory or an + input file only\n"; exit(0) + (* +let linearizer () = +if (Preprocessor.isAlpha "p s e u d o hyphen w") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "ffl comma a b period") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "aif12333") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "acaron hyphen b") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "caron") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "comma") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "commacaron") then print_endline "true" else print_endline "false"; +if (Preprocessor.isAlpha "A") then print_endline "true" else print_endline "false"; +if (Preprocessor.isNum "one 1") then print_endline "true" else print_endline "false"; +if (Preprocessor.isNum "one two") then print_endline "true" else print_endline "false"; +if (Preprocessor.isNum "1 1") then print_endline "true" else print_endline "false"; +;; + *) +let _ = linearizer () +;; +(* + + let rec main ofile count = + + let file = ofile^(fileInt count) in + + try ( + + let symbols = JsonfIO.getSymbols file in + + let synts = Preprocessor.preprocess symbols in + + let outBB = ((String.sub file 0 ((String.length file)-5))^"bb") in + + let outBBCh = open_out outBB in + + let outStr = ((Linearize.lin outBBCh synts)^"\n") in + close_out outBBCh; + + let outFile = ((String.sub file 0 ((String.length file)-5))^"txt4") in + let outCh = open_out outFile in + output_string outCh outStr; + close_out outCh; +(* print_newline (); + print_string outStr; + print_newline ();*) + main ofile (count+1); + ) + with + Sys_error e -> () + ;; + + + +main Sys.argv.(1) 0 + +*) diff --git a/src/linearize/preprocessor.ml b/src/linearize/preprocessor.ml new file mode 100644 index 0000000..a92d341 --- /dev/null +++ b/src/linearize/preprocessor.ml @@ -0,0 +1,332 @@ +open Jsonfio;; +open Synt;; +let hTol = 20;; +let test h = +(h.JsonfIO.c);; + + + +(** + @edited: 07-OCT-2012 + @author: Josef Baker + @input: a string of characters, separated by spaces + @effects: + @output: true if its an alpha character, or would form part of a word + *) +let isAlpha ch = + let reg = Str.regexp "\\(\\([A-Z]\\|[a-z]\\(acute\\|caron\\|dieresis\\)?\\)\\|\\(f+\\(i\\|l\\)?\\)\\|\\(aif[0-9]*\\)\\)\\( \\(\\([A-Z]\\|[a-z]\\(acute\\|caron\\|dieresis\\)?\\)\\|\\(f+\\(i\\|l\\)?\\)\\|\\(period\\|comma\\|colon\\|semicolon\\|hyphen\\)\\|\\(aif[0-9]*\\)\\)\\)*$" in + Str.string_match reg ch 0 +;; + +let isNum ch = + let reg = Str.regexp + "\\(\\(one\\|two\\|three\\|four\\|five\\|six\\|seven\\|eight\\|nine\\|zero\\)\\( \\(one\\|two\\|three\\|four\\|five\\|six\\|seven\\|eight\\|nine\\|zero\\)\\)*$\\)\\|\\(\\([0-9]\\)\\( \\([0-9]\\)\\)*$\\)" in + Str.string_match reg ch 0 +;; + +let isSameType s1 s2 = + ((isAlpha s1) && (isAlpha s2)) || ((isNum s1) && (isNum s2)) + ;; + + +(** + @edited: 14-FEB-2010 + @author: Josef Baker + @input: + @effects: + @output: + *) +let mergeSynt s1 s2 = + let mname = s1.name ^" "^ s2.name in + let mfont = s1.font in + let msize = s1.size in + let mx1 = s1.Synt.x1 in + let mx2 = (s1.Synt.x2+s2.Synt.x2)/2 in + let mx3 = s2.Synt.x3 in + let my1 = (min s1.Synt.y1 s2.Synt.y1) in + let my2 = (s1.Synt.y2+s2.Synt.y2)/2 in + let my3 = (max s1.Synt.y3 s2.Synt.y3) in + {name=mname; font=mfont;size=msize; + Synt.x1=mx1; Synt.x2=mx2; Synt.x3=mx3; Synt.y1=my1; Synt.y2=my2; Synt.y3=my3;} +;; +(* + +(** + @edited: 14-FEB-2010 + @author: Josef Baker + @input: + @effects: + @output: + *) +let rec hasRightGroup synt list index = + match list with + [] -> -1 + | h::t -> ( + if (((h.font = synt.font) && (h.y2 = synt.y2) && ((h.x1 - synt.x3) outList + | h::t ->( + let second = hasRightGroup h t 0 in + if (second = -1) then group t (h::outList) + else( + let str = mergeSynt h (List.nth inList second) in + let inList2 = (Synt.delete inList second) in + group (str::(List.tl inList2)) outList)) +;; + + + + +let rec removeGroup inSynts group outSynts count = + match group,inSynts with + grouphd::grouptl,synthd::synttl when grouphd=count + -> removeGroup synttl grouptl outSynts (count+1) + | grouphd::grouptl,synthd::synttl + -> removeGroup synttl group (synthd::outSynts) (count+1) + | [],synthd::synttl -> removeGroup synttl group (synthd::outSynts) (count+1) + | [],[] -> List.rev outSynts +;; + +let rec makeGroup inSynts group outSynt count = + match group,inSynts with + grouphd::grouptl,synthd::synttl when grouphd=count + -> makeGroup synttl grouptl (mergeSynt outSynt synthd )(count+1) + | grouphd::grouptl,synthd::synttl -> makeGroup synttl group outSynt (count+1) + | [],_ ->outSynt + +(** + @edited: 13-AUG-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let rec getAlphaNumGroup synt syntList group index= + match syntList with + [] -> List.rev group + | h::t when (h.font = synt.font) && (h.y2 = synt.y2) && + + (((float_of_int (h.x1-synt.x3)) + /. + ((float_of_int (h.x3-h.x1 + synt.x3-synt.x1))/.2.0)) + + + <=0.3) && + (isSameType synt.name h.name) + -> ( + getAlphaNumGroup h t (index::group) (index+1)) + | h::t when (syntsVOverlap synt h) -> List.rev group + | h::t ->( + getAlphaNumGroup synt t group (index+1)) + +(** + @edited: 13-AUG-2012 + @author: Josef Baker + @input: input list of synts, initially empty output list + @effects: + @output: + *) +let rec groupAlphaNum inSynts outSynts = + match inSynts with + [] -> outSynts + | h::t -> (let group = getAlphaNumGroup h t [0] 1 in + let inSynts = sortSynt (removeGroup inSynts group [] 0) in +(*Util.printSyntList (sortSynt inSynts);*) + let outSynts = ((makeGroup t (List.tl group) h 1)::outSynts) in + groupAlphaNum inSynts outSynts) + + *) +(** + @edited: 06-FEB-2010 + @author: Josef Baker + @input: + @effects: + @output: Elements name + *) +let rec element2name elementList = + match elementList with + (JsonfIO.Line line)::[] -> "line" + | (JsonfIO.PDFChar pdfChar)::[] -> pdfChar.JsonfIO.c + | (JsonfIO.Line line)::t -> ("line-"^element2name t) + | (JsonfIO.PDFChar pdfChar)::t -> (pdfChar.JsonfIO.c^"-"^element2name t) + | _ -> "error" +;; + +(** + @edited: 06-FEB-2010 + @author: Josef Baker + @input: + @effects: + @output: Element's size + *) +let rec element2size elementList = + match elementList with + (JsonfIO.Line line)::t -> 0.0; + | (JsonfIO.PDFChar pdfChar)::t -> pdfChar.JsonfIO.scale + | _ -> 0.0 +;; + +(** + @edited: 06-FEB-2010 + @author: Josef Baker + @input: + @effects: + @output: Element's font + *) +let rec element2font elementList = + match elementList with + (JsonfIO.Line line)::t -> "none"; + | (JsonfIO.PDFChar pdfChar)::t -> pdfChar.JsonfIO.font + | _ -> "none" +;; + +(** + @edited: 06-FEB-2010 + @author: Josef Baker + @input: + @effects: + @output: Element's base x coord + *) +let rec element2x symbol = + let elementList = symbol.JsonfIO.elements in + match elementList with + (JsonfIO.Line line)::[] -> (symbol.JsonfIO.bbox.JsonfIO.x + (symbol.JsonfIO.bbox.JsonfIO.w/2)) ; + | (JsonfIO.PDFChar pdfChar)::[] -> pdfChar.JsonfIO.bx + | _ -> (symbol.JsonfIO.bbox.JsonfIO.x + (symbol.JsonfIO.bbox.JsonfIO.w/2)) +;; + +(** + @edited: 23-JUL-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let rec element2yaux elems base = + match elems with + | JsonfIO.Line line::[] -> (max base line.JsonfIO.ey) + | JsonfIO.PDFChar pdfChar::[] -> (max base pdfChar.JsonfIO.by) + | _ -> base + + +(** + @edited: 06-FEB-2010 + @author: Josef Baker + @input: symbol + @effects: + @output: Element's base y coord + *) +let rec element2y symbol = + let elementList = symbol.JsonfIO.elements in + let baseY = element2yaux elementList 0 in + if baseY < symbol.JsonfIO.bbox.JsonfIO.y then (symbol.JsonfIO.bbox.JsonfIO.y + (symbol.JsonfIO.bbox.JsonfIO.h/2)) + else baseY +;; + +(** + @edited: 06-FEB-2010 + @author: Josef Baker + @input: List of symbols from jsonf file + @effects: + @output: corresponding list of synt objects + *) +let rec symbol2synt symbolList syntList = + match symbolList with + h::t -> symbol2synt t ({Synt.name=(element2name h.JsonfIO.elements); + Synt.font=(element2font h.JsonfIO.elements); + Synt.size=(element2size h.JsonfIO.elements); + Synt.x1=h.JsonfIO.bbox.JsonfIO.x; + Synt.x2=(element2x h); + Synt.x3=((h.JsonfIO.bbox.JsonfIO.x)+(h.JsonfIO.bbox.JsonfIO.w)); + Synt.y1=h.JsonfIO.bbox.JsonfIO.y; + Synt.y2=(element2y h); + Synt.y3=((h.JsonfIO.bbox.JsonfIO.y)+(h.JsonfIO.bbox.JsonfIO.h))}::syntList) + | _ -> syntList + + +let getWidth synt = + let sList = Str.split (Str.regexp " ") synt.name in +float_of_int (List.length sList) +;; +(** + @edited: 08-OCT-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let rec hasGroup synt list = + match list with + h::t when ((h.font = synt.font) && (h.y2 = synt.y2) && + (((float_of_int (h.x1-synt.x3)) /. ((float_of_int (h.x3-h.x1 + synt.x3-synt.x1))/.(1.0 +. (getWidth synt)))) + <=0.3) && (isSameType synt.name h.name)) -> true + | h::t when (((h.y1>synt.y1) && (h.y1synt.y1) && (h.y3h.y1) && (synt.y1h.y1) && (synt.y3 false + |[] -> false + | h::t -> hasGroup synt t +;; + + + +(** + @edited: 08-OCT-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let rec mergeGroup synt inList outList = + match inList with + h::t when ((h.font = synt.font) && (h.y2 = synt.y2) && + (((float_of_int (h.x1-synt.x3)) /. ((float_of_int (h.x3-h.x1 + + synt.x3-synt.x1))/.(1.0 +. (getWidth synt)))) + <=0.3) && (isSameType synt.name h.name)) -> ((mergeSynt synt h)::((List.rev outList)@t)) + | h::t -> mergeGroup synt t (h::outList) +;; + +(** + @edited: 08-OCT-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let rec group syntList grouped = + match syntList with + h::t when hasGroup h t -> group (mergeGroup h t []) grouped + | h::t -> group t (h::grouped) + | [] -> grouped + + + +(** + @edited: 13-AUG-2012 + @author: Josef Baker + @input: List of jsonf symbols (basically a jsonf file) + @effects: + @output: List of synts for linearize, grouped together where appropraite + *) +let preprocess symbolList = + + let syntList = symbol2synt symbolList [] in +(*Util.printSyntList (sortSynt syntList);*) + (* let syntList = sortSynt (groupAlphaNum (sortSynt syntList) []) in*) +(*Util.printSyntList (syntList);*) +let syntList = sortSynt (group (sortSynt syntList) []) in +(*Util.printSyntList (syntList);*) +syntList +;; diff --git a/src/linearize/synt.ml b/src/linearize/synt.ml new file mode 100644 index 0000000..b6a6d8b --- /dev/null +++ b/src/linearize/synt.ml @@ -0,0 +1,350 @@ + +(***********************************************************************) +(* *) +(* Holds synt&treeypes for use in linearize along with utility methods *) +(* *) +(***********************************************************************) + + + + +(*Coordinate system, origin: top left. x1 ... x3 left to right. y1 ... y3 top to bottom*) +type synt = {name: string; font: string; size: float; x1: int; x2: int; x3: int; y1: int; y2: int; y3: int};; + + +(*A tree, represented as a bounding box and linearised string, or an empty tree*) +type iTree = {tleft:int; tright:int; ttop:int; tbot:int; tbase:int;} +type tree = Null | ITree of iTree + + + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: list of synts right bound + @effects: + @output: largest right bound in list + *) +let rec getRight t synts right= + match t,synts with + | Null,[] -> right + | ITree iTree,[] -> max right iTree.tright + | _,hd::tl when hd.x3>right -> getRight t tl hd.x3 + | _,hd::tl -> getRight t tl right + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: list of synts, left bound + @effects: + @output: smallest left bound in list + *) +let rec getLeft t synts left= + match t,synts with + | Null,[] -> left + | ITree iTree,[] -> min left iTree.tleft + | _,hd::tl when hd.x1 getLeft t tl hd.x1 + | _,hd::tl -> getLeft t tl left + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: list of synts bottom bound + @effects: + @output: largest bottom bound in list + *) +let rec getBot t synts bot= + match t,synts with + | Null,[] -> bot + | ITree iTree,[] -> max bot iTree.tbot + | _,hd::tl when hd.y3>bot -> getBot t tl hd.y3 + | _,hd::tl -> getBot t tl bot + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: list of synts, top bound + @effects: + @output: smallest top bound in list + *) +let rec getTop t synts top= + match t,synts with + | Null,[] -> top + | ITree iTree,[] -> min top iTree.ttop + | _,hd::tl when hd.y1 getTop t tl hd.y1 + | _,hd::tl -> getTop t tl top + +(** + @edited: 22-JUL-2012 + @author: Josef Baker + @input: symbol + @effects: + @output: linearised form + *) +let lineariseSynt symbol = + match symbol.name with + "line" when symbol.x3 - symbol.x1 > symbol.y3-symbol.y1 -> (" ") + | "line" when symbol.y3 - symbol.y1 > symbol.x3-symbol.x1 -> (" ") + | _-> (" <"^symbol.name^" , "^symbol.font^" , "^(string_of_float symbol.size)^"> ") + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: symbol + @effects: + @output: tree (leaf) containing symbol + *) +let makeLeaf symbol = + (ITree {tleft = symbol.x1; + tright= symbol.x3; + ttop = symbol.y1; + tbot = symbol.y3; + tbase = symbol.y2;}) + +(** + @edited: 21-JUL-2012 + @author: Josef Baker + @input: tree, synts + @effects: + @output: space between tree and synts + *) +let getSpace t synts = + match t,synts with + | _,[] -> " " + | Null,_ -> " " + | ITree iTree,hd::tl when hd.x1-iTree.tright<20-> " w0 " + | ITree iTree,hd::tl when hd.x1-iTree.tright<30-> " w1 " + | ITree iTree,hd::tl when hd.x1-iTree.tright<40-> " w2 " + | ITree iTree,hd::tl when hd.x1-iTree.tright<50-> " w3 " + | _,_ -> " w4 " + +(** + @edited: 14-FEB-2010 + @author: Josef Baker + @input: List, pos + @effects: + @output: Deletes item in pos from list + *) +let rec delete inList outList item = + if item == 0 then (List.append (List.rev outList) (List.tl inList)) + else delete (List.tl inList) ((List.hd inList)::outList) (item-1) +;; + +let delete list item = +delete list [] item +;; + +let rec exists element list index = + match list with + head::tail ->( if element = head then index + else exists element tail (index+1)) + | [] -> -1 +;; + +let exists element list = + exists element list 0 +;; + +let rec remove subList list = + match subList with + head::tail -> ( + let index = exists head list in + remove tail (delete list index) + ) + | [] -> list +;; +let syntLess s1 s2 = + if s1.x1 < s2.x1 then true + else false +;; + +(*Sorts based upon left hand side*) +let rec sortSynt = function + | [] -> [] + | pivot :: rest -> + let is_less x = syntLess x pivot in + let left, right = List.partition is_less rest in + sortSynt left @ [pivot] @ sortSynt right +;; + +let syntLessV s1 s2 = + if s1.y1 < s2.y1 then true + else false +;; + +(*Sorts based upon top side*) +let rec sortSyntV = function + | [] -> [] + | pivot :: rest -> + let is_less x = syntLessV x pivot in + let left, right = List.partition is_less rest in + sortSyntV left @ [pivot] @ sortSyntV right +;; + +let rec checkMatLinesAux line = + match line with + hd::tl when hd.name = "line"-> checkMatLinesAux tl + | hd::tl when hd.name = "minus"-> checkMatLinesAux tl + | hd::tl -> true + |_ ->false +;; + +let rec checkMatLines lines = + match lines with + hd::tl when checkMatLinesAux hd -> checkMatLines tl + | hd::tl -> false + | [] -> true +;; + +let rec getMatLinesAux synts bottom line lines = + match synts with + hd::tl when hd.y1>(bottom+10) -> getMatLinesAux tl hd.y3 [hd] ((sortSynt line)::lines) + | hd::tl -> getMatLinesAux tl (max bottom hd.y3) (hd::line) lines + | [] -> (List.rev ((sortSynt line)::lines)) + +;; + +let getMatLines synts = + let synts = (sortSyntV synts) in + match synts with + h::t ->( let matLines = getMatLinesAux t h.y3 [h] [] in + if checkMatLines matLines then matLines + else []) + | _ -> [] +;; + +let rec getLinesAux synts bottom line lines = + match synts with + hd::tl when hd.y1>bottom -> getLinesAux tl hd.y3 [hd] ((sortSynt line)::lines) + | hd::tl -> getLinesAux tl (max bottom hd.y3) (hd::line) lines + | _ -> (List.rev ((sortSynt line)::lines)) +;; + +let getLines synts = + let synts = sortSyntV synts in + match synts with + h::t -> getLinesAux t h.y3 [h] [] + | _ -> [] +;; + +let rec verticallyOverlaps t synts = + match t,synts with + | Null,_ -> false + | _,[] -> false + | ITree iTree, hd::tl when + ((hd.y1 >= iTree.ttop) && (hd.y1 <= iTree.tbot)) || + ((hd.y3 >= iTree.ttop) && (hd.y3 <= iTree.tbot)) || + ((hd.y1 <= iTree.ttop) && (hd.y3 >= iTree.tbot))-> true + | ITree iTree, hd::tl -> verticallyOverlaps t tl + +let rec hasNextFence t synts = + match t, synts with + | ITree iTree, hd::tl when + (iTree.ttop = hd.y1) && (iTree.tbot = hd.y3)(* && + ((hd.x3-hd.x1)=(iTree.tright-iTree.tleft))*)-> true + | _,hd::tl -> hasNextFence t tl + | _,_ -> false +;; + +let rec getNextFence t synts = + match t, synts with + | ITree iTree, hd::tl when + (iTree.ttop = hd.y1)&&(iTree.tbot = hd.y3)(*&& + ((hd.x3-hd.x1)=(iTree.tright-iTree.tleft))*)-> hd + | _,hd::tl -> getNextFence t tl +;; + +let rec getSyntsUpto bound synts upto = + match synts with + hd::tl when ((hd.x3 < bound.x1) && + (hd.y1>=bound.y1)&& + (hd.y3<=bound.y3))-> getSyntsUpto bound tl (hd::upto) + | hd::tl when ((hd.x3 < bound.x1) && ( + (hd.y1bound.y3)))-> [] + | hd::tl -> getSyntsUpto bound tl upto + | _ -> upto +;; + +let rec isRemainderVerticallyBound bound synts = + match synts with + hd::tl when (hd.y1>=bound.y1)&& + (hd.y3<=bound.y3)-> isRemainderVerticallyBound bound tl + | hd::tl -> false + | _ -> true +;; + + + +(** + @edited: 06-AUG-2012 + @author: Josef Baker + @input: synt list + @effects: + @output: changes name to overbar if synt list is a single line element + *) +let fixOverbar synts = + match synts with + h::[] when h.name ="hline" -> [{name="overbar"; font=h.font; size=h.size; x1=h.x1; x2=h.x2; x3=h.x3; y1=h.y1; y2=h.y2; y3=h.y3}] + | _ -> synts + +(** + @edited: 06-AUG-2012 + @author: Josef Baker + @input: synt list + @effects: + @output: changes name to underbar if synt list is a single line element + *) +let fixUnderbar synts = + match synts with + h::[] when h.name ="line" ->[{name="underbar"; font=h.font; size=h.size; x1=h.x1; x2=h.x2; x3=h.x3; y1=h.y1; y2=h.y2; y3=h.y3}] + | _ -> synts + + +(** + @edited: 06-AUG-2012 + @author: Josef Baker + @input: string + @effects: + @output: changes line to underbar if string ends in a line + *) +let fixUnderbarString t = + let tregexp = Str.regexp_string "" in + try (Str.search_backward tregexp t ((String.length t)-1); + if Str.match_end () > ((String.length t)-10) + then Str.replace_matched "" t + else t) + with + Not_found -> t + +(** + @edited: 06-AUG-2012 + @author: Josef Baker + @input: string + @effects: + @output: changes line to overbar if string ends in a line + *) +let fixOverbarString t = + let tregexp = Str.regexp_string "" in + try (Str.search_backward tregexp t ((String.length t)-1); + if Str.match_end () > ((String.length t)-10) + then Str.replace_matched "" t + else t) + + with + Not_found -> t + + + +(** + @edited: 13-AUG-2012 + @author: Josef Baker + @input: + @effects: + @output: + *) +let syntsVOverlap s1 s2 = +((s1.y1 < s2.y1) && (s1.y3 > s2.y1)) || +((s1.y3 > s2.y3) && (s1.y1 < s2.y3)) || +((s1.y1 > s2.y1) && (s1.y3 strlength then false + else ( + if substring = string then true + else contains (String.sub string 1 (strlength-1)) substring ) +;; + + +let boundsMin a b = + if a=0 then b else + if ab then a + else b +;; + +let boundsMid a b = + if a =0 then b else a +;; + +let rec findBounds curBounds synts = + match synts with + [] -> curBounds + | h::t -> ( + findBounds ({x1=(boundsMin curBounds.x1 h.Synt.x1); + x2=h.Synt.x2; + x3=(boundsMax curBounds.x3 h.Synt.x3); + y1=(boundsMin curBounds.y1 h.Synt.y1); + y2=(boundsMid curBounds.y2 h.Synt.y2); + y3=(boundsMax curBounds.y3 h.Synt.y3); + size=h.Synt.size}) t) + + + + +let vertLess s1 s2 = + if s1.Synt.y1 < s2.Synt.y1 then true + else false +;; + +(*Sorts based upon top side*) +let rec sortVert = function + | [] -> [] + | pivot :: rest -> + let is_less x = vertLess x pivot in + let left, right = List.partition is_less rest in + sortVert left @ [pivot] @ sortVert right +;; + +let y3Less s1 s2 = + if s1.Synt.y3 < s2.Synt.y3 then true + else false +;; + +(*Sorts based upon top side*) +let rec sortY3 = function + | [] -> [] + | pivot :: rest -> + let is_less x = y3Less x pivot in + let left, right = List.partition is_less rest in + sortY3 left @ [pivot] @ sortY3 right +;; + + + +(** + @edited: 14-FEB-2010 + @author: Josef Baker + @input: Synt list + @effects: + @output: Prints Synt list + *) +let rec printSyntList inList = + match inList with + h::t -> (print_string h.Synt.name; + print_string " : "; + print_string h.Synt.font; + print_string " : "; + print_float h.Synt.size; + print_newline(); + print_string "X: "; + print_int h.Synt.x1; + print_string " "; + print_int h.Synt.x2; + print_string " "; + print_int h.Synt.x3; + + print_string " Y: "; + print_int h.Synt.y1; + print_string " "; + print_int h.Synt.y2; + print_string " "; + print_int h.Synt.y3; + print_newline(); + printSyntList t) + | [] -> print_string "END OF LIST"; +;; + +let rec print_test inList = + + match inList with + + h::t -> (print_string "(( "; + print_string h.Synt.name; + print_string " )( "; + print_string h.Synt.font; + print_string " )( -1 ))"; + print_test t) + | [] -> print_newline() +;; + +let test inList = + print_test (sortSynt inList) +;;