-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
1,990 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
// Use IntelliSense to learn about possible attributes. | ||
// Hover to view descriptions of existing attributes. | ||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 | ||
"version": "0.2.0", | ||
"configurations": [ | ||
{ | ||
"type": "node", | ||
"request": "launch", | ||
"name": "Launch Program", | ||
"skipFiles": [ | ||
"<node_internals>/**" | ||
], | ||
// Node launch target: runs tools/testWords.js with the tools folder as the working directory.
"program": "${workspaceFolder}/tools/testWords.js", | ||
"cwd": "${workspaceFolder}/tools" | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
(function (exports) { | ||
|
||
// true if this code is running in the browser | ||
const isBrowser = (typeof window !== "undefined"); | ||
// const SaxonJS = isBrowser ? window.SaxonJS : require('saxon-js'); | ||
// const fs = isBrowser ? null : require('fs'); | ||
const xmlUtils = isBrowser ? window.xmlUtils : require("./xml-utils"); | ||
|
||
let TEI2HTML_XSLT = 'data/tei2html.xslt' | ||
let HTML2HTML_XSLT = 'data/html2html.xslt' | ||
if (!isBrowser) { | ||
TEI2HTML_XSLT = `../app/${TEI2HTML_XSLT}` | ||
HTML2HTML_XSLT = `../app/${HTML2HTML_XSLT}` | ||
} | ||
|
||
exports.getHtmlFromTei = function(xmlString) { | ||
// Remove diacritic, b/c | ||
// a) XSLT template to markup each sign splits combined marks / modifier | ||
// b) partners requested they are hidden in annotator text viewer (because they are editorially supplied) | ||
// c) more complex to map to characters in the palaeographic definitions | ||
// Example, see 1408 and https://github.com/kingsdigitallab/crossreads/issues/37 | ||
// https://raw.githubusercontent.com/ISicily/ISicily/master/inscriptions/ISic001408.xml | ||
// έο̄ς | ||
// | ||
xmlString = xmlString.normalize("NFD") | ||
// But: | ||
// this removes non-combining marks as well, such as punctuation (ductus elevatus? middle dot) <g> | ||
// <g ref="#interpunct">·</g> | ||
// DONT USE THIS: it will remove non-diacritics, like · (middle dot) | ||
// xmlString = xmlString.replace(/\p{Diacritic}/gu, "") | ||
xmlString = xmlString.replace(/[\u0300-\u036f]/gu, "") | ||
|
||
// Remove spaces around <lb break="no"> | ||
// TODO: try to do it with XSLT? (too fiddly) | ||
xmlString = xmlString.replace(/\s*(<lb[^>]+break="no"[^>]*>)\s*/g, '$1') | ||
|
||
let ret = xmlUtils.xslt(xmlString, TEI2HTML_XSLT, true) | ||
|
||
// assign the @data-idx sequentially relative to each .is-word | ||
ret = xmlUtils.xslt(ret, HTML2HTML_XSLT, true) | ||
|
||
return ret | ||
} | ||
|
||
})(typeof exports === "undefined" ? (this["crossreadsXML"] = {}) : exports); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"N":"package","version":"10","packageVersion":"1","saxonVersion":"SaxonJS 2.5","target":"JS","targetVersion":"2","name":"TOP-LEVEL","relocatable":"false","buildDateTime":"2023-11-12T01:09:29.899Z","ns":"xml=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 =http://www.w3.org/1999/xhtml","C":[{"N":"co","id":"0","binds":"0","C":[{"N":"mode","onNo":"TC","flags":"","patternSlots":"0","prec":"","C":[{"N":"templateRule","rank":"0","prec":"0","seq":"1","ns":"xml=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 =http://www.w3.org/1999/xhtml","minImp":"0","flags":"s","slots":"200","baseUri":"file:///home/jeff/src/prj/crossreads/app/data/html2html.xslt","line":"16","module":"html2html.xslt","expand-text":"false","match":"@data-idx","prio":"0","matches":"NA nQ{}data-idx","C":[{"N":"p.nodeTest","role":"match","test":"NA nQ{}data-idx","sType":"1NA nQ{}data-idx","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 "},{"N":"let","var":"Q{}n","slot":"0","sType":"*NA ","line":"17","role":"action","C":[{"N":"doc","sType":"1ND ","base":"file:///home/jeff/src/prj/crossreads/app/data/html2html.xslt","role":"select","C":[{"N":"valueOf","flags":"l","sType":"1NT ","C":[{"N":"numSeqFmt","flags":"1","C":[{"N":"nodeNum","role":"value","level":"any","needsNode":"true","C":[{"N":"dot","sType":"1NA nQ{}data-idx","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 ex=~ ","role":"select","line":"3"},{"N":"p.withUpper","role":"count","axis":"parent","sType":"1NE u[NE nQ{}span,NE nQ{http://www.w3.org/1999/xhtml}span]","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 ex=~ ","C":[{"N":"p.withPredicate","C":[{"N":"p.nodeTest","test":"NE u[NE nQ{}span,NE nQ{http://www.w3.org/1999/xhtml}span]"},{"N":"axis","name":"attribute","nodeTest":"*NA 
nQ{}data-idx"}]},{"N":"p.withUpper","axis":"ancestor-or-self","C":[{"N":"p.nodeTest","test":"N"},{"N":"p.withPredicate","C":[{"N":"p.nodeTest","test":"NE"},{"N":"fn","name":"contains","C":[{"N":"cvUntyped","to":"AS","diag":"0|0||contains","C":[{"N":"check","card":"?","diag":"0|0||contains","C":[{"N":"attVal","name":"Q{}class"}]}]},{"N":"str","val":"is-word"},{"N":"str","val":"http://www.w3.org/2005/xpath-functions/collation/codepoint"}]}]}]}]},{"N":"p.withPredicate","role":"from","sType":"1NE","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 ex=~ ","C":[{"N":"p.nodeTest","test":"NE"},{"N":"fn","name":"contains","C":[{"N":"cvUntyped","to":"AS","diag":"0|0||contains","C":[{"N":"check","card":"?","diag":"0|0||contains","C":[{"N":"attVal","name":"Q{}class"}]}]},{"N":"str","val":"is-word"},{"N":"str","val":"http://www.w3.org/2005/xpath-functions/collation/codepoint"}]}]}]},{"N":"str","sType":"1AS ","val":"1","role":"format"},{"N":"str","sType":"1AS ","val":"1","role":"startAt"}]}]}]},{"N":"att","name":"data-idx","sType":"1NA ","line":"18","C":[{"N":"fn","name":"string-join","role":"select","C":[{"N":"first","C":[{"N":"forEach","sType":"*AS ","C":[{"N":"data","sType":"*A ","C":[{"N":"mergeAdj","C":[{"N":"valueOf","sType":"1NT ","flags":"l","C":[{"N":"fn","name":"string-join","role":"select","C":[{"N":"first","C":[{"N":"forEach","sType":"*AS ","C":[{"N":"data","sType":"*A ","C":[{"N":"mergeAdj","C":[{"N":"arith10","sType":"?AO","op":"-","calc":"d-d","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 ","role":"select","line":"5","C":[{"N":"atomSing","diag":"1|0||arith","card":"?","C":[{"N":"first","C":[{"N":"varRef","name":"Q{}n","slot":"0"}]}]},{"N":"int","val":"1"}]}]}]},{"N":"fn","name":"string","sType":"1AS ","C":[{"N":"dot"}]}]}]},{"N":"str","sType":"1AS ","val":" "}]}]}]}]},{"N":"fn","name":"string","sType":"1AS ","C":[{"N":"dot"}]}]}]},{"N":"str","sType":"1AS ","val":""}]}]}]}]},{"N":"templateRule","rank":"1","prec":"0","seq":"0","ns":"xml=~ xsl=~ 
tei=http://www.tei-c.org/ns/1.0 =http://www.w3.org/1999/xhtml","minImp":"0","flags":"s","slots":"200","baseUri":"file:///home/jeff/src/prj/crossreads/app/data/html2html.xslt","line":"10","module":"html2html.xslt","expand-text":"false","match":"node()|@*","prio":"-0.5","matches":"N u[NT,NP,NC,NE]","C":[{"N":"p.nodeTest","role":"match","test":"N u[NT,NP,NC,NE]","sType":"1N u[NT,NP,NC,NE]"},{"N":"copy","sType":"1N u[1NT ,1NP ,1NC ,1NE ] ","flags":"cin","role":"action","line":"11","C":[{"N":"applyT","sType":"* ","line":"12","mode":"#unnamed","bSlot":"0","C":[{"N":"docOrder","sType":"*N u[N u[N u[N u[NT,NP],NC],NE],NA]","role":"select","line":"12","C":[{"N":"union","op":"|","sType":"*N u[N u[N u[N u[NT,NP],NC],NE],NA]","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 ","C":[{"N":"axis","name":"child","nodeTest":"*N u[NT,NP,NC,NE]"},{"N":"axis","name":"attribute","nodeTest":"*NA"}]}]}]}]}]},{"N":"templateRule","rank":"2","prec":"0","seq":"0","ns":"xml=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 =http://www.w3.org/1999/xhtml","minImp":"0","flags":"s","slots":"200","baseUri":"file:///home/jeff/src/prj/crossreads/app/data/html2html.xslt","line":"10","module":"html2html.xslt","expand-text":"false","match":"node()|@*","prio":"-0.5","matches":"NA","C":[{"N":"p.nodeTest","role":"match","test":"NA","sType":"1NA"},{"N":"copy","sType":"1NA ","flags":"cin","role":"action","line":"11","C":[{"N":"applyT","sType":"* ","line":"12","mode":"#unnamed","bSlot":"0","C":[{"N":"docOrder","sType":"*N u[N u[N u[N u[NT,NP],NC],NE],NA]","role":"select","line":"12","C":[{"N":"union","op":"|","sType":"*N u[N u[N u[N u[NT,NP],NC],NE],NA]","ns":"= xml=~ fn=~ xsl=~ tei=http://www.tei-c.org/ns/1.0 ","C":[{"N":"axis","name":"child","nodeTest":"*N 
u[NT,NP,NC,NE]"},{"N":"axis","name":"attribute","nodeTest":"*NA"}]}]}]}]}]}]}]},{"N":"overridden"},{"N":"output","C":[{"N":"property","name":"Q{http://saxon.sf.net/}stylesheet-version","value":"10"},{"N":"property","name":"method","value":"html"},{"N":"property","name":"encoding","value":"utf-8"},{"N":"property","name":"indent","value":"yes"}]},{"N":"decimalFormat"}],"Σ":"bbe774a0"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
<?xml version="1.0" ?> | ||
<xsl:stylesheet version="1.0" | ||
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
xmlns:tei="http://www.tei-c.org/ns/1.0" | ||
xmlns="http://www.w3.org/1999/xhtml" | ||
> | ||
<!-- Purpose: copy the input document unchanged except for data-idx attributes, which are renumbered. -->
<!-- saxon-js doesn't like 'version="1.0"' --> | ||
<!-- NOTE(review): the stylesheet element above still declares version="1.0"; confirm which version is intended. -->
<xsl:output method="html" encoding="utf-8" indent="yes"/> | ||
|
||
<!-- Identity template: copies every node and attribute, recursing into child nodes and attributes. -->
<xsl:template match="node()|@*"> | ||
<xsl:copy> | ||
<xsl:apply-templates select="node()|@*"/> | ||
</xsl:copy> | ||
</xsl:template> | ||
|
||
<!-- Override for data-idx attributes: xsl:number counts span[@data-idx] elements located under an element
     whose class contains 'is-word', restarting at each such element (the from pattern). xsl:number is
     1-based, so 1 is subtracted to emit a zero-based data-idx. -->
<xsl:template match="@data-idx"> | ||
<xsl:variable name="n"><xsl:number level="any" from="*[contains(@class, 'is-word')]" count="*[contains(@class, 'is-word')]//span[@data-idx]"/></xsl:variable> | ||
<xsl:attribute name="data-idx"><xsl:value-of select="$n - 1"/></xsl:attribute> | ||
</xsl:template> | ||
|
||
</xsl:stylesheet> |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.