diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1fccc7dd..6625b8ed 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -122,4 +122,8 @@ jobs: # Warning: the diff line below is PowerShell syntax, not bash! run: | echo ćś | readalongs make-xml -l fra - - | findstr /v meta > cs.readalong + echo Output ==== + cat cs.readalong + echo Reference ==== + cat test/data/cs-ref.readalong if (diff (cat cs.readalong) (cat test/data/cs-ref.readalong)) { throw "Output did not match reference" } diff --git a/docs/cli-guide.md b/docs/cli-guide.md index b683431d..c7e27e40 100644 --- a/docs/cli-guide.md +++ b/docs/cli-guide.md @@ -67,7 +67,7 @@ The format of the generated XML is based on [TEI Lite](https://tei-c.org/guidelines/customization/lite/) but is considerably simplified. The DTD (document type definition) can be found in the ReadAlong Studio source code under -`readalongs/static/read-along-1.1.dtd`. +`readalongs/static/read-along-1.2.dtd`. (dna)= diff --git a/readalongs/_version.py b/readalongs/_version.py index a70b1685..a17ef292 100644 --- a/readalongs/_version.py +++ b/readalongs/_version.py @@ -1,3 +1,3 @@ VERSION = "1.1.0" -READALONG_FILE_FORMAT_VERSION = "1.1" +READALONG_FILE_FORMAT_VERSION = "1.2" diff --git a/readalongs/align.py b/readalongs/align.py index 74416ea9..88df880a 100644 --- a/readalongs/align.py +++ b/readalongs/align.py @@ -181,7 +181,7 @@ def parse_and_make_xml( """Parse XML input and run tokenization and G2P. Args: - xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.1.dtd) + xml_path (str): Path to input in ReadAlong XML format (see static/read-along-1.2.dtd) config (dict): Optional; ReadAlong-Studio configuration to use save_temps (str): Optional; Save temporary files, by default None verbose_g2p_warnings (boolean): Optional; display all g2p errors and warnings @@ -574,7 +574,7 @@ def align_audio( """Align an XML input file to an audio file. Args: - xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.1.dtd) + xml_path (str): Path to input file in ReadAlong XML format (see static/read-along-1.2.dtd) audio_path (str): Path to audio input. Must be in a format supported by ffmpeg unit (str): Optional; Element to create alignments for, by default 'w' bare (boolean): Optional; @@ -1192,7 +1192,7 @@ def convert_to_xhtml(tokenized_xml, title="Book"): def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> str: - """Create input xml in ReadAlong XML format (see static/read-along-1.1.dtd) + """Create input xml in ReadAlong XML format (see static/read-along-1.2.dtd) Uses the line sequence to infer paragraph and sentence structure from plain text: Assumes a double blank line marks a page break, and a single blank line marks a paragraph break. @@ -1240,7 +1240,7 @@ def create_ras_from_text(lines: Iterable[str], text_languages=Sequence[str]) -> def create_input_ras(**kwargs): - """Create input xml in ReadAlong XML format (see static/read-along-1.1.dtd) + """Create input xml in ReadAlong XML format (see static/read-along-1.2.dtd) Uses readlines to infer paragraph and sentence structure from plain text. Assumes a double blank line marks a page break, and a single blank line marks a paragraph break. diff --git a/readalongs/static/read-along-1.2.dtd b/readalongs/static/read-along-1.2.dtd new file mode 100644 index 00000000..4c16a8aa --- /dev/null +++ b/readalongs/static/read-along-1.2.dtd @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/readalongs/web_api.py b/readalongs/web_api.py index 29b2e5e8..c5a2ec4a 100644 --- a/readalongs/web_api.py +++ b/readalongs/web_api.py @@ -78,7 +78,7 @@ # Call get_langs() when the server loads to load the languages into memory LANGS = get_langs() # Get the DTD -DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.1.dtd") +DTDPATH = os.path.join(os.path.dirname(__file__), "static", "read-along-1.2.dtd") with open(DTDPATH) as dtdfh: DTD = etree.DTD(dtdfh) diff --git a/test/data/cs-ref.readalong b/test/data/cs-ref.readalong index b5d09e82..b5863fc5 100644 --- a/test/data/cs-ref.readalong +++ b/test/data/cs-ref.readalong @@ -1,5 +1,5 @@ - +
diff --git a/test/data/ej-fra-annotated.readalong b/test/data/ej-fra-annotated.readalong new file mode 100644 index 00000000..5fdbdfaf --- /dev/null +++ b/test/data/ej-fra-annotated.readalong @@ -0,0 +1,34 @@ + + + + + + + + + +
+ +

+ Bonjour. + + Kwei. + + Hello. + Je m'appelle Éric Joanis. + Éric + Joanis nindijinikàz. + My + name is Éric Joanis. +

+
+ + +
+
diff --git a/test/data/ras-dtd-1.2.readalong b/test/data/ras-dtd-1.2.readalong new file mode 100644 index 00000000..5fdbdfaf --- /dev/null +++ b/test/data/ras-dtd-1.2.readalong @@ -0,0 +1,34 @@ + + + + + + + + + +
+ +

+ Bonjour. + + Kwei. + + Hello. + Je m'appelle Éric Joanis. + Éric + Joanis nindijinikàz. + My + name is Éric Joanis. +

+
+ + +
+
diff --git a/test/test_dtd.py b/test/test_dtd.py index f0a279cd..78483c77 100644 --- a/test/test_dtd.py +++ b/test/test_dtd.py @@ -11,12 +11,13 @@ from readalongs.text.util import load_xml DTDPATH = os.path.join( - dirname(__file__), "..", "readalongs", "static", "read-along-1.1.dtd" + dirname(__file__), "..", "readalongs", "static", "read-along-1.2.dtd" ) VALID_RAS = """ ej-fra-anchors2.readalong ej-fra-anchors.readalong +ej-fra-annotated.readalong ej-fra-converted.readalong ej-fra-dna.readalong ej-fra-package.readalong @@ -70,7 +71,11 @@ def test_invalid_inputs(self): def test_backwards_compatibility(self): # the DTD needs to be backwards compatible as long as the major version does not change - versions = ["ras-dtd-1.0.readalong", "ras-dtd-1.1.readalong"] + versions = [ + "ras-dtd-1.0.readalong", + "ras-dtd-1.1.readalong", + "ras-dtd-1.2.readalong", + ] for name in versions: path = os.path.join(dirname(__file__), "data", name.strip()) # DTD is text, XML is binary... okay