diff --git a/test.py b/test.py index 2d1afc00..f486424a 100644 --- a/test.py +++ b/test.py @@ -11,6 +11,7 @@ from xml2rfc.boilerplate_rfc_7841 import boilerplate_rfc_status_of_memo from xml2rfc.walkpdf import xmldoc from xml2rfc.writers.base import default_options +from xml2rfc.writers.text import MAX_WIDTH try: from xml2rfc import debug @@ -635,6 +636,67 @@ def test_boilerplate_insert_status_of_memo_editorial(self): target = 'https://www.rfc-editor.org/info/rfc9280' self.assertEqual(target, rfc.xpath('./section/t/eref')[0].get('target')) +class TextWriterTest(unittest.TestCase): + '''TextWriter tests''' + + def setUp(self): + xml2rfc.log.quiet = True + path = 'tests/input/elements.xml' + self.parser = xml2rfc.XmlRfcParser(path, + quiet=True, + options=default_options, + **options_for_xmlrfcparser) + self.xmlrfc = self.parser.parse() + self.writer = xml2rfc.TextWriter(self.xmlrfc, quiet=True) + + def test_render_reference(self): + # test annotations + reference = ''' + + + + Reference Test + + ACMI Corp. + + + + {annotation} + +''' + self.writer.refname_mapping['REFTEST'] = 'REFTEST' + + # single line annotation + annotation = 'foobar' + references = lxml.etree.fromstring(reference.format(annotation=annotation)) + lines = self.writer.render_reference(references.getchildren()[0], width=60) + self.assertEqual(len(lines), 1) + self.assertIn(annotation, lines[0].text) + + # multi line annotation + annotation = '''Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. + Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim.''' + references = lxml.etree.fromstring(reference.format(annotation=annotation)) + lines = self.writer.render_reference(references.getchildren()[0], width=60) + self.assertGreater(len(lines), 1) + self.assertIn(annotation[:5], lines[0].text) + + # single line annotation (larger than width and smaller than MAX_WIDTH) + annotation = 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commo' + references = lxml.etree.fromstring(reference.format(annotation=annotation)) + lines = self.writer.render_reference(references.getchildren()[0], width=len(annotation)-5) + self.assertGreater(MAX_WIDTH, len(annotation)) + self.assertGreater(len(lines), 1) + self.assertIn(annotation[:5], lines[0].text) + + # annotation with URL + url = 'https://example.org' + annotation = f'Example: ' + references = lxml.etree.fromstring(reference.format(annotation=annotation)) + lines = self.writer.render_reference(references.getchildren()[0], width=60) + self.assertEqual(len(lines), 2) + self.assertIn(url, lines[1].text) + if __name__ == '__main__': unittest.main() diff --git a/xml2rfc/writers/text.py b/xml2rfc/writers/text.py index 3d6fb45e..f83d9633 100644 --- a/xml2rfc/writers/text.py +++ b/xml2rfc/writers/text.py @@ -36,6 +36,9 @@ from xml2rfc.utils import justify_inline, clean_text +MAX_WIDTH = 72 +SPLITTER_WIDTH = 67 + IndexItem = namedtuple('indexitem', ['item', 'subitem', 'anchor', 'page', ]) Joiner = namedtuple('joiner', ['join', 'indent', 'hang', 'overlap', 'do_outdent']) # Joiner parts: @@ -80,8 +83,8 @@ def __init__(self, elem, prev, next=None, beg=None, end=None): self.beg = beg # beginning line of block self.end = end # ending line of block -wrapper = utils.TextWrapper(width=72) -splitter = utils.TextSplitter(width=67) +wrapper = utils.TextWrapper(width=MAX_WIDTH) +splitter = utils.TextSplitter(width=SPLITTER_WIDTH) seen = set() # This is not a complete list of whitespace characters, and isn't intended to be. It's @@ -299,7 +302,7 @@ def process(self): joiners = base_joiners if self.options.pagination: self.add_pageno_placeholders() - lines = self.render(self.root, width=72, joiners=joiners) + lines = self.render(self.root, width=MAX_WIDTH, joiners=joiners) if self.options.pagination: lines = findblocks(lines) @@ -321,8 +324,8 @@ def process(self): sys.stderr.write(("%3d %10s [%4s] %s\n" % (i, tag, page, l.text))) for i, l in enumerate(lines): length = len(l.text) - if length > 72: - self.warn(l.elem, "Too long line found (L%s), %s characters longer than 72 characters: \n%s" %(i+1, length-72, l.text)) + if length > MAX_WIDTH: + self.warn(l.elem, f"Too long line found (L{i + 1}), {length - MAX_WIDTH} characters longer than {MAX_WIDTH} characters: \n{l.text}") text = ('\n'.join( l.text for l in lines )).rstrip(stripspace) + '\n' @@ -505,7 +508,7 @@ def update_toc(self, lines): else: pass # new toc, to be used to replace the old one - toclines = self.render(toc, width=72, joiners=base_joiners) + toclines = self.render(toc, width=MAX_WIDTH, joiners=base_joiners) if toc_start and toc_end: j = 2 for i in range(toc_start+2, toc_end): @@ -1811,7 +1814,7 @@ def join_cols(left, right): textwidth_l = textwidth(l) textwidth_r = textwidth(r) #assert textwidth_l+textwidth_r< 70 - w = 72-textwidth_l-textwidth_r + w = MAX_WIDTH-textwidth_l-textwidth_r lines.append(l+' '*w+r) return '\n'.join(lines).rstrip(stripspace)+'\n' # @@ -1819,7 +1822,7 @@ def wrap(label, items, left, right, suffix=''): line = '%s%s%s' % (label, items, suffix) ll = len(left) lr = len(right) - width = 48 if ll >= lr else min(48, 72-4-len(right[ll])) + width = 48 if ll >= lr else min(48, MAX_WIDTH-4-len(right[ll])) wrapper = textwrap.TextWrapper(width=width, subsequent_indent=' '*len(label)) return wrapper.wrap(line) # @@ -2746,7 +2749,8 @@ def render_reference(self, e, width, **kwargs): text += '.' for ctag in ('annotation', ): for c in e.iterdescendants(ctag): - text = self.tjoin(text, c, width, **kwargs) + # use MAX_WIDTH here since text gets formatted later + text = self.tjoin(text, c, MAX_WIDTH, keep_url=True, **kwargs) text = fill(text, width=width, fix_sentence_endings=False, keep_url=True, **kwargs).lstrip(stripspace) text = indent(text, 11, 0) @@ -3957,7 +3961,7 @@ def find_minwidths(e, cells, hyphen_split=False): Find the minimum column widths of regular cells """ i = 0 - splitter = utils.TextSplitter(width=67, hyphen_split=hyphen_split) + splitter = utils.TextSplitter(width=SPLITTER_WIDTH, hyphen_split=hyphen_split) for p in e.iterchildren(['thead', 'tbody', 'tfoot']): for r in list(p.iterchildren('tr')): j = 0