Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(text): Don't break URLs in annotation #1104

Merged
merged 4 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from xml2rfc.boilerplate_rfc_7841 import boilerplate_rfc_status_of_memo
from xml2rfc.walkpdf import xmldoc
from xml2rfc.writers.base import default_options
from xml2rfc.writers.text import MAX_WIDTH

try:
from xml2rfc import debug
Expand Down Expand Up @@ -635,6 +636,67 @@ def test_boilerplate_insert_status_of_memo_editorial(self):
target = 'https://www.rfc-editor.org/info/rfc9280'
self.assertEqual(target, rfc.xpath('./section/t/eref')[0].get('target'))

class TextWriterTest(unittest.TestCase):
'''TextWriter tests'''

def setUp(self):
xml2rfc.log.quiet = True
path = 'tests/input/elements.xml'
self.parser = xml2rfc.XmlRfcParser(path,
quiet=True,
options=default_options,
**options_for_xmlrfcparser)
self.xmlrfc = self.parser.parse()
self.writer = xml2rfc.TextWriter(self.xmlrfc, quiet=True)

def test_render_reference(self):
# test annotations
reference = '''
<references>
<reference anchor="REFTEST">
<front>
<title>Reference Test</title>
<author initials="J." surname="Doe" fullname="Jane Doe">
<organization>ACMI Corp.</organization>
</author>
<date month="February" year="2024"/>
</front>
<annotation>{annotation}</annotation>
</reference>
</references>'''
self.writer.refname_mapping['REFTEST'] = 'REFTEST'

# single line annotation
annotation = 'foobar'
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=60)
self.assertEqual(len(lines), 1)
self.assertIn(annotation, lines[0].text)

# multi line annotation
annotation = '''Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa.
Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim.'''
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=60)
self.assertGreater(len(lines), 1)
self.assertIn(annotation[:5], lines[0].text)

# single line annotation (larger than width and smaller than MAX_WIDTH)
annotation = 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commo'
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=len(annotation)-5)
self.assertGreater(MAX_WIDTH, len(annotation))
self.assertGreater(len(lines), 1)
self.assertIn(annotation[:5], lines[0].text)

# annotation with URL
url = 'https://example.org'
annotation = f'Example: <eref target="{url}" />'
references = lxml.etree.fromstring(reference.format(annotation=annotation))
lines = self.writer.render_reference(references.getchildren()[0], width=60)
self.assertEqual(len(lines), 2)
self.assertIn(url, lines[1].text)


if __name__ == '__main__':
unittest.main()
24 changes: 14 additions & 10 deletions xml2rfc/writers/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@
from xml2rfc.utils import justify_inline, clean_text


MAX_WIDTH = 72
SPLITTER_WIDTH = 67

IndexItem = namedtuple('indexitem', ['item', 'subitem', 'anchor', 'page', ])
Joiner = namedtuple('joiner', ['join', 'indent', 'hang', 'overlap', 'do_outdent'])
# Joiner parts:
Expand Down Expand Up @@ -80,8 +83,8 @@ def __init__(self, elem, prev, next=None, beg=None, end=None):
self.beg = beg # beginning line of block
self.end = end # ending line of block

wrapper = utils.TextWrapper(width=72)
splitter = utils.TextSplitter(width=67)
wrapper = utils.TextWrapper(width=MAX_WIDTH)
splitter = utils.TextSplitter(width=SPLITTER_WIDTH)
seen = set()

# This is not a complete list of whitespace characters, and isn't intended to be. It's
Expand Down Expand Up @@ -299,7 +302,7 @@ def process(self):
joiners = base_joiners
if self.options.pagination:
self.add_pageno_placeholders()
lines = self.render(self.root, width=72, joiners=joiners)
lines = self.render(self.root, width=MAX_WIDTH, joiners=joiners)

if self.options.pagination:
lines = findblocks(lines)
Expand All @@ -321,8 +324,8 @@ def process(self):
sys.stderr.write(("%3d %10s [%4s] %s\n" % (i, tag, page, l.text)))
for i, l in enumerate(lines):
length = len(l.text)
if length > 72:
self.warn(l.elem, "Too long line found (L%s), %s characters longer than 72 characters: \n%s" %(i+1, length-72, l.text))
if length > MAX_WIDTH:
self.warn(l.elem, f"Too long line found (L{i + 1}), {length - MAX_WIDTH} characters longer than {MAX_WIDTH} characters: \n{l.text}")

text = ('\n'.join( l.text for l in lines )).rstrip(stripspace) + '\n'

Expand Down Expand Up @@ -505,7 +508,7 @@ def update_toc(self, lines):
else:
pass
# new toc, to be used to replace the old one
toclines = self.render(toc, width=72, joiners=base_joiners)
toclines = self.render(toc, width=MAX_WIDTH, joiners=base_joiners)
if toc_start and toc_end:
j = 2
for i in range(toc_start+2, toc_end):
Expand Down Expand Up @@ -1811,15 +1814,15 @@ def join_cols(left, right):
textwidth_l = textwidth(l)
textwidth_r = textwidth(r)
#assert textwidth_l+textwidth_r< 70
w = 72-textwidth_l-textwidth_r
w = MAX_WIDTH-textwidth_l-textwidth_r
lines.append(l+' '*w+r)
return '\n'.join(lines).rstrip(stripspace)+'\n'
#
def wrap(label, items, left, right, suffix=''):
line = '%s%s%s' % (label, items, suffix)
ll = len(left)
lr = len(right)
width = 48 if ll >= lr else min(48, 72-4-len(right[ll]))
width = 48 if ll >= lr else min(48, MAX_WIDTH-4-len(right[ll]))
wrapper = textwrap.TextWrapper(width=width, subsequent_indent=' '*len(label))
return wrapper.wrap(line)
#
Expand Down Expand Up @@ -2746,7 +2749,8 @@ def render_reference(self, e, width, **kwargs):
text += '.'
for ctag in ('annotation', ):
for c in e.iterdescendants(ctag):
text = self.tjoin(text, c, width, **kwargs)
# use MAX_WIDTH here since text gets formatted later
text = self.tjoin(text, c, MAX_WIDTH, keep_url=True, **kwargs)
kesara marked this conversation as resolved.
Show resolved Hide resolved
text = fill(text, width=width, fix_sentence_endings=False, keep_url=True, **kwargs).lstrip(stripspace)

text = indent(text, 11, 0)
Expand Down Expand Up @@ -3957,7 +3961,7 @@ def find_minwidths(e, cells, hyphen_split=False):
Find the minimum column widths of regular cells
"""
i = 0
splitter = utils.TextSplitter(width=67, hyphen_split=hyphen_split)
splitter = utils.TextSplitter(width=SPLITTER_WIDTH, hyphen_split=hyphen_split)
for p in e.iterchildren(['thead', 'tbody', 'tfoot']):
for r in list(p.iterchildren('tr')):
j = 0
Expand Down
Loading