Skip to content

Commit

Permalink
HTML API: Join successive text nodes in html5lib test representation.
Browse files Browse the repository at this point in the history
Many tests from the html5lib test suite fail because of differences in
text handling between a DOM API and the HTML API, even though the
semantics of the parse are equivalent. For example, it's possible in
the HTML API to read multiple successive text nodes when the tokens
between them are ignored.

The test runner didn't account for this, so it reported failures for
parses that were semantically correct. This patch joins successive text
nodes when building the tree representation that is compared against
the html5lib expectations, so that those tests no longer fail
inaccurately.

Developed in WordPress#6984
Discussed in https://core.trac.wordpress.org/ticket/61576

Props bernhard-reiter, dmsnell, jonsurrell.
See #61576.


git-svn-id: https://develop.svn.wordpress.org/trunk@58712 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
dmsnell committed Jul 12, 2024
1 parent 7f697bc commit cd3bf1b
Showing 1 changed file with 28 additions and 9 deletions.
37 changes: 28 additions & 9 deletions tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
'menuitem-element/line0012' => 'Bug.',
'tests1/line0342' => "Closing P tag implicitly creates opener, which we don't visit.",
'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests1/line0833' => 'Bug.',
'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests15/line0022' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests2/line0650' => 'Whitespace only test never enters "in body" parsing mode.',
Expand All @@ -51,15 +50,8 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
'tests23/line0101' => 'Unimplemented: Reconstruction of active formatting elements.',
'tests25/line0169' => 'Bug.',
'tests26/line0263' => 'Bug: An active formatting element should be created for a trailing text node.',
'tests7/line0354' => 'Bug.',
'tests8/line0001' => 'Bug.',
'tests8/line0020' => 'Bug.',
'tests8/line0037' => 'Bug.',
'tests8/line0052' => 'Bug.',
'webkit01/line0174' => 'Bug.',
);


/**
* Verify the parsing results of the HTML Processor against the
* test cases in the Html5lib tests project.
Expand Down Expand Up @@ -160,12 +152,20 @@ private static function build_tree_representation( ?string $fragment_context, st
// Initially, assume we're 2 levels deep at: html > body > [position]
$indent_level = 2;
$indent = ' ';
$was_text = null;
$text_node = '';

while ( $processor->next_token() ) {
if ( ! is_null( $processor->get_last_error() ) ) {
return null;
}

if ( $was_text && '#text' !== $processor->get_token_name() ) {
$output .= "{$text_node}\"\n";
$was_text = false;
$text_node = '';
}

switch ( $processor->get_token_type() ) {
case '#tag':
$tag_name = strtolower( $processor->get_tag() );
Expand Down Expand Up @@ -198,12 +198,27 @@ private static function build_tree_representation( ?string $fragment_context, st
}
$output .= str_repeat( $indent, $tag_indent + 1 ) . "{$attribute_name}=\"{$val}\"\n";
}

// Self-contained tags contain their inner contents as modifiable text.
$modifiable_text = $processor->get_modifiable_text();
if ( '' !== $modifiable_text ) {
$was_text = true;
if ( '' === $text_node ) {
$text_node = str_repeat( $indent, $indent_level ) . '"';
}
$text_node .= $modifiable_text;
--$indent_level;
}
}

break;

case '#text':
$output .= str_repeat( $indent, $indent_level ) . "\"{$processor->get_modifiable_text()}\"\n";
$was_text = true;
if ( '' === $text_node ) {
$text_node .= str_repeat( $indent, $indent_level ) . '"';
}
$text_node .= $processor->get_modifiable_text();
break;

case '#comment':
Expand Down Expand Up @@ -238,6 +253,10 @@ private static function build_tree_representation( ?string $fragment_context, st
return null;
}

if ( '' !== $text_node ) {
$output .= "${text_node}\"\n";
}

// Tests always end with a trailing newline.
return $output . "\n";
}
Expand Down

0 comments on commit cd3bf1b

Please sign in to comment.