From 7f703c36f88189cc79b91fdade9a384e332bd6f2 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 19 Oct 2023 23:19:54 -0500 Subject: [PATCH] HTML API: Support elements in container group containing ARTICLE element. There are a handful of elements that behave similarly and are generically container elements. These are the following elements: ADDRESS, ARTICLE, ASIDE, BLOCKQUOTE, CENTER, DETAILS, DIALOG, DIR, DL, DIV, FIELDSET, FIGCAPTION, FIGURE, FOOTER, HEADER, HGROUP, MAIN, MENU, NAV, SEARCH, SECTION, SUMMARY. This patch adds support to the HTML Processor for handling these elements. They do not require any additional logic in the rest of the class, and carry no specific semantic rules for parsing beyond what is listed in their group in the IN BODY section of the HTML5 specification. --- .../html-api/class-wp-html-processor.php | 47 +++++++- .../html-api/wpHtmlProcessorBreadcrumbs.php | 35 +++--- .../html-api/wpHtmlProcessorSemanticRules.php | 107 +++++++++++++++++- 3 files changed, 165 insertions(+), 24 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index b8e1093054726..74093da51b28b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -99,12 +99,15 @@ * * The following list specifies the HTML tags that _are_ supported: * + * - Containers: ADDRESS, CENTER, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. + * - Form elements: BUTTON, FIELDSET, SEARCH. + * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. + * - Heading elements: HGROUP. * - Links: A. - * - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. - * - Containers: DIV, FIGCAPTION, FIGURE, SPAN. - * - Form elements: BUTTON. + * - Lists: DL. + * - Media elements: FIGCAPTION, FIGURE, IMG. * - Paragraph: P. - * - Void elements: IMG. 
+ * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION. * * ### Supported markup * @@ -621,11 +624,29 @@ private function step_in_body() { * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup", * > "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul" */ + case '+ADDRESS': + case '+ARTICLE': + case '+ASIDE': case '+BLOCKQUOTE': + case '+CENTER': + case '+DETAILS': + case '+DIALOG': + case '+DIR': + case '+DL': case '+DIV': + case '+FIELDSET': case '+FIGCAPTION': case '+FIGURE': + case '+FOOTER': + case '+HEADER': + case '+HGROUP': + case '+MAIN': + case '+MENU': + case '+NAV': case '+P': + case '+SEARCH': + case '+SECTION': + case '+SUMMARY': if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } @@ -639,11 +660,29 @@ private function step_in_body() { * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" */ + case '-ADDRESS': + case '-ARTICLE': + case '-ASIDE': case '-BLOCKQUOTE': case '-BUTTON': + case '-CENTER': + case '-DETAILS': + case '-DIALOG': + case '-DIR': case '-DIV': + case '-DL': + case '-FIELDSET': case '-FIGCAPTION': case '-FIGURE': + case '-FOOTER': + case '-HEADER': + case '-HGROUP': + case '-MAIN': + case '-MENU': + case '-NAV': + case '-SEARCH': + case '-SECTION': + case '-SUMMARY': if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) { // @TODO: Report parse error. // Ignore the token. 
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index d454ab4842fbf..60de148c85dd6 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -37,22 +37,39 @@ public function test_navigates_into_normative_html_for_supported_elements( $html public function data_single_tag_of_supported_elements() { $supported_elements = array( 'A', + 'ADDRESS', + 'ARTICLE', + 'ASIDE', 'B', 'BIG', 'BUTTON', + 'CENTER', // Neutralized 'CODE', + 'DETAILS', + 'DIALOG', 'DIV', + 'DL', 'EM', + 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FONT', + 'FOOTER', + 'HEADER', + 'HGROUP', 'I', 'IMG', + 'MAIN', + 'MENU', + 'NAV', 'P', + 'SEARCH', + 'SECTION', 'SMALL', 'SPAN', 'STRIKE', 'STRONG', + 'SUMMARY', 'TT', 'U', ); @@ -99,11 +116,8 @@ public function data_unsupported_elements() { $unsupported_elements = array( 'ABBR', 'ACRONYM', // Neutralized - 'ADDRESS', 'APPLET', // Deprecated 'AREA', - 'ARTICLE', - 'ASIDE', 'AUDIO', 'BASE', 'BDI', @@ -114,7 +128,6 @@ public function data_unsupported_elements() { 'BR', 'CANVAS', 'CAPTION', - 'CENTER', // Neutralized 'CITE', 'COL', 'COLGROUP', @@ -122,14 +135,9 @@ public function data_unsupported_elements() { 'DATALIST', 'DD', 'DEL', - 'DETAILS', 'DEFN', - 'DIALOG', - 'DL', 'DT', 'EMBED', - 'FIELDSET', - 'FOOTER', 'FORM', 'FRAME', 'FRAMESET', @@ -140,8 +148,6 @@ public function data_unsupported_elements() { 'H5', 'H6', 'HEAD', - 'HEADER', - 'HGROUP', 'HR', 'HTML', 'IFRAME', @@ -155,16 +161,13 @@ public function data_unsupported_elements() { 'LI', 'LINK', 'LISTING', // Deprecated, use PRE instead. 
- 'MAIN', 'MAP', 'MARK', 'MARQUEE', // Deprecated 'MATH', - 'MENU', 'META', 'METER', 'MULTICOL', // Deprecated - 'NAV', 'NEXTID', // Deprecated 'NOBR', // Neutralized 'NOEMBED', // Neutralized @@ -187,14 +190,12 @@ public function data_unsupported_elements() { 'RUBY', 'SAMP', 'SCRIPT', - 'SECTION', 'SELECT', 'SLOT', 'SOURCE', 'SPACER', // Deprecated 'STYLE', 'SUB', - 'SUMMARY', 'SUP', 'SVG', 'TABLE', @@ -348,6 +349,8 @@ public function data_html_target_with_breadcrumbs() { array( 'HTML', 'BODY', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'STRONG', 'EM', 'CODE' ), 2, ), + 'MAIN inside MAIN inside SPAN' => array( '
', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ), + 'MAIN next to unclosed P' => array( '

', array( 'HTML', 'BODY', 'MAIN' ), 1 ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index 01bb41ba844f1..dcaa39fbcdfed 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -16,6 +16,105 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * RULES FOR "IN BODY" MODE *******************************************************************/ + /** + * Verifies that tags in the container group, including the ARTICLE element, + * close out an open P element if one exists. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket {TICKET_NUMBER} + * + * @dataProvider data_article_container_group + * + * @param string $tag_name Name of tag in group under test. + */ + public function test_in_body_article_group_closes_open_p_element( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name} target>" ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $this->assertEquals( + $tag_name, + $processor->get_tag(), + "Expected to find {$tag_name} but found {$processor->get_tag()} instead." + ); + + $this->assertSame( + array( 'HTML', 'BODY', $tag_name ), + $processor->get_breadcrumbs(), + "Expected to find {$tag_name} as direct child of BODY as a result of implicitly closing an open P element." + ); + } + + /** + * Verifies that tags in the container group, including the ARTICLE element, + * nest inside each other despite being invalid in most cases. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket {TICKET_NUMBER} + * + * @dataProvider data_article_container_group + * + * @param string $tag_name Name of tag in group under test. + */ + public function test_in_body_article_group_can_nest_inside_itself( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name}><{$tag_name}><{$tag_name}><{$tag_name} target>" ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', $tag_name, $tag_name, 'SPAN', $tag_name ), + $processor->get_breadcrumbs(), + "Expected to find {$tag_name} deeply nested inside itself." + ); + } + + /** + * Data provider. + * + * @return array[]. + */ + public function data_article_container_group() { + $group = array(); + + foreach ( + array( + 'ADDRESS', + 'ARTICLE', + 'ASIDE', + 'BLOCKQUOTE', + 'CENTER', + 'DETAILS', + 'DIALOG', + 'DIR', + 'DL', + 'DIV', + 'FIELDSET', + 'FIGCAPTION', + 'FIGURE', + 'FOOTER', + 'HEADER', + 'HGROUP', + 'MAIN', + 'MENU', + 'NAV', + 'SEARCH', + 'SECTION', + 'SUMMARY', + ) + as $tag_name + ) { + $group[ $tag_name ] = array( $tag_name ); + } + + return $group; + } + /** * Verifies that when encountering an end tag for which there is no corresponding * element in scope, that it skips the tag entirely. @@ -142,11 +241,11 @@ public function test_in_body_button_with_button_in_scope_as_ancestor() { * that the HTML processor ignores the end tag if there's a special * element on the stack of open elements before the matching opening. * + * @covers WP_HTML_Processor::step_in_body + * * @ticket 58907 * * @since 6.4.0 - * - * @covers WP_HTML_Processor::step_in_body */ public function test_in_body_any_other_end_tag_with_unclosed_special_element() { $p = WP_HTML_Processor::create_fragment( '

' ); @@ -165,11 +264,11 @@ public function test_in_body_any_other_end_tag_with_unclosed_special_element() { * that the HTML processor closes appropriate elements on the stack of * open elements up to the matching opening. * + * @covers WP_HTML_Processor::step_in_body + * * @ticket 58907 * * @since 6.4.0 - * - * @covers WP_HTML_Processor::step_in_body */ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element() { $p = WP_HTML_Processor::create_fragment( '
' );