From 7fc51fe8692f86e57d7a039b516fbcee4a676e66 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Thu, 19 Oct 2023 23:19:54 -0500 Subject: [PATCH] HTML API: Support elements in container group containing ARTICLE element. There are a handful of elements that behave similarly and are generically container elements. These are the following elements: ADDRESS, ARTICLE, ASIDE, BLOCKQUOTE, CENTER, DETAILS, DIALOG, DIR, DL, DIV, FIELDSET, FIGCAPTION, FIGURE, FOOTER, HEADER, HGROUP, MAIN, MENU, NAV, SEARCH, SECTION, SUMMARY. This patch adds support to the HTML Processor for handling these elements. They do not require any additional logic in the rest of the class, and carry no specific semantic rules for parsing beyond what is listed in their group in the IN BODY section of the HTML5 specification. --- .../html-api/class-wp-html-processor.php | 48 +++++++- .../html-api/wpHtmlProcessorBreadcrumbs.php | 35 +++--- .../html-api/wpHtmlProcessorSemanticRules.php | 107 +++++++++++++++++- 3 files changed, 165 insertions(+), 25 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 9dd85228e1092..72059e9f93d9b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -99,13 +99,15 @@ * * The following list specifies the HTML tags that _are_ supported: * + * - Containers: ADDRESS, CENTER, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. + * - Form elements: BUTTON, FIELDSET, SEARCH. + * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. + * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. * - Links: A. - * - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. - * - Containers: DIV, FIGCAPTION, FIGURE, SPAN. - * - Form elements: BUTTON. - * - Heading elements: H1, H2, H3, H4, H5, H6. + * - Lists: DL. + * - Media elements: FIGCAPTION, FIGURE, IMG. 
* - Paragraph: P. - * - Void elements: IMG. + * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION. * * ### Supported markup * @@ -622,11 +624,29 @@ private function step_in_body() { * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup", * > "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul" */ + case '+ADDRESS': + case '+ARTICLE': + case '+ASIDE': case '+BLOCKQUOTE': + case '+CENTER': + case '+DETAILS': + case '+DIALOG': + case '+DIR': + case '+DL': case '+DIV': + case '+FIELDSET': case '+FIGCAPTION': case '+FIGURE': + case '+FOOTER': + case '+HEADER': + case '+HGROUP': + case '+MAIN': + case '+MENU': + case '+NAV': case '+P': + case '+SEARCH': + case '+SECTION': + case '+SUMMARY': if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } @@ -640,11 +660,29 @@ private function step_in_body() { * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" */ + case '-ADDRESS': + case '-ARTICLE': + case '-ASIDE': case '-BLOCKQUOTE': case '-BUTTON': + case '-CENTER': + case '-DETAILS': + case '-DIALOG': + case '-DIR': case '-DIV': + case '-DL': + case '-FIELDSET': case '-FIGCAPTION': case '-FIGURE': + case '-FOOTER': + case '-HEADER': + case '-HGROUP': + case '-MAIN': + case '-MENU': + case '-NAV': + case '-SEARCH': + case '-SECTION': + case '-SUMMARY': if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) { // @TODO: Report parse error. // Ignore the token. 
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php index b74b7a128cd3d..cf11ab99e8e43 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php @@ -37,28 +37,45 @@ public function test_navigates_into_normative_html_for_supported_elements( $html public function data_single_tag_of_supported_elements() { $supported_elements = array( 'A', + 'ADDRESS', + 'ARTICLE', + 'ASIDE', 'B', 'BIG', 'BUTTON', + 'CENTER', // Neutralized 'CODE', + 'DETAILS', + 'DIALOG', 'DIV', + 'DL', 'EM', + 'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FONT', + 'FOOTER', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', + 'HEADER', + 'HGROUP', 'I', 'IMG', + 'MAIN', + 'MENU', + 'NAV', 'P', + 'SEARCH', + 'SECTION', 'SMALL', 'SPAN', 'STRIKE', 'STRONG', + 'SUMMARY', 'TT', 'U', ); @@ -105,11 +122,8 @@ public function data_unsupported_elements() { $unsupported_elements = array( 'ABBR', 'ACRONYM', // Neutralized - 'ADDRESS', 'APPLET', // Deprecated 'AREA', - 'ARTICLE', - 'ASIDE', 'AUDIO', 'BASE', 'BDI', @@ -120,7 +134,6 @@ public function data_unsupported_elements() { 'BR', 'CANVAS', 'CAPTION', - 'CENTER', // Neutralized 'CITE', 'COL', 'COLGROUP', @@ -128,20 +141,13 @@ public function data_unsupported_elements() { 'DATALIST', 'DD', 'DEL', - 'DETAILS', 'DEFN', - 'DIALOG', - 'DL', 'DT', 'EMBED', - 'FIELDSET', - 'FOOTER', 'FORM', 'FRAME', 'FRAMESET', 'HEAD', - 'HEADER', - 'HGROUP', 'HR', 'HTML', 'IFRAME', @@ -155,16 +161,13 @@ public function data_unsupported_elements() { 'LI', 'LINK', 'LISTING', // Deprecated, use PRE instead. 
- 'MAIN', 'MAP', 'MARK', 'MARQUEE', // Deprecated 'MATH', - 'MENU', 'META', 'METER', 'MULTICOL', // Deprecated - 'NAV', 'NEXTID', // Deprecated 'NOBR', // Neutralized 'NOEMBED', // Neutralized @@ -187,14 +190,12 @@ public function data_unsupported_elements() { 'RUBY', 'SAMP', 'SCRIPT', - 'SECTION', 'SELECT', 'SLOT', 'SOURCE', 'SPACER', // Deprecated 'STYLE', 'SUB', - 'SUMMARY', 'SUP', 'SVG', 'TABLE', @@ -350,6 +351,8 @@ public function data_html_target_with_breadcrumbs() { ), 'EM inside H3 after unclosed P' => array( '

Important Message

', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ), 'H4 after H2' => array( '

Major

Minor

', array( 'HTML', 'BODY', 'H4' ), 1 ), + 'MAIN inside MAIN inside SPAN' => array( '
', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ), + 'MAIN next to unclosed P' => array( '

', array( 'HTML', 'BODY', 'MAIN' ), 1 ), ); } diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index cf2dd527ff2ef..66a66b3f00816 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -16,6 +16,105 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase { * RULES FOR "IN BODY" MODE *******************************************************************/ + /** + * Verifies that tags in the container group, including the ARTICLE element, + * close out an open P element if one exists. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket {TICKET_NUMBER} + * + * @dataProvider data_article_container_group + * + * @param string $tag_name Name of tag in group under test. + */ + public function test_in_body_article_group_closes_open_p_element( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name} target>" ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $this->assertEquals( + $tag_name, + $processor->get_tag(), + "Expected to find {$tag_name} but found {$processor->get_tag()} instead." + ); + + $this->assertSame( + array( 'HTML', 'BODY', $tag_name ), + $processor->get_breadcrumbs(), + "Expected to find {$tag_name} as direct child of BODY as a result of implicitly closing an open P element." + ); + } + + /** + * Verifies that tags in the container group, including the ARTICLE element, + * nest inside each other despite being invalid in most cases. + * + * @covers WP_HTML_Processor::step_in_body + * + * @ticket {TICKET_NUMBER} + * + * @dataProvider data_article_container_group + * + * @param string $tag_name Name of tag in group under test. + */ + public function test_in_body_article_group_can_nest_inside_itself( $tag_name ) { + $processor = WP_HTML_Processor::create_fragment( "

<{$tag_name}><{$tag_name}><{$tag_name}><{$tag_name} target>" ); + + while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) { + continue; + } + + $this->assertSame( + array( 'HTML', 'BODY', 'DIV', $tag_name, $tag_name, 'SPAN', $tag_name ), + $processor->get_breadcrumbs(), + "Expected to find {$tag_name} deeply nested inside itself." + ); + } + + /** + * Data provider. + * + * @return array[]. + */ + public function data_article_container_group() { + $group = array(); + + foreach ( + array( + 'ADDRESS', + 'ARTICLE', + 'ASIDE', + 'BLOCKQUOTE', + 'CENTER', + 'DETAILS', + 'DIALOG', + 'DIR', + 'DL', + 'DIV', + 'FIELDSET', + 'FIGCAPTION', + 'FIGURE', + 'FOOTER', + 'HEADER', + 'HGROUP', + 'MAIN', + 'MENU', + 'NAV', + 'SEARCH', + 'SECTION', + 'SUMMARY', + ) + as $tag_name + ) { + $group[ $tag_name ] = array( $tag_name ); + } + + return $group; + } + /** * Verifies that when encountering an end tag for which there is no corresponding * element in scope, that it skips the tag entirely. @@ -231,11 +330,11 @@ public function data_heading_combinations() { * that the HTML processor ignores the end tag if there's a special * element on the stack of open elements before the matching opening. * + * @covers WP_HTML_Processor::step_in_body + * * @ticket 58907 * * @since 6.4.0 - * - * @covers WP_HTML_Processor::step_in_body */ public function test_in_body_any_other_end_tag_with_unclosed_special_element() { $p = WP_HTML_Processor::create_fragment( '

' ); @@ -254,11 +353,11 @@ public function test_in_body_any_other_end_tag_with_unclosed_special_element() { * that the HTML processor closes appropriate elements on the stack of * open elements up to the matching opening. * + * @covers WP_HTML_Processor::step_in_body + * * @ticket 58907 * * @since 6.4.0 - * - * @covers WP_HTML_Processor::step_in_body */ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element() { $p = WP_HTML_Processor::create_fragment( '
' );