Skip to content

Commit

Permalink
HTML API: Support elements in container group containing ARTICLE element.
Browse files Browse the repository at this point in the history
There are a handful of elements that behave similarly and are generically
container elements. These are the following elements:

    ADDRESS, ARTICLE, ASIDE, BLOCKQUOTE, CENTER, DETAILS, DIALOG, DIR,
    DL, DIV, FIELDSET, FIGCAPTION, FIGURE, FOOTER, HEADER, HGROUP, MAIN,
    MENU, NAV, SEARCH, SECTION, SUMMARY.

This patch adds support to the HTML Processor for handling these elements.
They do not require any additional logic in the rest of the class, and carry
no specific semantic rules for parsing beyond what is listed in their group
in the IN BODY section of the HTML5 specification.
  • Loading branch information
dmsnell committed Nov 15, 2023
1 parent 0b8ca16 commit 7f703c3
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 24 deletions.
47 changes: 43 additions & 4 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,15 @@
*
* The following list specifies the HTML tags that _are_ supported:
*
* - Containers: ADDRESS, CENTER, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
* - Form elements: BUTTON, FIELDSET, SEARCH.
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Heading elements: HGROUP.
* - Links: A.
* - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Containers: DIV, FIGCAPTION, FIGURE, SPAN.
* - Form elements: BUTTON.
* - Lists: DL.
* - Media elements: FIGCAPTION, FIGURE, IMG.
* - Paragraph: P.
* - Void elements: IMG.
* - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION.
*
* ### Supported markup
*
Expand Down Expand Up @@ -621,11 +624,29 @@ private function step_in_body() {
* > "fieldset", "figcaption", "figure", "footer", "header", "hgroup",
* > "main", "menu", "nav", "ol", "p", "search", "section", "summary", "ul"
*/
case '+ADDRESS':
case '+ARTICLE':
case '+ASIDE':
case '+BLOCKQUOTE':
case '+CENTER':
case '+DETAILS':
case '+DIALOG':
case '+DIR':
case '+DL':
case '+DIV':
case '+FIELDSET':
case '+FIGCAPTION':
case '+FIGURE':
case '+FOOTER':
case '+HEADER':
case '+HGROUP':
case '+MAIN':
case '+MENU':
case '+NAV':
case '+P':
case '+SEARCH':
case '+SECTION':
case '+SUMMARY':
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
Expand All @@ -639,11 +660,29 @@ private function step_in_body() {
* > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
* > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
*/
case '-ADDRESS':
case '-ARTICLE':
case '-ASIDE':
case '-BLOCKQUOTE':
case '-BUTTON':
case '-CENTER':
case '-DETAILS':
case '-DIALOG':
case '-DIR':
case '-DIV':
case '-DL':
case '-FIELDSET':
case '-FIGCAPTION':
case '-FIGURE':
case '-FOOTER':
case '-HEADER':
case '-HGROUP':
case '-MAIN':
case '-MENU':
case '-NAV':
case '-SEARCH':
case '-SECTION':
case '-SUMMARY':
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
// @TODO: Report parse error.
// Ignore the token.
Expand Down
35 changes: 19 additions & 16 deletions tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,39 @@ public function test_navigates_into_normative_html_for_supported_elements( $html
public function data_single_tag_of_supported_elements() {
$supported_elements = array(
'A',
'ADDRESS',
'ARTICLE',
'ASIDE',
'B',
'BIG',
'BUTTON',
'CENTER', // Neutralized
'CODE',
'DETAILS',
'DIALOG',
'DIV',
'DL',
'EM',
'FIELDSET',
'FIGCAPTION',
'FIGURE',
'FONT',
'FOOTER',
'HEADER',
'HGROUP',
'I',
'IMG',
'MAIN',
'MENU',
'NAV',
'P',
'SEARCH',
'SECTION',
'SMALL',
'SPAN',
'STRIKE',
'STRONG',
'SUMMARY',
'TT',
'U',
);
Expand Down Expand Up @@ -99,11 +116,8 @@ public function data_unsupported_elements() {
$unsupported_elements = array(
'ABBR',
'ACRONYM', // Neutralized
'ADDRESS',
'APPLET', // Deprecated
'AREA',
'ARTICLE',
'ASIDE',
'AUDIO',
'BASE',
'BDI',
Expand All @@ -114,22 +128,16 @@ public function data_unsupported_elements() {
'BR',
'CANVAS',
'CAPTION',
'CENTER', // Neutralized
'CITE',
'COL',
'COLGROUP',
'DATA',
'DATALIST',
'DD',
'DEL',
'DETAILS',
'DEFN',
'DIALOG',
'DL',
'DT',
'EMBED',
'FIELDSET',
'FOOTER',
'FORM',
'FRAME',
'FRAMESET',
Expand All @@ -140,8 +148,6 @@ public function data_unsupported_elements() {
'H5',
'H6',
'HEAD',
'HEADER',
'HGROUP',
'HR',
'HTML',
'IFRAME',
Expand All @@ -155,16 +161,13 @@ public function data_unsupported_elements() {
'LI',
'LINK',
'LISTING', // Deprecated, use PRE instead.
'MAIN',
'MAP',
'MARK',
'MARQUEE', // Deprecated
'MATH',
'MENU',
'META',
'METER',
'MULTICOL', // Deprecated
'NAV',
'NEXTID', // Deprecated
'NOBR', // Neutralized
'NOEMBED', // Neutralized
Expand All @@ -187,14 +190,12 @@ public function data_unsupported_elements() {
'RUBY',
'SAMP',
'SCRIPT',
'SECTION',
'SELECT',
'SLOT',
'SOURCE',
'SPACER', // Deprecated
'STYLE',
'SUB',
'SUMMARY',
'SUP',
'SVG',
'TABLE',
Expand Down Expand Up @@ -348,6 +349,8 @@ public function data_html_target_with_breadcrumbs() {
array( 'HTML', 'BODY', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'DIV', 'STRONG', 'EM', 'CODE' ),
2,
),
'MAIN inside MAIN inside SPAN' => array( '<span><main><main target>', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ),
'MAIN next to unclosed P' => array( '<p><main target>', array( 'HTML', 'BODY', 'MAIN' ), 1 ),
);
}

Expand Down
107 changes: 103 additions & 4 deletions tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,105 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase {
* RULES FOR "IN BODY" MODE
*******************************************************************/

/**
 * Verifies that tags in the container group, including the ARTICLE element,
 * close out an open P element if one exists.
 *
 * The fragment under test opens four P elements in a row and then opens the
 * tag from the data provider, marked with a `target` attribute. The test
 * asserts that the matched tag is the expected one and that its breadcrumbs
 * are exactly HTML > BODY > tag, i.e. no P element remains open above it.
 *
 * @covers WP_HTML_Processor::step_in_body
 *
 * @ticket {TICKET_NUMBER}
 *
 * @dataProvider data_article_container_group
 *
 * @param string $tag_name Name of tag in group under test.
 */
public function test_in_body_article_group_closes_open_p_element( $tag_name ) {
	$processor = WP_HTML_Processor::create_fragment( "<p><p><p><p><{$tag_name} target>" );

	// Advance until a tag carrying the `target` attribute is matched or no more tags are found.
	while ( $processor->next_tag() && null === $processor->get_attribute( 'target' ) ) {
		continue;
	}

	/*
	 * Strict comparison keeps this check consistent with the breadcrumb
	 * assertion below and avoids PHP's loose-equality type juggling between
	 * the expected tag name and whatever the processor reports.
	 */
	$this->assertSame(
		$tag_name,
		$processor->get_tag(),
		"Expected to find {$tag_name} but found {$processor->get_tag()} instead."
	);

	$this->assertSame(
		array( 'HTML', 'BODY', $tag_name ),
		$processor->get_breadcrumbs(),
		"Expected to find {$tag_name} as direct child of BODY as a result of implicitly closing an open P element."
	);
}

/**
 * Ensures that an element from the container group (the group that includes
 * ARTICLE) may be opened inside another element of the same name, even though
 * such nesting is invalid markup in most cases.
 *
 * The fragment opens the tag inside a DIV, opens and closes a second copy,
 * opens a third copy, then a SPAN, and finally the targeted copy. The expected
 * breadcrumbs therefore contain the tag three times.
 *
 * @covers WP_HTML_Processor::step_in_body
 *
 * @ticket {TICKET_NUMBER}
 *
 * @dataProvider data_article_container_group
 *
 * @param string $tag_name Name of tag in group under test.
 */
public function test_in_body_article_group_can_nest_inside_itself( $tag_name ) {
	$html      = "<div><{$tag_name}><{$tag_name}></{$tag_name}><{$tag_name}><span><{$tag_name} target>";
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Walk tag by tag and stop at the first one that carries the `target` attribute.
	while ( $processor->next_tag() ) {
		if ( null !== $processor->get_attribute( 'target' ) ) {
			break;
		}
	}

	$expected_breadcrumbs = array( 'HTML', 'BODY', 'DIV', $tag_name, $tag_name, 'SPAN', $tag_name );

	$this->assertSame(
		$expected_breadcrumbs,
		$processor->get_breadcrumbs(),
		"Expected to find {$tag_name} deeply nested inside itself."
	);
}

/**
 * Data provider listing every tag in the container group under test.
 *
 * Each dataset is keyed by the tag name and carries that same tag name as
 * its single argument.
 *
 * @return array[]
 */
public function data_article_container_group() {
	$tag_names = array(
		'ADDRESS',
		'ARTICLE',
		'ASIDE',
		'BLOCKQUOTE',
		'CENTER',
		'DETAILS',
		'DIALOG',
		'DIR',
		'DL',
		'DIV',
		'FIELDSET',
		'FIGCAPTION',
		'FIGURE',
		'FOOTER',
		'HEADER',
		'HGROUP',
		'MAIN',
		'MENU',
		'NAV',
		'SEARCH',
		'SECTION',
		'SUMMARY',
	);

	// Chunks of size one wrap each name in its own single-item argument list.
	return array_combine( $tag_names, array_chunk( $tag_names, 1 ) );
}

/**
* Verifies that when encountering an end tag for which there is no corresponding
* element in scope, that it skips the tag entirely.
Expand Down Expand Up @@ -142,11 +241,11 @@ public function test_in_body_button_with_button_in_scope_as_ancestor() {
* that the HTML processor ignores the end tag if there's a special
* element on the stack of open elements before the matching opening.
*
* @covers WP_HTML_Processor::step_in_body
*
* @ticket 58907
*
* @since 6.4.0
*
* @covers WP_HTML_Processor::step_in_body
*/
public function test_in_body_any_other_end_tag_with_unclosed_special_element() {
$p = WP_HTML_Processor::create_fragment( '<div><span><p></span><div>' );
Expand All @@ -165,11 +264,11 @@ public function test_in_body_any_other_end_tag_with_unclosed_special_element() {
* that the HTML processor closes appropriate elements on the stack of
* open elements up to the matching opening.
*
* @covers WP_HTML_Processor::step_in_body
*
* @ticket 58907
*
* @since 6.4.0
*
* @covers WP_HTML_Processor::step_in_body
*/
public function test_in_body_any_other_end_tag_with_unclosed_non_special_element() {
$p = WP_HTML_Processor::create_fragment( '<div><span><code></span><div>' );
Expand Down

0 comments on commit 7f703c3

Please sign in to comment.