diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index ca7a4cf3e0e4f..168b8ace4c394 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -3972,7 +3972,71 @@ private function step_in_template(): bool {
* @return bool Whether an element was found.
*/
private function step_after_body(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY . ' state.' );
+ $tag_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$tag_name}";
+
+ switch ( $op ) {
+ /*
+ * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ * > Process the token using the rules for the "in body" insertion mode.
+ * Only whitespace-only text qualifies; any other text jumps to "anything else" below.
+ */
+ case '#text':
+ $text = $this->get_modifiable_text();
+ if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+ return $this->step_in_body();
+ }
+ goto after_body_anything_else;
+ break;
+
+ /*
+ * > A comment token (unsupported in this mode: the parser bails)
+ */
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->bail( 'Content outside of BODY is unsupported.' );
+ break;
+
+ /*
+ * > A DOCTYPE token (the lowercase 'html' op; tag ops are '+HTML' and '-HTML')
+ */
+ case 'html':
+ // Parse error: ignore the token.
+ return $this->step();
+
+ /*
+ * > A start tag whose tag name is "html" (delegated to the "in body" rules)
+ */
+ case '+HTML':
+ return $this->step_in_body();
+
+ /*
+ * > An end tag whose tag name is "html"
+ *
+ * > If the parser was created as part of the HTML fragment parsing algorithm,
+ * > this is a parse error; ignore the token. (fragment case)
+ * >
+ * > Otherwise, switch the insertion mode to "after after body".
+ */
+ case '-HTML':
+ if ( isset( $this->context_node ) ) {
+ return $this->step();
+ }
+
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY;
+ return true;
+ }
+
+ /*
+ * > Parse error. Switch the insertion mode to "in body" and reprocess the token.
+ */
+ after_body_anything_else:
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
@@ -3991,7 +4055,109 @@ private function step_after_body(): bool {
* @return bool Whether an element was found.
*/
private function step_in_frameset(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET . ' state.' );
+ $tag_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$tag_name}";
+
+ switch ( $op ) {
+ /*
+ * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ * >
+ * > Insert the character.
+ *
+ * Text mixing whitespace with other characters would need the non-whitespace part
+ * stripped before insertion. That splitting is not supported at this time.
+ */
+ case '#text':
+ $text = $this->get_modifiable_text();
+ // Whitespace-only text is processed by the "in body" rules; anything else bails.
+ if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+ return $this->step_in_body();
+ }
+ $this->bail( 'Non-whitespace characters cannot be handled in frameset.' );
+ break;
+
+ /*
+ * > A comment token (inserted through insert_html_element())
+ */
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A DOCTYPE token (the lowercase 'html' op; tag ops are '+HTML' and '-HTML')
+ */
+ case 'html':
+ // Parse error: ignore the token.
+ return $this->step();
+
+ /*
+ * > A start tag whose tag name is "html" (delegated to the "in body" rules)
+ */
+ case '+HTML':
+ return $this->step_in_body();
+
+ /*
+ * > A start tag whose tag name is "frameset"
+ */
+ case '+FRAMESET':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > An end tag whose tag name is "frameset"
+ */
+ case '-FRAMESET':
+ /*
+ * > If the current node is the root html element, then this is a parse error;
+ * > ignore the token. (fragment case)
+ */
+ if ( $this->state->stack_of_open_elements->current_node_is( 'HTML' ) ) {
+ return $this->step();
+ }
+
+ /*
+ * > Otherwise, pop the current node from the stack of open elements.
+ */
+ $this->state->stack_of_open_elements->pop();
+
+ /*
+ * > If the parser was not created as part of the HTML fragment parsing algorithm
+ * > (fragment case), and the current node is no longer a frameset element, then
+ * > switch the insertion mode to "after frameset".
+ */
+ if ( ! isset( $this->context_node ) && ! $this->state->stack_of_open_elements->current_node_is( 'FRAMESET' ) ) {
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET;
+ }
+
+ return true;
+
+ /*
+ * > A start tag whose tag name is "frame"
+ *
+ * > Insert an HTML element for the token. Immediately pop the
+ * > current node off the stack of open elements.
+ * >
+ * > Acknowledge the token's self-closing flag, if it is set.
+ */
+ case '+FRAME':
+ $this->insert_html_element( $this->state->current_token );
+ $this->state->stack_of_open_elements->pop();
+ return true;
+
+ /*
+ * > A start tag whose tag name is "noframes" (delegated to the "in head" rules)
+ */
+ case '+NOFRAMES':
+ return $this->step_in_head();
+ }
+
+ // Parse error: ignore the token.
+ return $this->step();
}
/**
@@ -4010,7 +4176,67 @@ private function step_in_frameset(): bool {
* @return bool Whether an element was found.
*/
private function step_after_frameset(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET . ' state.' );
+ $tag_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$tag_name}";
+
+ switch ( $op ) {
+ /*
+ * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ * >
+ * > Insert the character.
+ *
+ * Text mixing whitespace with other characters would need the non-whitespace part
+ * stripped before insertion. That splitting is not supported, so such text bails.
+ */
+ case '#text':
+ $text = $this->get_modifiable_text();
+ if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+ return $this->step_in_body();
+ }
+ $this->bail( 'Non-whitespace characters cannot be handled in after frameset' );
+ break;
+
+ /*
+ * > A comment token (inserted through insert_html_element())
+ */
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A DOCTYPE token (the lowercase 'html' op; tag ops are '+HTML' and '-HTML')
+ */
+ case 'html':
+ // Parse error: ignore the token.
+ return $this->step();
+
+ /*
+ * > A start tag whose tag name is "html" (delegated to the "in body" rules)
+ */
+ case '+HTML':
+ return $this->step_in_body();
+
+ /*
+ * > An end tag whose tag name is "html" (switches to "after after frameset")
+ */
+ case '-HTML':
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET;
+ return true;
+
+ /*
+ * > A start tag whose tag name is "noframes" (delegated to the "in head" rules)
+ */
+ case '+NOFRAMES':
+ return $this->step_in_head();
+ }
+
+ // Parse error: ignore the token. Reached for every op not matched above.
+ return $this->step();
}
/**
@@ -4029,7 +4255,52 @@ private function step_after_frameset(): bool {
* @return bool Whether an element was found.
*/
private function step_after_after_body(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY . ' state.' );
+ $tag_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$tag_name}";
+
+ switch ( $op ) {
+ /*
+ * > A comment token (unsupported in this mode: the parser bails)
+ */
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->bail( 'Content outside of HTML is unsupported.' );
+ break;
+
+ /*
+ * > A DOCTYPE token (the lowercase 'html' op)
+ * > A start tag whose tag name is "html" (the '+HTML' op)
+ *
+ * > Process the token using the rules for the "in body" insertion mode.
+ */
+ case 'html':
+ case '+HTML':
+ return $this->step_in_body();
+
+ /*
+ * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ * > Process the token using the rules for the "in body" insertion mode.
+ * Only whitespace-only text qualifies; any other text jumps to "anything else" below.
+ */
+ case '#text':
+ $text = $this->get_modifiable_text();
+ if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+ return $this->step_in_body();
+ }
+ goto after_after_body_anything_else;
+ break;
+ }
+
+ /*
+ * > Parse error. Switch the insertion mode to "in body" and reprocess the token.
+ */
+ after_after_body_anything_else:
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
}
/**
@@ -4048,7 +4319,57 @@ private function step_after_after_body(): bool {
* @return bool Whether an element was found.
*/
private function step_after_after_frameset(): bool {
- $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET . ' state.' );
+ $tag_name = $this->get_token_name();
+ $token_type = $this->get_token_type();
+ $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+ $op = "{$op_sigil}{$tag_name}";
+
+ switch ( $op ) {
+ /*
+ * > A comment token (unsupported in this mode: the parser bails)
+ */
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->bail( 'Content outside of HTML is unsupported.' );
+ break;
+
+ /*
+ * > A DOCTYPE token (the lowercase 'html' op)
+ * > A start tag whose tag name is "html" (the '+HTML' op)
+ *
+ * > Process the token using the rules for the "in body" insertion mode.
+ */
+ case 'html':
+ case '+HTML':
+ return $this->step_in_body();
+
+ /*
+ * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
+ * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
+ * >
+ * > Process the token using the rules for the "in body" insertion mode.
+ *
+ * Text mixing whitespace with other characters would need the non-whitespace part
+ * stripped first. That splitting is not supported, so such text bails.
+ */
+ case '#text':
+ $text = $this->get_modifiable_text();
+ if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
+ return $this->step_in_body();
+ }
+ $this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' );
+ break;
+
+ /*
+ * > A start tag whose tag name is "noframes" (delegated to the "in head" rules)
+ */
+ case '+NOFRAMES':
+ return $this->step_in_head();
+ }
+
+ // Parse error: ignore the token. Reached for every op not matched above.
+ return $this->step();
}
/**
@@ -4115,7 +4436,8 @@ private function step_in_foreign_content(): bool {
*/
case '#cdata-section':
case '#comment':
- case '#funky_comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
$this->insert_foreign_element( $this->state->current_token, false );
return true;
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
index e772ab39e6356..15d50d1934116 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php
@@ -33,10 +33,13 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
'tests1/line0692' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests19/line0488' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests19/line0500' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests19/line0965' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
+ 'tests2/line0697' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
'tests5/line0013' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
'tests5/line0077' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',