diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1be795c5c7de2..e88757ec7b4c2 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -279,44 +279,24 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * form is provided because a context element may have attributes that * impact the parse, such as with a SCRIPT tag and its `type` attribute. * - * Example: - * - * // Usually, snippets of HTML ought to be processed in the default `` context: - * $processor = WP_HTML_Processor::create_fragment( '

Hi

' ); - * - * // Some fragments should be processed in the correct context like this SVG: - * $processor = WP_HTML_Processor::create_fragment( '', '' ); - * - * // This fragment with TD tags should be processed in a TR context: - * $processor = WP_HTML_Processor::create_fragment( - * '123', - * '' - * ); - * - * In order to create a fragment processor at the correct location, the - * provided fragment will be processed as part of a full HTML document. - * The processor will search for the last opener tag in the document and - * create a fragment processor at that location. The document will be - * forced into "no-quirks" mode by including the HTML5 doctype. - * - * For advanced usage and precise control over the context element, use - * `WP_HTML_Processor::create_full_processor()` and - * `WP_HTML_Processor::create_fragment_at_current_node()`. + * ## Current HTML Support * - * UTF-8 is the only allowed encoding. If working with a document that - * isn't UTF-8, first convert the document to UTF-8, then pass in the - * converted HTML. + * - The only supported context is `<body>`, which is the default value. + * - The only supported document encoding is `UTF-8`, which is the default value. * * @since 6.4.0 * @since 6.6.0 Returns `static` instead of `self` so it can create subclass instances. - * @since 6.8.0 Can create fragments with any context element. * * @param string $html Input HTML fragment to process. - * @param string $context Context element for the fragment. Defaults to `<body>`. + * @param string $context Context element for the fragment, must be default of `<body>`. * @param string $encoding Text encoding of the document; must be default of 'UTF-8'. * @return static|null The created processor if successful, otherwise null. 
*/ public static function create_fragment( $html, $context = '', $encoding = 'UTF-8' ) { + if ( '' !== $context || 'UTF-8' !== $encoding ) { + return null; + } + $context_processor = static::create_full_parser( "{$context}", $encoding ); if ( null === $context_processor ) { return null; @@ -475,7 +455,7 @@ function ( WP_HTML_Token $token ): void { * @param string $html Input HTML fragment to process. * @return static|null The created processor if successful, otherwise null. */ - public function create_fragment_at_current_node( string $html ) { + private function create_fragment_at_current_node( string $html ) { if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) { _doing_it_wrong( __METHOD__, diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php b/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php deleted file mode 100644 index 4913fa07eb412..0000000000000 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorFragmentParsing.php +++ /dev/null @@ -1,178 +0,0 @@ -' ); - $this->assertTrue( $processor->next_tag( 'SVG' ) ); - - $fragment = $processor->create_fragment_at_current_node( "\0preceded-by-nul-byte
" ); - - $this->assertSame( 'svg', $fragment->get_namespace() ); - $this->assertTrue( $fragment->next_token() ); - - /* - * In HTML parsing, a nul byte would be ignored. - * In SVG it should be replaced with a replacement character. - */ - $this->assertSame( '#text', $fragment->get_token_type() ); - $this->assertSame( "\u{FFFD}", $fragment->get_modifiable_text() ); - - $this->assertTrue( $fragment->next_tag( 'RECT' ) ); - $this->assertSame( 'svg', $fragment->get_namespace() ); - - $this->assertTrue( $fragment->next_tag( 'CIRCLE' ) ); - $this->assertSame( array( 'HTML', 'SVG', 'CIRCLE' ), $fragment->get_breadcrumbs() ); - $this->assertTrue( $fragment->next_tag( 'foreignObject' ) ); - $this->assertSame( 'svg', $fragment->get_namespace() ); - } - - /** - * @ticket 62357 - */ - public function test_create_fragment_at_current_node_in_foreign_content_integration_point() { - $processor = WP_HTML_Processor::create_full_parser( '' ); - $this->assertTrue( $processor->next_tag( 'foreignObject' ) ); - - $fragment = $processor->create_fragment_at_current_node( "\0not-preceded-by-nul-byte" ); - - // Nothing has been processed, the html namespace should be used for parsing as an integration point. - $this->assertSame( 'html', $fragment->get_namespace() ); - - // HTML parsing transforms IMAGE into IMG. - $this->assertTrue( $fragment->next_tag( 'IMG' ) ); - - $this->assertTrue( $fragment->next_token() ); - - // In HTML parsing, the nul byte is ignored and the text is reached. - $this->assertSame( '#text', $fragment->get_token_type() ); - $this->assertSame( 'not-preceded-by-nul-byte', $fragment->get_modifiable_text() ); - - /* - * svg:foreignObject is an HTML integration point, so the processor should be in the HTML namespace. - * RECT is an HTML element here, meaning it may have the self-closing flag but does not self-close. 
- */ - $this->assertTrue( $fragment->next_tag( 'RECT' ) ); - $this->assertSame( array( 'HTML', 'FOREIGNOBJECT', 'RECT' ), $fragment->get_breadcrumbs() ); - $this->assertSame( 'html', $fragment->get_namespace() ); - $this->assertTrue( $fragment->has_self_closing_flag() ); - $this->assertTrue( $fragment->expects_closer() ); - } - - /** - * @expectedIncorrectUsage WP_HTML_Processor::create_fragment_at_current_node - * @ticket 62357 - */ - public function test_prevent_fragment_creation_on_closers() { - $processor = WP_HTML_Processor::create_full_parser( '

' ); - $processor->next_tag( 'P' ); - $processor->next_tag( - array( - 'tag_name' => 'P', - 'tag_closers' => 'visit', - ) - ); - $this->assertSame( 'P', $processor->get_tag() ); - $this->assertTrue( $processor->is_tag_closer() ); - $this->assertNull( $processor->create_fragment_at_current_node( 'fragment HTML' ) ); - } - - /** - * Verifies that the fragment parser doesn't allow invalid context nodes. - * - * This includes void elements and self-contained elements because they can - * contain no inner HTML. Operations on self-contained elements should occur - * through methods such as {@see WP_HTML_Tag_Processor::set_modifiable_text}. - * - * @ticket 62584 - * - * @dataProvider data_invalid_fragment_contexts - * - * @param string $context Invalid context node for fragment parser. - */ - public function test_rejects_invalid_fragment_contexts( string $context, string $doing_it_wrong_method_name ) { - $this->setExpectedIncorrectUsage( "WP_HTML_Processor::{$doing_it_wrong_method_name}" ); - $this->assertNull( - WP_HTML_Processor::create_fragment( 'just a test', $context ), - "Should not have been able to create a fragment parser with context node {$context}" - ); - } - - /** - * Data provider. - * - * @return array[] - */ - public static function data_invalid_fragment_contexts() { - return array( - /* - * Invalid contexts. - */ - /* - * The text node is confused with a virtual body open tag. - * This should fail to set a bookmark in `create_fragment` - * but currently does not, it slips through and fails in - * `create_fragment_at_current_node`. - */ - 'Invalid text' => array( 'just some text', 'create_fragment_at_current_node' ), - 'Invalid comment' => array( '', 'create_fragment' ), - 'Invalid closing' => array( '', 'create_fragment' ), - 'Invalid DOCTYPE' => array( '', 'create_fragment' ), - /* - * PLAINTEXT should appear in the unsupported elements, but at the - * moment it's completely unsupported by the processor so - * the context element cannot be found. 
- */ - 'Unsupported PLAINTEXT' => array( '', 'create_fragment' ), - - /* - * Invalid contexts. - */ - 'AREA' => array( '<area>', 'create_fragment_at_current_node' ), - 'BASE' => array( '<base>', 'create_fragment_at_current_node' ), - 'BASEFONT' => array( '<basefont>', 'create_fragment_at_current_node' ), - 'BGSOUND' => array( '<bgsound>', 'create_fragment_at_current_node' ), - 'BR' => array( '<br>', 'create_fragment_at_current_node' ), - 'COL' => array( '<table><colgroup><col>', 'create_fragment_at_current_node' ), - 'EMBED' => array( '<embed>', 'create_fragment_at_current_node' ), - 'FRAME' => array( '<frameset><frame>', 'create_fragment_at_current_node' ), - 'HR' => array( '<hr>', 'create_fragment_at_current_node' ), - 'IMG' => array( '<img>', 'create_fragment_at_current_node' ), - 'INPUT' => array( '<input>', 'create_fragment_at_current_node' ), - 'KEYGEN' => array( '<keygen>', 'create_fragment_at_current_node' ), - 'LINK' => array( '<link>', 'create_fragment_at_current_node' ), - 'META' => array( '<meta>', 'create_fragment_at_current_node' ), - 'PARAM' => array( '<param>', 'create_fragment_at_current_node' ), - 'SOURCE' => array( '<source>', 'create_fragment_at_current_node' ), - 'TRACK' => array( '<track>', 'create_fragment_at_current_node' ), - 'WBR' => array( '<wbr>', 'create_fragment_at_current_node' ), - - /* - * Unsupported elements. Include a tag closer to ensure the element can be found - * and does not pause the parser at an incomplete token. 
- */ - 'IFRAME' => array( '<iframe></iframe>', 'create_fragment_at_current_node' ), - 'NOEMBED' => array( '<noembed></noembed>', 'create_fragment_at_current_node' ), - 'NOFRAMES' => array( '<noframes></noframes>', 'create_fragment_at_current_node' ), - 'SCRIPT' => array( '<script></script>', 'create_fragment_at_current_node' ), - 'SCRIPT with type' => array( '<script type="javascript"></script>', 'create_fragment_at_current_node' ), - 'STYLE' => array( '<style></style>', 'create_fragment_at_current_node' ), - 'TEXTAREA' => array( '<textarea></textarea>', 'create_fragment_at_current_node' ), - 'TITLE' => array( '<title></title>', 'create_fragment_at_current_node' ), - 'XMP' => array( '<xmp></xmp>', 'create_fragment_at_current_node' ), - ); - } -} diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php index 5e0c3b77f8732..a03a9ab806a93 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php @@ -138,6 +138,10 @@ public function data_external_html5lib_tests() { * @return bool True if the test case should be skipped. False otherwise. */ private static function should_skip_test( ?string $test_context_element, string $test_name ): bool { + if ( null !== $test_context_element && 'body' !== $test_context_element ) { + return true; + } + if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) { return true; } @@ -153,63 +157,11 @@ private static function should_skip_test( ?string $test_context_element, string * @return string|null Tree structure of parsed HTML, if supported, else null. */ private static function build_tree_representation( ?string $fragment_context, string $html ) { - if ( $fragment_context ) { - /* - * If the string of characters starts with "svg ", the context - * element is in the SVG namespace and the substring after - * "svg " is the local name. 
If the string of characters starts - * with "math ", the context element is in the MathML namespace - * and the substring after "math " is the local name. - * Otherwise, the context element is in the HTML namespace and - * the string is the local name. - */ - if ( str_starts_with( $fragment_context, 'svg ' ) ) { - $tag_name = substr( $fragment_context, 4 ); - if ( 'svg' === $tag_name ) { - $fragment_context_html = '<svg>'; - } else { - $fragment_context_html = "<svg><{$tag_name}>"; - } - } elseif ( str_starts_with( $fragment_context, 'math ' ) ) { - $tag_name = substr( $fragment_context, 5 ); - if ( 'math' === $tag_name ) { - $fragment_context_html = '<math>'; - } else { - $fragment_context_html = "<math><{$tag_name}>"; - } - } else { - // Tags that only appear in tables need a special case. - if ( in_array( - $fragment_context, - array( - 'caption', - 'col', - 'colgroup', - 'tbody', - 'td', - 'tfoot', - 'th', - 'thead', - 'tr', - ), - true - ) ) { - $fragment_context_html = "<table><{$fragment_context}>"; - } else { - $fragment_context_html = "<{$fragment_context}>"; - } - } - - $processor = WP_HTML_Processor::create_fragment( $html, $fragment_context_html ); - - if ( null === $processor ) { - throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); - } - } else { - $processor = WP_HTML_Processor::create_full_parser( $html ); - if ( null === $processor ) { - throw new Exception( 'Could not create a full parser.' ); - } + $processor = $fragment_context + ? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" ) + : WP_HTML_Processor::create_full_parser( $html ); + if ( null === $processor ) { + throw new WP_HTML_Unsupported_Exception( "Could not create a parser with the given fragment context: {$fragment_context}.", '', 0, '', array(), array() ); } $output = '';