diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 61b0cb696f186..bf0a881e6c756 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -317,6 +317,16 @@ class WP_HTML_Tag_Processor { */ private $stop_on_tag_closers; + /** + * Whether to visit funky comments, e.g. 1>, when walking an input document. + * + * These are funny because they are errors. + * + * @since 6.3.0 + * @var bool + */ + private $stop_on_funky_comments; + /** * Holds updated HTML as updates are applied. * @@ -538,6 +548,18 @@ class WP_HTML_Tag_Processor { */ protected $seek_count = 0; + /** + * @since 6.3.0 + * @var string + */ + private $funky_comment_content = null; + + /** + * @since 6.3.0 + * @var int + */ + private $placeholders = 0; + /** * Constructor. * @@ -1161,11 +1183,21 @@ private function parse_next_tag() { * See https://github.com/WordPress/wordpress-develop/pull/4256 */ if ( $this->is_closing_tag ) { - $closer_at = strpos( $html, '>', $at + 3 ); + $closer_at = strpos( $html, '>', $at ); if ( false === $closer_at ) { return false; } + if ( $this->stop_on_funky_comments ) { + ++$at; + $this->tag_name_length = 0; + $this->tag_name_starts_at = $at; + $this->bytes_already_parsed = $closer_at; + $this->funky_comment_content = array( $at, $closer_at ); + + return true; + } + $at = $closer_at + 1; continue; } @@ -1301,11 +1333,12 @@ private function skip_whitespace() { private function after_tag() { $this->class_name_updates_to_attributes_updates(); $this->apply_attributes_updates(); - $this->tag_name_starts_at = null; - $this->tag_name_length = null; - $this->tag_ends_at = null; - $this->is_closing_tag = null; - $this->attributes = array(); + $this->tag_name_starts_at = null; + $this->tag_name_length = null; + $this->tag_ends_at = null; + $this->is_closing_tag = null; + $this->attributes = array(); + $this->funky_comment_content = null; } /** @@ -1552,7 +1585,7 @@ public function seek( $bookmark_name ) { if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) { _doing_it_wrong( __METHOD__, - __( 'Unknown bookmark name.' ), + __( 'Unknown bookmark name.' . ' ' . $bookmark_name ), '6.2.0' ); return false; @@ -1577,6 +1610,14 @@ public function seek( $bookmark_name ) { return $this->next_tag( array( 'tag_closers' => 'visit' ) ); } + public function rewind() { +// $this->get_updated_html(); + $this->after_tag(); + $this->bytes_already_parsed = 0; + $this->bytes_already_copied = 0; + $this->output_buffer = ''; + } + /** * Compare two WP_HTML_Text_Replacement objects. * @@ -1857,6 +1898,13 @@ public function is_tag_closer() { return $this->is_closing_tag; } + /** + * @since 6.3.0 + */ + public function is_funky_comment() { + return null !== $this->funky_comment_content; + } + /** * Updates or creates a new attribute on the currently matched tag with the passed value. * @@ -2113,6 +2161,13 @@ public function __toString() { return $this->get_updated_html(); } + public function get_funky_content() { + if ( $this->funky_comment_content !== null ) { + list( $start, $end ) = $this->funky_comment_content; + return substr( $this->html, $start, $end - $start ); + } + } + /** * Returns the string representation of the HTML Tag Processor. * @@ -2204,11 +2259,12 @@ private function parse_query( $query ) { return; } - $this->last_query = $query; - $this->sought_tag_name = null; - $this->sought_class_name = null; - $this->sought_match_offset = 1; - $this->stop_on_tag_closers = false; + $this->last_query = $query; + $this->sought_tag_name = null; + $this->sought_class_name = null; + $this->sought_match_offset = 1; + $this->stop_on_tag_closers = false; + $this->stop_on_funky_comments = false; // A single string value means "find the tag of this name". if ( is_string( $query ) ) { @@ -2246,8 +2302,94 @@ private function parse_query( $query ) { if ( isset( $query['tag_closers'] ) ) { $this->stop_on_tag_closers = 'visit' === $query['tag_closers']; } + + if ( isset( $query['funky_comments'] ) ) { + $this->stop_on_funky_comments = 'visit' === $query['funky_comments']; + } } + public function declarative_match( $pattern_html ) { + $this->placeholders = 0; + while ( $this->placeholders > 0 ) { + $this->release_bookmark( "__placeholder_{$this->placeholders}" ); + $this->placeholders--; + } + $pattern = new WP_HTML_Tag_Processor( $pattern_html ); + $visit_everything = array( 'tag_closers' => 'visit', 'funky_comments' => 'visit' ); + + $same_thing = function ( WP_HTML_Tag_Processor $pattern, WP_HTML_Tag_Processor $test ) { + if ( $pattern->is_funky_comment() ) { + $this->placeholders++; + $this->set_bookmark( "__placeholder_{$this->placeholders}" ); + return true; + } + + if ( ! ( + $pattern->get_tag() === $test->get_tag() && + $pattern->is_tag_closer() === $test->is_tag_closer() && + $pattern->is_funky_comment() === $test->is_funky_comment() + ) ) { + return false; + } + + $attribute_constraints = $pattern->get_attribute_names_with_prefix( '' ); + if ( null === $attribute_constraints ) { + return true; + } + + foreach ( $attribute_constraints as $name ) { + if ( $pattern->get_attribute( $name ) !== $test->get_attribute( $name ) ) { + return false; + } + } + + return true; + }; + + step_one: // find the next spot the patterns start the same. + if ( ! $pattern->next_tag( $visit_everything ) ) { + return false; + } + + while ( $this->placeholders > 0 ) { + $this->release_bookmark( "__placeholder_{$this->placeholders}" ); + $this->placeholders--; + } + while ( $this->next_tag( $visit_everything ) ) { + if ( $same_thing( $pattern, $this ) ) { + goto step_two; + } + } + return false; + + step_two: // see if the subsequence tokens in the pattern and test match. + $this->set_bookmark( 'match_start' ); + if ( ! $pattern->next_tag( $visit_everything ) ) { + $this->release_bookmark( 'match_start' ); + return true; + } + + while ( true ) { + if ( ! $this->next_tag( $visit_everything ) ) { + $this->release_bookmark( 'match_start' ); + return false; + } + + if ( ! $same_thing( $pattern, $this ) ) { + $pattern->rewind(); + goto step_one; + } + + if ( ! $pattern->next_tag( $visit_everything ) ) { + break; + } + } + + $this->set_bookmark( 'match_end' ); + $this->seek( 'match_start' ); + $this->release_bookmark( 'match_start' ); + return true; + } /** * Checks whether a given tag and its attributes match the search criteria. @@ -2257,6 +2399,10 @@ private function parse_query( $query ) { * @return boolean Whether the given tag and its attribute match the search criteria. */ private function matches() { + if ( null !== $this->funky_comment_content && $this->stop_on_funky_comments ) { + return true; + } + if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) { return false; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 7b74b279124ed..62b58fe5d86a6 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -17,6 +17,10 @@ class WP_UnitTestCase extends PHPUnit\Framework\TestCase {} // require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-processor.php'; function esc_attr( $s ) { return str_replace( [ '<', '>', '"' ], [ '<', '>', '"' ], $s ); } + function __( $s ) { return $s; } + function _doing_it_wrong( ...$args ) { + var_dump( $args ); + } } /** @@ -2247,4 +2251,97 @@ public function data_updating_attributes_in_malformed_html() { ), ); } + + /** + * @dataProvider data_funky_comments + */ + public function test_stops_at_funky_comments( $html, $content ) { + $p = new WP_HTML_Tag_Processor( $html ); + + $this->assertTrue( $p->next_tag( array( 'funky_comments' => 'visit' ) ) ); + $this->assertEquals( $content, $p->get_funky_content() ); + } + + public function data_funky_comments() { + return array( + 'Isolated comment' => array( '1>', '1' ), + 'Inside text' => array( 'Before1>After', '1' ), + '%name syntax' => array( 'Today is %day>.', '%day' ), + 'With spaces inside' => array( 'What $variable is this>?', '$variable is this' ), + ); + } + + /** + * @dataProvider data_declarative_patterns + */ + public function test_matches_declarative_pattern( $pattern, $html, $matches ) { + $p = new WP_HTML_Tag_Processor( $html ); + + if ( $matches ) { + $this->assertTrue( $p->declarative_match( $pattern ) ); + } else { + $this->assertFalse( $p->declarative_match( $pattern ) ); + } + } + + public function data_declarative_patterns() { + return array( + 'Single tag' => array( '
This is really cool!
Just a thought