Skip to content

Commit

Permalink
WIP: HTML API: Stop at funky comments
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Apr 6, 2023
1 parent d3286f8 commit 61b15f1
Show file tree
Hide file tree
Showing 2 changed files with 255 additions and 12 deletions.
170 changes: 158 additions & 12 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,16 @@ class WP_HTML_Tag_Processor {
*/
private $stop_on_tag_closers;

/**
* Whether to visit funky comments, e.g. </1>, when walking an input document.
*
* These are "funky" because they are errors in the markup rather than real
* tag closers. The flag is reset to false each time a query is parsed and is
* only enabled when the query contains 'funky_comments' => 'visit'.
*
* @since 6.3.0
* @var bool
*/
private $stop_on_funky_comments;

/**
* Holds updated HTML as updates are applied.
*
Expand Down Expand Up @@ -538,6 +548,18 @@ class WP_HTML_Tag_Processor {
*/
protected $seek_count = 0;

/**
* Byte span of the funky comment the processor is currently stopped on.
*
* Null when the current token is not a funky comment; otherwise a two-item
* array( $start, $end ) of byte offsets into the input HTML, where $end is
* the offset of the closing `>`.
*
* @since 6.3.0
* @var int[]|null
*/
private $funky_comment_content = null;

/**
* Number of `__placeholder_N` bookmarks currently set by declarative_match().
*
* Incremented each time a funky comment in a pattern matches a token, and
* decremented as those bookmarks are released.
*
* @since 6.3.0
* @var int
*/
private $placeholders = 0;

/**
* Constructor.
*
Expand Down Expand Up @@ -1161,11 +1183,21 @@ private function parse_next_tag() {
* See https://github.com/WordPress/wordpress-develop/pull/4256
*/
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
$closer_at = strpos( $html, '>', $at );
if ( false === $closer_at ) {
return false;
}

if ( $this->stop_on_funky_comments ) {
++$at;
$this->tag_name_length = 0;
$this->tag_name_starts_at = $at;
$this->bytes_already_parsed = $closer_at;
$this->funky_comment_content = array( $at, $closer_at );

return true;
}

$at = $closer_at + 1;
continue;
}
Expand Down Expand Up @@ -1301,11 +1333,12 @@ private function skip_whitespace() {
private function after_tag() {
	// Flush pending changes before the per-tag state below is cleared.
	$this->class_name_updates_to_attributes_updates();
	$this->apply_attributes_updates();

	// Forget everything recorded about the token that was just visited.
	$this->attributes            = array();
	$this->funky_comment_content = null;
	$this->is_closing_tag        = null;
	$this->tag_ends_at           = null;
	$this->tag_name_length       = null;
	$this->tag_name_starts_at    = null;
}

/**
Expand Down Expand Up @@ -1552,7 +1585,7 @@ public function seek( $bookmark_name ) {
if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) {
_doing_it_wrong(
__METHOD__,
__( 'Unknown bookmark name.' ),
__( 'Unknown bookmark name.' . ' ' . $bookmark_name ),
'6.2.0'
);
return false;
Expand All @@ -1577,6 +1610,14 @@ public function seek( $bookmark_name ) {
return $this->next_tag( array( 'tag_closers' => 'visit' ) );
}

/**
* Moves the parsing cursor back to the start of the document.
*
* Finishes the current token via after_tag(), then zeroes the parsed and
* copied byte offsets and empties the output buffer.
*
* NOTE(review): the output buffer is emptied after after_tag() has run, so it
* looks like updates that were already applied are dropped here. Confirm
* whether that is intended; the disabled get_updated_html() call below
* suggests the question is still open.
*
* @since 6.3.0
*/
public function rewind() {
// $this->get_updated_html();
$this->after_tag();
$this->bytes_already_parsed = 0;
$this->bytes_already_copied = 0;
$this->output_buffer = '';
}

/**
* Compare two WP_HTML_Text_Replacement objects.
*
Expand Down Expand Up @@ -1857,6 +1898,13 @@ public function is_tag_closer() {
return $this->is_closing_tag;
}

/**
 * Indicates whether the processor is currently stopped on a funky comment.
 *
 * @since 6.3.0
 *
 * @return bool True when a funky comment span has been recorded for the current token.
 */
public function is_funky_comment() {
	return isset( $this->funky_comment_content );
}

/**
* Updates or creates a new attribute on the currently matched tag with the passed value.
*
Expand Down Expand Up @@ -2113,6 +2161,13 @@ public function __toString() {
return $this->get_updated_html();
}

/**
 * Returns the text of the funky comment the processor is stopped on.
 *
 * @since 6.3.0
 *
 * @return string|null Text between the recorded start and end offsets,
 *                     or null when the current token is not a funky comment.
 */
public function get_funky_content() {
	if ( null === $this->funky_comment_content ) {
		return null;
	}

	$starts_at = $this->funky_comment_content[0];
	$ends_at   = $this->funky_comment_content[1];

	return substr( $this->html, $starts_at, $ends_at - $starts_at );
}

/**
* Returns the string representation of the HTML Tag Processor.
*
Expand Down Expand Up @@ -2204,11 +2259,12 @@ private function parse_query( $query ) {
return;
}

$this->last_query = $query;
$this->sought_tag_name = null;
$this->sought_class_name = null;
$this->sought_match_offset = 1;
$this->stop_on_tag_closers = false;
$this->last_query = $query;
$this->sought_tag_name = null;
$this->sought_class_name = null;
$this->sought_match_offset = 1;
$this->stop_on_tag_closers = false;
$this->stop_on_funky_comments = false;

// A single string value means "find the tag of this name".
if ( is_string( $query ) ) {
Expand Down Expand Up @@ -2246,8 +2302,94 @@ private function parse_query( $query ) {
if ( isset( $query['tag_closers'] ) ) {
$this->stop_on_tag_closers = 'visit' === $query['tag_closers'];
}

if ( isset( $query['funky_comments'] ) ) {
$this->stop_on_funky_comments = 'visit' === $query['funky_comments'];
}
}

/**
 * Advances to the next run of tokens in the document that matches a pattern.
 *
 * The pattern is itself HTML. Every tag and tag closer in the pattern must be
 * followed, token for token, by a matching token in the document: same tag
 * name, same opener/closer kind, and every attribute named in the pattern
 * present in the document with an identical value. Extra attributes in the
 * document are ignored.
 *
 * A funky comment in the pattern (e.g. `</1>`) acts as a wildcard that matches
 * any single token. Each wildcard hit is bookmarked as `__placeholder_N`, with
 * N counting up from 1 in the order the wildcards matched.
 *
 * On success the processor is left on the first token of the match. For
 * patterns with more than one token a `match_end` bookmark is also set on the
 * last matched token; single-token patterns return before it is set.
 *
 * @since 6.3.0
 *
 * @param string $pattern_html HTML describing the sequence of tokens to find.
 * @return bool Whether a matching sequence was found.
 */
public function declarative_match( $pattern_html ) {
	/*
	 * Release wildcard bookmarks left over from an earlier call. This has to
	 * happen before the counter is reset, otherwise they can never be found
	 * again and would stay allocated.
	 */
	$this->release_placeholder_bookmarks();

	$pattern          = new WP_HTML_Tag_Processor( $pattern_html );
	$visit_everything = array(
		'tag_closers'    => 'visit',
		'funky_comments' => 'visit',
	);

	$same_thing = function ( WP_HTML_Tag_Processor $pattern, WP_HTML_Tag_Processor $test ) {
		// A funky comment in the pattern is a wildcard: remember where it matched.
		if ( $pattern->is_funky_comment() ) {
			$this->placeholders++;
			$this->set_bookmark( "__placeholder_{$this->placeholders}" );
			return true;
		}

		if ( ! (
			$pattern->get_tag() === $test->get_tag() &&
			$pattern->is_tag_closer() === $test->is_tag_closer() &&
			$pattern->is_funky_comment() === $test->is_funky_comment()
		) ) {
			return false;
		}

		// Only attributes named in the pattern constrain the match.
		$attribute_constraints = $pattern->get_attribute_names_with_prefix( '' );
		if ( null === $attribute_constraints ) {
			return true;
		}

		foreach ( $attribute_constraints as $name ) {
			if ( $pattern->get_attribute( $name ) !== $test->get_attribute( $name ) ) {
				return false;
			}
		}

		return true;
	};

	step_one: // find the next spot the patterns start the same.
	if ( ! $pattern->next_tag( $visit_everything ) ) {
		// Harmless when no partial match was ever started.
		$this->release_bookmark( 'match_start' );
		return false;
	}

	// Wildcards recorded by an abandoned partial match are no longer valid.
	$this->release_placeholder_bookmarks();

	while ( $this->next_tag( $visit_everything ) ) {
		if ( $same_thing( $pattern, $this ) ) {
			goto step_two;
		}
	}

	// Don't leak the bookmark from an abandoned partial match.
	$this->release_bookmark( 'match_start' );
	return false;

	step_two: // see if the subsequence tokens in the pattern and test match.
	$this->set_bookmark( 'match_start' );
	if ( ! $pattern->next_tag( $visit_everything ) ) {
		// Single-token pattern: already resting on the match.
		$this->release_bookmark( 'match_start' );
		return true;
	}

	while ( true ) {
		if ( ! $this->next_tag( $visit_everything ) ) {
			$this->release_bookmark( 'match_start' );
			return false;
		}

		if ( ! $same_thing( $pattern, $this ) ) {
			// Start the pattern over and keep scanning from the current token.
			$pattern->rewind();
			goto step_one;
		}

		if ( ! $pattern->next_tag( $visit_everything ) ) {
			break;
		}
	}

	$this->set_bookmark( 'match_end' );
	$this->seek( 'match_start' );
	$this->release_bookmark( 'match_start' );
	return true;
}

/**
 * Releases every `__placeholder_N` bookmark created by declarative_match()
 * and brings the placeholder counter back to zero.
 *
 * @since 6.3.0
 */
private function release_placeholder_bookmarks() {
	while ( $this->placeholders > 0 ) {
		$this->release_bookmark( "__placeholder_{$this->placeholders}" );
		$this->placeholders--;
	}
}

/**
* Checks whether a given tag and its attributes match the search criteria.
Expand All @@ -2257,6 +2399,10 @@ private function parse_query( $query ) {
* @return boolean Whether the given tag and its attribute match the search criteria.
*/
private function matches() {
if ( null !== $this->funky_comment_content && $this->stop_on_funky_comments ) {
return true;
}

if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) {
return false;
}
Expand Down
97 changes: 97 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ class WP_UnitTestCase extends PHPUnit\Framework\TestCase {}
// require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-processor.php';

/**
 * Test stand-in for esc_attr(): replaces `<`, `>` and `"` with HTML entities.
 *
 * @param string $s Text to escape.
 * @return string Escaped text.
 */
function esc_attr( $s ) {
	$search  = array( '<', '>', '"' );
	$replace = array( '&lt;', '&gt;', '&quot;' );

	return str_replace( $search, $replace, $s );
}
/**
 * Test stand-in for the translation function: hands the text back unchanged.
 *
 * @param string $s Text to "translate".
 * @return string The same text.
 */
function __( $s ) {
	return $s;
}
/**
* Test stand-in for _doing_it_wrong(): dumps whatever arguments it receives
* so that misuse reports are visible in the test output.
*
* @param mixed ...$args Arguments passed by the caller.
*/
function _doing_it_wrong( ...$args ) {
var_dump( $args );
}
}

/**
Expand Down Expand Up @@ -2247,4 +2251,97 @@ public function data_updating_attributes_in_malformed_html() {
),
);
}

/**
 * Ensures the processor stops on a funky comment and reports its content.
 *
 * @dataProvider data_funky_comments
 *
 * @param string $html    Input HTML containing a funky comment.
 * @param string $content Expected content of the first funky comment.
 */
public function test_stops_at_funky_comments( $html, $content ) {
	$p = new WP_HTML_Tag_Processor( $html );

	$this->assertTrue( $p->next_tag( array( 'funky_comments' => 'visit' ) ) );

	// Strict comparison: a loose one would let e.g. integer 1 pass for '1'.
	$this->assertSame( $content, $p->get_funky_content() );
}

/**
* Data provider.
*
* Each dataset is array( input HTML, expected content of the first funky comment ).
*
* @return array[]
*/
public function data_funky_comments() {
return array(
'Isolated comment' => array( '</1>', '1' ),
'Inside text' => array( 'Before</1>After', '1' ),
'%name syntax' => array( 'Today is </%day>.', '%day' ),
'With spaces inside' => array( 'What </$variable is this>?', '$variable is this' ),
);
}

/**
 * Checks whether a pattern is, or is not, found in a document.
 *
 * @dataProvider data_declarative_patterns
 *
 * @param string $pattern Pattern HTML handed to declarative_match().
 * @param string $html    Document to search.
 * @param bool   $matches Whether the pattern is expected to be found.
 */
public function test_matches_declarative_pattern( $pattern, $html, $matches ) {
	$p     = new WP_HTML_Tag_Processor( $html );
	$found = $p->declarative_match( $pattern );

	if ( ! $matches ) {
		$this->assertFalse( $found );
		return;
	}

	$this->assertTrue( $found );
}

/**
* Data provider.
*
* Each dataset is array( pattern HTML, document HTML, whether a match is expected ).
* Dataset names starting with `^` are the negative cases (no match expected).
*
* @return array[]
*/
public function data_declarative_patterns() {
return array(
'Single tag' => array( '<div>', '<div>', true ),
'^Single tag' => array( '<div>', '<img>', false ),
'Wrapped image' => array( '<div><img></div>', '<div><img></div>', true ),
'Wrapped image w/attributes' => array( '<div><img></div>', '<div id="14"><img src="hallumi" inert></div>', true ),
'Prefix before match' => array( '<li><img></li>', '<main><h1>Stuff!</h1><ul><li><img></li></ul></main>', true ),
'Pattern with attribute' => array( '<li is-active><img></li>', '<li is-active><img></li>', true ),
'^Pattern with attribute' => array( '<li is-active><img></li>', '<li><img></li>', false ),
'Pattern with attributes' => array( '<li is-active class="slick"><img></li>', '<li class="slick" is-active><img></li>', true ),
'^Pattern with attributes' => array( '<li is-active class="slick"><img></li>', '<li id="slick" is-active><img></li>', false ),
'^Pattern with attributes 2' => array( '<li is-active class="slick"><img></li>', '<li class="wicket" is-active><img></li>', false ),
'Test with attributes' => array( '<li is-active><img></li>', '<li id="5" is-funky=maybe style=\'color: red;\' is-active class="test-class bright"><img></li>', true ),
'^Test with attributes' => array( '<li is-active><img></li>', '<li id="5" is-funky=maybe style=\'color: red;\' isactive class="test-class bright"><img></li>', false ),
'Attribute with value' => array( '<input disabled>', '<input type="text"><input><input disabled><input value="5">', true ),
'Attribute with text' => array( '<input id="5">', '<input type="text"><input><input id=5><input disabled><input value="5">', true ),
'^Attribute with value' => array( '<input disabled>', '<input type="text"><input><input disable><input value="5">', false ),
'Wildcard' => array( '<hgroup></1></2></hgroup>', '<hgroup><h1>Important</h1></hgroup>', true ),
'^Wildcard' => array( '<hgroup></1></2></hgroup>', '<hgroup><img></hgroup>', false ),
'Wildcard attributes' => array( '</1 aria-label="placeholder">', '<div><p><strong>This</strong> is <em aria-label="placeholder">really</em> cool!</p></div>', true ),
);
}

/**
 * Verifies that after a successful match the processor rests on the first
 * matched tag, so its attributes can be read straight away.
 */
public function test_declarative_match_pauses_at_start_of_match() {
	$html = '<main><h1>Stuff!</h1><ul><li pick-me><img></li></ul></main>';
	$p    = new WP_HTML_Tag_Processor( $html );

	$found = $p->declarative_match( '<li><img></li>' );
	$this->assertTrue( $found );

	// `pick-me` only exists on the <li> that opens the matched sequence.
	$pick_me = $p->get_attribute( 'pick-me' );
	$this->assertTrue( $pick_me );
}

/**
* Verifies that wildcard (funky comment) tokens in a pattern are bookmarked
* as `__placeholder_N` and can be sought to after a match.
*
* As the method name says, this checks an internal detail and is meant to be
* removed once development settles.
*/
public function test_declarative_match_bookmarks_markup_wildcards_delete_me_this_is_an_internal_detail_but_for_now_helpful_for_development() {
$p = new WP_HTML_Tag_Processor( <<<HTML
<main>
<h1>Stuff!</h1>
<ul>
<li id=1><p>Just a thought</p></li>
<img>
<li id=2 pick-me><img></li>
</ul>
</main>
HTML
);

// Step onto the first tag before matching.
$p->next_tag();

// One wildcard between <li> and </li>: it is expected to land on the IMG.
$this->assertTrue( $p->declarative_match( '<li></1></li>' ) );
$p->seek( '__placeholder_1' );
$this->assertSame( 'IMG', $p->get_tag() );

// Start over from the top of the document with a two-wildcard pattern.
$p->rewind();
$this->assertTrue( $p->declarative_match( '<main></1></2><ul>' ) );

// First wildcard: the H1 opener.
$p->seek( '__placeholder_1' );
$this->assertSame( 'H1', $p->get_tag() );
$this->assertFalse( $p->is_tag_closer() );

// Second wildcard: the H1 closer.
$p->seek( '__placeholder_2' );
$this->assertSame( 'H1', $p->get_tag() );
$this->assertTrue( $p->is_tag_closer() );
}
}

0 comments on commit 61b15f1

Please sign in to comment.