From 4eec5cb9787ee18b8a4a1f7cbc28e861a885be55 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Fri, 12 Jul 2024 22:18:16 +0000 Subject: [PATCH] HTML API: Simplify breadcrumb accounting. Since the HTML Processor started visiting all nodes in a document, both real and virtual, the breadcrumb accounting became a bit complicated and it's not entirely clear that it is fully reliable. In this patch the breadcrumbs are rebuilt separately from the stack of open elements in order to eliminate the problem of the stateful stack interactions and the post-hoc event queue. Breadcrumbs are greatly simplified as a result, and more verifiably correct, in this construction. Developed in https://github.com/WordPress/wordpress-develop/pull/6981 Discussed in https://core.trac.wordpress.org/ticket/61576 Follow-up to [58590]. Props bernhard-reiter, dmsnell. See #61576. git-svn-id: https://develop.svn.wordpress.org/trunk@58713 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-processor.php | 126 +++++++----------- .../html-api/wpHtmlProcessorSemanticRules.php | 11 +- 2 files changed, 61 insertions(+), 76 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 223fcda76a885..d7f77f495cbe9 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -211,6 +211,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $element_queue = array(); + /** + * Stores the current breadcrumbs. + * + * @since 6.7.0 + * + * @var string[] + */ + private $breadcrumbs = array(); + /** * Current stack event, if set, representing a matched token. * @@ -310,8 +319,8 @@ public static function create_fragment( $html, $context = '', $encoding = false ); - $processor->state->stack_of_open_elements->push( $context_node ); $processor->context_node = $context_node; + $processor->breadcrumbs = array( 'HTML', $context_node->node_name ); return $processor; } @@ -523,44 +532,46 @@ public function next_token() { return false; } - if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) { - while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) { - continue; - } - $this->has_seen_context_node = 'done'; - return $this->next_token(); + /* + * Prime the events if there are none. + * + * @todo In some cases, probably related to the adoption agency + * algorithm, this call to step() doesn't create any new + * events. Calling it again creates them. Figure out why + * this is and if it's inherent or if it's a bug. Looping + * until there are events or until there are no more + * tokens works in the meantime and isn't obviously wrong. + */ + while ( empty( $this->element_queue ) && $this->step() ) { + continue; } + // Process the next event on the queue. $this->current_element = array_shift( $this->element_queue ); - while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) { - if ( isset( $this->current_element ) ) { - if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { - $this->has_seen_context_node = true; - return $this->next_token(); - } - } - $this->current_element = array_shift( $this->element_queue ); + if ( ! isset( $this->current_element ) ) { + return false; } - if ( ! isset( $this->current_element ) ) { - if ( 'done' === $this->has_seen_context_node ) { - return false; - } else { - return $this->next_token(); - } + $is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation; + + /* + * The root node only exists in the fragment parser, and closing it + * indicates that the parse is complete. Stop before popping if from + * the breadcrumbs. + */ + if ( 'root-node' === $this->current_element->token->bookmark_name ) { + return ! $is_pop && $this->next_token(); } - if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) { - $this->element_queue = array(); - $this->current_element = null; - return false; + // Adjust the breadcrumbs for this event. + if ( $is_pop ) { + array_pop( $this->breadcrumbs ); + } else { + $this->breadcrumbs[] = $this->current_element->token->node_name; } // Avoid sending close events for elements which don't expect a closing. - if ( - WP_HTML_Stack_Event::POP === $this->current_element->operation && - ! static::expects_closer( $this->current_element->token ) - ) { + if ( $is_pop && ! static::expects_closer( $this->current_element->token ) ) { return $this->next_token(); } @@ -643,10 +654,11 @@ public function matches_breadcrumbs( $breadcrumbs ) { return false; } - foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) { + $node = $this->breadcrumbs[ $i ]; $crumb = strtoupper( current( $breadcrumbs ) ); - if ( '*' !== $crumb && $node->node_name !== $crumb ) { + if ( '*' !== $crumb && $node !== $crumb ) { return false; } @@ -862,46 +874,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { * @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL. */ public function get_breadcrumbs() { - $breadcrumbs = array(); - - foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) { - $breadcrumbs[] = $stack_item->node_name; - } - - if ( ! $this->is_virtual() ) { - return $breadcrumbs; - } - - foreach ( $this->element_queue as $queue_item ) { - if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) { - break; - } - - if ( 'context-node' === $queue_item->token->bookmark_name ) { - break; - } - - if ( 'real' === $queue_item->provenance ) { - break; - } - - if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) { - $breadcrumbs[] = $queue_item->token->node_name; - } else { - array_pop( $breadcrumbs ); - } - } - - if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) { - array_pop( $breadcrumbs ); - } - - // Add the virtual node we're at. - if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { - $breadcrumbs[] = $this->current_element->token->node_name; - } - - return $breadcrumbs; + return $this->breadcrumbs; } /** @@ -930,9 +903,7 @@ public function get_breadcrumbs() { * @return int Nesting-depth of current location in the document. */ public function get_current_depth() { - return $this->is_virtual() - ? count( $this->get_breadcrumbs() ) - : $this->state->stack_of_open_elements->count(); + return count( $this->breadcrumbs ); } /** @@ -2552,7 +2523,6 @@ public function seek( $bookmark_name ) { ? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start : 0; $bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start; - $bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length; $direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward'; /* @@ -2610,6 +2580,12 @@ public function seek( $bookmark_name ) { $this->state->frameset_ok = true; $this->element_queue = array(); $this->current_element = null; + + if ( isset( $this->context_node ) ) { + $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); + } else { + $this->breadcrumbs = array(); + } } // When moving forwards, reparse the document until reaching the same location as the original bookmark. diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php index 717276935a780..adce614506429 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php @@ -387,7 +387,16 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element $this->assertSame( 'CODE', $processor->get_tag(), "Expected to start test on CODE element but found {$processor->get_tag()} instead." ); $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' ); - $this->assertTrue( $processor->next_token(), 'Failed to advance past CODE tag to expected SPAN closer.' ); + $this->assertTrue( + $processor->next_tag( + array( + 'tag_name' => 'SPAN', + 'tag_closers' => 'visit', + ) + ), + 'Failed to advance past CODE tag to expected SPAN closer.' + ); + $this->assertSame( 'SPAN', $processor->get_tag() ); $this->assertTrue( $processor->is_tag_closer(), 'Expected to find closing SPAN, but found opener instead.' ); $this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to advance past CODE tag to expected DIV opener.' );