From 519a8f6bbd7f386c01264b02a57901e8e3f410f9 Mon Sep 17 00:00:00 2001 From: dmsnell Date: Fri, 12 Jul 2024 22:20:15 +0000 Subject: [PATCH] HTML API: Simplify breadcrumb accounting. Since the HTML Processor started visiting all nodes in a document, both real and virtual, the breadcrumb accounting became a bit complicated and it's not entirely clear that it is fully reliable. In this patch the breadcrumbs are rebuilt separately from the stack of open elements in order to eliminate the problem of the stateful stack interactions and the post-hoc event queue. Breadcrumbs are greatly simplified as a result, and more verifiably correct, in this construction. Developed in https://github.com/WordPress/wordpress-develop/pull/6981 Discussed in https://core.trac.wordpress.org/ticket/61576 Follow-up to [58590]. Props bernhard-reiter, dmsnell. See #61576. Built from https://develop.svn.wordpress.org/trunk@58713 git-svn-id: http://core.svn.wordpress.org/trunk@58115 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- .../html-api/class-wp-html-processor.php | 130 +++++++----------- wp-includes/version.php | 2 +- 2 files changed, 54 insertions(+), 78 deletions(-) diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index 223fcda76a..d7f77f495c 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -211,6 +211,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $element_queue = array(); + /** + * Stores the current breadcrumbs. + * + * @since 6.7.0 + * + * @var string[] + */ + private $breadcrumbs = array(); + /** * Current stack event, if set, representing a matched token. * @@ -310,8 +319,8 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { false ); - $processor->state->stack_of_open_elements->push( $context_node ); $processor->context_node = $context_node; + $processor->breadcrumbs = array( 'HTML', $context_node->node_name ); return $processor; } @@ -523,44 +532,46 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return false; } - if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) { - while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) { - continue; - } - $this->has_seen_context_node = 'done'; - return $this->next_token(); + /* + * Prime the events if there are none. + * + * @todo In some cases, probably related to the adoption agency + * algorithm, this call to step() doesn't create any new + * events. Calling it again creates them. Figure out why + * this is and if it's inherent or if it's a bug. Looping + * until there are events or until there are no more + * tokens works in the meantime and isn't obviously wrong. + */ + while ( empty( $this->element_queue ) && $this->step() ) { + continue; } + // Process the next event on the queue. $this->current_element = array_shift( $this->element_queue ); - while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) { - if ( isset( $this->current_element ) ) { - if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { - $this->has_seen_context_node = true; - return $this->next_token(); - } - } - $this->current_element = array_shift( $this->element_queue ); - } - if ( ! isset( $this->current_element ) ) { - if ( 'done' === $this->has_seen_context_node ) { - return false; - } else { - return $this->next_token(); - } - } - - if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) { - $this->element_queue = array(); - $this->current_element = null; return false; } + $is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation; + + /* + * The root node only exists in the fragment parser, and closing it + * indicates that the parse is complete. Stop before popping if from + * the breadcrumbs. + */ + if ( 'root-node' === $this->current_element->token->bookmark_name ) { + return ! $is_pop && $this->next_token(); + } + + // Adjust the breadcrumbs for this event. + if ( $is_pop ) { + array_pop( $this->breadcrumbs ); + } else { + $this->breadcrumbs[] = $this->current_element->token->node_name; + } + // Avoid sending close events for elements which don't expect a closing. - if ( - WP_HTML_Stack_Event::POP === $this->current_element->operation && - ! static::expects_closer( $this->current_element->token ) - ) { + if ( $is_pop && ! static::expects_closer( $this->current_element->token ) ) { return $this->next_token(); } @@ -643,10 +654,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return false; } - foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) { + $node = $this->breadcrumbs[ $i ]; $crumb = strtoupper( current( $breadcrumbs ) ); - if ( '*' !== $crumb && $node->node_name !== $crumb ) { + if ( '*' !== $crumb && $node !== $crumb ) { return false; } @@ -862,46 +874,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL. */ public function get_breadcrumbs() { - $breadcrumbs = array(); - - foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) { - $breadcrumbs[] = $stack_item->node_name; - } - - if ( ! $this->is_virtual() ) { - return $breadcrumbs; - } - - foreach ( $this->element_queue as $queue_item ) { - if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) { - break; - } - - if ( 'context-node' === $queue_item->token->bookmark_name ) { - break; - } - - if ( 'real' === $queue_item->provenance ) { - break; - } - - if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) { - $breadcrumbs[] = $queue_item->token->node_name; - } else { - array_pop( $breadcrumbs ); - } - } - - if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) { - array_pop( $breadcrumbs ); - } - - // Add the virtual node we're at. - if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { - $breadcrumbs[] = $this->current_element->token->node_name; - } - - return $breadcrumbs; + return $this->breadcrumbs; } /** @@ -930,9 +903,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return int Nesting-depth of current location in the document. */ public function get_current_depth() { - return $this->is_virtual() - ? count( $this->get_breadcrumbs() ) - : $this->state->stack_of_open_elements->count(); + return count( $this->breadcrumbs ); } /** @@ -2552,7 +2523,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { ? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start : 0; $bookmark_starts_at = $this->bookmarks[ $actual_bookmark_name ]->start; - $bookmark_length = $this->bookmarks[ $actual_bookmark_name ]->length; $direction = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward'; /* @@ -2610,6 +2580,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->state->frameset_ok = true; $this->element_queue = array(); $this->current_element = null; + + if ( isset( $this->context_node ) ) { + $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); + } else { + $this->breadcrumbs = array(); + } } // When moving forwards, reparse the document until reaching the same location as the original bookmark. diff --git a/wp-includes/version.php b/wp-includes/version.php index 36e285b707..453cc90702 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.7-alpha-58712'; +$wp_version = '6.7-alpha-58713'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.