From 0be8a89a8f009a446f829898200498fda1d2e6e2 Mon Sep 17 00:00:00 2001 From: Bernhard Reiter Date: Tue, 12 Nov 2024 10:32:17 +0000 Subject: [PATCH] HTML API: Ensure that full processor can seek to earlier bookmarks. When the HTML Processor seeks to an earlier place, it returns to the beginning of the document and proceeds forward until it reaches the appropriate location. This requires resetting internal state so that the processor can correctly proceed from the beginning of the document. The seeking reset logic was not adapted to account for the full processor (i.e. when created via `WP_HTML_Processor::create_full_parser()`). This change updates the seek logic to account for the full and fragment parsers as well as other state that has been introduced in the interim and should be reset. Props jonsurrell, dmsnell, westonruter, mi5t4n. Fixes #62290. Built from https://develop.svn.wordpress.org/trunk@59391 git-svn-id: http://core.svn.wordpress.org/trunk@58777 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- .../html-api/class-wp-html-open-elements.php | 9 -- .../html-api/class-wp-html-processor.php | 96 +++++++++++++------ wp-includes/version.php | 2 +- 3 files changed, 69 insertions(+), 38 deletions(-) diff --git a/wp-includes/html-api/class-wp-html-open-elements.php b/wp-includes/html-api/class-wp-html-open-elements.php index cb913853f0..210492ab9a 100644 --- a/wp-includes/html-api/class-wp-html-open-elements.php +++ b/wp-includes/html-api/class-wp-html-open-elements.php @@ -520,11 +520,6 @@ class WP_HTML_Open_Elements { return false; } - if ( 'context-node' === $item->bookmark_name ) { - $this->stack[] = $item; - return false; - } - $this->after_element_pop( $item ); return true; } @@ -585,10 +580,6 @@ class WP_HTML_Open_Elements { * @return bool Whether the node was found and removed from the stack of open elements. 
*/ public function remove_node( WP_HTML_Token $token ): bool { - if ( 'context-node' === $token->bookmark_name ) { - return false; - } - foreach ( $this->walk_up() as $position_from_end => $item ) { if ( $token->bookmark_name !== $item->bookmark_name ) { continue; diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index 19d15bfa43..6a2c7d6fbe 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -5328,52 +5328,92 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * and computation time. */ if ( 'backward' === $direction ) { + /* - * Instead of clearing the parser state and starting fresh, calling the stack methods - * maintains the proper flags in the parser. + * When moving backward, stateful stacks should be cleared. */ foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; - } - $this->state->stack_of_open_elements->remove_node( $item ); } foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; - } - $this->state->active_formatting_elements->remove_node( $item ); } - parent::seek( 'context-node' ); - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - $this->state->frameset_ok = true; - $this->element_queue = array(); - $this->current_element = null; + /* + * **After** clearing stacks, more processor state can be reset. + * This must be done after clearing the stack because those stacks generate events that + * would appear on a subsequent call to `next_token()`. 
+ */ + $this->state->frameset_ok = true; + $this->state->stack_of_template_insertion_modes = array(); + $this->state->head_element = null; + $this->state->form_element = null; + $this->state->current_token = null; + $this->current_element = null; + $this->element_queue = array(); - if ( isset( $this->context_node ) ) { - $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); + /* + * The absence of a context node indicates a full parse. + * The presence of a context node indicates a fragment parser. + */ + if ( null === $this->context_node ) { + $this->change_parsing_namespace( 'html' ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; + $this->breadcrumbs = array(); + + $this->bookmarks['initial'] = new WP_HTML_Span( 0, 0 ); + parent::seek( 'initial' ); + unset( $this->bookmarks['initial'] ); } else { - $this->breadcrumbs = array(); + + /* + * Push the root-node (HTML) back onto the stack of open elements. + * + * Fragment parsers require this extra bit of setup. + * It's handled in full parsers by advancing the processor state. + */ + $this->state->stack_of_open_elements->push( + new WP_HTML_Token( + 'root-node', + 'HTML', + false + ) + ); + + $this->change_parsing_namespace( + $this->context_node->integration_node_type + ? 'html' + : $this->context_node->namespace + ); + + if ( 'TEMPLATE' === $this->context_node->node_name ) { + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + } + + $this->reset_insertion_mode_appropriately(); + $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); + parent::seek( $this->context_node->bookmark_name ); } } - // When moving forwards, reparse the document until reaching the same location as the original bookmark. 
- if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { - return true; - } - - while ( $this->next_token() ) { + /* + * Here, the processor moves forward through the document until it matches the bookmark. + * do-while is used here because the processor is expected to already be stopped on + * a token that may match the bookmarked location. + */ + do { + /* + * The processor will stop on virtual tokens, but bookmarks may not be set on them. + * They should not be matched when seeking a bookmark, skip them. + */ + if ( $this->is_virtual() ) { + continue; + } if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { - while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { - $this->current_element = array_shift( $this->element_queue ); - } return true; } - } + } while ( $this->next_token() ); return false; } diff --git a/wp-includes/version.php b/wp-includes/version.php index 7af6c9d00c..a75dfbf805 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.8-alpha-59390'; +$wp_version = '6.8-alpha-59391'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.