diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index d924c9be5d..661b9c712a 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -843,10 +843,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { if ( self::PROCESS_NEXT_NODE === $node_to_process ) { parent::next_token(); - if ( - WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state || - WP_HTML_Tag_Processor::STATE_CDATA_NODE === $this->parser_state - ) { + if ( WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ) { parent::subdivide_text_appropriately(); } } @@ -4375,7 +4372,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { } switch ( $op ) { - case '#cdata-section': case '#text': /* * > A character token that is U+0000 NULL @@ -4395,6 +4391,24 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->insert_foreign_element( $this->state->current_token, false ); return true; + /* + * CDATA sections are alternate wrappers for text content and therefore + * ought to follow the same rules as text nodes. + */ + case '#cdata-section': + /* + * NULL bytes and whitespace do not change the frameset-ok flag. 
+ */ + $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; + $cdata_content_start = $current_token->start + 9; + $cdata_content_length = $current_token->length - 12; + if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) { + $this->state->frameset_ok = false; + } + + $this->insert_foreign_element( $this->state->current_token, false ); + return true; + /* * > A comment token */ diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php index e4397b2644..e8572935a6 100644 --- a/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/wp-includes/html-api/class-wp-html-tag-processor.php @@ -3337,8 +3337,8 @@ class WP_HTML_Tag_Processor { } /** - * Subdivides a matched text node or CDATA text node, splitting NULL byte sequences - * and decoded whitespace as distinct prefixes. + * Subdivides a matched text node, splitting NULL byte sequences and decoded whitespace as + * distinct prefixes. * * Note that once anything that's neither a NULL byte nor decoded whitespace is * encountered, then the remainder of the text node is left intact as generic text. @@ -3368,70 +3368,55 @@ class WP_HTML_Tag_Processor { * @return bool Whether the text node was subdivided. */ public function subdivide_text_appropriately(): bool { - $this->text_node_classification = self::TEXT_IS_GENERIC; - - if ( self::STATE_TEXT_NODE === $this->parser_state ) { - /* - * NULL bytes are treated categorically different than numeric character - * references whose number is zero. `&#x00;` is not the same as `"\x00"`. 
- */ - $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); - if ( $leading_nulls > 0 ) { - $this->token_length = $leading_nulls; - $this->text_length = $leading_nulls; - $this->bytes_already_parsed = $this->token_starts_at + $leading_nulls; - $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; - return true; - } - - /* - * Start a decoding loop to determine the point at which the - * text subdivides. This entails raw whitespace bytes and any - * character reference that decodes to the same. - */ - $at = $this->text_starts_at; - $end = $this->text_starts_at + $this->text_length; - while ( $at < $end ) { - $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at ); - $at += $skipped; - - if ( $at < $end && '&' === $this->html[ $at ] ) { - $matched_byte_length = null; - $replacement = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length ); - if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) { - $at += $matched_byte_length; - continue; - } - } - - break; - } - - if ( $at > $this->text_starts_at ) { - $new_length = $at - $this->text_starts_at; - $this->text_length = $new_length; - $this->token_length = $new_length; - $this->bytes_already_parsed = $at; - $this->text_node_classification = self::TEXT_IS_WHITESPACE; - return true; - } - + if ( self::STATE_TEXT_NODE !== $this->parser_state ) { return false; } - // Unlike text nodes, there are no character references within CDATA sections. - if ( self::STATE_CDATA_NODE === $this->parser_state ) { - $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); - if ( $leading_nulls === $this->text_length ) { - $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; - return true; + $this->text_node_classification = self::TEXT_IS_GENERIC; + + /* + * NULL bytes are treated categorically different than numeric character + * references whose number is zero. 
`&#x00;` is not the same as `"\x00"`. + */ + $leading_nulls = strspn( $this->html, "\x00", $this->text_starts_at, $this->text_length ); + if ( $leading_nulls > 0 ) { + $this->token_length = $leading_nulls; + $this->text_length = $leading_nulls; + $this->bytes_already_parsed = $this->token_starts_at + $leading_nulls; + $this->text_node_classification = self::TEXT_IS_NULL_SEQUENCE; + return true; + } + + /* + * Start a decoding loop to determine the point at which the + * text subdivides. This entails raw whitespace bytes and any + * character reference that decodes to the same. + */ + $at = $this->text_starts_at; + $end = $this->text_starts_at + $this->text_length; + while ( $at < $end ) { + $skipped = strspn( $this->html, " \t\f\r\n", $at, $end - $at ); + $at += $skipped; + + if ( $at < $end && '&' === $this->html[ $at ] ) { + $matched_byte_length = null; + $replacement = WP_HTML_Decoder::read_character_reference( 'data', $this->html, $at, $matched_byte_length ); + if ( isset( $replacement ) && 1 === strspn( $replacement, " \t\f\r\n" ) ) { + $at += $matched_byte_length; + continue; + } } - $leading_ws = strspn( $this->html, " \t\f\r\n", $this->text_starts_at, $this->text_length ); - if ( $leading_ws === $this->text_length ) { - $this->text_node_classification = self::TEXT_IS_WHITESPACE; - return true; - } + break; + } + + if ( $at > $this->text_starts_at ) { + $new_length = $at - $this->text_starts_at; + $this->text_length = $new_length; + $this->token_length = $new_length; + $this->bytes_already_parsed = $at; + $this->text_node_classification = self::TEXT_IS_WHITESPACE; + return true; } return false; diff --git a/wp-includes/version.php b/wp-includes/version.php index 0c86a35335..807bfa3b4d 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.7-alpha-58976'; +$wp_version = '6.7-alpha-58977'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB 
schema.