HTML API: Fix detection of single-character funky comments.

Since [60428] the Tag Processor has been misidentifying single-character
funky comments. It has been asserting that the full token length for a
funky comment must be at least three characters after the opening (e.g.
`</1>`), but it has also been starting its search for the closing `>`
after those same three characters. This means that it has been skipping
the actual close of these funky comments and swallowing the syntax that
follows until it finds a `>`, often consuming the next tag in the process.

This patch fixes the detector so that the token following a
single-character funky comment is found again.

Developed in https://github.com/WordPress/wordpress-develop/pull/6412
Discussed in https://core.trac.wordpress.org/ticket/60170

Follow-up to [60428].
Fixes #60170.
Props dmsnell, gziolo, jonsurrell.

Built from https://develop.svn.wordpress.org/trunk@58040


git-svn-id: http://core.svn.wordpress.org/trunk@57506 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
dmsnell 2024-04-24 07:45:14 +00:00
parent 9d5f8481df
commit f9776f536f
2 changed files with 10 additions and 4 deletions

View File

@ -1629,7 +1629,7 @@ class WP_HTML_Tag_Processor {
* `<!` transitions to markup declaration open state * `<!` transitions to markup declaration open state
* https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
*/ */
if ( '!' === $html[ $at + 1 ] ) { if ( ! $this->is_closing_tag && '!' === $html[ $at + 1 ] ) {
/* /*
* `<!--` transitions to a comment state apply further comment rules. * `<!--` transitions to a comment state apply further comment rules.
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
@ -1809,6 +1809,12 @@ class WP_HTML_Tag_Processor {
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/ */
if ( '>' === $html[ $at + 1 ] ) { if ( '>' === $html[ $at + 1 ] ) {
// `<>` is interpreted as plaintext.
if ( ! $this->is_closing_tag ) {
++$at;
continue;
}
$this->parser_state = self::STATE_PRESUMPTUOUS_TAG; $this->parser_state = self::STATE_PRESUMPTUOUS_TAG;
$this->token_length = $at + 2 - $this->token_starts_at; $this->token_length = $at + 2 - $this->token_starts_at;
$this->bytes_already_parsed = $at + 2; $this->bytes_already_parsed = $at + 2;
@ -1819,7 +1825,7 @@ class WP_HTML_Tag_Processor {
* `<?` transitions to a bogus comment state skip to the nearest > * `<?` transitions to a bogus comment state skip to the nearest >
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/ */
if ( '?' === $html[ $at + 1 ] ) { if ( ! $this->is_closing_tag && '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 ); $closer_at = strpos( $html, '>', $at + 2 );
if ( false === $closer_at ) { if ( false === $closer_at ) {
$this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->parser_state = self::STATE_INCOMPLETE_INPUT;
@ -1891,7 +1897,7 @@ class WP_HTML_Tag_Processor {
return false; return false;
} }
$closer_at = strpos( $html, '>', $at + 3 ); $closer_at = strpos( $html, '>', $at + 2 );
if ( false === $closer_at ) { if ( false === $closer_at ) {
$this->parser_state = self::STATE_INCOMPLETE_INPUT; $this->parser_state = self::STATE_INCOMPLETE_INPUT;

View File

@ -16,7 +16,7 @@
* *
* @global string $wp_version * @global string $wp_version
*/ */
$wp_version = '6.6-alpha-58039'; $wp_version = '6.6-alpha-58040';
/** /**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.