HTML API: Add support for a few invalid HTML comment forms.

- Comments created by means of a tag closer with an invalid tag name, e.g. `</3>`.
- Comments closed with the invalid `--!>` closer. (Comments should be closed by `-->`, but `--!>` also closes them; the HTML specification treats this as a parse error.)
- Tag closers with a missing tag name, e.g. `</>`, which technically aren't comments: the parser skips over them entirely.

Props dmsnell, costdev.
Fixes #58007.
Built from https://develop.svn.wordpress.org/trunk@55667


git-svn-id: http://core.svn.wordpress.org/trunk@55179 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Bernhard Reiter 2023-04-20 17:10:20 +00:00
parent 19d36bfdd2
commit b116fcdb27
2 changed files with 62 additions and 6 deletions

View File

@ -971,6 +971,7 @@ class WP_HTML_Tag_Processor {
* closing `>`; these are left for other methods. * closing `>`; these are left for other methods.
* *
* @since 6.2.0 * @since 6.2.0
* @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
* *
* @return bool Whether a tag was found before the end of the document. * @return bool Whether a tag was found before the end of the document.
*/ */
@ -1039,13 +1040,42 @@ class WP_HTML_Tag_Processor {
'-' === $html[ $at + 2 ] && '-' === $html[ $at + 2 ] &&
'-' === $html[ $at + 3 ] '-' === $html[ $at + 3 ]
) { ) {
$closer_at = strpos( $html, '-->', $at + 4 ); $closer_at = $at + 4;
if ( false === $closer_at ) { // If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
return false; return false;
} }
$at = $closer_at + 3; // Abruptly-closed empty comments are a sequence of dashes followed by `>`.
continue; $span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$at = $closer_at + $span_of_dashes + 1;
continue;
}
/*
* Comments may be closed by either a --> or an invalid --!>.
* The first occurrence closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
*/
$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
return false;
}
if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$at = $closer_at + 3;
continue 2;
}
if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$at = $closer_at + 4;
continue 2;
}
}
} }
/* /*
@ -1104,9 +1134,19 @@ class WP_HTML_Tag_Processor {
continue; continue;
} }
/*
* </> is a missing end tag name, which is ignored.
*
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/
if ( '>' === $html[ $at + 1 ] ) {
$at++;
continue;
}
/* /*
* <? transitions to a bogus comment state skip to the nearest > * <? transitions to a bogus comment state skip to the nearest >
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/ */
if ( '?' === $html[ $at + 1 ] ) { if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 ); $closer_at = strpos( $html, '>', $at + 2 );
@ -1118,6 +1158,22 @@ class WP_HTML_Tag_Processor {
continue; continue;
} }
/*
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
return false;
}
$at = $closer_at + 1;
continue;
}
++$at; ++$at;
} }

View File

@ -16,7 +16,7 @@
* *
* @global string $wp_version * @global string $wp_version
*/ */
$wp_version = '6.3-alpha-55666'; $wp_version = '6.3-alpha-55667';
/** /**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema. * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.