HTML API: Include DOCTYPE in full parser serialization.

Output DOCTYPE when calling `WP_HTML_Processor::serialize` on a full document that includes a DOCTYPE.

The DOCTYPE should be included in the serialized/normalized HTML output because it has an impact on how the document is handled, in particular whether it is handled in quirks or no-quirks mode.

This only affects the serialization of full parsers at this time because DOCTYPE tokens are currently ignored in all possible fragments. The omission of the DOCTYPE is subtle but can change the serialized document's quirks/no-quirks mode.

Props jonsurrell.
Fixes #62396.
Built from https://develop.svn.wordpress.org/trunk@59399


git-svn-id: http://core.svn.wordpress.org/trunk@58785 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Bernhard Reiter 2024-11-13 12:20:15 +00:00
parent 9a99674204
commit 8f5eb7a835
2 changed files with 25 additions and 5 deletions

View File

@ -1178,6 +1178,30 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$token_type = $this->get_token_type();
switch ( $token_type ) {
case '#doctype':
$doctype = $this->get_doctype_info();
if ( null === $doctype ) {
break;
}
$html .= '<!DOCTYPE';
if ( $doctype->name ) {
$html .= " {$doctype->name}";
}
if ( null !== $doctype->public_identifier ) {
$html .= " PUBLIC \"{$doctype->public_identifier}\"";
}
if ( null !== $doctype->system_identifier ) {
if ( null === $doctype->public_identifier ) {
$html .= ' SYSTEM';
}
$html .= " \"{$doctype->system_identifier}\"";
}
$html .= '>';
break;
case '#text':
$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
break;
@ -1194,10 +1218,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case '#cdata-section':
$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
break;
case 'html':
$html .= '<!DOCTYPE html>';
break;
}
if ( '#tag' !== $token_type ) {

View File

@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
$wp_version = '6.8-alpha-59398';
$wp_version = '6.8-alpha-59399';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.