diff --git a/wp-includes/html-api/class-wp-html-processor-state.php b/wp-includes/html-api/class-wp-html-processor-state.php index 16875c4ac1..b7cdd347ca 100644 --- a/wp-includes/html-api/class-wp-html-processor-state.php +++ b/wp-includes/html-api/class-wp-html-processor-state.php @@ -299,31 +299,6 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset'; - /** - * No-quirks mode document compatability mode. - * - * > In no-quirks mode, the behavior is (hopefully) the desired behavior - * > described by the modern HTML and CSS specifications. - * - * @since 6.7.0 - * - * @var string - */ - const NO_QUIRKS_MODE = 'no-quirks-mode'; - - /** - * Quirks mode document compatability mode. - * - * > In quirks mode, layout emulates behavior in Navigator 4 and Internet - * > Explorer 5. This is essential in order to support websites that were - * > built before the widespread adoption of web standards. - * - * @since 6.7.0 - * - * @var string - */ - const QUIRKS_MODE = 'quirks-mode'; - /** * The stack of template insertion modes. * @@ -381,30 +356,6 @@ class WP_HTML_Processor_State { */ public $insertion_mode = self::INSERTION_MODE_INITIAL; - /** - * Indicates if the document is in quirks mode or no-quirks mode. - * - * Impact on HTML parsing: - * - * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte - * manner, otherwise for backwards compatability, class selectors are to - * match in an ASCII case-insensitive manner. - * - * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag - * if one is in scope and open, otherwise the TABLE becomes a child of the P. - * - * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but - * none of the other changes modifies how the HTML is parsed or selected. 
- * - * @see self::QUIRKS_MODE - * @see self::NO_QUIRKS_MODE - * - * @since 6.7.0 - * - * @var string - */ - public $document_mode = self::NO_QUIRKS_MODE; - /** * Context node initializing fragment parser, if created as a fragment parser. * diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index 661b9c712a..55b9061368 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -1080,7 +1080,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { case 'html': $doctype = $this->get_doctype_info(); if ( null !== $doctype && 'quirks' === $doctype->indicated_compatability_mode ) { - $this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; } /* @@ -1095,7 +1095,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > Anything else */ initial_anything_else: - $this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; return $this->step( self::REPROCESS_CURRENT_NODE ); } @@ -2448,7 +2448,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > has a p element in button scope, then close a p element. */ if ( - WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && + WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode && $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); @@ -4938,6 +4938,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * @since 6.6.0 Subclassed for the HTML Processor. * + * @todo When reconstructing active formatting elements with attributes, find a way + * to indicate if the virtually-reconstructed formatting elements contain the + * wanted class name. 
+ * * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. * @return bool|null Whether the matched tag contains the given class name, or null if not matched. */ diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php index e8572935a6..1ea8066d97 100644 --- a/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/wp-includes/html-api/class-wp-html-tag-processor.php @@ -511,6 +511,32 @@ class WP_HTML_Tag_Processor { */ protected $parser_state = self::STATE_READY; + /** + * Indicates if the document is in quirks mode or no-quirks mode. + * + * Impact on HTML parsing: + * + * - In `NO_QUIRKS_MODE` (also known as "standard mode"): + * - CSS class and ID selectors match byte-for-byte (case-sensitively). + * - A TABLE start tag `<table>` implicitly closes any open `P` element. + * + * - In `QUIRKS_MODE`: + * - CSS class and ID selectors match in an ASCII case-insensitive manner. + * - A TABLE start tag `<table>` opens a `TABLE` element as a child of a `P` + * element if one is open. + * + * Quirks and no-quirks mode are thus mostly about styling, but have an impact when + * tables are found inside paragraph elements. + * + * @see self::QUIRKS_MODE + * @see self::NO_QUIRKS_MODE + * + * @since 6.7.0 + * + * @var string + */ + protected $compat_mode = self::NO_QUIRKS_MODE; + /** * Indicates whether the parser is inside foreign content, * e.g. inside an SVG or MathML element. @@ -1155,6 +1181,8 @@ class WP_HTML_Tag_Processor { $seen = array(); + $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + $at = 0; while ( $at < strlen( $class ) ) { // Skip past any initial boundary characters. @@ -1169,13 +1197,11 @@ class WP_HTML_Tag_Processor { return; } - /* - * CSS class names are case-insensitive in the ASCII range. - * - * @see https://www.w3.org/TR/CSS2/syndata.html#x1 - */ - $name = str_replace( "\x00", "\u{FFFD}", strtolower( substr( $class, $at, $length ) ) ); - $at += $length; + $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) ); + if ( $is_quirks ) { + $name = strtolower( $name ); + } + $at += $length; /* * It's expected that the number of class names for a given tag is relatively small. 
@@ -1205,10 +1231,14 @@ class WP_HTML_Tag_Processor { return null; } - $wanted_class = strtolower( $wanted_class ); + $case_insensitive = self::QUIRKS_MODE === $this->compat_mode; + $wanted_length = strlen( $wanted_class ); foreach ( $this->class_list() as $class_name ) { - if ( $class_name === $wanted_class ) { + if ( + strlen( $class_name ) === $wanted_length && + 0 === substr_compare( $class_name, $wanted_class, 0, strlen( $wanted_class ), $case_insensitive ) + ) { return true; } } @@ -2296,6 +2326,23 @@ class WP_HTML_Tag_Processor { */ $modified = false; + $seen = array(); + $to_remove = array(); + $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + if ( $is_quirks ) { + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( self::REMOVE_CLASS === $action ) { + $to_remove[] = strtolower( $updated_name ); + } + } + } else { + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( self::REMOVE_CLASS === $action ) { + $to_remove[] = $updated_name; + } + } + } + // Remove unwanted classes by only copying the new ones. $existing_class_length = strlen( $existing_class ); while ( $at < $existing_class_length ) { @@ -2311,25 +2358,23 @@ class WP_HTML_Tag_Processor { break; } - $name = substr( $existing_class, $at, $name_length ); - $at += $name_length; + $name = substr( $existing_class, $at, $name_length ); + $comparable_class_name = $is_quirks ? strtolower( $name ) : $name; + $at += $name_length; - // If this class is marked for removal, start processing the next one. - $remove_class = ( - isset( $this->classname_updates[ $name ] ) && - self::REMOVE_CLASS === $this->classname_updates[ $name ] - ); - - // If a class has already been seen then skip it; it should not be added twice. - if ( ! $remove_class ) { - $this->classname_updates[ $name ] = self::SKIP_CLASS; - } - - if ( $remove_class ) { + // If this class is marked for removal, remove it and move on to the next one. 
+ if ( in_array( $comparable_class_name, $to_remove, true ) ) { $modified = true; continue; } + // If a class has already been seen then skip it; it should not be added twice. + if ( in_array( $comparable_class_name, $seen, true ) ) { + continue; + } + + $seen[] = $comparable_class_name; + /* * Otherwise, append it to the new "class" attribute value. * @@ -2350,7 +2395,8 @@ class WP_HTML_Tag_Processor { // Add new classes by appending those which haven't already been seen. foreach ( $this->classname_updates as $name => $operation ) { - if ( self::ADD_CLASS === $operation ) { + $comparable_name = $is_quirks ? strtolower( $name ) : $name; + if ( self::ADD_CLASS === $operation && ! in_array( $comparable_name, $seen, true ) ) { $modified = true; $class .= strlen( $class ) > 0 ? ' ' : ''; @@ -3932,8 +3978,29 @@ class WP_HTML_Tag_Processor { return false; } - $this->classname_updates[ $class_name ] = self::ADD_CLASS; + if ( self::QUIRKS_MODE !== $this->compat_mode ) { + $this->classname_updates[ $class_name ] = self::ADD_CLASS; + return true; + } + /* + * Because class names are matched ASCII-case-insensitively in quirks mode, + * this needs to see if a case variant of the given class name is already + * enqueued and update that existing entry, if so. This picks the casing of + * the first-provided class name for all lexical variations. 
+ */ + $class_name_length = strlen( $class_name ); + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( + strlen( $updated_name ) === $class_name_length && + 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) + ) { + $this->classname_updates[ $updated_name ] = self::ADD_CLASS; + return true; + } + } + + $this->classname_updates[ $class_name ] = self::ADD_CLASS; return true; } @@ -3953,10 +4020,29 @@ class WP_HTML_Tag_Processor { return false; } - if ( null !== $this->tag_name_starts_at ) { + if ( self::QUIRKS_MODE !== $this->compat_mode ) { $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; + return true; } + /* + * Because class names are matched ASCII-case-insensitively in quirks mode, + * this needs to see if a case variant of the given class name is already + * enqueued and update that existing entry, if so. This picks the casing of + * the first-provided class name for all lexical variations. + */ + $class_name_length = strlen( $class_name ); + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( + strlen( $updated_name ) === $class_name_length && + 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) + ) { + $this->classname_updates[ $updated_name ] = self::REMOVE_CLASS; + return true; + } + } + + $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; return true; } @@ -4350,6 +4436,37 @@ class WP_HTML_Tag_Processor { */ const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML'; + /** + * No-quirks mode document compatibility mode. + * + * > In no-quirks mode, the behavior is (hopefully) the desired behavior + * > described by the modern HTML and CSS specifications. + * + * @see self::$compat_mode + * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode + * + * @since 6.7.0 + * + * @var string + */ + const NO_QUIRKS_MODE = 'no-quirks-mode'; + + /** + * Quirks mode document compatibility mode. 
+ * + * > In quirks mode, layout emulates behavior in Navigator 4 and Internet + * > Explorer 5. This is essential in order to support websites that were + * > built before the widespread adoption of web standards. + * + * @see self::$compat_mode + * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode + * + * @since 6.7.0 + * + * @var string + */ + const QUIRKS_MODE = 'quirks-mode'; + /** * Indicates that a span of text may contain any combination of significant * kinds of characters: NULL bytes, whitespace, and others. diff --git a/wp-includes/version.php b/wp-includes/version.php index 50f8507872..b8aec43c6e 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.7-alpha-58984'; +$wp_version = '6.7-alpha-58985'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.