diff --git a/wp-includes/html-api/class-wp-html-open-elements.php b/wp-includes/html-api/class-wp-html-open-elements.php index c760009ce0..5ce1f8feb5 100644 --- a/wp-includes/html-api/class-wp-html-open-elements.php +++ b/wp-includes/html-api/class-wp-html-open-elements.php @@ -113,13 +113,13 @@ class WP_HTML_Open_Elements { * * @param int $nth Retrieve the nth item on the stack, with 1 being * the top element, 2 being the second, etc... - * @return string|null Name of the node on the stack at the given location, - * or `null` if the location isn't on the stack. + * @return WP_HTML_Token|null Node on the stack at the given location, + * or `null` if the location isn't on the stack. */ - public function at( int $nth ): ?string { + public function at( int $nth ): ?WP_HTML_Token { foreach ( $this->walk_down() as $item ) { if ( 0 === --$nth ) { - return $item->node_name; + return $item; } } @@ -242,18 +242,22 @@ class WP_HTML_Open_Elements { */ public function has_element_in_specific_scope( string $tag_name, $termination_list ): bool { foreach ( $this->walk_up() as $node ) { - if ( $node->node_name === $tag_name ) { + $namespaced_name = 'html' === $node->namespace + ? $node->node_name + : "{$node->namespace} {$node->node_name}"; + + if ( $namespaced_name === $tag_name ) { return true; } if ( '(internal: H1 through H6 - do not use)' === $tag_name && - in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) + in_array( $namespaced_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true ) ) { return true; } - if ( in_array( $node->node_name, $termination_list, true ) ) { + if ( in_array( $namespaced_name, $termination_list, true ) ) { return false; } } @@ -288,7 +292,7 @@ class WP_HTML_Open_Elements { * > - SVG title * * @since 6.4.0 - * @since 6.7.0 Supports all required HTML elements. + * @since 6.7.0 Full support. 
* * @see https://html.spec.whatwg.org/#has-an-element-in-scope * @@ -309,19 +313,16 @@ class WP_HTML_Open_Elements { 'OBJECT', 'TEMPLATE', - /* - * @todo Support SVG and MathML nodes when support for foreign content is added. - * - * - MathML mi - * - MathML mo - * - MathML mn - * - MathML ms - * - MathML mtext - * - MathML annotation-xml - * - SVG foreignObject - * - SVG desc - * - SVG title - */ + 'math MI', + 'math MO', + 'math MN', + 'math MS', + 'math MTEXT', + 'math ANNOTATION-XML', + + 'svg FOREIGNOBJECT', + 'svg DESC', + 'svg TITLE', ) ); } @@ -363,19 +364,16 @@ class WP_HTML_Open_Elements { 'TEMPLATE', 'UL', - /* - * @todo Support SVG and MathML nodes when support for foreign content is added. - * - * - MathML mi - * - MathML mo - * - MathML mn - * - MathML ms - * - MathML mtext - * - MathML annotation-xml - * - SVG foreignObject - * - SVG desc - * - SVG title - */ + 'math MI', + 'math MO', + 'math MN', + 'math MS', + 'math MTEXT', + 'math ANNOTATION-XML', + + 'svg FOREIGNOBJECT', + 'svg DESC', + 'svg TITLE', ) ); } @@ -413,19 +411,16 @@ class WP_HTML_Open_Elements { 'OBJECT', 'TEMPLATE', - /* - * @todo Support SVG and MathML nodes when support for foreign content is added. - * - * - MathML mi - * - MathML mo - * - MathML mn - * - MathML ms - * - MathML mtext - * - MathML annotation-xml - * - SVG foreignObject - * - SVG desc - * - SVG title - */ + 'math MI', + 'math MO', + 'math MN', + 'math MS', + 'math MTEXT', + 'math ANNOTATION-XML', + + 'svg FOREIGNOBJECT', + 'svg DESC', + 'svg TITLE', ) ); } @@ -692,11 +687,15 @@ class WP_HTML_Open_Elements { * @param WP_HTML_Token $item Element that was added to the stack of open elements. */ public function after_element_push( WP_HTML_Token $item ): void { + $namespaced_name = 'html' === $item->namespace + ? $item->node_name + : "{$item->namespace} {$item->node_name}"; + /* * When adding support for new elements, expand this switch to trap * cases where the precalculated value needs to change. 
*/ - switch ( $item->node_name ) { + switch ( $namespaced_name ) { case 'APPLET': case 'BUTTON': case 'CAPTION': @@ -707,6 +706,15 @@ class WP_HTML_Open_Elements { case 'MARQUEE': case 'OBJECT': case 'TEMPLATE': + case 'math MI': + case 'math MO': + case 'math MN': + case 'math MS': + case 'math MTEXT': + case 'math ANNOTATION-XML': + case 'svg FOREIGNOBJECT': + case 'svg DESC': + case 'svg TITLE': $this->has_p_in_button_scope = false; break; @@ -750,6 +758,15 @@ class WP_HTML_Open_Elements { case 'MARQUEE': case 'OBJECT': case 'TEMPLATE': + case 'math MI': + case 'math MO': + case 'math MN': + case 'math MS': + case 'math MTEXT': + case 'math ANNOTATION-XML': + case 'svg FOREIGNOBJECT': + case 'svg DESC': + case 'svg TITLE': $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); break; } diff --git a/wp-includes/html-api/class-wp-html-processor-state.php b/wp-includes/html-api/class-wp-html-processor-state.php index 97f6da95a0..16875c4ac1 100644 --- a/wp-includes/html-api/class-wp-html-processor-state.php +++ b/wp-includes/html-api/class-wp-html-processor-state.php @@ -299,18 +299,6 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset'; - /** - * In foreign content insertion mode for full HTML parser. - * - * @since 6.7.0 - * - * @see https://html.spec.whatwg.org/#parsing-main-inforeign - * @see WP_HTML_Processor_State::$insertion_mode - * - * @var string - */ - const INSERTION_MODE_IN_FOREIGN_CONTENT = 'insertion-mode-in-foreign-content'; - /** * No-quirks mode document compatability mode. 
* diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index 39ba43e467..3820fe0277 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -307,14 +307,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); - $processor->state->stack_of_open_elements->push( - new WP_HTML_Token( - 'root-node', - 'HTML', - false - ) + $root_node = new WP_HTML_Token( + 'root-node', + 'HTML', + false ); + $processor->state->stack_of_open_elements->push( $root_node ); + $context_node = new WP_HTML_Token( 'context-node', $processor->state->context_node[0], @@ -392,6 +392,8 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance ); + + $this->change_parsing_namespace( $token->namespace ); } ); @@ -401,6 +403,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance ); + $adjusted_current_node = $this->get_adjusted_current_node(); + $this->change_parsing_namespace( + $adjusted_current_node + ? $adjusted_current_node->namespace + : 'html' + ); } ); @@ -767,19 +775,20 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * foreign content will also act just like a void tag, immediately * closing as soon as the processor advances to the next token. 
* - * @since 6.6.0 + * @todo Review the self-closing logic when no node is present, ensure it + * matches the expectations in `step()`. * - * @todo When adding support for foreign content, ensure that - * this returns false for self-closing elements in the - * SVG and MathML namespace. + * @since 6.6.0 * * @param WP_HTML_Token|null $node Optional. Node to examine, if provided. * Default is to examine current node. * @return bool|null Whether to expect a closer for the currently-matched node, * or `null` if not matched on any token. */ - public function expects_closer( $node = null ): ?bool { - $token_name = $node->node_name ?? $this->get_token_name(); + public function expects_closer( ?WP_HTML_Token $node = null ): ?bool { + $token_name = $node->node_name ?? $this->get_token_name(); + $token_namespace = $node->namespace ?? $this->get_namespace(); + if ( ! isset( $token_name ) ) { return null; } @@ -792,7 +801,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { // Void elements. - self::is_void( $token_name ) || + ( 'html' === $token_namespace && self::is_void( $token_name ) ) || // Special atomic elements. - in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) + ( 'html' === $token_namespace && in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) || + // Self-closing elements in foreign content. + ( isset( $node ) && 'html' !== $node->namespace && $node->has_self_closing_flag ) ); } @@ -824,14 +835,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * When moving on to the next node, therefore, if the bottom-most element * on the stack is a void element, it must be closed. - * - * @todo Once self-closing foreign elements and BGSOUND are supported, - * they must also be implicitly closed here too. BGSOUND is - * special since it's only self-closing if the self-closing flag - * is provided in the opening tag, otherwise it expects a tag closer. 
*/ $top_node = $this->state->stack_of_open_elements->current_node(); - if ( isset( $top_node ) && ! static::expects_closer( $top_node ) ) { + if ( isset( $top_node ) && ! $this->expects_closer( $top_node ) ) { $this->state->stack_of_open_elements->pop(); } } @@ -848,14 +854,46 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return false; } - $this->state->current_token = new WP_HTML_Token( - $this->bookmark_token(), - $this->get_token_name(), - $this->has_self_closing_flag(), - $this->release_internal_bookmark_on_destruct + $adjusted_current_node = $this->get_adjusted_current_node(); + $is_closer = $this->is_tag_closer(); + $is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer; + $token_name = $this->get_token_name(); + + if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) { + $this->state->current_token = new WP_HTML_Token( + $this->bookmark_token(), + $token_name, + $this->has_self_closing_flag(), + $this->release_internal_bookmark_on_destruct + ); + } + + $parse_in_current_insertion_mode = ( + 0 === $this->state->stack_of_open_elements->count() || + 'html' === $adjusted_current_node->namespace || + ( + 'math' === $adjusted_current_node->integration_node_type && + ( + ( $is_start_tag && ! in_array( $token_name, array( 'MGLYPH', 'MALIGNMARK' ), true ) ) || + '#text' === $token_name + ) + ) || + ( + 'math' === $adjusted_current_node->namespace && + 'ANNOTATION-XML' === $adjusted_current_node->node_name && + $is_start_tag && 'SVG' === $token_name + ) || + ( + 'html' === $adjusted_current_node->integration_node_type && + ( $is_start_tag || '#text' === $token_name ) + ) ); try { + if ( ! 
$parse_in_current_insertion_mode ) { + return $this->step_in_foreign_content(); + } + switch ( $this->state->insertion_mode ) { case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: return $this->step_initial(); @@ -923,9 +961,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: return $this->step_after_after_frameset(); - case WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT: - return $this->step_in_foreign_content(); - // This should be unreachable but PHP doesn't have total type checking on switch. default: $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); @@ -1853,7 +1888,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { case '+BODY': if ( 1 === $this->state->stack_of_open_elements->count() || - 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || + 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { // Ignore the token. @@ -1879,7 +1914,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { case '+FRAMESET': if ( 1 === $this->state->stack_of_open_elements->count() || - 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || + 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || false === $this->state->frameset_ok ) { // Ignore the token. 
@@ -2075,7 +2110,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { 'ADDRESS' !== $node->node_name && 'DIV' !== $node->node_name && 'P' !== $node->node_name && - $this->is_special( $node->node_name ) + self::is_special( $node ) ) { /* * > If node is in the special category, but is not an address, div, @@ -2136,11 +2171,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" - * - * @todo This needs to check if the element in scope is an HTML element, meaning that - * when SVG and MathML support is added, this needs to differentiate between an - * HTML element of the given name, such as `
`, and a foreign element of - * the same given name. */ case '-ADDRESS': case '-ARTICLE': @@ -2411,11 +2441,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { /* * > A end tag token whose tag name is one of: "applet", "marquee", "object" - * - * @todo This needs to check if the element in scope is an HTML element, meaning that - * when SVG and MathML support is added, this needs to differentiate between an - * HTML element of the given name, such as ``, and a foreign element of - * the same given name. */ case '-APPLET': case '-MARQUEE': @@ -2679,9 +2704,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * These ought to be handled in the attribute methods. */ - - $this->bail( 'Cannot process MATH element, opening foreign content.' ); - break; + $this->state->current_token->namespace = 'math'; + $this->insert_html_element( $this->state->current_token ); + if ( $this->state->current_token->has_self_closing_flag ) { + $this->state->stack_of_open_elements->pop(); + } + return true; /* * > A start tag whose tag name is "svg" @@ -2695,9 +2723,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * These ought to be handled in the attribute methods. */ - - $this->bail( 'Cannot process SVG element, opening foreign content.' ); - break; + $this->state->current_token->namespace = 'svg'; + $this->insert_html_element( $this->state->current_token ); + if ( $this->state->current_token->has_self_closing_flag ) { + $this->state->stack_of_open_elements->pop(); + } + return true; /* * > A start tag whose tag name is one of: "caption", "col", "colgroup", @@ -2737,17 +2768,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * close anything beyond its containing `P` or `DIV` element. 
*/ foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { - /* - * @todo This needs to check if the element in scope is an HTML element, meaning that - * when SVG and MathML support is added, this needs to differentiate between an - * HTML element of the given name, such as ``, and a foreign element of - * the same given name. - */ - if ( $token_name === $node->node_name ) { + if ( 'html' === $node->namespace && $token_name === $node->node_name ) { break; } - if ( self::is_special( $node->node_name ) ) { + if ( self::is_special( $node ) ) { // This is a parse error, ignore the token. return $this->step(); } @@ -4069,7 +4094,284 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_foreign_content(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FOREIGN_CONTENT . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + /* + * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" + * + * This section drawn out above the switch to more easily incorporate + * the additional rules based on the presence of the attributes. + */ + if ( + '+FONT' === $op && + ( + null !== $this->get_attribute( 'color' ) || + null !== $this->get_attribute( 'face' ) || + null !== $this->get_attribute( 'size' ) + ) + ) { + $op = '+FONT with attributes'; + } + + switch ( $op ) { + case '#text': + /* + * > A character token that is U+0000 NULL + * + * This is handled by `get_modifiable_text()`. + */ + + /* + * Whitespace-only text does not affect the frameset-ok flag. + * It is probably inter-element whitespace, but it may also + * contain character references which decode only to whitespace. 
+ */ + $text = $this->get_modifiable_text(); + if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) { + $this->state->frameset_ok = false; + } + + $this->insert_foreign_element( $this->state->current_token, false ); + return true; + + /* + * > A comment token + */ + case '#cdata-section': + case '#comment': + case '#funky_comment': + $this->insert_foreign_element( $this->state->current_token, false ); + return true; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center", + * > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5", + * > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol", + * > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", + * > "table", "tt", "u", "ul", "var" + * + * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size" + * + * > An end tag whose tag name is "br", "p" + * + * Closing BR tags are always reported by the Tag Processor as opening tags. + */ + case '+B': + case '+BIG': + case '+BLOCKQUOTE': + case '+BODY': + case '+BR': + case '+CENTER': + case '+CODE': + case '+DD': + case '+DIV': + case '+DL': + case '+DT': + case '+EM': + case '+EMBED': + case '+H1': + case '+H2': + case '+H3': + case '+H4': + case '+H5': + case '+H6': + case '+HEAD': + case '+HR': + case '+I': + case '+IMG': + case '+LI': + case '+LISTING': + case '+MENU': + case '+META': + case '+NOBR': + case '+OL': + case '+P': + case '+PRE': + case '+RUBY': + case '+S': + case '+SMALL': + case '+SPAN': + case '+STRONG': + case '+STRIKE': + case '+SUB': + case '+SUP': + case '+TABLE': + case '+TT': + case '+U': + case '+UL': + case '+VAR': + case '+FONT with attributes': + case '-BR': + case '-P': + // @todo Indicate a parse error once it's possible. 
+ foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) { + if ( + 'math' === $current_node->integration_node_type || + 'html' === $current_node->integration_node_type || + 'html' === $current_node->namespace + ) { + break; + } + + $this->state->stack_of_open_elements->pop(); + } + return $this->step( self::REPROCESS_CURRENT_NODE ); + } + + /* + * > Any other start tag + */ + if ( ! $this->is_tag_closer() ) { + $this->insert_foreign_element( $this->state->current_token, false ); + + /* + * > If the token has its self-closing flag set, then run + * > the appropriate steps from the following list: + */ + if ( $this->state->current_token->has_self_closing_flag ) { + if ( 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { + /* + * > Acknowledge the token's self-closing flag, and then act as + * > described in the steps for a "script" end tag below. + * + * @todo Verify that this shouldn't be handled by the rule for + * "An end tag whose name is 'script', if the current node + * is an SVG script element." + */ + goto in_foreign_content_any_other_end_tag; + } else { + $this->state->stack_of_open_elements->pop(); + } + } + return true; + } + + /* + * > An end tag whose name is "script", if the current node is an SVG script element. + */ + if ( $this->is_tag_closer() && 'SCRIPT' === $this->state->current_token->node_name && 'svg' === $this->state->current_token->namespace ) { + $this->state->stack_of_open_elements->pop(); + } + + /* + * > Any other end tag + */ + if ( $this->is_tag_closer() ) { + in_foreign_content_any_other_end_tag: + $node = $this->state->stack_of_open_elements->current_node(); + if ( $tag_name !== $node->node_name ) { + // @todo Indicate a parse error once it's possible. 
+ } + in_foreign_content_end_tag_loop: + if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) { + return true; + } + + /* + * > If node's tag name, converted to ASCII lowercase, is the same as the tag name + * > of the token, pop elements from the stack of open elements until node has + * > been popped from the stack, and then return. + */ + if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) { + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + $this->state->stack_of_open_elements->pop(); + if ( $node === $item ) { + return true; + } + } + } + + foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { + $node = $item; + break; + } + + if ( 'html' !== $node->namespace ) { + goto in_foreign_content_end_tag_loop; + } + + switch ( $this->state->insertion_mode ) { + case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: + return $this->step_initial(); + + case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML: + return $this->step_before_html(); + + case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD: + return $this->step_before_head(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD: + return $this->step_in_head(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT: + return $this->step_in_head_noscript(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD: + return $this->step_after_head(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: + return $this->step_in_body(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: + return $this->step_in_table(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT: + return $this->step_in_table_text(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: + return $this->step_in_caption(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP: + return $this->step_in_column_group(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: + return $this->step_in_table_body(); + + 
case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: + return $this->step_in_row(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: + return $this->step_in_cell(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT: + return $this->step_in_select(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE: + return $this->step_in_select_in_table(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE: + return $this->step_in_template(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY: + return $this->step_after_body(); + + case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET: + return $this->step_in_frameset(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET: + return $this->step_after_frameset(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY: + return $this->step_after_after_body(); + + case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: + return $this->step_after_after_frameset(); + + // This should be unreachable but PHP doesn't have total type checking on switch. + default: + $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); + } + } } /* @@ -4099,6 +4401,19 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * HTML semantic overrides for Tag Processor */ + /** + * Indicates the namespace of the current token, or "html" if there is none. + * + * @return string One of "html", "math", or "svg". + */ + public function get_namespace(): string { + if ( ! isset( $this->current_element ) ) { + return 'html'; + } + + return $this->current_element->token->namespace; + } + /** * Returns the uppercase name of the matched tag. * @@ -4734,6 +5049,28 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { } } + /** + * Returns the adjusted current node. 
+ * + * > The adjusted current node is the context element if the parser was created as + * > part of the HTML fragment parsing algorithm and the stack of open elements + * > has only one element in it (fragment case); otherwise, the adjusted current + * > node is the current node. + * + * @see https://html.spec.whatwg.org/#adjusted-current-node + * + * @since 6.7.0 + * + * @return WP_HTML_Token|null The adjusted current node. + */ + private function get_adjusted_current_node(): ?WP_HTML_Token { + if ( isset( $this->context_node ) && 1 === $this->state->stack_of_open_elements->count() ) { + return $this->context_node; + } + + return $this->state->stack_of_open_elements->current_node(); + } + /** * Reconstructs the active formatting elements. * @@ -5043,7 +5380,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { continue; } - if ( self::is_special( $item->node_name ) ) { + if ( self::is_special( $item ) ) { $furthest_block = $item; break; } @@ -5111,6 +5448,45 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->state->stack_of_open_elements->push( $token ); } + /** + * Inserts a foreign element on to the stack of open elements. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#insert-a-foreign-element + * + * @param WP_HTML_Token $token Insert this token. The token's namespace and + * insertion point will be updated correctly. + * @param bool $only_add_to_element_stack Whether to skip the "insert an element at the adjusted + * insertion location" algorithm when adding this element. + */ + private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void { + $adjusted_current_node = $this->get_adjusted_current_node(); + + $token->namespace = $adjusted_current_node ? 
$adjusted_current_node->namespace : 'html'; + + if ( $this->is_mathml_integration_point() ) { + $token->integration_node_type = 'math'; + } elseif ( $this->is_html_integration_point() ) { + $token->integration_node_type = 'html'; + } + + if ( false === $only_add_to_element_stack ) { + /* + * @todo Implement the "appropriate place for inserting a node" and the + * "insert an element at the adjusted insertion location" algorithms. + * + * These algorithms mostly impact DOM tree construction and not the HTML API. + * Here, there's no DOM node onto which the element will be appended, so the + * parser will skip this step. + * + * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location + */ + } + + $this->insert_html_element( $token ); + } + /** * Inserts a virtual element on the stack of open elements. * @@ -5136,6 +5512,91 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * HTML Specification Helpers */ + /** + * Indicates if the current token is a MathML integration point. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#mathml-text-integration-point + * + * @return bool Whether the current token is a MathML integration point. + */ + private function is_mathml_integration_point(): bool { + $current_token = $this->state->current_token; + if ( ! isset( $current_token ) ) { + return false; + } + + if ( 'math' !== $current_token->namespace || 'M' !== $current_token->node_name[0] ) { + return false; + } + + $tag_name = $current_token->node_name; + + return ( + 'MI' === $tag_name || + 'MO' === $tag_name || + 'MN' === $tag_name || + 'MS' === $tag_name || + 'MTEXT' === $tag_name + ); + } + + /** + * Indicates if the current token is an HTML integration point. + * + * Note that this method must be an instance method with access + * to the current token, since it needs to examine the attributes + * of the currently-matched tag, if it's in the MathML namespace. 
+ * Otherwise it would be required to scan the HTML and ensure that + * no other accounting is overlooked. + * + * @since 6.7.0 + * + * @see https://html.spec.whatwg.org/#html-integration-point + * + * @return bool Whether the current token is an HTML integration point. + */ + private function is_html_integration_point(): bool { + $current_token = $this->state->current_token; + if ( ! isset( $current_token ) ) { + return false; + } + + if ( 'html' === $current_token->namespace ) { + return false; + } + + $tag_name = $current_token->node_name; + + if ( 'svg' === $current_token->namespace ) { + return ( + 'DESC' === $tag_name || + 'FOREIGNOBJECT' === $tag_name || + 'TITLE' === $tag_name + ); + } + + if ( 'math' === $current_token->namespace ) { + if ( 'ANNOTATION-XML' !== $tag_name ) { + return false; + } + + $encoding = $this->get_attribute( 'encoding' ); + + return ( + is_string( $encoding ) && + ( + 0 === strcasecmp( $encoding, 'application/xhtml+xml' ) || + 0 === strcasecmp( $encoding, 'text/html' ) + ) + ); + } + + // Only the 'html', 'svg', and 'math' namespaces are expected; return explicitly so the `bool` return type holds for any other value. + return false; + } + /** * Returns whether an element of a given name is in the HTML special category. * @@ -5143,11 +5604,17 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * * @see https://html.spec.whatwg.org/#special * - * @param string $tag_name Name of element to check. + * @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace. * @return bool Whether the element of the given name is in the special category. */ public static function is_special( $tag_name ): bool { - $tag_name = strtoupper( $tag_name ); + if ( is_string( $tag_name ) ) { + $tag_name = strtoupper( $tag_name ); + } else { + $tag_name = 'html' === $tag_name->namespace + ? 
strtoupper( $tag_name->node_name ) + : "{$tag_name->namespace} {$tag_name->node_name}"; + } return ( 'ADDRESS' === $tag_name || @@ -5235,17 +5702,17 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { 'XMP' === $tag_name || // MathML. 
- 'MI' === $tag_name || - 'MO' === $tag_name || - 'MN' === $tag_name || - 'MS' === $tag_name || - 'MTEXT' === $tag_name || - 'ANNOTATION-XML' === $tag_name || + 'math MI' === $tag_name || + 'math MO' === $tag_name || + 'math MN' === $tag_name || + 'math MS' === $tag_name || + 'math MTEXT' === $tag_name || + 'math ANNOTATION-XML' === $tag_name || // SVG. - 'FOREIGNOBJECT' === $tag_name || - 'DESC' === $tag_name || - 'TITLE' === $tag_name + 'svg DESC' === $tag_name || + 'svg FOREIGNOBJECT' === $tag_name || + 'svg TITLE' === $tag_name ); } diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php index 0ff2cdc4dd..fb21c15d1d 100644 --- a/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/wp-includes/html-api/class-wp-html-tag-processor.php @@ -511,6 +511,23 @@ class WP_HTML_Tag_Processor { */ protected $parser_state = self::STATE_READY; + /** + * Indicates whether the parser is inside foreign content, + * e.g. inside an SVG or MathML element. + * + * One of 'html', 'svg', or 'math'. + * + * Several parsing rules change based on whether the parser + * is inside foreign content, including whether CDATA sections + * are allowed and whether a self-closing flag indicates that + * an element has no content. + * + * @since 6.7.0 + * + * @var string + */ + private $parsing_namespace = 'html'; + /** * What kind of syntax token became an HTML comment. * @@ -780,6 +797,25 @@ class WP_HTML_Tag_Processor { $this->html = $html; } + /** + * Switches parsing mode into a new namespace, such as when + * encountering an SVG tag and entering foreign content. + * + * @since 6.7.0 + * + * @param string $new_namespace One of 'html', 'svg', or 'math' indicating into what + * namespace the next tokens will be processed. + * @return bool Whether the namespace was valid and changed. + */ + public function change_parsing_namespace( string $new_namespace ): bool { + if ( ! 
in_array( $new_namespace, array( 'html', 'math', 'svg' ), true ) ) { + return false; + } + + $this->parsing_namespace = $new_namespace; + return true; + } + /** * Finds the next tag matching the $query. * @@ -843,6 +879,7 @@ class WP_HTML_Tag_Processor { * The Tag Processor currently only supports the tag token. * * @since 6.5.0 + * @since 6.7.0 Recognizes CDATA sections within foreign content. * * @return bool Whether a token was parsed. */ @@ -956,6 +993,7 @@ class WP_HTML_Tag_Processor { */ if ( $this->is_closing_tag || + 'html' !== $this->parsing_namespace || 1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 ) ) { return true; @@ -996,7 +1034,6 @@ class WP_HTML_Tag_Processor { $duplicate_attributes = $this->duplicate_attributes; // Find the closing tag if necessary. - $found_closer = false; switch ( $tag_name ) { case 'SCRIPT': $found_closer = $this->skip_script_data(); @@ -1759,6 +1796,32 @@ class WP_HTML_Tag_Processor { return true; } + if ( + 'html' !== $this->parsing_namespace && + strlen( $html ) > $at + 8 && + '[' === $html[ $at + 2 ] && + 'C' === $html[ $at + 3 ] && + 'D' === $html[ $at + 4 ] && + 'A' === $html[ $at + 5 ] && + 'T' === $html[ $at + 6 ] && + 'A' === $html[ $at + 7 ] && + '[' === $html[ $at + 8 ] + ) { + $closer_at = strpos( $html, ']]>', $at + 9 ); + if ( false === $closer_at ) { + $this->parser_state = self::STATE_INCOMPLETE_INPUT; + + return false; + } + + $this->parser_state = self::STATE_CDATA_NODE; + $this->text_starts_at = $at + 9; + $this->text_length = $closer_at - $this->text_starts_at; + $this->token_length = $closer_at + 3 - $this->token_starts_at; + $this->bytes_already_parsed = $closer_at + 3; + return true; + } + /* * Anything else here is an incorrectly-opened comment and transitions * to the bogus comment state - skip to the nearest >. If no closer is @@ -2653,6 +2716,17 @@ class WP_HTML_Tag_Processor { return $matches; } + /** + * Returns the namespace of the matched token. 
+	 *
+	 * @since 6.7.0
+	 *
+	 * @return string One of 'html', 'math', or 'svg'.
+	 */
+	public function get_namespace(): string {
+		return $this->parsing_namespace;
+	}
+
 	/**
 	 * Returns the uppercase name of the matched tag.
 	 *
@@ -2690,6 +2764,391 @@ class WP_HTML_Tag_Processor {
 		return null;
 	}
 
+	/**
+	 * Returns the adjusted tag name for a given token, taking into
+	 * account the current parsing context, whether HTML, SVG, or MathML.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @return string|null Adjusted name of the matched tag, or `null` when `get_tag()` returns `null`.
+	 */
+	public function get_qualified_tag_name(): ?string {
+		$tag_name = $this->get_tag();
+		if ( null === $tag_name ) {
+			return null;
+		}
+
+		if ( 'html' === $this->get_namespace() ) {
+			return $tag_name;
+		}
+
+		$lower_tag_name = strtolower( $tag_name );
+		if ( 'math' === $this->get_namespace() ) {
+			return $lower_tag_name;
+		}
+
+		if ( 'svg' === $this->get_namespace() ) {
+			switch ( $lower_tag_name ) {
+				case 'altglyph':
+					return 'altGlyph';
+
+				case 'altglyphdef':
+					return 'altGlyphDef';
+
+				case 'altglyphitem':
+					return 'altGlyphItem';
+
+				case 'animatecolor':
+					return 'animateColor';
+
+				case 'animatemotion':
+					return 'animateMotion';
+
+				case 'animatetransform':
+					return 'animateTransform';
+
+				case 'clippath':
+					return 'clipPath';
+
+				case 'feblend':
+					return 'feBlend';
+
+				case 'fecolormatrix':
+					return 'feColorMatrix';
+
+				case 'fecomponenttransfer':
+					return 'feComponentTransfer';
+
+				case 'fecomposite':
+					return 'feComposite';
+
+				case 'feconvolvematrix':
+					return 'feConvolveMatrix';
+
+				case 'fediffuselighting':
+					return 'feDiffuseLighting';
+
+				case 'fedisplacementmap':
+					return 'feDisplacementMap';
+
+				case 'fedistantlight':
+					return 'feDistantLight';
+
+				case 'fedropshadow':
+					return 'feDropShadow';
+
+				case 'feflood':
+					return 'feFlood';
+
+				case 'fefunca':
+					return 'feFuncA';
+
+				case 'fefuncb':
+					return 'feFuncB';
+
+				case 'fefuncg':
+					return 'feFuncG';
+
+				case 'fefuncr':
+					return 'feFuncR';
+
+				case 'fegaussianblur':
+					return 'feGaussianBlur';
+
+				case 'feimage':
+					return 'feImage';
+
+				case 'femerge':
+					return 'feMerge';
+
+				case 'femergenode':
+					return 'feMergeNode';
+
+				case 'femorphology':
+					return 'feMorphology';
+
+				case 'feoffset':
+					return 'feOffset';
+
+				case 'fepointlight':
+					return 'fePointLight';
+
+				case 'fespecularlighting':
+					return 'feSpecularLighting';
+
+				case 'fespotlight':
+					return 'feSpotLight';
+
+				case 'fetile':
+					return 'feTile';
+
+				case 'feturbulence':
+					return 'feTurbulence';
+
+				case 'foreignobject':
+					return 'foreignObject';
+
+				case 'glyphref':
+					return 'glyphRef';
+
+				case 'lineargradient':
+					return 'linearGradient';
+
+				case 'radialgradient':
+					return 'radialGradient';
+
+				case 'textpath':
+					return 'textPath';
+
+				default:
+					return $lower_tag_name;
+			}
+		}
+
+		// Not reached for 'html', 'math', or 'svg'; any other namespace keeps the name from get_tag() so a value is always returned.
+		return $tag_name;
+	}
+
+	/**
+	 * Returns the adjusted attribute name for a given attribute, taking into
+	 * account the current parsing context, whether HTML, SVG, or MathML.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @param string $attribute_name Which attribute to adjust.
+	 *
+	 * @return string|null Adjusted attribute name, or `null` when the parser is not matched on a tag.
+	 */
+	public function get_qualified_attribute_name( $attribute_name ): ?string {
+		if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+			return null;
+		}
+
+		$namespace  = $this->get_namespace();
+		$lower_name = strtolower( $attribute_name );
+
+		if ( 'math' === $namespace && 'definitionurl' === $lower_name ) {
+			return 'definitionURL';
+		}
+
+		if ( 'svg' === $namespace ) {
+			switch ( $lower_name ) {
+				case 'attributename':
+					return 'attributeName';
+
+				case 'attributetype':
+					return 'attributeType';
+
+				case 'basefrequency':
+					return 'baseFrequency';
+
+				case 'baseprofile':
+					return 'baseProfile';
+
+				case 'calcmode':
+					return 'calcMode';
+
+				case 'clippathunits':
+					return 'clipPathUnits';
+
+				case 'diffuseconstant':
+					return 'diffuseConstant';
+
+				case 'edgemode':
+					return 'edgeMode';
+
+				case 'filterunits':
+					return 'filterUnits';
+
+				case 'glyphref':
+					return 'glyphRef';
+
+				case 'gradienttransform':
+					return 'gradientTransform';
+
+				case 'gradientunits':
+					return 'gradientUnits';
+
+				case 'kernelmatrix':
+					return 'kernelMatrix';
+
+				case 'kernelunitlength':
+					return 'kernelUnitLength';
+
+				case 'keypoints':
+					return 'keyPoints';
+
+				case 'keysplines':
+					return 'keySplines';
+
+				case 'keytimes':
+					return 'keyTimes';
+
+				case 'lengthadjust':
+					return 'lengthAdjust';
+
+				case 'limitingconeangle':
+					return 'limitingConeAngle';
+
+				case 'markerheight':
+					return 'markerHeight';
+
+				case 'markerunits':
+					return 'markerUnits';
+
+				case 'markerwidth':
+					return 'markerWidth';
+
+				case 'maskcontentunits':
+					return 'maskContentUnits';
+
+				case 'maskunits':
+					return 'maskUnits';
+
+				case 'numoctaves':
+					return 'numOctaves';
+
+				case 'pathlength':
+					return 'pathLength';
+
+				case 'patterncontentunits':
+					return 'patternContentUnits';
+
+				case 'patterntransform':
+					return 'patternTransform';
+
+				case 'patternunits':
+					return 'patternUnits';
+
+				case 'pointsatx':
+					return 'pointsAtX';
+
+				case 'pointsaty':
+					return 'pointsAtY';
+
+				case 'pointsatz':
+					return 'pointsAtZ';
+
+				case 'preservealpha':
+					return 'preserveAlpha';
+
+				case 'preserveaspectratio':
+					return 'preserveAspectRatio';
+
+				case 'primitiveunits':
+					return 'primitiveUnits';
+
+				case 'refx':
+					return 'refX';
+
+				case 'refy':
+					return 'refY';
+
+				case 'repeatcount':
+					return 'repeatCount';
+
+				case 'repeatdur':
+					return 'repeatDur';
+
+				case 'requiredextensions':
+					return 'requiredExtensions';
+
+				case 'requiredfeatures':
+					return 'requiredFeatures';
+
+				case 'specularconstant':
+					return 'specularConstant';
+
+				case 'specularexponent':
+					return 'specularExponent';
+
+				case 'spreadmethod':
+					return 'spreadMethod';
+
+				case 'startoffset':
+					return 'startOffset';
+
+				case 'stddeviation':
+					return 'stdDeviation';
+
+				case 'stitchtiles':
+					return 'stitchTiles';
+
+				case 'surfacescale':
+					return 'surfaceScale';
+
+				case 'systemlanguage':
+					return 'systemLanguage';
+
+				case 'tablevalues':
+					return 'tableValues';
+
+				case 'targetx':
+					return 'targetX';
+
+				case 'targety':
+					return 'targetY';
+
+				case 'textlength':
+					return 'textLength';
+
+				case 'viewbox':
+					return 'viewBox';
+
+				case 'viewtarget':
+					return 'viewTarget';
+
+				case 'xchannelselector':
+					return 'xChannelSelector';
+
+				case 'ychannelselector':
+					return 'yChannelSelector';
+
+				case 'zoomandpan':
+					return 'zoomAndPan';
+			}
+		}
+
+		if ( 'html' !== $namespace ) {
+			switch ( $lower_name ) {
+				case 'xlink:actuate':
+					return 'xlink actuate';
+
+				case 'xlink:arcrole':
+					return 'xlink arcrole';
+
+				case 'xlink:href':
+					return 'xlink href';
+
+				case 'xlink:role':
+					return 'xlink role';
+
+				case 'xlink:show':
+					return 'xlink show';
+
+				case 'xlink:title':
+					return 'xlink title';
+
+				case 'xlink:type':
+					return 'xlink type';
+
+				case 'xml:lang':
+					return 'xml lang';
+
+				case 'xml:space':
+					return 'xml space';
+
+				case 'xmlns':
+					return 'xmlns';
+
+				case 'xmlns:xlink':
+					return 'xmlns xlink';
+			}
+		}
+
+		return $attribute_name;
+	}
+
 	/**
 	 * Indicates if the currently matched tag contains the self-closing flag.
 	 *
@@ -2963,8 +3422,12 @@ class WP_HTML_Tag_Processor {
 		 * In all other contexts it's replaced by the replacement character (U+FFFD)
 		 * for security reasons (to avoid joining together strings that were safe
 		 * when separated, but not when joined).
+		 *
+		 * @todo Inside HTML integration points and MathML integration points, the
+		 *       text is processed according to the insertion mode, not according
+		 *       to the foreign content rules. This should strip the NULL bytes.
 		 */
-		return '#text' === $tag_name
+		return ( '#text' === $tag_name && 'html' === $this->get_namespace() )
 			? str_replace( "\x00", '', $decoded )
 			: str_replace( "\x00", "\u{FFFD}", $decoded );
 	}
diff --git a/wp-includes/html-api/class-wp-html-token.php b/wp-includes/html-api/class-wp-html-token.php
index 948fe343df..d5e51ac290 100644
--- a/wp-includes/html-api/class-wp-html-token.php
+++ b/wp-includes/html-api/class-wp-html-token.php
@@ -60,6 +60,24 @@ class WP_HTML_Token {
 	 */
 	public $has_self_closing_flag = false;
 
+	/**
+	 * Indicates if the element is an HTML element or if it's inside foreign content.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @var string 'html', 'svg', or 'math'.
+	 */
+	public $namespace = 'html';
+
+	/**
+	 * Indicates which kind of integration point the element is, if any.
+	 *
+	 * @since 6.7.0
+	 *
+	 * @var string|null 'math', 'html', or null if not an integration point.
+	 */
+	public $integration_node_type = null;
+
 	/**
 	 * Called when token is garbage-collected or otherwise destroyed.
 	 *
@@ -80,6 +98,7 @@ class WP_HTML_Token {
 	 */
 	public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) {
 		$this->bookmark_name         = $bookmark_name;
+		$this->namespace             = 'html';
 		$this->node_name             = $node_name;
 		$this->has_self_closing_flag = $has_self_closing_flag;
 		$this->on_destroy            = $on_destroy;
diff --git a/wp-includes/version.php b/wp-includes/version.php
index f4d8950bd7..6c144b0662 100644
--- a/wp-includes/version.php
+++ b/wp-includes/version.php
@@ -16,7 +16,7 @@
  *
  * @global string $wp_version
  */
-$wp_version = '6.7-alpha-58866';
+$wp_version = '6.7-alpha-58867';
 
 /**
  * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.