diff --git a/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 69e34dca49..2f51482eee 100644 --- a/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -86,6 +86,22 @@ class WP_HTML_Active_Formatting_Elements { return $current_node ? $current_node : null; } + /** + * Inserts a "marker" at the end of the list of active formatting elements. + * + * > The markers are inserted when entering applet, object, marquee, + * > template, td, th, and caption elements, and are used to prevent + * > formatting from "leaking" into applet, object, marquee, template, + * > td, th, and caption elements. + * + * @see https://html.spec.whatwg.org/#concept-parser-marker + * + * @since 6.7.0 + */ + public function insert_marker(): void { + $this->push( new WP_HTML_Token( null, 'marker', false ) ); + } + /** * Pushes a node onto the stack of active formatting elements. * @@ -184,4 +200,30 @@ class WP_HTML_Active_Formatting_Elements { yield $this->stack[ $i ]; } } + + /** + * Clears the list of active formatting elements up to the last marker. + * + * > When the steps below require the UA to clear the list of active formatting elements up to + * > the last marker, the UA must perform the following steps: + * > + * > 1. Let entry be the last (most recently added) entry in the list of active + * > formatting elements. + * > 2. Remove entry from the list of active formatting elements. + * > 3. If entry was a marker, then stop the algorithm at this point. + * > The list has been cleared up to the last marker. + * > 4. Go to step 1. 
+ * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker + * + * @since 6.7.0 + */ + public function clear_up_to_last_marker(): void { + foreach ( $this->walk_up() as $item ) { + array_pop( $this->stack ); + if ( 'marker' === $item->node_name ) { + break; + } + } + } } diff --git a/wp-includes/html-api/class-wp-html-open-elements.php b/wp-includes/html-api/class-wp-html-open-elements.php index 065bbd25c9..d59bd32140 100644 --- a/wp-includes/html-api/class-wp-html-open-elements.php +++ b/wp-includes/html-api/class-wp-html-open-elements.php @@ -101,6 +101,49 @@ class WP_HTML_Open_Elements { $this->push_handler = $handler; } + /** + * Returns the name of the node at the nth position on the stack + * of open elements, or `null` if no such position exists. + * + * Note that this uses a 1-based index, which represents the + * "nth item" on the stack, counting from the top, where the + * top-most element is the 1st, the second is the 2nd, etc... + * + * @since 6.7.0 + * + * @param int $nth Retrieve the nth item on the stack, with 1 being + * the top element, 2 being the second, etc... + * @return string|null Name of the node on the stack at the given location, + * or `null` if the location isn't on the stack. + */ + public function at( int $nth ): ?string { + foreach ( $this->walk_down() as $item ) { + if ( 0 === --$nth ) { + return $item->node_name; + } + } + + return null; + } + + /** + * Reports if a node of a given name is in the stack of open elements. + * + * @since 6.7.0 + * + * @param string $node_name Name of node for which to check. + * @return bool Whether a node of the given name is in the stack of open elements. + */ + public function contains( string $node_name ): bool { + foreach ( $this->walk_up() as $item ) { + if ( $node_name === $item->node_name ) { + return true; + } + } + + return false; + } + /** * Reports if a specific node is in the stack of open elements. 
* @@ -111,7 +154,7 @@ class WP_HTML_Open_Elements { */ public function contains_node( WP_HTML_Token $token ): bool { foreach ( $this->walk_up() as $item ) { - if ( $token->bookmark_name === $item->bookmark_name ) { + if ( $token === $item ) { return true; } } @@ -210,11 +253,6 @@ class WP_HTML_Open_Elements { return true; } - switch ( $node->node_name ) { - case 'HTML': - return false; - } - if ( in_array( $node->node_name, $termination_list, true ) ) { return false; } @@ -226,7 +264,31 @@ class WP_HTML_Open_Elements { /** * Returns whether a particular element is in scope. * + * > The stack of open elements is said to have a particular element in + * > scope when it has that element in the specific scope consisting of + * > the following element types: + * > + * > - applet + * > - caption + * > - html + * > - table + * > - td + * > - th + * > - marquee + * > - object + * > - template + * > - MathML mi + * > - MathML mo + * > - MathML mn + * > - MathML ms + * > - MathML mtext + * > - MathML annotation-xml + * > - SVG foreignObject + * > - SVG desc + * > - SVG title + * * @since 6.4.0 + * @since 6.7.0 Supports all required HTML elements. * * @see https://html.spec.whatwg.org/#has-an-element-in-scope * @@ -237,14 +299,16 @@ class WP_HTML_Open_Elements { return $this->has_element_in_specific_scope( $tag_name, array( - - /* - * Because it's not currently possible to encounter - * one of the termination elements, they don't need - * to be listed here. If they were, they would be - * unreachable and only waste CPU cycles while - * scanning through HTML. - */ + 'APPLET', + 'CAPTION', + 'HTML', + 'TABLE', + 'TD', + 'TH', + 'MARQUEE', + 'OBJECT', + 'TEMPLATE', + // @todo: Support SVG and MathML nodes when support for foreign content is added. ) ); } @@ -252,8 +316,17 @@ class WP_HTML_Open_Elements { /** * Returns whether a particular element is in list item scope. 
* + * > The stack of open elements is said to have a particular element + * > in list item scope when it has that element in the specific scope + * > consisting of the following element types: + * > + * > - All the element types listed above for the has an element in scope algorithm. + * > - ol in the HTML namespace + * > - ul in the HTML namespace + * * @since 6.4.0 * @since 6.5.0 Implemented: no longer throws on every invocation. + * @since 6.7.0 Supports all required HTML elements. * * @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope * @@ -264,9 +337,18 @@ class WP_HTML_Open_Elements { return $this->has_element_in_specific_scope( $tag_name, array( - // There are more elements that belong here which aren't currently supported. + 'APPLET', + 'CAPTION', + 'HTML', + 'TABLE', + 'TD', + 'TH', + 'MARQUEE', + 'OBJECT', 'OL', + 'TEMPLATE', 'UL', + // @todo: Support SVG and MathML nodes when support for foreign content is added. ) ); } @@ -274,7 +356,15 @@ class WP_HTML_Open_Elements { /** * Returns whether a particular element is in button scope. * + * > The stack of open elements is said to have a particular element + * > in button scope when it has that element in the specific scope + * > consisting of the following element types: + * > + * > - All the element types listed above for the has an element in scope algorithm. + * > - button in the HTML namespace + * * @since 6.4.0 + * @since 6.7.0 Supports all required HTML elements. * * @see https://html.spec.whatwg.org/#has-an-element-in-button-scope * @@ -282,25 +372,52 @@ class WP_HTML_Open_Elements { * @return bool Whether given element is in scope. 
*/ public function has_element_in_button_scope( string $tag_name ): bool { - return $this->has_element_in_specific_scope( $tag_name, array( 'BUTTON' ) ); + return $this->has_element_in_specific_scope( + $tag_name, + array( + 'APPLET', + 'BUTTON', + 'CAPTION', + 'HTML', + 'TABLE', + 'TD', + 'TH', + 'MARQUEE', + 'OBJECT', + 'TEMPLATE', + // @todo: Support SVG and MathML nodes when support for foreign content is added. + ) + ); } /** * Returns whether a particular element is in table scope. * + * > The stack of open elements is said to have a particular element + * > in table scope when it has that element in the specific scope + * > consisting of the following element types: + * > + * > - html in the HTML namespace + * > - table in the HTML namespace + * > - template in the HTML namespace + * * @since 6.4.0 + * @since 6.7.0 Full implementation. * * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope * - * @throws WP_HTML_Unsupported_Exception Always until this function is implemented. - * * @param string $tag_name Name of tag to check. * @return bool Whether given element is in scope. */ public function has_element_in_table_scope( string $tag_name ): bool { - throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on table scope.' ); - - return false; // The linter requires this unreachable code until the function is implemented and can return. + return $this->has_element_in_specific_scope( + $tag_name, + array( + 'HTML', + 'TABLE', + 'TEMPLATE', + ) + ); } /** @@ -540,7 +658,16 @@ class WP_HTML_Open_Elements { * cases where the precalculated value needs to change. */ switch ( $item->node_name ) { + case 'APPLET': case 'BUTTON': + case 'CAPTION': + case 'HTML': + case 'TABLE': + case 'TD': + case 'TH': + case 'MARQUEE': + case 'OBJECT': + case 'TEMPLATE': $this->has_p_in_button_scope = false; break; @@ -573,11 +700,17 @@ class WP_HTML_Open_Elements { * cases where the precalculated value needs to change. 
*/ switch ( $item->node_name ) { + case 'APPLET': case 'BUTTON': - $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); - break; - + case 'CAPTION': + case 'HTML': case 'P': + case 'TABLE': + case 'TD': + case 'TH': + case 'MARQUEE': + case 'OBJECT': + case 'TEMPLATE': $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); break; } diff --git a/wp-includes/html-api/class-wp-html-processor-state.php b/wp-includes/html-api/class-wp-html-processor-state.php index eadfe30d26..e0469bea02 100644 --- a/wp-includes/html-api/class-wp-html-processor-state.php +++ b/wp-includes/html-api/class-wp-html-processor-state.php @@ -311,6 +311,31 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_IN_FOREIGN_CONTENT = 'insertion-mode-in-foreign-content'; + /** + * No-quirks mode document compatibility mode. + * + * > In no-quirks mode, the behavior is (hopefully) the desired behavior + * > described by the modern HTML and CSS specifications. + * + * @since 6.7.0 + * + * @var string + */ + const NO_QUIRKS_MODE = 'no-quirks-mode'; + + /** + * Quirks mode document compatibility mode. + * + * > In quirks mode, layout emulates behavior in Navigator 4 and Internet + * > Explorer 5. This is essential in order to support websites that were + * > built before the widespread adoption of web standards. + * + * @since 6.7.0 + * + * @var string + */ + const QUIRKS_MODE = 'quirks-mode'; + /** * The stack of template insertion modes. * @@ -368,6 +393,30 @@ class WP_HTML_Processor_State { */ public $insertion_mode = self::INSERTION_MODE_INITIAL; + /** + * Indicates if the document is in quirks mode or no-quirks mode. + * + * Impact on HTML parsing: + * + * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte + * manner, otherwise for backwards compatibility, class selectors are to + * match in an ASCII case-insensitive manner. 
+ * + * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag + * if one is in scope and open, otherwise the TABLE becomes a child of the P. + * + * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but + * none of the other changes modifies how the HTML is parsed or selected. + * + * @see self::QUIRKS_MODE + * @see self::NO_QUIRKS_MODE + * + * @since 6.7.0 + * + * @var string + */ + public $document_mode = self::NO_QUIRKS_MODE; + /** * Context node initializing fragment parser, if created as a fragment parser. * @@ -390,6 +439,24 @@ class WP_HTML_Processor_State { */ public $head_element = null; + /** + * FORM element pointer. + * + * > points to the last form element that was opened and whose end tag has + * > not yet been seen. It is used to make form controls associate with + * > forms in the face of dramatically bad markup, for historical reasons. + * > It is ignored inside template elements. + * + * @todo This may be invalidated by a seek operation. + * + * @see https://html.spec.whatwg.org/#form-element-pointer + * + * @since 6.7.0 + * + * @var WP_HTML_Token|null + */ + public $form_element = null; + /** * The frameset-ok flag indicates if a `FRAMESET` element is allowed in the current state. * diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index 72f39d3ad7..d614112a76 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -97,22 +97,11 @@ * will abort early and stop all processing. This draconian measure ensures * that the HTML Processor won't break any HTML it doesn't fully understand. * - * The following list specifies the HTML tags that _are_ supported: + * The HTML Processor supports all elements other than a specific set: * - * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. 
- * - Custom elements: All custom elements are supported. :) - * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, OPTGROUP, OPTION, PROGRESS, SEARCH, SELECT. - * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR. - * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. - * - Links: A. - * - Lists: DD, DL, DT, LI, OL, UL. - * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO. - * - Paragraph: BR, P. - * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. - * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION. - * - Templating elements: SLOT. - * - Text decoration: RUBY. - * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER. + * - Any element inside a TABLE. + * - Any element inside foreign content, including SVG and MATH. + * - Any element outside the IN BODY insertion mode, e.g. doctype declarations, meta, links. * * ### Supported markup * @@ -121,15 +110,30 @@ * may in fact belong _before_ the table in the DOM. If the HTML Processor encounters * such a case it will stop processing. * - * The following list specifies HTML markup that _is_ supported: + * The following list illustrates some common examples of unexpected HTML inputs that + * the HTML Processor properly parses and represents: * - * - Markup involving only those tags listed above. - * - Fully-balanced and non-overlapping tags. - * - HTML with unexpected tag closers. - * - Some unbalanced or overlapping tags. - * - P tags after unclosed P tags. - * - BUTTON tags after unclosed BUTTON tags. - * - A tags after unclosed A tags that don't involve any active formatting elements. + * - HTML with optional tags omitted, e.g. `<p>one<p>two`. + * - HTML with unexpected tag closers, e.g. `<p>one </span> more</p>`. + * - Non-void tags with self-closing flag, e.g. `<div/>the DIV is still open.</div>`. + * - Heading elements which close open heading elements of another level, e.g. `<h1>Closed by </h2>`. + * - Elements containing text that looks like other tags but isn't, e.g. `<title>The <img> is plaintext</title>`. + * - SCRIPT and STYLE tags containing text that looks like HTML but isn't, e.g. `<script>document.write('<p>Hi</p>');</script>`. + * - SCRIPT content which has been escaped, e.g. `<script><!-- document.write('<script>console.log("hi")</script>') --></script>`. + * + * ### Unsupported Features + * + * This parser does not report parse errors. + * + * Normally, when additional HTML or BODY tags are encountered in a document, if there + * are any additional attributes on them that aren't found on the previous elements, + * the existing HTML and BODY elements adopt those missing attribute values. This + * parser does not add those additional attributes. + * + * In certain situations, elements are moved to a different part of the document in + * a process called "adoption" and "fostering." Because the nodes move to a location + * in the document that the parser had already processed, this parser does not support + * these situations and will bail. * * @since 6.4.0 * @@ -1104,15 +1108,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $op = "{$op_sigil}{$token_name}"; switch ( $op ) { - case '#comment': - case '#funky-comment': - case '#presumptuous-tag': - $this->insert_html_element( $this->state->current_token ); - return true; - case '#text': - $this->reconstruct_active_formatting_elements(); - $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; /* @@ -1133,6 +1129,8 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return $this->step(); } + $this->reconstruct_active_formatting_elements(); + /* * Whitespace-only text does not affect the frameset-ok flag. 
* It is probably inter-element whitespace, but it may also @@ -1146,29 +1144,146 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->insert_html_element( $this->state->current_token ); return true; + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A DOCTYPE token + * > Parse error. Ignore the token. + */ case 'html': - /* - * > A DOCTYPE token - * > Parse error. Ignore the token. - */ return $this->step(); /* - * > A start tag whose tag name is "button" + * > A start tag whose tag name is "html" */ - case '+BUTTON': - if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) { - // @todo Indicate a parse error once it's possible. This error does not impact the logic here. - $this->generate_implied_end_tags(); - $this->state->stack_of_open_elements->pop_until( 'BUTTON' ); + case '+HTML': + if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { + /* + * > Otherwise, for each attribute on the token, check to see if the attribute + * > is already present on the top element of the stack of open elements. If + * > it is not, add the attribute and its corresponding value to that element. + * + * This parser does not currently support this behavior: ignore the token. + */ } - $this->reconstruct_active_formatting_elements(); - $this->insert_html_element( $this->state->current_token ); - $this->state->frameset_ok = false; + // Ignore the token. 
+ return $this->step(); + /* + * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", + * > "meta", "noframes", "script", "style", "template", "title" + * > + * > An end tag whose tag name is "template" + */ + case '+BASE': + case '+BASEFONT': + case '+BGSOUND': + case '+LINK': + case '+META': + case '+NOFRAMES': + case '+SCRIPT': + case '+STYLE': + case '+TEMPLATE': + case '+TITLE': + case '-TEMPLATE': + return $this->step_in_head(); + + /* + * > A start tag whose tag name is "body" + * + * This tag in the IN BODY insertion mode is a parse error. + */ + case '+BODY': + if ( + 1 === $this->state->stack_of_open_elements->count() || + 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || + $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) + ) { + // Ignore the token. + return $this->step(); + } + + /* + * > Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute + * > on the token, check to see if the attribute is already present on the body + * > element (the second element) on the stack of open elements, and if it is + * > not, add the attribute and its corresponding value to that element. + * + * This parser does not currently support this behavior: ignore the token. + */ + $this->state->frameset_ok = false; + return $this->step(); + + /* + * > A start tag whose tag name is "frameset" + * + * This tag in the IN BODY insertion mode is a parse error. + */ + case '+FRAMESET': + if ( + 1 === $this->state->stack_of_open_elements->count() || + 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) || + false === $this->state->frameset_ok + ) { + // Ignore the token. + return $this->step(); + } + + /* + * > Otherwise, run the following steps: + */ + $this->bail( 'Cannot process non-ignored FRAMESET tags.' ); + break; + + /* + * > An end tag whose tag name is "body" + */ + case '-BODY': + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) { + // Parse error: ignore the token. 
+ return $this->step(); + } + + /* + * > Otherwise, if there is a node in the stack of open elements that is not either a + * > dd element, a dt element, an li element, an optgroup element, an option element, + * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody + * > element, a td element, a tfoot element, a th element, a thead element, a tr + * > element, the body element, or the html element, then this is a parse error. + * + * There is nothing to do for this parse error, so don't check for it. + */ + + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY; return true; + /* + * > An end tag whose tag name is "html" + */ + case '-HTML': + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) { + // Parse error: ignore the token. + return $this->step(); + } + + /* + * > Otherwise, if there is a node in the stack of open elements that is not either a + * > dd element, a dt element, an li element, an optgroup element, an option element, + * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody + * > element, a td element, a tfoot element, a th element, a thead element, a tr + * > element, the body element, or the html element, then this is a parse error. + * + * There is nothing to do for this parse error, so don't check for it. 
+ */ + + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); + /* * > A start tag whose tag name is one of: "address", "article", "aside", * > "blockquote", "center", "details", "dialog", "dir", "div", "dl", @@ -1207,52 +1322,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->insert_html_element( $this->state->current_token ); return true; - /* - * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote", - * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", - * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", - * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" - */ - case '-ADDRESS': - case '-ARTICLE': - case '-ASIDE': - case '-BLOCKQUOTE': - case '-BUTTON': - case '-CENTER': - case '-DETAILS': - case '-DIALOG': - case '-DIR': - case '-DIV': - case '-DL': - case '-FIELDSET': - case '-FIGCAPTION': - case '-FIGURE': - case '-FOOTER': - case '-HEADER': - case '-HGROUP': - case '-LISTING': - case '-MAIN': - case '-MENU': - case '-NAV': - case '-OL': - case '-PRE': - case '-SEARCH': - case '-SECTION': - case '-SUMMARY': - case '-UL': - if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) { - // @todo Report parse error. - // Ignore the token. - return $this->step(); - } - - $this->generate_implied_end_tags(); - if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { - // @todo Record parse error: this error doesn't impact parsing. 
- } - $this->state->stack_of_open_elements->pop_until( $token_name ); - return true; - /* * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" */ @@ -1288,35 +1357,39 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); } + + /* + * > If the next token is a U+000A LINE FEED (LF) character token, + * > then ignore that token and move on to the next one. (Newlines + * > at the start of pre blocks are ignored as an authoring convenience.) + * + * This is handled in `get_modifiable_text()`. + */ + $this->insert_html_element( $this->state->current_token ); $this->state->frameset_ok = false; return true; /* - * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" + * > A start tag whose tag name is "form" */ - case '-H1': - case '-H2': - case '-H3': - case '-H4': - case '-H5': - case '-H6': - if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { - /* - * This is a parse error; ignore the token. - * - * @todo Indicate a parse error once it's possible. - */ + case '+FORM': + $stack_contains_template = $this->state->stack_of_open_elements->contains( 'TEMPLATE' ); + + if ( isset( $this->state->form_element ) && ! $stack_contains_template ) { + // Parse error: ignore the token. return $this->step(); } - $this->generate_implied_end_tags(); - - if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { - // @todo Record parse error: this error doesn't impact parsing. + if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->close_a_p_element(); + } + + $this->insert_html_element( $this->state->current_token ); + if ( ! 
$stack_contains_template ) { + $this->state->form_element = $this->state->current_token; } - $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); return true; /* @@ -1377,6 +1450,150 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->insert_html_element( $this->state->current_token ); return true; + case '+PLAINTEXT': + if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->close_a_p_element(); + } + + /* + * @todo This may need to be handled in the Tag Processor and turn into + * a single self-contained tag like TEXTAREA, whose modifiable text + * is the rest of the input document as plaintext. + */ + $this->bail( 'Cannot process PLAINTEXT elements.' ); + break; + + /* + * > A start tag whose tag name is "button" + */ + case '+BUTTON': + if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) { + // @todo Indicate a parse error once it's possible. This error does not impact the logic here. + $this->generate_implied_end_tags(); + $this->state->stack_of_open_elements->pop_until( 'BUTTON' ); + } + + $this->reconstruct_active_formatting_elements(); + $this->insert_html_element( $this->state->current_token ); + $this->state->frameset_ok = false; + + return true; + + /* + * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote", + * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", + * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main", + * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul" + * + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `
`, and a foreign element of + * the same given name. + */ + case '-ADDRESS': + case '-ARTICLE': + case '-ASIDE': + case '-BLOCKQUOTE': + case '-BUTTON': + case '-CENTER': + case '-DETAILS': + case '-DIALOG': + case '-DIR': + case '-DIV': + case '-DL': + case '-FIELDSET': + case '-FIGCAPTION': + case '-FIGURE': + case '-FOOTER': + case '-HEADER': + case '-HGROUP': + case '-LISTING': + case '-MAIN': + case '-MENU': + case '-NAV': + case '-OL': + case '-PRE': + case '-SEARCH': + case '-SECTION': + case '-SUMMARY': + case '-UL': + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) { + // @todo Report parse error. + // Ignore the token. + return $this->step(); + } + + $this->generate_implied_end_tags(); + if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { + // @todo Record parse error: this error doesn't impact parsing. + } + $this->state->stack_of_open_elements->pop_until( $token_name ); + return true; + + /* + * > An end tag whose tag name is "form" + */ + case '-FORM': + if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { + $node = $this->state->form_element; + $this->state->form_element = null; + + /* + * > If node is null or if the stack of open elements does not have node + * > in scope, then this is a parse error; return and ignore the token. + * + * @todo It's necessary to check if the form token itself is in scope, not + * simply whether any FORM is in scope. + */ + if ( + null === $node || + ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) + ) { + // Parse error: ignore the token. + return $this->step(); + } + + $this->generate_implied_end_tags(); + if ( $node !== $this->state->stack_of_open_elements->current_node() ) { + // @todo Indicate a parse error once it's possible. This error does not impact the logic here. + $this->bail( 'Cannot close a FORM when other elements remain open as this would throw off the breadcrumbs for the following tokens.' 
); + } + + $this->state->stack_of_open_elements->remove_node( $node ); + } else { + /* + * > If the stack of open elements does not have a form element in scope, + * > then this is a parse error; return and ignore the token. + * + * Note that unlike in the clause above, this is checking for any FORM in scope. + */ + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) ) { + // Parse error: ignore the token. + return $this->step(); + } + + $this->generate_implied_end_tags(); + + if ( ! $this->state->stack_of_open_elements->current_node_is( 'FORM' ) ) { + // @todo Indicate a parse error once it's possible. This error does not impact the logic here. + } + + $this->state->stack_of_open_elements->pop_until( 'FORM' ); + return true; + } + return true; + + /* + * > An end tag whose tag name is "p" + */ + case '-P': + if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->insert_html_element( $this->state->current_token ); + } + + $this->close_a_p_element(); + return true; + /* * > An end tag whose tag name is "li" * > An end tag whose tag name is one of: "dd", "dt" @@ -1423,17 +1640,35 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return true; /* - * > An end tag whose tag name is "p" + * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" */ - case '-P': - if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) { - $this->insert_html_element( $this->state->current_token ); + case '-H1': + case '-H2': + case '-H3': + case '-H4': + case '-H5': + case '-H6': + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { + /* + * This is a parse error; ignore the token. + * + * @todo Indicate a parse error once it's possible. + */ + return $this->step(); } - $this->close_a_p_element(); + $this->generate_implied_end_tags(); + + if ( ! 
$this->state->stack_of_open_elements->current_node_is( $token_name ) ) { + // @todo Record parse error: this error doesn't impact parsing. + } + + $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); return true; - // > A start tag whose tag name is "a" + /* + * > A start tag whose tag name is "a" + */ case '+A': foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { switch ( $item->node_name ) { @@ -1474,6 +1709,22 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->state->active_formatting_elements->push( $this->state->current_token ); return true; + /* + * > A start tag whose tag name is "nobr" + */ + case '+NOBR': + $this->reconstruct_active_formatting_elements(); + + if ( $this->state->stack_of_open_elements->has_element_in_scope( 'NOBR' ) ) { + // Parse error. + $this->run_adoption_agency_algorithm(); + $this->reconstruct_active_formatting_elements(); + } + + $this->insert_html_element( $this->state->current_token ); + $this->state->active_formatting_elements->push( $this->state->current_token ); + return true; + /* * > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i", * > "nobr", "s", "small", "strike", "strong", "tt", "u" @@ -1495,15 +1746,64 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return true; /* - * > An end tag whose tag name is "br" - * > Parse error. Drop the attributes from the token, and act as described in the next - * > entry; i.e. act as if this was a "br" start tag token with no attributes, rather - * > than the end tag token that it actually is. 
+ * > A start tag whose tag name is one of: "applet", "marquee", "object" + */ + case '+APPLET': + case '+MARQUEE': + case '+OBJECT': + $this->reconstruct_active_formatting_elements(); + $this->insert_html_element( $this->state->current_token ); + $this->state->active_formatting_elements->insert_marker(); + $this->state->frameset_ok = false; + return true; + + /* + * > An end tag token whose tag name is one of: "applet", "marquee", "object" + * + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `<object>`, and a foreign element of + * the same given name. + */ + case '-APPLET': + case '-MARQUEE': + case '-OBJECT': + if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) { + // Parse error: ignore the token. + return $this->step(); + } + + $this->generate_implied_end_tags(); + if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { + // This is a parse error. + } + + $this->state->stack_of_open_elements->pop_until( $token_name ); + $this->state->active_formatting_elements->clear_up_to_last_marker(); + return true; + + /* + * > A start tag whose tag name is "table" + */ + case '+TABLE': + if ( + WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && + $this->state->stack_of_open_elements->has_p_in_button_scope() + ) { + $this->close_a_p_element(); + } + + $this->insert_html_element( $this->state->current_token ); + $this->state->frameset_ok = false; + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; + return true; + + /* + * > An end tag whose tag name is "br" + * + * This is prevented from happening because the Tag Processor + * reports all closing BR tags as if they were opening tags. */ - case '-BR': - $this->bail( 'Closing BR tags require unimplemented special handling.'
); - // This return required because PHPCS can't determine that the call to bail() throws. - return false; /* * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr" @@ -1525,15 +1825,26 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { case '+INPUT': $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); - $type_attribute = $this->get_attribute( 'type' ); + /* * > If the token does not have an attribute with the name "type", or if it does, * > but that attribute's value is not an ASCII case-insensitive match for the * > string "hidden", then: set the frameset-ok flag to "not ok". */ + $type_attribute = $this->get_attribute( 'type' ); if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { $this->state->frameset_ok = false; } + + return true; + + /* + * > A start tag whose tag name is one of: "param", "source", "track" + */ + case '+PARAM': + case '+SOURCE': + case '+TRACK': + $this->insert_html_element( $this->state->current_token ); return true; /* @@ -1548,11 +1859,80 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return true; /* - * > A start tag whose tag name is one of: "param", "source", "track" + * > A start tag whose tag name is "image" */ - case '+PARAM': - case '+SOURCE': - case '+TRACK': + case '+IMAGE': + /* + * > Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.) + * + * Note that this is handled elsewhere, so it should not be possible to reach this code. + */ + $this->bail( "Cannot process an IMAGE tag. (Don't ask.)" ); + break; + + /* + * > A start tag whose tag name is "textarea" + */ + case '+TEXTAREA': + $this->insert_html_element( $this->state->current_token ); + + /* + * > If the next token is a U+000A LINE FEED (LF) character token, then ignore + * > that token and move on to the next one. (Newlines at the start of + * > textarea elements are ignored as an authoring convenience.) 
+ * + * This is handled in `get_modifiable_text()`. + */ + + $this->state->frameset_ok = false; + + /* + * > Switch the insertion mode to "text". + * + * As a self-contained node, this behavior is handled in the Tag Processor. + */ + return true; + + /* + * > A start tag whose tag name is "xmp" + */ + case '+XMP': + if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { + $this->close_a_p_element(); + } + + $this->reconstruct_active_formatting_elements(); + $this->state->frameset_ok = false; + + /* + * > Follow the generic raw text element parsing algorithm. + * + * As a self-contained node, this behavior is handled in the Tag Processor. + */ + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * A start tag whose tag name is "iframe" + */ + case '+IFRAME': + $this->state->frameset_ok = false; + + /* + * > Follow the generic raw text element parsing algorithm. + * + * As a self-contained node, this behavior is handled in the Tag Processor. + */ + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "noembed" + * > A start tag whose tag name is "noscript", if the scripting flag is enabled + * + * The scripting flag is never enabled in this parser. + */ + case '+NOEMBED': $this->insert_html_element( $this->state->current_token ); return true; @@ -1597,69 +1977,89 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; - } - /* - * These tags require special handling in the 'in body' insertion mode - * but that handling hasn't yet been implemented. - * - * As the rules for each tag are implemented, the corresponding tag - * name should be removed from this list. An accompanying test should - * help ensure this list is maintained. 
- * - * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags - * - * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's - * possible to handle "any other start tag" and "any other end tag" below, - * as that guarantees execution doesn't proceed for the unimplemented tags. - * - * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody - */ - switch ( $token_name ) { - case 'APPLET': - case 'BASE': - case 'BASEFONT': - case 'BGSOUND': - case 'BODY': - case 'CAPTION': - case 'COL': - case 'COLGROUP': - case 'FORM': - case 'FRAME': - case 'FRAMESET': - case 'HEAD': - case 'HTML': - case 'IFRAME': - case 'LINK': - case 'MARQUEE': - case 'MATH': - case 'META': - case 'NOBR': - case 'NOEMBED': - case 'NOFRAMES': - case 'NOSCRIPT': - case 'OBJECT': - case 'PLAINTEXT': - case 'RB': - case 'RP': - case 'RT': - case 'RTC': - case 'SARCASM': - case 'SCRIPT': - case 'STYLE': - case 'SVG': - case 'TABLE': - case 'TBODY': - case 'TD': - case 'TEMPLATE': - case 'TEXTAREA': - case 'TFOOT': - case 'TH': - case 'THEAD': - case 'TITLE': - case 'TR': - case 'XMP': - $this->bail( "Cannot process {$token_name} element." ); + /* + * > A start tag whose tag name is one of: "rb", "rtc" + */ + case '+RB': + case '+RTC': + if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { + $this->generate_implied_end_tags(); + + if ( $this->state->stack_of_open_elements->current_node_is( 'RUBY' ) ) { + // @todo Indicate a parse error once it's possible. 
+ } + } + + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is one of: "rp", "rt" + */ + case '+RP': + case '+RT': + if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { + $this->generate_implied_end_tags( 'RTC' ); + + $current_node_name = $this->state->stack_of_open_elements->current_node()->node_name; + if ( 'RTC' === $current_node_name || 'RUBY' === $current_node_name ) { + // @todo Indicate a parse error once it's possible. + } + } + + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "math" + */ + case '+MATH': + $this->reconstruct_active_formatting_elements(); + + /* + * @todo Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.) + * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.) + * + * These ought to be handled in the attribute methods. + */ + + $this->bail( 'Cannot process MATH element, opening foreign content.' ); + break; + + /* + * > A start tag whose tag name is "svg" + */ + case '+SVG': + $this->reconstruct_active_formatting_elements(); + + /* + * @todo Adjust SVG attributes for the token. (This fixes the case of SVG attributes that are not all lowercase.) + * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink in SVG.) + * + * These ought to be handled in the attribute methods. + */ + + $this->bail( 'Cannot process SVG element, opening foreign content.' ); + break; + + /* + * > A start tag whose tag name is one of: "caption", "col", "colgroup", + * > "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" + */ + case '+CAPTION': + case '+COL': + case '+COLGROUP': + case '+FRAME': + case '+HEAD': + case '+TBODY': + case '+TD': + case '+TFOOT': + case '+TH': + case '+THEAD': + case '+TR': + // Parse error. 
Ignore the token. + return $this->step(); } if ( ! parent::is_tag_closer() ) { @@ -1681,6 +2081,12 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * close anything beyond its containing `P` or `DIV` element. */ foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + /* + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `<center>`, and a foreign element of + * the same given name. + */ if ( $token_name === $node->node_name ) { break; } diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php index 77782aa950..7d04fd31d8 100644 --- a/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/wp-includes/html-api/class-wp-html-tag-processor.php @@ -129,7 +129,7 @@ * $processor = new WP_HTML_Tag_Processor( '
' ); * true === $processor->next_tag( 'DIV' ); * - * #### Special elements + * #### Special self-contained elements * * Some HTML elements are handled in a special way; their start and end tags * act like a void tag. These are special because their contents can't contain @@ -755,6 +755,20 @@ class WP_HTML_Tag_Processor { */ protected $seek_count = 0; + /** + * Whether the parser should skip over an immediately-following linefeed + * character, as is the case with LISTING, PRE, and TEXTAREA. + * + * > If the next token is a U+000A LINE FEED (LF) character token, then + * > ignore that token and move on to the next one. (Newlines at the start + * > of [these] elements are ignored as an authoring convenience.) + * + * @since 6.7.0 + * + * @var int|null + */ + private $skip_newline_at = null; + /** * Constructor. * @@ -926,20 +940,23 @@ class WP_HTML_Tag_Processor { $this->token_length = $this->bytes_already_parsed - $this->token_starts_at; /* - * For non-DATA sections which might contain text that looks like HTML tags but - * isn't, scan with the appropriate alternative mode. Looking at the first letter - * of the tag name as a pre-check avoids a string allocation when it's not needed. + * Certain tags require additional processing. The first-letter pre-check + * avoids unnecessary string allocation when comparing the tag names. + * + * - IFRAME + * - LISTING (deprecated) + * - NOEMBED (deprecated) + * - NOFRAMES (deprecated) + * - PRE + * - SCRIPT + * - STYLE + * - TEXTAREA + * - TITLE + * - XMP (deprecated) */ - $t = $this->html[ $this->tag_name_starts_at ]; if ( $this->is_closing_tag || - ! 
( - 'i' === $t || 'I' === $t || - 'n' === $t || 'N' === $t || - 's' === $t || 'S' === $t || - 't' === $t || 'T' === $t || - 'x' === $t || 'X' === $t - ) + 1 !== strspn( $this->html, 'iIlLnNpPsStTxX', $this->tag_name_starts_at, 1 ) ) { return true; } @@ -947,6 +964,26 @@ class WP_HTML_Tag_Processor { $tag_name = $this->get_tag(); /* + * For LISTING, PRE, and TEXTAREA, the first linefeed of an immediately-following + * text node is ignored as an authoring convenience. + * + * @see static::skip_newline_at + */ + if ( 'LISTING' === $tag_name || 'PRE' === $tag_name ) { + $this->skip_newline_at = $this->bytes_already_parsed; + return true; + } + + /* + * There are certain elements whose children are not DATA but are instead + * RCDATA or RAWTEXT. These cannot contain other elements, and the contents + * are parsed as plaintext, with character references decoded in RCDATA but + * not in RAWTEXT. + * + * These elements are described here as "self-contained" or special atomic + * elements whose end tag is consumed with the opening tag, and they will + * contain modifiable text inside of them. + * * Preserve the opening tag pointers, as these will be overwritten * when finding the closing tag. They will be reset after finding * the closing to tag to point to the opening of the special atomic @@ -2690,13 +2727,23 @@ class WP_HTML_Tag_Processor { * $p->is_tag_closer() === true; * * @since 6.2.0 + * @since 6.7.0 Reports all BR tags as opening tags. * * @return bool Whether the current tag is a tag closer. */ public function is_tag_closer(): bool { return ( self::STATE_MATCHED_TAG === $this->parser_state && - $this->is_closing_tag + $this->is_closing_tag && + + /* + * The BR tag can only exist as an opening tag. If something like `
</br>` appears then the HTML parser will treat it as an opening tag with no + * attributes. The BR tag is unique in this way. + * + * @see https://html.spec.whatwg.org/#parsing-main-inbody + */ + 'BR' !== $this->get_tag() ); } @@ -2825,17 +2872,38 @@ class WP_HTML_Tag_Processor { * that a token has modifiable text, and a token with modifiable text may * have an empty string (e.g. a comment with no contents). * + * Limitations: + * + * - This function will not strip the leading newline appropriately + * after seeking into a LISTING or PRE element. To ensure that the + * newline is treated properly, seek to the LISTING or PRE opening + * tag instead of to the first text node inside the element. + * * @since 6.5.0 + * @since 6.7.0 Replaces NULL bytes (U+0000) and newlines appropriately. * * @return string */ public function get_modifiable_text(): string { - if ( null === $this->text_starts_at ) { + if ( null === $this->text_starts_at || 0 === $this->text_length ) { return ''; } $text = substr( $this->html, $this->text_starts_at, $this->text_length ); + /* + * Pre-processing the input stream would normally happen before + * any parsing is done, but deferring it means it's possible to + * skip in most cases. When getting the modifiable text, however + * it's important to apply the pre-processing steps, which is + * normalizing newlines. + * + * @see https://html.spec.whatwg.org/#preprocessing-the-input-stream + * @see https://infra.spec.whatwg.org/#normalize-newlines + */ + $text = str_replace( "\r\n", "\n", $text ); + $text = str_replace( "\r", "\n", $text ); + // Comment data is not decoded.
if ( self::STATE_CDATA_NODE === $this->parser_state || @@ -2843,10 +2911,10 @@ class WP_HTML_Tag_Processor { self::STATE_DOCTYPE === $this->parser_state || self::STATE_FUNKY_COMMENT === $this->parser_state ) { - return $text; + return str_replace( "\x00", "\u{FFFD}", $text ); } - $tag_name = $this->get_tag(); + $tag_name = $this->get_token_name(); if ( // Script data is not decoded. 'SCRIPT' === $tag_name || @@ -2858,29 +2926,34 @@ class WP_HTML_Tag_Processor { 'STYLE' === $tag_name || 'XMP' === $tag_name ) { - return $text; + return str_replace( "\x00", "\u{FFFD}", $text ); } $decoded = WP_HTML_Decoder::decode_text_node( $text ); /* - * TEXTAREA skips a leading newline, but this newline may appear not only as the - * literal character `\n`, but also as a character reference, such as in the - * following markup: `<textarea>&#x0a;Content</textarea>`. + * Skip the first line feed after LISTING, PRE, and TEXTAREA opening tags. * - * For these cases it's important to first decode the text content before checking - * for a leading newline and removing it. + * Note that this first newline may come in the form of a character + * reference, such as `&#x0a;`, and so it's important to perform + * this transformation only after decoding the raw text content. */ if ( - self::STATE_MATCHED_TAG === $this->parser_state && - 'TEXTAREA' === $tag_name && - strlen( $decoded ) > 0 && - "\n" === $decoded[0] + ( "\n" === ( $decoded[0] ?? '' ) ) && + ( ( $this->skip_newline_at === $this->token_starts_at && '#text' === $tag_name ) || 'TEXTAREA' === $tag_name ) ) { - return substr( $decoded, 1 ); + $decoded = substr( $decoded, 1 ); } - return $decoded; + /* + * Only in normative text nodes does the NULL byte (U+0000) get removed. + * In all other contexts it's replaced by the replacement character (U+FFFD) + * for security reasons (to avoid joining together strings that were safe + * when separated, but not when joined). + */ + return '#text' === $tag_name + ?
str_replace( "\x00", '', $decoded ) + : str_replace( "\x00", "\u{FFFD}", $decoded ); } /** diff --git a/wp-includes/html-api/class-wp-html-token.php b/wp-includes/html-api/class-wp-html-token.php index fe8636fb5e..948fe343df 100644 --- a/wp-includes/html-api/class-wp-html-token.php +++ b/wp-includes/html-api/class-wp-html-token.php @@ -72,12 +72,13 @@ class WP_HTML_Token { * * @since 6.4.0 * - * @param string $bookmark_name Name of bookmark corresponding to location in HTML where token is found. + * @param string|null $bookmark_name Name of bookmark corresponding to location in HTML where token is found, + * or `null` for markers and nodes without a bookmark. * @param string $node_name Name of node token represents; if uppercase, an HTML element; if lowercase, a special value like "marker". * @param bool $has_self_closing_flag Whether the source token contains the self-closing flag, regardless of whether it's valid. * @param callable|null $on_destroy Optional. Function to call when destroying token, useful for releasing the bookmark. */ - public function __construct( string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { + public function __construct( ?string $bookmark_name, string $node_name, bool $has_self_closing_flag, ?callable $on_destroy = null ) { $this->bookmark_name = $bookmark_name; $this->node_name = $node_name; $this->has_self_closing_flag = $has_self_closing_flag; diff --git a/wp-includes/version.php b/wp-includes/version.php index 0488739be6..3de31b7b78 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.7-alpha-58778'; +$wp_version = '6.7-alpha-58779'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.