diff --git a/wp-includes/html-api/class-wp-html-open-elements.php b/wp-includes/html-api/class-wp-html-open-elements.php index d59bd32140..1162a267f9 100644 --- a/wp-includes/html-api/class-wp-html-open-elements.php +++ b/wp-includes/html-api/class-wp-html-open-elements.php @@ -720,6 +720,80 @@ class WP_HTML_Open_Elements { } } + /** + * Clear the stack back to a table context. + * + * > When the steps above require the UA to clear the stack back to a table context, it means + * > that the UA must, while the current node is not a table, template, or html element, pop + * > elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-context + * + * @since 6.7.0 + */ + public function clear_to_table_context(): void { + foreach ( $this->walk_up() as $item ) { + if ( + 'TABLE' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->pop(); + } + } + + /** + * Clear the stack back to a table body context. + * + * > When the steps above require the UA to clear the stack back to a table body context, it + * > means that the UA must, while the current node is not a tbody, tfoot, thead, template, or + * > html element, pop elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-body-context + * + * @since 6.7.0 + */ + public function clear_to_table_body_context(): void { + foreach ( $this->walk_up() as $item ) { + if ( + 'TBODY' === $item->node_name || + 'TFOOT' === $item->node_name || + 'THEAD' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->pop(); + } + } + + /** + * Clear the stack back to a table row context. + * + * > When the steps above require the UA to clear the stack back to a table row context, it + * > means that the UA must, while the current node is not a tr, template, or html element, pop + * > elements from the stack of open elements. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-stack-back-to-a-table-row-context + * + * @since 6.7.0 + */ + public function clear_to_table_row_context(): void { + foreach ( $this->walk_up() as $item ) { + if ( + 'TR' === $item->node_name || + 'TEMPLATE' === $item->node_name || + 'HTML' === $item->node_name + ) { + break; + } + $this->pop(); + } + } + /** * Wakeup magic method. * diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index f9073492d8..975f21a0f0 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -1786,6 +1786,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > A start tag whose tag name is "table" */ case '+TABLE': + /* + * > If the Document is not set to quirks mode, and the stack of open elements + * > has a p element in button scope, then close a p element. + */ if ( WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && $this->state->stack_of_open_elements->has_p_in_button_scope() @@ -2117,7 +2121,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * This internal function performs the 'in table' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -2127,7 +2131,245 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_table(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE . ' state.' ); + $token_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$token_name}"; + + switch ( $op ) { + /* + * > A character token, if the current node is table, + * > tbody, template, tfoot, thead, or tr element + */ + case '#text': + $current_node = $this->state->stack_of_open_elements->current_node(); + $current_node_name = $current_node ? $current_node->node_name : null; + if ( + $current_node_name && ( + 'TABLE' === $current_node_name || + 'TBODY' === $current_node_name || + 'TEMPLATE' === $current_node_name || + 'TFOOT' === $current_node_name || + 'THEAD' === $current_node_name || + 'TR' === $current_node_name + ) + ) { + $text = $this->get_modifiable_text(); + /* + * If the text is empty after processing HTML entities and stripping + * U+0000 NULL bytes then ignore the token. + */ + if ( '' === $text ) { + return $this->step(); + } + + /* + * This follows the rules for "in table text" insertion mode. + * + * Whitespace-only text nodes are inserted in-place. Otherwise + * foster parenting is enabled and the nodes would be + * inserted out-of-place. + * + * > If any of the tokens in the pending table character tokens + * > list are character tokens that are not ASCII whitespace, + * > then this is a parse error: reprocess the character tokens + * > in the pending table character tokens list using the rules + * > given in the "anything else" entry in the "in table" + * > insertion mode. + * > + * > Otherwise, insert the characters given by the pending table + * > character tokens list. + * + * @see https://html.spec.whatwg.org/#parsing-main-intabletext + */ + if ( strlen( $text ) === strspn( $text, " \t\f\r\n" ) ) { + $this->insert_html_element( $this->state->current_token ); + return true; + } + + // Non-whitespace would trigger fostering, unsupported at this time. + $this->bail( 'Foster parenting is not supported.' ); + break; + } + break; + + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "caption" + */ + case '+CAPTION': + $this->state->stack_of_open_elements->clear_to_table_context(); + $this->state->active_formatting_elements->insert_marker(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; + return true; + + /* + * > A start tag whose tag name is "colgroup" + */ + case '+COLGROUP': + $this->state->stack_of_open_elements->clear_to_table_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return true; + + /* + * > A start tag whose tag name is "col" + */ + case '+COL': + $this->state->stack_of_open_elements->clear_to_table_context(); + + /* + * > Insert an HTML element for a "colgroup" start tag token with no attributes, + * > then switch the insertion mode to "in column group". + */ + $this->insert_virtual_node( 'COLGROUP' ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > A start tag whose tag name is one of: "tbody", "tfoot", "thead" + */ + case '+TBODY': + case '+TFOOT': + case '+THEAD': + $this->state->stack_of_open_elements->clear_to_table_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return true; + + /* + * > A start tag whose tag name is one of: "td", "th", "tr" + */ + case '+TD': + case '+TH': + case '+TR': + $this->state->stack_of_open_elements->clear_to_table_context(); + /* + * > Insert an HTML element for a "tbody" start tag token with no attributes, + * > then switch the insertion mode to "in table body". + */ + $this->insert_virtual_node( 'TBODY' ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > A start tag whose tag name is "table" + * + * This tag in the IN TABLE insertion mode is a parse error. + */ + case '+TABLE': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { + return $this->step(); + } + + $this->state->stack_of_open_elements->pop_until( 'TABLE' ); + $this->reset_insertion_mode(); + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > An end tag whose tag name is "table" + */ + case '-TABLE': + if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) { + // @todo Indicate a parse error once it's possible. + return $this->step(); + } + + $this->state->stack_of_open_elements->pop_until( 'TABLE' ); + $this->reset_insertion_mode(); + return true; + + /* + * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" + */ + case '-BODY': + case '-CAPTION': + case '-COL': + case '-COLGROUP': + case '-HTML': + case '-TBODY': + case '-TD': + case '-TFOOT': + case '-TH': + case '-THEAD': + case '-TR': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is one of: "style", "script", "template" + * > An end tag whose tag name is "template" + */ + case '+STYLE': + case '+SCRIPT': + case '+TEMPLATE': + case '-TEMPLATE': + /* + * > Process the token using the rules for the "in head" insertion mode. + */ + return $this->step_in_head(); + + /* + * > A start tag whose tag name is "input" + * + * > If the token does not have an attribute with the name "type", or if it does, but + * > that attribute's value is not an ASCII case-insensitive match for the string + * > "hidden", then: act as described in the "anything else" entry below. + */ + case '+INPUT': + $type_attribute = $this->get_attribute( 'type' ); + if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { + goto anything_else; + } + // @todo Indicate a parse error once it's possible. + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A start tag whose tag name is "form" + * + * This tag in the IN TABLE insertion mode is a parse error. + */ + case '+FORM': + if ( + $this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) || + isset( $this->state->form_element ) + ) { + return $this->step(); + } + + // This FORM is special because it immediately closes and cannot have other children. + $this->insert_html_element( $this->state->current_token ); + $this->state->form_element = $this->state->current_token; + $this->state->stack_of_open_elements->pop(); + return true; + } + + /* + * > Anything else + * > Parse error. Enable foster parenting, process the token using the rules for the + * > "in body" insertion mode, and then disable foster parenting. + * + * @todo Indicate a parse error once it's possible. + */ + anything_else: + $this->bail( 'Foster parenting is not supported.' ); } /** @@ -2193,7 +2435,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * This internal function performs the 'in table body' insertion mode * logic for the generalized WP_HTML_Processor::step() function. * - * @since 6.7.0 Stub implementation. + * @since 6.7.0 * * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. * @@ -2203,7 +2445,97 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_table_body(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY . ' state.' ); + $tag_name = $this->get_tag(); + $op_sigil = $this->is_tag_closer() ? '-' : '+'; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A start tag whose tag name is "tr" + */ + case '+TR': + $this->state->stack_of_open_elements->clear_to_table_body_context(); + $this->insert_html_element( $this->state->current_token ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return true; + + /* + * > A start tag whose tag name is one of: "th", "td" + */ + case '+TH': + case '+TD': + // @todo Indicate a parse error once it's possible. + $this->state->stack_of_open_elements->clear_to_table_body_context(); + $this->insert_virtual_node( 'TR' ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; + return $this->step( self::REPROCESS_CURRENT_NODE ); + + /* + * > An end tag whose tag name is one of: "tbody", "tfoot", "thead" + */ + case '-TBODY': + case '-TFOOT': + case '-THEAD': + /* + * @todo This needs to check if the element in scope is an HTML element, meaning that + * when SVG and MathML support is added, this needs to differentiate between an + * HTML element of the given name, such as `