From 372e3c4b977974fd3478d57fca87cfb32f1fa922 Mon Sep 17 00:00:00 2001 From: Bernhard Reiter Date: Mon, 8 Jan 2024 14:05:24 +0000 Subject: [PATCH] HTML API: Add explicit handling or failure for all tags. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HTML API HTML processor does not yet support all tags. Many tags (e.g. list elements) have some complicated rules in the [https://html.spec.whatwg.org/#parsing-main-inbody "in body" insertion mode]. Implementing these special rules is blocking the implementation for a catch-all rule for "any other tag" because we need to prevent special rules from being handled by the catch-all. Any other start tag Reconstruct the active formatting elements, if any. Insert an HTML element for the token. … This change ensures the HTML Processor fails when handling special tags. This is the same as existing behavior, but will allow us to implement the catch-all "any other tag" handling without unintentionally handling special elements. Additionally, we add tests that assert the special elements are unhandled. As these tags are implemented, this should help to ensure they're removed from the unsupported tag list. Props jonsurrell, dmsnell. Fixes #60092. Built from https://develop.svn.wordpress.org/trunk@57248 git-svn-id: http://core.svn.wordpress.org/trunk@56754 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- .../html-api/class-wp-html-processor.php | 157 ++++++++++++++---- wp-includes/version.php | 2 +- 2 files changed, 127 insertions(+), 32 deletions(-) diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index e46c368c70..41823af00f 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -100,15 +100,19 @@ * The following list specifies the HTML tags that _are_ supported: * * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. 
- * - Form elements: BUTTON, FIELDSET, SEARCH. + * - Custom elements: All custom elements are supported. :) + * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH. * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U. * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. * - Links: A. * - Lists: DL. - * - Media elements: FIGCAPTION, FIGURE, IMG. + * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO. * - Paragraph: P. - * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION - * - Deprecated elements: CENTER, DIR + * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. + * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION. + * - Templating elements: SLOT. + * - Text decoration: RUBY. + * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER. * * ### Supported markup * @@ -830,41 +834,132 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { $this->reconstruct_active_formatting_elements(); $this->insert_html_element( $this->state->current_token ); return true; + } + /* + * These tags require special handling in the 'in body' insertion mode + * but that handling hasn't yet been implemented. + * + * As the rules for each tag are implemented, the corresponding tag + * name should be removed from this list. An accompanying test should + * help ensure this list is maintained. + * + * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags + * + * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's + * possible to handle "any other start tag" and "any other end tag" below, + * as that guarantees execution doesn't proceed for the unimplemented tags. 
+ * + * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody + */ + switch ( $tag_name ) { + case 'APPLET': + case 'AREA': + case 'BASE': + case 'BASEFONT': + case 'BGSOUND': + case 'BODY': + case 'BR': + case 'CAPTION': + case 'COL': + case 'COLGROUP': + case 'DD': + case 'DT': + case 'EMBED': + case 'FORM': + case 'FRAME': + case 'FRAMESET': + case 'HEAD': + case 'HR': + case 'HTML': + case 'IFRAME': + case 'INPUT': + case 'KEYGEN': + case 'LI': + case 'LINK': + case 'LISTING': + case 'MARQUEE': + case 'MATH': + case 'META': + case 'NOBR': + case 'NOEMBED': + case 'NOFRAMES': + case 'NOSCRIPT': + case 'OBJECT': + case 'OL': + case 'OPTGROUP': + case 'OPTION': + case 'PARAM': + case 'PLAINTEXT': + case 'PRE': + case 'RB': + case 'RP': + case 'RT': + case 'RTC': + case 'SARCASM': + case 'SCRIPT': + case 'SELECT': + case 'SOURCE': + case 'STYLE': + case 'SVG': + case 'TABLE': + case 'TBODY': + case 'TD': + case 'TEMPLATE': + case 'TEXTAREA': + case 'TFOOT': + case 'TH': + case 'THEAD': + case 'TITLE': + case 'TR': + case 'TRACK': + case 'UL': + case 'WBR': + case 'XMP': + $this->last_error = self::ERROR_UNSUPPORTED; + throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + } + + if ( ! $this->is_tag_closer() ) { /* * > Any other start tag */ - case '+SPAN': - $this->reconstruct_active_formatting_elements(); - $this->insert_html_element( $this->state->current_token ); - return true; + $this->reconstruct_active_formatting_elements(); + $this->insert_html_element( $this->state->current_token ); + return true; + } else { + /* + * > Any other end tag + */ /* - * Any other end tag + * Find the corresponding tag opener in the stack of open elements, if + * it exists before reaching a special element, which provides a kind + * of boundary in the stack. For example, a `</custom-tag>` should not + * close anything beyond its containing `P` or `DIV` element.
*/ - case '-SPAN': - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - // > If node is an HTML element with the same tag name as the token, then: - if ( $item->node_name === $tag_name ) { - $this->generate_implied_end_tags( $tag_name ); - - // > If node is not the current node, then this is a parse error. - - $this->state->stack_of_open_elements->pop_until( $tag_name ); - return true; - } - - // > Otherwise, if node is in the special category, then this is a parse error; ignore the token, and return. - if ( self::is_special( $item->node_name ) ) { - return $this->step(); - } + foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { + if ( $tag_name === $node->node_name ) { + break; } - // Execution should not reach here; if it does then something went wrong. - return false; - default: - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." ); + if ( self::is_special( $node->node_name ) ) { + // This is a parse error, ignore the token. + return $this->step(); + } + } + + $this->generate_implied_end_tags( $tag_name ); + if ( $node !== $this->state->stack_of_open_elements->current_node() ) { + // @todo Record parse error: this error doesn't impact parsing. + } + + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + $this->state->stack_of_open_elements->pop(); + if ( $node === $item ) { + return true; + } + } } } @@ -1264,7 +1359,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. if ( ! 
$this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { - $this->state->active_formatting_elements->remove_node( $formatting_element->bookmark_name ); + $this->state->active_formatting_elements->remove_node( $formatting_element ); return; } diff --git a/wp-includes/version.php b/wp-includes/version.php index 2084fa2f76..f26c1e1c56 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.5-alpha-57247'; +$wp_version = '6.5-alpha-57248'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.