diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index ca7a4cf3e0..168b8ace4c 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -3972,7 +3972,71 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_body(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), + * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE + * + * > Process the token using the rules for the "in body" insertion mode. + */ + case '#text': + $text = $this->get_modifiable_text(); + if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) { + return $this->step_in_body(); + } + goto after_body_anything_else; + break; + + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->bail( 'Content outside of BODY is unsupported.' ); + break; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "html" + */ + case '+HTML': + return $this->step_in_body(); + + /* + * > An end tag whose tag name is "html" + * + * > If the parser was created as part of the HTML fragment parsing algorithm, + * > this is a parse error; ignore the token. (fragment case) + * > + * > Otherwise, switch the insertion mode to "after after body". + */ + case '-HTML': + if ( isset( $this->context_node ) ) { + return $this->step(); + } + + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY; + return true; + } + + /* + * > Parse error. Switch the insertion mode to "in body" and reprocess the token. + */ + after_body_anything_else: + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); } /** @@ -3991,7 +4055,109 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_frameset(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), + * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE + * > + * > Insert the character. + * + * This algorithm effectively strips non-whitespace characters from text and inserts + * them under HTML. This is not supported at this time. + */ + case '#text': + $text = $this->get_modifiable_text(); + $text = $this->get_modifiable_text(); + if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) { + return $this->step_in_body(); + } + $this->bail( 'Non-whitespace characters cannot be handled in frameset.' ); + break; + + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "html" + */ + case '+HTML': + return $this->step_in_body(); + + /* + * > A start tag whose tag name is "frameset" + */ + case '+FRAMESET': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > An end tag whose tag name is "frameset" + */ + case '-FRAMESET': + /* + * > If the current node is the root html element, then this is a parse error; + * > ignore the token. (fragment case) + */ + if ( $this->state->stack_of_open_elements->current_node_is( 'HTML' ) ) { + return $this->step(); + } + + /* + * > Otherwise, pop the current node from the stack of open elements. + */ + $this->state->stack_of_open_elements->pop(); + + /* + * > If the parser was not created as part of the HTML fragment parsing algorithm + * > (fragment case), and the current node is no longer a frameset element, then + * > switch the insertion mode to "after frameset". + */ + if ( ! isset( $this->context_node ) && ! $this->state->stack_of_open_elements->current_node_is( 'FRAMESET' ) ) { + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET; + } + + return true; + + /* + * > A start tag whose tag name is "frame" + * + * > Insert an HTML element for the token. Immediately pop the + * > current node off the stack of open elements. + * > + * > Acknowledge the token's self-closing flag, if it is set. + */ + case '+FRAME': + $this->insert_html_element( $this->state->current_token ); + $this->state->stack_of_open_elements->pop(); + return true; + + /* + * > A start tag whose tag name is "noframes" + */ + case '+NOFRAMES': + return $this->step_in_head(); + } + + // Parse error: ignore the token. + return $this->step(); } /** @@ -4010,7 +4176,67 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_frameset(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), + * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE + * > + * > Insert the character. + * + * This algorithm effectively strips non-whitespace characters from text and inserts + * them under HTML. This is not supported at this time. + */ + case '#text': + $text = $this->get_modifiable_text(); + if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) { + return $this->step_in_body(); + } + $this->bail( 'Non-whitespace characters cannot be handled in after frameset' ); + break; + + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->insert_html_element( $this->state->current_token ); + return true; + + /* + * > A DOCTYPE token + */ + case 'html': + // Parse error: ignore the token. + return $this->step(); + + /* + * > A start tag whose tag name is "html" + */ + case '+HTML': + return $this->step_in_body(); + + /* + * > An end tag whose tag name is "html" + */ + case '-HTML': + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET; + return true; + + /* + * > A start tag whose tag name is "noframes" + */ + case '+NOFRAMES': + return $this->step_in_head(); + } + + // Parse error: ignore the token. + return $this->step(); } /** @@ -4029,7 +4255,52 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_after_body(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->bail( 'Content outside of HTML is unsupported.' ); + break; + + /* + * > A DOCTYPE token + * > A start tag whose tag name is "html" + * + * > Process the token using the rules for the "in body" insertion mode. + */ + case 'html': + case '+HTML': + return $this->step_in_body(); + + /* + * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), + * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE + * > + * > Process the token using the rules for the "in body" insertion mode. + */ + case '#text': + $text = $this->get_modifiable_text(); + if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) { + return $this->step_in_body(); + } + goto after_after_body_anything_else; + break; + } + + /* + * > Parse error. Switch the insertion mode to "in body" and reprocess the token. + */ + after_after_body_anything_else: + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + return $this->step( self::REPROCESS_CURRENT_NODE ); } /** @@ -4048,7 +4319,57 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_after_frameset(): bool { - $this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET . ' state.' ); + $tag_name = $this->get_token_name(); + $token_type = $this->get_token_type(); + $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op = "{$op_sigil}{$tag_name}"; + + switch ( $op ) { + /* + * > A comment token + */ + case '#comment': + case '#funky-comment': + case '#presumptuous-tag': + $this->bail( 'Content outside of HTML is unsupported.' ); + break; + + /* + * > A DOCTYPE token + * > A start tag whose tag name is "html" + * + * > Process the token using the rules for the "in body" insertion mode. + */ + case 'html': + case '+HTML': + return $this->step_in_body(); + + /* + * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF), + * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE + * > + * > Process the token using the rules for the "in body" insertion mode. + * + * This algorithm effectively strips non-whitespace characters from text and inserts + * them under HTML. This is not supported at this time. + */ + case '#text': + $text = $this->get_modifiable_text(); + if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) { + return $this->step_in_body(); + } + $this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' ); + break; + + /* + * > A start tag whose tag name is "noframes" + */ + case '+NOFRAMES': + return $this->step_in_head(); + } + + // Parse error: ignore the token. + return $this->step(); } /** @@ -4115,7 +4436,8 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ case '#cdata-section': case '#comment': - case '#funky_comment': + case '#funky-comment': + case '#presumptuous-tag': $this->insert_foreign_element( $this->state->current_token, false ); return true; diff --git a/wp-includes/version.php b/wp-includes/version.php index ef7e74496f..0338d92c6a 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.7-alpha-58925'; +$wp_version = '6.7-alpha-58926'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.