HTML API: Add support for missing FRAMESET and "after" insertion modes.
As part of work to add more spec support to the HTML API, this patch adds support for the FRAMESET-related insertion modes, as well as the set of missing after insertion modes. These modes run at the end of parsing a document, closing it and taking care of any lingering tags. Developed in https://github.com/wordpress/wordpress-develop/7165 Discussed in https://core.trac.wordpress.org/ticket/61576 Props dmsnell, jonsurrell. See #61576. Built from https://develop.svn.wordpress.org/trunk@58926 git-svn-id: http://core.svn.wordpress.org/trunk@58322 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
e3a3c13497
commit
01d2199622
|
@ -3972,7 +3972,71 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
* @return bool Whether an element was found.
|
||||
*/
|
||||
private function step_after_body(): bool {
|
||||
$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY . ' state.' );
|
||||
$tag_name = $this->get_token_name();
|
||||
$token_type = $this->get_token_type();
|
||||
$op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
|
||||
$op = "{$op_sigil}{$tag_name}";
|
||||
|
||||
switch ( $op ) {
|
||||
/*
|
||||
* > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
|
||||
* > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
|
||||
*
|
||||
* > Process the token using the rules for the "in body" insertion mode.
|
||||
*/
|
||||
case '#text':
|
||||
$text = $this->get_modifiable_text();
|
||||
if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
|
||||
return $this->step_in_body();
|
||||
}
|
||||
goto after_body_anything_else;
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A comment token
|
||||
*/
|
||||
case '#comment':
|
||||
case '#funky-comment':
|
||||
case '#presumptuous-tag':
|
||||
$this->bail( 'Content outside of BODY is unsupported.' );
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A DOCTYPE token
|
||||
*/
|
||||
case 'html':
|
||||
// Parse error: ignore the token.
|
||||
return $this->step();
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "html"
|
||||
*/
|
||||
case '+HTML':
|
||||
return $this->step_in_body();
|
||||
|
||||
/*
|
||||
* > An end tag whose tag name is "html"
|
||||
*
|
||||
* > If the parser was created as part of the HTML fragment parsing algorithm,
|
||||
* > this is a parse error; ignore the token. (fragment case)
|
||||
* >
|
||||
* > Otherwise, switch the insertion mode to "after after body".
|
||||
*/
|
||||
case '-HTML':
|
||||
if ( isset( $this->context_node ) ) {
|
||||
return $this->step();
|
||||
}
|
||||
|
||||
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* > Parse error. Switch the insertion mode to "in body" and reprocess the token.
|
||||
*/
|
||||
after_body_anything_else:
|
||||
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
|
||||
return $this->step( self::REPROCESS_CURRENT_NODE );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3991,7 +4055,109 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
* @return bool Whether an element was found.
|
||||
*/
|
||||
private function step_in_frameset(): bool {
|
||||
$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET . ' state.' );
|
||||
$tag_name = $this->get_token_name();
|
||||
$token_type = $this->get_token_type();
|
||||
$op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
|
||||
$op = "{$op_sigil}{$tag_name}";
|
||||
|
||||
switch ( $op ) {
|
||||
/*
|
||||
* > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
|
||||
* > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
|
||||
* >
|
||||
* > Insert the character.
|
||||
*
|
||||
* This algorithm effectively strips non-whitespace characters from text and inserts
|
||||
* them under HTML. This is not supported at this time.
|
||||
*/
|
||||
case '#text':
|
||||
$text = $this->get_modifiable_text();
|
||||
$text = $this->get_modifiable_text();
|
||||
if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
|
||||
return $this->step_in_body();
|
||||
}
|
||||
$this->bail( 'Non-whitespace characters cannot be handled in frameset.' );
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A comment token
|
||||
*/
|
||||
case '#comment':
|
||||
case '#funky-comment':
|
||||
case '#presumptuous-tag':
|
||||
$this->insert_html_element( $this->state->current_token );
|
||||
return true;
|
||||
|
||||
/*
|
||||
* > A DOCTYPE token
|
||||
*/
|
||||
case 'html':
|
||||
// Parse error: ignore the token.
|
||||
return $this->step();
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "html"
|
||||
*/
|
||||
case '+HTML':
|
||||
return $this->step_in_body();
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "frameset"
|
||||
*/
|
||||
case '+FRAMESET':
|
||||
$this->insert_html_element( $this->state->current_token );
|
||||
return true;
|
||||
|
||||
/*
|
||||
* > An end tag whose tag name is "frameset"
|
||||
*/
|
||||
case '-FRAMESET':
|
||||
/*
|
||||
* > If the current node is the root html element, then this is a parse error;
|
||||
* > ignore the token. (fragment case)
|
||||
*/
|
||||
if ( $this->state->stack_of_open_elements->current_node_is( 'HTML' ) ) {
|
||||
return $this->step();
|
||||
}
|
||||
|
||||
/*
|
||||
* > Otherwise, pop the current node from the stack of open elements.
|
||||
*/
|
||||
$this->state->stack_of_open_elements->pop();
|
||||
|
||||
/*
|
||||
* > If the parser was not created as part of the HTML fragment parsing algorithm
|
||||
* > (fragment case), and the current node is no longer a frameset element, then
|
||||
* > switch the insertion mode to "after frameset".
|
||||
*/
|
||||
if ( ! isset( $this->context_node ) && ! $this->state->stack_of_open_elements->current_node_is( 'FRAMESET' ) ) {
|
||||
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "frame"
|
||||
*
|
||||
* > Insert an HTML element for the token. Immediately pop the
|
||||
* > current node off the stack of open elements.
|
||||
* >
|
||||
* > Acknowledge the token's self-closing flag, if it is set.
|
||||
*/
|
||||
case '+FRAME':
|
||||
$this->insert_html_element( $this->state->current_token );
|
||||
$this->state->stack_of_open_elements->pop();
|
||||
return true;
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "noframes"
|
||||
*/
|
||||
case '+NOFRAMES':
|
||||
return $this->step_in_head();
|
||||
}
|
||||
|
||||
// Parse error: ignore the token.
|
||||
return $this->step();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4010,7 +4176,67 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
* @return bool Whether an element was found.
|
||||
*/
|
||||
private function step_after_frameset(): bool {
|
||||
$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET . ' state.' );
|
||||
$tag_name = $this->get_token_name();
|
||||
$token_type = $this->get_token_type();
|
||||
$op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
|
||||
$op = "{$op_sigil}{$tag_name}";
|
||||
|
||||
switch ( $op ) {
|
||||
/*
|
||||
* > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
|
||||
* > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
|
||||
* >
|
||||
* > Insert the character.
|
||||
*
|
||||
* This algorithm effectively strips non-whitespace characters from text and inserts
|
||||
* them under HTML. This is not supported at this time.
|
||||
*/
|
||||
case '#text':
|
||||
$text = $this->get_modifiable_text();
|
||||
if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
|
||||
return $this->step_in_body();
|
||||
}
|
||||
$this->bail( 'Non-whitespace characters cannot be handled in after frameset' );
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A comment token
|
||||
*/
|
||||
case '#comment':
|
||||
case '#funky-comment':
|
||||
case '#presumptuous-tag':
|
||||
$this->insert_html_element( $this->state->current_token );
|
||||
return true;
|
||||
|
||||
/*
|
||||
* > A DOCTYPE token
|
||||
*/
|
||||
case 'html':
|
||||
// Parse error: ignore the token.
|
||||
return $this->step();
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "html"
|
||||
*/
|
||||
case '+HTML':
|
||||
return $this->step_in_body();
|
||||
|
||||
/*
|
||||
* > An end tag whose tag name is "html"
|
||||
*/
|
||||
case '-HTML':
|
||||
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET;
|
||||
return true;
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "noframes"
|
||||
*/
|
||||
case '+NOFRAMES':
|
||||
return $this->step_in_head();
|
||||
}
|
||||
|
||||
// Parse error: ignore the token.
|
||||
return $this->step();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4029,7 +4255,52 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
* @return bool Whether an element was found.
|
||||
*/
|
||||
private function step_after_after_body(): bool {
|
||||
$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY . ' state.' );
|
||||
$tag_name = $this->get_token_name();
|
||||
$token_type = $this->get_token_type();
|
||||
$op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
|
||||
$op = "{$op_sigil}{$tag_name}";
|
||||
|
||||
switch ( $op ) {
|
||||
/*
|
||||
* > A comment token
|
||||
*/
|
||||
case '#comment':
|
||||
case '#funky-comment':
|
||||
case '#presumptuous-tag':
|
||||
$this->bail( 'Content outside of HTML is unsupported.' );
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A DOCTYPE token
|
||||
* > A start tag whose tag name is "html"
|
||||
*
|
||||
* > Process the token using the rules for the "in body" insertion mode.
|
||||
*/
|
||||
case 'html':
|
||||
case '+HTML':
|
||||
return $this->step_in_body();
|
||||
|
||||
/*
|
||||
* > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
|
||||
* > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
|
||||
* >
|
||||
* > Process the token using the rules for the "in body" insertion mode.
|
||||
*/
|
||||
case '#text':
|
||||
$text = $this->get_modifiable_text();
|
||||
if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
|
||||
return $this->step_in_body();
|
||||
}
|
||||
goto after_after_body_anything_else;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* > Parse error. Switch the insertion mode to "in body" and reprocess the token.
|
||||
*/
|
||||
after_after_body_anything_else:
|
||||
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
|
||||
return $this->step( self::REPROCESS_CURRENT_NODE );
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4048,7 +4319,57 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
* @return bool Whether an element was found.
|
||||
*/
|
||||
private function step_after_after_frameset(): bool {
|
||||
$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET . ' state.' );
|
||||
$tag_name = $this->get_token_name();
|
||||
$token_type = $this->get_token_type();
|
||||
$op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
|
||||
$op = "{$op_sigil}{$tag_name}";
|
||||
|
||||
switch ( $op ) {
|
||||
/*
|
||||
* > A comment token
|
||||
*/
|
||||
case '#comment':
|
||||
case '#funky-comment':
|
||||
case '#presumptuous-tag':
|
||||
$this->bail( 'Content outside of HTML is unsupported.' );
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A DOCTYPE token
|
||||
* > A start tag whose tag name is "html"
|
||||
*
|
||||
* > Process the token using the rules for the "in body" insertion mode.
|
||||
*/
|
||||
case 'html':
|
||||
case '+HTML':
|
||||
return $this->step_in_body();
|
||||
|
||||
/*
|
||||
* > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
|
||||
* > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
|
||||
* >
|
||||
* > Process the token using the rules for the "in body" insertion mode.
|
||||
*
|
||||
* This algorithm effectively strips non-whitespace characters from text and inserts
|
||||
* them under HTML. This is not supported at this time.
|
||||
*/
|
||||
case '#text':
|
||||
$text = $this->get_modifiable_text();
|
||||
if ( strlen( $text ) === strspn( $text, " \t\n\f\r" ) ) {
|
||||
return $this->step_in_body();
|
||||
}
|
||||
$this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' );
|
||||
break;
|
||||
|
||||
/*
|
||||
* > A start tag whose tag name is "noframes"
|
||||
*/
|
||||
case '+NOFRAMES':
|
||||
return $this->step_in_head();
|
||||
}
|
||||
|
||||
// Parse error: ignore the token.
|
||||
return $this->step();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -4115,7 +4436,8 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
*/
|
||||
case '#cdata-section':
|
||||
case '#comment':
|
||||
case '#funky_comment':
|
||||
case '#funky-comment':
|
||||
case '#presumptuous-tag':
|
||||
$this->insert_foreign_element( $this->state->current_token, false );
|
||||
return true;
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
*
|
||||
* @global string $wp_version
|
||||
*/
|
||||
$wp_version = '6.7-alpha-58925';
|
||||
$wp_version = '6.7-alpha-58926';
|
||||
|
||||
/**
|
||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||
|
|
Loading…
Reference in New Issue