HTML API: Add method to create fragment at node.
HTML Fragment parsing always happens with a context node, which may impact how a fragment of HTML is parsed. HTML Fragment Processors can be instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment( $html )`. This changeset adds an instance method called `create_fragment_at_current_node( string $html )`. It can only be called when the processor is paused at a `#tag`, with some additional constraints: - The opening and closing tags must appear in the HTML input (no virtual tokens). - No "self-contained" elements are allowed ( `IFRAME`, `SCRIPT`, `TITLE`, etc.). If successful, the method will return a `WP_HTML_Processor` instance whose context is inherited from the node that the method was called from. Props jonsurrell, bernhard-reiter, gziolo. Fixes #62357. Built from https://develop.svn.wordpress.org/trunk@59444 git-svn-id: http://core.svn.wordpress.org/trunk@58830 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
8f1dc00b4d
commit
ff3fde39ee
|
@ -424,6 +424,120 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Creates a fragment processor at the current node.
 *
 * HTML Fragment parsing always happens with a context node. HTML Fragment Processors can be
 * instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment( $html )`.
 *
 * The context node may impact how a fragment of HTML is parsed. For example, consider the HTML
 * fragment `<td />Inside TD?</td>`.
 *
 * A BODY context node will produce the following tree:
 *
 *     └─#text Inside TD?
 *
 * Notice that the `<td>` tags are completely ignored.
 *
 * Compare that with an SVG context node that produces the following tree:
 *
 *     ├─svg:td
 *     └─#text Inside TD?
 *
 * Here, a `td` node in the `svg` namespace is created, and its self-closing flag is respected.
 * This is a peculiarity of parsing HTML in foreign content like SVG.
 *
 * Finally, consider the tree produced with a TABLE context node:
 *
 *     └─TBODY
 *       └─TR
 *         └─TD
 *           └─#text Inside TD?
 *
 * These examples demonstrate how important the context node may be when processing an HTML
 * fragment. Special care must be taken when processing fragments that are expected to appear
 * in specific contexts. SVG and TABLE are good examples, but there are others.
 *
 * The processor must be paused on an opening tag. No fragment processor is created, and
 * `null` is returned, when any of the following holds:
 *
 *  - The current token is not a `#tag`, or it is a tag closer.
 *  - The current node is an HTML element whose contents require a special tokenizer
 *    state (`IFRAME`, `NOEMBED`, `NOFRAMES`, `SCRIPT`, `STYLE`, `TEXTAREA`, `TITLE`,
 *    `XMP`, `PLAINTEXT`).
 *  - The underlying call to `create_fragment()` fails.
 *
 * @since 6.8.0
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm
 *
 * @param string $html Input HTML fragment to process.
 * @return static|null The created processor if successful, otherwise null.
 */
public function create_fragment_at_current_node( string $html ) {
	/*
	 * The token type is `#tag` for openers and closers alike, but only an
	 * opening tag represents an element that can contain the fragment.
	 */
	if ( '#tag' !== $this->get_token_type() || $this->is_tag_closer() ) {
		return null;
	}

	/*
	 * Every decision below is made against the token the processor is
	 * paused on, and the same token becomes the fragment's context node.
	 */
	$context_token = $this->current_element->token;
	$tag_name      = $context_token->node_name;
	$namespace     = $context_token->namespace;

	/*
	 * Prevent creating fragments at nodes that require a special tokenizer state.
	 * This is unsupported by the HTML Processor.
	 */
	if (
		'html' === $namespace &&
		in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true )
	) {
		return null;
	}

	$fragment_processor = static::create_fragment( $html );
	if ( null === $fragment_processor ) {
		return null;
	}

	$fragment_processor->compat_mode = $this->compat_mode;

	// Detach the copy from this processor's bookmark and destruction handling.
	$fragment_processor->context_node                = clone $context_token;
	$fragment_processor->context_node->bookmark_name = 'context-node';
	$fragment_processor->context_node->on_destroy    = null;

	// Record the context node as a `[ tag name, attributes ]` pair.
	$fragment_processor->state->context_node = array( $fragment_processor->context_node->node_name, array() );

	$attribute_names = $this->get_attribute_names_with_prefix( '' );
	if ( null !== $attribute_names ) {
		foreach ( $attribute_names as $name ) {
			$fragment_processor->state->context_node[1][ $name ] = $this->get_attribute( $name );
		}
	}

	$fragment_processor->breadcrumbs = array( 'HTML', $fragment_processor->context_node->node_name );

	if ( 'TEMPLATE' === $fragment_processor->context_node->node_name ) {
		$fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
	}

	$fragment_processor->reset_insertion_mode_appropriately();

	/*
	 * > Set the parser's form element pointer to the nearest node to the context element that
	 * > is a form element (going straight up the ancestor chain, and including the element
	 * > itself, if it is a form element), if any. (If there is no such form element, the
	 * > form element pointer keeps its initial value, null.)
	 */
	foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) {
		if ( 'FORM' === $element->node_name && 'html' === $element->namespace ) {
			$fragment_processor->state->form_element                = clone $element;
			$fragment_processor->state->form_element->bookmark_name = null;
			$fragment_processor->state->form_element->on_destroy    = null;
			break;
		}
	}

	$fragment_processor->state->encoding_confidence = 'irrelevant';

	/*
	 * Update the parsing namespace near the end of the process.
	 * This is important so that any push/pop from the stack of open
	 * elements does not change the parsing namespace.
	 */
	$fragment_processor->change_parsing_namespace(
		$context_token->integration_node_type ? 'html' : $namespace
	);

	return $fragment_processor;
}
|
||||
|
||||
/**
|
||||
* Stops the parser and terminates its execution when encountering unsupported markup.
|
||||
*
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
*
|
||||
* @global string $wp_version
|
||||
*/
|
||||
$wp_version = '6.8-alpha-59443';
|
||||
$wp_version = '6.8-alpha-59444';
|
||||
|
||||
/**
|
||||
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.
|
||||
|
|
Loading…
Reference in New Issue