diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php index d7f77f495c..04c387c003 100644 --- a/wp-includes/html-api/class-wp-html-processor.php +++ b/wp-includes/html-api/class-wp-html-processor.php @@ -188,6 +188,17 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $last_error = null; + /** + * Stores context for why the parser bailed on unsupported HTML, if it did. + * + * @see self::get_unsupported_exception() + * + * @since 6.7.0 + * + * @var WP_HTML_Unsupported_Exception|null + */ + private $unsupported_exception = null; + /** * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance. * @@ -384,6 +395,45 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { }; } + /** + * Stops the parser and terminates its execution when encountering unsupported markup. + * + * @throws WP_HTML_Unsupported_Exception Always thrown, after setting the last error to `self::ERROR_UNSUPPORTED` and storing the exception, which carries the current token, the stack of open elements, and the active formatting elements. + * + * @since 6.7.0 + * + * @param string $message Explains what support is missing in order to parse the current node. + * + * @return never Control does not return to the caller; this method always throws. + */ + private function bail( string $message ) { + $here = $this->bookmarks[ $this->state->current_token->bookmark_name ]; + $token = substr( $this->html, $here->start, $here->length ); + + $open_elements = array(); + foreach ( $this->state->stack_of_open_elements->stack as $item ) { + $open_elements[] = $item->node_name; + } + + $active_formats = array(); + foreach ( $this->state->active_formatting_elements->walk_down() as $item ) { + $active_formats[] = $item->node_name; + } + + $this->last_error = self::ERROR_UNSUPPORTED; + + $this->unsupported_exception = new WP_HTML_Unsupported_Exception( + $message, + $this->state->current_token->node_name, + $here->start, + $token, + $open_elements, + $active_formats + ); + + throw $this->unsupported_exception; + } + /** * Returns the last error, if any. 
* @@ -411,6 +461,21 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return $this->last_error; } + /** + * Returns context for why the parser aborted due to unsupported HTML, if it did. + * + * This is meant for debugging purposes, not for production use. + * + * @since 6.7.0 + * + * @see self::$unsupported_exception + * + * @return WP_HTML_Unsupported_Exception|null Exception stored when the parser bailed on unsupported HTML, or null if none has been stored. + */ + public function get_unsupported_exception() { + return $this->unsupported_exception; + } + /** * Finds the next tag matching the $query. * @@ -841,8 +906,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { // This should be unreachable but PHP doesn't have total type checking on switch. default: - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Found unrecognized insertion mode '{$this->state->insertion_mode}'." ); + $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); } } catch ( WP_HTML_Unsupported_Exception $e ) { /* @@ -922,8 +986,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_initial() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -942,8 +1005,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_before_html() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -962,8 +1024,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. 
*/ private function step_before_head() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -982,8 +1043,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_head() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1002,8 +1062,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_head_noscript() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1022,8 +1081,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_head() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1445,8 +1503,9 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * > than the end tag token that it actually is. */ case '-BR': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' ); + $this->bail( 'Closing BR tags require unimplemented special handling.' 
); + // This return required because PHPCS can't determine that the call to bail() throws. + return false; /* * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr" @@ -1602,8 +1661,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { case 'TITLE': case 'TR': case 'XMP': - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." ); + $this->bail( "Cannot process {$token_name} element." ); } if ( ! parent::is_tag_closer() ) { @@ -1665,8 +1723,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_table() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1685,8 +1742,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_table_text() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1705,8 +1761,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_caption() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1725,8 +1780,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. 
*/ private function step_in_column_group() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1745,8 +1799,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_table_body() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1765,8 +1818,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_row() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1785,8 +1837,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_cell() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -1986,8 +2037,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_select_in_table() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." 
); } /** @@ -2006,8 +2056,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_template() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -2026,8 +2075,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_body() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -2046,8 +2094,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_frameset() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -2066,8 +2113,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_frameset() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -2086,8 +2132,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. 
*/ private function step_after_after_body() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -2106,8 +2151,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_after_after_frameset() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /** @@ -2126,8 +2170,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { * @return bool Whether an element was found. */ private function step_in_foreign_content() { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); + $this->bail( "No support for parsing in the '{$this->state->insertion_mode}' state." ); } /* @@ -2835,8 +2878,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { return false; } - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); + $this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); } /** @@ -3072,8 +3114,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { // > If there is no such element, then return and instead act as described in the "any other end tag" entry above. if ( null === $formatting_element ) { - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when "any other end tag" is required.' 
); + $this->bail( 'Cannot run adoption agency when "any other end tag" is required.' ); } // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. @@ -3125,12 +3166,10 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { } } - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot extract common ancestor in adoption agency algorithm.' ); + $this->bail( 'Cannot extract common ancestor in adoption agency algorithm.' ); } - $this->last_error = self::ERROR_UNSUPPORTED; - throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' ); + $this->bail( 'Cannot run adoption agency when looping required.' ); } /** diff --git a/wp-includes/html-api/class-wp-html-unsupported-exception.php b/wp-includes/html-api/class-wp-html-unsupported-exception.php index 6e7228670b..7b244a5e8a 100644 --- a/wp-includes/html-api/class-wp-html-unsupported-exception.php +++ b/wp-includes/html-api/class-wp-html-unsupported-exception.php @@ -21,11 +21,95 @@ * operation and signify that the given HTML cannot be processed. * * @since 6.4.0 + * @since 6.7.0 Gained contextual information for use in debugging parse failures. * * @access private * * @see WP_HTML_Processor */ class WP_HTML_Unsupported_Exception extends Exception { + /** + * Name of the matched token when the exception was raised, + * if matched on a token. + * + * This does not imply that the token itself was unsupported, but it + * may have been the case that the token triggered part of the HTML + * parsing that isn't supported, such as the adoption agency algorithm. + * + * @since 6.7.0 + * + * @var string + */ + public $token_name; + /** + * Number of bytes into the input HTML document where the matched token + * starts, as recorded when the exception was raised. + * + * Use this to reconstruct context for the failure. 
+ * + * @since 6.7.0 + * + * @var int + */ + public $token_at; + + /** + * Full raw text of the matched token when the exception was raised, + * if matched on a token. + * + * Whereas the `$token_name` will be normalized, this contains the full + * raw text of the token, including original casing, duplicated attributes, + * and other syntactic variations that are normally abstracted in the HTML API. + * + * @since 6.7.0 + * + * @var string + */ + public $token; + + /** + * Stack of open elements when the exception was raised. + * + * Use this to trace the parsing circumstances which led to the exception. + * + * @since 6.7.0 + * + * @var string[] + */ + public $stack_of_open_elements = array(); + + /** + * List of active formatting elements when the exception was raised. + * + * Use this to trace the parsing circumstances which led to the exception. + * + * @since 6.7.0 + * + * @var string[] + */ + public $active_formatting_elements = array(); + + /** + * Creates the exception, recording the parser context in which it was raised. + * + * @since 6.7.0 + * + * @param string $message Brief message explaining what is unsupported, the reason this exception was raised. + * @param string $token_name Normalized name of matched token when this exception was raised. + * @param int $token_at Number of bytes into source HTML document where matched token starts. + * @param string $token Full raw text of matched token when this exception was raised. + * @param string[] $stack_of_open_elements Stack of open elements when this exception was raised. + * @param string[] $active_formatting_elements List of active formatting elements when this exception was raised. 
+ */ + public function __construct( string $message, string $token_name, int $token_at, string $token, array $stack_of_open_elements, array $active_formatting_elements ) { + parent::__construct( $message ); + + $this->token_name = $token_name; + $this->token_at = $token_at; + $this->token = $token; + + $this->stack_of_open_elements = $stack_of_open_elements; + $this->active_formatting_elements = $active_formatting_elements; + } } diff --git a/wp-includes/version.php b/wp-includes/version.php index 453cc90702..5448ace375 100644 --- a/wp-includes/version.php +++ b/wp-includes/version.php @@ -16,7 +16,7 @@ * * @global string $wp_version */ -$wp_version = '6.7-alpha-58713'; +$wp_version = '6.7-alpha-58714'; /** * Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.