From 0aee557b688f3825d7600614671384fe3980f0a0 Mon Sep 17 00:00:00 2001 From: ryan Date: Wed, 16 Jan 2008 19:15:07 +0000 Subject: [PATCH] phpdoc for kses from darkdragon. fixes #5641 git-svn-id: http://svn.automattic.com/wordpress/trunk@6630 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/kses.php | 556 ++++++++++++++++++++++++++++++------------- 1 file changed, 393 insertions(+), 163 deletions(-) diff --git a/wp-includes/kses.php b/wp-includes/kses.php index fc630f5404..042d9cde92 100644 --- a/wp-includes/kses.php +++ b/wp-includes/kses.php @@ -1,23 +1,46 @@ + * + * @package External + * @subpackage KSES + * + * @internal + * *** CONTACT INFORMATION *** + * E-mail: metaur at users dot sourceforge dot net + * Web page: http://sourceforge.net/projects/kses + * Paper mail: Ulf Harnhammar + * Ymergatan 17 C + * 753 25 Uppsala + * SWEDEN + * + * [kses strips evil scripts!] + */ -// Added wp_ prefix to avoid conflicts with existing kses users -# kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes -# Copyright (C) 2002, 2003, 2005 Ulf Harnhammar -# *** CONTACT INFORMATION *** -# -# E-mail: metaur at users dot sourceforge dot net -# Web page: http://sourceforge.net/projects/kses -# Paper mail: Ulf Harnhammar -# Ymergatan 17 C -# 753 25 Uppsala -# SWEDEN -# -# [kses strips evil scripts!] +/** + * You can override this in your my-hacks.php file + * You can also override this in a plugin file. The + * my-hacks.php is deprecated in its usage. 
+ * + * @since 1.2.0 + */ if (!defined('CUSTOM_TAGS')) define('CUSTOM_TAGS', false); -// You can override this in your my-hacks.php file if (!CUSTOM_TAGS) { + /** + * Kses global for default allowable HTML tags + * + * Can be overridden by using CUSTOM_TAGS constant + * @global array $allowedposttags + * @since 2.0.0 + */ $allowedposttags = array( 'address' => array(), 'a' => array( @@ -251,6 +274,12 @@ if (!CUSTOM_TAGS) { 'ol' => array ( 'class' => array ()), 'var' => array ()); + /** + * Kses allowed HTML elements + * + * @global array $allowedtags + * @since 1.0.0 + */ $allowedtags = array( 'a' => array( 'href' => array (), @@ -286,57 +315,103 @@ if (!CUSTOM_TAGS) { ); } -function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) - ############################################################################### - # This function makes sure that only the allowed HTML element names, attribute - # names and attribute values plus only sane HTML entities will occur in - # $string. You have to remove any slashes from PHP's magic quotes before you - # call this function. - ############################################################################### - { +/** + * wp_kses() - Filters content and keeps only allowable HTML elements. + * + * This function makes sure that only the allowed HTML element names, + * attribute names and attribute values plus only sane HTML entities + * will occur in $string. You have to remove any slashes from PHP's + * magic quotes before you call this function. + * + * The default allowed protocols are 'http', 'https', 'ftp', 'ftps', 'mailto', + * 'news', 'irc', 'gopher', 'nntp', 'feed', and finally 'telnet'. This + * covers all common link protocols, except for 'javascript' which + * should not be allowed for untrusted users. 
+ * + * @since 1.0.0 + * + * @param string $string Content to filter through kses + * @param array $allowed_html List of allowed HTML elements + * @param array $allowed_protocols Optional. Allowed protocol in links. + * @return string Filtered content with only allowed HTML elements + */ +function wp_kses($string, $allowed_html, $allowed_protocols = array ('http', 'https', 'ftp', 'ftps', 'mailto', 'news', 'irc', 'gopher', 'nntp', 'feed', 'telnet')) { $string = wp_kses_no_null($string); $string = wp_kses_js_entities($string); $string = wp_kses_normalize_entities($string); $allowed_html_fixed = wp_kses_array_lc($allowed_html); $string = wp_kses_hook($string, $allowed_html_fixed, $allowed_protocols); // WP changed the order of these funcs and added args to wp_kses_hook return wp_kses_split($string, $allowed_html_fixed, $allowed_protocols); -} # function wp_kses +} -function wp_kses_hook($string, $allowed_html, $allowed_protocols) -############################################################################### -# You add any kses hooks here. -############################################################################### -{ +/** + * wp_kses_hook() - You add any kses hooks here. + * + * There is currently only one kses WordPress hook and it is + * called here. All parameters are passed to the hooks, which are + * expected to return a string. + * + * @since 1.0.0 + * + * @param string $string Content to filter through kses + * @param array $allowed_html List of allowed HTML elements + * @param array $allowed_protocols Allowed protocol in links + * @return string Filtered content through 'pre_kses' hook + */ +function wp_kses_hook($string, $allowed_html, $allowed_protocols) { $string = apply_filters('pre_kses', $string, $allowed_html, $allowed_protocols); return $string; -} # function wp_kses_hook +} -function wp_kses_version() -############################################################################### -# This function returns kses' version number. 
-############################################################################### -{ +/** + * wp_kses_version() - This function returns kses' version number. + * + * @since 1.0.0 + * + * @return string Version Number + */ +function wp_kses_version() { return '0.2.2'; -} # function wp_kses_version +} -function wp_kses_split($string, $allowed_html, $allowed_protocols) -############################################################################### -# This function searches for HTML tags, no matter how malformed. It also -# matches stray ">" characters. -############################################################################### -{ +/** + * wp_kses_split() - Searches for HTML tags, no matter how malformed + * + * It also matches stray ">" characters. + * + * @since 1.0.0 + * + * @param string $string Content to filter + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Content with fixed HTML tags + */ +function wp_kses_split($string, $allowed_html, $allowed_protocols) { return preg_replace('%((|$))|(<[^>]*(>|$)|>))%e', "wp_kses_split2('\\1', \$allowed_html, ".'$allowed_protocols)', $string); -} # function wp_kses_split +} -function wp_kses_split2($string, $allowed_html, $allowed_protocols) -############################################################################### -# This function does a lot of work. It rejects some very malformed things -# like <:::>. It returns an empty string, if the element isn't allowed (look -# ma, no strip_tags()!). Otherwise it splits the tag into an element and an -# attribute list. -############################################################################### -{ +/** + * wp_kses_split2() - Callback for wp_kses_split for fixing malformed HTML tags + * + * This function does a lot of work. It rejects some very malformed things + * like <:::>. It returns an empty string, if the element isn't allowed (look + * ma, no strip_tags()!). 
Otherwise it splits the tag into an element and an + * attribute list. + * + * After the tag is split into an element and an attribute list, it is run + * through another filter which will remove illegal attributes and once + * that is completed, will be returned. + * + * @since 1.0.0 + * @uses wp_kses_attr() + * + * @param string $string Content to filter + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Fixed HTML element + */ +function wp_kses_split2($string, $allowed_html, $allowed_protocols) { $string = wp_kses_stripslashes($string); if (substr($string, 0, 1) != '<') @@ -370,18 +445,26 @@ function wp_kses_split2($string, $allowed_html, $allowed_protocols) # No attributes are allowed for closing elements return wp_kses_attr("$slash$elem", $attrlist, $allowed_html, $allowed_protocols); -} # function wp_kses_split2 +} -function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) -############################################################################### -# This function removes all attributes, if none are allowed for this element. -# If some are allowed it calls wp_kses_hair() to split them further, and then it -# builds up new HTML code from the data that kses_hair() returns. It also -# removes "<" and ">" characters, if there are any left. One more thing it -# does is to check if the tag has a closing XHTML slash, and if it does, -# it puts one in the returned code as well. -############################################################################### -{ +/** + * wp_kses_attr() - Removes all attributes, if none are allowed for this element + * + * If some are allowed it calls wp_kses_hair() to split them further, and then + * it builds up new HTML code from the data that kses_hair() returns. It also + * removes "<" and ">" characters, if there are any left. 
One more thing it + * does is to check if the tag has a closing XHTML slash, and if it does, it + * puts one in the returned code as well. + * + * @since 1.0.0 + * + * @param string $element HTML element/tag + * @param string $attr HTML attributes from HTML element to closing HTML element tag + * @param array $allowed_html Allowed HTML elements + * @param array $allowed_protocols Allowed protocols to keep + * @return string Sanitized HTML element + */ +function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) { # Is there a closing XHTML slash at the end of the attributes? $xhtml_slash = ''; @@ -433,18 +516,25 @@ function wp_kses_attr($element, $attr, $allowed_html, $allowed_protocols) $attr2 = preg_replace('/[<>]/', '', $attr2); return "<$element$attr2$xhtml_slash>"; -} # function wp_kses_attr +} -function wp_kses_hair($attr, $allowed_protocols) -############################################################################### -# This function does a lot of work. It parses an attribute list into an array -# with attribute data, and tries to do the right thing even if it gets weird -# input. It will add quotes around attribute values that don't have any quotes -# or apostrophes around them, to make it easier to produce HTML code that will -# conform to W3C's HTML specification. It will also remove bad URL protocols -# from attribute values. -############################################################################### -{ +/** + * wp_kses_hair() - Builds an attribute list from string containing attributes. + * + * This function does a lot of work. It parses an attribute list into an array + * with attribute data, and tries to do the right thing even if it gets weird + * input. It will add quotes around attribute values that don't have any quotes + * or apostrophes around them, to make it easier to produce HTML code that will + * conform to W3C's HTML specification. It will also remove bad URL protocols + * from attribute values. 
+ * + * @since 1.0.0 + * + * @param string $attr Attribute list from HTML element to closing HTML element tag + * @param array $allowed_protocols Allowed protocols to keep + * @return array List of attributes after parsing + */ +function wp_kses_hair($attr, $allowed_protocols) { $attrarr = array (); $mode = 0; $attrname = ''; @@ -527,7 +617,7 @@ function wp_kses_hair($attr, $allowed_protocols) } # switch if ($working == 0) # not well formed, remove and try again - { + { $attr = wp_kses_html_error($attr); $mode = 0; } @@ -539,15 +629,23 @@ function wp_kses_hair($attr, $allowed_protocols) $attrarr[] = array ('name' => $attrname, 'value' => '', 'whole' => $attrname, 'vless' => 'y'); return $attrarr; -} # function wp_kses_hair +} -function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) -############################################################################### -# This function performs different checks for attribute values. The currently -# implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless" -# with even more checks to come soon. -############################################################################### -{ +/** + * wp_kses_check_attr_val() - Performs different checks for attribute values. + * + * The currently implemented checks are "maxlen", "minlen", "maxval", "minval" + * and "valueless" with even more checks to come soon. + * + * @since 1.0.0 + * + * @param string $value Attribute value + * @param string $vless Whether the value is valueless or not. Use 'y' or 'n' + * @param string $checkname What $checkvalue is checking for. 
+ * @param mixed $checkvalue What constraint the value should pass + * @return bool Whether check passes (true) or not (false) + */ +function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) { $ok = true; switch (strtolower($checkname)) { @@ -603,16 +701,23 @@ function wp_kses_check_attr_val($value, $vless, $checkname, $checkvalue) } # switch return $ok; -} # function wp_kses_check_attr_val +} -function wp_kses_bad_protocol($string, $allowed_protocols) -############################################################################### -# This function removes all non-allowed protocols from the beginning of -# $string. It ignores whitespace and the case of the letters, and it does -# understand HTML entities. It does its work in a while loop, so it won't be -# fooled by a string like "javascript:javascript:alert(57)". -############################################################################### -{ +/** + * wp_kses_bad_protocol() - Sanitize string from bad protocols + * + * This function removes all non-allowed protocols from the beginning + * of $string. It ignores whitespace and the case of the letters, and + * it does understand HTML entities. It does its work in a while loop, + * so it won't be fooled by a string like "javascript:javascript:alert(57)". + * + * @since 1.0.0 + * + * @param string $string Content to filter bad protocols from + * @param array $allowed_protocols Allowed protocols to keep + * @return string Filtered content + */ +function wp_kses_bad_protocol($string, $allowed_protocols) { $string = wp_kses_no_null($string); $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature" $string2 = $string.'a'; @@ -623,34 +728,48 @@ function wp_kses_bad_protocol($string, $allowed_protocols) } # while return $string; -} # function wp_kses_bad_protocol +} -function wp_kses_no_null($string) -############################################################################### -# This function removes any NULL characters in $string. 
-############################################################################### -{ +/** + * wp_kses_no_null() - Removes any NULL characters in $string. + * + * @since 1.0.0 + * + * @param string $string + * @return string + */ +function wp_kses_no_null($string) { $string = preg_replace('/\0+/', '', $string); $string = preg_replace('/(\\\\0)+/', '', $string); return $string; -} # function wp_kses_no_null +} -function wp_kses_stripslashes($string) -############################################################################### -# This function changes the character sequence \" to just " -# It leaves all other slashes alone. It's really weird, but the quoting from -# preg_replace(//e) seems to require this. -############################################################################### -{ +/** + * wp_kses_stripslashes() - Strips slashes from in front of quotes + * + * This function changes the character sequence \" to just " + * It leaves all other slashes alone. It's really weird, but the + * quoting from preg_replace(//e) seems to require this. + * + * @since 1.0.0 + * + * @param string $string String to strip slashes + * @return string Fixed strings with quoted slashes + */ +function wp_kses_stripslashes($string) { return preg_replace('%\\\\"%', '"', $string); -} # function wp_kses_stripslashes +} -function wp_kses_array_lc($inarray) -############################################################################### -# This function goes through an array, and changes the keys to all lower case. -############################################################################### -{ +/** + * wp_kses_array_lc() - Goes through an array and changes the keys to all lower case. 
+ * + * @since 1.0.0 + * + * @param array $inarray Unfiltered array + * @return array Fixed array with all lowercase keys + */ +function wp_kses_array_lc($inarray) { $outarray = array (); foreach ($inarray as $inkey => $inval) { @@ -664,42 +783,64 @@ function wp_kses_array_lc($inarray) } # foreach $inarray return $outarray; -} # function wp_kses_array_lc +} -function wp_kses_js_entities($string) -############################################################################### -# This function removes the HTML JavaScript entities found in early versions of -# Netscape 4. -############################################################################### -{ +/** + * wp_kses_js_entities() - Removes the HTML JavaScript entities found in early versions of Netscape 4. + * + * @since 1.0.0 + * + * @param string $string + * @return string + */ +function wp_kses_js_entities($string) { return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string); -} # function wp_kses_js_entities +} -function wp_kses_html_error($string) -############################################################################### -# This function deals with parsing errors in wp_kses_hair(). The general plan is -# to remove everything to and including some whitespace, but it deals with -# quotes and apostrophes as well. -############################################################################### -{ +/** + * wp_kses_html_error() - Handles parsing errors in wp_kses_hair() + * + * The general plan is to remove everything to and including some + * whitespace, but it deals with quotes and apostrophes as well. 
+ * + * @since 1.0.0 + * + * @param string $string + * @return string + */ +function wp_kses_html_error($string) { return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string); -} # function wp_kses_html_error +} -function wp_kses_bad_protocol_once($string, $allowed_protocols) -############################################################################### -# This function searches for URL protocols at the beginning of $string, while -# handling whitespace and HTML entities. -############################################################################### -{ +/** + * wp_kses_bad_protocol_once() - Sanitizes content from bad protocols and other characters + * + * This function searches for URL protocols at the beginning of $string, + * while handling whitespace and HTML entities. + * + * @since 1.0.0 + * + * @param string $string Content to check for bad protocols + * @param string $allowed_protocols Allowed protocols + * @return string Sanitized content + */ +function wp_kses_bad_protocol_once($string, $allowed_protocols) { return preg_replace('/^((&[^;]*;|[\sA-Za-z0-9])*)'.'(:|:|&#[Xx]3[Aa];)\s*/e', 'wp_kses_bad_protocol_once2("\\1", $allowed_protocols)', $string); -} # function wp_kses_bad_protocol_once +} -function wp_kses_bad_protocol_once2($string, $allowed_protocols) -############################################################################### -# This function processes URL protocols, checks to see if they're in the white- -# list or not, and returns different data depending on the answer. -############################################################################### -{ +/** + * wp_kses_bad_protocol_once2() - Callback for wp_kses_bad_protocol_once() regular expression. + * + * This function processes URL protocols, checks to see if they're in the + * white-list or not, and returns different data depending on the answer. 
+ * + * @since 1.0.0 + * + * @param string $string Content to check for bad protocols + * @param array $allowed_protocols Allowed protocols + * @return string Sanitized content + */ +function wp_kses_bad_protocol_once2($string, $allowed_protocols) { $string2 = wp_kses_decode_entities($string); $string2 = preg_replace('/\s/', '', $string2); $string2 = wp_kses_no_null($string2); @@ -718,14 +859,21 @@ function wp_kses_bad_protocol_once2($string, $allowed_protocols) return "$string2:"; else return ''; -} # function wp_kses_bad_protocol_once2 +} -function wp_kses_normalize_entities($string) -############################################################################### -# This function normalizes HTML entities. It will convert "AT&T" to the correct -# "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on. -############################################################################### -{ +/** + * wp_kses_normalize_entities() - Converts and fixes HTML entities + * + * This function normalizes HTML entities. It will convert "AT&T" to the + * correct "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" + * and so on. + * + * @since 1.0.0 + * + * @param string $string Content to normalize entities + * @return string Content with normalized entities + */ +function wp_kses_normalize_entities($string) { # Disarm all entities by converting & to &amp; $string = str_replace('&', '&', $string); @@ -737,44 +885,97 @@ function wp_kses_normalize_entities($string) $string = preg_replace('/&#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string); return $string; -} # function wp_kses_normalize_entities +} -function wp_kses_normalize_entities2($i) -############################################################################### -# This function helps wp_kses_normalize_entities() to only accept 16 bit values -# and nothing more for &#number; entities. 
-############################################################################### -{ +/** + * wp_kses_normalize_entities2() - Callback for wp_kses_normalize_entities() regular expression + * + * This function helps wp_kses_normalize_entities() to only accept 16 bit + * values and nothing more for &#number; entities. + * + * @since 1.0.0 + * + * @param int $i Number encoded entity + * @return string Correctly encoded entity + */ +function wp_kses_normalize_entities2($i) { return (($i > 65535) ? "&#$i;" : "&#$i;"); -} # function wp_kses_normalize_entities2 +} -function wp_kses_decode_entities($string) -############################################################################### -# This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't -# do anything with other entities like &auml;, but we don't need them in the -# URL protocol whitelisting system anyway. -############################################################################### -{ +/** + * wp_kses_decode_entities() - Convert numeric entities to their character counterparts. + * + * This function decodes numeric HTML entities (&#65; and &#x41;). It + * doesn't do anything with other entities like &auml;, but we don't need + * them in the URL protocol whitelisting system anyway. 
+ * + * @since 1.0.0 + * + * @param string $string Content to change entities + * @return string Content with entities decoded + */ +function wp_kses_decode_entities($string) { $string = preg_replace('/&#([0-9]+);/e', 'chr("\\1")', $string); $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+);/e', 'chr(hexdec("\\1"))', $string); return $string; -} # function wp_kses_decode_entities +} +/** + * wp_filter_kses() - Sanitize content with allowed HTML Kses rules + * + * @since 1.0.0 + * @uses $allowedtags + * + * @param string $data Content to filter + * @return string Filtered content + */ function wp_filter_kses($data) { global $allowedtags; return addslashes( wp_kses(stripslashes( $data ), $allowedtags) ); } +/** + * wp_filter_post_kses() - Sanitize content for allowed HTML tags for post content + * + * Post content refers to the page contents of the 'post' type and not + * $_POST data from forms. + * + * @since 2.0.0 + * @uses $allowedposttags + * + * @param string $data Post content to filter + * @return string Filtered post content with allowed HTML tags and attributes intact. + */ function wp_filter_post_kses($data) { global $allowedposttags; return addslashes ( wp_kses(stripslashes( $data ), $allowedposttags) ); } +/** + * wp_filter_nohtml_kses() - Strips all of the HTML in the content + * + * @since 2.1.0 + * + * @param string $data Content to strip all HTML from + * @return string Filtered content without any HTML + */ function wp_filter_nohtml_kses($data) { return addslashes ( wp_kses(stripslashes( $data ), array()) ); } +/** + * kses_init_filters() - Adds all Kses input form content filters + * + * All hooks have default priority. The wp_filter_kses() function + * is added to the 'pre_comment_content' and 'title_save_pre' + * hooks. The wp_filter_post_kses() function is added to the + * 'content_save_pre', 'excerpt_save_pre', and 'content_filtered_save_pre' + * hooks. 
+ * + * @since 2.0.0 + * @uses add_filter() See description for what functions are added to what hooks. + */ function kses_init_filters() { // Normal filtering. add_filter('pre_comment_content', 'wp_filter_kses'); @@ -786,6 +987,19 @@ function kses_init_filters() { add_filter('content_filtered_save_pre', 'wp_filter_post_kses'); } +/** + * kses_remove_filters() - Removes all Kses input form content filters + * + * A quick procedural method for removing all of the filters + * that kses uses for content in the WordPress Loop. + * + * Does not remove the kses_init() function from 'init' hook + * (priority is default). Also does not remove kses_init() + * function from 'set_current_user' hook (priority is also + * default). + * + * @since 2.0.6 + */ function kses_remove_filters() { // Normal filtering. remove_filter('pre_comment_content', 'wp_filter_kses'); @@ -797,6 +1011,22 @@ function kses_remove_filters() { remove_filter('content_filtered_save_pre', 'wp_filter_post_kses'); } +/** + * kses_init() - Sets up most of the Kses filters for input form content + * + * If you remove the kses_init() function from 'init' hook and + * 'set_current_user' (priority is default), then none of the + * Kses filter hooks will be added. + * + * First removes all of the Kses filters in case the current user + * does not need to have Kses filter the content. If the user does + * not have unfiltered html capability, then Kses filters are added. + * + * @uses kses_remove_filters() Removes the Kses filters + * @uses kses_init_filters() Adds the Kses filters back if the user + * does not have unfiltered HTML capability. + * @since 2.0.0 + */ function kses_init() { kses_remove_filters(); @@ -806,4 +1036,4 @@ function kses_init() { add_action('init', 'kses_init'); add_action('set_current_user', 'kses_init'); -?> +?> \ No newline at end of file