-
- ')) {
- switch ($long) {
- case 0:
- return t('Allowed HTML tags') .': '. check_plain($allowed_html);
- case 1:
- $output = '
'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'
';
- if (!variable_get("filter_html_help_$format", 1)) {
- return $output;
- }
+ if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
+ if ($allowed_html = variable_get("allowed_html_$format", ' -
-
- ')) {
+ switch ($long) {
+ case 0:
+ return t('Allowed HTML tags') .': '. check_plain($allowed_html);
+ case 1:
+ $output = '
'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'
';
+ if (!variable_get("filter_html_help_$format", 1)) {
+ return $output;
+ }
- $output .= t('This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.
+ $output .= t('This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.
For more information see W3C\'s HTML Specifications or use your favorite search engine to find other sites that explain HTML.
');
- $tips = array(
- 'a' => array( t('Anchors are used to make links to other pages.'), ''. variable_get('site_name', 'drupal') .''),
- 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with
line break')),
- 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), ''. t('Paragraph one.') .'
'. t('Paragraph two.') .'
'),
- 'strong' => array( t('Strong'), ''. t('Strong'). ''),
- 'em' => array( t('Emphasized'), ''. t('Emphasized') .''),
- 'cite' => array( t('Cited'), ''. t('Cited') .''),
- 'code' => array( t('Coded text used to show programming source code'), ''. t('Coded') .'
'),
- 'b' => array( t('Bolded'), ''. t('Bolded') .''),
- 'u' => array( t('Underlined'), ''. t('Underlined') .''),
- 'i' => array( t('Italicized'), ''. t('Italicized') .''),
- 'sup' => array( t('Superscripted'), t('Superscripted')),
- 'sub' => array( t('Subscripted'), t('Subscripted')),
- 'pre' => array( t('Preformatted'), ''. t('Preformatted') .'
'),
- 'blockquote' => array( t('Block quoted'), ''. t('Block quoted') .'
'),
- 'q' => array( t('Quoted inline'), ''. t('Quoted inline') .'
'),
- // Assumes and describes tr, td, th.
- 'table' => array( t('Table'), ' '. t('Table header') .' |
'. t('Table cell') .' |
'),
- 'tr' => NULL, 'td' => NULL, 'th' => NULL,
- 'del' => array( t('Deleted'), ''. t('Deleted') .''),
- 'ins' => array( t('Inserted'), ''. t('Inserted') .''),
- // Assumes and describes li.
- 'ol' => array( t('Ordered list - use the <li> to begin each list item'), ' - '. t('First item') .'
- '. t('Second item') .'
'),
- 'ul' => array( t('Unordered list - use the <li> to begin each list item'), ' - '. t('First item') .'
- '. t('Second item') .'
'),
- 'li' => NULL,
- // Assumes and describes dt and dd.
- 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), ' - '. t('First term') .'
- '. t('First definition') .'
- '. t('Second term') .'
- '. t('Second definition') .'
'),
- 'dt' => NULL, 'dd' => NULL,
- 'h1' => array( t('Header'), ''. t('Title') .'
'),
- 'h2' => array( t('Header'), ''. t('Subtitle') .'
'),
- 'h3' => array( t('Header'), ''. t('Subtitle three') .'
'),
- 'h4' => array( t('Header'), ''. t('Subtitle four') .'
'),
- 'h5' => array( t('Header'), ''. t('Subtitle five') .'
'),
- 'h6' => array( t('Header'), ''. t('Subtitle six') .'
')
- );
- $header = array(t('Tag Description'), t('You Type'), t('You Get'));
- preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
- foreach ($out[1] as $tag) {
- if (array_key_exists($tag, $tips)) {
- if ($tips[$tag]) {
- $rows[] = array(
- array('data' => $tips[$tag][0], 'class' => 'description'),
- array('data' => ''. check_plain($tips[$tag][1]) .'
', 'class' => 'type'),
- array('data' => $tips[$tag][1], 'class' => 'get')
- );
- }
- }
- else {
+ $tips = array(
+ 'a' => array( t('Anchors are used to make links to other pages.'), ''. variable_get('site_name', 'drupal') .''),
+ 'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with
line break')),
+ 'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), ''. t('Paragraph one.') .'
'. t('Paragraph two.') .'
'),
+ 'strong' => array( t('Strong'), ''. t('Strong'). ''),
+ 'em' => array( t('Emphasized'), ''. t('Emphasized') .''),
+ 'cite' => array( t('Cited'), ''. t('Cited') .''),
+ 'code' => array( t('Coded text used to show programming source code'), ''. t('Coded') .'
'),
+ 'b' => array( t('Bolded'), ''. t('Bolded') .''),
+ 'u' => array( t('Underlined'), ''. t('Underlined') .''),
+ 'i' => array( t('Italicized'), ''. t('Italicized') .''),
+ 'sup' => array( t('Superscripted'), t('Superscripted')),
+ 'sub' => array( t('Subscripted'), t('Subscripted')),
+ 'pre' => array( t('Preformatted'), ''. t('Preformatted') .'
'),
+ 'blockquote' => array( t('Block quoted'), ''. t('Block quoted') .'
'),
+ 'q' => array( t('Quoted inline'), ''. t('Quoted inline') .'
'),
+ // Assumes and describes tr, td, th.
+ 'table' => array( t('Table'), ' '. t('Table header') .' |
'. t('Table cell') .' |
'),
+ 'tr' => NULL, 'td' => NULL, 'th' => NULL,
+ 'del' => array( t('Deleted'), ''. t('Deleted') .''),
+ 'ins' => array( t('Inserted'), ''. t('Inserted') .''),
+ // Assumes and describes li.
+ 'ol' => array( t('Ordered list - use the <li> to begin each list item'), ' - '. t('First item') .'
- '. t('Second item') .'
'),
+ 'ul' => array( t('Unordered list - use the <li> to begin each list item'), ' - '. t('First item') .'
- '. t('Second item') .'
'),
+ 'li' => NULL,
+ // Assumes and describes dt and dd.
+ 'dl' => array( t('Definition lists are similar to other HTML lists. <dl> begins the definition list, <dt> begins the definition term and <dd> begins the definition description.'), ' - '. t('First term') .'
- '. t('First definition') .'
- '. t('Second term') .'
- '. t('Second definition') .'
'),
+ 'dt' => NULL, 'dd' => NULL,
+ 'h1' => array( t('Header'), ''. t('Title') .'
'),
+ 'h2' => array( t('Header'), ''. t('Subtitle') .'
'),
+ 'h3' => array( t('Header'), ''. t('Subtitle three') .'
'),
+ 'h4' => array( t('Header'), ''. t('Subtitle four') .'
'),
+ 'h5' => array( t('Header'), ''. t('Subtitle five') .'
'),
+ 'h6' => array( t('Header'), ''. t('Subtitle six') .'
')
+ );
+ $header = array(t('Tag Description'), t('You Type'), t('You Get'));
+ preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
+ foreach ($out[1] as $tag) {
+ if (array_key_exists($tag, $tips)) {
+ if ($tips[$tag]) {
$rows[] = array(
- array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+ array('data' => $tips[$tag][0], 'class' => 'description'),
+ array('data' => ''. check_plain($tips[$tag][1]) .'
', 'class' => 'type'),
+ array('data' => $tips[$tag][1], 'class' => 'get')
);
}
}
- $output .= theme('table', $header, $rows);
-
- $output .= t('Most unusual characters can be directly entered without any problems.
-If you do encounter problems, try using HTML character entities. A common example looks like & for an ampersand & character. For a full list of entities see HTML\'s entities page. Some of the available characters include:
');
- $entities = array(
- array( t('Ampersand'), '&'),
- array( t('Greater than'), '>'),
- array( t('Less than'), '<'),
- array( t('Quotation mark'), '"'),
- );
- $header = array(t('Character Description'), t('You Type'), t('You Get'));
- unset($rows);
- foreach ($entities as $entity) {
+ else {
$rows[] = array(
- array('data' => $entity[0], 'class' => 'description'),
- array('data' => ''. check_plain($entity[1]) .'
', 'class' => 'type'),
- array('data' => $entity[1], 'class' => 'get')
+ array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
);
}
- $output .= theme('table', $header, $rows);
- return $output;
+ }
+ $output .= theme('table', $header, $rows);
+
+ $output .= t('Most unusual characters can be directly entered without any problems.
+If you do encounter problems, try using HTML character entities. A common example looks like & for an ampersand & character. For a full list of entities see HTML\'s entities page. Some of the available characters include:
');
+ $entities = array(
+ array( t('Ampersand'), '&'),
+ array( t('Greater than'), '>'),
+ array( t('Less than'), '<'),
+ array( t('Quotation mark'), '"'),
+ );
+ $header = array(t('Character Description'), t('You Type'), t('You Get'));
+ unset($rows);
+ foreach ($entities as $entity) {
+ $rows[] = array(
+ array('data' => $entity[0], 'class' => 'description'),
+ array('data' => ''. check_plain($entity[1]) .'
', 'class' => 'type'),
+ array('data' => $entity[1], 'class' => 'get')
+ );
+ }
+ $output .= theme('table', $header, $rows);
+ return $output;
}
}
- else {
- return t('No HTML tags allowed');
- }
-
- case FILTER_STYLE_STRIP:
+ }
+ else {
return t('No HTML tags allowed');
- }
+ }
break;
case 1:
@@ -924,10 +916,9 @@
* Settings for the HTML filter.
*/
function _filter_html_settings($format) {
- $group = form_radios(t('Filter HTML tags'), "filter_html_$format", variable_get("filter_html_$format", FILTER_HTML_STRIP), array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
+ $group = form_radios(t('Filter HTML tags'), "filter_html_$format", variable_get("filter_html_$format", FILTER_HTML_STRIP), array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape tags')), t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below). If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
$group .= form_textfield(t('Allowed HTML tags'), "allowed_html_$format", variable_get("allowed_html_$format", ' -
-
- '), 64, 255, t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.'));
$group .= form_checkbox(t('Display HTML help'), "filter_html_help_$format", 1, variable_get("filter_html_help_$format", 1), t('If enabled, Drupal will display some basic HTML help in the long filter tips.'));
- $group .= form_radios(t('HTML style attributes'), "filter_style_$format", variable_get("filter_style_$format", FILTER_STYLE_STRIP), array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.'));
$group .= form_checkbox(t('Spam link deterrent'), "filter_html_nofollow_$format", 1, variable_get("filter_html_nofollow_$format", FALSE), t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'));
$output .= form_group(t('HTML filter'), $group);
@@ -939,12 +930,8 @@
*/
function _filter_html($text, $format) {
if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
- // Allow users to enter HTML, but filter it
- $text = strip_tags($text, variable_get("allowed_html_$format", '
-
-
- '));
- if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) {
- $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text);
- }
- $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text);
+ $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '
-
-
- '), -1, PREG_SPLIT_NO_EMPTY);
+ $text = filter_xss($text, $allowed_tags);
}
if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
@@ -1015,6 +1002,252 @@
}
/**
+ * Filters XSS. Based on kses by Ulf Harnhammar, see
+ * http://sourceforge.net/projects/kses
+ *
+ * For examples of various XSS attacks, see:
+ * http://ha.ckers.org/xss.html
+ *
+ * This code does four things:
+ * - Removes characters and constructs that can trick browsers
+ * - Makes sure all HTML entities are well-formed
+ * - Makes sure all HTML tags and attributes are well-formed
+ * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with raw HTML in it. It will be stripped of everything that can cause
+ *   an XSS attack.
+ * @param $allowed_tags
+ *   An array of allowed tags (lower-case element names, without angle brackets).
+ * @return
+ *   The input string in which every tag-like construct and every lone ">" has
+ *   been replaced by the output of _filter_xss_split().
+ */
+function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
+  // Hand the allowed tags to the callback; it keeps them in a static variable
+  // because preg_replace_callback() cannot pass extra arguments.
+  _filter_xss_split($allowed_tags, TRUE);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities: every ampersand becomes "&amp;" ...
+  $string = str_replace('&', '&amp;', $string);
+  // ... then change back only well-formed entities.
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities (leading zeros are dropped)
+  $string = preg_replace('/&amp;#0*([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities (leading zeros are dropped)
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  return preg_replace_callback('%
+    (
+    <[^>]*.(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * Doubles as the storage for the allowed tag list, because it is invoked as a
+ * preg_replace_callback() callback and cannot receive extra arguments.
+ *
+ * @param $m
+ *   An array with various meaning depending on the value of $store.
+ *   If $store is TRUE then the array contains the allowed tags.
+ *   If $store is FALSE then it is the regexp match array from filter_xss();
+ *   $m[1] holds the HTML tag (or lone ">") to process.
+ * @param $store
+ *   Whether to store $m.
+ * @return
+ *   Nothing when $store is TRUE. Otherwise: "&gt;" for a lone ">", an empty
+ *   string if the element is malformed or not allowed, or the cleaned up
+ *   version of the HTML element.
+ */
+function _filter_xss_split($m, $store = FALSE) {
+  static $allowed_html;
+
+  if ($store) {
+    // Flip so that tag names become keys and can be tested with isset().
+    $allowed_html = array_flip($m);
+    return;
+  }
+
+  $string = $m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character; emit it escaped.
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = $matches[2];
+  $attrlist = $matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    // Closing tag: attributes are meaningless here, so emit only the name.
+    return "</$elem>";
+  }
+
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes.
+ *
+ * Works as a small state machine over the remaining attribute text:
+ * mode 0 expects an attribute name, mode 1 an equals sign (or the end of a
+ * valueless attribute), mode 2 an attribute value. "style" and "on*"
+ * attributes are parsed but never emitted.
+ *
+ * @param $attr
+ *   The raw attribute portion of a tag (everything after the element name).
+ * @return
+ *   An array of cleaned up attributes, each either 'name' or 'name="value"'
+ *   (single quotes are kept for single-quoted input).
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+  // Whether the attribute currently being parsed must be dropped.
+  $skip = FALSE;
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            // Unquoted values are re-emitted with double quotes.
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      // NOTE(review): the "|" that ends the single-quote line followed by the
+      // "|" on the next line forms an empty alternative inside the group;
+      // the pattern is left untouched here - confirm whether that is intended.
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+        |               # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        |               # or
+        \S              # - a non-whitespace character
+        )*              # any number of the above three
+        \s*             # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // the attribute list ends with a valueless attribute like "selected";
+  // honour $skip here too so a trailing bare "style"/"on*" name is dropped.
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen (UTF-8 bytes 0xC2 0xAD)
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols. Repeat until the string stops changing, so that a
+  // disallowed prefix hidden behind another disallowed prefix is removed too.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('/^([^:]+):/', '_filter_xss_bad_protocol', $string);
+  } while ($before != $string);
+  return check_plain($string);
+}
+
+/**
+ * Callback for filter_xss_bad_protocol(): vets one leading "protocol:" prefix.
+ *
+ * @param $m
+ *   The regexp match array; $m[1] is the text in front of the first colon.
+ * @return
+ *   The prefix with all whitespace removed and the colon re-appended if it is
+ *   in the 'filter_allowed_protocols' list, an empty string otherwise.
+ */
+function _filter_xss_bad_protocol($m) {
+  static $allowed_protocols;
+
+  // Build the lookup table on first use; protocol names become array keys.
+  if ($allowed_protocols === NULL) {
+    $protocol_list = variable_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal'));
+    $allowed_protocols = array_flip($protocol_list);
+  }
+
+  $candidate = preg_replace('/\s+/', '', $m[1]);
+  if (isset($allowed_protocols[$candidate])) {
+    return "$candidate:";
+  }
+  return '';
+}
+
+/**
* @} End of "Standard filters".
*/
Index: modules/system.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/system.module,v
retrieving revision 1.204.2.4
diff -u -r1.204.2.4 system.module
--- modules/system.module 26 Jul 2005 01:56:18 -0000 1.204.2.4
+++ modules/system.module 30 Nov 2005 20:32:58 -0000
@@ -47,7 +47,7 @@
* Implementation of hook_perm().
*/
function system_perm() {
- return array('administer site configuration', 'access administration pages', 'bypass input data check');
+ return array('administer site configuration', 'access administration pages');
}
/**