Index: includes/bootstrap.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/bootstrap.inc,v
retrieving revision 1.44
diff -u -r1.44 bootstrap.inc
--- includes/bootstrap.inc  5 Apr 2005 19:00:24 -0000 1.44
+++ includes/bootstrap.inc  30 Nov 2005 20:32:56 -0000
@@ -538,19 +538,6 @@
 }

 /**
- * Prepare a URL for use in an HTML attribute.
- *
- * We replace ( and ) with their url-encoded equivalents to prevent XSS attacks.
- */
-function check_url($uri) {
-  $uri = htmlspecialchars($uri, ENT_QUOTES);
-
-  $uri = strtr($uri, array('(' => '%28', ')' => '%29'));
-
-  return $uri;
-}
-
-/**
  * Since request_uri() is only available on Apache, we generate an
  * equivalent using other environment vars.
  */
@@ -636,6 +623,13 @@
   return $messages;
 }

+/**
+ * Encode HTML special characters, including single and double quotes, in a plain-text string for display as HTML.
+ */
+function check_plain($text) {
+  return htmlspecialchars($text, ENT_QUOTES);
+}
+
 unset($conf);
 $config = conf_init();

Index: includes/common.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/common.inc,v
retrieving revision 1.434.2.10
diff -u -r1.434.2.10 common.inc
--- includes/common.inc 29 Jul 2005 19:05:31 -0000  1.434.2.10
+++ includes/common.inc 30 Nov 2005 20:32:56 -0000
@@ -548,13 +548,6 @@
 }

 /**
- * Encode special characters in a plain-text string for display as HTML.
- */
-function check_plain($text) {
-  return htmlspecialchars($text, ENT_QUOTES);
-}
-
-/**
  * @defgroup validation Input validation
  * @{
  * Functions to validate user input.
@@ -600,54 +593,6 @@
 }

 /**
- * Validate data input by a user.
- *
- * Ensures that user data cannot be used to perform attacks on the site.
- *
- * @param $data
- *   The input to check.
- * @return
- *   TRUE if the input data is acceptable.
- */
-function valid_input_data($data) {
-  if (is_array($data) || is_object($data)) {
-    // Form data can contain a number of nested arrays.
-    foreach ($data as $key => $value) {
-      if (!valid_input_data($key) || !valid_input_data($value)) {
-        return FALSE;
-      }
-    }
-  }
-  else if (isset($data)) {
-    // Detect dangerous input data.
-
-    // Decode all normal character entities.
-    $data = decode_entities($data, array('<', '&', '"'));
-
-    // Check strings:
-    $match  = preg_match('/\Wjavascript\s*:/i', $data);
-    $match += preg_match('/\Wexpression\s*\(/i', $data);
-    $match += preg_match('/\Walert\s*\(/i', $data);
-
-    // Check attributes:
-    $match += preg_match("/\W(dynsrc|datasrc|data|lowsrc|on[a-z]+)\s*=[^>]+?>/i", $data);
-
-    // Check tags:
-    $match += preg_match("/<\s*(applet|script|object|style|embed|form|blink|meta|html|frame|iframe|layer|ilayer|head|frameset|xml)/i", $data);
-
-    if ($match) {
-      watchdog('security', t('Terminated request because of suspicious input data: %data.', array('%data' => theme('placeholder', $data))));
-      return FALSE;
-    }
-  }
-
-  return TRUE;
-}
-/**
- * @} End of "defgroup validation".
- */
-
-/**
  * Register an event for the current visitor (hostname/IP) to the flood control mechanism.
  *
  * @param $name
@@ -679,6 +624,17 @@
 }

 /**
+ * Prepare a URL for use in an HTML attribute.
+ *
+ * The URI is HTML-escaped, then its harmful protocols are stripped.
+ */
+function check_url($uri) {
+  $escaped = htmlspecialchars($uri, ENT_QUOTES);
+
+  return filter_xss_bad_protocol($escaped, FALSE);
+}
+
+/**
  * @defgroup format Formatting
  * @{
  * Functions to format numbers, strings, dates, etc.
@@ -1054,7 +1010,7 @@
  *   A themed HTML string representing the form item group.
  */
 function form_group($legend, $group, $description = NULL) {
-  return '<fieldset>' . ($legend ? '<legend>'. $legend .'</legend>' : '') . $group . ($description ? '<div class="description">'. $description .'</div>' : '') . "</fieldset>\n";
+  return '<fieldset>' . ($legend ? '<legend>'. check_plain($legend) .'</legend>' : '') . $group . ($description ? '<div class="description">'. $description .'</div>' : '') . "</fieldset>\n";
 }

 /**
@@ -1078,7 +1034,7 @@
  *   A themed HTML string representing the radio button.
  */
 function form_radio($title, $name, $value = 1, $checked = FALSE, $description = NULL, $attributes = NULL, $required = FALSE) {
-  $element = '<input type="radio" class="'. _form_get_class('form-radio', $required, _form_get_error($name)) .'" name="edit['. $name .']" value="'. $value .'"'. ($checked ? ' checked="checked"' : '') . drupal_attributes($attributes) .' />';
+  $element = '<input type="radio" class="'. _form_get_class('form-radio', $required, _form_get_error($name)) .'" name="edit['. $name .']" value="'. check_plain($value) .'"'. ($checked ? ' checked="checked"' : '') . drupal_attributes($attributes) .' />';
   if (!is_null($title)) {
     $element = '<label class="option">'. $element .' '. $title .'</label>';
   }
@@ -1110,7 +1066,7 @@
   if (count($options) > 0) {
     $choices = '';
     foreach ($options as $key => $choice) {
-      $choices .= '<label class="option"><input type="radio" class="form-radio" name="edit['. $name .']" value="'. $key .'"'. ($key == $value ? ' checked="checked"' : ''). drupal_attributes($attributes) .' /> '. $choice .'</label><br />';
+      $choices .= '<label class="option"><input type="radio" class="form-radio" name="edit['. $name .']" value="'. check_plain($key) .'"'. ($key == $value ? ' checked="checked"' : ''). drupal_attributes($attributes) .' /> '. $choice .'</label><br />';
     }
     return theme('form_element', $title, $choices, $description, NULL, $required, _form_get_error($name));
   }
@@ -1137,7 +1093,7 @@
  *   A themed HTML string representing the checkbox.
  */
 function form_checkbox($title, $name, $value = 1, $checked = FALSE, $description = NULL, $attributes = NULL, $required = FALSE) {
-  $element = '<input type="checkbox" class="'. _form_get_class('form-checkbox', $required, _form_get_error($name)) .'" name="edit['. $name .']" id="edit-'. $name .'" value="'. $value .'"'. ($checked ? ' checked="checked"' : '') . drupal_attributes($attributes) .' />';
+  $element = '<input type="checkbox" class="'. _form_get_class('form-checkbox', $required, _form_get_error($name)) .'" name="edit['. $name .']" id="edit-'. $name .'" value="'. check_plain($value) .'"'. ($checked ? ' checked="checked"' : '') . drupal_attributes($attributes) .' />';
   if (!is_null($title)) {
     $element = '<label class="option">'. $element .' '. $title .'</label>';
   }
@@ -1174,7 +1130,7 @@
     }
     $choices = '';
     foreach ($options as $key => $choice) {
-      $choices .= '<label class="option"><input type="checkbox" class="form-checkbox" name="edit['. $name .'][]" value="'. $key .'"'. (in_array($key, $values) ? ' checked="checked"' : ''). drupal_attributes($attributes) .' /> '. $choice .'</label><br />';
+      $choices .= '<label class="option"><input type="checkbox" class="form-checkbox" name="edit['. $name .'][]" value="'. check_plain($key) .'"'. (in_array($key, $values) ? ' checked="checked"' : ''). drupal_attributes($attributes) .' /> '. $choice .'</label><br />';
     }
     // Note: because unchecked boxes are not included in the POST data, we
     // include a form_hidden() which will be overwritten as soon as there is at
@@ -1260,7 +1216,6 @@ function form_password($title, $name, $v
  *   A themed HTML string representing the field.
  */
 function form_textarea($title, $name, $value, $cols, $rows, $description = NULL, $attributes = NULL, $required = FALSE) {
-  $cols = $cols ? ' cols="'. $cols .'"' : '';
   $pre = '';
   $post = '';

@@ -1272,7 +1227,7 @@ function form_textarea($title, $name, $v
     }
   }

-  return theme('form_element', $title, $pre .'<textarea wrap="virtual"'. $cols .' rows="'. $rows .'" name="edit['. $name .']" id="edit-'. $name .'" class="'. _form_get_class('textarea', $required, _form_get_error($name)) .'"'. drupal_attributes($attributes) .'>'. check_plain($value) .'</textarea>'. $post, $description, 'edit-'. $name, $required, _form_get_error($name));
+  return theme('form_element', $title, $pre .'<textarea wrap="virtual" cols="'. check_plain($cols) .'" rows="'. check_plain($rows) .'" name="edit['. $name .']" id="edit-'. $name .'" class="'. _form_get_class('textarea', $required, _form_get_error($name)) .'"'. drupal_attributes($attributes) .'>'. check_plain($value) .'</textarea>'. $post, $description, 'edit-'. $name, $required, _form_get_error($name));
 }

 /**
@@ -1307,14 +1263,14 @@
   $select = '';
   foreach ($options as $key => $choice) {
     if (is_array($choice)) {
-      $select .= '<optgroup label="'. $key .'">';
+      $select .= '<optgroup label="'. check_plain($key) .'">';
       foreach ($choice as $key => $choice) {
-        $select .= '<option value="'. $key .'"'. (is_array($value) ? (in_array($key, $value) ? ' selected="selected"' : '') : ($value == $key ? ' selected="selected"' : '')) .'>'. check_plain($choice) .'</option>';
+        $select .= '<option value="'. check_plain($key) .'"'. (is_array($value) ? (in_array($key, $value) ? ' selected="selected"' : '') : ($value == $key ? ' selected="selected"' : '')) .'>'. check_plain($choice) .'</option>';
       }
       $select .= '</optgroup>';
     }
     else {
-      $select .= '<option value="'. $key .'"'. (is_array($value) ? (in_array($key, $value) ? ' selected="selected"' : '') : ($value == $key ? ' selected="selected"' : '')) .'>'. check_plain($choice) .'</option>';
+      $select .= '<option value="'. check_plain($key) .'"'. (is_array($value) ? (in_array($key, $value) ? ' selected="selected"' : '') : ($value == $key ? ' selected="selected"' : '')) .'>'. check_plain($choice) .'</option>';
     }
   }
   return theme('form_element', $title, '<select name="edit['. $name .']'. ($multiple ? '[]' : '') .'"'. ($multiple ? ' multiple="multiple" ' : '') . ($extra ? ' '. $extra : '') .' id="edit-'. $name .'">'. $select .'</select>', $description, 'edit-'. $name, $required, _form_get_error($name));
@@ -1799,6 +1755,10 @@
   if ($prefix == '#x') {
     $codepoint = base_convert($codepoint, 16, 10);
   }
+  else {
+    // Decimal numerical entity (strip leading zeros to avoid PHP octal notation)
+    $codepoint = preg_replace('/^0+/', '', $codepoint);
+  }
   // Encode codepoint as UTF-8 bytes
   if ($codepoint < 0x80) {
     $str = chr($codepoint);
@@ -1918,18 +1878,6 @@
 // Initialize all enabled modules.
 module_init();

-if (!user_access('bypass input data check')) {
-  // We can't use $_REQUEST because it consists of the contents of $_POST,
-  // $_GET and $_COOKIE: if any of the input arrays share a key, only one
-  // value will be verified.
-  if (!valid_input_data($_GET)
-   || !valid_input_data($_POST)
-   || !valid_input_data($_COOKIE)
-   || !valid_input_data($_FILES)) {
-    die('Terminated request because of suspicious input data.');
-  }
-}
-
 // Initialize the localization system.
 $locale = locale_initialize();

Index: includes/database.mysql.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database.mysql.inc,v
retrieving revision 1.27.2.2
diff -u -r1.27.2.2 database.mysql.inc
--- includes/database.mysql.inc 14 Aug 2005 09:58:26 -0000  1.27.2.2
+++ includes/database.mysql.inc 30 Nov 2005 20:32:56 -0000
@@ -63,7 +63,7 @@
     return $result;
   }
   else {
-    trigger_error(mysql_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+    trigger_error(check_plain(mysql_error() ."\nquery: ". $query), E_USER_ERROR);
     return FALSE;
   }
 }
Index: includes/database.pgsql.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database.pgsql.inc,v
retrieving revision 1.6.2.2
diff -u -r1.6.2.2 database.pgsql.inc
--- includes/database.pgsql.inc 14 Aug 2005 09:58:26 -0000  1.6.2.2
+++ includes/database.pgsql.inc 30 Nov 2005 20:32:56 -0000
@@ -59,7 +59,7 @@
     return $last_result;
   }
   else {
-    trigger_error(pg_last_error() ."\nquery: ". htmlspecialchars($query), E_USER_ERROR);
+    trigger_error(check_plain(pg_last_error() ."\nquery: ". $query), E_USER_ERROR);
     return FALSE;
   }
 }
Index: modules/aggregator.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/aggregator.module,v
retrieving revision 1.233.2.6
diff -u -r1.233.2.6 aggregator.module
--- modules/aggregator.module 3 Jul 2005 16:09:12 -0000 1.233.2.6
+++ modules/aggregator.module 30 Nov 2005 20:32:56 -0000
@@ -483,9 +483,7 @@
     foreach ($item as $key => $value) {
       // TODO: Make handling of aggregated HTML more flexible/configurable.
       $value = decode_entities(trim($value));
-      $value = strip_tags($value, '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');
-      $value = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $value);
-      $value = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $value);
+      $value = filter_xss($value);
       $item[$key] = $value;
     }

@@ -549,12 +547,7 @@
       $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND title = '%s'", $feed['fid'], $title));
     }

-    if (!valid_input_data($item['DESCRIPTION'])) {
-      drupal_set_message(t('Failed to parse entry from %site feed: suspicious input data.', array('%site' => theme('placeholder', $feed['title']))), 'error');
-    }
-    else {
-      aggregator_save_item(array('iid' => $entry->iid, 'fid' => $feed['fid'], 'timestamp' => $timestamp, 'title' => $title, 'link' => $link, 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION']));
-    }
+    aggregator_save_item(array('iid' => $entry->iid, 'fid' => $feed['fid'], 'timestamp' => $timestamp, 'title' => $title, 'link' => $link, 'author' => $item['AUTHOR'], 'description' => $item['DESCRIPTION']));
   }

   /*
Index: modules/blogapi.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/blogapi.module,v
retrieving revision 1.39.2.7
diff -u -r1.39.2.7 blogapi.module
--- modules/blogapi.module  14 Aug 2005 22:33:25 -0000  1.39.2.7
+++ modules/blogapi.module  30 Nov 2005 20:32:57 -0000
@@ -204,10 +204,6 @@
     $edit['body'] = $content;
   }

-  if (!valid_input_data($edit['title'], $edit['body'])) {
-    return blogapi_error(t('Terminated request because of suspicious input data.'));
-  }
-
   $node = node_validate($edit);

   if ($errors = form_get_errors()) {
@@ -262,10 +258,6 @@
     $node->body = $content;
   }

-  if (!valid_input_data($node->title, $node->body)) {
-    return blogapi_error(t('Terminated request because of suspicious input data.'));
-  }
-
   $node = node_validate($node);

   if ($errors = form_get_errors()) {
Index: modules/filter.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/filter.module,v
retrieving revision 1.57.2.6
diff -u -r1.57.2.6 filter.module
--- modules/filter.module 29 Jun 2005 19:53:35 -0000  1.57.2.6
+++ modules/filter.module 30 Nov 2005 20:32:57 -0000
@@ -1,5 +1,5 @@
 <?php
-// $Id: filter.module,v 1.57.2.6 2005/06/29 19:53:35 dries Exp $
+// $Id: filter.module,v 1.57.2.8 2005/11/30 00:11:53 unconed Exp $

 /**
  * @file
@@ -14,9 +14,6 @@
 define('FILTER_HTML_STRIP', 1);
 define('FILTER_HTML_ESCAPE', 2);

-define('FILTER_STYLE_ALLOW', 0);
-define('FILTER_STYLE_STRIP', 1);
-
 /**
  * Implementation of hook_help().
  */
@@ -54,104 +51,99 @@
   global $base_url;
   switch ($delta) {
     case 0:
-      switch (variable_get("filter_html_$format", FILTER_HTML_STRIP)) {
-
-        case FILTER_HTML_STRIP:
-          if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
-            switch ($long) {
-              case 0:
-                return t('Allowed HTML tags') .': '. check_plain($allowed_html);
-              case 1:
-                $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
-                if (!variable_get("filter_html_help_$format", 1)) {
-                  return $output;
-                }
+      if (variable_get("filter_html_$format", FILTER_HTML_STRIP) ==  FILTER_HTML_STRIP) {
+        if ($allowed_html = variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>')) {
+          switch ($long) {
+            case 0:
+              return t('Allowed HTML tags') .': '. check_plain($allowed_html);
+            case 1:
+              $output = '<p>'. t('Allowed HTML tags') .': '. check_plain($allowed_html) .'</p>';
+              if (!variable_get("filter_html_help_$format", 1)) {
+                return $output;
+              }

-                $output .= t('<p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
+              $output .= t('<p>This site allows HTML content. While learning all of HTML may feel intimidating, learning how to use a very small number of the most basic HTML "tags" is very easy. This table provides examples for each tag that is enabled on this site.</p>
 <p>For more information see W3C\'s <a href="http://www.w3.org/TR/html/">HTML Specifications</a> or use your favorite search engine to find other sites that explain HTML.</p>');
-                $tips = array(
-                  'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
-                  'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
-                  'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
-                  'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
-                  'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
-                  'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
-                  'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
-                  'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
-                  'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
-                  'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
-                  'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
-                  'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
-                  'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
-                  'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
-                  'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
-                  // Assumes and describes tr, td, th.
-                  'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
-                  'tr' => NULL, 'td' => NULL, 'th' => NULL,
-                  'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
-                  'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
-                   // Assumes and describes li.
-                  'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
-                  'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
-                  'li' => NULL,
-                  // Assumes and describes dt and dd.
-                  'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
-                  'dt' => NULL, 'dd' => NULL,
-                  'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
-                  'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
-                  'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
-                  'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
-                  'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
-                  'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
-                );
-                $header = array(t('Tag Description'), t('You Type'), t('You Get'));
-                preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
-                foreach ($out[1] as $tag) {
-                  if (array_key_exists($tag, $tips)) {
-                    if ($tips[$tag]) {
-                      $rows[] = array(
-                        array('data' => $tips[$tag][0], 'class' => 'description'),
-                        array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
-                        array('data' => $tips[$tag][1], 'class' => 'get')
-                      );
-                    }
-                  }
-                  else {
+              $tips = array(
+                'a' => array( t('Anchors are used to make links to other pages.'), '<a href="'. $base_url .'">'. variable_get('site_name', 'drupal') .'</a>'),
+                'br' => array( t('By default line break tags are automatically added, so use this tag to add additional ones. Use of this tag is different because it is not used with an open/close pair like all the others. Use the extra " /" inside the tag to maintain XHTML 1.0 compatibility'), t('Text with <br />line break')),
+                'p' => array( t('By default paragraph tags are automatically added, so use this tag to add additional ones.'), '<p>'. t('Paragraph one.') .'</p> <p>'. t('Paragraph two.') .'</p>'),
+                'strong' => array( t('Strong'), '<strong>'. t('Strong'). '</strong>'),
+                'em' => array( t('Emphasized'), '<em>'. t('Emphasized') .'</em>'),
+                'cite' => array( t('Cited'), '<cite>'. t('Cited') .'</cite>'),
+                'code' => array( t('Coded text used to show programming source code'), '<code>'. t('Coded') .'</code>'),
+                'b' => array( t('Bolded'), '<b>'. t('Bolded') .'</b>'),
+                'u' => array( t('Underlined'), '<u>'. t('Underlined') .'</u>'),
+                'i' => array( t('Italicized'), '<i>'. t('Italicized') .'</i>'),
+                'sup' => array( t('Superscripted'), t('<sup>Super</sup>scripted')),
+                'sub' => array( t('Subscripted'), t('<sub>Sub</sub>scripted')),
+                'pre' => array( t('Preformatted'), '<pre>'. t('Preformatted') .'</pre>'),
+                'blockquote' => array( t('Block quoted'), '<blockquote>'. t('Block quoted') .'</blockquote>'),
+                'q' => array( t('Quoted inline'), '<q>'. t('Quoted inline') .'</q>'),
+                // Assumes and describes tr, td, th.
+                'table' => array( t('Table'), '<table> <tr><th>'. t('Table header') .'</th></tr> <tr><td>'. t('Table cell') .'</td></tr> </table>'),
+                'tr' => NULL, 'td' => NULL, 'th' => NULL,
+                'del' => array( t('Deleted'), '<del>'. t('Deleted') .'</del>'),
+                'ins' => array( t('Inserted'), '<ins>'. t('Inserted') .'</ins>'),
+                // Assumes and describes li.
+                'ol' => array( t('Ordered list - use the &lt;li&gt; to begin each list item'), '<ol> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ol>'),
+                'ul' => array( t('Unordered list - use the &lt;li&gt; to begin each list item'), '<ul> <li>'. t('First item') .'</li> <li>'. t('Second item') .'</li> </ul>'),
+                'li' => NULL,
+                // Assumes and describes dt and dd.
+                'dl' => array( t('Definition lists are similar to other HTML lists. &lt;dl&gt; begins the definition list, &lt;dt&gt; begins the definition term and &lt;dd&gt; begins the definition description.'), '<dl> <dt>'. t('First term') .'</dt> <dd>'. t('First definition') .'</dd> <dt>'. t('Second term') .'</dt> <dd>'. t('Second definition') .'</dd> </dl>'),
+                'dt' => NULL, 'dd' => NULL,
+                'h1' => array( t('Header'), '<h1>'. t('Title') .'</h1>'),
+                'h2' => array( t('Header'), '<h2>'. t('Subtitle') .'</h2>'),
+                'h3' => array( t('Header'), '<h3>'. t('Subtitle three') .'</h3>'),
+                'h4' => array( t('Header'), '<h4>'. t('Subtitle four') .'</h4>'),
+                'h5' => array( t('Header'), '<h5>'. t('Subtitle five') .'</h5>'),
+                'h6' => array( t('Header'), '<h6>'. t('Subtitle six') .'</h6>')
+              );
+              $header = array(t('Tag Description'), t('You Type'), t('You Get'));
+              preg_match_all('/<([a-z0-9]+)[^a-z0-9]/i', $allowed_html, $out);
+              foreach ($out[1] as $tag) {
+                if (array_key_exists($tag, $tips)) {
+                  if ($tips[$tag]) {
                     $rows[] = array(
-                      array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
+                      array('data' => $tips[$tag][0], 'class' => 'description'),
+                      array('data' => '<code>'. check_plain($tips[$tag][1]) .'</code>', 'class' => 'type'),
+                      array('data' => $tips[$tag][1], 'class' => 'get')
                     );
                   }
                 }
-                $output .= theme('table', $header, $rows);
-
-                $output .= t('<p>Most unusual characters can be directly entered without any problems.</p>
-<p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
-                $entities = array(
-                  array( t('Ampersand'), '&amp;'),
-                  array( t('Greater than'), '&gt;'),
-                  array( t('Less than'), '&lt;'),
-                  array( t('Quotation mark'), '&quot;'),
-                );
-                $header = array(t('Character Description'), t('You Type'), t('You Get'));
-                unset($rows);
-                foreach ($entities as $entity) {
+                else {
                   $rows[] = array(
-                    array('data' => $entity[0], 'class' => 'description'),
-                    array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
-                    array('data' => $entity[1], 'class' => 'get')
+                    array('data' => t('No help provided for tag %tag.', array('%tag' => check_plain($tag))), 'class' => 'description', 'colspan' => 3),
                   );
                 }
-                $output .= theme('table', $header, $rows);
-                return $output;
+              }
+              $output .= theme('table', $header, $rows);
+
+              $output .= t('<p>Most unusual characters can be directly entered without any problems.</p>
+<p>If you do encounter problems, try using HTML character entities. A common example looks like &amp;amp; for an ampersand &amp; character. For a full list of entities see HTML\'s <a href="http://www.w3.org/TR/html4/sgml/entities.html">entities</a> page. Some of the available characters include:</p>');
+              $entities = array(
+                array( t('Ampersand'), '&amp;'),
+                array( t('Greater than'), '&gt;'),
+                array( t('Less than'), '&lt;'),
+                array( t('Quotation mark'), '&quot;'),
+              );
+              $header = array(t('Character Description'), t('You Type'), t('You Get'));
+              unset($rows);
+              foreach ($entities as $entity) {
+                $rows[] = array(
+                  array('data' => $entity[0], 'class' => 'description'),
+                  array('data' => '<code>'. check_plain($entity[1]) .'</code>', 'class' => 'type'),
+                  array('data' => $entity[1], 'class' => 'get')
+                );
+              }
+              $output .= theme('table', $header, $rows);
+              return $output;
             }
           }
-          else {
-            return t('No HTML tags allowed');
-          }
-
-        case FILTER_STYLE_STRIP:
+        else {
           return t('No HTML tags allowed');
-      }
+        }
+      }
       break;

     case 1:
@@ -924,10 +916,9 @@
  * Settings for the HTML filter.
  */
 function _filter_html_settings($format) {
-  $group = form_radios(t('Filter HTML tags'), "filter_html_$format", variable_get("filter_html_$format", FILTER_HTML_STRIP), array(FILTER_HTML_STRIP => t('Strip tags'), FILTER_HTML_ESCAPE => t('Escape tags')), t('How to deal with HTML tags in user-contributed content. If set to "Strip tags", dangerous tags are removed (see below).  If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
+  $group = form_radios(t('Filter HTML tags'), "filter_html_$format", variable_get("filter_html_$format", FILTER_HTML_STRIP), array(FILTER_HTML_STRIP => t('Strip disallowed tags'), FILTER_HTML_ESCAPE => t('Escape tags')), t('How to deal with HTML tags in user-contributed content. If set to "Strip disallowed tags", dangerous tags are removed (see below).  If set to "Escape tags", all HTML is escaped and presented as it was typed.'));
   $group .= form_textfield(t('Allowed HTML tags'), "allowed_html_$format", variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), 64, 255, t('If "Strip tags" is selected, optionally specify tags which should not be stripped. Javascript event attributes are always stripped.'));
   $group .= form_checkbox(t('Display HTML help'), "filter_html_help_$format", 1, variable_get("filter_html_help_$format", 1), t('If enabled, Drupal will display some basic HTML help in the long filter tips.'));
-  $group .= form_radios(t('HTML style attributes'), "filter_style_$format", variable_get("filter_style_$format", FILTER_STYLE_STRIP), array(FILTER_STYLE_ALLOW => t('Allowed'), FILTER_STYLE_STRIP => t('Removed')), t('If "Strip tags" is selected, you can choose whether "STYLE" attributes are allowed or removed from input.'));
   $group .= form_checkbox(t('Spam link deterrent'), "filter_html_nofollow_$format", 1, variable_get("filter_html_nofollow_$format", FALSE), t('If enabled, Drupal will add rel="nofollow" to all links, as a measure to reduce the effectiveness of spam links. Note: this will also prevent valid links from being followed by search engines, therefore it is likely most effective when enabled for anonymous users.'));
   $output .= form_group(t('HTML filter'), $group);

@@ -939,12 +930,8 @@
  */
 function _filter_html($text, $format) {
   if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_STRIP) {
-    // Allow users to enter HTML, but filter it
-    $text = strip_tags($text, variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'));
-    if (variable_get("filter_style_$format", FILTER_STYLE_STRIP)) {
-      $text = preg_replace('/\Wstyle\s*=[^>]+?>/i', '>', $text);
-    }
-    $text = preg_replace('/\Won[a-z]+\s*=[^>]+?>/i', '>', $text);
+    $allowed_tags = preg_split('/\s+|<|>/', variable_get("allowed_html_$format", '<a> <em> <strong> <cite> <code> <ul> <ol> <li> <dl> <dt> <dd>'), -1, PREG_SPLIT_NO_EMPTY);
+    $text = filter_xss($text, $allowed_tags);
   }

   if (variable_get("filter_html_$format", FILTER_HTML_STRIP) == FILTER_HTML_ESCAPE) {
@@ -1015,6 +1002,252 @@
 }

 /**
+ * Filters XSS. Based on kses by Ulf Harnhammar, see
+ * http://sourceforge.net/projects/kses
+ *
+ * For examples of various XSS attacks, see:
+ * http://ha.ckers.org/xss.html
+ *
+ * This code does four things:
+ * - Removes characters and constructs that can trick browsers
+ * - Makes sure all HTML entities are well-formed
+ * - Makes sure all HTML tags and attributes are well-formed
+ * - Makes sure no HTML tags contain URLs with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with raw HTML in it. It will be stripped of everything that can cause
+ *   an XSS attack.
+ * @param $allowed_tags
+ *   An array of allowed tags.
+ * @return
+ *   The filtered version of $string.
+ */
+function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) {
+  // Hand the allowed tags to _filter_xss_split(), which is also the callback used below
+  _filter_xss_split($allowed_tags, TRUE);
+  // Remove NUL characters (ignored by some browsers)
+  $string = str_replace(chr(0), '', $string);
+  // Remove Netscape 4 JS entities
+  $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
+
+  // Defuse all HTML entities
+  $string = str_replace('&', '&amp;', $string);
+  // Change back only well-formed entities in our whitelist
+  // Named entities
+  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]*;)/', '&\1', $string);
+  // Decimal numeric entities
+  $string = preg_replace('/&amp;#0*([0-9]+;)/', '&#\1', $string);
+  // Hexadecimal numeric entities
+  $string = preg_replace('/&amp;#[Xx]0*((?:[0-9A-Fa-f]{2})+;)/', '&#x\1', $string);
+
+  return preg_replace_callback('%
+    (
+    <[^>]*.(>|$)  # a string that starts with a <, up until the > or the end of the string
+    |             # or
+    >             # just a >
+    )%x', '_filter_xss_split', $string);
+}
+
+/**
+ * Processes an HTML tag.
+ *
+ * @param $m
+ *   An array with various meaning depending on the value of $store.
+ *   If $store is TRUE then the array contains the allowed tags.
+ *   If $store is FALSE then it is a regex match array; $m[1] is the HTML tag to process.
+ * @param $store
+ *   Whether to store $m.
+ * @return
+ *   If the element isn't allowed, an empty string. Otherwise, the cleaned up
+ *   version of the HTML element.
+ */
+function _filter_xss_split($m, $store = FALSE) {
+  static $allowed_html;
+
+  if ($store) {
+    $allowed_html = array_flip($m);
+    return;
+  }
+
+  $string = $m[1];
+
+  if (substr($string, 0, 1) != '<') {
+    // We matched a lone ">" character
+    return '&gt;';
+  }
+
+  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
+    // Seriously malformed
+    return '';
+  }
+
+  $slash = trim($matches[1]);
+  $elem = &$matches[2];
+  $attrlist = &$matches[3];
+
+  if (!isset($allowed_html[strtolower($elem)])) {
+    // Disallowed HTML element
+    return '';
+  }
+
+  if ($slash != '') {
+    return "</$elem>";
+  }
+  // Is there a closing XHTML slash at the end of the attributes?
+  $xhtml_slash = preg_match('%\s/\s*$%', $attrlist) ? '/' : '';
+
+  // Clean up attributes
+  $attr2 = implode(' ', _filter_xss_attributes($attrlist));
+  $attr2 = preg_replace('/[<>]/', '', $attr2);
+
+  return "<$elem $attr2$xhtml_slash>";
+}
+
+/**
+ * Processes a string of HTML attributes ($attr is the raw attribute text of one tag).
+ *
+ * @return
+ *   Array of cleaned up attributes (name or name="value"); style and on* are dropped.
+ */
+function _filter_xss_attributes($attr) {
+  $attrarr = array();
+  $mode = 0;
+  $attrname = '';
+
+  while (strlen($attr) != 0) {
+    // Was the last operation successful?
+    $working = 0;
+
+    switch ($mode) {
+      case 0:
+        // Attribute name, href for instance
+        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
+          $attrname = strtolower($match[1]);
+          $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on');
+          $working = $mode = 1;
+          $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
+        }
+
+        break;
+
+      case 1:
+        // Equals sign or valueless ("selected")
+        if (preg_match('/^\s*=\s*/', $attr)) {
+          $working = 1; $mode = 2;
+          $attr = preg_replace('/^\s*=\s*/', '', $attr);
+          break;
+        }
+
+        if (preg_match('/^\s+/', $attr)) {
+          $working = 1; $mode = 0;
+          if (!$skip) {
+            $attrarr[] = $attrname;
+          }
+          $attr = preg_replace('/^\s+/', '', $attr);
+        }
+
+        break;
+
+      case 2:
+        // Attribute value, a URL after href= for instance
+        if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1;
+          $mode = 0;
+          $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
+          break;
+        }
+
+        if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname='$thisval'";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
+          break;
+        }
+
+        if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) {
+          $thisval = filter_xss_bad_protocol($match[1]);
+
+          if (!$skip) {
+            $attrarr[] = "$attrname=\"$thisval\"";
+          }
+          $working = 1; $mode = 0;
+          $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
+        }
+
+        break;
+    }
+
+    if ($working == 0) {
+      // not well formed, remove and try again
+      $attr = preg_replace('/
+        ^
+        (
+        "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
+        |               # or
+        \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
+        |               # or
+        \S              # - a non-whitespace character
+        )*              # any number of the above three
+        \s*             # any number of whitespaces
+        /x', '', $attr);
+      $mode = 0;
+    }
+  }
+
+  // The attribute list ends with a valueless attribute like "selected"; style and on* stay dropped
+  if ($mode == 1 && !$skip) {
+    $attrarr[] = $attrname;
+  }
+  return $attrarr;
+}
+
+/**
+ * Processes an HTML attribute value and ensures it does not contain an URL
+ * with a disallowed protocol (e.g. javascript:)
+ *
+ * @param $string
+ *   The string with the attribute value.
+ * @param $decode
+ *   Whether to decode entities in the $string. Set to FALSE if the $string
+ *   is in plain text, TRUE otherwise. Defaults to TRUE.
+ * @return
+ *   Cleaned up and HTML-escaped version of $string.
+ */
+function filter_xss_bad_protocol($string, $decode = TRUE) {
+  // Get the plain text representation of the attribute value (i.e. its meaning)
+  if ($decode) {
+    $string = decode_entities($string);
+  }
+  // Remove soft hyphen
+  $string = str_replace(chr(194) . chr(173), '', $string);
+  // Strip protocols. Removing one disallowed prefix can expose another one
+  // (e.g. "javascript:javascript:..."), so repeat until nothing changes.
+  do {
+    $before = $string;
+    $string = preg_replace_callback('/^([^:]+):/', '_filter_xss_bad_protocol', $string);
+  } while ($before != $string);
+  return check_plain($string);
+}
+
+function _filter_xss_bad_protocol($m) {
+  static $allowed_protocols; // Lookup table keyed by protocol name, built on the first call.
+  if (!isset($allowed_protocols)) {
+    $allowed_protocols = array_flip(variable_get('filter_allowed_protocols', array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'mailto', 'irc', 'ssh', 'sftp', 'webcal')));
+  }
+  $string = preg_replace('/\s+/', '', $m[1]); // $m[1] is the text before the first colon, with embedded whitespace removed.
+  return (isset($allowed_protocols[$string]) || isset($allowed_protocols[strtolower($string)])) ? "$string:" : ''; // URI schemes are case-insensitive.
+}
+
+/**
  * @} End of "Standard filters".
  */

Index: modules/system.module
===================================================================
RCS file: /cvs/drupal/drupal/modules/system.module,v
retrieving revision 1.204.2.4
diff -u -r1.204.2.4 system.module
--- modules/system.module 26 Jul 2005 01:56:18 -0000  1.204.2.4
+++ modules/system.module 30 Nov 2005 20:32:58 -0000
@@ -47,7 +47,7 @@
  * Implementation of hook_perm().
  */
 function system_perm() {
-  return array('administer site configuration', 'access administration pages', 'bypass input data check');
+  return array('administer site configuration', 'access administration pages');
 }

 /**