Index: INSTALL.txt =================================================================== RCS file: /cvs/drupal/drupal/INSTALL.txt,v retrieving revision 1.29.2.5 diff -u -p -r1.29.2.5 INSTALL.txt --- INSTALL.txt 27 Jun 2007 18:16:45 -0000 1.29.2.5 +++ INSTALL.txt 10 Jan 2008 21:25:52 -0000 @@ -13,7 +13,7 @@ CONTENTS OF THIS FILE REQUIREMENTS ------------ -Drupal requires a web server, PHP4 (4.3.3 or greater) or PHP5 +Drupal requires a web server, PHP4 (4.3.5 or greater) or PHP5 (http://www.php.net/) and either MySQL (http://www.mysql.com/) or PostgreSQL (http://www.postgresql.org/). Your database user will also need sufficient privileges to run Drupal. Please Index: includes/bootstrap.inc =================================================================== RCS file: /cvs/drupal/drupal/includes/bootstrap.inc,v retrieving revision 1.96.2.9 diff -u -p -r1.96.2.9 bootstrap.inc --- includes/bootstrap.inc 26 Jul 2007 19:17:24 -0000 1.96.2.9 +++ includes/bootstrap.inc 10 Jan 2008 21:25:28 -0000 @@ -610,9 +610,48 @@ function referer_uri() { /** * Encode special characters in a plain-text string for display as HTML. + * + * Uses drupal_validate_utf8 to prevent cross site scripting attacks on + * Internet Explorer 6. */ function check_plain($text) { - return htmlspecialchars($text, ENT_QUOTES); + return drupal_validate_utf8($text) ? htmlspecialchars($text, ENT_QUOTES) : ''; +} + +/** + * Checks whether a string is valid UTF-8. + * + * All functions designed to filter input should use drupal_validate_utf8 + * to ensure they operate on valid UTF-8 strings to prevent bypass of the + * filter. + * + * When text containing an invalid UTF-8 lead byte (0xC0 - 0xFF) is presented + * as UTF-8 to Internet Explorer 6, the program may misinterpret subsequent + * bytes. 
When these subsequent bytes are HTML control characters such as + * quotes or angle brackets, parts of the text that were deemed safe by filters + * end up in locations that are potentially unsafe; an onerror attribute that + * is outside of a tag, and thus deemed safe by a filter, can be interpreted + * by the browser as if it were inside the tag. + * + * This function exploits preg_match behaviour (since PHP 4.3.5) when used + * with the u modifier, as a fast way to find invalid UTF-8. When the matched + * string contains an invalid byte sequence, it will fail silently. + * + * preg_match may not fail on 4 and 5 octet sequences, even though they + * are not supported by the specification. + * + * The specific preg_match behaviour is present since PHP 4.3.5. + * + * @param $text + * The text to check. + * @return + * TRUE if the text is valid UTF-8, FALSE if not. + */ +function drupal_validate_utf8($text) { + if (strlen($text) == 0) { + return TRUE; + } + return (preg_match('/^./us', $text) == 1); } /** Index: modules/filter.module =================================================================== RCS file: /cvs/drupal/drupal/modules/Attic/filter.module,v retrieving revision 1.122.2.7 diff -u -p -r1.122.2.7 filter.module --- modules/filter.module 2 Jul 2007 19:05:22 -0000 1.122.2.7 +++ modules/filter.module 10 Jan 2008 21:25:28 -0000 @@ -1135,6 +1135,11 @@ function filter_xss_admin($string) { * The format to use. */ function filter_xss($string, $allowed_tags = array('a', 'em', 'strong', 'cite', 'code', 'ul', 'ol', 'li', 'dl', 'dt', 'dd')) { + // Only operate on valid UTF-8 strings. This is necessary to prevent cross + // site scripting issues on Internet Explorer 6. + if (!drupal_validate_utf8($string)) { + return ''; + } // Store the input format _filter_xss_split($allowed_tags, TRUE); // Remove NUL characters (ignored by some browsers)