diff --git a/includes/common.inc b/includes/common.inc index ceac115..831fb8f 100644 --- a/includes/common.inc +++ b/includes/common.inc @@ -1571,6 +1571,8 @@ function _filter_xss_attributes($attr) { $attrarr = array(); $mode = 0; $attrname = ''; + $skip = FALSE; + $skip_protocol_filtering = FALSE; while (strlen($attr) != 0) { // Was the last operation successful? @@ -1582,6 +1584,20 @@ function _filter_xss_attributes($attr) { if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) { $attrname = strtolower($match[1]); $skip = ($attrname == 'style' || substr($attrname, 0, 2) == 'on'); + + // Values for attributes of type URI should be filtered for + // potentially malicious protocols (for example, an href-attribute + // starting with "javascript:"). However, for some non-URI + // attributes performing this filtering causes valid and safe data + // to be mangled. We prevent this by skipping protocol filtering on + // such attributes. + // @see filter_xss_bad_protocol() + // @see http://www.w3.org/TR/html4/index/attributes.html + $skip_protocol_filtering = substr($attrname, 0, 5) === 'data-' || in_array($attrname, array( + 'title', + 'alt', + )); + $working = $mode = 1; $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr); } @@ -1607,7 +1623,7 @@ function _filter_xss_attributes($attr) { case 2: // Attribute value, a URL after href= for instance. if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) { - $thisval = filter_xss_bad_protocol($match[1]); + $thisval = $skip_protocol_filtering ? $match[1] : filter_xss_bad_protocol($match[1]); if (!$skip) { $attrarr[] = "$attrname=\"$thisval\""; @@ -1619,7 +1635,7 @@ function _filter_xss_attributes($attr) { } if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) { - $thisval = filter_xss_bad_protocol($match[1]); + $thisval = $skip_protocol_filtering ? $match[1] : filter_xss_bad_protocol($match[1]); if (!$skip) { $attrarr[] = "$attrname='$thisval'"; @@ -1630,7 +1646,7 @@ function _filter_xss_attributes($attr) { } if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) { - $thisval = filter_xss_bad_protocol($match[1]); + $thisval = $skip_protocol_filtering ? $match[1] : filter_xss_bad_protocol($match[1]); if (!$skip) { $attrarr[] = "$attrname=\"$thisval\""; @@ -1694,6 +1710,33 @@ function filter_xss_bad_protocol($string, $decode = TRUE) { } /** + * Applies a very permissive XSS/HTML filter to data-attributes. + * + * @param string $html + * The string to apply the data-attributes filtering to. + * + * @return string + * The filtered string. + */ +function filter_xss_data_attributes($html) { + if (stristr($html, 'data-') !== FALSE) { + $dom = filter_dom_load($html); + $xpath = new \DOMXPath($dom); + foreach ($xpath->query('//@*[starts-with(name(.), "data-")]') as $node) { + // The data-attributes contain an HTML-encoded value, so we need to + // decode the value, apply XSS filtering and then re-save as encoded + // value. There is no need to explicitly decode $node->value, since the + // DOMAttr::value getter returns the decoded value. + $value = filter_xss_admin($node->value); + $node->value = check_plain($value); + } + $html = filter_dom_serialize($dom); + } + + return $html; +} + +/** * @} End of "defgroup sanitization". */ diff --git a/modules/filter/filter.test b/modules/filter/filter.test index ddea6af..b55d215 100644 --- a/modules/filter/filter.test +++ b/modules/filter/filter.test @@ -1051,6 +1051,28 @@ class FilterUnitTestCase extends DrupalUnitTestCase { $f = filter_xss('', array('img')); $this->assertNoNormalized($f, 'javascript', 'HTML scheme clearing evasion -- no quotes.'); + // Check that strings in HTML attributes are are correctly processed. + $f = filter_xss('Example: alt', array('img')); + $this->assertIdentical($f, 'Example: alt', 'No HTML scheme clearing on allowed attributes.'); + + $f = filter_xss('', array('img')); + $this->assertIdentical($f, '', 'No HTML scheme clearing on allowed data attributes.'); + + // Test XSS filtering on data-attributes. + $output = filter_xss('', array('img')); + $f = filter_xss_data_attributes($output); + $this->assertIdentical($f, '', 'HTML-encoded value in data-attributes decoded, filtered and re-saved.'); + + $output = filter_xss('', array('img')); + $f = filter_xss_data_attributes($output); + $this->assertIdentical($f, '', 'HTML-encoded value in data-attributes decoded, filtered and re-saved.'); + + // When including HTML-tags as visible content, they are double-escaped. + // This test case ensures that we leave that content unchanged. + $output = filter_xss('', array('img')); + $f = filter_xss_data_attributes($output); + $this->assertIdentical($f, '', 'Doubled-escaped HTML-tags left unchanged.'); + // A bit like CVE-2006-0070. $f = filter_xss('', array('img')); $this->assertNoNormalized($f, 'javascript', 'HTML scheme clearing evasion -- no alert ;)');