diff -ur modules/filter/filter.test modules/filter/filter.test --- modules/filter/filter.test 2009-03-15 02:53:16.000000000 +0100 +++ modules/filter/filter.test 2009-03-18 16:37:12.000000000 +0100 @@ -200,6 +200,15 @@ * Test the line break filter */ function testLineBreakFilter() { + // Single line breaks should be changed to <br />
tags, while paragraphs + // separated with double line breaks should be enclosed with <p></p>

tags. + $f = _filter_autop("aaa\nbbb\n\nccc"); + $this->assertEqual(str_replace("\n", '', $f), "

aaa
bbb

ccc

", t('Line breaking basic case.')); + + // Text within some contexts should not be processed. + $f = _filter_autop(""); + $this->assertEqual($f, "", t('Line breaking -- do not break scripts.')); + $f = _filter_autop('

'); $this->assertEqual(substr_count($f, '

'), substr_count($f, '

'), t('Make sure line breaking produces matching paragraph tags.')); @@ -211,10 +220,367 @@ } /** - * Test the HTML filter + * Test limiting allowed tags, XSS prevention and adding 'nofollow' to links. + * XSS tests assume that script is disallowed by default and src is allowed by default, but on* and style are disallowed. + * + * Script injection vectors mostly adopted from http://ha.ckers.org/xss.html. + * + * Relevant CVEs: + * CVE-2002-1806, ~CVE-2005-0682, ~CVE-2005-2106, CVE-2005-3973, + * CVE-2006-1226 (= rev. 1.112?), CVE-2008-0273, CVE-2008-3740. + * */ function testHtmlFilter() { + // Tag stripping, different ways to work around removal of HTML tags. + $f = filter_xss(''); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping -- simple script without special characters.')); + + $f = filter_xss(''); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- non whitespace character after tag name.')); + + $f = filter_xss(''); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- no space between tag and attribute.')); + + $f = filter_xss("<\0scr\0ipt>alert(0)"); // Null between < and tag name works at least with IE6. + $this->assertNoNormalized($f, 'ipt', t('HTML tag stripping evasion -- breaking HTML with nulls.')); + + $f = filter_xss(""); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- filter just removing "script".')); + + $f = filter_xss('<'); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- double opening brackets.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- a malformed image tag.')); + + $f = filter_xss('
', array('blockquote')); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- script in a blockqoute.')); + + $f = filter_xss(""); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- script within a comment.')); + + // Dangerous attributes removal. + $f = filter_xss('

', array('p')); + $this->assertNoNormalized($f, 'onmouseover', t('HTML filter attributes removal -- events, no evasion.')); + + $f = filter_xss('

  • ', array('li')); + $this->assertNoNormalized($f, 'style', t('HTML filter attributes removal -- style, no evasion.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'onerror', t('HTML filter attributes removal evasion -- spaces before equals sign.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'onabort', t('HTML filter attributes removal evasion -- non alphanumeric characters before equals sign.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'onmediaerror', t('HTML filter attributes removal evasion -- varying case.')); + + $f = filter_xss("", array('img')); // Works at least with IE6. + $this->assertNoNormalized($f, 'focus', t('HTML filter attributes removal evasion -- breaking with nulls.')); + + // Only whitelisted scheme names in allowed attributes. + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- no evasion.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- no quotes.')); + + $f = filter_xss('', array('img')); // A bit like CVE-2006-0070. 
+ $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- no alert ;)')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- grave accents.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- rare attribute.')); + + $f = filter_xss('', array('table')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- another tag.')); + + $f = filter_xss('', array('base')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- one more attribute and tag.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- varying case.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- UTF-8 decimal encoding.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- long UTF-8 encoding.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- UTF-8 hex encoding.')); + + $f = filter_xss("", array('img')); + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an embedded tab.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an encoded, embedded tab.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an encoded, embedded newline.')); + + $f = filter_xss('', array('img')); // With this test would fail, but the entity gets turned into &#xD;, so it's OK. 
+ $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an encoded, embedded carriage return.')); + + $f = filter_xss("", array('img')); + $this->assertNoNormalized($f, 'cript', t('HTML scheme clearing evasion -- broken into many lines.')); + + $f = filter_xss("", array('img')); + $this->assertNoNormalized($f, 'cript', t('HTML scheme clearing evasion -- embedded nulls.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- spaces and metacharacters before scheme.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'vbscript', t('HTML scheme clearing evasion -- another scheme.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'nosuchscheme', t('HTML scheme clearing evasion -- unknown scheme.')); + + // Netscape 4.x javascript entities. + $f = filter_xss('
    ', array('br')); + $this->assertNoNormalized($f, 'alert', t('Netscape 4.x javascript entities.')); + + // Invalid UTF-8, these only work as reflected XSS with Internet Explorer 6. + $f = filter_xss("

    \" style=\"background-image: url(javascript:alert(0));\"\xe0

    ", array('p')); // DRUPAL-SA-2008-006 + $this->assertNoNormalized($f, 'style', t('HTML filter -- invalid UTF-8.')); + + $f = filter_xss("\xc0aaa"); + $this->assertEqual($f, '', t('HTML filter -- overlong UTF-8 sequences.')); + } + + /** + * Test filter settings, defaults, access restrictions and similar. + * + * TODO: This is for functions like filter_filter and check_markup, whose + * functionality is not completely focused on filtering. Some ideas: + * restricting formats according to user permissions, proper cache + * handling, defaults -- allowed tags/attributes/protocols. + * + * TODO: It is possible to add script, iframe etc. to allowed tags, but + * this makes HTML filter completely ineffective. + * + * TODO: Class, id, name and xmlns should be added to disallowed attributes, + * or better a whitelist approach should be used for that too. + */ + function testFilter() { + // Does access restriction really work. + + // HTML filter is not able to secure some tags, these should never be allowed. + $f = filter_filter('process', 0, 'no_such_format', '', 'f'); + $this->assertEqual($f, '', t('Converting URLs -- do not process scripts.')); + + // Addresses in attributes should not be converted. + $f = _filter_url('

    ', 'f'); + $this->assertEqual($f, '

    ', t('Converting URLs -- do not convert addresses in attributes.')); + + $f = _filter_url('text', 'f'); + $this->assertEqual($f, 'text', t('Converting URLs -- do not break existing links with custom title attribute.')); + + // Dot at the end of an address (issue #161217). + // Dot at the end of a domain name can mean that it's a fully qualified name. It can also be used at the end of a filename or a query string (and is not encoded; see: http://en.wikipedia.org./wiki/FQDN and http://en.wikipedia.org/wiki/Query_string). + // However, such usage is rare compared to simply using a link at the end of a sentence, so we shouldn't include the dot in the link. Does everybody expect that?? + $f = _filter_url('www.example.com.', 'f'); + $this->assertEqual($f, 'www.example.com.', t('Converting URLs -- do not recognize a dot at the end of a domain name (FQDNs).')); + + $f = _filter_url('http://www.example.com.', 'f'); + $this->assertEqual($f, 'http://www.example.com.', t('Converting URLs -- do not recognize a dot at the end of an URL (FQDNs).')); + + $f = _filter_url('www.example.com/index.php?a=.', 'f'); + $this->assertEqual($f, 'www.example.com/index.php?a=.', t('Converting URLs -- do forget about a dot at the end of a query string.')); + } + + /** + * Test the HTML corrector. + * + * TODO: This test could really use some validity checking function. + */ + function testHtmlCorrector() { + // Tag closing. + $f = _filter_htmlcorrector('

    text'); + $this->assertEqual($f, '

    text

    ', t('HTML corrector -- tag closing at the end of input.')); + + $f = _filter_htmlcorrector('

    text

    text'); + $this->assertEqual($f, '

    text

    text

    ', t('HTML corrector -- tag closing.')); + + $f = _filter_htmlcorrector("