Index: modules/filter/filter.test =================================================================== RCS file: /cvs/drupal/drupal/modules/filter/filter.test,v retrieving revision 1.12 diff -u -p -r1.12 filter.test --- modules/filter/filter.test 28 Dec 2008 19:30:36 -0000 1.12 +++ modules/filter/filter.test 9 Jan 2009 22:11:11 -0000 @@ -197,9 +197,262 @@ class FilterTestCase extends DrupalWebTe } /** - * Test the line break filter + * Test limiting allowed tags, XSS prevention and adding 'nofollow' to links. + * XSS tests assume that script is dissallowed on default and src is allowed on default, but on* and style are dissallowed. + * + * Script injection vectors mostly adopted from http://ha.ckers.org/xss.html. + * + * Relevant CVEs: + * CVE-2002-1806, ~CVE-2005-0682, ~CVE-2005-2106, CVE-2005-3973, + * CVE-2006-1226 (= rev. 1.112?), CVE-2008-0273, CVE-2008-3740. + * + * TODO: Maybe separate all filter_filter tests into another, higher level, test case? */ - function testLineBreakFilter() { + function testHtmlFilter() { + + // Default filter settings. + $f = filter_filter('process', 0, 'no_such_format', ''); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping -- simple script without special characters.')); + + $f = filter_xss(''); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- non whitespace character after tag name.')); + + $f = filter_xss(''); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- no space between tag and attribute.')); + + $f = filter_xss("<\0scr\0ipt>alert(0)"); // TODO: Vector needs confirmation (the <\0s part). + $this->assertNoNormalized($f, 'ipt', t('HTML tag stripping evasion -- breaking HTML with nulls.')); + + $f = filter_xss(""); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- filter just removing "script".')); + + $f = filter_xss('<'); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- double opening brackets.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- a malformed image tag.')); + + $f = filter_xss('
', array('blockquote')); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- script in a blockqoute.')); + + $f = filter_xss(""); + $this->assertNoNormalized($f, 'script', t('HTML tag stripping evasion -- script within a comment.')); + + // Dangerous attributes removal. + $f = filter_xss('

', array('p')); + $this->assertNoNormalized($f, 'onmouseover', t('HTML filter attributes removal -- events, no evasion.')); + + $f = filter_xss('

  • ', array('li')); + $this->assertNoNormalized($f, 'style', t('HTML filter attributes removal -- style, no evasion.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'onerror', t('HTML filter attributes removal evasion -- spaces before equals sign.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'onabort', t('HTML filter attributes removal evasion -- non alphanumeric characters before equals sign.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'onmediaerror', t('HTML filter attributes removal evasion -- varying case.')); + + // TODO: The filter closes the p tag here with the first > from the attribute, I would rather expect it to + // remove the style attribute ( is well-formed XML as far as I can tell), however this is not dangerous. + // $f = filter_xss('

    ', array('p')); + // $this->assertNoNormalized($f, 'style', t('HTML filter attributes removal evasion -- filter not distinguishing quotes.')); + + $f = filter_xss("", array('img')); // TODO: Vector needs confirmation. + $this->assertNoNormalized($f, 'focus', t('HTML filter attributes removal evasion -- breaking with nulls.')); + + // Only whitelisted scheme names in allowed attributes. + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- no evasion.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- no quotes.')); + + $f = filter_xss('', array('img')); // A bit like CVE-2006-0070. + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- no alert ;)')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- grave accents.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- rare attribute.')); + + $f = filter_xss('', array('table')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- another tag.')); + + $f = filter_xss('', array('base')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing -- one more attribute and tag.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- varying case.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- UTF-8 decimal encoding.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- long UTF-8 encoding.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- UTF-8 hex encoding.')); + + $f = filter_xss("", array('img')); + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an embedded tab.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an encoded, embedded tab.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an encoded, embedded newline.')); + + $f = filter_xss('', array('img')); // With this test would fail, but the entity gets turned into &#xD;, so it's OK. + $this->assertNoNormalized($f, 'script', t('HTML scheme clearing evasion -- an encoded, embedded carriage return.')); + + $f = filter_xss("", array('img')); + $this->assertNoNormalized($f, 'cript', t('HTML scheme clearing evasion -- broken into many lines.')); + + $f = filter_xss("", array('img')); + $this->assertNoNormalized($f, 'cript', t('HTML scheme clearing evasion -- embedded nulls.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'javascript', t('HTML scheme clearing evasion -- spaces and metacharacters before scheme.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'vbscript', t('HTML scheme clearing evasion -- another scheme.')); + + $f = filter_xss('', array('img')); + $this->assertNoNormalized($f, 'nosuchscheme', t('HTML scheme clearing evasion -- unknown scheme.')); + + // Netscape 4.x javascript entities. + $f = filter_xss('
    ', array('br')); + $this->assertNoNormalized($f, 'alert', t('Netscape 4.x javascript entities.')); + + // Encoding tricks. + // TODO: I can't really reproduce that attack, with 5.x input with invalid + // characters is truncated, with 7.x the database layer throws an exception + // (disclosing installation path, by the way). + $f = filter_xss("

    \" style=\"background-image: url(javascript:alert(0));\"\xe0

    ", array('p')); // DRUPAL-SA-2008-006 + $this->assertNoNormalized($f, 'style', t('HTML filter -- invalid UTF-8.')); + + $f = filter_xss("\xc0"); + $this->assertEqual($f, '', t('HTML filter -- overlong UTF-8 sequences.')); + } + + /** + * Test the spam deterrent. + */ + function testNoFollowFilter() { + variable_set('filter_html_nofollow_f', TRUE); + + $f = _filter_html('text', 'f'); + $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent -- no evasion.')); + + $f = _filter_html('text', 'f'); + $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- capital A.')); + + $f = _filter_html("text", 'f'); + $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- non whitespace character after tag name.')); + + $f = _filter_html("<\0a\0 href=\"http://www.example.com/\">text", 'f'); + $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- some nulls.')); + + $f = _filter_html('', 'f'); + $this->assertNormalized($f, 'rel="nofollow"', t('Spam deterrent evasion -- link within a comment.')); + } + + /** + * Test the loose, admin HTML filter. + */ + function testAdminHtmlFilter() { + + $f = filter_xss_admin(''); // DRUPAL-SA-2008-044 + $this->assertNoNormalized($f, 'object', t('Admin HTML filter -- should not allow object tag.')); + + $f = filter_xss_admin('"); + $this->assertEqual($f, "", t('Line breaking -- do not break scripts.')); + + // TODO: Filters do not skip comments. + // $f = _filter_autop(""); + // $this->assertEqual($f, "", t('Line breaking -- do not change comments.')); + $f = _filter_autop('

    '); $this->assertEqual(substr_count($f, '

    '), substr_count($f, '

    '), t('Make sure line breaking produces matching paragraph tags.')); @@ -211,12 +464,85 @@ class FilterTestCase extends DrupalWebTe } /** - * Test the HTML filter + * Test the URL filter. */ - function testHtmlFilter() { + function testUrlFilter() { + variable_set('filter_url_length_f', 496); + + // Converting URLs. + $f = _filter_url('http://www.example.com/', 'f'); + $this->assertEqual($f, 'http://www.example.com/', t('Converting URLs.')); + + $f = _filter_url('http://www.example.com/?a=1&b=2', 'f'); + $this->assertEqual($f, 'http://www.example.com/?a=1&b=2', t('Converting URLs -- ampersands.')); + $f = _filter_url('ftp://user:pass@ftp.example.com/dir1/dir2', 'f'); + $this->assertEqual($f, 'ftp://user:pass@ftp.example.com/dir1/dir2', t('Converting URLs -- FTP scheme.')); + + $f = _filter_url('www.example.com', t('Converting URLs -- do not break existing links.')); + + // TODO: Filters break comment. + // $f = _filter_url('', 'f'); + // $this->assertEqual($f, '', t('Converting URLs -- do not change comments.')); + + // Converting domain names. + $f = _filter_url('www.example.com', 'f'); + $this->assertEqual($f, 'www.example.com', t('Converting domain names.')); + + $f = _filter_url('
  • www.example.com
  • ', 'f'); + $this->assertEqual($f, '
  • www.example.com
  • ', t('Converting domain names -- domain in a list.')); + + $f = _filter_url('(www.example.com/dir?a=1&b=2#a)', 'f'); + $this->assertEqual($f, '(www.example.com/dir?a=1&b=2#a)', t('Converting domain names -- domain with in parentheses.')); + + // Converting e-mail addresses. + $f = _filter_url('johndoe@example.com', 'f'); + $this->assertEqual($f, 'johndoe@example.com', t('Converting e-mail addresses.')); + + $f = _filter_url('aaa@sub.tv', 'f'); + $this->assertEqual($f, 'aaa@sub.tv', t('Converting e-mail addresses -- a short e-mail from Tuvalu.')); + + // TODO: Just doesn't work, see: http://tools.ietf.org/html/rfc5322#section-3.2.3 and http://tools.ietf.org/html/rfc5321#section-4.1.3 . + // $f = _filter_url('"\"\\()[]\;:,<>@ "!#$%&\'*+-/=?^_`.{|}~@example.com', 'f'); + // $this->assertEqual($f, '@ "!#$%&\'*+-/=?^_`.{|}~@example.com">"\"\\()[]\;:,<>@ "!#$%&\'*+-/=?^_`.{|}~@example.com', t('Converting e-mail addresses -- an interesting, valid address.')); + + // URL trimming. + variable_set('filter_url_length_f', 28); + + $f = _filter_url('http://www.example.com/d/ff.ext?a=1&b=2#a1', 'f'); + $this->assertNormalized($f, 'http://www.example.com/d/ff....', t('URL trimming.')); } + /** + * Test the HTML corrector. + * TODO: This test could really use some validity checking function. + */ + function testHtmlCorrector() { + $f = _filter_htmlcorrector('

    text'); + $this->assertEqual($f, '

    text

    ', t('HTML corrector -- tag closing.')); + + $f = _filter_htmlcorrector('

    text

    text'); + $this->assertEqual($f, '

    text

    text

    ', t('HTML corrector -- tag closing.')); + + $f = _filter_htmlcorrector("
    • e1
    • e2"); + $this->assertEqual($f, "
      • e1
      • e2
      ", t('HTML corrector -- unclosed list tags.')); + + $f = _filter_htmlcorrector('
      content'); + $this->assertEqual($f, '
      content
      ', t('HTML corrector -- unclosed tag with attribute.')); + + $f = _filter_htmlcorrector('

      '); + $this->assertEqual($f, '

      ', t('HTML corrector -- XHTML closing slash.')); + + // TODO: Filters do not handle comments (issue #222926). + // $f = _filter_htmlcorrector(''); + // $this->assertEqual($f, '', t('HTML corrector -- skiping comments.')); + + // $f = _filter_htmlcorrector("", t('HTML corrector -- closing unclosed comments.')); + } + + function createFormat($filter) { $edit = array( 'name' => $this->randomName(), @@ -233,4 +559,43 @@ class FilterTestCase extends DrupalWebTe $this->drupalPost('admin/settings/filters/delete/' . $format->format, array(), t('Delete')); } } + + /** + * Asserts that a text transformed to lowercase with HTML entities decoded does contains a given string. + * Otherwise fails the test with a given message, similar to all the SimpleTest assert* functions. + * Note that this does not remove nulls, newlines and other that could be used to obscure a tag or an attribute name. + * @param $haystack + * Text to look in. + * @param $needle + * Lowercase, plain text to look for. + * @param $message + * Message to display if failed. + * @param $group + * The group this message belongs to, defaults to 'Other'. + * @return + * TRUE on pass, FALSE on fail. + */ + function assertNormalized($haystack, $needle, $message = '', $group = 'Other') { + return $this->assertTrue(strpos(strtolower(decode_entities($haystack)), $needle) !== FALSE, $message, $group); + } + + /** + * Asserts that a text transformed to lowercase with HTML entities decoded does not contain a given string. + * Otherwise fails the test with a given message, similar to all the SimpleTest assert* functions. + * Note that this does not remove nulls, newlines and other that could be used to obscure a tag or an attribute name. + * @param $haystack + * Text to look in. + * @param $needle + * Lowercase, plain text to look for. + * @param $message + * Message to display if failed. + * @param $group + * The group this message belongs to, defaults to 'Other'. + * @return + * TRUE on pass, FALSE on fail. + */ + function assertNoNormalized($haystack, $needle, $message = '', $group = 'Other') { + return $this->assertTrue(strpos(strtolower(decode_entities($haystack)), $needle) === FALSE, $message, $group); + } + }