Index: modules/filter/filter.test
===================================================================
RCS file: /cvs/drupal/drupal/modules/filter/filter.test,v
retrieving revision 1.12
diff -u -p -r1.12 filter.test
--- modules/filter/filter.test 28 Dec 2008 19:30:36 -0000 1.12
+++ modules/filter/filter.test 12 Jan 2009 23:10:24 -0000
@@ -197,24 +197,398 @@ class FilterTestCase extends DrupalWebTe
}
/**
- * Test the line break filter
+ * Test filter settings, defaults, access restrictions and similar.
+ *
+ * TODO: This is for functions like filter_filter and check_markup, whose
+ * functionality is not completely focused on filtering. Some ideas:
+ * restricting formats according to user permissions, proper cache
+ * handling, defaults -- allowed tags/attributes/protocols.
+ *
+ * TODO: It is possible to add script, iframe etc. to allowed tags, but
+ * this makes HTML filter completely ineffective.
+ *
+ * TODO: Class, id, name and xmlns should be added to disallowed attributes,
+ * or better a whitelist approach should be used for that too.
*/
- function testLineBreakFilter() {
- $f = _filter_autop('
');
- $this->assertEqual(substr_count($f, '
'), substr_count($f, '
'), t('Make sure line breaking produces matching paragraph tags.'));
+ function testFilter() {
+ // TODO: Check whether access restriction really works.
+
+ // The HTML filter is not able to secure some tags, so these should never be allowed.
+ $filtered = filter_filter('process', 0, 'no_such_format', '<script />');
+ $this->assertNoNormalized($filtered, 'script', t('HTML filter should always remove script tags.'));
+
+ $filtered = filter_filter('process', 0, 'no_such_format', '<iframe />');
+ $this->assertNoNormalized($filtered, 'iframe', t('HTML filter should always remove iframe tags.'));
+
+ $filtered = filter_filter('process', 0, 'no_such_format', '<object />');
+ $this->assertNoNormalized($filtered, 'object', t('HTML filter should always remove object tags.'));
+
+ $filtered = filter_filter('process', 0, 'no_such_format', '<style />');
+ $this->assertNoNormalized($filtered, 'style', t('HTML filter should always remove style tags.'));
+
+ // Some tags make CSRF attacks easier; allowing them is a risk the user has to take deliberately.
+ $filtered = filter_filter('process', 0, 'no_such_format', '<img />');
+ $this->assertNoNormalized($filtered, 'img', t('HTML filter should remove img tags on default.'));
+
+ $filtered = filter_filter('process', 0, 'no_such_format', '<input />');
+ $this->assertNoNormalized($filtered, 'input', t('HTML filter should remove input tags on default.'));
+
+ // Filtering the content of some attributes is infeasible, so these should not be allowed either. The attribute is put on an em tag, which is assumed to be in the default allowed tag list (TODO confirm), so that the attribute rather than the whole tag is what gets stripped.
+ $filtered = filter_filter('process', 0, 'no_such_format', '<em style="display: none;">text</em>');
+ $this->assertNoNormalized($filtered, 'style', t('HTML filter should remove style attribute on default.'));
+
+ $filtered = filter_filter('process', 0, 'no_such_format', '<em onerror="alert(0);">text</em>');
+ $this->assertNoNormalized($filtered, 'onerror', t('HTML filter should remove on* attributes on default.'));
+ }
+
+ /**
+ * Test limiting allowed tags, XSS prevention and adding 'nofollow' to links.
+ * XSS tests assume that script is disallowed by default and src is allowed by default, but on* and style are disallowed.
+ *
+ * Script injection vectors mostly adopted from http://ha.ckers.org/xss.html.
+ *
+ * Relevant CVEs:
+ * CVE-2002-1806, ~CVE-2005-0682, ~CVE-2005-2106, CVE-2005-3973,
+ * CVE-2006-1226 (= rev. 1.112?), CVE-2008-0273, CVE-2008-3740.
+ *
+ */
+ function testHtmlFilter() {
+
+ // Tag stripping, different ways to work around removal of HTML tags.
+ $filtered = filter_xss('');
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping -- simple script without special characters.'));
+
+ $filtered = filter_xss('');
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping -- empty script with source.'));
+
+ $filtered = filter_xss('');
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- non whitespace character after tag name.'));
+
+ $filtered = filter_xss('');
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- no space between tag and attribute.'));
+
+ $filtered = filter_xss("<\0scr\0ipt>alert(0)"); // Null between < and tag name works at least with IE6.
+ $this->assertNoNormalized($filtered, 'ipt', t('HTML tag stripping evasion -- breaking HTML with nulls.'));
+
+ $filtered = filter_xss("");
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- filter just removing "script".'));
+
+ $filtered = filter_xss('<');
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- double opening brackets.'));
+
+ $filtered = filter_xss('', array('img'));
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- a malformed image tag.'));
+
+ $filtered = filter_xss('', array('blockquote'));
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- script in a blockqoute.'));
+
+ $filtered = filter_xss("");
+ $this->assertNoNormalized($filtered, 'script', t('HTML tag stripping evasion -- script within a comment.'));
+
+ // Dangerous attributes removal.
+ $filtered = filter_xss('
", array('p')); // DRUPAL-SA-2008-006
+ $this->assertNoNormalized($filtered, 'style', t('HTML filter -- invalid UTF-8.'));
+
+ $filtered = filter_xss("\xc0aaa");
+ $this->assertEqual($filtered, '', t('HTML filter -- overlong UTF-8 sequences.'));
}
/**
- * Test the HTML filter
+ * Test the spam deterrent.
*/
- function testHtmlFilter() {
+ function testNoFollowFilter() {
+ variable_set('filter_html_nofollow_f', TRUE);
+
+ // Test if the rel="nofollow" attribute is added, even if we try to prevent it.
+ $filtered = _filter_html('text', 'f');
+ $this->assertNormalized($filtered, 'rel="nofollow"', t('Spam deterrent -- no evasion.'));
+
+ $filtered = _filter_html('text', 'f');
+ $this->assertNormalized($filtered, 'rel="nofollow"', t('Spam deterrent evasion -- capital A.'));
+
+ $filtered = _filter_html("text", 'f');
+ $this->assertNormalized($filtered, 'rel="nofollow"', t('Spam deterrent evasion -- non whitespace character after tag name.'));
+ $filtered = _filter_html("<\0a\0 href=\"http://www.example.com/\">text", 'f');
+ $this->assertNormalized($filtered, 'rel="nofollow"', t('Spam deterrent evasion -- some nulls.'));
+
+ $filtered = _filter_html('', 'f');
+ $this->assertNormalized($filtered, 'rel="nofollow"', t('Spam deterrent evasion -- link within a comment.'));
+ }
+
+ /**
+ * Test the loose, admin HTML filter.
+ */
+ function testAdminHtmlFilter() {
+
+ $filtered = filter_xss_admin(''); // DRUPAL-SA-2008-044
+ $this->assertNoNormalized($filtered, 'object', t('Admin HTML filter -- should not allow object tag.'));
+
+ $filtered = filter_xss_admin('');
+ $this->assertNoNormalized($filtered, 'script', t('Admin HTML filter -- should not allow script tag.'));
+
+ $filtered = filter_xss_admin('
');
+ $this->assertEqual($filtered, '', t('Admin HTML filter -- should not allow some tags.'));
+ }
+
+ /**
+ * Test the HTML escaping filter. Only check_plain() is exercised here: its output is compared against the expected escaped text.
+ */
+ function testNoHtmlFilter() {
+
+ // Characters that have a special meaning in XML must be changed into entities.
+ $filtered = check_plain('<>&"');
+ $this->assertEqual($filtered, '&lt;&gt;&amp;&quot;', t('No HTML filter basic test.'));
+
+ // A single quote can also be used for evil things in some contexts, so it must be escaped too.
+ $filtered = check_plain('\'');
+ $this->assertEqual($filtered, '&#039;', t('No HTML filter -- single quote.'));
+
+ // The filter must not be fooled by evasion techniques: for this invalid UTF-8 input an empty string is expected.
+ $filtered = check_plain("\xc2\"");
+ $this->assertEqual($filtered, '', t('No HTML filter -- invalid UTF-8.'));
+ }
+
+ /**
+ * Test the line breaking filter.
+ */
+ function testLineBreakingFilter() {
+
+ // Single line breaks should be changed to <br /> tags, while paragraphs
+ // separated with double line breaks should be enclosed with <p></p> tags.
+ $filtered = _filter_autop("aaa\nbbb\n\nccc");
+ $this->assertEqual(str_replace("\n", '', $filtered), "
aaa bbb
ccc
", t('Line breaking basic case.'));
+
+ // Text within some contexts should not be processed.
+ $filtered = _filter_autop("");
+ $this->assertEqual($filtered, "", t('Line breaking -- do not break scripts.'));
+
+ // Some special cases that used not to work (issue #212236).
+ $filtered = _filter_autop('
', t('Converting domain names -- domain in a list.'));
+
+ $filtered = _filter_url('(www.example.com/dir?a=1&b=2#a)', 'f');
+ $this->assertEqual($filtered, '(www.example.com/dir?a=1&b=2#a)', t('Converting domain names -- domain with in parentheses.'));
+
+ // Converting e-mail addresses.
+ $filtered = _filter_url('johndoe@example.com', 'f');
+ $this->assertEqual($filtered, 'johndoe@example.com', t('Converting e-mail addresses.'));
+
+ $filtered = _filter_url('aaa@sub.tv', 'f');
+ $this->assertEqual($filtered, 'aaa@sub.tv', t('Converting e-mail addresses -- a short e-mail from Tuvalu.'));
+
+ // URL trimming.
+ variable_set('filter_url_length_f', 28);
+
+ $filtered = _filter_url('http://www.example.com/d/ff.ext?a=1&b=2#a1', 'f');
+ $this->assertNormalized($filtered, 'http://www.example.com/d/ff....', t('URL trimming.'));
+
+ // Not breaking existing links.
+ $filtered = _filter_url('www.example.com', 'f');
+ $this->assertEqual($filtered, 'www.example.com', t('Converting URLs -- do not break existing links.'));
+
+ $filtered = _filter_url('http://www.example.com', 'f');
+ $this->assertEqual($filtered, 'http://www.example.com', t('Converting URLs -- do not break existing, relative links.'));
+
+ // Addresses within some tags such as code or script should not be converted.
+ $filtered = _filter_url('http://www.example.com', 'f');
+ $this->assertEqual($filtered, 'http://www.example.com', t('Converting URLs -- skip code contents.'));
+
+ $filtered = _filter_url('http://www.example.com', 'f');
+ $this->assertEqual($filtered, 'http://www.example.com', t('Converting URLs -- really skip code contents.'));
+
+ $filtered = _filter_url('', 'f');
+ $this->assertEqual($filtered, '', t('Converting URLs -- do not process scripts.'));
+
+ // Addresses in attributes should not be converted.
+ $filtered = _filter_url('', 'f');
+ $this->assertEqual($filtered, '', t('Converting URLs -- do not convert addresses in attributes.'));
+
+ $filtered = _filter_url('text', 'f');
+ $this->assertEqual($filtered, 'text', t('Converting URLs -- do not break existing links with custom title attribute.'));
+
+ // Dot at the end of an address (issue #161217).
+ // A dot at the end of a domain name can mean that it's a fully qualified name. It can also be used at the end of a filename or a query string (and is not encoded; see: http://en.wikipedia.org./wiki/FQDN and http://en.wikipedia.org/wiki/Query_string).
+ // However, such usage is rare compared to simply using a link at the end of a sentence, so we shouldn't include the dot in the link. TODO: Confirm that this is the behavior everybody expects.
+ $filtered = _filter_url('www.example.com.', 'f');
+ $this->assertEqual($filtered, 'www.example.com.', t('Converting URLs -- do not recognize a dot at the end of a domain name (FQDNs).'));
+
+ $filtered = _filter_url('http://www.example.com.', 'f');
+ $this->assertEqual($filtered, 'http://www.example.com.', t('Converting URLs -- do not recognize a dot at the end of an URL (FQDNs).'));
+
+ $filtered = _filter_url('www.example.com/index.php?a=.', 'f');
+ $this->assertEqual($filtered, 'www.example.com/index.php?a=.', t('Converting URLs -- do forget about a dot at the end of a query string.'));
+
+
+ }
+
+ /**
+ * Test the HTML corrector.
+ *
+ * TODO: This test could really use some validity checking function.
+ */
+ function testHtmlCorrector() {
+
+ // Tag closing.
+ $filtered = _filter_htmlcorrector('
text');
+ $this->assertEqual($filtered, '
text
', t('HTML corrector -- tag closing at the end of input.'));
+
+ $filtered = _filter_htmlcorrector('
', t('HTML corrector -- unclosed tag with attribute.'));
+
+ // XHTML slash for empty elements.
+ $filtered = _filter_htmlcorrector(' ');
+ $this->assertEqual($filtered, ' ', t('HTML corrector -- XHTML closing slash.'));
}
function createFormat($filter) {
@@ -233,4 +607,43 @@ class FilterTestCase extends DrupalWebTe
$this->drupalPost('admin/settings/filters/delete/' . $format->format, array(), t('Delete'));
}
}
+
+ /**
+ * Checks that a text contains a given string once it has been lowercased and had its HTML entities decoded; otherwise fails the test with the given message, like the SimpleTest assert* functions.
+ * Nulls, newlines and other characters that could be used to obscure a tag or an attribute name are not removed.
+ * @param $haystack
+ * Text to search in.
+ * @param $needle
+ * Plain, lowercase text to search for.
+ * @param $message
+ * Message to display if the assertion failed.
+ * @param $group
+ * Group this message belongs to; 'Other' unless specified.
+ * @return
+ * TRUE if the assertion passed, FALSE if it failed.
+ */
+ function assertNormalized($haystack, $needle, $message = '', $group = 'Other') {
+ $normalized = strtolower(decode_entities($haystack));
+ return $this->assertTrue(FALSE !== strpos($normalized, $needle), $message, $group);
+ }
+
+ /**
+ * Checks that a text does not contain a given string once it has been lowercased and had its HTML entities decoded; otherwise fails the test with the given message, like the SimpleTest assert* functions.
+ * Nulls, newlines and other characters that could be used to obscure a tag or an attribute name are not removed.
+ * @param $haystack
+ * Text to search in.
+ * @param $needle
+ * Plain, lowercase text that must be absent.
+ * @param $message
+ * Message to display if the assertion failed.
+ * @param $group
+ * Group this message belongs to; 'Other' unless specified.
+ * @return
+ * TRUE if the assertion passed, FALSE if it failed.
+ */
+ function assertNoNormalized($haystack, $needle, $message = '', $group = 'Other') {
+ $normalized = strtolower(decode_entities($haystack));
+ return $this->assertTrue(FALSE === strpos($normalized, $needle), $message, $group);
+ }
+
}