From ee318356acfd57b4746980198f6c701a1c3563de Mon Sep 17 00:00:00 2001 From: Bob Vincent Date: Sun, 17 Apr 2011 13:24:36 -0400 Subject: [PATCH] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: drupal_html_to_text() formatting is broken and does not have tests. --- includes/mail.inc | 47 +++++++++-- modules/simpletest/tests/mail.test | 160 ++++++++++++++++++++++++++++++++++++ 2 files changed, 200 insertions(+), 7 deletions(-) diff --git a/includes/mail.inc b/includes/mail.inc index d2febed39686c9bf3f6f7a2bf99fa1377d09f4de..edb4adeb3704a3886f9211eecd7a6800ebff05cb 100644 --- a/includes/mail.inc +++ b/includes/mail.inc @@ -366,7 +366,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { // Cache list of supported tags. static $supported_tags; if (empty($supported_tags)) { - $supported_tags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr'); + $supported_tags = array('a', 'address', 'del', 'div', 'em', 'i', 'ins', 'strong', 'b', 'br', 'p', 'pre', 'tr', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr'); } // Make sure only supported tags are kept. @@ -416,7 +416,13 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { array_unshift($lists, '*'); break; case 'ol': - array_unshift($lists, 1); + // Support start attribute; see [#345931]. + if (preg_match('/\bstart\s*=\s*([\'"]?)([0-9]+)\b/i',$value,$matches)) { + array_unshift($lists, $matches[2]); + } + else { + array_unshift($lists, 1); + } break; case '/ul': case '/ol': @@ -427,10 +433,24 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { // Quotation/list markers, non-fancy headers case 'blockquote': // Format=flowed indentation cannot be mixed with lists. - $indent[] = count($lists) ? ' "' : '>'; + $indent[] = count($lists) ? ' "' : '> '; break; case 'li': - $indent[] = is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * '; + // Support value attribute; see [#345931]. + if (is_numeric($lists[0])) { + $inc = ' '; + if (preg_match('/\bvalue\s*=\s*([\'"]?)([0-9]+)\b/i',$value,$matches)) { + $inc .= $matches[2]; + $lists[0] = $matches[2] + 1; + } + else { + $inc .= $lists[0]++; + } + $indent[] = $inc . ') '; + } + else { + $indent[] = ' * '; + } break; case 'dd': $indent[] = ' '; @@ -481,12 +501,22 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { // Horizontal rulers case 'hr': // Insert immediately. - $output .= drupal_wrap_mail('', implode('', $indent)) . "\n"; - $output = _drupal_html_to_text_pad($output, '-'); + $output .= drupal_wrap_mail('', implode('', $indent)); + if ($output) { + $output .= "\n"; + } + $output .= str_repeat('-', 78); break; // Paragraphs and definition lists + case '/address': + case 'br': + case '/ins': + case '/del': + case '/div': case '/p': + case '/pre': + case '/tr': case '/dl': $chunk = ''; // Ensure blank new-line. break; @@ -509,6 +539,9 @@ function drupal_html_to_text($string, $allowed_tags = NULL) { $chunk = $casing($chunk); } // Format it and apply the current indentation. + if ($output) { + $output = rtrim($output) . "\n"; + } $output .= drupal_wrap_mail($chunk, implode('', $indent)); // Remove non-quotation markers from indentation. $indent = array_map('_drupal_html_to_text_clean', $indent); @@ -575,7 +608,7 @@ function _drupal_html_to_text_clean($indent) { */ function _drupal_html_to_text_pad($text, $pad, $prefix = '') { // Remove last line break. - $text = substr($text, 0, -1); + $text = preg_replace('/\n$/s', '', $text); // Calculate needed padding space and add it. if (($p = strrpos($text, "\n")) === FALSE) { $p = -1; diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..24a5ee7b3a31216db35681cc5ddc7624b4b2775b 100644 --- a/modules/simpletest/tests/mail.test +++ b/modules/simpletest/tests/mail.test @@ -63,3 +63,163 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface { } } +/** + * Unit tests for drupal_html_to_text(). + */ +class DrupalHtmlToTextTestCase extends DrupalUnitTestCase { + public static function getInfo() { + return array( + 'name' => 'HTML to text conversion', + 'description' => 'Tests drupal_html_to_text().', + 'group' => 'Mail', + ); + } + + /** + * Test all supported tags of drupal_html_to_text(). + */ + function testTags() { + $tests = array( + 'Drupal.org' => 'Drupal.org [1] +[1] http://drupal.org +', + 'Drupal' => '/Drupal/', + 'Drupal' => '/Drupal/', + 'Drupal' => '*Drupal*', + 'Drupal' => '*Drupal*', + 'Drupal
Drupal' => 'Drupal +Drupal', + '

Drupal

' => 'Drupal +', + '
Drupal
' => '> Drupal', + '' => 'Drupal +', + '' => ' * Drupal +', + '' => ' * Drupal + * Drupal +', + '
    Drupal
' => 'Drupal +', + '
  1. Drupal
' => ' 1) Drupal +', + '
  1. Drupal
  2. Drupal
' => ' 1) Drupal + 2) Drupal +', + '' => ' * Drupal + * 1) Drupal + 2) Drupal +', + '
  1. Drupal
    • Drupal
    • Drupal
' => ' 1) Drupal + 2) * Drupal + * Drupal +', + '
Drupal
' => 'Drupal +', + '
Drupal
' => 'Drupal', + '
Drupal
' => 'Drupal +', + '
Drupal
Drupal
' => 'Drupal + Drupal +', + '
Drupal
Drupal
' => 'Drupal + Drupal +', + '

Drupal

' => '======== DRUPAL ' . str_repeat('=', 62) . "\n", + '

Drupal

' => '-------- DRUPAL ' . str_repeat('-', 62) . "\n", + '

Drupal

' => '.... Drupal +', + '

Drupal

' => '.. Drupal +', + '
Drupal
' => 'Drupal +', + '
Drupal
' => 'Drupal +', + 'Drupal
' => "Drupal\n" . str_repeat('-', 78), + 'Drupal
Drupal' => "Drupal\n" . str_repeat('-', 78) . "\nDrupal", + // Tests malformed HTML tags. + 'Drupal
Drupal' => 'Drupal +Drupal', + '' => ' * Drupal + * Drupal +', + '
  1. Drupal
  2. Drupal
' => ' 1) Drupal + 2) Drupal +', + '