From 10ba67206a7479121621bef50065455a551899eb Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Sun, 17 Apr 2011 13:24:36 -0400
Subject: [PATCH 1/3] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: drupal_html_to_text() formatting is broken and does not have tests. (tests-only patch)

---
 modules/simpletest/tests/mail.test |  160 ++++++++++++++++++++++++++++++++++++
 1 files changed, 160 insertions(+), 0 deletions(-)

diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test
index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..24a5ee7b3a31216db35681cc5ddc7624b4b2775b 100644
--- a/modules/simpletest/tests/mail.test
+++ b/modules/simpletest/tests/mail.test
@@ -63,3 +63,163 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   }
 }
 
+/**
+ * Unit tests for drupal_html_to_text(): tags, $allowed_tags and newlines.
+ */
+class DrupalHtmlToTextTestCase extends DrupalUnitTestCase {
+  public static function getInfo() {
+    return array(
+      'name'  => 'HTML to text conversion',
+      'description' => 'Tests drupal_html_to_text().',
+      'group' => 'Mail',
+    );
+  }
+
+  /**
+   * Tests that each HTML snippet converts to its exact expected plain text.
+   */
+  function testTags() {
+    $tests = array(
+      '<a href = "http://drupal.org">Drupal.org</a>' => 'Drupal.org [1]
+[1] http://drupal.org
+',
+      '<em>Drupal</em>' => '/Drupal/',
+      '<i>Drupal</i>' => '/Drupal/',
+      '<strong>Drupal</strong>' => '*Drupal*',
+      '<b>Drupal</b>' => '*Drupal*',
+      'Drupal<br />Drupal' => 'Drupal
+Drupal',
+      '<p>Drupal</p>' => 'Drupal
+',
+      '<blockquote>Drupal</blockquote>' => '> Drupal',
+      '<ul>Drupal</ul>' => 'Drupal
+',
+      '<ul><li>Drupal</li></ul>' => ' * Drupal
+',
+      '<ul><li>Drupal</li><li>Drupal</li></ul>' => ' * Drupal
+ * Drupal
+',
+      '<ol>Drupal</ol>' => 'Drupal
+',
+      '<ol><li>Drupal</li></ol>' => ' 1) Drupal
+',
+      '<ol><li>Drupal</li><li>Drupal</li></ol>' => ' 1) Drupal
+ 2) Drupal
+',
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => ' * Drupal
+ *  1) Drupal
+    2) Drupal
+',
+      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => ' 1) Drupal
+ 2)  * Drupal
+     * Drupal
+',
+      '<dl>Drupal</dl>' => 'Drupal
+',
+      '<dt>Drupal</dt>' => 'Drupal',
+      '<dl><dt>Drupal</dl>' => 'Drupal
+',
+      '<dl><dt>Drupal<dd>Drupal</dl>' => 'Drupal
+    Drupal
+',
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => 'Drupal
+    Drupal
+',
+      '<h1>Drupal</h1>' => '======== DRUPAL ' . str_repeat('=', 62) . "\n",
+      '<h2>Drupal</h2>' => '-------- DRUPAL ' . str_repeat('-', 62) . "\n",
+      '<h3>Drupal</h3>' => '.... Drupal
+',
+      '<h4>Drupal</h4>' => '.. Drupal
+',
+      '<h5>Drupal</h5>' => 'Drupal
+',
+      '<h6>Drupal</h6>' => 'Drupal
+',
+      'Drupal<hr />' => "Drupal\n" . str_repeat('-', 78),
+      'Drupal<hr>Drupal' => "Drupal\n" . str_repeat('-', 78) . "\nDrupal",
+      // Tests malformed HTML: unclosed or mismatched tags; also a plain <div>.
+      'Drupal<br>Drupal' => 'Drupal
+Drupal',
+      '<ul><li>Drupal<li>Drupal</ul>' => ' * Drupal
+ * Drupal
+',
+      '<ol><li>Drupal<li>Drupal</ol>' => ' 1) Drupal
+ 2) Drupal
+',
+      '<ul><li>Drupal<li>Drupal</ol>' => ' * Drupal
+ * Drupal
+',
+      '<div>Drupal</div>' => 'Drupal
+',
+      // Unsupported tags are expected to be dropped, keeping their text.
+      '<html>Drupal</html>' => 'Drupal',
+      '<script type="text/javascript">Drupal</script>' => 'Drupal',
+    );
+
+    foreach ($tests as $html => $text) {
+      $result = drupal_html_to_text($html);
+      $this->assertEqual($result, $text, t('@html:<br />!first<br />is equal to<br />!second', array(
+        '@html' => var_export($html, TRUE),
+        '!first' => str_replace("\n", '\n', check_plain(var_export($result, TRUE))),
+        '!second' => str_replace("\n", '\n', check_plain(var_export($text, TRUE))),
+      )));
+    }
+  }
+
+  /**
+   * Tests the $allowed_tags argument of drupal_html_to_text().
+   */
+  function testDrupalHtmlToTextArgs() {
+    // The second parameter replaces the default set of tags that get converted.
+    $result = drupal_html_to_text('<b>Drupal</b>', array('b'));
+    $this->assertEqual($result, '*Drupal*', t('Allowed tag found.'));
+
+    $result = drupal_html_to_text('<h1>Drupal</h1>', array('b'));
+    $this->assertEqual($result, 'Drupal', t('Disallowed tag not found.'));
+
+    $result = drupal_html_to_text('<b>Drupal</b>', array('a', 'br', 'h1'));
+    $this->assertEqual($result, 'Drupal', t('Disallowed tags not found.'));
+  }
+
+  /**
+   * Tests that text separated by block-level HTML tags ends up on separate
+   * lines: no "]" may be followed by "[" without a newline in between.
+   */
+  function testDrupalHtmlToTextBlockTagToNewline() {
+    $input = '[text]'
+      . '<address>[address]</address>'
+      . '<blockquote>[blockquote]</blockquote>'
+      . '<br />[br]'
+      . '<del>[del]</del>'
+      . '<div>[div]</div>'
+      . '<dl><dt>[dl-dt]</dt>'
+      . '<dt>[dt]</dt>'
+      . '<dd>[dd]</dd>'
+      . '<dd>[dd]</dd></dl>'
+      . '<h1>[h1]</h1>'
+      . '<h2>[h2]</h2>'
+      . '<h3>[h3]</h3>'
+      . '<h4>[h4]</h4>'
+      . '<h5>[h5]</h5>'
+      . '<h6>[h6]</h6>'
+      . '<hr />[hr]'
+      . '<ins>[ins]</ins>'
+      . '<ol><li>[ol-li]</li>'
+      . '<li>[li]</li></ol>'
+      . '<p>[p]</p>'
+      . '<pre>[pre]</pre>'
+      . '<table><thead><tr><td>[table-thead--tr-td]</td></tr></thead>'
+      . '<tbody><tr><td>[tbody-tr-td]</td></tr>'
+      . '<tr><td>[tr-td]</td></tr></tbody></table>'
+      . '<ul><li>[ul-li]</li>'
+      . '<li>[li]</li></ul>'
+      . '[text]';
+    $output = drupal_html_to_text($input);
+    $this->assertFalse(
+      preg_match('/\][^\n]*\[/s', $output),
+      t('Block-level HTML tags should force newlines: !output',
+        array('!output' => nl2br(check_plain($output)))
+      )
+    );
+  }
+}
-- 
1.7.1


From fb93e2806e3a779e8458bea1989dbe97f095e832 Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Sun, 17 Apr 2011 20:01:42 -0400
Subject: [PATCH 2/3] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: drupal_html_to_text() formatting is broken and does not have tests. (includes/mail.inc patch)

---
 includes/mail.inc |   47 ++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/includes/mail.inc b/includes/mail.inc
index d2febed39686c9bf3f6f7a2bf99fa1377d09f4de..edb4adeb3704a3886f9211eecd7a6800ebff05cb 100644
--- a/includes/mail.inc
+++ b/includes/mail.inc
@@ -366,7 +366,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
   // Cache list of supported tags.
   static $supported_tags;
   if (empty($supported_tags)) {
-    $supported_tags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr');
+    $supported_tags = array('a', 'address', 'del', 'div', 'em', 'i', 'ins', 'strong', 'b', 'br', 'p', 'pre', 'tr', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr');
   }
 
   // Make sure only supported tags are kept.
@@ -416,7 +416,13 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
           array_unshift($lists, '*');
           break;
         case 'ol':
-          array_unshift($lists, 1);
+	  // Support start attribute; see [#345931].
+	  if (preg_match('/\bstart\s*=\s*([\'"]?)([0-9]+)\b/i',$value,$matches)) {
+	    array_unshift($lists, $matches[2]);
+	  }
+	  else {
+	    array_unshift($lists, 1);
+	  }
           break;
         case '/ul':
         case '/ol':
@@ -427,10 +433,24 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
         // Quotation/list markers, non-fancy headers
         case 'blockquote':
           // Format=flowed indentation cannot be mixed with lists.
-          $indent[] = count($lists) ? ' "' : '>';
+          $indent[] = count($lists) ? ' "' : '> ';
           break;
         case 'li':
-          $indent[] = is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
+	  // Support value attribute; see [#345931].
+	  if (is_numeric($lists[0])) {
+	    $inc = ' ';
+	    if (preg_match('/\bvalue\s*=\s*([\'"]?)([0-9]+)\b/i',$value,$matches)) {
+	      $inc .= $matches[2];
+	      $lists[0] = $matches[2] + 1;
+	    }
+	    else {
+	      $inc .= $lists[0]++;
+	    }
+	    $indent[] = $inc . ') ';
+	  }
+	  else {
+	    $indent[] = ' * ';
+	  }
           break;
         case 'dd':
           $indent[] = '    ';
@@ -481,12 +501,22 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
         // Horizontal rulers
         case 'hr':
           // Insert immediately.
-          $output .= drupal_wrap_mail('', implode('', $indent)) . "\n";
-          $output = _drupal_html_to_text_pad($output, '-');
+          $output .= drupal_wrap_mail('', implode('', $indent));
+          if ($output) {
+            $output .= "\n";
+          }
+          $output .= str_repeat('-', 78);
           break;
 
         // Paragraphs and definition lists
+        case '/address':
+        case 'br':
+        case '/ins':
+        case '/del':
+        case '/div':
         case '/p':
+        case '/pre':
+        case '/tr':
         case '/dl':
           $chunk = ''; // Ensure blank new-line.
           break;
@@ -509,6 +539,9 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
         $chunk = $casing($chunk);
       }
       // Format it and apply the current indentation.
+      if ($output) {
+        $output = rtrim($output) . "\n";
+      }
       $output .= drupal_wrap_mail($chunk, implode('', $indent));
       // Remove non-quotation markers from indentation.
       $indent = array_map('_drupal_html_to_text_clean', $indent);
@@ -575,7 +608,7 @@ function _drupal_html_to_text_clean($indent) {
  */
 function _drupal_html_to_text_pad($text, $pad, $prefix = '') {
   // Remove last line break.
-  $text = substr($text, 0, -1);
+  $text = preg_replace('/\n$/s', '', $text);
   // Calculate needed padding space and add it.
   if (($p = strrpos($text, "\n")) === FALSE) {
     $p = -1;
-- 
1.7.1


From 7b6600652b121744b818d14f71b9edd2ac9bd194 Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Mon, 18 Apr 2011 11:41:24 -0400
Subject: [PATCH 3/3] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet: drupal_html_to_text() formatting is broken and does not have tests. (includes/mail.inc patch: use MAIL_LINE_ENDINGS instead of hard-coded "\n")

---
 includes/mail.inc |   28 ++++++++++++++--------------
 1 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/includes/mail.inc b/includes/mail.inc
index edb4adeb3704a3886f9211eecd7a6800ebff05cb..fc6407aa5c2c0352e8a09a4e82cdad052793f874 100644
--- a/includes/mail.inc
+++ b/includes/mail.inc
@@ -318,20 +318,20 @@ function drupal_wrap_mail($text, $indent = '') {
   $clean_indent = _drupal_html_to_text_clean($indent);
   $soft = strpos($clean_indent, ' ') === FALSE;
   // Check if the string has line breaks.
-  if (strpos($text, "\n") !== FALSE) {
+  if (strpos($text, MAIL_LINE_ENDINGS) !== FALSE) {
     // Remove trailing spaces to make existing breaks hard.
-    $text = preg_replace('/ +\n/m', "\n", $text);
+    $text = preg_replace('/ +\n/m', MAIL_LINE_ENDINGS, $text);
     // Wrap each line at the needed width.
-    $lines = explode("\n", $text);
+    $lines = explode(MAIL_LINE_ENDINGS, $text);
     array_walk($lines, '_drupal_wrap_mail_line', array('soft' => $soft, 'length' => strlen($indent)));
-    $text = implode("\n", $lines);
+    $text = implode(MAIL_LINE_ENDINGS, $lines);
   }
   else {
     // Wrap this line.
     _drupal_wrap_mail_line($text, 0, array('soft' => $soft, 'length' => strlen($indent)));
   }
   // Empty lines with nothing but spaces.
-  $text = preg_replace('/^ +\n/m', "\n", $text);
+  $text = preg_replace('/^ +\n/m', MAIL_LINE_ENDINGS, $text);
   // Space-stuff special lines.
   $text = preg_replace('/^(>| |From)/m', ' $1', $text);
   // Apply indentation. We only include non-'>' indentation on the first line.
@@ -388,9 +388,9 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
   $urls = _drupal_html_to_mail_urls();
   $footnotes = '';
   if (count($urls)) {
-    $footnotes .= "\n";
+    $footnotes .= MAIL_LINE_ENDINGS;
     for ($i = 0, $max = count($urls); $i < $max; $i++) {
-      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
+      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . MAIL_LINE_ENDINGS;
     }
   }
 
@@ -464,7 +464,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
         case '/blockquote':
           if (count($lists)) {
             // Append closing quote for inline quotes (immediately).
-            $output = rtrim($output, "> \n") . "\"\n";
+            $output = rtrim($output, "> \r\n") . '"' . MAIL_LINE_ENDINGS;
             $chunk = ''; // Ensure blank new-line.
           }
           // Fall-through
@@ -503,7 +503,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
           // Insert immediately.
           $output .= drupal_wrap_mail('', implode('', $indent));
           if ($output) {
-            $output .= "\n";
+            $output .= MAIL_LINE_ENDINGS;
           }
           $output .= str_repeat('-', 78);
           break;
@@ -540,7 +540,7 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
       }
       // Format it and apply the current indentation.
       if ($output) {
-        $output = rtrim($output) . "\n";
+        $output = rtrim($output) . MAIL_LINE_ENDINGS;
       }
       $output .= drupal_wrap_mail($chunk, implode('', $indent));
       // Remove non-quotation markers from indentation.
@@ -560,9 +560,9 @@ function drupal_html_to_text($string, $allowed_tags = NULL) {
  */
 function _drupal_wrap_mail_line(&$line, $key, $values) {
   // Use soft-breaks only for purely quoted or unindented text.
-  $line = wordwrap($line, 77 - $values['length'], $values['soft'] ? "  \n" : "\n");
+  $line = wordwrap($line, 77 - $values['length'], ($values['soft'] ? ' ' : '') . MAIL_LINE_ENDINGS);
   // Break really long words at the maximum width allowed.
-  $line = wordwrap($line, 996 - $values['length'], $values['soft'] ? " \n" : "\n");
+  $line = wordwrap($line, 996 - $values['length'], ($values['soft'] ? ' ' : '') . MAIL_LINE_ENDINGS);
 }
 
 /**
@@ -610,10 +610,10 @@ function _drupal_html_to_text_pad($text, $pad, $prefix = '') {
   // Remove last line break.
   $text = preg_replace('/\n$/s', '', $text);
   // Calculate needed padding space and add it.
-  if (($p = strrpos($text, "\n")) === FALSE) {
+  if (($p = strrpos($text, MAIL_LINE_ENDINGS)) === FALSE) {
     $p = -1;
   }
   $n = max(0, 79 - (strlen($text) - $p) - strlen($prefix));
   // Add prefix and padding, and restore linebreak.
-  return $text . $prefix . str_repeat($pad, $n) . "\n";
+  return $text . $prefix . str_repeat($pad, $n) . MAIL_LINE_ENDINGS;
 }
-- 
1.7.1

