From 75b87eb2384d3539012823fca26aa4ce6496b1e3 Mon Sep 17 00:00:00 2001
From: Bob Vincent <bobvin@pillars.net>
Date: Mon, 30 May 2011 05:50:33 -0400
Subject: [PATCH] Issue #299138 by catch, Kevin Hankens, drewish, arjenk, jrglasgow, stella, sun, kscheirer, lilou, pillarsdotnet, stephandale, salvis: Improve drupal_html_to_text().

---
 includes/mail.inc                  |  891 +++++++++++++++++++++++++++---------
 modules/simpletest/tests/mail.test |  492 ++++++++++++++++++++
 2 files changed, 1163 insertions(+), 220 deletions(-)

diff --git a/includes/mail.inc b/includes/mail.inc
index be2df923427ec363f671132771e9c97ee490c090..a0a55b3cfefabecce7c9281c08575657b2b4b325 100644
--- a/includes/mail.inc
+++ b/includes/mail.inc
@@ -55,7 +55,7 @@ define('MAIL_LINE_ENDINGS', isset($_SERVER['WINDIR']) || strpos($_SERVER['SERVER
  *     $data['user'] = $params['account'];
  *     $options['language'] = $message['language'];
  *     user_mail_tokens($variables, $data, $options);
- *     switch($key) {
+ *     switch ($key) {
  *       case 'notice':
  *         $langcode = $message['language']->language;
  *         $message['subject'] = t('Notification from !site', $variables, array('langcode' => $langcode));
@@ -267,7 +267,7 @@ interface MailSystemInterface {
    * @return
    *   The formatted $message.
    */
-   public function format(array $message);
+  public function format(array $message);
 
   /**
    * Send a message composed by drupal_mail().
@@ -294,7 +294,7 @@ interface MailSystemInterface {
    * @return
    *   TRUE if the mail was successfully accepted for delivery, otherwise FALSE.
    */
-   public function mail(array $message);
+  public function mail(array $message);
 }
 
 /**
@@ -303,41 +303,62 @@ interface MailSystemInterface {
  * We use delsp=yes wrapping, but only break non-spaced languages when
  * absolutely necessary to avoid compatibility issues.
  *
- * We deliberately use LF rather than CRLF, see drupal_mail().
+ * We deliberately use variable_get('mail_line_endings', MAIL_LINE_ENDINGS)
+ * rather than "\r\n".
  *
  * @param $text
  *   The plain text to process.
- * @param $indent (optional)
- *   A string to indent the text with. Only '>' characters are repeated on
- *   subsequent wrapped lines. Others are replaced by spaces.
+ * @param array $options
+ *   (optional) An array containing one or more of the following keys:
+ *   - indent: A string to indent the text with. Only '>' characters are
+ *     repeated on subsequent wrapped lines. Others are replaced by spaces.
+ *   - max: The maximum length at which to wrap each line. Defaults to 80.
+ *   - stuff: Whether to space-stuff special lines.  Defaults to TRUE.
+ *   - hard: Whether to enforce the maximum line length even if no convenient
+ *     space character is available.  Defaults to FALSE.
+ *   - pad: A string to use for padding short lines to 'max' characters.  If
+ *     more than one character, only the last will be repeated.
+ *   - break: The line break sequence to insert.  The default is one of the
+ *     following:
+ *     - "\r\n": Windows, when $text does not contain a space character.
+ *     - "\n": Non-Windows, when $text does not contain a space character.
+ *     - " \r\n": On Windows, when $text contains at least one space.
+ *     - " \n": Non-Windows, when $text contains at least one space.
+ *
+ * @see drupal_mail()
  */
-function drupal_wrap_mail($text, $indent = '') {
-  // Convert CRLF into LF.
-  $text = str_replace("\r", '', $text);
-  // See if soft-wrapping is allowed.
-  $clean_indent = _drupal_html_to_text_clean($indent);
-  $soft = strpos($clean_indent, ' ') === FALSE;
-  // Check if the string has line breaks.
-  if (strpos($text, "\n") !== FALSE) {
-    // Remove trailing spaces to make existing breaks hard.
-    $text = preg_replace('/ +\n/m', "\n", $text);
-    // Wrap each line at the needed width.
-    $lines = explode("\n", $text);
-    array_walk($lines, '_drupal_wrap_mail_line', array('soft' => $soft, 'length' => strlen($indent)));
-    $text = implode("\n", $lines);
+function drupal_wrap_mail($text, array $options = array()) {
+  static $defaults;
+  if (!isset($defaults)) {
+    $defaults = array(
+      'indent' => '',
+      'pad' => '',
+      'pad_repeat' => '',
+      'max' => 80,
+      'stuff' => TRUE,
+      'hard' => FALSE,
+      'eol' => variable_get('mail_line_endings', MAIL_LINE_ENDINGS),
+    );
   }
-  else {
-    // Wrap this line.
-    _drupal_wrap_mail_line($text, 0, array('soft' => $soft, 'length' => strlen($indent)));
+  $options += $defaults;
+  if (!isset($options['break'])) {
+    // Allow soft-wrap spaces only when $text contains at least one space.
+    $options['break'] = (strpos($text, ' ') === FALSE ? '' : ' ') . $defaults['eol'];
   }
-  // Empty lines with nothing but spaces.
-  $text = preg_replace('/^ +\n/m', "\n", $text);
-  // Space-stuff special lines.
-  $text = preg_replace('/^(>| |From)/m', ' $1', $text);
-  // Apply indentation. We only include non-'>' indentation on the first line.
-  $text = $indent . substr(preg_replace('/^/m', $clean_indent, $text), strlen($indent));
-
-  return $text;
+  $options['wrap'] = $options['max'] - drupal_strlen($options['indent']);
+  if ($options['pad']) {
+    $options['pad_repeat'] = drupal_substr($options['pad'], -1, 1);
+  }
+  // The 'clean' indent is applied to all lines after the first one.
+  $options['clean'] = _drupal_html_to_text_clean($options['indent']);
+  // Wrap lines according to RFC 3676.
+  $lines = explode($defaults['eol'], $text);
+  array_walk($lines, '_drupal_wrap_mail_line', $options);
+  // Expand the lines array on newly-inserted line breaks.
+  $lines = explode($defaults['eol'], implode($defaults['eol'], $lines));
+  // Apply indentation, space-stuffing, and padding.
+  array_walk($lines, '_drupal_indent_mail_line', $options);
+  return implode($defaults['eol'], $lines);
 }
 
 /**
@@ -347,240 +368,670 @@ function drupal_wrap_mail($text, $indent = '') {
  * The output will be suitable for use as 'format=flowed; delsp=yes' text
  * (RFC 3676) and can be passed directly to drupal_mail() for sending.
  *
- * We deliberately use LF rather than CRLF, see drupal_mail().
+ * We deliberately use variable_get('mail_line_endings', MAIL_LINE_ENDINGS)
+ * rather than "\r\n".
  *
  * This function provides suitable alternatives for the following tags:
- * <a> <em> <i> <strong> <b> <br> <p> <blockquote> <ul> <ol> <li> <dl> <dt>
- * <dd> <h1> <h2> <h3> <h4> <h5> <h6> <hr>
+ *
+ * <a> <address> <b> <blockquote> <br /> <caption> <cite> <dd> <div> <dl> <dt>
+ * <em> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <ol> <p> <pre> <strong>
+ * <table> <tbody> <td> <tfoot> <th> <thead> <tr> <u> <ul>
+ *
+ * The following tag attributes are supported:
+ * - <a href=...>: Hyperlink destination urls.
+ * - <li value=...>: Ordered list item numbers.
+ * - <ol start=...>: Ordered list start number.
  *
  * @param $string
  *   The string to be transformed.
- * @param $allowed_tags (optional)
- *   If supplied, a list of tags that will be transformed. If omitted, all
- *   all supported tags are transformed.
+ * @param $allowed_tags
+ *   (optional) If supplied, a list of tags that will be transformed. If
+ *   omitted, all supported tags are transformed.
  *
  * @return
  *   The transformed string.
+ *
+ * @see drupal_mail()
  */
 function drupal_html_to_text($string, $allowed_tags = NULL) {
+  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
   // Cache list of supported tags.
   static $supported_tags;
-  if (empty($supported_tags)) {
-    $supported_tags = array('a', 'em', 'i', 'strong', 'b', 'br', 'p', 'blockquote', 'ul', 'ol', 'li', 'dl', 'dt', 'dd', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr');
+  if (!isset($supported_tags)) {
+    $supported_tags = array(
+      'a', 'address', 'b', 'blockquote', 'br', 'cite', 'dd', 'div', 'dl',
+      'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'li',
+      'ol', 'p', 'pre', 'strong', 'table', 'td', 'tr', 'u', 'ul',
+    );
   }
 
   // Make sure only supported tags are kept.
   $allowed_tags = isset($allowed_tags) ? array_intersect($supported_tags, $allowed_tags) : $supported_tags;
 
-  // Make sure tags, entities and attributes are well-formed and properly nested.
-  $string = _filter_htmlcorrector(filter_xss($string, $allowed_tags));
-
-  // Apply inline styles.
-  $string = preg_replace('!</?(em|i)((?> +)[^>]*)?>!i', '/', $string);
-  $string = preg_replace('!</?(strong|b)((?> +)[^>]*)?>!i', '*', $string);
-
-  // Replace inline <a> tags with the text of link and a footnote.
-  // 'See <a href="http://drupal.org">the Drupal site</a>' becomes
-  // 'See the Drupal site [1]' with the URL included as a footnote.
-  _drupal_html_to_mail_urls(NULL, TRUE);
-  $pattern = '@(<a[^>]+?href="([^"]*)"[^>]*?>(.+?)</a>)@i';
-  $string = preg_replace_callback($pattern, '_drupal_html_to_mail_urls', $string);
-  $urls = _drupal_html_to_mail_urls();
-  $footnotes = '';
-  if (count($urls)) {
-    $footnotes .= "\n";
-    for ($i = 0, $max = count($urls); $i < $max; $i++) {
-      $footnotes .= '[' . ($i + 1) . '] ' . $urls[$i] . "\n";
+  // Parse $string into a DOM tree.
+  $dom = filter_dom_load($string);
+  $notes = array();
+  // Recursively convert the DOM tree into plain text.
+  $text = _drupal_html_to_text($dom->documentElement, $allowed_tags, $notes);
+  // Hard-wrap at 1000 characters (including the line break sequence)
+  // and space-stuff special lines.
+  $text = drupal_wrap_mail($text, array('max' => 1000 - strlen($eol), 'hard' => TRUE));
+  // Change non-breaking spaces back to regular spaces, and trim line breaks.
+  // chr(160) is the non-breaking space character.
+  $text = str_replace(chr(160), ' ', trim($text, $eol));
+  // Add footnotes.
+  if ($notes) {
+    // Add a blank line before the footnote list.
+    $text .= $eol;
+    foreach ($notes as $url => $note) {
+      $text .= $eol . '[' . $note . '] ' . $url;
     }
   }
-
-  // Split tags from text.
-  $split = preg_split('/<([^>]+?)>/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
-  // Note: PHP ensures the array consists of alternating delimiters and literals
-  // and begins and ends with a literal (inserting $null as required).
-
-  $tag = FALSE; // Odd/even counter (tag or no tag)
-  $casing = NULL; // Case conversion function
-  $output = '';
-  $indent = array(); // All current indentation string chunks
-  $lists = array(); // Array of counters for opened lists
-  foreach ($split as $value) {
-    $chunk = NULL; // Holds a string ready to be formatted and output.
-
-    // Process HTML tags (but don't output any literally).
-    if ($tag) {
-      list($tagname) = explode(' ', strtolower($value), 2);
-      switch ($tagname) {
-        // List counters
-        case 'ul':
-          array_unshift($lists, '*');
-          break;
-        case 'ol':
-          array_unshift($lists, 1);
-          break;
-        case '/ul':
-        case '/ol':
-          array_shift($lists);
-          $chunk = ''; // Ensure blank new-line.
-          break;
-
-        // Quotation/list markers, non-fancy headers
-        case 'blockquote':
-          // Format=flowed indentation cannot be mixed with lists.
-          $indent[] = count($lists) ? ' "' : '>';
-          break;
-        case 'li':
-          $indent[] = is_numeric($lists[0]) ? ' ' . $lists[0]++ . ') ' : ' * ';
-          break;
-        case 'dd':
-          $indent[] = '    ';
-          break;
-        case 'h3':
-          $indent[] = '.... ';
-          break;
-        case 'h4':
-          $indent[] = '.. ';
-          break;
-        case '/blockquote':
-          if (count($lists)) {
-            // Append closing quote for inline quotes (immediately).
-            $output = rtrim($output, "> \n") . "\"\n";
-            $chunk = ''; // Ensure blank new-line.
-          }
-          // Fall-through
-        case '/li':
-        case '/dd':
-          array_pop($indent);
-          break;
-        case '/h3':
-        case '/h4':
-          array_pop($indent);
-        case '/h5':
-        case '/h6':
-          $chunk = ''; // Ensure blank new-line.
-          break;
-
-        // Fancy headers
-        case 'h1':
-          $indent[] = '======== ';
-          $casing = 'drupal_strtoupper';
-          break;
-        case 'h2':
-          $indent[] = '-------- ';
-          $casing = 'drupal_strtoupper';
-          break;
-        case '/h1':
-        case '/h2':
-          $casing = NULL;
-          // Pad the line with dashes.
-          $output = _drupal_html_to_text_pad($output, ($tagname == '/h1') ? '=' : '-', ' ');
-          array_pop($indent);
-          $chunk = ''; // Ensure blank new-line.
-          break;
-
-        // Horizontal rulers
-        case 'hr':
-          // Insert immediately.
-          $output .= drupal_wrap_mail('', implode('', $indent)) . "\n";
-          $output = _drupal_html_to_text_pad($output, '-');
-          break;
-
-        // Paragraphs and definition lists
-        case '/p':
-        case '/dl':
-          $chunk = ''; // Ensure blank new-line.
-          break;
-      }
+  return $text;
+}
+
+/**
+ * Helper function for drupal_html_to_text().
+ *
+ * Recursively converts $node to text, wrapping and indenting as necessary.
+ *
+ * @param $node
+ *   The source DOMNode.
+ * @param $allowed_tags
+ *   A list of tags that will be transformed.
+ * @param array &$notes
+ *   A writeable array of footnote reference numbers, keyed by their
+ *   respective hyperlink destination urls.
+ * @param $line_length
+ *   The maximum length of a line, for wrapping.  Defaults to 80 characters.
+ * @param array $parents
+ *   The list of ancestor tags, from nearest to most distant.  Defaults to an
+ *   empty array().
+ * @param &$count
+ *   The number to use for the next list item within an ordered list.  Defaults
+ *   to 1.
+ */
+function _drupal_html_to_text(DOMNode $node, array $allowed_tags, array &$notes, $line_length = 80, array $parents = array(), &$count = NULL) {
+  if (!isset($count)) {
+    $count = 1;
+  }
+  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+  if ($node->nodeType === XML_TEXT_NODE) {
+    // For text nodes, we just copy the text content.
+    $text = $node->textContent;
+    // Convert line breaks and trim trailing spaces.
+    $text = preg_replace('/ *\r?\n/', $eol, $text);
+    if (in_array('pre', $parents)) {
+      // Within <pre> tags, all spaces become non-breaking.
+      // chr(160) is the non-breaking space character.
+      $text = str_replace(' ', chr(160), $text);
     }
-    // Process blocks of text.
     else {
-      // Convert inline HTML text to plain text; not removing line-breaks or
-      // white-space, since that breaks newlines when sanitizing plain-text.
-      $value = trim(decode_entities($value));
-      if (drupal_strlen($value)) {
-        $chunk = $value;
-      }
+      // Outside <pre> tags, collapse whitespace.
+      $text = preg_replace('/[[:space:]]+/', ' ', $text);
     }
+    return $text;
+  }
+  // Non-text node.
+  $tag = '';
+  $text = '';
+  $child_text = '';
+  $child_count = 1;
+  $indent = '';
+  $prefix = '';
+  $suffix = '';
+  $pad = '';
+  if (isset($node->tagName) && in_array($node->tagName, $allowed_tags)) {
+    $tag = $node->tagName;
+    switch ($tag) {
+      // Turn links with valid hrefs into footnotes.
+      case 'a':
+        $test = !empty($node->attributes);
+        $test = $test && ($href = $node->attributes->getNamedItem('href'));
+        $test = $test && ($url = url(preg_replace('|^' . preg_quote(base_path(), '|') . '|', '', $href->nodeValue), array('absolute' => TRUE)));
+        $test = $test && valid_url($url);
+        if ($test) {
+          // Only add links that have not already been added.
+          if (isset($notes[$url])) {
+            $note = $notes[$url];
+          }
+          else {
+            $note = count($notes) + 1;
+            $notes[$url] = $note;
+          }
+          $suffix = ' [' . $note . ']';
+        }
+        break;
 
-    // See if there is something waiting to be output.
-    if (isset($chunk)) {
-      // Apply any necessary case conversion.
-      if (isset($casing)) {
-        $chunk = $casing($chunk);
-      }
-      // Format it and apply the current indentation.
-      $output .= drupal_wrap_mail($chunk, implode('', $indent));
-      // Remove non-quotation markers from indentation.
-      $indent = array_map('_drupal_html_to_text_clean', $indent);
-    }
+      // Generic block-level tags.
+      case 'address':
+      case 'caption':
+      case 'div':
+      case 'p':
+      case 'pre':
+        // Start on a new line unless this is the first child of a list item.
+        if (!isset($parents[0]) || $parents[0] !== 'li' || !$node->isSameNode($node->parentNode->firstChild)) {
+          $text = $eol;
+        }
+        $suffix = $eol;
+        break;
+
+      // Forced line break.
+      case 'br':
+        $text = $eol;
+        break;
+
+      // Boldface by wrapping with "*" characters.
+      case 'b':
+      case 'strong':
+        $prefix = '*';
+        $suffix = '*';
+        break;
+
+      // Italicize by wrapping with "/" characters.
+      case 'cite':
+      case 'em':
+      case 'i':
+        $prefix = '/';
+        $suffix = '/';
+        break;
+
+      // Underline by wrapping with "_" characters.
+      case 'u':
+        $prefix = '_';
+        $suffix = '_';
+        break;
+
+      // Blockquotes are indented by "> " at each level.
+      case 'blockquote':
+        $text = $eol;
+        // chr(160) is the non-breaking space character.
+        $indent = '>' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Dictionary definitions are indented by four spaces.
+      case 'dd':
+        // chr(160) is the non-breaking space character.
+        $indent = chr(160) . chr(160) . chr(160) . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Dictionary list.
+      case 'dl':
+        // Start on a new line unless this is the first child of a list item.
+        if (!isset($parents[0]) || $parents[0] !== 'li' || !$node->isSameNode($node->parentNode->firstChild)) {
+          $text = $eol;
+        }
+        $suffix = $eol;
+        break;
+
+      // Dictionary term.
+      case 'dt':
+        $suffix = $eol;
+        break;
+
+      // Header level 1 is prefixed by eight "=" characters.
+      case 'h1':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '========' . chr(160);
+        $pad = chr(160) . '=';
+        $suffix = $eol;
+        break;
+
+      // Header level 2 is prefixed by six "-" characters.
+      case 'h2':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '------' . chr(160);
+        $pad = chr(160) . '-';
+        $suffix = $eol;
+        break;
+
+      // Header level 3 is prefixed by four "." characters and a space.
+      case 'h3':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '....' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Header level 4 is prefixed by three "." characters and a space.
+      case 'h4':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '...' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Header level 5 is prefixed by two "." characters and a space.
+      case 'h5':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '..' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Header level 6 is prefixed by one "." character and a space.
+      case 'h6':
+        $text = "$eol$eol";
+        // chr(160) is the non-breaking space character.
+        $indent = '.' . chr(160);
+        $suffix = $eol;
+        break;
+
+      // Horizontal rulers become a line of "-" characters.
+      case 'hr':
+        $text = $eol;
+        $child_text = '-';
+        $pad = '-';
+        $suffix = $eol;
+        break;
+
+      // List items are treated differently depending on the parent tag.
+      case 'li':
+        // Ordered list item.
+        if (reset($parents) === 'ol') {
+          // Check the value attribute.
+          $test = !empty($node->attributes);
+          $test = $test && ($value = $node->attributes->getNamedItem('value'));
+          if ($test) {
+            $count = $value->nodeValue;
+          }
+          // chr(160) is the non-breaking space character.
+          $indent = ($count < 10 ? chr(160) : '') . chr(160) . "$count)" . chr(160);
+          $count++;
+        }
+        // Unordered list item.
+        else {
+          // chr(160) is the non-breaking space character.
+          $indent = chr(160) . '*' . chr(160);
+        }
+        $suffix = $eol;
+        break;
+
+      // Ordered lists.
+      case 'ol':
+        // Start on a new line unless this is the first child of a list item.
+        if (!isset($parents[0]) || $parents[0] !== 'li' || !$node->isSameNode($node->parentNode->firstChild)) {
+          $text = $eol;
+        }
+        // Check the start attribute.
+        $test = !empty($node->attributes);
+        $test = $test && ($value = $node->attributes->getNamedItem('start'));
+        if ($test) {
+          $child_count = $value->nodeValue;
+        }
+        break;
+
+      // Tables require special handling.
+      case 'table':
+        return _drupal_html_to_text_table($node, $allowed_tags, $notes, $line_length);
+
+      // Separate adjacent table cells by two non-breaking spaces.
+      case 'td':
+        if (!empty($node->nextSibling)) {
+          // chr(160) is the non-breaking space character.
+          $suffix = chr(160) . chr(160);
+        }
+        break;
+
+      // End each table row with a newline.
+      case 'tr':
+        $suffix = $eol;
+        break;
 
-    $tag = !$tag;
+      // Unordered lists.
+      case 'ul':
+        // Start on a new line unless this is the first child of a list item.
+        if (!isset($parents[0]) || $parents[0] !== 'li' || !$node->isSameNode($node->parentNode->firstChild)) {
+          $text = $eol;
+        }
+        break;
+
+      default:
+        // Coder review complains if there is no default case.
+        break;
+    }
+    // Only add allowed tags to the $parents array.
+    array_unshift($parents, $tag);
+  }
+  // Copy each child node to output.
+  if ($node->hasChildNodes()) {
+    foreach ($node->childNodes as $child) {
+      $child_text .= _drupal_html_to_text($child, $allowed_tags, $notes, $line_length - drupal_strlen($indent), $parents, $child_count);
+    }
+  }
+  // We only add prefix and suffix if the child nodes were non-empty.
+  if ($child_text > '') {
+    // We capitalize the contents of h1 and h2 tags.
+    if ($tag === 'h1' || $tag === 'h2') {
+      $child_text = drupal_strtoupper($child_text);
+    }
+    // Don't add a newline to an existing newline.
+    if ($suffix === $eol && drupal_substr($child_text, - drupal_strlen($eol)) === $eol) {
+      $suffix = '';
+    }
+    // Trim spaces around newlines except with <pre> or inline tags.
+    if (!in_array($tag, array('a', 'b', 'cite', 'em', 'i', 'pre', 'strong', 'u'))) {
+      $child_text = preg_replace('/ *' . $eol . ' */', $eol, $child_text);
+    }
+    // Soft-wrap at effective line length, but don't space-stuff.
+    $child_text = drupal_wrap_mail(
+      $prefix . $child_text,
+      array(
+        // chr(160) is the non-breaking space character.
+        'break' => chr(160) . $eol,
+        'indent' => $indent,
+        'max' => $line_length,
+        'pad' => $pad,
+        'stuff' => FALSE,
+      )
+    ) . $suffix;
+    if ($tag === 'pre') {
+      // Perform RFC-3676 soft-wrapping.
+      // chr(160) is the non-breaking space character.
+      $child_text = str_replace(chr(160), ' ', $child_text);
+      $child_text = drupal_wrap_mail(
+        $child_text,
+        array('max' => $line_length, 'stuff' => FALSE)
+      );
+      // chr(160) is the non-breaking space character.
+      $child_text = str_replace(' ', chr(160), $child_text);
+    }
+    $text .= $child_text;
   }
+  return $text;
+}
+
+/**
+ * Helper function for _drupal_html_to_text().
+ *
+ * Renders a <table> DOM Node into plain text.  Attributes such as rowspan,
+ * colspan, padding, border, etc. are ignored.
+ *
+ * @param DOMNode $node
+ *   The DOMNode corresponding to the <table> tag and its contents.
+ * @param $allowed_tags
+ *   The list of allowed tags passed to _drupal_html_to_text().
+ * @param array &$notes
+ *   A writeable array of footnote reference numbers, keyed by their
+ *   respective hyperlink destination urls.
+ * @param $table_width
+ *   The desired maximum width of the rendered table, borders included.
+ *
+ * @return
+ *   A plain text representation of the table.
+ *
+ * @see _drupal_html_to_text()
+ */
+function _drupal_html_to_text_table(DOMNode $node, $allowed_tags = NULL, array &$notes = array(), $table_width = 80) {
+  $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+  $header = array();
+  $footer = array();
+  $body = array();
+  $text = $eol;
+  $node->normalize();
+  $current = $node;
+  while (TRUE) {
+    if (isset($current->tagName)) {
+      switch ($current->tagName) {
+        case 'caption':  // The table caption is added first.
+          $text = _drupal_html_to_text($current, $allowed_tags, $notes, $table_width);
+          break;
+
+        case 'tr':
+          switch ($current->parentNode->tagName) {
+            case 'thead':
+              $header[] = $current;
+              break;
+
+            case 'tfoot':
+              $footer[] = $current;
+              break;
 
-  return $output . $footnotes;
+            default: // Either 'tbody' or 'table'
+              $body[] = $current;
+              break;
+          }
+          break;
+
+        default:
+          if ($current->hasChildNodes()) {
+            $current = $current->firstChild;
+            continue 2;
+          }
+      }
+    }
+    do {
+      if ($current->nextSibling) {
+        $current = $current->nextSibling;
+        continue 2;
+      }
+      $current = $current->parentNode;
+    } while ($current && !$current->isSameNode($node));
+    break;
+  }
+  // Merge the thead, tbody, and tfoot sections together.
+  if ($rows = array_merge($header, $body, $footer)) {
+    $num_rows = count($rows);
+    // First just count the number of columns.
+    $num_cols = 0;
+    foreach ($rows as $row) {
+      $row_cols = 0;
+      foreach ($row->childNodes as $cell) {
+        if (isset($cell->tagName) && in_array($cell->tagName, array('td', 'th'))) {
+          $row_cols++;
+        }
+      }
+      $num_cols = max($num_cols, $row_cols);
+    }
+    // If any columns were found, calculate each column height and width.
+    if ($num_cols) {
+      // Set up a binary search for best wrap width for each column.
+      $max = max($table_width - $num_cols - 1, 1);
+      $max_wraps = array_fill(0, $num_cols, $max);
+      $try = max(intval(($table_width - 1) / $num_cols - 1), 1);
+      $try_wraps = array_fill(0, $num_cols, $try);
+      $min_wraps = array_fill(0, $num_cols, 1);
+      // Start searching...
+      $change = FALSE;
+      do {
+        $change = FALSE;
+        $widths = array_fill(0, $num_cols, 0);
+        $heights = array_fill(0, $num_rows, 0);
+        $table = array_fill(0, $num_rows, array_fill(0, $num_cols, ''));
+        $breaks = array_fill(0, $num_cols, FALSE);
+        foreach ($rows as $i => $row) {
+          $j = 0;
+          foreach ($row->childNodes as $cell) {
+            if (!isset($cell->tagName) || !in_array($cell->tagName, array('td', 'th'))) {
+              // Skip text nodes.
+              continue;
+            }
+            // Render the cell contents.
+            $cell = _drupal_html_to_text($cell, $allowed_tags, $notes, $try_wraps[$j]);
+            // Trim leading line-breaks and trailing whitespace.
+            // chr(160) is the non-breaking space character.
+            $cell = rtrim(ltrim($cell, $eol), ' ' . $eol . chr(160));
+            $table[$i][$j] = $cell;
+            if ($cell > '') {
+              // Split the cell into lines.
+              $lines = explode($eol, $cell);
+              // The row height is the maximum number of lines among all the
+              // cells in that row.
+              $heights[$i] = max($heights[$i], count($lines));
+              foreach ($lines as $line) {
+                $this_width = drupal_strlen($line);
+                // The column width is the maximum line width among all the
+                // lines in that column.
+                if ($this_width > $widths[$j]) {
+                  $widths[$j] = $this_width;
+                  // If the longest line in a column contains at least one
+                  // space character, then the table can be made narrower.
+                  $breaks[$j] = strpos($line, ' ') !== FALSE;
+                }
+              }
+            }
+            $j++;
+          }
+        }
+        // Calculate the total table width, including the column borders.
+        $this_width = array_sum($widths) + $num_cols + 1;
+        if ($this_width > $table_width) {
+          // Wider than desired.
+          if (!in_array(TRUE, $breaks)) {
+            // If there are no more break points, then the table is already as
+            // narrow as it can get, so we're done.
+            break;
+          }
+          foreach ($try_wraps as $i => $wrap) {
+            $max_wraps[$i] = min($max_wraps[$i], $wrap);
+            if ($breaks[$i]) {
+              $new_wrap = intval(($min_wraps[$i] + $max_wraps[$i]) / 2);
+              $new_wrap = min($new_wrap, $widths[$i] - 1);
+              $new_wrap = max($new_wrap, $min_wraps[$i]);
+            }
+            else {
+              // There's no point in trying to make the column narrower than
+              // the widest un-wrappable line in the column.
+              $min_wraps[$i] = $widths[$i];
+              $new_wrap = $widths[$i];
+            }
+            if ($try_wraps[$i] > $new_wrap) {
+              $try_wraps[$i] = $new_wrap;
+              $change = TRUE;
+            }
+          }
+        }
+        elseif ($this_width < $table_width) {
+          // Narrower than desired.
+          foreach ($try_wraps as $i => $wrap) {
+            if ($min_wraps[$i] < $wrap) {
+              $min_wraps[$i] = $wrap;
+            }
+            $new_wrap = intval(($min_wraps[$i] + $max_wraps[$i]) / 2);
+            $new_wrap = max($new_wrap, $widths[$i] + 1);
+            $new_wrap = min($new_wrap, $max_wraps[$i]);
+            if ($try_wraps[$i] < $new_wrap) {
+              $try_wraps[$i] = $new_wrap;
+              $change = TRUE;
+            }
+          }
+        }
+      } while ($change);
+      // Pad each cell to column width and line height.
+      for ($i = 0; $i < $num_rows; $i++) {
+        if ($heights[$i]) {
+          for ($j = 0; $j < $num_cols; $j++) {
+            $cell = $table[$i][$j];
+            // Pad each cell to the maximum number of lines in that row.
+            $lines = array_pad(explode($eol, $cell), $heights[$i], '');
+            foreach ($lines as $k => $line) {
+              // Pad each line to the maximum width in that column.
+              $repeat = $widths[$j] - drupal_strlen($line);
+              if ($repeat > 0) {
+                // chr(160) is the non-breaking space character.
+                $lines[$k] .= str_repeat(chr(160), $repeat);
+              }
+            }
+            $table[$i][$j] = $lines;
+          }
+        }
+      }
+      // Generate the row separator line.
+      $separator = '+';
+      for ($i = 0; $i < $num_cols; $i++) {
+        $separator .= str_repeat('-', $widths[$i]) . '+';
+      }
+      $separator .= $eol;
+      for ($i = 0; $i < $num_rows; $i++) {
+        $text .= $separator;
+        if (!$heights[$i]) {
+          continue;
+        }
+        $row = $table[$i];
+        // For each row, iterate first by lines within the row.
+        for ($k = 0; $k < $heights[$i]; $k++) {
+          // Add a vertical-bar at the beginning of each row line.
+          $row_line = '|';
+          $trimmed = '';
+          // Within each row line, iterate by cells within that line.
+          for ($j = 0; $j < $num_cols; $j++) {
+            // Add a vertical bar at the end of each cell line.
+            $row_line .= $row[$j][$k] . '|';
+            // chr(160) is the non-breaking space character.
+            $trimmed .= trim($row[$j][$k], ' ' . $eol . chr(160));
+          }
+          if ($trimmed > '') {
+            // Only print rows that are non-empty.
+            $text .= $row_line . $eol;
+          }
+        }
+      }
+      // Final output ends with a row separator.
+      $text .= $separator;
+    }
+  }
+  // Make sure formatted table content doesn't line-wrap.
+  // chr(160) is the non-breaking space character.
+  return str_replace(' ', chr(160), $text);
 }
 
 /**
  * Helper function for array_walk in drupal_wrap_mail().
  *
- * Wraps words on a single line.
+ * Inserts $values['break'] sequences to break up $line into parts of no more
+ * than $values['wrap'] characters. Only breaks at space characters, unless
+ * $values['hard'] is TRUE.
  */
 function _drupal_wrap_mail_line(&$line, $key, $values) {
-  // Use soft-breaks only for purely quoted or unindented text.
-  $line = wordwrap($line, 77 - $values['length'], $values['soft'] ? "  \n" : "\n");
-  // Break really long words at the maximum width allowed.
-  $line = wordwrap($line, 996 - $values['length'], $values['soft'] ? " \n" : "\n");
+  $line = wordwrap($line, $values['wrap'], $values['break'], $values['hard']);
 }
 
 /**
- * Helper function for drupal_html_to_text().
+ * Helper function for array_walk in drupal_wrap_mail().
  *
- * Keeps track of URLs and replaces them with placeholder tokens.
+ * If $values['pad'] is non-empty, $values['indent'] will be added at the start
+ * of each line, and $values['pad'] at the end, followed by as many copies of
+ * $values['pad_repeat'] as it takes to make the line $values['max'] long.
+ *
+ * If $values['pad'] is empty, $values['indent'] will be added at the start of
+ * the first line, and $values['clean'] at the start of subsequent lines.
+ *
+ * If $values['stuff'] is true, then an extra space character will be added at
+ * the start of any line beginning with a (non-breaking) space, '>', or 'From'.
+ *
+ * @see http://www.ietf.org/rfc/rfc3676.txt
  */
-function _drupal_html_to_mail_urls($match = NULL, $reset = FALSE) {
-  global $base_url, $base_path;
-  static $urls = array(), $regexp;
-
-  if ($reset) {
-    // Reset internal URL list.
-    $urls = array();
+function _drupal_indent_mail_line(&$line, $key, $values) {
+  if ($line == '') {
+    return;
+  }
+  if ($values['pad']) {
+    $line = $values['indent'] . $line;
+    $count = $values['max'] - drupal_strlen($line) - drupal_strlen($values['pad']);
+    if ($count >= 0) {
+      $line .= $values['pad'] . str_repeat($values['pad_repeat'], $count);
+    }
   }
   else {
-    if (empty($regexp)) {
-      $regexp = '@^' . preg_quote($base_path, '@') . '@';
-    }
-    if ($match) {
-      list(, , $url, $label) = $match;
-      // Ensure all URLs are absolute.
-      $urls[] = strpos($url, '://') ? $url : preg_replace($regexp, $base_url . '/', $url);
-      return $label . ' [' . count($urls) . ']';
-    }
+    $line = $values[$key === 0 ? 'indent' : 'clean'] . $line;
+  }
+  if ($values['stuff']) {
+    // chr(160) is the non-breaking space character.
+    $line = preg_replace('/^(' . chr(160) . '| |>|From)/', ' $1', $line);
   }
-  return $urls;
 }
 
 /**
  * Helper function for drupal_wrap_mail() and drupal_html_to_text().
  *
- * Replace all non-quotation markers from a given piece of indentation with spaces.
+ * Replace all non-quotation markers from a given piece of indentation with
+ * non-breaking space characters.
  */
 function _drupal_html_to_text_clean($indent) {
-  return preg_replace('/[^>]/', ' ', $indent);
-}
-
-/**
- * Helper function for drupal_html_to_text().
- *
- * Pad the last line with the given character.
- */
-function _drupal_html_to_text_pad($text, $pad, $prefix = '') {
-  // Remove last line break.
-  $text = substr($text, 0, -1);
-  // Calculate needed padding space and add it.
-  if (($p = strrpos($text, "\n")) === FALSE) {
-    $p = -1;
-  }
-  $n = max(0, 79 - (strlen($text) - $p) - strlen($prefix));
-  // Add prefix and padding, and restore linebreak.
-  return $text . $prefix . str_repeat($pad, $n) . "\n";
+  // chr(160) is the non-breaking space character.
+  return preg_replace('/[^>]/', chr(160), $indent);
 }
diff --git a/modules/simpletest/tests/mail.test b/modules/simpletest/tests/mail.test
index 8a7b152d9d32eee7ae47c9ef8b5fb9c77f4e0cf1..558518d3590020a7d5d45e06f2b3fda1cffd0a42 100644
--- a/modules/simpletest/tests/mail.test
+++ b/modules/simpletest/tests/mail.test
@@ -1,6 +1,7 @@
 <?php
 
 /**
+ * @file
  * Test the Drupal mailing system.
  */
 class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
@@ -63,3 +64,494 @@ class MailTestCase extends DrupalWebTestCase implements MailSystemInterface {
   }
 }
 
+/**
+ * Unit tests for drupal_html_to_text().
+ */
+class DrupalHtmlToTextTestCase extends DrupalWebTestCase {
+  public static function getInfo() {
+    return array(
+      'name'  => 'HTML to text conversion',
+      'description' => 'Tests drupal_html_to_text().',
+      'group' => 'Mail',
+    );
+  }
+
+  /**
+   * Converts a string to its PHP source equivalent for display in test messages.
+   *
+   * @param $text
+   *   The text string to convert.
+   *
+   * @return
+   *   An HTML representation of the text string that, when displayed in a
+   *   browser, represents the PHP source code equivalent of $text.
+   */
+  function stringToHtml($text) {
+    return '"' .
+      str_replace(
+        array("\n", ' '),
+        array('\n', '&nbsp;'),
+        check_plain($text)
+      ) . '"';
+  }
+
+  /**
+   * Asserts that drupal_html_to_text() converts $html into $text.
+   *
+   * @param $html
+   *   The source HTML string to be converted.
+   * @param $text
+   *   The expected result of converting $html to text.
+   * @param $message
+   *   The assertion message; the tags found in $html are appended to it.
+   * @param $allowed_tags
+   *   (optional) An array of allowed tags, or NULL to default to the full
+   *   set of tags supported by drupal_html_to_text().
+   */
+  function assertHtmlToText($html, $text, $message, $allowed_tags = NULL) {
+    preg_match_all('/<([a-z1-6]+)/', drupal_strtolower($html), $matches);
+    $tested_tags = implode(', ', array_unique($matches[1]));
+    $message .= ' (' . $tested_tags . ')';
+    $result = drupal_html_to_text($html, $allowed_tags);
+    $pass = $this->assertEqual($result, $text, check_plain($message));
+    $verbose = 'html = <pre>' . $this->stringToHtml($html)
+      . '</pre><br />' . 'result = <pre>' . $this->stringToHtml($result)
+      . '</pre><br />' . 'expected = <pre>' . $this->stringToHtml($text)
+      . '</pre>';
+    $this->verbose($verbose);
+    if (!$pass) {
+      $this->pass("Previous test verbose info:<br />$verbose");
+    }
+  }
+
+  /**
+   * Test all supported tags of drupal_html_to_text().
+   */
+  function testTags() {
+    $tests = array(
+      '<a href = "http://drupal.org">Drupal.org</a>' => "Drupal.org [1]\n\n[1] " . url('http://drupal.org', array('absolute' => TRUE)),
+      '<a href = "./">Homepage</a>' => "Homepage [1]\n\n[1] " . url('./', array('absolute' => TRUE)),
+      '<address>Drupal</address>' => "Drupal",
+      '<address>Drupal</address><address>Drupal</address>' => "Drupal\n\nDrupal",
+      '<b>Drupal</b>' => "*Drupal*",
+      '<blockquote>Drupal</blockquote>' => " > Drupal",
+      '<blockquote>Drupal</blockquote><blockquote>Drupal</blockquote>' => " > Drupal\n\n > Drupal",
+      '<br />Drupal<br />Drupal<br /><br />Drupal' => "Drupal\nDrupal\n\nDrupal",
+      '<br/>Drupal<br/>Drupal<br/><br/>Drupal' => "Drupal\nDrupal\n\nDrupal",
+      '<br/>Drupal<br/>Drupal<br/><br/>Drupal<p>Drupal</p>' => "Drupal\nDrupal\n\nDrupal\nDrupal",
+      '<div>Drupal</div>' => "Drupal",
+      '<div>Drupal</div><div>Drupal</div>' => "Drupal\n\nDrupal",
+      '<em>Drupal</em>' => "/Drupal/",
+      '<h1>Drupal</h1>' => "======== DRUPAL " . str_repeat('=', 64),
+      '<h1>Drupal</h1><p>Drupal</p>' => "======== DRUPAL " . str_repeat('=', 64) . "\n\nDrupal",
+      '<h2>Drupal</h2>' => "------ DRUPAL " . str_repeat('-', 66),
+      '<h2>Drupal</h2><p>Drupal</p>' => "------ DRUPAL " . str_repeat('-', 66) . "\n\nDrupal",
+      '<h3>Drupal</h3>' => ".... Drupal",
+      '<h3>Drupal</h3><p>Drupal</p>' => ".... Drupal\n\nDrupal",
+      '<h4>Drupal</h4>' => "... Drupal",
+      '<h4>Drupal</h4><p>Drupal</p>' => "... Drupal\n\nDrupal",
+      '<h5>Drupal</h5>' => ".. Drupal",
+      '<h5>Drupal</h5><p>Drupal</p>' => ".. Drupal\n\nDrupal",
+      '<h6>Drupal</h6>' => ". Drupal",
+      '<h6>Drupal</h6><p>Drupal</p>' => ". Drupal\n\nDrupal",
+      '<hr />Drupal<hr />' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80),
+      '<hr/>Drupal<hr/>' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80),
+      '<hr/>Drupal<hr/><p>Drupal</p>' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80) . "\n\nDrupal",
+      '<i>Drupal</i>' => "/Drupal/",
+      '<p>Drupal</p>' => "Drupal",
+      '<p>Drupal</p><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<pre>Drupal</pre>' => "Drupal",
+      '<pre>Drupal</pre>Drupal' => "Drupal\nDrupal",
+      '<pre>Drupal</pre><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<strong>Drupal</strong>' => "*Drupal*",
+      '<table><tr><td>Drupal</td><td>Drupal</td></tr><tr><td>Drupal</td><td>Drupal</td></tr></table>' => "+------+------+\n|Drupal|Drupal|\n+------+------+\n|Drupal|Drupal|\n+------+------+",
+      '<table><tr><td>Drupal</td></tr></table><p>Drupal</p>' => "+------+\n|Drupal|\n+------+\n\nDrupal",
+      '<u>Drupal</u>' => "_Drupal_",
+      '<ul><li>Drupal</li></ul>' => "  * Drupal",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</li></ul>' => "  * Drupal /Drupal/ Drupal",
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li><li>Drupal</li></ol></li></ul>' => "  * Drupal\n  *   1) Drupal\n      2) Drupal",
+      '<ul><li>Drupal</li><li><ol><li>Drupal</li></ol></li><li>Drupal</li></ul>' => "  * Drupal\n  *   1) Drupal\n  * Drupal",
+      '<ul><li>Drupal</li><li>Drupal</li></ul>' => "  * Drupal\n  * Drupal",
+      '<ul><li>Drupal</li></ul><p>Drupal</p>' => "  * Drupal\n\nDrupal",
+      '<ol><li>Drupal</li></ol>' => "   1) Drupal",
+      '<ol><li>Drupal</li><li><ul><li>Drupal</li><li>Drupal</li></ul></li></ol>' => "   1) Drupal\n   2)  * Drupal\n       * Drupal",
+      '<ol><li>Drupal</li><li>Drupal</li></ol>' => "   1) Drupal\n   2) Drupal",
+      '<ol>Drupal</ol>' => "Drupal",
+      '<ol><li>Drupal</li></ol><p>Drupal</p>' => "   1) Drupal\n\nDrupal",
+      '<dl><dt>Drupal</dt></dl>' => "Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n     Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl>' => "Drupal\n     Drupal\nDrupal\n     Drupal",
+      '<dl><dt>Drupal</dt><dd>Drupal</dd></dl><p>Drupal</p>' => "Drupal\n     Drupal\n\nDrupal",
+      '<dl><dt>Drupal<dd>Drupal</dl>' => "Drupal\n     Drupal",
+      '<dl><dt>Drupal</dt></dl><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<ul><li>Drupal</li><li><dl><dt>Drupal</dt><dd>Drupal</dd><dt>Drupal</dt><dd>Drupal</dd></dl></li><li>Drupal</li></ul>' => "  * Drupal\n  * Drupal\n        Drupal\n    Drupal\n        Drupal\n  * Drupal",
+      // Tests malformed HTML tags.
+      '<br>Drupal<br>Drupal' => "Drupal\nDrupal",
+      '<hr>Drupal<hr>Drupal' => str_repeat('-', 80) . "\nDrupal\n" . str_repeat('-', 80) . "\nDrupal",
+      '<ol><li>Drupal<li>Drupal</ol>' => "   1) Drupal\n   2) Drupal",
+      '<ul><li>Drupal <em>Drupal</em> Drupal</ul></ul>' => "  * Drupal /Drupal/ Drupal",
+      '<ul><li>Drupal<li>Drupal</ol>' => "  * Drupal\n  * Drupal",
+      '<ul><li>Drupal<li>Drupal</ul>' => "  * Drupal\n  * Drupal",
+      '<ul>Drupal</ul>' => "Drupal",
+      'Drupal</ul></ol></dl><li>Drupal' => "Drupal * Drupal",
+      '<dl>Drupal</dl>' => "Drupal",
+      '<dl>Drupal</dl><p>Drupal</p>' => "Drupal\n\nDrupal",
+      '<dt>Drupal</dt>' => "Drupal",
+      // Tests some unsupported HTML tags.
+      '<html>Drupal</html>' => "Drupal",
+      '<script type="text/javascript">Drupal</script>' => "",
+    );
+
+    foreach ($tests as $html => $text) {
+      $this->assertHtmlToText($html, $text, 'Supported tags');
+    }
+  }
+
+  /**
+   * Test $allowed_tags argument of drupal_html_to_text().
+   */
+  function testDrupalHtmlToTextArgs() {
+    // The second parameter of drupal_html_to_text() overrules the allowed tags.
+    $this->assertHtmlToText(
+      'Drupal <b>Drupal</b> Drupal',
+      'Drupal *Drupal* Drupal',
+      'Allowed <b> tag found',
+      array('b')
+    );
+    $this->assertHtmlToText(
+      'Drupal <h1>Drupal</h1> Drupal',
+      'Drupal Drupal Drupal',
+      'Disallowed <h1> tag not found',
+      array('b')
+    );
+
+    $this->assertHtmlToText(
+      'Drupal <p><em><b>Drupal</b></em><p> Drupal',
+      'Drupal Drupal Drupal',
+      'Disallowed <p>, <em>, and <b> tags not found',
+      array('a', 'br', 'h1')
+    );
+
+    $this->assertHtmlToText(
+      '<html><body>Drupal</body></html>',
+      'Drupal',
+      'Unsupported <html> and <body> tags not found',
+      array('html', 'body')
+    );
+  }
+
+  /**
+   * Test that whitespace is collapsed, except within <pre> tags.
+   */
+  function testDrupalHtmltoTextCollapsesWhitespace() {
+    $input = "<pre>Drupal  Drupal\n\nDrupal<pre>Drupal  Drupal\n\nDrupal</pre>Drupal  Drupal\n\nDrupal</pre>";
+    $collapsed = "Drupal Drupal DrupalDrupal Drupal DrupalDrupal Drupal Drupal";
+    $preserved = "Drupal  Drupal\n\nDrupal\nDrupal  Drupal\n\nDrupal\nDrupal  Drupal\n\nDrupal";
+    $this->assertHtmlToText(
+      $input,
+      $collapsed,
+      'Whitespace inside disallowed <pre> tags is collapsed',
+      array('p')
+    );
+    $this->assertHtmlToText(
+      $input,
+      $preserved,
+      'Whitespace inside allowed <pre> tags is preserved'
+    );
+  }
+
+  /**
+   * Test that text separated by block-level tags in HTML gets separated by
+   * (at least) a newline in the plaintext version, regardless of tag case.
+   */
+  function testDrupalHtmlToTextBlockTagToNewline() {
+    $input = '[text]'
+      . '<address>[address]</address>'
+      . '<blockquote>[blockquote]</blockquote>'
+      . '<br />[br]'
+      . '<div>[div]</div>'
+      . '<dl><dt>[dl-dt]</dt>'
+      . '<dt>[dt]</dt>'
+      . '<dd>[dd]</dd>'
+      . '<dd>[dd-dl]</dd></dl>'
+      . '<h1>[h1]</h1>'
+      . '<h2>[h2]</h2>'
+      . '<h3>[h3]</h3>'
+      . '<h4>[h4]</h4>'
+      . '<h5>[h5]</h5>'
+      . '<h6>[h6]</h6>'
+      . '<hr />[hr]'
+      . '<ol><li>[ol-li]</li>'
+      . '<li>[li]</li>'
+      . '<li>[li-ol]</li></ol>'
+      . '<p>[p]</p>'
+      . '<pre>[pre]</pre>'
+      . '<table><thead><tr><td>[table-thead--tr-td]</td></tr></thead>'
+      . '<tbody><tr><td>[tbody-tr-td]</td></tr>'
+      . '<tr><td>[tr-td]</td></tr></tbody></table>'
+      . '<ul><li>[ul-li]</li>'
+      . '<li>[li-ul]</li></ul>'
+      . '[text]';
+    $output = drupal_html_to_text($input);
+    $pass = $this->assertFalse(
+      preg_match('/\][^\n]*\[/s', $output),
+      'Block-level HTML tags should force newlines'
+    );
+    $verbose = $this->stringToHtml($output);
+    $this->verbose($verbose);
+    if (!$pass) {
+      $this->pass("Previous test verbose info:<br />$verbose");
+    }
+    $output_upper = drupal_strtoupper($output);
+    $upper_input = drupal_strtoupper($input);
+    $upper_output = drupal_html_to_text($upper_input);
+    $pass = $this->assertEqual(
+      $upper_output,
+      $output_upper,
+      'Tag recognition should be case-insensitive'
+    );
+    $verbose = $upper_output
+      . '<br />should  be equal to <br />'
+      . $output_upper;
+    $this->verbose($verbose);
+    if (!$pass) {
+      $this->pass("Previous test verbose info:<br />$verbose");
+    }
+  }
+
+  /**
+   * Test that headers are properly separated from surrounding text.
+   */
+  function testHeaderSeparation() {
+    $html = 'Drupal<h1>Drupal</h1>Drupal';
+    $text = "Drupal\n\n======== DRUPAL " . str_repeat('=', 64) . "\nDrupal";
+    $this->assertHtmlToText($html, $text,
+      'Text before and after <h1> tag');
+    $html = '<p>Drupal</p><h1>Drupal</h1>Drupal';
+    $text = "Drupal\n\n\n======== DRUPAL " . str_repeat('=', 64) . "\nDrupal";
+    $this->assertHtmlToText($html, $text,
+      'Paragraph before and text after <h1> tag');
+    $html = 'Drupal<h1>Drupal</h1><p>Drupal</p>';
+    $text = "Drupal\n\n======== DRUPAL " . str_repeat('=', 64) . "\n\nDrupal";
+    $this->assertHtmlToText($html, $text,
+      'Text before and paragraph after <h1> tag');
+    $html = '<p>Drupal</p><h1>Drupal</h1><p>Drupal</p>';
+    $text = "Drupal\n\n\n======== DRUPAL " . str_repeat('=', 64) . "\n\nDrupal";
+    $this->assertHtmlToText($html, $text,
+      'Paragraph before and after <h1> tag');
+  }
+
+  /**
+   * Test that footnote references are properly generated.
+   */
+  function testFootnoteReferences() {
+    $source = '<a href="http://www.example.com/node/1">Host and path</a>'
+      . '<br /><a href="http://www.example.com">Host, no path</a>'
+      . '<br /><a href="' . base_path() . 'node">Absolute Path, no host</a>'
+      . '<br /><a href="node">Relative path, no host</a>';
+    $tt = "Host and path [1]"
+      . "\nHost, no path [2]"
+      . "\nAbsolute Path, no host [3]"
+      . "\nRelative path, no host [3]"
+      . "\n"
+      . "\n[1] " . url('http://www.example.com/node/1', array('absolute' => TRUE))
+      . "\n[2] " . url('http://www.example.com', array('absolute' => TRUE))
+      . "\n[3] " . url('node', array('absolute' => TRUE));
+    $this->assertHtmlToText($source, $tt, 'Footnotes');
+  }
+
+  /**
+   * Test that combinations of paragraph breaks, line breaks, linefeeds,
+   * and spaces are properly handled.
+   */
+  function testDrupalHtmlToTextParagraphs() {
+    $tests = array();
+    $tests[] = array(
+      'html' => "<p>line 1<br />\nline 2<br />line 3\n<br />line 4</p><p>paragraph</p>",
+      'text' => "line 1\nline 2\nline 3\nline 4\n\nparagraph",
+    );
+    $tests[] = array(
+      'html' => "<p>line 1<br /> line 2</p> <p>line 4<br /> line 5</p> <p>0</p>",
+      'text' => "line 1\nline 2\n\nline 4\nline 5\n\n0",
+    );
+    foreach ($tests as $test) {
+      $this->assertHtmlToText($test['html'], $test['text'], 'Paragraph breaks');
+    }
+  }
+
+  /**
+   * Tests that drupal_html_to_text() wraps before 1000 characters.
+   *
+   * RFC 3676 says, "The Text/Plain media type is the lowest common
+   * denominator of Internet email, with lines of no more than 998 characters."
+   *
+   * RFC 2046 says, "SMTP [RFC-821] allows a maximum of 998 octets before the
+   * next CRLF sequence."
+   *
+   * RFC 821 says, "The maximum total length of a text line including the
+   * <CRLF> is 1000 characters."
+   */
+  function testVeryLongLineWrap() {
+    $input = 'Drupal<br /><pre>' . str_repeat('x', 2100) . '</pre><br />Drupal';
+    $output = drupal_html_to_text($input);
+    // This awkward construct comes from includes/mail.inc lines 8-13.
+    $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+    // The line terminator counts toward the 1000-octet limit, so measure it
+    // with strlen() (octets) rather than drupal_strlen() (characters).
+    $line_length_limit = 1000 - strlen($eol);
+    $maximum_line_length = 0;
+    foreach (explode($eol, $output) as $line) {
+      // Measure each line, without its terminator, in octets: strlen() counts
+      // octets whereas drupal_strlen() counts characters.
+      $maximum_line_length = max($maximum_line_length, strlen($line));
+    }
+    $verbose = 'Maximum line length found was ' . $maximum_line_length . ' octets.';
+    $this->verbose($verbose);
+    if (!$this->assertFalse($maximum_line_length > $line_length_limit, 'Mail lines are wrapped at 1000 octets.')) {
+      $this->pass("Previous test verbose info:<br />$verbose");
+    }
+  }
+
+  /**
+   * Test soft-wrapping and space-stuffing of text within <pre> tags.
+   */
+  function testNoWrapWithinPre() {
+    // This awkward construct comes from includes/mail.inc lines 8-13.
+    $eol = variable_get('mail_line_endings', MAIL_LINE_ENDINGS);
+    $html = '<pre>'
+      // Single space.
+      . str_repeat('a', 30) . ' ' . str_repeat('a', 30) . ' '
+      // Two spaces.
+      . str_repeat('b', 30) . ' ' . str_repeat('b', 30) . '  '
+      // Single newline.
+      . str_repeat('c', 30) . ' ' . str_repeat('c', 30) . "$eol"
+      // Double newline.
+      . str_repeat('d', 30) . ' ' . str_repeat('d', 30) . "$eol$eol"
+      // Newline and space.
+      . str_repeat('e', 30) . ' ' . str_repeat('e', 30) . "$eol "
+      // Newline and two spaces.
+      . str_repeat('f', 30) . ' ' . str_repeat('f', 30) . "$eol  "
+      // Space and newline.
+      . str_repeat('g', 30) . ' ' . str_repeat('g', 30) . " $eol"
+      // Two spaces and newline.
+      . str_repeat('h', 30) . ' ' . str_repeat('h', 30) . "  $eol"
+      . str_repeat('i', 30) . ' ' . str_repeat('i', 30) . '</pre>';
+    $text = ''
+      // One space and newline.
+      . str_repeat('a', 30) . ' ' . str_repeat('a', 30) . " $eol"
+      // Two spaces and newline.
+      . str_repeat('b', 30) . ' ' . str_repeat('b', 30) . "  $eol"
+      // Single newline.
+      . str_repeat('c', 30) . ' ' . str_repeat('c', 30) . "$eol"
+      // Double newline.
+      . str_repeat('d', 30) . ' ' . str_repeat('d', 30) . "$eol$eol"
+      // Newline and two spaces.
+      . str_repeat('e', 30) . ' ' . str_repeat('e', 30) . "$eol  "
+      // Newline and three spaces.
+      . str_repeat('f', 30) . ' ' . str_repeat('f', 30) . "$eol   "
+      // Newline only.
+      . str_repeat('g', 30) . ' ' . str_repeat('g', 30) . "$eol"
+      // Newline only.
+      . str_repeat('h', 30) . ' ' . str_repeat('h', 30) . "$eol"
+      . str_repeat('i', 30) . ' ' . str_repeat('i', 30);
+    $this->assertHtmlToText($html, $text, 'Soft-wrap and space-stuff text within <pre> according to RFC-3676');
+  }
+
+  /**
+   * Test that deeply-nested tables are rendered correctly in under one second.
+   */
+  function testNestedTables() {
+    $html = '
+<table>
+  <tr>
+    <td>
+      <table>
+        <tr>
+          <td>
+            <table>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+            </table>
+          </td>
+          <td></td>
+          <td>
+            <table>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+              <tr>
+                <td></td>
+                <td><a href="#">xxxxxxxxxxxxxxxxxxx</a></td>
+                <td></td>
+              </tr>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+            </table>
+          </td>
+          <td></td>
+          <td>
+            <table>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+              <tr>
+                <td></td>
+                <td><a href="#">xxxxxxxxxxxxxxxxxxx</a></td>
+                <td></td>
+              </tr>
+              <tr>
+                <td></td>
+                <td></td>
+                <td></td>
+              </tr>
+            </table>
+          </td>
+        </tr>
+      </table>
+    </td>
+  </tr>
+  <tr>
+    <td>
+    </td>
+  </tr>
+</table>';
+    $text = '
++----------------------------------------------------------------+
+|+----++---------------------------++---------------------------+|
+||++++||++-----------------------++||++-----------------------++||
+||++++||++-----------------------++||++-----------------------++||
+||++++||||xxxxxxxxxxxxxxxxxxx [1]||||||xxxxxxxxxxxxxxxxxxx [1]||||
+||++++||++-----------------------++||++-----------------------++||
+||    ||++-----------------------++||++-----------------------++||
+|+----++---------------------------++---------------------------+|
++----------------------------------------------------------------+
++----------------------------------------------------------------+
+
+[1] ' . url('#', array('absolute' => TRUE));
+    $start = microtime(TRUE);
+    $this->assertHtmlToText(ltrim($html), ltrim($text), 'Nested tables');
+    $finish = microtime(TRUE);
+    $elapsed = $finish - $start;
+    $this->assertTrue($elapsed < 1.0, "Table generation took $elapsed seconds.");
+  }
+}
-- 
1.7.4.1

