/**
 * Convert line breaks into <p> and <br /> in an intelligent fashion.
 *
 * Based on: http://photomatt.net/scripts/autop
 *
 * NOTE(review): this copy of the file had every HTML tag stripped out of its
 * comments, regex patterns and replacement strings. The tag literals below
 * were restored from the stripped residue and from the upstream Drupal
 * filter.module implementation; spots where the exact tag list could not be
 * recovered from the residue are marked individually.
 *
 * @param string $text
 *   The markup to process.
 * @param string $style
 *   'new' selects the paragraph-building expression from the patch under
 *   test; any other value (the script below passes 'old') selects the original
 *   lazy `.+?` expression.
 *
 * @return string
 *   The text with paragraph and line-break tags inserted.
 */
function _filter_autop($text, $style) {
  // All block level tags
  $block = '(?:table|thead|tfoot|caption|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|form|blockquote|address|p|h[1-6]|hr)';

  // Split at <pre>, <script>, <style> and </pre>, </script>, </style> tags.
  // We don't apply any processing to the contents of these tags to avoid messing
  // up code. We look for matched pairs and allow basic nesting. For example:
  // "processed <pre> ignored <script> ignored </script> ignored </pre> processed"
  // NOTE(review): tag list reconstructed from the comment above; some upstream
  // revisions also list `object` here - confirm against filter.module.
  $chunks = preg_split('@(</?(?:pre|script|style)[^>]*>)@i', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting NULL as required).
  $ignore = FALSE;
  $ignoretag = '';
  $output = '';
  foreach ($chunks as $i => $chunk) {
    if ($i % 2) {
      // Odd indexes are the captured delimiter tags.
      // Opening or closing tag?
      $open = ($chunk[1] !== '/');
      // Skip "<" for an opening tag and "</" for a closing one, then cut the
      // tag name off at the first space or ">".
      list($tag) = preg_split('/[ >]/', substr($chunk, $open ? 1 : 2), 2);
      if (!$ignore) {
        if ($open) {
          $ignore = TRUE;
          $ignoretag = $tag;
        }
      }
      // Only allow a matching tag to close it.
      elseif (!$open && $ignoretag === $tag) {
        $ignore = FALSE;
        $ignoretag = '';
      }
    }
    elseif (!$ignore) {
      // just to make things a little easier, pad the end
      $chunk = preg_replace('|\n*$|', '', $chunk) . "\n\n";
      $chunk = preg_replace('|<br />\s*<br />|', "\n\n", $chunk);
      // Space things out a little
      $chunk = preg_replace('!(<' . $block . '[^>]*>)!', "\n$1", $chunk);
      // Space things out a little
      $chunk = preg_replace('!(</' . $block . '>)!', "$1\n\n", $chunk);
      // take care of duplicates
      $chunk = preg_replace("/\n\n+/", "\n\n", $chunk);

      // Switch between the two paragraph-building variants being compared.
      if ($style == 'new') {
        $chunk = preg_replace('/^\n/', '', $chunk);
        // make paragraphs, including one at the end
        $chunk = '<p>' . preg_replace('/\n\s*\n\n?(.)/', "</p>\n<p>$1", $chunk) . '</p>';
      }
      else {
        // make paragraphs, including one at the end
        $chunk = preg_replace('/\n?(.+?)(?:\n\s*\n|\z)/s', "<p>$1</p>\n", $chunk);
      }

      // problem with nested lists
      $chunk = preg_replace("|<p>(<li.+?)</p>|", "$1", $chunk);
      $chunk = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $chunk);
      $chunk = str_replace('</blockquote></p>', '</p></blockquote>', $chunk);
      // under certain strange conditions it could create a P of entirely whitespace
      $chunk = preg_replace('|<p>\s*</p>\n?|', '', $chunk);
      $chunk = preg_replace('!<p>\s*(</?' . $block . '[^>]*>)!', "$1", $chunk);
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*</p>!', "$1", $chunk);
      // make line breaks
      $chunk = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $chunk);
      $chunk = preg_replace('!(</?' . $block . '[^>]*>)\s*<br />!', "$1", $chunk);
      // NOTE(review): tag list reconstructed from upstream filter.module; older
      // revisions omit dl|dd|dt - confirm.
      $chunk = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)>)!', '$1', $chunk);
      // Escape ampersands that do not start an entity.
      $chunk = preg_replace('/&([^#])(?![A-Za-z0-9]{1,8};)/', '&amp;$1', $chunk);
    }
    // Delimiter tags and ignored chunks are passed through untouched.
    $output .= $chunk;
  }
  return $output;
}

/**
 * new regex for filter.module issue http://drupal.org/node/133188
 */
// NOTE(review): the sample text has presumably lost its original line breaks
// (the file's whitespace was collapsed); with a single-line input the two
// variants are barely exercised - restore a multi-paragraph sample if known.
$chunk = "Hello, Lorem ipsum. Lorem ipsum. Last paragraph ";
// this is from _filter_autop
echo "INPUT===============\n";
echo $chunk;

// the two new lines from the patch
$chunk_new = _filter_autop($chunk, 'new');
// The original expression, kept for comparison; it must be computed because
// it is echoed and compared below.
$chunk_old = _filter_autop($chunk, 'old');

echo "\nNEW REGEX=============\n";
echo $chunk_new;
echo "\nOLD REGEX BACKTRACKING=============\n";
echo $chunk_old;

if ($chunk_new !== $chunk_old) {
  echo "\nWARNING - NEW != OLD\n";
}