--- codefilter.module Sat Dec 1 20:41:35 2007 +++ codefilterFixed.module Sat Dec 1 20:42:56 2007 @@ -87,6 +87,86 @@ function codefilter_escape($text) { } /** + * A regex cannot easily find the end of a block as the terminating ?> may be within a string or multiline comment. + * Therefore, php blocks have to be looked at specially and an intelligent guess has to be made as to which ?> actually terminates + * the block. To do this, for each possible ending to a block, highlight_string is called to see if things after that ending are + * highlighted as non-php-code, and if so, then that must be the correct ending. Otherwise, we move onto the next possible ending + * and see what highlight_string thinks about ending there. + */ +function codefilter_prepare_php_block($text) { + $start_offset = 0; + + while(true) { + // Find the next php block and the next mini block + $position_php = strpos($text, '', $position_php); + if($position_php_end === false) + $position_php = false; + } + + $position_mini = strpos($text, '<%', $start_offset); + $position_mini_end = 0; + if($position_mini !== false) { + $position_mini_end = strpos($text, '%>', $position_mini); + if($position_mini_end === false) + $position_mini = false; + } + + if($position_php === false && $position_mini === false) + break; + + // Prepare to look at the next block + $position_start = $position_php; + $position_end = $position_php_end; + $position_starter = ''; + if($position_php === false || ($position_mini !== false && $position_mini < $position_php)) { + $position_start = $position_mini; + $position_end = $position_mini_end; + $position_starter = '<%'; + $position_ender = '%>'; + } + $start_offset = $position_end; + + while($position_end) { + // See what highlight_string thinks about ending the block here + $to_highlight = substr($text, $position_start + strlen($position_starter), $position_end - $position_start - strlen($position_starter)); + $to_highlight = highlight_string(' ?>', true); + $position_styled_close = strrpos($to_highlight, '">?>'); + $position_raw_close = strrpos($to_highlight, '?>'); + if ($position_styled_close !== false && $position_raw_close !== false && ($position_styled_close + 2) < $position_raw_close) { + break; + } + + // Move onto the next possible ending + $position_end = strpos($text, $position_ender, $position_end + 1); + } + if($position_end === false) { + // No more possible endings, but highlight_string hasn't been happy yet, so revert back to the first possible ending + $position_end = strpos($text, $position_ender, $position_start); + } + + // We've found the ending, so mark this block + $before_code = substr($text, 0, $position_start); + $position_start = $position_start + strlen($position_starter); + $the_code = substr($text, $position_start, $position_end - $position_start); + $position_end = $position_end + strlen($position_ender); + $after_code = substr($text, $position_end); + $text = $before_code ."\xFEphp\xFF". codefilter_escape($the_code) ."\xFE/php\xFF"; + $start_offset = strlen($text); + $text = $text . $after_code; + + // Move onto the next block + $position_php = strpos($text, ' during the filtering process. // These bytes are not valid in UTF-8 data and thus least likely to cause problems. $text = preg_replace('@(.+?)@se', "'\xFEcode\xFF'. codefilter_escape('\\1') .'\xFE/code\xFF'", $text); - $text = preg_replace('@[\[<](\?php|%)(.+?)(\?|%)[\]>]@se', "'\xFEphp\xFF'. codefilter_escape('\\2') .'\xFE/php\xFF'", $text); + $text = codefilter_prepare_php_block($text); return $text; case 'process':