Index: codefilter.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/codefilter/codefilter.module,v retrieving revision 1.25 diff -u -r1.25 codefilter.module --- codefilter.module 3 Dec 2007 08:01:51 -0000 1.25 +++ codefilter.module 6 Dec 2007 19:02:59 -0000 @@ -26,15 +26,16 @@ */ function codefilter_process_php($text) { // Note, pay attention to odd preg_replace-with-/e behaviour on slashes - // Undo possible linebreak filter conversion + // Undo possible line break filter conversion $text = preg_replace('@@', '', str_replace('\"', '"', $text)); // Undo the escaping in the prepare step $text = decode_entities($text); - // Trim leading and trailing linebreaks + $text = str_replace("\xFEn\xFF", "\n", $text); + // Trim leading and trailing line breaks $text = trim($text, "\r\n"); // Highlight as PHP $text = '
'. highlight_string("", 1) .'
'; - // Remove newlines to avoid clashing with the linebreak filter + // Remove newlines to avoid clashing with the line break filter $text = str_replace("\n", '', $text); return codefilter_fix_spaces($text); } @@ -42,11 +43,12 @@ /** * Helper function for codefilter_process_code(). */ -function codefilter_process_php_inline($matches) { - // Undo nl2br - $text = str_replace('
', '', $matches[0]); +function codefilter_process_php_inline($text) { + // Undo the escaping + $text = str_replace(array('
', '\"', "\xFEn\xFF"), array('', '"', "\n"), $text); + $text = decode_entities($text); // Decode entities (the highlighter re-entifies) and highlight text - $text = highlight_string(decode_entities($text), 1); + $text = highlight_string('', 1); // Remove PHP's own added code tags $text = str_replace(array('', '', "\n"), array('', '', ''), $text); return $text; @@ -56,23 +58,32 @@ * Processes chunks of escaped code into HTML. */ function codefilter_process_code($text) { - // Undo linebreak escaping - $text = str_replace(' ', "\n", $text); - // Inline or block level piece? + // Undo line break escaping + $text = str_replace("\xFEn\xFF", "\n", $text); + // In-line or block level piece? $multiline = strpos($text, "\n") !== FALSE; // Note, pay attention to odd preg_replace-with-/e behaviour on slashes $text = preg_replace("/^\n/", '', preg_replace('@@', '', str_replace('\"', '"', $text))); - // Trim leading and trailing linebreaks + // Trim leading and trailing line breaks $text = trim($text, "\n"); + + /* + PHP code in regular code (use codefilter_prepare to allow the in-line block to have embedded "?>", + however this requires things to be decoded first, then re-encoded). + */ + $text = str_replace(array('<', '>', '"', '''), array('<', '>', '"', "'"), $text); + $text = codefilter_prepare($text, false); + $text = str_replace(array('<', '>', '"', "'"), array('<', '>', '"', '''), $text); + $text = preg_replace('@\xFEphp\xFF(.+?)\xFE/php\xFF@se', "codefilter_process_php_inline('$1')", $text); + // Escape newlines $text = nl2br($text); - // PHP code in regular code - $text = preg_replace_callback('/<\?php.+?\?>/s', 'codefilter_process_php_inline', $text); - $text = ''. codefilter_fix_spaces(str_replace(' ', ' ', $text)) .''; - if ($multiline) $text = '
'. $text .'
'; - // Remove newlines to avoid clashing with the linebreak filter + if ($multiline) { + $text = '
'. $text .'
'; + } + // Remove newlines to avoid clashing with the line break filter return str_replace("\n", '', $text); } @@ -81,9 +92,153 @@ } function codefilter_escape($text) { - // Note, pay attention to odd preg_replace-with-/e behaviour on slashes - // Protect newlines from linebreak filter - return str_replace(array("\r", "\n"), array('', ' '), check_plain(str_replace('\"', '"', $text))); + // Protect newlines from line break filter + return str_replace(array("\r", "\n"), array('', "\xFEn\xFF"), check_plain($text)); +} + +/** + * Helper function for codefilter_prepare; looks for the next block of code + * @param $text Text to find next block of code in + * @param $start_offset Position in $text to start looking + * @param $block_start String which begins the block of code (for example "<?php") + * @param $block_end String which ends the block of code (for example "?>") + * @param $chosen_start Starting position of the next block closest to $start_offset + * @param $chosen_starter The string which begins the next block closest to $start_offset (equivalent to $block_start) + * @param $chosen_ender The string which ends the next block closest to $start_offset (equivalent to $block_end) + * @return Starting position of the next block, or false if no blocks were found. 
+ */ +function codefilter_prepare_find_block($text, $start_offset, $block_start, $block_end, &$chosen_start, &$chosen_end, &$chosen_starter, &$chosen_ender) { + $position = strpos($text, $block_start, $start_offset); + if ($position !== false) { + $position_end = strpos($text, $block_end, $position); + if ($position_end === false) { + $position = false; + } + elseif ($position !== false && ($chosen_start === false || $position < $chosen_start)) { + $chosen_start = $position; + $chosen_end = $position_end; + $chosen_starter = $block_start; + $chosen_ender = $block_end; + } + } + return $position; +} + +/** + * Prepares a block of text for filtering by identifying blocks of code and marking them with "\xFEphp\xFF" or similar + * + * A regexp cannot easily find the end of a block as the terminating ?> may be within a string or multi-line comment. + * Therefore, php blocks have to be looked at specially and an intelligent guess has to be made as to which ?> actually terminates + * the block. To do this, for each possible ending to a block, highlight_string is called to see if things after that ending are + * highlighted as non-php-code, and if so, then that must be the correct ending. Otherwise, we move onto the next possible ending + * and see what highlight_string thinks about ending there. Likewise, a regexp cannot easily find the end of a <code> ... </code> block + * as it might contain in-line PHP blocks, which in turn might contain "?>" or "</code>" within strings or comments. + * + * In-line PHP mode - which is designed for identifying in-line PHP blocks - is activated when $inline_php_pass is not false. In this + * mode, $inline_php_pass specifies where to stop searching for the start of the next block. Additionally, the function will return the + * position of the end of the next block, or false if no such block was found. + * + * @param $text Text to prepare + * @param $allow_code_blocks If true (the default), then <code> ... </code> 
blocks are identified + * @param $start_offset Position within $text to start identifying blocks + * @param $inline_php_pass If not false (defaults to false), then activates in-line php mode + * @return The text with PHP blocks marked as "\xFEphp\xFF ... \xFE/php\xFF" and code blocks marked as "\xFEcode\xFF ... \xFE/code\xFF" + */ +function codefilter_prepare($text, $allow_code_blocks = true, $start_offset = 0, $inline_php_pass = false) { + + while (true) { + // Find the next php block, the next mini block, the next square php block, the next square mini block and the next code block + $position_start = false; + $position_end = false; + $position_starter = ''; + $position_marker = 'php'; + + $position_php = codefilter_prepare_find_block($text, $start_offset, '', $position_start, $position_end, $position_starter, $position_ender); + $position_php_square = codefilter_prepare_find_block($text, $start_offset, '[?php', '?]', $position_start, $position_end, $position_starter, $position_ender); + $position_mini = codefilter_prepare_find_block($text, $start_offset, '<%', '%>', $position_start, $position_end, $position_starter, $position_ender); + $position_mini_square = codefilter_prepare_find_block($text, $start_offset, '[%', '%]', $position_start, $position_end, $position_starter, $position_ender); + $position_code = false; + if($allow_code_blocks) { + $position_code = codefilter_prepare_find_block($text, $start_offset, '', '', $position_start, $position_end, $position_starter, $position_ender); + } + + if ($position_php === false && $position_php_square === false && $position_mini === false && $position_mini_square === false && $position_code === false) { + // No more blocks, so stop looking + break; + } + $start_offset = $position_end; + + if($inline_php_pass !== false && $position_start > $inline_php_pass) { + // If doing an inline php check, abort if the php block starts after the code block currently ends + return false; + } + + if ($position_starter == '') { + 
$position_marker = 'code'; + /* + If there are in-line php blocks within the code, then the first may be within a php string or comment, so do a + recursive call to find out where any in-line php blocks end, and use the next after the in-line php ends. + */ + $final_code_end = strrpos($text, ''); + $search_begin = $position_start + strlen($position_starter); + while(($search_begin = codefilter_prepare($text, false, $search_begin, $position_end)) !== false) { + if($search_begin >= $final_code_end) { + break; + } + $position_end = strpos($text, '', $search_begin); + } + } + else { + while ($position_end) { + // See what highlight_string thinks about ending the block here + $to_highlight = substr($text, $position_start + strlen($position_starter), $position_end - $position_start - strlen($position_starter)); + $highlighted = highlight_string(' ?>', true); + /* + If (and only if) the block to be highlighted is valid, then the first "?>" will be styled (?>) + and the second one will be not styled (raw), so get the positions of the last styled and the last raw occurrence of "?>" + and if the styled appears before the raw, then the block is valid. highlight_string has an odd bug where it can remove + single quotes (') from a piece of code when the code is invalid, so this is checked for. For example, this call: + highlight_string(" ?>"); + Will remove the single quote, style the penultimate "?>" and leave the final "?>" raw. 
+ */ + $position_styled_close = strrpos($highlighted, '">?>'); + $position_raw_close = strrpos($highlighted, '?>'); + if ($position_styled_close !== false && $position_raw_close !== false && ($position_styled_close + 2) < $position_raw_close) { + if (substr_count($to_highlight, "'") == substr_count($highlighted, "'")) { + break; + } + } + + // Move onto the next possible ending + $position_end = strpos($text, $position_ender, $position_end + 1); + } + if ($position_end === false) { + // No more possible endings, but highlight_string hasn't been happy yet, so revert back to the first possible ending + $position_end = strpos($text, $position_ender, $position_start); + } + if ($inline_php_pass !== false) { + return $position_end + strlen($position_ender); + } + } + + // We've found the ending, so mark this block + // Note: we use the bytes 0xFE and 0xFF to replace < > during the filtering process. + // These bytes are not valid in UTF-8 data and thus least likely to cause problems. + $before_code = substr($text, 0, $position_start); + $position_start = $position_start + strlen($position_starter); + $the_code = substr($text, $position_start, $position_end - $position_start); + $position_end = $position_end + strlen($position_ender); + $after_code = substr($text, $position_end); + $text = $before_code ."\xFE". $position_marker ."\xFF". codefilter_escape($the_code) ."\xFE/". $position_marker ."\xFF"; + $start_offset = strlen($text); + $text = $text . $after_code; + } + + if ($inline_php_pass !== false) { + return false; + } + return $text; } /** @@ -98,20 +253,15 @@ return t('Allows users to post code verbatim using <code> and <?php ?> tags.'); case 'prepare': - // Note: we use the bytes 0xFE and 0xFF to replace < > during the filtering process. - // These bytes are not valid in UTF-8 data and thus least likely to cause problems. - $text = preg_replace('@(.+?)@se', "'\xFEcode\xFF'. 
codefilter_escape('\\1') .'\xFE/code\xFF'", $text); - $text = preg_replace('@[\[<](\?php|%)(.+?)(\?|%)[\]>]@se', "'\xFEphp\xFF'. codefilter_escape('\\2') .'\xFE/php\xFF'", $text); + $text = codefilter_prepare($text); return $text; case 'process': - $text = preg_replace('@\xFEcode\xFF(.+?)\xFE/code\xFF@se', "codefilter_process_code('$1')", $text); $text = preg_replace('@\xFEphp\xFF(.+?)\xFE/php\xFF@se', "codefilter_process_php('$1')", $text); + $text = preg_replace('@\xFEcode\xFF(.+?)\xFE/code\xFF@se', "codefilter_process_code('$1')", $text); return $text; default: return $text; } } - -