--- node.module.4.7 2006-12-06 22:20:21.000000000 +0100 +++ node.module.4-7.node_teaser.p2 2006-12-07 10:29:27.000000000 +0100 @@ -180,19 +180,40 @@ return $body; } + // the teaser length is "the maximum number of characters used in the trimmed version of a post" + $teaser = truncate_utf8($body, $size); + // since we are trying to find the last occurrence of something, the reversed teaser is useful + $haystack = strrev($teaser); + // In some cases, no delimiter has been specified (e.g. when posting using // the Blogger API). In this case, we try to split at paragraph boundaries. - // When even the first paragraph is too long, we try to split at the end of - // the next sentence. - $breakpoints = array('

' => 4, '
' => 0, '
' => 0, "\n" => 0, '. ' => 1, '! ' => 1, '? ' => 1, '。' => 3, '؟ ' => 1); + $breakpoints = array('

' => 4, '
' => 0, '
' => 0, "\n" => 0); + // strrpos() doesn't accept multi-character needles in PHP4, + // so we use strpos() and reverse the haystack and the needle foreach ($breakpoints as $point => $charnum) { - if ($length = strpos($body, $point, $size)) { - return substr($body, 0, $length + $charnum); + $position = strpos($haystack, strrev($point)); + if ($position === 0 and $charnum == strlen($point)) { + return $teaser; + } + if ($position > 0) { + return substr($teaser, 0, $charnum - strlen($point) - $position); } } - // If all else fails, we simply truncate the string. - return truncate_utf8($body, $size); + // When even the first paragraph is too long, we try to split at the end of + // the last full sentence. + $breakpoints = array('. ' => 1, '! ' => 1, '? ' => 1, '\xEF\xBD\xA1' => 3, '\xD8\x9F ' => 2); + // 0xEFBDA1 = ideographic full stop UTF-8, 0xD89F = arabic question mark UTF-8 + // create regex pattern by reversing imploded keys and escaping any would-be special characters + $pattern = '/' . str_replace(array('.', '!', '?'), array('\.', '\!', '\?'), strrev(implode('|', array_keys($breakpoints)))) . '/'; + if (preg_match($pattern, $haystack, $matches, PREG_OFFSET_CAPTURE)) { + // $matches[0][0] now contains the text that matched the pattern and + // $matches[0][1] contains the position of the match + return substr($teaser, 0, $breakpoints[strrev($matches[0][0])] - strlen($matches[0][0]) - $matches[0][1]); + } + + // If all else fails, we simply return the truncated string. + return $teaser; } function _node_names($op = '', $node = NULL) {