--- node.module.4.7 2006-12-06 22:20:21.000000000 +0100
+++ node.module.4-7.node_teaser.p2 2006-12-07 10:29:27.000000000 +0100
@@ -180,19 +180,40 @@
return $body;
}
+ // the teaser length is "the maximum number of characters used in the trimmed version of a post"
+ $teaser = truncate_utf8($body, $size);
+ // since we are trying to find the last occurrence of something, the reversed teaser is useful
+ $haystack = strrev($teaser);
+
// In some cases, no delimiter has been specified (e.g. when posting using
// the Blogger API). In this case, we try to split at paragraph boundaries.
- // When even the first paragraph is too long, we try to split at the end of
- // the next sentence.
- $breakpoints = array('
' => 4, '
' => 0, '
' => 0, "\n" => 0, '. ' => 1, '! ' => 1, '? ' => 1, '。' => 3, '؟ ' => 1);
+ $breakpoints = array('' => 4, '
' => 0, '
' => 0, "\n" => 0);
+ // strrpos() doesn't accept multi-character needles in PHP4,
+ // so we use strpos() and reverse the haystack and the needle
foreach ($breakpoints as $point => $charnum) {
- if ($length = strpos($body, $point, $size)) {
- return substr($body, 0, $length + $charnum);
+ $position = strpos($haystack, strrev($point));
+ if ($position === 0 and $charnum == strlen($point)) {
+ return $teaser;
+ }
+ if ($position > 0) {
+ return substr($teaser, 0, $charnum - strlen($point) - $position);
}
}
- // If all else fails, we simply truncate the string.
- return truncate_utf8($body, $size);
+ // When even the first paragraph is too long, we try to split at the end of
+ // the last full sentence.
+ $breakpoints = array('. ' => 1, '! ' => 1, '? ' => 1, '\xEF\xBD\xA1' => 3, '\xD8\x9F ' => 2);
+ // 0xEFBDA1 = halfwidth ideographic full stop (U+FF61) in UTF-8, 0xD89F = Arabic question mark (U+061F) in UTF-8
+ // create regex pattern by reversing imploded keys and escaping any would-be special characters
+ $pattern = '/' . str_replace(array('.', '!', '?'), array('\.', '\!', '\?'), strrev(implode('|', array_keys($breakpoints)))) . '/';
+ if (preg_match($pattern, $haystack, $matches, PREG_OFFSET_CAPTURE)) {
+ // $matches[0][0] now contains the text that matched the pattern and
+ // $matches[0][1] contains the position of the match
+ return substr($teaser, 0, $breakpoints[strrev($matches[0][0])] - strlen($matches[0][0]) - $matches[0][1]);
+ }
+
+ // If all else fails, we simply return the truncated string.
+ return $teaser;
}
function _node_names($op = '', $node = NULL) {