Index: glossary.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/glossary/glossary.module,v
retrieving revision 1.100.2.9
diff -u -p -r1.100.2.9 glossary.module
--- glossary.module 26 Jan 2008 01:00:22 -0000 1.100.2.9
+++ glossary.module 26 Jan 2008 21:25:00 -0000
@@ -516,6 +516,7 @@ function _glossary_filter_process($forma
$absolute_link = variable_get("glossary_absolute_$format", false);
$terms = _glossary_get_terms($format);
$vids = _glossary_get_filter_vids();
+ $terms_replace = array();
foreach ($terms as $term) {
$term_title = $term->name .': '. strip_tags($term->description);
@@ -556,62 +557,112 @@ function _glossary_filter_process($forma
}
// replace term and synonyms with the desired new HTML code
- foreach ($term->synonyms as $candidate) {
- $text = _glossary_insertlink($format, $text, $candidate, $ins_before, $ins_after);
- }
+ $terms_replace[] = array('synonyms' => $term->synonyms, 'ins_before' => $ins_before, 'ins_after' => $ins_after);
}
+ return _glossary_insertlink($format, $text, $terms_replace);
}
return $text;
}
/**
- * Insert glossary links to $text after every $match that is not inside a link.
- * $ins_before is prepended to the matches, $_insafter is appended to them.
+ * Wrap glossary link markup around every match of $terms[i]['synonyms'] in $text that is not inside a blocking tag.
+ * $terms[i]['ins_before'] is prepended to each match and $terms[i]['ins_after'] is appended to it.
* Match type and replace mode all depend on user settings.
- *
- * TODO: improve performance with not keeping *2.5 copies* of the string in memory:
- * $text - original
- * $newtext - transformed
- * $before . $this_match - for checking stuff
+ *
+ * The text is scanned once for all blocking tags and match candidates;
+ * those 'events' are then sorted by offset and handled one by one.
*/
-function _glossary_insertlink($format, &$text, $match, $ins_before, $ins_after) {
+function _glossary_insertlink($format, &$text, &$terms) {
$multibyte_enabled = extension_loaded('mbstring');
- if ($multibyte_enabled) {
- $mb_prefix = 'mb_';
- }
- else {
- $mb_prefix = null;
- }
+ if ($multibyte_enabled) {
+ $mb_prefix = 'mb_';
+ }
+ else {
+ $mb_prefix = null;
+ }
$findfunc = $mb_prefix . (variable_get("glossary_case_$format", "1") ? 'strpos' : 'stripos');
- $next = $findfunc($text, $match);
-
- if ($next === false) { // no match at all
- return $text;
+ $findtagfunc = $mb_prefix .'stripos';
+
+ $replaceall = variable_get("glossary_replace_all_$format", 0);
+
+ $events = array();
+
+ // Find blocking tags.
+ $open_tags = array('[no-glossary]', '<', '', '', '', '', '', '[/code]');
+ /* use these always/when Codefilter module is on?
+ $open_tags = array('[no-glossary]', '<', '', '', '', '', '', '[/code]', '?>', '?]', '%>', '%]', '[/codefilter_');
+ */
+
+ foreach ($open_tags as $i => $tag) {
+ $offset=0;
+ while(($offset = $findtagfunc($text, $tag, $offset)) !== false) {
+ //longer tags will override shorter '<' on the same offset
+ $events[$offset] = array('type' => 'open', 'which' => $i);
+ $offset += drupal_strlen($tag);
+ }
}
- else { // at least one match
- $prevend = 0;
- $newtext = '';
- $matchlen = drupal_strlen($match);
- $textlen = drupal_strlen($text);
- $replaceall = variable_get("glossary_replace_all_$format", 0);
-
- while ($next && ($next <= $textlen)) {
-
- // get parts of the match for further investigation
- $before = drupal_substr($text, 0, $next);
- $this_match = drupal_substr($text, $next, $matchlen);
-
- // see if we have a proper match or not
- $substr_count_func = $mb_prefix . 'substr_count';
- $open = $substr_count_func($before, '<');
- $close = $substr_count_func($before, '>');
- $opena = $substr_count_func($before, '');
- $openacro = $substr_count_func($before, '');
- $proper_match = false;
- if ($opena <= $closea && $open <= $close && $openacro <= $closeacro) { // Not in an open link
- switch (variable_get("glossary_match_$format", 'b')) {
+
+ // Find match candidates.
+ foreach ($terms as $i => $term) {
+ foreach($term['synonyms'] as $synonym) {
+ $offset=0;
+ $first_match_found = false;
+ while(($offset = $findfunc($text, $synonym, $offset)) !== false) {
+ //don't let shorter matches override longer ones
+ if (!isset($events[$offset])) {
+ // get synonym with case as in text
+ $match = drupal_substr($text, $offset, strlen($synonym));
+ $events[$offset] = array('type' => 'match', 'which' => $i, 'match' => $match);
+ if (!$replaceall) {
+ $first_match_found = true;
+ break;
+ }
+ }
+ $offset += drupal_strlen($synonym);
+ }
+ //TODO: remove this if we want different synonyms of the same term to be matched independently as 'first matches'
+ if ($first_match_found && !$replaceall) {
+ break;
+ }
+ }
+ }
+
+ ksort($events);
+
+ $newtext = '';
+ $parsed = 0; // text was parsed from chars 0 to $parsed (exclusive)
+
+ foreach($events as $place => $event) {
+ // skip events that start inside text already consumed (a blocking tag or an earlier accepted match)
+ if ($place < $parsed) {
+ continue;
+ }
+ // copy plain text (with no events)
+ $newtext .= drupal_substr($text, $parsed, ($place - $parsed));
+ $parsed = $place;
+ // if a blocking tag is opened, skip to closing tag
+ if ($event['type'] == 'open') {
+ $skip = $findtagfunc($text, $close_tags[$event['which']], $place);
+ if ($skip === false) {
+ $skip = drupal_strlen($text);
+ }
+ // if the tag is [no-glossary] - remove it with the closing tag (by incrementing $parsed without copying)
+ if ($event['which'] == 0) {
+ $parsed += drupal_strlen($open_tags[$event['which']]);
+ $newtext .= drupal_substr($text, $parsed, ($skip - $parsed));
+ $parsed = $skip + drupal_strlen($close_tags[$event['which']]);
+ }
+ // copy text without changing it
+ else {
+ $newtext .= drupal_substr($text, $parsed, ($skip - $parsed));
+ $parsed = $skip;
+ }
+ }
+ if ($event['type'] == 'match') {
+ $matchlen = drupal_strlen($event['match']);
+ switch (variable_get("glossary_match_$format", 'b')) {
case 'lr': // require word break left or right
// $proper_match = (_glossary_is_boundary($text {$next - 1}) || _glossary_is_boundary($text {$next + $matchlen}));
$proper_match = (_glossary_is_boundary(drupal_substr($text, $next - 1,1)) ||
@@ -634,26 +685,16 @@ function _glossary_insertlink($format, &
default:
$proper_match = true;
break;
- }
}
-
- if ($proper_match) { // found match
- $newtext .= drupal_substr($text, $prevend, ($next - $prevend)) . $ins_before . $this_match . $ins_after;
- if ($replaceall == 0) {
- return $newtext . drupal_substr($text, $next + $matchlen);
- }
- }
- else { // not applicable match
- $newtext .= drupal_substr($text, $prevend, ($next - $prevend)) . $this_match;
+ if ($proper_match) {
+ $newtext .= $terms[$event['which']]['ins_before'] . $event['match'] . $terms[$event['which']]['ins_after'];
+ $parsed += $matchlen;
}
-
- // Step further in finding the next match
- $prevend = $next + $matchlen;
- $next = $findfunc($text, $match, $prevend);
}
- // Append remaining part
- return $newtext . drupal_substr($text, $prevend);
}
+
+ // Append remaining part
+ return $newtext . drupal_substr($text, $parsed);
}
function glossary_page($vid = null, $letter = null) {