Index: glossary.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/glossary/glossary.module,v retrieving revision 1.100.2.9 diff -u -p -r1.100.2.9 glossary.module --- glossary.module 26 Jan 2008 01:00:22 -0000 1.100.2.9 +++ glossary.module 26 Jan 2008 21:25:00 -0000 @@ -516,6 +516,7 @@ function _glossary_filter_process($forma $absolute_link = variable_get("glossary_absolute_$format", false); $terms = _glossary_get_terms($format); $vids = _glossary_get_filter_vids(); + $terms_replace = array(); foreach ($terms as $term) { $term_title = $term->name .': '. strip_tags($term->description); @@ -556,62 +557,112 @@ function _glossary_filter_process($forma } // replace term and synonyms with the desired new HTML code - foreach ($term->synonyms as $candidate) { - $text = _glossary_insertlink($format, $text, $candidate, $ins_before, $ins_after); - } + $terms_replace[] = array('synonyms' => $term->synonyms, 'ins_before' => $ins_before, 'ins_after' => $ins_after); } + return _glossary_insertlink($format, $text, $terms_replace); } return $text; } /** - * Insert glossary links to $text after every $match that is not inside a link. - * $ins_before is prepended to the matches, $_insafter is appended to them. + * Insert glossary links to $text after every matching $terms[i]['synonyms'] that is not inside a blocking tag. + * $terms[i]['ins_before'] is prepended to the matches, $terms[i]['ins_after'] is appended to them. * Match type and replace mode all depend on user settings. - * - * TODO: improve performance with not keeping *2.5 copies* of the string in memory: - * $text - original - * $newtext - transformed - * $before . $this_match - for checking stuff + + * The text is scanned once for all blocking tags and matches, + * then those 'events' are sorted and handled one by one. */ -function _glossary_insertlink($format, &$text, $match, $ins_before, $ins_after) { +function _glossary_insertlink($format, &$text, &$terms) { $multibyte_enabled = extension_loaded('mbstring'); - if ($multibyte_enabled) { - $mb_prefix = 'mb_'; - } - else { - $mb_prefix = null; - } + if ($multibyte_enabled) { + $mb_prefix = 'mb_'; + } + else { + $mb_prefix = null; + } $findfunc = $mb_prefix . (variable_get("glossary_case_$format", "1") ? 'strpos' : 'stripos'); - $next = $findfunc($text, $match); - - if ($next === false) { // no match at all - return $text; + $findtagfunc = $mb_prefix .'stripos'; + + $replaceall = variable_get("glossary_replace_all_$format", 0); + + $events = array(); + + // Find blocking tags. + $open_tags = array('[no-glossary]', '<', '', '', '', '', '', '[/code]'); + /* use these always/when Codefilter module is on? + $open_tags = array('[no-glossary]', '<', '', '', '', '', '', '[/code]', '?>', '?]', '%>', '%]', '[/codefilter_'); + */ + + foreach ($open_tags as $i => $tag) { + $offset=0; + while(($offset = $findtagfunc($text, $tag, $offset)) !== false) { + //longer tags will override shorter '<' on the same offset + $events[$offset] = array('type' => 'open', 'which' => $i); + $offset += drupal_strlen($tag); + } } - else { // at least one match - $prevend = 0; - $newtext = ''; - $matchlen = drupal_strlen($match); - $textlen = drupal_strlen($text); - $replaceall = variable_get("glossary_replace_all_$format", 0); - - while ($next && ($next <= $textlen)) { - - // get parts of the match for further investigation - $before = drupal_substr($text, 0, $next); - $this_match = drupal_substr($text, $next, $matchlen); - - // see if we have a proper match or not - $substr_count_func = $mb_prefix . 'substr_count'; - $open = $substr_count_func($before, '<'); - $close = $substr_count_func($before, '>'); - $opena = $substr_count_func($before, ''); - $openacro = $substr_count_func($before, ''); - $proper_match = false; - if ($opena <= $closea && $open <= $close && $openacro <= $closeacro) { // Not in an open link - switch (variable_get("glossary_match_$format", 'b')) { + + // Find match candidates. + foreach ($terms as $i => $term) { + foreach($term['synonyms'] as $synonym) { + $offset=0; + $first_match_found = false; + while(($offset = $findfunc($text, $synonym, $offset)) !== false) { + //don't let shorter matches override longer ones + if (!isset($events[$offset])) { + // get synonym with case as in text + $match = drupal_substr($text, $offset, strlen($synonym)); + $events[$offset] = array('type' => 'match', 'which' => $i, 'match' => $match); + if (!$replaceall) { + $first_match_found = true; + break; + } + } + $offset += drupal_strlen($synonym); + } + //TODO: remove this if we want different synonyms of the same term to be matched independently as 'first matches' + if ($first_match_found && !$replaceall) { + break; + } + } + } + + ksort($events); + + $newtext = ''; + $parsed = 0; // text was parsed from chars 0 to $parsed (exclusive) + + foreach($events as $place => $event) { + // skip events inside blocking tag (they're already copied as is) + if ($place < $parsed) { + continue; + } + // copy plain text (with no events) + $newtext .= drupal_substr($text, $parsed, ($place - $parsed)); + $parsed = $place; + // if a blocking tag is opened, skip to closing tag + if ($event['type'] == 'open') { + $skip = $findtagfunc($text, $close_tags[$event['which']], $place); + if ($skip === false) { + $skip = drupal_strlen($text); + } + // if the tag is [no-glossary] - remove it with the closing tag (by incrementing $parsed without copying) + if ($event['which'] == 0) { + $parsed += drupal_strlen($open_tags[$event['which']]); + $newtext .= drupal_substr($text, $parsed, ($skip - $parsed)); + $parsed = $skip + drupal_strlen($close_tags[$event['which']]); + } + // copy text without changing it + else { + $newtext .= drupal_substr($text, $parsed, ($skip - $parsed)); + $parsed = $skip; + } + } + if ($event['type'] == 'match') { + $matchlen = drupal_strlen($event['match']); + switch (variable_get("glossary_match_$format", 'b')) { case 'lr': // require word break left or right // $proper_match = (_glossary_is_boundary($text {$next - 1}) || _glossary_is_boundary($text {$next + $matchlen})); $proper_match = (_glossary_is_boundary(drupal_substr($text, $next - 1,1)) || @@ -634,26 +685,16 @@ function _glossary_insertlink($format, & default: $proper_match = true; break; - } } - - if ($proper_match) { // found match - $newtext .= drupal_substr($text, $prevend, ($next - $prevend)) . $ins_before . $this_match . $ins_after; - if ($replaceall == 0) { - return $newtext . drupal_substr($text, $next + $matchlen); - } - } - else { // not applicable match - $newtext .= drupal_substr($text, $prevend, ($next - $prevend)) . $this_match; + if ($proper_match) { + $newtext .= $terms[$event['which']]['ins_before'] . $event['match'] . $terms[$event['which']]['ins_after']; + $parsed += $matchlen; } - - // Step further in finding the next match - $prevend = $next + $matchlen; - $next = $findfunc($text, $match, $prevend); } - // Append remaining part - return $newtext . drupal_substr($text, $prevend); } + + // Append remaining part + return $newtext . drupal_substr($text, $parsed); } function glossary_page($vid = null, $letter = null) {