$word) {
    if ($odd) {
      $words[$k] = dutchstemmer_stem($word);
    }
    $odd = !$odd;
  }

  // Put it all back together
  return implode('', $words);
}

/**
 * Implementation of hook_help().
 *
 * Returns the translated module description for the
 * 'admin/modules#description' section. Nothing is returned for any other
 * section.
 */
function dutchstemmer_help($section = 'admin/help#search') {
  switch ($section) {
    case 'admin/modules#description':
      return t('Implements a Dutch stemming algorithm to improve Dutch searching.');
  }
}

/**
 * Stem a Dutch word.
 *
 * The word is lowercased, accented suffixes are handled (step 0), accents are
 * removed, the R1 and R2 regions are located and suffix-removal steps 1-4 are
 * applied in order.
 *
 * The global $step2 flag is reset to FALSE on every call; it is set by
 * dutchstemmer_step2() and declared global by dutchstemmer_step3().
 *
 * @param $word
 *   The word to stem.
 *
 * @return
 *   The stemmed word.
 */
function dutchstemmer_stem($word) {
  global $step2;
  $step2 = FALSE;

  // R1 and R2 are byte offsets into $word. 0 means "no such region"; the
  // step functions test them with if ($r1) / if ($r2). Initialising them here
  // avoids passing undefined variables when the word has no vowel followed by
  // a non-vowel.
  $r1 = 0;
  $r2 = 0;

  // Lowercase
  $word = drupal_strtolower($word);

  // Step 0: early (accented) suffix removal.
  // dutchstemmer_step0() only takes the word; the regions are not known yet.
  $word = dutchstemmer_step0($word);

  // Remove accents
  $word = str_replace(array('ä', 'ë', 'ï', 'ö', 'ü', 'á', 'é', 'í', 'ó', 'ú'), array('a', 'e', 'i', 'o', 'u', 'a', 'e', 'i', 'o', 'u'), $word);

  // Put initial y, y after a vowel, and i between vowels into upper case (treat as consonants).
  $word = preg_replace(array('/^y|(?<=[aeiouyè])y/u', '/(?<=[aeiouyè])i(?=[aeiouyè])/u'), array('Y', 'I'), $word);

  /* R1 is the region after the first non-vowel following a vowel, or is the
     null region at the end of the word if there is no such non-vowel. */
  if (preg_match('/[aeiouyè][^aeiouyè]/u', $word, $matches, PREG_OFFSET_CAPTURE)) {
    // PREG_OFFSET_CAPTURE reports byte offsets, so advance by the byte length
    // of the matched pair. A fixed "+ 2" is wrong as soon as the pair contains
    // a multibyte character such as è.
    $r1 = $matches[0][1] + strlen($matches[0][0]);

    /* R2 is the region after the first non-vowel following a vowel in R1, or
       is the null region at the end of the word if there is no such
       non-vowel. */
    if (preg_match('/[aeiouyè][^aeiouyè]/u', $word, $matches, PREG_OFFSET_CAPTURE, $r1)) {
      $r2 = $matches[0][1] + strlen($matches[0][0]);
    }
  }

  // Steps 1-4: suffix removal
  $word = dutchstemmer_step1($word, $r1, $r2);
  $word = dutchstemmer_step2($word, $r1, $r2);
  $word = dutchstemmer_step3($word, $r1, $r2);
  $word = dutchstemmer_step4($word, $r1, $r2);

  // Restore the Y / I markers to ordinary lower-case letters.
  $word = str_replace(array('Y', 'I'), array('y', 'i'), $word);

  return $word;
}

/**
 * Remove the last letter if the word ends in one of the listed doubled
 * consonants; otherwise return the word unchanged.
 */
function dutchstemmer_undouble($word) {
  return preg_match('/(bb|dd|gg|kk|ll|mm|nn|pp|rr|ss|tt|zz)$/u', $word) ? substr($word, 0, -1) : $word;
}

/**
 * Step 0: accented suffixes.
 *
 * Rewrites a trailing ieel / iële / ieën to "ie", then a trailing eën to "e".
 */
function dutchstemmer_step0($word) {
  // Step 0: accented suffixes
  return preg_replace('/eën$/u', 'e', preg_replace('/(ieel|iële|ieën)$/u', 'ie', $word));
}

/**
 * Step 1: -heden, -en(e) and -s(e) suffixes.
 *
 * @param $word
 *   The word being stemmed.
 * @param $r1
 *   Byte offset at which R1 starts; a false value skips the step.
 * @param $r2
 *   Not used by this step.
 *
 * @return
 *   The word after the first matching rule has been applied, or the word
 *   unchanged if none matched.
 */
function dutchstemmer_step1($word, $r1, $r2) {
  // Step 1:
  // Search for the longest among the following suffixes, and perform the action indicated
  if ($r1) {
    // -heden
    if (preg_match('/heden$/u', $word, $matches, 0, $r1)) {
      return preg_replace('/heden$/u', 'heid', $word, -1);
    }
    // -en(e)
    else if (preg_match('/(?<=[^aeiouyè]|gem)ene?$/u', $word, $matches, 0, $r1)) {
      return dutchstemmer_undouble(preg_replace('/ene?$/u', '', $word, -1));
    }
    // -s(e)
    else if (preg_match('/(?<=[^jaeiouyè])se?$/u', $word, $matches, 0, $r1)) {
      return rtrim(preg_replace('/se?$/u', '', $word, -1), "'");
    }
  }
  return $word;
}

/**
 * Step 2: delete a trailing e.
 *
 * Sets the global $step2 flag to TRUE when an e was actually removed.
 *
 * @param $word
 *   The word being stemmed.
 * @param $r1
 *   Byte offset at which R1 starts; a false value skips the step.
 * @param $r2
 *   Not used by this step.
 *
 * @return
 *   The word with the trailing e removed and the ending undoubled, or the
 *   word unchanged.
 */
function dutchstemmer_step2($word, $r1, $r2) {
  // Without this declaration the assignment below only touches a local
  // variable and the flag reset in dutchstemmer_stem() never changes.
  global $step2;

  // Step 2:
  // Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending
  if ($r1) {
    if (preg_match('/(?<=[^aeiouyè])e$/u', $word, $matches, 0, $r1)) {
      $step2 = TRUE;
      return dutchstemmer_undouble(preg_replace('/e$/u', '', $word, -1));
    }
  }
  return $word;
}

function dutchstemmer_step3($word, $r1, $r2) {
  global $step2;

  // Step 3a: heid
  // delete heid if in R2 and not preceded by c, and treat a preceding en as in step 1(b)
  if ($r2) {
    // NOTE(review): this copy of the file is damaged from the next line on.
    // The text between "preg_match('/(?" and " man, brood -> brod)." is
    // missing (the rest of step 3 and the header of what is presumably
    // dutchstemmer_step4()). The remaining tokens are kept in their original
    // order and must be restored from an intact copy of the module.
    if (preg_match('/(? man, brood -> brod).
  if (preg_match('/[^aeiouyè](aa|ee|oo|uu)[^Iaeiouyè]$/u', $word)) {
    $word = drupal_substr($word, 0, -2) . str_replace(array('s', 'f'), array('z', 'v'), drupal_substr($word, -1));
  }
  return $word;
}
?>