Index: apachesolr.index.inc
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.index.inc,v
retrieving revision 1.1.2.5
diff -u -p -r1.1.2.5 apachesolr.index.inc
--- apachesolr.index.inc 19 May 2009 19:58:23 -0000 1.1.2.5
+++ apachesolr.index.inc 1 Jul 2009 00:56:12 -0000
@@ -21,6 +21,7 @@ function apachesolr_add_node_document(&$
 function apachesolr_strip_ctl_chars($text) {
   // See: http://w3.org/International/questions/qa-forms-utf-8.html
   // Printable utf-8 does not include any of these chars below x7F
+  $text = iconv("UTF-8", "UTF-8//IGNORE", $text);
   return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $text);
 }
 
@@ -32,6 +33,7 @@ function apachesolr_clean_text($text) {
 }
 
 function _apachesolr_strip_decode($text) {
+  $text = iconv("UTF-8", "UTF-8//IGNORE", $text);
   // Add spaces before stripping tags to avoid running words together.
   $text = filter_xss(str_replace(array('<', '>'), array(' <', '> '), $text), array());
   // Decode entities and then make safe any < or > characters.