? .cvsignore ? SolrPhpClient ? Zend ? auth-servlet.patch ? entity-fixes-528086-17.patch ? entity-fixes-528086-20.patch ? entity-type-348668-38.patch ? mlt-restrict-372767-38.patch ? mlt-restrict-372767-40.patch ? mysql-opt-592522-16.patch ? noresults-661952-1.patch ? text-analyzer.xml Index: README.txt =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/README.txt,v retrieving revision 1.1.2.1.2.31 diff -u -p -r1.1.2.1.2.31 README.txt --- README.txt 15 Nov 2009 14:52:28 -0000 1.1.2.1.2.31 +++ README.txt 17 Dec 2009 15:41:13 -0000 @@ -156,6 +156,9 @@ behavior: each Solr request, such as when making {apachesolr_search_node} consistent with {node}. + - apachesolr_search_noresults: the text displayed by apachesolr_search when + there are no results for a search. + Troubleshooting -------------- Problem: @@ -173,6 +176,12 @@ site address. Developers -------------- +Important note: RC3 to RC4 the function apachesolr_clean_text() was renamed to +apachesolr_unclean_text(). The handling of html entities changed significantly, +and any custom theme implementations or hook implementations must be sure +to call check_plain() on text that was processed by apachesolr_unclean_text() +before being indexed. See issue: http://drupal.org/node/528086 + Exposed Hooks in 6.x: hook_apachesolr_modify_query(&$query, &$params, $caller); Index: apachesolr.index.inc =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.index.inc,v retrieving revision 1.1.2.16 diff -u -p -r1.1.2.16 apachesolr.index.inc --- apachesolr.index.inc 17 Dec 2009 15:27:52 -0000 1.1.2.16 +++ apachesolr.index.inc 17 Dec 2009 15:41:13 -0000 @@ -16,13 +16,14 @@ function apachesolr_add_node_document(&$ } /** - * Strip html tags and also control characters that cause Jetty/Solr to fail. + * Strip html tags and decode entities. 
Because of decoding, we should not trust + * content returned from Solr - we need to apply check_plain(). */ -function apachesolr_clean_text($text) { +function apachesolr_unclean_text($text) { // Add spaces before stripping tags to avoid running words together. $text = filter_xss(str_replace(array('<', '>'), array(' <', '> '), $text), array()); - // Decode entities and then make safe any < or > characters. - return htmlspecialchars(html_entity_decode($text, ENT_NOQUOTES, 'UTF-8'), ENT_NOQUOTES, 'UTF-8'); + // Decode entities so we can search for them. + return html_entity_decode($text, ENT_NOQUOTES, 'UTF-8'); } /** @@ -50,7 +51,7 @@ function apachesolr_node_to_document($ni $node->build_mode = NODE_BUILD_SEARCH_INDEX; $node = node_build_content($node, FALSE, FALSE); $node->body = drupal_render($node->content); - $node->title = apachesolr_clean_text($node->title); + $node->title = apachesolr_unclean_text($node->title); $text = $node->body; @@ -89,7 +90,7 @@ function apachesolr_node_to_document($ni // 'zxx' is the language-neutral code in Drupal 7. $document->language = 'zxx'; } - $document->body = apachesolr_clean_text($text); + $document->body = apachesolr_unclean_text($text); $document->type = $node->type; $document->type_name = node_get_types('name', $node); $document->created = apachesolr_date_iso($node->created); @@ -132,10 +133,10 @@ function apachesolr_node_to_document($ni // We can use 'value' rather than 'safe' since we strip tags and later check_plain(). 
if (isset($value['value']) && strlen($value['value'])) { if ($cck_info['multiple']) { - $document->setMultiValue($index_key, apachesolr_clean_text($value['value'])); + $document->setMultiValue($index_key, apachesolr_unclean_text($value['value'])); } else { - $document->$index_key = apachesolr_clean_text($value['value']); + $document->$index_key = apachesolr_unclean_text($value['value']); } } } @@ -175,7 +176,7 @@ function apachesolr_add_taxonomy_to_docu foreach ($ancestors as $ancestor) { $document->setMultiValue('tid', $ancestor->tid); $document->setMultiValue('im_vid_'. $ancestor->vid, $ancestor->tid); - $name = apachesolr_clean_text($ancestor->name); + $name = apachesolr_unclean_text($ancestor->name); $document->setMultiValue('vid', $ancestor->vid); $document->{'ts_vid_'. $ancestor->vid .'_names'} .= ' '. $name; // We index each name as a string for cross-site faceting Index: apachesolr_search.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr_search.module,v retrieving revision 1.1.2.6.2.122 diff -u -p -r1.1.2.6.2.122 apachesolr_search.module --- apachesolr_search.module 17 Dec 2009 15:27:52 -0000 1.1.2.6.2.122 +++ apachesolr_search.module 17 Dec 2009 15:41:13 -0000 @@ -417,14 +417,32 @@ function apachesolr_process_response($re else { $snippet = ''; } - + // We decoded entities when we indexed, so we must make the text safe. + // Translate back STRONG tags used for highlighting matches. + $snippet = strtr(check_plain($snippet), array('<strong>' => '', '</strong>' => '')); if (!isset($doc->body)) { $doc->body = $snippet; } + else { + $doc->body = check_plain($doc->body); + } $doc->created = strtotime($doc->created); $doc->changed = strtotime($doc->changed); // Allow modules to alter each document. drupal_alter('apachesolr_search_result', $doc); + + // Find all $doc fields that need to be made safe, except the title + // which we expect to be unsafe. 
+ $check_keys = array(); + foreach (apachesolr_cck_fields() as $key => $cck_info) { + $check_keys[apachesolr_index_key($cck_info)] = 1; + } + foreach ($doc as $key => $value) { + // We call apachesolr_unclean_text() on these values in apachesolr.index.inc + if (isset($check_keys[$key]) || strpos($key, 'ts_vid_') === 0 || strpos($key, 'sm_vid_') === 0) { + $doc->$key = check_plain($value); + } + } // Copy code from comment_nodeapi(). $extra[] = format_plural($doc->comment_count, '1 comment', '@count comments'); $results[] = array(