diff --git README.txt README.txt index 962c5cf..2551c5e 100644 --- README.txt +++ README.txt @@ -5,8 +5,8 @@ features and better performance. Among the extra features is the ability to have faceted search on facets ranging from content author to taxonomy to arbitrary CCK fields. -The module comes with a schema.xml and solrconfig.xml file which should be used -in your Solr installation. +The module comes with a schema.xml, solrconfig.xml, and protwords.txt file which +must be used in your Solr installation. This module depends on the search framework in core. However, you may not want the core searches and only want Solr search. If that is the case, you want to @@ -86,6 +86,10 @@ Similarly, move apache-solr-1.4.1/example/solr/conf/solrconfig.xml and rename it like solrconfig.bak. Then move the solrconfig.xml that comes with the ApacheSolr Drupal module to take its place. +Finally, move apache-solr-1.4.1/example/solr/conf/protwords.txt and rename +it like protwords.bak. Then move the protwords.txt that comes with the +ApacheSolr Drupal module to take its place. + Now start the solr application by opening a shell, changing directory to apache-solr-1.4.1/example, and executing the command java -jar start.jar diff --git apachesolr.index.inc apachesolr.index.inc index 26987a6..f99eda6 100644 --- apachesolr.index.inc +++ apachesolr.index.inc @@ -12,7 +12,7 @@ function apachesolr_clean_text($text) { // Add spaces before stripping tags to avoid running words together. $text = filter_xss(str_replace(array('<', '>'), array(' <', '> '), $text), array()); // Decode entities and then make safe any < or > characters. 
- return htmlspecialchars(html_entity_decode($text, ENT_NOQUOTES, 'UTF-8'), ENT_NOQUOTES, 'UTF-8'); + return htmlspecialchars(html_entity_decode($text, ENT_QUOTES, 'UTF-8'), ENT_QUOTES, 'UTF-8'); } /** diff --git apachesolr.module apachesolr.module index 4d44f9d..413ce3a 100644 --- apachesolr.module +++ apachesolr.module @@ -1739,9 +1739,13 @@ function apachesolr_do_query($caller, $current_query, &$params = array('rows' => } // This is the object that does the communication with the solr server. $solr = apachesolr_get_solr(); - // We must run htmlspecialchars() here since converted entities are in the index. - // and thus bare entities &, > or < won't match. - $response = $solr->search(htmlspecialchars($keys, ENT_NOQUOTES, 'UTF-8'), $params['start'], $params['rows'], $params); + // We must run htmlspecialchars() here since converted entities are in the index + // and thus bare entities &, > or < won't match. Single quotes are converted + // too, but not double quotes since the dismax parser looks at them for + // phrase queries. + $keys = htmlspecialchars($keys, ENT_NOQUOTES, 'UTF-8'); + $keys = str_replace("'", '&#039;', $keys); + $response = $solr->search($keys, $params['start'], $params['rows'], $params); // The response is cached so that it is accessible to the blocks and anything // else that needs it beyond the initial search. apachesolr_static_response_cache($response, $caller); diff --git protwords.txt protwords.txt new file mode 100644 index 0000000..f0fd084 --- /dev/null +++ protwords.txt @@ -0,0 +1,8 @@ +#----------------------------------------------------------------------- +# This file blocks words from being operated on by the stemmer and word delimiter. +&amp; +&lt; +&gt; +&#039; +&quot; + diff --git schema.xml schema.xml index fae2ce9..a6b0526 100644 --- schema.xml +++ schema.xml @@ -10,7 +10,7 @@ http://wiki.apache.org/solr/SchemaXml --> - +