? CHANGELOG.txt ? Drupal_Apache_Solr_Service.php ? LICENSE.txt ? apachesolr-backport.patch ? apachesolr.js ? apachesolr_search.install ? SolrPhpClient/COPYING ? contrib/apachesolr_nodeaccess ? tests/solr_base_subquery.test ? tests/solr_index_and_search.test Index: README.txt =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/README.txt,v retrieving revision 1.1.2.5 diff -u -p -r1.1.2.5 README.txt --- README.txt 5 Oct 2008 17:30:07 -0000 1.1.2.5 +++ README.txt 5 Feb 2009 21:42:04 -0000 @@ -1,10 +1,17 @@ -/* $Id: README.txt,v 1.1.2.5 2008/10/05 17:30:07 robertDouglass Exp $ */ +/* $Id: README.txt,v 1.1.2.1.2.9 2009/01/12 22:20:56 pwolanin Exp $ */ -This module integrates Drupal with the Apache Solr search platform. Solr search can be used as a replacement for core content search and boasts both extra features and better performance. Among the extra features is the ability to have faceted search on facets ranging from content author to taxonomy to arbitrary CCK fields. - -The module comes with a schema.xml file which should be used in your Solr installation. - -This module depends on the search framework in core. However, you may not want the core searches and only want Solr search. If that is the case, you want to use the Core Searches module in tandem with this module. +This module integrates Drupal with the Apache Solr search platform. Solr search +can be used as a replacement for core content search and boasts both extra +features and better performance. Among the extra features is the ability to have +faceted search on facets ranging from content author to taxonomy to arbitrary +CCK fields. + +The module comes with a schema.xml and solrconfig.xml file which should be used +in your Solr installation. + +This module depends on the search framework in core. However, you may not want +the core searches and only want Solr search. 
If that is the case, you want to +use the Core Searches module in tandem with this module. Installation @@ -12,36 +19,55 @@ Installation Install and enable the ApacheSolr Drupal module as you would any Drupal module. -Prerequisite: Java 5 or higher. +Prerequisite: Java 5 or higher (a.k.a. 1.5.x). PHP 5.2.0 or higher. -Download Solr 1.2 or higher from a mirror site: -http://www.apache.org/dyn/closer.cgi/lucene/solr/ +Download Solr trunk (candidate 1.4.x build) from a nightly build or build it +from svn. http://people.apache.org/builds/lucene/solr/nightly/ -Unpack the tarball somewhere not visible to the web (not in your apache docroot and not inside of your drupal directory). +Once Solr 1.4 is released, you will be able to download from: +http://www.apache.org/dyn/closer.cgi/lucene/solr/ -The Solr download comes with an example application that you can use for testing, development, and even for smaller production sites. This application is found at apache-solr-1.2.x/example. +Unpack the tarball somewhere not visible to the web (not in your apache docroot +and not inside of your drupal directory). -Move apache-solr-1.2.x/example/solr/conf/schema.xml and rename it to something like schema.bak. Then move the schema.xml that comes with the ApacheSolr Drupal module to take its place. +The Solr download comes with an example application that you can use for +testing, development, and even for smaller production sites. This +application is found at apache-solr-nightly/example. + +Move apache-solr-nightly/example/solr/conf/schema.xml and rename it to +something like schema.bak. Then move the schema.xml that comes with the +ApacheSolr Drupal module to take its place. + +Similarly, move apache-solr-nightly/example/solr/conf/solrconfig.xml and rename +it like solrconfig.bak. Then move the solrconfig.xml that comes with the +ApacheSolr Drupal module to take its place. 
-Now start the solr application by opening a shell, changing directory to apache-solr-1.2.x/example, and executing the command java -jar start.jar +Now start the solr application by opening a shell, changing directory to +apache-solr-nightly/example, and executing the command java -jar start.jar -Test that your solr server is now available by visiting http://localhost:8983/solr/admin/ +Test that your solr server is now available by visiting +http://localhost:8983/solr/admin/ Now run cron on your Drupal site until your content is indexed. +The solrconfig.xml that comes with this module defines auto-commit, so +it may take a few minutes between running cron and when the new content +is visible in search. + Enable blocks for facets at Administer > Site building > Blocks. Troubleshooting -------------- Problem: -Your Solr instance is running and you can test it in the Solr -admin interface (comes with the Java application). Yet your -Drupal ApacheSolr module cannot connect to it to do a search. +Links to nodes appear in the search results with a different host name or +subdomain than is preferred. e.g. sometimes at http://example.com +and sometimes at http://www.example.com Solution: -To be able to use file_get_contents() in PHP, the "allow_url_fopen" -directive must be enabled. In php.ini set the following value: -allow_url_fopen = On +Set $base_url in settings.php to ensure that an identical absolute url is +generated at all times when nodes are indexed. Alternately, set up a re-direct +in .htaccess to prevent site visitors from accessing the site via more than one +site address. Developers @@ -55,4 +81,5 @@ hook_apachesolr_update_index(&$document, This hook is called just before indexing the document. It allows you to add fields to the $document object which is sent to Solr. 
-For reference on the $document object, see SolrPhpClient/Apache/Solr/Document.php +For reference on the $document object, see: +SolrPhpClient/Apache/Solr/Document.php Index: Solr_Base_Query.php =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/Solr_Base_Query.php,v retrieving revision 1.1.2.8 diff -u -p -r1.1.2.8 Solr_Base_Query.php --- Solr_Base_Query.php 27 Nov 2008 18:47:58 -0000 1.1.2.8 +++ Solr_Base_Query.php 5 Feb 2009 21:42:05 -0000 @@ -1,26 +1,9 @@ code == '200') { - $data = json_decode($response->data); - } - } - return $data->fields; - } - - /** * This is copied from search module. The search module implementation doesn't * handle quoted terms correctly (bug) and this function is copied here until * I have the bugfix perfected, at which point a patch will be submitted to search @@ -28,34 +11,15 @@ class Solr_Base_Query { * * Extract a module-specific search option from a search query. e.g. 'type:book' */ - static function query_extract($keys, $option) { - $pattern = '/(^| )'. $option .':(\"([^\"]*)\")/i'; - preg_match_all($pattern, $keys, $matches); - if (!empty($matches[2])) { - // The preg_replace removes beginning and trailing quotations. - return preg_replace('/^"|"$/', '', $matches[2]); + static function query_extract($filters, $option) { + $pattern = '/(^| )'. $option .':"([^"]*)"/i'; + if (preg_match_all($pattern, $filters, $matches)) { + return array('matches' => $matches[0], 'values' => $matches[2]); } $pattern = '/(^| )'. $option .':([^ ]*)/i'; - if (preg_match_all($pattern, $keys, $matches)) { - if (!empty($matches[2])) { - return $matches[2]; - } - } - } - - /** - * Replaces all occurances of $option in $keys. - */ - static function query_replace($keys, $option) { - $matches = Solr_Base_Query::query_extract($keys, $option); - if (count($matches) > 0) { - foreach ($matches as $match) { - // TODO: Make some sort of name->value container object. 
- $found = Solr_Base_Query::make_field(array('#name' => $option, '#value' => $match)); - $keys = str_replace($found, '', $keys); - } + if (preg_match_all($pattern, $filters, $matches)) { + return array('matches' => $matches[0], 'values' => $matches[2]); } - return $keys; } /** @@ -67,84 +31,107 @@ class Solr_Base_Query { return implode(' ', array_filter(explode(' ', $values['#value']), 'trim')); } else { - // if the field value has spaces in it, wrap it in double quotes. - if (count(explode(' ', $values['#value'])) > 1) { + // If the field value has spaces, or : in it, wrap it in double quotes. + if (preg_match('/[ :]/', $values['#value'])) { $values['#value'] = '"'. $values['#value']. '"'; } - return $values['#name']. ':'. $values['#value']; + return $values['#name'] . ':' . $values['#value']; } } /** + * Static shared by all instances, used to increment ID numbers. + */ + protected static $idCount = 0; + + /** + * Each query/subquery will have a unique ID + */ + public $id; + + /** * A keyed array where the key is a position integer and the value * is an array with #name and #value properties. */ - private $_fields; + protected $fields; + protected $filters; /** * An array of subqueries. */ - private $_subqueries = array(); + protected $subqueries = array(); /** - * The query string. + * The query path (search keywords). */ - private $_query; + protected $querypath; /** - * Should fields be AND'd or OR'd together? + * Apache_Solr_Service object */ - private $_field_operator; - + protected $solr; + /** - * @param $querystring + * @param $solr + * An instantiated Apache_Solr_Service Object. + * Can be instantiated from apachesolr_get_solr(). + * + * @param $querypath * The string that a user would type into the search box. Suitable input * may come from search_get_keys() - * @param $field_operator - * An object level operator. AND is the implicit default. All segments will - * be joined with this operator. 
- */ - function __construct($querystring, $field_operator = "AND") { - $this->_field_operator = $field_operator; - $this->_query = trim($querystring); + * + * @param $filterstring + * Key and value pairs that are applied as a filter query. + * + * @param $sortstring + * Visible string telling solr how to sort - added to output querystring. + */ + function __construct($solr, $querypath, $filterstring, $sortstring) { + $this->solr = $solr; + $this->querypath = trim($querypath); + $this->filters = trim($filterstring); + $this->solrsort = trim($sortstring); + $this->id = ++self::$idCount; $this->parse_query(); } + function __clone() { + $this->id = ++self::$idCount; + } + function add_field($field, $value) { // microtime guarantees that added fields come at the end of the query, // in order. - $this->_fields[microtime()] = array('#name' => $field, '#value' => trim($value)); - $this->rebuild_query(); + $this->fields[microtime()] = array('#name' => $field, '#value' => trim($value)); } - - function get_fields() { - return $this->_fields; + + public function get_fields() { + return $this->fields; } - function remove_field($name, $value = NULL) { + public function remove_field($name, $value = NULL) { // We can only remove named fields. 
if (empty($name)) { return; } if (empty($value)) { - foreach ($this->_fields as $pos => $values) { + foreach ($this->fields as $pos => $values) { if ($values['#name'] == $name) { - unset($this->_fields[$pos]); + unset($this->fields[$pos]); } } } else { - foreach ($this->_fields as $pos => $values) { + foreach ($this->fields as $pos => $values) { if ($values['#name'] == $name && $values['#value'] == $value) { - unset($this->_fields[$pos]); + unset($this->fields[$pos]); } } } - $this->rebuild_query(); } - function has_field($name, $value) { - foreach ($this->_fields as $pos => $values) { + public function has_field($name, $value) { + foreach ($this->fields as $pos => $values) { if (!empty($values['#name']) && !empty($values['#value']) && $values['#name'] == $name && $values['#value'] == $value) { return TRUE; } @@ -159,62 +146,78 @@ class Solr_Base_Query { * * @param $query * An instance of Solr_Base_Query. - * + * * @param $operator * 'AND' or 'OR' - */ - function add_subquery(Solr_Base_Query $query, $operator = 'AND') { - $this->_subqueries[$query->get_query_basic()] = array('#query' => $query, '#operator' => $operator); + */ + function add_subquery(Solr_Base_Query $query, $fq_operator = 'OR', $q_operator = 'AND') { + $this->subqueries[$query->id] = array('#query' => $query, '#fq_operator' => $fq_operator, '#q_operator' => $q_operator); } - + function remove_subquery(Solr_Base_Query $query) { - unset($this->_subqueries[$query->get_query_basic()]); + unset($this->subqueries[$query->id]); } - - function remove_subqueries() { - $this->_subqueries = array(); - } - - function get_query() { - $this->rebuild_query(); - return $this->_query; + + public function remove_subqueries() { + $this->subqueries = array(); + } + + public function set_solrsort($sortstring) { + $this->solrsort = trim($sortstring); + } + /** + * Return filters and sort in a form suitable for a query param to url(). 
+ */ + public function get_url_querystring() { + $querystring = ''; + if ($fq = $this->get_fq()) { + $querystring = 'filters='. implode(' ', $fq); + } + if ($this->solrsort) { + $querystring .= ($querystring) ? '&' . $this->solrsort : $this->solrsort; + } + return $querystring; + } + + public function get_fq() { + return $this->rebuild_fq(); } /** * A function to get just the keyword components of the query, * omitting any field:value portions. */ - function get_query_basic() { - $nonames = array_filter($this->_fields, create_function('$a', 'return empty($a[\'#name\']);')); - $result = array(); - foreach ($nonames as $pos => $field) { - $result[] = $field['#value']; - } - return implode(' ', $result); + public function get_query_basic() { + return $this->rebuild_query(); } - function get_breadcrumb() { + public function get_breadcrumb() { // This encodes an assumption that the breadcrumb is always building off // of the current page. Could be a problem. $breadcrumb = menu_get_active_breadcrumb(); // double check that the fields are ordered by position. - ksort($this->_fields); + ksort($this->fields); $progressive_crumb = array(); + $search_keys = $this->get_query_basic(); // TODO: Don't know if hardcoding this is going to come back to bite. - $base = 'search/'. arg(1). '/'; + $base = 'search/'. arg(1) . '/' . $search_keys; + if ($search_keys) { + $breadcrumb[] = l($search_keys, $base); + } - foreach ($this->_fields as $field) { + foreach ($this->fields as $field) { $progressive_crumb[] = Solr_Base_Query::make_field($field); + $options = 'filters=' . implode(' ', $progressive_crumb); if (empty($field['#name'])) { - $breadcrumb[] = l($field['#value'], $base. implode(' ', $progressive_crumb)); + $breadcrumb[] = l($field['#value'], $base, array(), $options); } else if ($themed = theme("apachesolr_breadcrumb_{$field['#name']}", $field['#value'])) { - $breadcrumb[] = l($themed, $base. 
implode(' ', $progressive_crumb)); + $breadcrumb[] = l($themed, $base, array(), $options); } else { - $breadcrumb[] = l($field['#value'], $base. implode(' ', $progressive_crumb)); + $breadcrumb[] = l($field['#name'], $base, array(), $options); } } // the last breadcrumb is the current page, so it shouldn't be a link. @@ -224,59 +227,66 @@ class Solr_Base_Query { return $breadcrumb; } - private function parse_query() { - $this->_fields = array(); - $_keys = $this->_query; + protected function parse_query() { + $this->fields = array(); + $filters = $this->filters; // Gets information about the fields already in solr index. - $index_fields = Solr_Base_Query::get_fields_in_index(); + $index_fields = $this->solr->getFields(); $rows = array(); foreach ((array) $index_fields as $name => $field) { do { // save the strlen so we can detect if it has changed at the bottom // of the do loop - $a = (int)strlen($_keys); + $a = (int)strlen($filters); // Get the values for $name - $values = Solr_Base_Query::query_extract($_keys, $name); - if (count($values) > 0) { - foreach ($values as $value) { + $extracted = Solr_Base_Query::query_extract($filters, $name); + if (count($extracted['values'])) { + foreach ($extracted['values'] as $value) { $found = Solr_Base_Query::make_field(array('#name' => $name, '#value' => $value)); - $pos = strpos($this->_query, $found); + $pos = strpos($this->filters, $found); // $solr_keys and $solr_crumbs are keyed on $pos so that query order // is maintained. This is important for breadcrumbs. - $this->_fields[$pos] = array('#name' => $name, '#value' => trim($value)); + $this->fields[$pos] = array('#name' => $name, '#value' => trim($value)); } - // Update the local copy of $_keys by removing the key that was just found. - $_keys = trim(Solr_Base_Query::query_replace($_keys, $name)); + // Update the local copy of $filters by removing the key that was just found. 
+ $filters = trim(str_replace($extracted['matches'], '', $filters)); } // Take new strlen to compare with $a. - $b = (int)strlen($_keys); + $b = (int)strlen($filters); } while ($a !== $b); - - // Clean up by adding remaining keywords. - if (!empty($_keys)) { - $pos = strpos($this->_query, $_keys); - $this->_fields[$pos] = array('#name' => '', '#value' => trim($_keys)); - } } // Even though the array has the right keys they are likely in the wrong // order. ksort() sorts the array by key while maintaining the key. - ksort($this->_fields); + ksort($this->fields); } - private function rebuild_query() { + protected function rebuild_fq() { $fields = array(); - foreach ($this->_fields as $pos => $values) { + foreach ($this->fields as $pos => $values) { $fields[] = Solr_Base_Query::make_field($values); } - - $join_delim = $this->_field_operator == 'AND' ? ' ' : ' OR '; - $this->_query = trim(implode($join_delim, array_filter($fields, 'trim'))); - foreach ($this->_subqueries as $id => $data) { - $operator = $data['#operator']; - $subquery = $data['#query']->get_query(); - $this->_query .= " {$operator} ({$subquery})"; + $fq = array_filter($fields, 'trim'); + foreach ($this->subqueries as $id => $data) { + $subfq = $data['#query']->get_fq(); + if ($subfq) { + $operator = $data['#fq_operator']; + $fq[] = "(" . 
implode(" {$operator} ", $subfq) .")"; + } + } + return $fq; + } + + protected function rebuild_query() { + $query = $this->querypath; + foreach ($this->subqueries as $id => $data) { + $operator = $data['#q_operator']; + $subquery = $data['#query']->get_query_basic(); + if ($subquery) { + $query .= " {$operator} ({$subquery})"; + } } + return $query; } } Index: apachesolr.info =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.info,v retrieving revision 1.1.2.2 diff -u -p -r1.1.2.2 apachesolr.info --- apachesolr.info 14 Jun 2008 18:44:29 -0000 1.1.2.2 +++ apachesolr.info 5 Feb 2009 21:42:05 -0000 @@ -1,5 +1,6 @@ -; $Id: apachesolr.info,v 1.1.2.2 2008/06/14 18:44:29 robertDouglass Exp $ +; $Id: apachesolr.info,v 1.1.2.1.2.6 2009/01/27 21:32:34 pwolanin Exp $ name = Apache Solr framework -description = Framework for searching with solr +description = Framework for searching with Solr dependencies = search -package = ApacheSolr +package = Apache Solr + Index: apachesolr.install =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.install,v retrieving revision 1.1.2.4 diff -u -p -r1.1.2.4 apachesolr.install --- apachesolr.install 20 Jul 2008 10:33:15 -0000 1.1.2.4 +++ apachesolr.install 5 Feb 2009 21:42:05 -0000 @@ -1,12 +1,64 @@ 'admin/settings/apachesolr', - 'title' => t('Apache Solr'), - 'description' => t('Administer Apache Solr.'), - 'callback' => 'drupal_get_form', - 'callback arguments' => 'apachesolr_settings', - 'access' => user_access('administer search'), + 'path' => 'admin/settings/apachesolr', + 'title' => t('Apache Solr'), + 'description' => t('Administer Apache Solr.'), + 'callback' => 'drupal_get_form', + 'callback arguments' => 'apachesolr_settings', + 'access' => user_access('administer site configuration'), ); $items[] = array( - 'path' => 
'admin/settings/apachesolr/settings', - 'title' => t('Settings'), - 'weight' => -10, - 'callback' => 'drupal_get_form', - 'callback arguments' => 'apachesolr_settings', - 'access' => user_access('administer search'), - 'type' => MENU_DEFAULT_LOCAL_TASK, + 'path' => 'admin/settings/apachesolr/settings', + 'title' => t('Settings'), + 'weight' => -10, + 'access' => user_access('administer site configuration'), + 'type' => MENU_DEFAULT_LOCAL_TASK, ); $items[] = array( - 'path' => 'admin/settings/apachesolr/index', - 'title' => t('Search index'), - 'weight' => -8, - 'callback' => 'apachesolr_index_page', - 'access' => user_access('administer search'), - 'type' => MENU_LOCAL_TASK, + 'path' => 'admin/settings/apachesolr/enabled-filters', + 'title' => t('Enabled filters'), + 'callback' => 'drupal_get_form', + 'callback arguments' => array('apachesolr_enabled_facets_form'), + 'weight' => -7, + 'access' => user_access('administer site configuration'), + 'type' => MENU_LOCAL_TASK, ); - } - return $items; -} - -/** - * Implementation of hook_help(). - */ -function apachesolr_help($section) { - switch ($section) { - case 'admin/settings/apachesolr/index': - // Collect some stats (from search.module) - $remaining = 0; - $total = 0; - foreach (module_list() as $module) { - if (module_hook($module, 'search')) { - $status = module_invoke($module, 'search', 'status'); - $remaining += $status['remaining']; - $total += $status['total']; - } - } - - return t('Apache Solr search index is generated by !cron. %percentage of the site has been indexed. 
There @items left to index.', array( - '!cron' => l(t('running cron'), 'admin/logs/status/run-cron', array(), 'destination=admin/settings/apachesolr/index'), - '%percentage' => ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) .'%', - '@items' => format_plural($remaining, t('is 1 item'), t('are @count items') - ))); - } -} - -function apachesolr_settings() { - $form = array(); - - //perform a check to ensure the server is there - $requirements = apachesolr_requirements('runtime'); - $status = $requirements['apachesolr']['severity'] == 2 ? 'error' : 'status'; - drupal_set_message($requirements['apachesolr']['value'], $status); - - $form['apachesolr_host'] = array( - '#type' => 'textfield', - '#title' => t('Solr host name'), - '#default_value' => variable_get('apachesolr_host', 'localhost'), - '#description' => t('Host name of your Solr server, e.g. localhost or example.com.'), - ); - $form['apachesolr_port'] = array( - '#type' => 'textfield', - '#title' => t('Solr port'), - '#default_value' => variable_get('apachesolr_port', '8983'), - '#description' => t('Port on which the Solr server listens. Tomcat is 8080 by default.'), - ); - $form['apachesolr_path'] = array( - '#type' => 'textfield', - '#title' => t('Solr path'), - '#default_value' => variable_get('apachesolr_path', '/solr'), - '#description' => t('Path that identifies the Solr request handler to be used. 
Leave this as /solr for now.'), + $items[] = array( + 'path' => 'admin/settings/apachesolr/index', + 'title' => t('Search index'), + 'callback' => 'apachesolr_index_page', + 'access' => user_access('administer site configuration'), + 'weight' => -8, + 'type' => MENU_LOCAL_TASK, ); - $options = array(); - foreach (array(5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100) as $option) { - $options[$option] = $option; } - $form['apachesolr_rows'] = array( - '#type' => 'select', - '#title' => t('Results per page'), - '#default_value' => variable_get('apachesolr_rows', 10), - '#options' => $options, - '#description' => t('The number of results that will be shown per page.'), - ); - $form['apachesolr_failure'] = array( - '#type' => 'select', - '#title' => t('On failure'), - '#options' => array('show_error' => t('Show error'), - 'show_drupal_results' => t('Show core Drupal results'), - 'show_no_results' => t('Show no results') - ), - '#default_value' => variable_get('apachesolr_failure', 'show_error'), - '#description' => t('What to display if ApacheSolr search is not available.'), - ); - return system_settings_form($form); + return $items; } /** - * Determines ApacheSolr's behavior when searching causes an exception (e.g. Solr isn't available.) + * Determines Apache Solr's behavior when searching causes an exception (e.g. Solr isn't available.) * Depending on the admin settings, possibly redirect to Drupal's core search. * * @param $search_name * The name of the search implementation. * * @param $querystring - * The search query that was issued at the time of failure. + * The search query that was issued at the time of failure. */ function apachesolr_failure($search_name, $querystring) { $fail_rule = variable_get('apachesolr_failure', 'show_error'); - + switch ($fail_rule) { case 'show_error': drupal_set_message(t('The Apache Solr search engine is not available. 
Please contact your site administrator.'), 'error'); break; - case 'show_drupal_results': + case 'show_drupal_results': drupal_set_message(t("%search_name is not available. Your search is being redirected.", array('%search_name' => $search_name))); drupal_goto('search/node/' . drupal_urlencode($querystring)); break; @@ -154,23 +86,23 @@ function apachesolr_requirements($phase) $path = variable_get('apachesolr_path', '/solr'); $ping = FALSE; try { - $solr =& apachesolr_get_solr($host, $port, $path); + $solr = apachesolr_get_solr(); $ping = @$solr->ping(); // If there is no $solr object, there is no server available, so don't continue. if (!$ping) { - throw new Exception(t('No Solr instance available during indexing')); + throw new Exception(t('No Solr instance available when checking requirements.')); } } catch (Exception $e) { watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); } - $value = $ping ? $t('Solr can be pinged.') : $t('No Solr instance is available.'); + $value = $ping ? $t('Your site has contacted the Apache Solr server.') : $t('Your site was unable to contact the Apache Solr server.'); $severity = $ping ? 0: 2; $description = theme('item_list', array($t('Host: %host', array('%host' => $host)), $t('Port: %port', array('%port' => $port)), $t('Path: %path', array('%path' => $path)))); $requirements['apachesolr'] = array( - 'title' => $t('ApacheSolr'), + 'title' => $t('Apache Solr'), 'value' => $value, 'description' => $description, 'severity' => $severity, @@ -179,301 +111,417 @@ function apachesolr_requirements($phase) } } -function apachesolr_index_page() { - // Gets information about the fields already in solr index. - include_once drupal_get_path('module', 'apachesolr') .'/Solr_Base_Query.php'; - $fields = Solr_Base_Query::get_fields_in_index(); - $rows = array(); - foreach ($fields as $name => $field) { - $rows[] = array($name, $field->type); +/** + * Like $site_key in _update_refresh() - returns a site-specific hash. 
+ */ +function apachesolr_site_hash() { + if (!($hash = variable_get('apachesolr_site_hash', FALSE))) { + global $base_url; + $hash = md5($base_url . drupal_get_private_key() . 'apachesolr'); + variable_set('apachesolr_site_hash', $hash); } - $output = ''; - // Display the table of Field names and Field Index Types. - $output .= theme('table', array(t('Field name'), t('Field index type')), $rows); - // Display the Delete Index form. - $output .= drupal_get_form('apachesolr_delete_index_form'); - - return $output; + return $hash; +} + +function apachesolr_document_id($id, $type = 'node') { + return apachesolr_site_hash() . "/$type/" . $id; } /** - * Create a form for deleting the contents of the Solr index. + * Implementation of hook_user(). + * + * Mark nodes as needing re-indexing if the author name changes. */ -function apachesolr_delete_index_form() { - $form = array(); - $form['markup'] = array( - '#type' => 'markup', - '#value' => '

Solr Index

', - ); - $form['delete_index'] = array( - '#type' => 'checkbox', - '#title' => t('Delete all documents'), - '#description' => t('This option deletes all of the documents in the Solr index. You would do this if the index contains wrong content that you need to purge. This action shouldn\'t be necessary in normal cases. After deleting you will need to rebuild the index by running cron.'), - '#default_value' => NULL, - ); - $form['submit'] = array( - '#type' => 'submit', - '#value' => t('Delete the index'), - ); - - return $form; +function apachesolr_user($op, &$edit, &$account) { + switch ($op) { + case 'update': + if (isset($edit['name']) && $account->name != $edit['name']) { + db_query("UPDATE {apachesolr_search_node} SET changed = %d WHERE nid IN (SELECT nid FROM {node} WHERE uid = %d)", time(), $account->uid); + } + break; + } } -function apachesolr_delete_index_form_validate($form, $form_values) { - if (!$form_values['delete_index']) { - form_set_error('delete_index', t('If you want to delete the Solr index, you must check the confirmation box.')); +/** + * Implementation of hook_taxonomy(). + * + * Mark nodes as needing re-indexing if a term name changes. + */ +function apachesolr_taxonomy($op, $type, $edit) { + if ($type == 'term' && ($op == 'update')) { + db_query("UPDATE {apachesolr_search_node} SET changed = %d WHERE nid IN (SELECT nid FROM {term_node} WHERE tid = %d)", time(), $edit['tid']); } - if (!user_access('administer site configuration')) { - drupal_access_denied(); + // TODO: the rest, such as term deletion. +} + +/** + * Implementation of hook_comment(). + * + * Mark nodes as needing re-indexing if comments are added or changed. + * Like search_comment(). 
+ */ +function apachesolr_comment($edit, $op) { + $edit = (array) $edit; + switch ($op) { + // Reindex the node when comments are added or changed + case 'insert': + case 'update': + case 'delete': + case 'publish': + case 'unpublish': + db_query("UPDATE {apachesolr_search_node} SET changed = %d WHERE nid = %d", time(), $edit['nid']); + break; } } -function apachesolr_delete_index_form_submit($form, $form_values) { - if ($form_values['delete_index']) { - try { - // Instantiate a new Solr object. - $solr =& apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr')); - // TODO: Add site so that you can only delete your site's content. - $solr->deleteByQuery('*:*'); - $solr->commit(); - variable_del('apachesolr_last_change'); - variable_del('apachesolr_last_id'); - // This form can't be seen by anyone without 'administer site configuration' - // permission, so no need to check perms before displaying a run-cron link. - drupal_set_message(t('The Solr content index has been erased. You must now !run_cron until your entire site has been re-indexed.', array('!run_cron' => l(t('run cron'), 'admin/logs/status/run-cron', array('fragment' => 'module-user'))))); - } - catch (Exception $e) { - watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); - } +/** + * Implementation of hook_node_type(). + * + * Mark nodes as needing re-indexing if a node type name changes. + */ +function apachesolr_node_type($op, $info) { + if ($op != 'delete' && !empty($info->old_type) && $info->old_type != $info->type) { + // We cannot be sure we are going before or after node module. + db_query("UPDATE {apachesolr_search_node} SET changed = %d WHERE nid IN (SELECT nid FROM {node} WHERE type = '%s' OR type = '%s')", time(), $info->old_type, $info->type); } } +/** + * Helper function for modules implementing hook_search's 'status' op. 
+ */ +function apachesolr_index_status($namespace) { + extract(apachesolr_get_last_index($namespace)); + $total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1')); + $remaining = db_result(db_query('SELECT COUNT(*) FROM {apachesolr_search_node} WHERE (changed > %d OR (changed = %d AND nid > %d)) AND status = 1', $last_change, $last_change, $last_nid)); + return array('remaining' => $remaining, 'total' => $total); +} /** - * The point of this class is to manage the update index needs of multiple - * search modules. Each one needs to track its own list of nodes that need - * updating. + * Returns last changed and last nid for an indexing namespace. */ -class ApacheSolrUpdate { - public static $_namespaces = array(); +function apachesolr_get_last_index($namespace) { + $stored = variable_get('apachesolr_index_last', array()); + return isset($stored[$namespace]) ? $stored[$namespace] : array('last_change' => 0, 'last_nid' => 0); +} - static function reset($namespace) { - variable_del($namespace . '_last_change'); - variable_del($namespace . '_last_id'); +/** + * Clear a specific namespace's last changed and nid, or clear all. + */ +function apachesolr_clear_last_index($namespace = '') { + if ($namespace) { + $stored = variable_get('apachesolr_index_last', array()); + unset($stored[$namespace]); + variable_set('apachesolr_index_last', $stored); } + else { + variable_del('apachesolr_index_last'); + } +} - static function get_change($namespace) { - $var = variable_get($namespace . '_last_change', 0); - return $var; +/** + * Returns a resource from a query based on an indexing namespace. 
+ */ +function apachesolr_get_nodes_to_index($namespace, $limit) { + extract(apachesolr_get_last_index($namespace)); + return db_query_range("SELECT nid, changed FROM {apachesolr_search_node} WHERE (changed > %d OR (changed = %d AND nid > %d)) AND status = 1 ORDER BY changed ASC, nid ASC", $last_change, $last_change, $last_nid, 0, $limit); +} + +/** + * Function to handle the indexing of nodes. + * + * The calling function must supply a name space or track/store + * the timestamp and nid returned. + * Returns FALSE if no nodes were indexed (none found or error). + */ +function apachesolr_index_nodes($result, $namespace = '', $callback = 'apachesolr_add_node_document') { + try { + // Get the $solr object + $solr = apachesolr_get_solr(); + // If there is no server available, don't continue. + if (!$solr->ping()) { + throw new Exception(t('No Solr instance available during indexing.')); + } + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + return FALSE; } - static function get_last($namespace) { - $var = variable_get($namespace . '_last_id', 0); - return $var; + $documents = array(); + $solr_last_change = 0; + $solr_last_id = 0; + + while ($row = db_fetch_object($result)) { + // Variables to track the last item changed. 
+ $solr_last_change = $row->changed; + $solr_last_id = $row->nid; + $callback($documents, $row->nid); } + if (is_object($solr)) { + try { + watchdog('Apache Solr', t('Adding @count documents.', array('@count' => count($documents)))); + // Chunk the adds by 20s + $docs_chunk = array_chunk($documents, 20); + foreach ($docs_chunk as $docs) { + $solr->addDocuments($docs); + } + $last = array('last_change' => $solr_last_change, 'last_nid' => $solr_last_id); + if ($namespace) { + $stored = variable_get('apachesolr_index_last', array()); + $stored[$namespace] = $last; + variable_set('apachesolr_index_last', $stored); + } + return $last; + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + } + } + return FALSE; +} - /** - * Function to generically handle the fetching of nodes that need indexing on a cron run. - * It takes a namespace which needs to be unique to the calling module and manages - * all of the global variables and the shutdown function so that every search - * implementation can have its own without needing to duplicate the query. - * Returns a db_query $result. - * Modules need to then call apache_update_success after each node is successfully - * indexed. - */ - static function getNodesToIndex($namespace) { - register_shutdown_function('apachesolr_shutdown'); +/** + * Add a document to the $documents array based on a node ID. + */ +function apachesolr_add_node_document(&$documents, $nid) { + if ($document = apachesolr_node_to_document($nid)) { + $documents[] = $document; + } +} - $cron_change = self::get_change($namespace); - $cron_last = self::get_last($namespace); - $cron_limit = variable_get('search_cron_limit', 100); +/** + * Strip control characters that cause Jetty/Solr to fail. 
+ */ +function apachesolr_strip_ctl_chars($text) { + // See: http://w3.org/International/questions/qa-forms-utf-8.html + // Printable utf-8 does not include any of these chars below x7F + return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $text); +} - $result = db_query_range('SELECT GREATEST(IF(c.last_comment_timestamp IS NULL, 0, c.last_comment_timestamp), n.changed) as last_change, n.nid '. - 'FROM {node} n LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid '. - 'WHERE n.status = 1 '. - 'AND ((GREATEST(IF(c.last_comment_timestamp IS NULL , 0, c.last_comment_timestamp ), n.changed) = %d AND n.nid > %d) OR n.changed > %d OR c.last_comment_timestamp > %d) ' . - 'ORDER BY last_change ASC, n.nid ASC', $cron_change, $cron_last, $cron_change, $cron_change, 0, $cron_limit); - return $result; +/** + * Strip html tags and also control characters that cause Jetty/Solr to fail. + */ +function apachesolr_clean_text($text) { + return strip_tags(preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $text)); +} + +/** + * Given a node ID, return a document representing that node. + */ +function apachesolr_node_to_document($nid) { + // Set reset = TRUE to avoid static caching of all nodes that get indexed. + $node = node_load($nid, NULL, TRUE); + if (empty($node)) { + return FALSE; } - static function success($namespace, $last_change, $last_id) { - self::$_namespaces[$namespace] = array('last_change' => $last_change, 'last_id' => $last_id); + $document = FALSE; + // Let any module exclude this node from the index. 
+ $build_document = TRUE; + foreach (module_implements('apachesolr_node_exclude') as $module) { + $exclude = module_invoke($module, 'apachesolr_node_exclude', $node); + if (!empty($exclude)) { + $build_document = FALSE; + } } - static function update_index($namespace) { - $solr = FALSE; - try { - // Get the $solr object - $solr =& apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr')); - // If there is no $solr object, there is no server available, so don't continue. - if (!$solr->ping()) { - throw new Exception(t('No Solr instance available during indexing')); + if ($build_document) { + // Build the node body. + $node->build_mode = NODE_BUILD_SEARCH_INDEX; + $node = node_build_content($node, FALSE, FALSE); + $node->body = drupal_render($node->content); + $node->title = apachesolr_clean_text($node->title); + + $text = $node->body; + + // Fetch extra data normally not visible, including comments. + $extra = node_invoke_nodeapi($node, 'update index'); + $text .= "\n\n" . implode(' ', $extra); + $text = apachesolr_strip_ctl_chars($text); + + $document = new Apache_Solr_Document(); + $document->id = apachesolr_document_id($node->nid); + $document->site = url(NULL, array('absolute' => TRUE)); + $document->hash = apachesolr_site_hash(); + $document->url = url('node/' . $node->nid, NULL, NULL, TRUE); + $document->nid = $node->nid; + $document->status = $node->status; + $document->uid = $node->uid; + $document->title = $node->title; + $document->body = strip_tags($text); + $document->type = $node->type; + $document->type_name = apachesolr_strip_ctl_chars(node_get_types('name', $node)); + $document->created = apachesolr_date_iso($node->created); + $document->changed = apachesolr_date_iso($node->changed); + $last_change = (isset($node->last_comment_timestamp) && $node->last_comment_timestamp > $node->changed) ? 
$node->last_comment_timestamp : $node->changed; + $document->last_comment_or_change = apachesolr_date_iso($last_change); + $document->comment_count = isset($node->comment_count) ? $node->comment_count : 0; + $document->name = apachesolr_strip_ctl_chars($node->name); + $document->language = $node->language; + + // Path aliases can have important information about the content. + // Add them to the index as well. + if (function_exists('drupal_get_path_alias')) { + // Add any path alias to the index. + $output = drupal_get_path_alias($path); + if ($output && $output != $path) { + $document->path = apachesolr_strip_ctl_chars($output); } } - catch (Exception $e) { - watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); - return; - } // Get CCK fields list $cck_fields = apachesolr_cck_fields(); - $result = self::getNodesToIndex($namespace); - $count = 0; - $documents = array(); - - while ($row = db_fetch_object($result)) { - - // Variables to track the last item changed. - $solr_last_change = $row->last_change; - $solr_last_id = $row->nid; - - // Set reset = TRUE to avoid static caching of all nodes that get indexed. - $node = node_load($row->nid, NULL, TRUE); - - if ($node->nid) { - // Build the node body. - $node = node_build_content($node, FALSE, FALSE); - $node->body = drupal_render($node->content); - - $text = check_plain($node->title) . ' ' . $node->body; - - // Fetch extra data normally not visible - $extra = node_invoke_nodeapi($node, 'update index'); - foreach ($extra as $t) { - $text .= $t; + foreach ($cck_fields as $key => $cck_info) { + if (isset($node->$key)) { + // Got a CCK field. See if it is to be indexed. + $function = $cck_info['callback']; + if ($cck_info['callback'] && function_exists($function)) { + $field = call_user_func_array($function, array($node, $key)); } - - // Update solr index. 
- try { - $document = new Apache_Solr_Document(); - - $site = url(NULL, NULL, NULL, TRUE); - $hash = md5($site); - $document->site = $site; - $document->hash = $hash; - $document->url = url('node/' . $node->nid, NULL, NULL, TRUE); - - $document->nid = $node->nid; - $document->uid = $node->uid; - $document->title = $node->title; - $document->body = $node->body; - $document->type = $node->type; - $document->changed = $node->changed; - $document->comment_count = $node->comment_count; - $document->name = $node->name; - $document->language = $node->language; - - // Path aliases can have important information about the content. - // Add them to the index as well. - if (function_exists('drupal_get_path_alias')) { - // Add any path alias to the index, looking first for language specific - // aliases but using language neutral aliases otherwise. - $language = empty($node->language) ? '' : $node->language; - $path = 'node/' . $node->nid; - $output = drupal_get_path_alias($path, $language); - if ($output && $output != $path) { - $document->path = $output; - $text .= $output; - } - } - - if ($cck_fields && strpos($key, 'field_') === 0) { - // Got a CCK field. See if it is to be indexed. 
- if (in_array($key, array_keys($cck_fields))) { - $function = $cck_fields[$key]['callback']; - if ($cck_fields[$key]['callback'] && function_exists($function)) { - $dynamic_fields = call_user_func_array($function, array($node, $key)); - } - else { - $dynamic_fields = $node->$key; - } - if (is_array($dynamic_fields) && count($dynamic_fields) > 0) { - foreach ($dynamic_fields as $field) { - if (!empty($field['view'])) { - $index_key = apachesolr_index_key($cck_fields[$key]); - if ($cck_fields[$key]['multiple']) { - $document->setMultiValue($index_key, $field['view']); - } - else { - $document->$index_key = $field['view']; - } - } - } - } + else { + $field = $node->$key; + } + $index_key = apachesolr_index_key($cck_info); + foreach ($field as $value) { + // Don't index NULLs or empty strings + if (isset($value['safe']) && strlen($value['safe'])) { + if ($cck_info['multiple']) { + $document->setMultiValue($index_key, apachesolr_clean_text($value['safe'])); } - } - - // This is the string value of the title. Used for sorting. - $document->stitle = $node->title; - - if (is_array($node->taxonomy)) { - foreach ($node->taxonomy as $term) { - // Double indexing of tids lets us do effecient searches (on tid) - // and do accurate per-vocabulary faceting. - - // By including the ancestors to a term in the index we make - // sure that searches for general categories match specific - // categories, e.g. Fruit -> apple, a search for fruit will find - // content categorized with apple. - $ancestors = taxonomy_get_parents_all($term->tid); - foreach ($ancestors as $ancestor) { - $document->setMultiValue('tid', $ancestor->tid); - $document->setMultiValue('imfield_vid'. $ancestor->vid, $ancestor->tid); - $document->setMultiValue('vid', $ancestor->vid); - $document->setMultiValue('taxonomy_name', $ancestor->name); - $text .= ' ' . 
$ancestor->name; - } + else { + $document->$index_key = apachesolr_clean_text($value['safe']); } } - $document->text = $text; - - // Let modules add to the document - foreach (module_implements('apachesolr_update_index') as $module) { - $function = $module .'_apachesolr_update_index'; - $function($document, $node); - } - - $documents[] = $document; - } - catch (Exception $e) { - watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); } } - self::success('apachesolr', $solr_last_change, $solr_last_id); } - if (is_object($solr) && count($documents) > 0) { - try { - watchdog('Apache Solr', t('Adding @count documents.', array('@count' => count($documents)))); - // Chunk the adds by 20s - $docs_chunk = array_chunk($documents, 20); - foreach ($docs_chunk as $docs) { - $solr->addDocuments($docs); - } - $solr->commit(); - $solr->optimize(FALSE, FALSE); - } - catch (Exception $e) { - watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + apachesolr_add_tags_to_document($document, $text); + apachesolr_add_taxonomy_to_document($document, $node); + + // Let modules add to the document - TODO convert to drupal_alter(). + foreach (module_implements('apachesolr_update_index') as $module) { + $function = $module .'_apachesolr_update_index'; + $function($document, $node); + } + } + return $document; +} + +/** + * Convert date from timestamp into ISO 8601 format. + * http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html + */ +function apachesolr_date_iso($date_timestamp) { + return gmdate('Y-m-d\TH:i:s\Z', $date_timestamp); +} + +/** + * Extract taxonomy from $node and add to dynamic fields. + */ +function apachesolr_add_taxonomy_to_document(&$document, $node) { + if (isset($node->taxonomy) && is_array($node->taxonomy)) { + foreach ($node->taxonomy as $term) { + // Double indexing of tids lets us do effecient searches (on tid) + // and do accurate per-vocabulary faceting. 
+ + // By including the ancestors to a term in the index we make + // sure that searches for general categories match specific + // categories, e.g. Fruit -> apple, a search for fruit will find + // content categorized with apple. + $ancestors = taxonomy_get_parents_all($term->tid); + foreach ($ancestors as $ancestor) { + $document->setMultiValue('tid', $ancestor->tid); + $document->setMultiValue('imfield_vid_'. $ancestor->vid, $ancestor->tid); + $name = apachesolr_clean_text($ancestor->name); + $document->setMultiValue('vid', $ancestor->vid); + $document->{'tsfield_vid_'. $ancestor->vid .'_names'} .= ' '. $name; + // We index each name as a string for cross-site faceting + // using the vocab name rather than vid in field construction . + $document->setMultiValue('smfield_vid_'. apachesolr_vocab_name($ancestor->vid), $name); } } } } -function apachesolr_apachesolr_facets() { - return array('type'); +/** + * Helper function - return a safe (PHP identifier) vocabulary name. + */ +function apachesolr_vocab_name($vid) { + static $names = array(); + + if (!isset($names[$vid])) { + $vocab_name = db_result(db_query('SELECT v.name FROM {vocabulary} v WHERE v.vid = %d', $vid)); + $names[$vid] = preg_replace('/[^a-zA-Z0-9_\x7f-\xff]/', '_', $vocab_name); + // Fallback for names ending up all as '_'. + $check = rtrim($names[$vid], '_'); + if (!$check) { + $names[$vid] = '_' . $vid . '_'; + } + } + return $names[$vid]; +} + +/** + * Extract HTML tag contents from $text and add to boost fields. + * + * $text must be stripped of control characters before hand. 
+ */ +function apachesolr_add_tags_to_document(&$document, $text) { + $tags_to_index = variable_get('apachesolr_tags_to_index', array( + 'h1' => 'tags_h1', + 'h2' => 'tags_h2_h3', + 'h3' => 'tags_h2_h3', + 'h4' => 'tags_h4_h5_h6', + 'h5' => 'tags_h4_h5_h6', + 'h6' => 'tags_h4_h5_h6', + 'u' => 'tags_inline', + 'b' => 'tags_inline', + 'i' => 'tags_inline', + 'strong' => 'tags_inline', + 'em' => 'tags_inline', + 'a' => 'tags_a' + )); + + // Strip off all ignored tags. + $text = strip_tags($text, '<'. implode('><', array_keys($tags_to_index)) .'>'); + + preg_match_all('@<('. implode('|', array_keys($tags_to_index)) .')[^>]*>(.*)@Ui', $text, $matches); + foreach ($matches[1] as $key => $tag) { + // We don't want to index links auto-generated by the url filter. + if ($tag != 'a' || !preg_match('@(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)[a-zA-Z0-9]+@', $matches[2][$key])) { + $document->{$tags_to_index[$tag]} .= ' '. $matches[2][$key]; + } + } +} + +function apachesolr_delete_node_from_index($node) { + try { + $solr = apachesolr_get_solr(); + $solr->deleteById(apachesolr_document_id($node->nid)); + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + } } /** - * Registered shutdown function. + * Implementation of hook_cron(). */ -function apachesolr_shutdown() { - foreach (ApacheSolrUpdate::$_namespaces as $namespace => $vars) { - extract($vars); - if ($last_change && $last_id) { - variable_set("{$namespace}_last_change", $last_change); - variable_set("{$namespace}_last_id", $last_id); +function apachesolr_cron() { + try { + $solr = apachesolr_get_solr(); + $solr->clearCache(); + $last = variable_get('apachesolr_last_optimize', 0); + $time = time(); + // Make sure to omtimize once per day. 
+ if ($time - $last > 60*60*24) { + $solr->optimize(FALSE, FALSE); + variable_set('apachesolr_last_optimize', $time); } } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + } } /** @@ -482,40 +530,63 @@ function apachesolr_shutdown() { function apachesolr_nodeapi(&$node, $op, $a3 = NULL, $a4 = NULL) { switch ($op) { case 'delete': - try { - $solr =& apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr')); - $solr->deleteById($node->nid); - $solr->commit(); - } - catch (Exception $e) { - watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + apachesolr_delete_node_from_index($node); + // TODO: check that there was no exception? + db_query("DELETE FROM {apachesolr_search_node} WHERE nid = %d", $node->nid); + break; + case 'insert': + db_query("INSERT INTO {apachesolr_search_node} (nid, status, changed) VALUES (%d, %d, GREATEST(%d, %d))", $node->nid, $node->status, $node->created, $node->changed); + break; + case 'update': + // Check if the node has gone from published to unpublished. + If (!$node->status && db_result(db_query("SELECT status FROM {apachesolr_search_node} WHERE nid = %d", $node->nid))) { + apachesolr_delete_node_from_index($node); } + // TODO: check that there was no exception? + db_query("UPDATE {apachesolr_search_node} SET changed = %d, status = %d WHERE nid = %d", time(), $node->status, $node->nid); break; } } /** + * Return the enabled facets from the specified block array. + * + * @param $module + * The module (optional). + * @return + * An array consisting info for facets that have been enabled + * for the specified module, or all enabled facets. + */ +function apachesolr_get_enabled_facets($module = NULL) { + $enabled = variable_get('apachesolr_enabled_facets', array()); + if (isset($module)) { + return isset($enabled[$module]) ? 
$enabled[$module] : array(); + } + return $enabled; +} + +/** * Implementation of hook_block(). */ function apachesolr_block($op = 'list', $delta = 0, $edit = array()) { switch ($op) { case 'list': - // Sorting block - $blocks['sort'] = array('info' => t('ApacheSolr Core: Sorting')); - $blocks['type'] = array('info' => t('ApacheSolr Core: Filter by type')); + // Add the blocks + $blocks['sort'] = array( + 'info' => t('Apache Solr Core: Sorting'), + 'cache' => BLOCK_CACHE_PER_PAGE, + ); return $blocks; case 'view': if (apachesolr_has_searched()) { // Get the query and response. Without these no blocks make sense. - $response =& apachesolr_static_response_cache(); + $response = apachesolr_static_response_cache(); if (empty($response)) { return; } - $query =& apachesolr_drupal_query(); - // Get information needed by the rest of the blocks about limits. - $facet_display_limits = variable_get('apachesolr_facet_query_limits', array()); + $query = apachesolr_current_query(); switch ($delta) { case 'sort': @@ -524,12 +595,12 @@ function apachesolr_block($op = 'list', 'stitle' => array('name' => t('Title'), 'default' => 'asc'), 'type' => array('name' => t('Type'), 'default' => 'asc'), 'name' => array('name' => t('Author'), 'default' => 'asc'), - 'changed' => array('name' => t('Date'), 'default' => 'desc'), + 'created' => array('name' => t('Date'), 'default' => 'desc'), ); $solrsorts = array(); $sort_parameter = isset($_GET['solrsort']) ? check_plain($_GET['solrsort']) : FALSE; - foreach(explode(',', $sort_parameter) as $solrsort) { + foreach (explode(',', $sort_parameter) as $solrsort) { $parts = explode(' ', $solrsort); if (!empty($parts[0]) && !empty($parts[1])) { $solrsorts[$parts[0]] = $parts[1]; @@ -537,39 +608,38 @@ function apachesolr_block($op = 'list', } $sort_links = array(); - $path = 'search/'. arg(1). '/'. $query->get_query(); + $path = 'search/' . arg(1) . '/' . 
$query->get_query_basic(); + $new_query = clone $query; foreach ($sorts as $type => $sort) { $new_sort = isset($solrsorts[$type]) ? $solrsorts[$type] == 'asc' ? 'desc' : 'asc' : $sort['default']; - $sort_links[] = theme('apachesolr_sort_link', $sort['name'], $path, $type == "relevancy" ? '' : "solrsort={$type} {$new_sort}", isset($solrsorts[$type]) ? $solrsorts[$type] : ''); + $new_query->set_solrsort($type == "relevancy" ? '' : "solrsort={$type} {$new_sort}"); + $active = isset($solrsorts[$type]) || ($type == "relevancy" && !$solrsorts); + $direction = isset($solrsorts[$type]) ? $solrsorts[$type] : ''; + $sort_links[] = theme('apachesolr_sort_link', $sort['name'], $path, $new_query->get_url_querystring(), $active, $direction); } return array('subject' => t('Sort by'), 'content' => theme('apachesolr_sort_list', $sort_links)); - - case 'type': - $filter_by = t('Filter by type'); - return apachesolr_facet_block($response, $query, $delta, $filter_by, 'apachesolr_get_type'); - default: break; } - } - break; - - case 'configure': - return apachesolr_facetcount_form($delta); - break; - case 'save': - apachesolr_facetcount_save($delta, $edit); + } break; } } -function apachesolr_facet_block($response, $query, $delta, $filter_by, $facet_callback = FALSE) { - if (is_object($response->facet_counts->facet_fields->$delta)) { +/** + * Helper function for displaying a facet block. + */ +function apachesolr_facet_block($response, $query, $module, $delta, $facet_field, $filter_by, $facet_callback = FALSE) { + if (!empty($response->facet_counts->facet_fields->$facet_field)) { $contains_active = FALSE; $items = array(); - foreach ($response->facet_counts->facet_fields->$delta as $facet => $count) { + foreach ($response->facet_counts->facet_fields->$facet_field as $facet => $count) { + // Solr sends this back if it's empty. 
+ if ($facet == '_empty_') { + continue; + } $unclick_link = ''; unset($active); if ($facet_callback && function_exists($facet_callback)) { @@ -579,15 +649,18 @@ function apachesolr_facet_block($respons $facet_text = $facet; } $new_query = clone $query; - if ($active = $query->has_field($delta, $facet)) { + if ($active = $query->has_field($facet_field, $facet)) { $contains_active = TRUE; - $new_query->remove_field($delta, $facet); - $path = 'search/'. arg(1) .'/'. $new_query->get_query(); - $unclick_link = theme('apachesolr_unclick_link', $path); + $new_query->remove_field($facet_field, $facet); + // TODO: don't assume 'search' - find the real path. + $path = 'search/'. arg(1) .'/'. $new_query->get_query_basic(); + $querystring = $new_query->get_url_querystring(); + $unclick_link = theme('apachesolr_unclick_link', $path, $querystring); } else { - $new_query->add_field($delta, $facet); - $path = 'search/'. arg(1) .'/'. $new_query->get_query(); + $new_query->add_field($facet_field, $facet); + $path = 'search/'. arg(1) .'/'. $new_query->get_query_basic(); + $querystring = $new_query->get_url_querystring(); } $countsort = $count == 0 ? '' : 1 / $count; // if numdocs == 1 and !active, don't add. @@ -595,75 +668,47 @@ function apachesolr_facet_block($respons // skip } else { - $items[$active ? $countsort . $facet : 1 + $countsort . $facet] = theme('apachesolr_facet_item', $facet_text, $count, $path, $active, $unclick_link, $response->numFound); + $items[$active ? $countsort . $facet : 1 + $countsort . $facet] = theme('apachesolr_facet_item', $facet_text, $count, $path, $querystring, $active, $unclick_link, $response->numFound); } } if (count($items) > 0) { ksort($items); - $facet_display_limit = isset($facet_display_limits[$delta]) ? $facet_display_limits[$delta] : 10; - $items = array_slice($items, 0, ($facet_display_limit == -1 ? 
NULL : $facet_display_limit)); - $output = theme('apachesolr_facet_list', $items); - return array('subject' => t('@filter_by', array('@filter_by' => $filter_by)), 'content' => $output); + // Get information needed by the rest of the blocks about limits. + $initial_limits = variable_get('apachesolr_facet_query_initial_limits', array()); + $limit = isset($initial_limits[$module][$delta]) ? $initial_limits[$module][$delta] : variable_get('apachesolr_facet_query_initial_limit_default', 10); + $output = theme('apachesolr_facet_list', $items, $limit); + return array('subject' => $filter_by, 'content' => $output); } } return NULL; } /** - * Callback function for the 'Filter by type' facet block. - */ -function apachesolr_get_type($facet) { - return node_get_types('name', $facet); -} - -/** - * Implementation of hook_form_alter(). - */ -function apachesolr_form_alter($form_id, &$form) { - $arg = arg(1); - $alias = drupal_lookup_path('alias', "search/{$arg}"); - // Ok, this really sucks. I want a way to know whether the action of the form - // is supposed to be handled by an ApacheSolr module or not. I manually - // exclude node and user here, but there are many other hook_search implementations - // in the wild and this code will potentially interfere with them. It also - // creates a Solr instance wasting resources. - if ($alias && $arg != 'node' && $arg != 'user' && ( - preg_match("&/search/{$arg}&", $form['#action']) || - preg_match("&/{$alias}&", $form['#action']) || - strpos($form['#action'], $alias))) { - if (!isset($_POST['form_id'])) { - // Set up our validation function - $form['#validate']['apachesolr_search_validate'] = array(); - - // if no keys, there's nothing to do. 
- if (empty($form['basic']['inline']['keys']['#default_value'])) { - return; - } - - // The $query is the true source for search key information - if ($query =& apachesolr_drupal_query()) { - $form['basic']['inline']['keys']['#default_value'] = $query->get_query_basic(); - } - } - } -} - -/** * Used by the 'configure' $op of hook_block so that modules can generically set * facet limits on their blocks. */ -function apachesolr_facetcount_form($delta) { - $facet_query_limits = variable_get('apachesolr_facet_query_limits', array()); +function apachesolr_facetcount_form($module, $delta) { + $initial = variable_get('apachesolr_facet_query_initial_limits', array()); + $limits = variable_get('apachesolr_facet_query_limits', array()); - // If the block is not 'sort' (and therefore is a facet block), - // display facet limit option. + $limit = drupal_map_assoc(array(50, 40, 30, 20, 15, 10, 5, 3)); + + $form['apachesolr_facet_query_initial_limit'] = array( + '#type' => 'select', + '#title' => t('Initial filter links'), + '#options' => $limit, + '#description' => t('The initial number of filter links to show in this block.'), + '#default_value' => isset($initial[$module][$delta]) ? $initial[$module][$delta] : variable_get('apachesolr_facet_query_initial_limit_default', 10), + ); + $limit = drupal_map_assoc(array(100, 75, 50, 40, 30, 20, 15, 10, 5, 3)); $form['apachesolr_facet_query_limit'] = array( - '#type' => 'textfield', - '#title' => t('Facet Query Limit'), - '#required' => TRUE, - '#description' => t('The number of facet links to show in this block. Set to -1 for unlimited. Default is 10.'), - '#default_value' => isset($facet_query_limits[$delta]) ? $facet_query_limits[$delta] : 10, + '#type' => 'select', + '#title' => t('Maximum filter links'), + '#options' => $limit, + '#description' => t('The maximum number of filter links to show in this block.'), + '#default_value' => isset($limits[$module][$delta]) ? 
$limits[$module][$delta] : variable_get('apachesolr_facet_query_limit_default', 20), ); + return $form; } @@ -671,11 +716,39 @@ function apachesolr_facetcount_form($del * Used by the 'save' $op of hook_block so that modules can generically set * facet limits on their blocks. */ -function apachesolr_facetcount_save($delta, $edit) { +function apachesolr_facetcount_save($edit) { // Save query limits - $facet_query_limits = variable_get('apachesolr_facet_query_limits', array()); - $facet_query_limits[$delta] = intval($edit['apachesolr_facet_query_limit']); - variable_set('apachesolr_facet_query_limits', $facet_query_limits); + $module = $edit['module']; + $delta = $edit['delta']; + $limits = variable_get('apachesolr_facet_query_limits', array()); + $limits[$module][$delta] = (int)$edit['apachesolr_facet_query_limit']; + variable_set('apachesolr_facet_query_limits', $limits); + $initial = variable_get('apachesolr_facet_query_initial_limits', array()); + $initial[$module][$delta] = (int)$edit['apachesolr_facet_query_initial_limit']; + variable_set('apachesolr_facet_query_initial_limits', $initial); +} + +/** + * This hook allows modules to modify the query and params objects. + * + * Example: + * + * function my_module_apachesolr_modify_query(&$query, &$params) { + * // I only want to see articles by the admin! + * $query->add_field("uid", 1); + * + * } + */ +function apachesolr_modify_query(&$query, &$params) { + + foreach (module_implements('apachesolr_modify_query') as $module) { + $function_name = "{$module}_apachesolr_modify_query"; + $function_name($query, $params); + } + // Add array of fq parameters. + if ($query && ($fq = $query->get_fq())) { + $params['fq'] = $fq; + } } /** @@ -701,17 +774,31 @@ function apachesolr_has_searched($search * Factory method for solr singleton object. Structure allows for an arbitrary * number of solr objects to be used based on the host, port, path combination. 
* Get an instance like this: - * $solr =& apachesolr_get_solr(); + * $solr = apachesolr_get_solr(); */ -function &apachesolr_get_solr($host = 'localhost', $port = 8983, $path = '/solr') { +function apachesolr_get_solr($host = NULL, $port = NULL, $path = NULL) { static $solr_cache; + if (empty($host)) { + $host = variable_get('apachesolr_host', 'localhost'); + } + if (empty($port)) { + $port = variable_get('apachesolr_port', '8983'); + } + if (empty($path)) { + $path = variable_get('apachesolr_path', '/solr'); + } + if (empty($solr_cache[$host][$port][$path])) { - $include_path = get_include_path(); - set_include_path('./'. drupal_get_path('module', 'apachesolr') .'/SolrPhpClient/'); - include_once('Apache/Solr/Service.php'); - set_include_path($include_path); - $solr_cache[$host][$port][$path] = new Apache_Solr_Service($host, $port, $path); + list($module, $filepath, $class) = variable_get('apachesolr_service_class', array('apachesolr', 'Drupal_Apache_Solr_Service.php', 'Drupal_Apache_Solr_Service')); + include_once(drupal_get_path('module', $module) .'/'. $filepath); + try { + $solr_cache[$host][$port][$path] = new $class($host, $port, $path); + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + return; + } } return $solr_cache[$host][$port][$path]; } @@ -720,39 +807,63 @@ function &apachesolr_get_solr($host = 'l * It is important to hold on to the Solr response object for the duration of the * page request so that we can use it for things like building facet blocks. */ -function &apachesolr_static_response_cache($response = NULL) { +function apachesolr_static_response_cache($response = NULL) { static $_response; if (!empty($response)) { - $_response = drupal_clone($response); + $_response = clone $response; } return $_response; } -/* - * The query object is built from the keys. If you want to build queries - * programmatically you can pass in different keys. 
If you don't pass in - * any keys, search_get_keys() is used instead. +/** + * Factory function for query objects. + * + * The query object is built from the keys, filters, and sort. + */ +function apachesolr_drupal_query($keys = '', $filters = '', $solrsort = '') { + + list($module, $class) = variable_get('apachesolr_query_class', array('apachesolr', 'Solr_Base_Query')); + include_once drupal_get_path('module', $module) .'/'. $class .'.php'; + + try { + $query = new $class(apachesolr_get_solr(), $keys, $filters, $solrsort); + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + $query = NULL; + } + return $query; +} + +/** + * Factory function for query objects representing the current search URL. + * + * The query object is built from the keys in the URL, but these may be + * overridden by passing in parameters. */ -function &apachesolr_drupal_query($keys = NULL, $reset = FALSE) { - static $_queries; +function apachesolr_current_query($keys = '', $filters = '', $solrsort = '', $reset = FALSE) { + static $_queries = array(); if ($reset) { - unset($_queries); + $_queries = array(); } if (empty($keys)) { $keys = search_get_keys(); } - if (empty($_queries) || empty($_queries[$keys])) { - include_once drupal_get_path('module', 'apachesolr') .'/Solr_Base_Query.php'; - $_queries[$keys] = new Solr_Base_Query($keys); + if (empty($filters) && !empty($_GET['filters'])) { + $filters = $_GET['filters']; } - return $_queries[$keys]; -} + if (empty($solrsort) && !empty($_GET['solrsort'])) { + $solrsort = $_GET['solrsort']; + } + $index = $keys . '&filters=' . $filters; -function apachesolr_base_url() { - return "http://" . variable_get('apachesolr_host', 'localhost') .':'. variable_get('apachesolr_port', '8983') . variable_get('apachesolr_path', '/solr'); + if (empty($_queries) || !array_key_exists($index, $_queries)) { + $_queries[$index] = apachesolr_drupal_query($keys, $filters, $solrsort); + } + return is_object($_queries[$index]) ? 
clone $_queries[$index] : $_queries[$index]; } /** @@ -772,7 +883,10 @@ function apachesolr_index_key($field) { case 'integer': $type_prefix = 'i'; break; - case 'double': + case 'sint': + $type_prefix = 'si'; + break; + case 'double': $type_prefix = 'p'; // reserve d for date break; case 'boolean': @@ -787,7 +901,7 @@ function apachesolr_index_key($field) { default: $type_prefix = 's'; } - $sm = $field['multiple'] ? 'm' : 's'; + $sm = $field['multiple'] ? 'mfield_' : 'sfield_'; return $type_prefix . $sm . $field['name']; } @@ -796,41 +910,32 @@ function apachesolr_index_key($field) { * CCK fields. */ function apachesolr_cck_fields() { - static $_fields; - // If CCK isn't enabled, do nothing. - if (module_exists('content')) { - $mappings = module_invoke_all('apachesolr_cck_field_mappings'); - if (is_null($_fields)) { - $_fields = array(); - $result = db_query("SELECT i.field_name, f.multiple, f.type, i.widget_type FROM {node_field_instance} i INNER JOIN {node_field} f ON i.field_name = f.field_name;"); + static $fields; + if (is_null($fields)) { + $fields = array(); + // If CCK isn't enabled, do nothing. + if (module_exists('content')) { + // A single default mapping for text fields. + $mappings['text'] = array('callback' => '', 'index_type' => 'string', 'widget_types' => array('optionwidgets_select' => 1, 'optionwidgets_buttons' => 1)); + $mappings = module_invoke_all('apachesolr_cck_field_mappings') + $mappings; + $result = db_query("SELECT i.field_name, f.multiple, f.type AS field_type, i.widget_type, i.label FROM {node_field_instance} i INNER JOIN {node_field} f ON i.field_name = f.field_name;"); while ($row = db_fetch_object($result)) { // Only deal with fields that have options widgets (facets don't make sense otherwise), or fields that have specific mappings. 
- if (($row->type == 'text' && in_array($row->widget_type, array('options_select', 'options_buttons'))) || in_array($row->type, array_keys($mappings))) { - $_fields[$row->field_name] = array( - 'name' => $row->field_name, - 'multiple' => $row->multiple ? TRUE : FALSE, - 'field_type' => $row->type, - 'index_type' => empty($mappings) ? 'string' : $mappings[$row->type]['index_type'], - 'callback' => empty($mappings[$row->type]['callback']) ? NULL : $mappings[$row->type]['callback'], - ); + if (isset($mappings[$row->field_type]) && !empty($mappings[$row->field_type]['widget_types'][$row->widget_type])) { + $row->index_type = $mappings[$row->field_type]['index_type']; + $row->callback = $mappings[$row->field_type]['callback']; + $row->multiple = (bool) $row->multiple; + $row->name = 'cck_' . $row->field_name; + $fields[$row->field_name] = (array) $row; } } } - return $_fields; } - else { - return FALSE; - } -} - -function apachesolr_simpletest() { - $dir = drupal_get_path('module', 'apachesolr'). '/tests'; - $tests = file_scan_directory($dir, '\.test$'); - return array_keys($tests); + return $fields; } -function theme_apachesolr_facet_item($name, $count, $path, $active = FALSE, $unclick_link = NULL, $num_found = NULL) { +function theme_apachesolr_facet_item($name, $count, $path, $querystring = '', $active = FALSE, $unclick_link = NULL, $num_found = NULL) { $attributes = array(); if ($active) { $attributes['class'] = 'active'; @@ -839,24 +944,49 @@ function theme_apachesolr_facet_item($na return $unclick_link . ' '. check_plain($name); } else { - return l($name ." ($count)", $path, $attributes, isset($_GET['solrsort']) ? "solrsort=" . check_plain($_GET['solrsort']) : FALSE); + return apachesolr_l($name ." ($count)", $path, array('attributes' => $attributes), $querystring); } } -function theme_apachesolr_unclick_link($path) { - return l("(-)", $path, NULL, isset($_GET['solrsort']) ? "solrsort=" . 
check_plain($_GET['solrsort']) : FALSE); +function apachesolr_l($text, $attributes = array(), $query = NULL, $fragment = NULL, $absolute = FALSE, $html = FALSE) { + return ''. check_plain($text) .''; +} + + +function theme_apachesolr_unclick_link($path, $querystring = '') { + return apachesolr_l("(-)", $path, array(), $querystring); } -function theme_apachesolr_sort_link($text, $path, $query, $direction = NULL) { +function theme_apachesolr_sort_link($text, $path, $querystring = '', $active = FALSE, $direction = '') { $icon = ''; + $attributes = array(); if ($direction) { - $icon = theme('tablesort_indicator', $direction); + $icon = ' '. theme('tablesort_indicator', $direction); + } + if ($active) { + $attributes['class'] = 'active'; } - return $icon . ' ' . l($text, $path, NULL, $query); + return $icon . apachesolr_l($text, $path, $attributes, $querystring); } -function theme_apachesolr_facet_list($items) { - return theme('item_list', $items); +function theme_apachesolr_facet_list($items, $display_limit = 0) { + // theme('item_list') expects a numerically indexed array. + $items = array_values($items); + // If there is a limit and the facet count is over the limit, hide the rest. + if (($display_limit > 0) && (count($items) > $display_limit)) { + // Show/hide extra facets. + drupal_add_js(drupal_get_path('module', 'apachesolr') . '/apachesolr.js'); + // Split items array into displayed and hidden. + $hidden_items = array_splice($items, $display_limit); + foreach ($hidden_items as $link) { + $items[] = array('data' => $link, 'class' => 'apachesolr-hidden-facet'); + } + } + $admin_link = ''; + if (user_access('administer site configuration')) { + $admin_link = l(t('Configure enabled filters'), 'admin/settings/apachesolr/enabled-filters'); + } + return theme('item_list', $items) . $admin_link; } function theme_apachesolr_sort_list($items) { @@ -864,8 +994,323 @@ function theme_apachesolr_sort_list($ite } /** - * Return the human readable text for a content type. 
+ * @file + * Administrative pages for the Apache Solr framework. + */ + +function apachesolr_settings() { + $form = array(); + + //perform a check to ensure the server is there + $requirements = apachesolr_requirements('runtime'); + $status = $requirements['apachesolr']['severity'] == 2 ? 'error' : 'status'; + drupal_set_message($requirements['apachesolr']['value'], $status); + + $form['apachesolr_host'] = array( + '#type' => 'textfield', + '#title' => t('Solr host name'), + '#default_value' => variable_get('apachesolr_host', 'localhost'), + '#description' => t('Host name of your Solr server, e.g. localhost or example.com.'), + ); + $form['apachesolr_port'] = array( + '#type' => 'textfield', + '#title' => t('Solr port'), + '#default_value' => variable_get('apachesolr_port', '8983'), + '#description' => t('Port on which the Solr server listens. The Jetty example server is 8983, while Tomcat is 8080 by default.'), + ); + $form['apachesolr_path'] = array( + '#type' => 'textfield', + '#title' => t('Solr path'), + '#default_value' => variable_get('apachesolr_path', '/solr'), + '#description' => t('Path that identifies the Solr request handler to be used.'), + ); + + $numbers = drupal_map_assoc(array(10, 20, 50, 100)); + $form['apachesolr_cron_limit'] = array( + '#type' => 'select', + '#title' => t('Number of items to index per cron run'), + '#default_value' => variable_get('apachesolr_cron_limit', 50), + '#options' => $numbers, + '#description' => t('The maximum number of items indexed in each pass of a cron maintenance task. 
If necessary, reduce the number of items to prevent timeouts and memory errors while indexing.', array('@cron' => url('admin/reports/status'))) + ); + + $options = drupal_map_assoc(array(5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100)); + $form['apachesolr_rows'] = array( + '#type' => 'select', + '#title' => t('Results per page'), + '#default_value' => variable_get('apachesolr_rows', 10), + '#options' => $options, + '#description' => t('The number of results that will be shown per page.'), + ); + $form['apachesolr_failure'] = array( + '#type' => 'select', + '#title' => t('On failure'), + '#options' => array('show_error' => t('Show error message'), + 'show_drupal_results' => t('Show core Drupal results'), + 'show_no_results' => t('Show no results') + ), + '#default_value' => variable_get('apachesolr_failure', 'show_error'), + '#description' => t('What to display if Apache Solr search is not available.'), + ); + return system_settings_form($form); +} + +/** + * Gets information about the fields already in solr index. */ -function theme_apachesolr_breadcrumb_type($type) { - return node_get_types('name', $type); +function apachesolr_index_page() { + try { + $solr = apachesolr_get_solr(); + // TODO: only clear this every page view if we are running + // multi-site. + $solr->clearCache(); + // Note: we use 2 since 1 fails on Ubuntu Hardy. + $data = $solr->getLuke(1); + } + catch (Exception $e) { + watchdog('apachesolr', $e->getMessage()); + drupal_set_message($e->getMessage(), "warning"); + $data->fields = array(); + } + + $output = ''; + if (isset($data->index->numDocs)) { + $output .= '

' . t('Number of documents in index: @num', array('@num' => $data->index->numDocs)) . "

\n"; + $output .= '

' . t('Number of terms in index: @num', array('@num' => $data->index->numTerms)) . "

\n"; + } + + $fields = (array)$data->fields; + if ($fields) { + $output .= '

' . t('Number of fields in index: @num', array('@num' => count($fields))) . "

\n"; + $rows = array(); + foreach ($fields as $name => $field) { + // TODO: try to map the name to something more meaningful. + $rows[$name] = array($name, $field->type, isset($field->index) ? $field->distinct : t('Not indexed')); + } + ksort($rows); + // Display the table of Field names, Index Types, and term counts. + $output .= theme('table', array(t('Field name'), t('Index type'), t('Distinct terms')), $rows); + } + else { + $output .= '

' . t('No data on indexed fields.') . "

\n"; + } + + // Display the Delete Index form. + $output .= drupal_get_form('apachesolr_delete_index_form'); + + return $output; +} + +/** + * Indicates what order the specified facets should be listed in. This function is used in a uasort + * invocation. + * @param $a + * The first facet. + * @param $b + * The second facet. + * @return + * A signed integer that indicates which of the specified facets should come first. + */ +function _apachesolr_sort_facets($a, $b) { + return strcasecmp($a['info'], $b['info']); +} + +/** + * This is the submit handler for the active facets form. + * + * The form values for each module are array-filtered to remove non-enabled items and + * stored in the variable table with the name 'apachesolr_enabled_facets'. + * + * @see apachesolr_enabled_facets_form() + */ +function apachesolr_enabled_facets_form_submit($form, &$form_state) { + $enabled = array(); + foreach ($form_state['apachesolr_enabled_facets'] as $module => $facets) { + $enabled[$module] = array_filter($facets); + } + variable_set('apachesolr_enabled_facets', $enabled); + drupal_set_message($form_state['submit_message']); +} + +/** + * Creates the form that allows the user to select which facets will be enabled. + * + * Only enabled facets are sent to solr. Fewer enabled facets can reduce the + * load on the search server. Blocks are only offered for enabled facets, so + * this also reduces the clutter on the blocks admin page. 
+ */ +function apachesolr_enabled_facets_form() { + $facets = array(); + $module_facets = array(); + $module_list = array(); + foreach (module_implements('apachesolr_facets') as $module) { + $module_facets[$module] = module_invoke($module, 'apachesolr_facets'); + uasort($module_facets[$module], '_apachesolr_sort_facets'); + $module_list[$module] = $module; + } + + $enabled_facets = apachesolr_get_enabled_facets(); + $form = array(); + $form['apachesolr_enabled_facets']['help'] = array ( + '#type' => 'item', + '#value' => t('You can use this screen to select which Apache Solr filter blocks should be created by enabling the corresponding Apache Solr filters. For performance reasons, you should only enable filters that you intend to have available to users on the search page. After selecting which filter blocks to create, you will be sent to the blocks page where you can choose which of those blocks should be enabled when your users search by placing each block in a region.'), + ); + foreach($module_facets as $module => $facets) { + $form['apachesolr_enabled_facets'][$module] = array( + '#type' => 'fieldset', + '#title' => check_plain($module_list[$module]), + '#collapsible' => TRUE, + '#collapsed' => FALSE, + ); + // We must use module + delta as the keys since that combination is + // guaranteed to be unique. A single module could, for example, have + // two different blocks that expose different faceting on the same + // field in the index. + foreach($facets as $delta => $data) { + $form['apachesolr_enabled_facets'][$module][$delta] = array( + '#type' => 'checkbox', + '#title' => $data['info'], + '#return_value' => $data['facet_field'], + '#default_value' => isset($enabled_facets[$module][$delta]) ? 
$data['facet_field'] : 0, + ); + } + } + + $has_facets = (bool)$module_facets; + + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Save'), + '#access' => $has_facets, + ); + $form['no-facets-message'] = array( + '#value' => t('No filters are available from your currently enabled modules'), + '#access' => !$has_facets, + ); + + $form['#tree'] = TRUE; + $form['submit_message'] = array( + '#type' => 'value', + '#value' => t('The Apache Solr filters settings were changed. To arrange the blocks for your enabled filters, visit the blocks administration page.', array('@url' => url('admin/build/block'))), + ); + + return $form; +} + +/** + * Create a form for deleting the contents of the Solr index. + */ +function apachesolr_delete_index_form() { + $form = array(); + $form['markup'] = array( + '#prefix' => '

', + '#value' => t('Solr Index'), + '#suffix' => '

', + ); + $form['reindex'] = array( + '#type' => 'submit', + '#value' => t('Re-index all content'), + '#submit' => array('apachesolr_clear_index'), + ); + $form['reindex-desc'] = array( + '#type' => 'item', + '#description' => t('Re-indexing will add all content to the index again (overwriting the index), but existing content in the index will remain searchable.'), + ); + $form['submit'] = array( + '#type' => 'submit', + '#value' => t('Delete the index'), + '#validate' => array('apachesolr_delete_index_validate'), + '#submit' => array('apachesolr_delete_index'), + ); + $form['delete_index'] = array( + '#type' => 'checkbox', + '#title' => t('Confirm index deletion'), + '#description' => t('Deletes all of the documents in the Solr index. This is rarely necessary unless your index is corrupt or you have installed a new schema.xml.'), + '#default_value' => NULL, + ); + return $form; +} + +/** + * Submit function for the 'Re-index all content' button. + * + * @see apachesolr_delete_index_form() + */ +function apachesolr_clear_index($form, &$form_state) { + apachesolr_clear_last_index(); +} + +/** + * Validate function for the 'Delete the index' button. + * + * @see apachesolr_delete_index_form() + */ +function apachesolr_delete_index_validate($form, &$form_state) { + if (!$form_state['delete_index']) { + form_set_error('delete_index', t('If you want to delete the Solr index, you must check the confirmation box.')); + } +} + +/** + * Submit function for the 'Delete the index' button. + * + * @see apachesolr_delete_index_form() + */ +function apachesolr_delete_index() { + try { + // Instantiate a new Solr object. + $solr = apachesolr_get_solr(); + $query = '*:*'; + // Allow other modules to modify the delete query. + // For example, use the site hash so that you only delete this site's + // content: $query = 'hash:' . 
apachesolr_site_hash() + apachesolr_alter('apachesolr_delete_index', $query); + $solr->deleteByQuery($query); + $solr->commit(); + apachesolr_clear_last_index(); + // This form can't be seen by anyone without 'administer site configuration' + // permission, so no need to check perms before displaying a run-cron link. + drupal_set_message(t('The Solr content index has been erased. You must now !run_cron until your entire site has been re-indexed.', array('!run_cron' => l(t('run cron'), 'admin/reports/status/run-cron', array('query' => array('destination' => 'admin/settings/apachesolr/index')))))); + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + } +} + + +/** + * Copy of Drupal 6's drupal_alter function. + */ +function apachesolr_alter($type, &$data) { + // PHP's func_get_args() always returns copies of params, not references, so + // drupal_alter() can only manipulate data that comes in via the required first + // param. For the edge case functions that must pass in an arbitrary number of + // alterable parameters (hook_form_alter() being the best example), an array of + // those params can be placed in the __drupal_alter_by_ref key of the $data + // array. This is somewhat ugly, but is an unavoidable consequence of a flexible + // drupal_alter() function, and the limitations of func_get_args(). + // @todo: Remove this in Drupal 7. + if (is_array($data) && isset($data['__drupal_alter_by_ref'])) { + $by_ref_parameters = $data['__drupal_alter_by_ref']; + unset($data['__drupal_alter_by_ref']); + } + + // Hang onto a reference to the data array so that it isn't blown away later. + // Also, merge in any parameters that need to be passed by reference. + $args = array(&$data); + if (isset($by_ref_parameters)) { + $args = array_merge($args, $by_ref_parameters); + } + + // Now, use func_get_args() to pull in any additional parameters passed into + // the drupal_alter() call. 
+ $additional_args = func_get_args(); + array_shift($additional_args); + array_shift($additional_args); + $args = array_merge($args, $additional_args); + + foreach (module_implements($type .'_alter') as $module) { + $function = $module .'_'. $type .'_alter'; + call_user_func_array($function, $args); + } } Index: apachesolr_search.info =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr_search.info,v retrieving revision 1.1.2.2 diff -u -p -r1.1.2.2 apachesolr_search.info --- apachesolr_search.info 14 Jun 2008 18:44:29 -0000 1.1.2.2 +++ apachesolr_search.info 5 Feb 2009 21:42:05 -0000 @@ -1,5 +1,5 @@ -; $Id: apachesolr_search.info,v 1.1.2.2 2008/06/14 18:44:29 robertDouglass Exp $ +; $Id: apachesolr_search.info,v 1.1.2.1.2.6 2009/01/27 21:32:34 pwolanin Exp $ name = Apache Solr search -description = Search with solr +description = Search with Solr dependencies = search apachesolr -package = ApacheSolr +package = Apache Solr Index: apachesolr_search.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr_search.module,v retrieving revision 1.1.2.22 diff -u -p -r1.1.2.22 apachesolr_search.module --- apachesolr_search.module 22 Nov 2008 12:58:07 -0000 1.1.2.22 +++ apachesolr_search.module 5 Feb 2009 21:42:05 -0000 @@ -1,5 +1,5 @@ l(t('running cron'), 'admin/reports/status/run-cron', array('query' => array('destination' => 'admin/settings/apachesolr/index'))), + '%percentage' => ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) .'%', + '@items' => format_plural($remaining, t('is 1 item'), t('are @count items') + ))); + } +} + +/** + * Implementation of hook_menu(). 
+ */ +function apachesolr_search_menu($may_cache) { + $items = array(); + if ($may_cache) { + $items[] = array( + 'path' => 'admin/settings/apachesolr/query-fields', + 'title' => t('Query field settings'), + 'callback' => 'apachesolr_search_settings_page', + 'access' => user_access('administer site configuration'), + 'weight' => 1, + 'type' => MENU_LOCAL_TASK, + ); + $items[] = array( + 'path' => 'admin/settings/apachesolr/node-types', + 'title' => t('Content type settings'), + 'callback' => 'drupal_get_form', + 'callback arguments' => array('apachesolr_search_type_boost_form'), + 'access' => user_access('administer site configuration'), + 'weight' => 1, + 'type' => MENU_LOCAL_TASK, + ); + } + return $items; +} + +/** * Implementation of hook_update_index(). */ -function apachesolr_update_index() { - ApacheSolrUpdate::update_index('apachesolr'); +function apachesolr_search_update_index() { + $cron_limit = variable_get('apachesolr_cron_limit', 50); + $result = apachesolr_get_nodes_to_index('apachesolr_search', $cron_limit); + apachesolr_index_nodes($result, 'apachesolr_search'); +} + +/** + * Implementation of hook_apachesolr_node_exclude(). + */ +function apachesolr_search_apachesolr_node_exclude($node) { + $excluded_types = variable_get('apachesolr_search_excluded_types', array()); + // Exclude from processing node types we don't want. + return !empty($excluded_types[$node->type]); } /** @@ -24,54 +83,67 @@ function apachesolr_search_search($op = return t('Search'); case 'reset': - ApacheSolrUpdate::reset('apachesolr'); + apachesolr_clear_last_index('apachesolr_search'); return; case 'status': - $change = ApacheSolrUpdate::get_change('apachesolr'); - $last = ApacheSolrUpdate::get_last('apachesolr'); - $total = db_result(db_query('SELECT COUNT(*) FROM {node} WHERE status = 1')); - $remaining = db_result(db_query('SELECT COUNT(*) FROM {node} n '. - 'LEFT JOIN {node_comment_statistics} c ON n.nid = c.nid '. 
- 'WHERE n.status = 1 AND ((GREATEST(n.created, n.changed, c.last_comment_timestamp) = %d AND n.nid > %d ) OR (n.created > %d OR n.changed > %d OR c.last_comment_timestamp > %d))', $change, $last, $change, $change, $change)); - return array('remaining' => $remaining, 'total' => $total); + return apachesolr_index_status('apachesolr_search'); case 'search': - global $pager_total; - // This is the object that does the communication with the solr server. - $solr =& apachesolr_get_solr(variable_get('apachesolr_host', 'localhost'), variable_get('apachesolr_port', 8983), variable_get('apachesolr_path', '/solr')); - // This is the object that knows about the query coming from the user. - $query =& apachesolr_drupal_query($keys); - $results = array(); - + try { + // This is the object that knows about the query coming from the user. + $query = apachesolr_current_query($keys); + if (is_null($query)) { + throw new Exception(t('Could not construct a Solr query in function apachesolr_search_search()')); + } + + $results = array(); + $params = array( - //'qt' => 'standard', - 'fl' => '*,score', + 'fl' => 'id,nid,title,comment_count,type,created,changed,score,url,uid,name', 'rows' => variable_get('apachesolr_rows', 10), 'facet' => 'true', 'facet.mincount' => 1, 'facet.sort' => 'true' ); - // TODO: This adds all of the possible facets to the query. Not all - // of these facets have their blocks enabled, so the list should be - // filtered by the actual enabled blocks, otherwise we're putting - // unneeded strain on the Solr server. - foreach (module_implements('apachesolr_facets') as $module) { - $function = $module .'_apachesolr_facets'; - $result = call_user_func_array($function, array()); - if (isset($result) && is_array($result)) { - foreach ($result as $facet) { - $params['facet.field'][] = $facet; + /** + * Highlighting settings + * These settings are set in solrconfig.xml. + * See the defaults there. 
+ * If you wish to override them, you can via settings.php + */ + + $params['hl'] = variable_get('apachesolr_hl_active', NULL); + $params['hl.fragsize']= variable_get('apachesolr_hl_textsnippetlength', NULL); + $params['hl.simple.pre'] = variable_get('apachesolr_hl_pretag', NULL); + $params['hl.simple.post'] = variable_get('apachesolr_hl_posttag', NULL); + $params['hl.snippets'] = variable_get('apachesolr_hl_numsnippets', NULL); + $params['hl.fl'] = variable_get('apachesolr_hl_fieldtohightlight', NULL); + // We default to getting snippets from the body. + $hl_fl = is_null($params['hl.fl']) ? 'body' : $params['hl.fl']; + + if (variable_get('apachesolr_search_spellcheck', FALSE)) { + //Add new parameter to the search request + $params['spellcheck.q'] = $query->get_query_basic(); + $params['spellcheck'] = 'true'; + } + + $facet_query_limits = variable_get('apachesolr_facet_query_limits', array()); + // Request all enabled facets. + foreach (apachesolr_get_enabled_facets() as $module => $module_facets) { + foreach($module_facets as $delta => $facet_field) { + $params['facet.field'][] = $facet_field; + // Facet limits + if (isset($facet_query_limits[$module][$delta])) { + $params['f.' . $facet_field . '.facet.limit'] = $facet_query_limits[$module][$delta]; } } } - - // Facet limits - $facet_query_limits = variable_get('apachesolr_facet_query_limits', array()); - foreach ($facet_query_limits as $fieldname => $limit) { - $params['f.' . $fieldname . '.facet.limit'] = $limit; + if (!empty($params['facet.field'])) { + // Add a default limit for fields where no limit was set. 
+ $params['facet.limit'] = variable_get('apachesolr_facet_query_limit_default', 20); } if (isset($_GET['solrsort'])) { @@ -79,97 +151,107 @@ function apachesolr_search_search($op = } // Validate sort parameter - if ($sort && preg_match('/^([a-z0-9_]+ (asc|desc)(,)?)+$/i', $sort)) { + if (isset($sort) && preg_match('/^([a-z0-9_]+ (asc|desc)(,)?)+$/i', $sort)) { $params['sort'] = $sort; } - if ($fields = apachesolr_cck_fields()) { - foreach ($fields as $name => $field) { - $index_key = apachesolr_index_key($field); - $params['facet.field'][] = $index_key; - } - } $page = isset($_GET['page']) ? $_GET['page'] : 0; $params['start'] = $page * $params['rows']; - - /** - * This hook allows modules to modify the query are params objects. - * - * Example: - * - * - * function my_module_apachesolr_modify_query(&$query, &$params) { - * // I only want to see articles by the admin! - * $query->add_field("uid", 1); - * - * } - * - */ - foreach (module_implements('apachesolr_modify_query') as $module) { - $function_name = "{$module}_apachesolr_modify_query"; - $function_name($query, $params); + // This is the object that does the communication with the solr server. + $solr = apachesolr_get_solr(); + + // Note - we have query fields set in solrconfig.xml, which will operate when + // none are set. + $qf = variable_get('apachesolr_search_query_fields', array()); + $fields = $solr->getFields(); + if ($qf && $fields) { + foreach ($fields as $field_name => $field) { + if (!empty($qf[$field_name])) { + if ($field_name == 'body') { + // Body is the only normed field. + $qf[$field_name] *= 40.0; + } + $params['qf'][] = $field_name . '^'. $qf[$field_name]; + } + } } - if (!$query) { - return array(); + // Note: we use 2 since 1 fails on Ubuntu Hardy. 
+ $data = $solr->getLuke(2); + if (isset($data->index->numDocs)) { + $total = $data->index->numDocs; + } + else { + $total = db_result(db_query("SELECT COUNT(nid) FROM {node}")); + } + $date_settings = variable_get('apachesolr_search_date_boost', '4:200.0'); + list($date_steepness, $date_boost) = explode(':', $date_settings); + // Default date-biasing function, as suggested (but steeper) at + // http://wiki.apache.org/solr/DisMaxRequestHandler + // rord() returns 1 for the newest doc, and the number in the index for + // the oldest doc. The function is thus: $total/(rord()*$steepness + $total). + if ($date_boost) { + $params['bf'][] = "recip(rord(created),$date_steepness,$total,$total)^$date_boost"; + } + $comment_settings = variable_get('apachesolr_search_comment_boost', '4:200.0'); + list($comment_steepness, $comment_boost) = explode(':', $comment_settings); + // Default comment-count-biasing function, using the same form as the date bias; see + // http://wiki.apache.org/solr/DisMaxRequestHandler + // rord() returns 1 for the doc with the most comments, and the number in the index for + // the doc with the fewest. The function is thus: $total/(rord()*$steepness + $total). + if ($comment_boost) { + $params['bf'][] = "recip(rord(comment_count),$comment_steepness,$total,$total)^$comment_boost"; + } + + // Modify the weight of results according to the node types. + $type_boosts = variable_get('apachesolr_search_type_boosts', array()); + if (!empty($type_boosts)) { + foreach ($type_boosts as $type => $boost) { + $params['bq'][] = "type:$type^$boost"; + } + } - /** - * This hook allows modules to modify the query are params objects. - * - * Example: - * - * - * function my_module_apachesolr_modify_query(&$query,&$params) { - * // I only want to see articles by the admin! 
- * $query->add_field("uid" => "1"); - * - * } - * - */ - foreach (module_implements('apachesolr_modify_query') as $module) { - $function_name = "{$module}_apachesolr_modify_query"; - $function_name($query, $params); - } + // This hook allows modules to modify the query and params objects. + apachesolr_modify_query($query, $params, 'apachesolr_search'); if (!$query) { return array(); } - $hash = md5(url(NULL, NULL, NULL, TRUE)); - $query->add_field('hash', $hash); - - $response = $solr->search($query->get_query(), $params['start'], $params['rows'], $params); + $response = $solr->search($query->get_query_basic(), $params['start'], $params['rows'], $params); // The response is cached so that it is accessible to the blocks and anything // else that needs it beyond the initial search. + $total = $response->response->numFound; apachesolr_static_response_cache($response); apachesolr_has_searched(TRUE); - $query->remove_field('hash', $hash); - $total = $response->response->numFound; pager_query("SELECT %d", $params['rows'], 0, NULL, $total); if ($total > 0) { - $extra = array(); foreach ($response->response->docs as $doc) { + $extra = array(); + $snippet = isset($response->highlighting->{$doc->id}->$hl_fl) ? 
theme('apachesolr_search_snippets', $doc, $response->highlighting->{$doc->id}->$hl_fl) : ''; + if (!isset($doc->body)) { + $doc->body = $snippet; + } + $doc->created = strtotime($doc->created); + $doc->changed = strtotime($doc->changed); $extra += node_invoke_nodeapi($doc, 'search result'); $extra['score'] = $doc->score; - $snippet = search_excerpt($keys, $doc->body); - if (trim($snippet) == '...') { - $snippet = ''; - } - $results[] = array('link' => $doc->url, - 'type' => node_get_types('name', $doc), - 'title' => $doc->title, - 'user' => theme('username', $doc), - 'date' => $doc->changed, - 'node' => $doc, - 'extra' => $extra, - 'score' => $doc->score, - 'snippet' => $snippet); + $results[] = array( + 'link' => $doc->url, + 'type' => apachesolr_search_get_type($doc->type), + 'title' => $doc->title, + 'user' => theme('username', $doc), + 'date' => $doc->created, + 'node' => $doc, + 'extra' => $extra, + 'score' => $doc->score, + 'snippet' => $snippet, + ); } - + // Hook to allow modifications of the retrieved results foreach (module_implements('apachesolr_process_results') as $module) { $function = $module .'_apachesolr_process_results'; call_user_func_array($function, array(&$results)); - } + } } // Set breadcrumb drupal_set_breadcrumb($query->get_breadcrumb()); @@ -178,58 +260,100 @@ function apachesolr_search_search($op = } // try catch (Exception $e) { watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); - apachesolr_failure(t('Search'), $query->get_query()); + apachesolr_failure(t('Solr search'), is_null($query) ? $keys : $query->get_query_basic()); } break; - } // switch } +/** + * Implementation of hook_apachesolr_facets(). + * + * Returns an array keyed by block delta. 
+ */ function apachesolr_search_apachesolr_facets() { - return array_keys(apachesolr_search_block()); + $facets = array(); + + $facets['type'] = array( + 'info' => t('Apache Solr Search: Filter by content type'), + 'facet_field' => 'type', + ); + $facets['uid'] = array( + 'info' => t('Apache Solr Search: Filter by author'), + 'facet_field' => 'uid', + ); + $facets['language'] = array( + 'info' => t('Apache Solr Search: Filter by language'), + 'facet_field' => 'language', + ); + + // Get taxonomy vocabulary facets. + if (module_exists('taxonomy')) { + $vocabs = taxonomy_get_vocabularies(); + foreach ($vocabs as $vid => $vocab) { + // In this case the delta and facet field are the same. + $delta = 'imfield_vid_' . $vid; + $facets[$delta] = array( + 'info' => t('Apache Solr Search: Filter by @name', array('@name' => $vocab->name)), + 'facet_field' => $delta, + ); + } + } + + // Get CCK field facets. + $fields = apachesolr_cck_fields(); + if ($fields) { + foreach ($fields as $name => $field) { + // $delta can only be 32 chars, and the CCK field name may be this + // long also, so we cannot add anything to it. + $facets[$field['field_name']] = array( + 'info' => t('Apache Solr Search: Filter by @field', array('@field' => $field['label'])), + 'facet_field' => apachesolr_index_key($field), + ); + } + } + return $facets; } /** * Implementation of hook_block(). */ function apachesolr_search_block($op = 'list', $delta = 0, $edit = array()) { + switch ($op) { case 'list': - $blocks['uid'] = array('info' => t('ApacheSolr Search: Filter by author')); - - // Get taxonomy vocabulary facets. - if (module_exists('taxonomy')) { - $vocabs = taxonomy_get_vocabularies(); - foreach ($vocabs as $vid => $vocab) { - $blocks['imfield_vid' . $vid] = array('info' => t('ApacheSolr Search: Filter by @name', array('@name' => $vocab->name))); - } - } - - // Get CCK field facets. 
- if ($fields = apachesolr_cck_fields()) { - foreach ($fields as $name => $field) { - $index_key = apachesolr_index_key($field); - $label = db_result(db_query("SELECT label FROM {node_field_instance} WHERE field_name = '%s'", $name)); - // TODO: $index_key must be wrong here. - $blocks[$index_key] = array('info' => t('ApacheSolr Search: Filter by @field', array('@field' => $label))); - } + $enabled_facets = apachesolr_get_enabled_facets('apachesolr_search'); + $facets = apachesolr_search_apachesolr_facets(); + // Add the blocks + $blocks = array(); + foreach ($enabled_facets as $delta => $facet_field) { + $blocks[$delta] = $facets[$delta]; } + $blocks['currentsearch'] = array( + 'info' => t('Apache Solr Search: Current search'), + ); return $blocks; case 'view': - if (arg(1) == 'apachesolr_search' && apachesolr_has_searched()) { + if (apachesolr_has_searched()) { // Get the query and response. Without these no blocks make sense. - $response =& apachesolr_static_response_cache(); + $response = apachesolr_static_response_cache(); if (empty($response)) { return; } - $query =& apachesolr_drupal_query(); + $query = apachesolr_current_query(); - // Get information needed by the rest of the blocks about limits. - $facet_display_limits = variable_get('apachesolr_facet_query_limits', array()); + $facets = apachesolr_get_enabled_facets('apachesolr_search'); + if (empty($facets[$delta]) && ($delta != 'currentsearch')) { + return; + } + + // Get information needed by the taxonomy blocks about limits. 
+ $initial_limits = variable_get('apachesolr_facet_query_initial_limits', array()); + $limit_default = variable_get('apachesolr_facet_query_initial_limit_default', 10); // Handle taxonomy vocabulary facets - if ((strpos($delta, 'imfield_vid') === 0) && module_exists('taxonomy')) { + if ((strpos($delta, 'imfield_vid_') === 0) && module_exists('taxonomy')) { if (is_object($response->facet_counts->facet_fields->$delta)) { $contains_active = FALSE; @@ -243,12 +367,14 @@ function apachesolr_search_block($op = ' if ($active = $query->has_field('tid', $tid)) { $contains_active = TRUE; $new_query->remove_field('tid', $term->tid); - $path = 'search/' . arg(1) . '/' . $new_query->get_query(); - $unclick_link = theme('apachesolr_unclick_link', $path); + $path = 'search/' . arg(1) . '/' . $new_query->get_query_basic(); + $querystring = $new_query->get_url_querystring(); + $unclick_link = theme('apachesolr_unclick_link', $path, $querystring); } else { $new_query->add_field('tid', $term->tid); - $path = 'search/' . arg(1) . '/' . $new_query->get_query(); + $path = 'search/' . arg(1) . '/' . $new_query->get_query_basic(); + $querystring = $new_query->get_url_querystring(); } $countsort = $count == 0 ? '' : 1 / $count; // if numdocs == 1 and !active, don't add. @@ -256,70 +382,62 @@ function apachesolr_search_block($op = ' // skip } else { - $terms[$term->vid][$active ? $countsort . $term->name : 1 + $countsort . $term->name] = - theme('apachesolr_facet_item', $term->name, $count, $path, $active, $unclick_link, $response->numFound); + $terms[$term->vid][$active ? $countsort . $term->name : 1 + $countsort . 
$term->name] = theme('apachesolr_facet_item', $term->name, $count, $path, $querystring, $active, $unclick_link, $response->numFound); } } } - $vid = substr($delta, 11); - $vocab = taxonomy_get_vocabulary($vid); + $vid = substr($delta, 12); + $vocab = taxonomy_vocabulary_load($vid); if (is_numeric($vid) && is_array($terms) && isset($terms[$vid]) && is_array($terms[$vid])) { ksort($terms[$vid]); - $facet_display_limit = isset($facet_display_limits[$delta]) ? $facet_display_limits[$delta] : 10; - $terms[$vid] = array_slice($terms[$vid], 0, ($facet_display_limit == -1 ? NULL : $facet_display_limit)); - return array('subject' => t('Filter by @name', array('@name' => $vocab->name)), - 'content' => theme('apachesolr_facet_list', $terms[$vid])); - } - else { - return; + $limit = isset($initial_limits['apachesolr_search'][$delta]) ? $initial_limits['apachesolr_search'][$delta] : $limit_default; + return array( + 'subject' => t('Filter by @name', array('@name' => $vocab->name)), + 'content' => theme('apachesolr_facet_list', $terms[$vid], $limit), + ); } + return; } switch ($delta) { - case 'uid': - $filter_by = t('Filter by author'); - return apachesolr_facet_block($response, $query, $delta, $filter_by, 'apachesolr_search_get_username'); + case 'currentsearch': + $fields = $query->get_fields(); + $search_keys = $query->get_query_basic(); + $path = 'search/' . arg(1) . '/' . $search_keys; + $options = array(); + if (!$fields) { + $options['attributes']['class'] = 'active'; + } + $links[] = apachesolr_l($search_keys, $path, $options); + foreach($fields as $field) { + if ($field['#name']) { + $new_query = clone $query; + $new_query->remove_field($field['#name'], $field['#value']); + $path = 'search/'. arg(1) .'/'. $new_query->get_query_basic(); + $querystring = $new_query->get_url_querystring(); + $unclick_link = theme('apachesolr_unclick_link', $path, $querystring); + if (! $fielddisplay = theme("apachesolr_breadcrumb_". 
$field['#name'], $field['#value'])) { + $fielddisplay = $field['#value']; + } + $links[] = theme('apachesolr_facet_item', $fielddisplay, NULL, $path, $querystring, $active, $unclick_link, $response->numFound); + } + } + $content = theme('apachesolr_currentsearch', $response->response->numFound, $links); + return array('subject' => t('Current search'), 'content' => $content); + + case 'language': + return apachesolr_facet_block($response, $query, 'apachesolr_search', $delta, $delta, t('Filter by language'), 'locale_language_name'); + case 'uid': + return apachesolr_facet_block($response, $query, 'apachesolr_search', $delta, $delta, t('Filter by author'), 'apachesolr_search_get_username'); + case 'type': + return apachesolr_facet_block($response, $query, 'apachesolr_search', $delta, $delta, t('Filter by type'), 'apachesolr_search_get_type'); default: if ($fields = apachesolr_cck_fields()) { foreach ($fields as $name => $field) { - $index_key = apachesolr_index_key($field); - if ($index_key == $delta) { - if (is_array($response->facets->$index_key)) { - $contains_active = FALSE; - foreach ($response->facets->$index_key as $facet => $count) { - $unclick_link = ''; - unset($active); - $new_query = clone $query; - if ($active = $query->has_field($index_key, $facet)) { - $contains_active = TRUE; - $new_query->remove_field($index_key, $facet); - $path = 'search/'. arg(1) .'/'. $new_query->get_query(); - $unclick_link = theme('apachesolr_unclick_link', $path); - } - else { - $new_query->add_field($index_key, $facet); - $path = 'search/'. arg(1) .'/'. $new_query->get_query(); - } - $countsort = $count == 0 ? '' : 1 / $count; - // if numdocs == 1 and !active, don't add. - if ($response->numFound == 1 && !$active) { - // skip - } - else { - $facets[$active ? $countsort . $facet : 1 + $countsort . 
$facet] = theme('apachesolr_facet_item', $facet, $count, $path, $active, $unclick_link, $response->numFound); - } - } - if (is_array($facets)) { - ksort($facets); - $facet_display_limit = isset($facet_display_limits[$delta]) ? $facet_display_limits[$delta] : 10; - $facets = array_slice($facets, 0, ($facet_display_limit == -1 ? NULL : $facet_display_limit)); - $output = theme('apachesolr_facet_list', $facets); - $label = db_result(db_query("SELECT label FROM {node_field_instance} WHERE field_name = '%s'", $name)); - return array('subject' => t('Filter by @field', array('@field' => $label)), - 'content' => $output); - } - } + if ($field['field_name'] == $delta) { + $index_key = apachesolr_index_key($field); + return apachesolr_facet_block($response, $query, 'apachesolr_search', $delta, $index_key, t('Filter by @field', array('@field' => $field['label']))); } } } @@ -329,19 +447,22 @@ function apachesolr_search_block($op = ' break; case 'configure': - if ($delta != 'sort') { - return apachesolr_facetcount_form($delta); + if ($delta != 'currentsearch') { + return apachesolr_facetcount_form('apachesolr_search', $delta); } break; case 'save': - if ($delta != 'sort') { - apachesolr_facetcount_save($delta, $edit); + if ($delta != 'currentsearch') { + apachesolr_facetcount_save($edit); } break; } } +/** + * Callback function for the 'Filter by name' facet block. + */ function apachesolr_search_get_username($facet) { if ($facet == 0) { return variable_get('anonymous', t('Anonymous')); @@ -352,11 +473,77 @@ function apachesolr_search_get_username( } /** + * Callback function for the 'Filter by type' facet block. + */ +function apachesolr_search_get_type($facet) { + $type = node_get_types('name', $facet); + // A disabled or missing node type returns FALSE. + return ($type === FALSE) ? $facet : $type; +} + +/** + * Implementation of hook_form_alter(). + * + * This adds spelling suggestions to the search form. 
+ */ +function apachesolr_search_form_alter($form_id, &$form) { + if ($form_id == 'search_form') { + if (($form['module']['#value'] == 'apachesolr_search') && variable_get('apachesolr_search_spellcheck', FALSE) && apachesolr_has_searched() && ($response = apachesolr_static_response_cache())) { + //Get spellchecker suggestions into an array. + $suggestions = get_object_vars($response->spellcheck->suggestions); + + if ($suggestions) { + //Get the original query and replace words. + $query = apachesolr_current_query(); + + foreach($suggestions as $word => $value) { + $replacements[$word] = $value->suggestion[0]; + } + $new_keywords = strtr($query->get_query_basic(), $replacements); + + $form['basic']['suggestion'] = array( + '#prefix' => '
', + '#suffix' => '
', + '#type' => 'item', + '#title' => t('Did you mean'), + '#value' => l($new_keywords, 'search/'. arg(1) .'/'. $new_keywords), + ); + } + } + } + if ($form_id == 'apachesolr_settings') { + $form['apachesolr_search_spellcheck'] = array( + '#type' => 'checkbox', + '#title' => t('Enable spellchecker and suggestions'), + '#default_value' => variable_get('apachesolr_search_spellcheck', FALSE), + '#description' => t('Enable spellchecker and get word suggestions. Also known as the "Did you mean ... ?" feature.'), + ); + + $form['#submit'][] = 'apachesolr_search_build_spellcheck'; + // Move buttons to the bottom. + $buttons = $form['buttons']; + unset($form['buttons']); + $form['buttons'] = $buttons; + } +} + +function apachesolr_search_build_spellcheck() { + try { + $solr = apachesolr_get_solr(); + $params['spellcheck'] = 'true'; + $params['spellcheck.build'] = 'true'; + $response = $solr->search('solr', 0, 0, $params); + } + catch (Exception $e) { + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); + } +} + +/** * Return the username from $uid */ function theme_apachesolr_breadcrumb_uid($uid) { - $user = user_load(array('uid' => $uid)); - return $user->name; + return apachesolr_search_get_username($uid); } /** @@ -365,4 +552,245 @@ function theme_apachesolr_breadcrumb_uid function theme_apachesolr_breadcrumb_tid($tid) { $term = taxonomy_get_term($tid); return $term->name; -} \ No newline at end of file +} + +/** + * Return the human readable text for a content type. 
+ */ +function theme_apachesolr_breadcrumb_type($type) { + // Reuse the facet callback so a disabled or missing type falls back to its machine name instead of FALSE. + return apachesolr_search_get_type($type); +} + +/** + * Return current search block contents + */ +function theme_apachesolr_currentsearch($total_found, $links) { + // Go through theme() rather than calling theme_item_list() directly so that theme overrides of item_list are honoured. + return theme('item_list', $links, t('Search found @count items', array('@count' => $total_found))); +} + +/** + * Returns the snippet text for a search entry + * + * @param object $doc + * @param array $snippets + * + */ +function theme_apachesolr_search_snippets($doc, $snippets) { + return implode(' ... ', $snippets); +} + +/** + * @file + * Administrative settings for searching. + */ + +/** + * Menu callback - the settings form. + */ +function apachesolr_search_settings_page() { + $output = drupal_get_form('apachesolr_search_bias_form'); + // try to fetch the schema fields + try { + $solr = apachesolr_get_solr(); + $fields = $solr->getFields(); + if (!empty($fields)) { + $output .= drupal_get_form('apachesolr_search_settings_form', $fields); + } + } + catch (Exception $e) { + watchdog('apachesolr', $e->getMessage()); + drupal_set_message($e->getMessage(), "warning"); + $output .= t('Cannot get information about the fields in the index at this time.'); + } + return $output; +} + +/** + * Form builder function to set date, comment, etc biases. + */ +function apachesolr_search_bias_form($form_state = NULL) { + + $date_settings = variable_get('apachesolr_search_date_boost', '4:200.0'); + $comment_settings = variable_get('apachesolr_search_comment_boost', '4:200.0'); + + $options = array( + '10:2000.0' => '10', + '8:1000.0' => '9', + '8:700.0' => '8', + '8:500.0' => '7', + '4:300.0' => '6', + '4:200.0' => '5', + '4:150.0' => '4', + '2:150.0' => '3', + '2:100.0' => '2', + '1:100.0' => '1', + '0:0' => t('Omit'), + ); + + $form['biasing'] = array( + '#type' => 'fieldset', + '#title' => t('Result biasing'), + '#collapsible' => TRUE, + '#collapsed' => FALSE, + '#description' => t('Give bias to certain properties when ordering the search results.
Choose Omit to ignore this for any given property.'), + ); + $form['biasing']['apachesolr_search_date_boost'] = array( + '#type' => 'select', + '#options' => $options, + '#title' => t("'More recent change' bias"), + '#default_value' => $date_settings, + '#description' => t('This setting will change the result scoring so that results changed more recently may appear before those with higher keyword matching.'), + ); + $form['biasing']['apachesolr_search_comment_boost'] = array( + '#type' => 'select', + '#options' => $options, + '#title' => t("'More comments' bias"), + '#default_value' => $comment_settings, + '#description' => t('This setting will change the result scoring so that nodes with more comments may appear before those with higher keyword matching.'), + ); + + return system_settings_form($form); +} + +/** + * Form builder function to set query field weights. + */ +function apachesolr_search_settings_form($form_state = NULL, $fields = NULL) { + $form = array(); + + // get the current weights + $qf = variable_get('apachesolr_search_query_fields', array()); + $weights = drupal_map_assoc(array('21.0', '13.0', '8.0', '5.0', '3.0', '2.0', '1.0', '0.8', '0.5', '0.3', '0.2', '0.1')); + $weights['0'] = t('Omit'); + // Note - we have default values set in solrconfig.xml, which will operate when + // none are set. + $defaults = array( + 'body' => '1.0', + 'title' => '5.0', + 'name' => '3.0', + 'taxonomy_names' => '2.0', + 'tags_h1' => '5.0', + 'tags_h2_h3' => '3.0', + 'tags_h4_h5_h6' => '2.0', + 'tags_inline' => '1.0', + 'tags_a' => '0', + ); + if (!$qf) { + $qf = $defaults; + } + if ($fields) { + + $form['apachesolr_search_query_fields'] = array( + '#type' => 'fieldset', + '#title' => t('Field weights'), + '#collapsible' => TRUE, + '#collapsed' => FALSE, + '#tree' => TRUE, + '#description' => t('Specify here which fields are more important when searching. Give a field a greater numeric value to make it more important. 
If you omit a field, it will not be searched.'), + ); + foreach ($fields as $field_name => $field) { + $form['apachesolr_search_query_fields'][$field_name] = array( + '#access' => $field->type == 'text', + '#type' => 'select', + '#options' => $weights, + '#title' => t('Weight for %field_name', array('%field_name' => $field_name)), + '#default_value' => isset($qf[$field_name]) ? $qf[$field_name] : '0', + ); + } + // Make sure all the default fields are included, even if they have no indexed content. + foreach ($defaults as $field_name => $weight) { + $form['apachesolr_search_query_fields'][$field_name] = array( + '#type' => 'select', + '#options' => $weights, + '#title' => t('Weight for %field_name', array('%field_name' => $field_name)), + '#default_value' => isset($qf[$field_name]) ? $qf[$field_name] : $defaults[$field_name], + ); + } + + ksort($form['apachesolr_search_query_fields']); + } + + return system_settings_form($form); +} + +/** + * Form builder function to set query type weights. + */ +function apachesolr_search_type_boost_form($form_state = NULL) { + $form = array(); + + $form['apachesolr_search_type_settings'] = array( + '#type' => 'fieldset', + '#title' => t('Type weighting and exclusion'), + '#collapsible' => TRUE, + '#collapsed' => FALSE, + ); + $form['apachesolr_search_type_settings']['apachesolr_search_type_boosts'] = array( + '#type' => 'item', + '#description' => t("Specify here which node types should get a higher relevancy score in searches. Any value except 'Normal' will increase the score of the given type in search results."), + '#tree' => TRUE, + ); + + $weights = drupal_map_assoc(array('21.0', '13.0', '8.0', '5.0', '3.0', '2.0', '1.0', '0.8', '0.5', '0.3', '0.2', '0.1')); + $weights['0'] = t('Normal'); + + + // Get the current boost values. 
+ $type_boosts = variable_get('apachesolr_search_type_boosts', array()); + $names = node_get_types('names'); + + foreach ($names as $type => $name) { + $form['apachesolr_search_type_settings']['apachesolr_search_type_boosts'][$type] = array( + '#type' => 'select', + '#title' => t('Weight for %type', array('%type' => $name)), + '#options' => $weights, + '#default_value' => isset($type_boosts[$type]) ? $type_boosts[$type] : 0, + ); + } + + $form['apachesolr_search_type_settings']['apachesolr_search_excluded_types'] = array( + '#type' => 'checkboxes', + '#title' => t('Types to exclude from the search index'), + '#options' => $names, + '#default_value' => variable_get('apachesolr_search_excluded_types', array()), + '#description' => t("Specify here which node types should be totally excluded from the search index."), + ); + + $form['#submit']['apachesolr_search_type_boost_form_submit'] = array(); + return system_settings_form($form); +} + +/** + * Submit callback for apachesolr_search_type_boost_form(). + * + * This is called before system_settings_form_submit(). + */ +function apachesolr_search_type_boost_form_submit($form_id, &$form_state) { + $old_excluded_types = variable_get('apachesolr_search_excluded_types', array()); + $new_excluded_types = $form_state['apachesolr_search_excluded_types']; + // Check whether we are resetting the values. + if ($form_state['op'] == t('Reset to defaults')) { + $new_excluded_types = array(); + variable_set('apachesolr_search_type_boosts', array()); + } + else { + // Apparently, if you add a submit handler to a system_settings_form, it no longer works. + variable_set('apachesolr_search_type_boosts', $form_state['apachesolr_search_type_boosts']); + variable_set('apachesolr_search_excluded_types', $new_excluded_types); + } + + foreach ($new_excluded_types as $type => $excluded) { + // Remove newly omitted node types. 
+ if (!empty($new_excluded_types[$type]) && empty($old_excluded_types[$type])) { + $solr = apachesolr_get_solr(); + $solr->deleteByQuery("type:$type"); + } + } + + foreach ($old_excluded_types as $type => $excluded) { + // Set no longer omitted node types for reindexing. + if (empty($new_excluded_types[$type]) && !empty($old_excluded_types[$type])) { + db_query("UPDATE {apachesolr_search_node} SET changed = %d WHERE nid IN (SELECT nid FROM {node} WHERE type = '%s')", time(), $type); + } + } +} Index: schema.xml =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/schema.xml,v retrieving revision 1.1.2.8 diff -u -p -r1.1.2.8 schema.xml --- schema.xml 4 Nov 2008 14:29:55 -0000 1.1.2.8 +++ schema.xml 5 Feb 2009 21:42:05 -0000 @@ -1,21 +1,5 @@ - - + - + + + + + + + + + + + + + + + + @@ -247,27 +245,53 @@ - + - + + - + + + + + - - + + + + + + + + + + + + + + + + when each document was indexed.--> + + + + + + - - - + + + + @@ -292,12 +317,12 @@ + + - - - @@ -306,10 +331,10 @@ - url + id - text + body Index: solrconfig.xml =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/solrconfig.xml,v retrieving revision 1.1.4.2 diff -u -p -r1.1.4.2 solrconfig.xml --- solrconfig.xml 30 Nov 2008 21:14:35 -0000 1.1.4.2 +++ solrconfig.xml 5 Feb 2009 21:42:05 -0000 @@ -17,6 +17,7 @@ --> + + @@ -113,18 +115,46 @@ This is not needed if lock type is 'none' or 'single' --> false + + + + + false + + 1 + + + - + @@ -140,11 +170,12 @@ - 10000 - 1000 + 500 + 240000 - --> + 1024 - + + + + @@ -256,7 +304,7 @@ then documents 0 through 49 will be collected and cached. Any further requests in that range can be satisfied via the cache. 
--> 50 - + 200 @@ -310,7 +358,7 @@ - + - - + + - + explicit - + + + + - + + + dismax + explicit + + + + + dismax explicit 0.01 - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + body^1.0 title^5.0 name^3.0 taxonomy_names^2.0 tags_h1^5.0 tags_h2_h3^3.0 tags_h4_h5_h6^2.0 tags_inline^1.0 - text^0.2 features^1.1 name^1.5 manu^1.4 manu_exact^1.9 - - - ord(popularity)^0.5 recip(rord(price),1,1000,1000)^0.3 - - - id,name,price,score + body^2.0 + 15 - 2<-1 5<-2 6<90% + 2<-35% - 100 *:* - - text features name - - 0 - - name - regex + + + true + body + 3 + true + + body + 256 + + + + + false + + false + false + + 1 + + spellcheck + - - + + - dismax - explicit - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 - 2<-1 5<-2 6<90% - - incubationdate_dt:[* TO NOW/DAY-1MONTH]^2.2 - - - - inStock:true - - - - cat - manu_exact - price:[* TO 500] - price:[500 TO *] - + 1 + 1 + 3 + 15 + 30 + false + - + - - - - false - - false - - 1 - - - spellcheck - - - @@ -548,7 +576,7 @@ string elevate.xml - + @@ -558,14 +586,14 @@ elevator - + @@ -575,7 +603,7 @@ for debugging and as a token server for other types of applications --> - + @@ -583,7 +611,7 @@ - + @@ -610,7 +638,7 @@ all - + @@ -618,7 +646,7 @@ true - + @@ -634,22 +662,22 @@ 70 - 0.5 + 0.5 [-\w ,/\n\"']{20,200} - + - ]]> - ]]> + ]]> + ]]> - - + + 5 - + - - + + solr - + Index: SolrPhpClient/Apache/Solr/Document.php =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/SolrPhpClient/Apache/Solr/Document.php,v retrieving revision 1.1.2.3 diff -u -p -r1.1.2.3 Document.php --- SolrPhpClient/Apache/Solr/Document.php 23 Oct 2008 21:14:52 -0000 1.1.2.3 +++ SolrPhpClient/Apache/Solr/Document.php 5 Feb 2009 21:42:05 -0000 @@ -1,28 +1,43 @@ */ /** - * Holds Key / Value pairs that represent a Solr Document. 
Field values can be accessed - * by direct dereferencing such as: + * Holds Key / Value pairs that represent a Solr Document along with any associated boost + * values. Field values can be accessed by direct dereferencing such as: * * ... * $document->title = 'Something'; @@ -39,161 +54,280 @@ * } * */ -class Apache_Solr_Document implements Iterator +class Apache_Solr_Document implements IteratorAggregate { - protected $_fields = array(); - - /** - * Magic get for field values - * - * @param string $key - * @return mixed - */ - public function __get($key) - { - return $this->_fields[$key]; - } - - /** - * Magic set for field values. Multi-valued fields should be set as arrays - * or instead use the setMultiValue(...) function which will automatically - * make sure the field is an array. - * - * @param string $key - * @param mixed $value - */ - public function __set($key, $value) - { - $this->_fields[$key] = $value; - } - - /** - * Magic isset for fields values. Do no call directly. Allows usage: - * - * - * isset($document->some_field); - * - * - * @param string $key - * @return boolean - */ - public function __isset($key) - { - return isset($this->_fields[$key]); - } - - /** - * Magic unset for field values. Do no call directly. 
Allows usage: - * - * - * unset($document->some_field); - * - * - * @param string $key - */ - public function __unset($key) - { - unset($this->_fields[$key]); - } - - /** - * Handle the array manipulation for a multi-valued field - * - * @param string $key - * @param string $value - */ - public function setMultiValue($key, $value) - { - if (!isset($this->_fields[$key])) - { - $this->_fields[$key] = array(); - } - - if (!is_array($this->_fields[$key])) - { - $this->_fields[$key] = array($this->_fields[$key]); - } - - $this->_fields[$key][] = $value; - } - - /** - * Get the names of all fields in this document - * - * @return array - */ - public function getFieldNames() - { - return array_keys($this->_fields); - } - - /** - * Iterator implementation function, proxies to _fields. Allows usage: - * - * - * foreach ($document as $key => $value) - * { - * ... - * } - * - */ - public function rewind() { - reset($this->_fields); - } - - /** - * Iterator implementation function, proxies to _fields. Allows usage: - * - * - * foreach ($document as $key => $value) - * { - * ... - * } - * - */ - public function current() { - return current($this->_fields); - } - - /** - * Iterator implementation function, proxies to _fields. Allows usage: - * - * - * foreach ($document as $key => $value) - * { - * ... - * } - * - */ - public function key() { - return key($this->_fields); - } - - /** - * Iterator implementation function, proxies to _fields. Allows usage: - * - * - * foreach ($document as $key => $value) - * { - * ... - * } - * - */ - public function next() { - return next($this->_fields); - } - - /** - * Iterator implementation function, proxies to _fields. Allows usage: - * - * - * foreach ($document as $key => $value) - * { - * ... 
- * } - * - */ - public function valid() { - return current($this->_fields) !== false; - } + /** + * Document boost value + * + * @var float + */ + protected $_documentBoost = false; + + /** + * Document field values, indexed by name + * + * @var array + */ + protected $_fields = array(); + + /** + * Document field boost values, indexed by name + * + * @var array array of floats + */ + protected $_fieldBoosts = array(); + + /** + * Clear all boosts and fields from this document + */ + public function clear() + { + $this->_documentBoost = false; + + $this->_fields = array(); + $this->_fieldBoosts = array(); + } + + /** + * Get current document boost + * + * @return mixed will be false for default, or else a float + */ + public function getBoost() + { + return $this->_documentBoost; + } + + /** + * Set document boost factor + * + * @param mixed $boost Use false for default boost, else cast to float + */ + public function setBoost($boost) + { + if ($boost !== false) + { + $this->_documentBoost = (float) $boost; + } + else + { + $this->_documentBoost = false; + } + } + + /** + * Add a value to a multi-valued field + * + * NOTE: the solr XML format allows you to specify boosts + * PER value even though the underlying Lucene implementation + * only allows a boost per field. To remedy this, the final + * field boost value will be the product of all specified boosts + * on field values - this is similar to SolrJ's functionality. + * + * + * $doc = new Apache_Solr_Document(); + * + * $doc->addField('foo', 'bar', 2.0); + * $doc->addField('foo', 'baz', 3.0); + * + * // resultant field boost will be 6! 
+ * echo $doc->getFieldBoost('foo'); + * + * + * @param string $key + * @param mixed $value + * @param float $boost + */ + public function addField($key, $value, $boost = false) + { + if (!isset($this->_fields[$key])) + { + $this->_fields[$key] = array(); + } + + if (!isset($this->_fieldBoosts[$key])) + { + $this->setFieldBoost($key, $boost); + } + else if ($boost !== false) + { + if ($this->_fieldBoosts[$key] !== false) + { + $this->_fieldBoosts[$key] *= (float) $boost; + } + else + { + $this->_fieldBoosts[$key] = (float) $boost; + } + } + + if (!is_array($this->_fields[$key])) + { + $this->_fields[$key] = array($this->_fields[$key]); + } + + $this->_fields[$key][] = $value; + } + + /** + * Handle the array manipulation for a multi-valued field + * + * @param string $key + * @param string $value + * + * @deprecated Use addField(...) instead + */ + public function setMultiValue($key, $value, $boost = false) + { + $this->addField($key, $value, $boost); + } + + /** + * Get field information + * + * @param string $key + * @return mixed associative array of info if field exists, false otherwise + */ + public function getField($key) + { + if (isset($this->_fields[$key])) + { + return array( + 'name' => $key, + 'value' => $this->_fields[$key], + 'boost' => $this->_fieldBoosts[$key] + ); + } + + return false; + } + + /** + * Set a field value. Multi-valued fields should be set as arrays + * or instead use the addField(...) function which will automatically + * make sure the field is an array. 
+ * + * @param string $key + * @param mixed $value + * @param float $boost + */ + public function setField($key, $value, $boost = false) + { + $this->_fields[$key] = $value; + $this->setFieldBoost($key, $boost); + } + + public function getFieldBoost($key) + { + // false is the documented "default boost" marker; return it for fields that never had a boost set. + return isset($this->_fieldBoosts[$key]) ? $this->_fieldBoosts[$key] : false; + } + + public function setFieldBoost($key, $boost) + { + //@note:JacobSingh changed this because of problem w/ multivalued fields + if ($boost !== false && $boost !== null) + { + $this->_fieldBoosts[$key] = (float) $boost; + } + else + { + $this->_fieldBoosts[$key] = false; // reset this field only; assigning false to the whole map would discard every other field's boost + } + } + + /** + * Get the names of all fields in this document + * + * @return array + */ + public function getFieldNames() + { + return array_keys($this->_fields); + } + + /** + * Get the values of all fields in this document + * + * @return array + */ + public function getFieldValues() + { + return array_values($this->_fields); + } + + /** + * IteratorAggregate implementation function. Allows usage: + * + * + * foreach ($document as $key => $value) + * { + * ... + * } + * + */ + public function getIterator() + { + $arrayObject = new ArrayObject($this->_fields); + + return $arrayObject->getIterator(); + } + + /** + * Magic get for field values + * + * @param string $key + * @return mixed + */ + public function __get($key) + { + return $this->_fields[$key]; + } + + /** + * Magic set for field values. Multi-valued fields should be set as arrays + * or instead use the addField(...) function which will automatically + * make sure the field is an array. + * + * @param string $key + * @param mixed $value + */ + public function __set($key, $value) + { + $this->_fields[$key] = $value; + + if (!isset($this->_fieldBoosts[$key])) + { + $this->_fieldBoosts[$key] = false; + } + } + + /** + * Magic isset for fields values. Do not call directly.
Allows usage: + * + * + * isset($document->some_field); + * + * + * @param string $key + * @return boolean + */ + public function __isset($key) + { + return isset($this->_fields[$key]); + } + + /** + * Magic unset for field values. Do not call directly. Allows usage: + * + * + * unset($document->some_field); + * + * + * @param string $key + */ + public function __unset($key) + { + unset($this->_fields[$key]); + unset($this->_fieldBoosts[$key]); + } } \ No newline at end of file Index: SolrPhpClient/Apache/Solr/Response.php =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/SolrPhpClient/Apache/Solr/Response.php,v retrieving revision 1.1.2.5 diff -u -p -r1.1.2.5 Response.php --- SolrPhpClient/Apache/Solr/Response.php 23 Oct 2008 21:14:52 -0000 1.1.2.5 +++ SolrPhpClient/Apache/Solr/Response.php 5 Feb 2009 21:42:05 -0000 @@ -1,20 +1,35 @@ @@ -31,234 +46,234 @@ */ class Apache_Solr_Response { - /** - * Holds the raw response used in construction - * - * @var string - */ - protected $_rawResponse; - - /** - * Parsed values from the passed in http headers - * - * @var string - */ - protected $_httpStatus, $_httpStatusMessage, $_type, $_encoding; - - /** - * Whether the raw response has been parsed - * - * @var boolean - */ - protected $_isParsed = false; - - /** - * Parsed representation of the data - * - * @var mixed - */ - protected $_parsedData; - - /** - * Data parsing flags. Determines what extra processing should be done - * after the data is initially converted to a data structure. - * - * @var boolean - */ - protected $_createDocuments = true, - $_collapseSingleValueArrays = true; - - /** - * Constructor. 
Takes the raw HTTP response body and the exploded HTTP headers - * - * @param string $rawResponse - * @param array $httpHeaders - * @param boolean $createDocuments Whether to convert the documents json_decoded as stdClass instances to Apache_Solr_Document instances - * @param boolean $collapseSingleValueArrays Whether to make multivalued fields appear as single values - */ - public function __construct($rawResponse, $httpHeaders = array(), $createDocuments = true, $collapseSingleValueArrays = true) - { - //Assume 0, 'Communication Error', utf-8, and text/plain - $status = 0; - $statusMessage = 'Communication Error'; - $type = 'text/plain'; - $encoding = 'UTF-8'; - - //iterate through headers for real status, type, and encoding - if (is_array($httpHeaders) && count($httpHeaders) > 0) - { - //look at the first headers for the HTTP status code - //and message (errors are usually returned this way) - // - //HTTP 100 Continue response can also be returned before - //the REAL status header, so we need look until we find - //the last header starting with HTTP - // - //the spec: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.1 - // - //Thanks to Daniel Andersson for pointing out this oversight - while (isset($httpHeaders[0]) && substr($httpHeaders[0], 0, 4) == 'HTTP') - { - $parts = split(' ', substr($httpHeaders[0], 9), 2); - - $status = $parts[0]; - $statusMessage = trim($parts[1]); - - array_shift($httpHeaders); - } - - //Look for the Content-Type response header and determine type - //and encoding from it (if possible - such as 'Content-Type: text/plain; charset=UTF-8') - foreach ($httpHeaders as $header) - { - if (strncasecmp($header, 'Content-Type:', 13) == 0) - { - //split content type value into two parts if possible - $parts = split(';', substr($header, 13), 2); - - $type = trim($parts[0]); - - if ($parts[1]) - { - //split the encoding section again to get the value - $parts = split('=', $parts[1], 2); - - if ($parts[1]) - { - $encoding = 
trim($parts[1]); - } - } - - break; - } - } - } - - $this->_rawResponse = $rawResponse; - $this->_type = $type; - $this->_encoding = $encoding; - $this->_httpStatus = $status; - $this->_httpStatusMessage = $statusMessage; - $this->_createDocuments = (bool) $createDocuments; - $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; - } - - /** - * Get the HTTP status code - * - * @return integer - */ - public function getHttpStatus() - { - return $this->_httpStatus; - } - - /** - * Get the HTTP status message of the response - * - * @return string - */ - public function getHttpStatusMessage() - { - return $this->_httpStatusMessage; - } - - /** - * Get content type of this Solr response - * - * @return string - */ - public function getType() - { - return $this->_type; - } - - /** - * Get character encoding of this response. Should usually be utf-8, but just in case - * - * @return string - */ - public function getEncoding() - { - return $this->_encoding; - } - - /** - * Get the raw response as it was given to this object - * - * @return string - */ - public function getRawResponse() - { - return $this->_rawResponse; - } - - /** - * Magic get to expose the parsed data and to lazily load it - * - * @param unknown_type $key - * @return unknown - */ - public function __get($key) - { - if (!$this->_isParsed) - { - $this->_parseData(); - $this->_isParsed = true; - } - - if (isset($this->_parsedData->$key)) - { - return $this->_parsedData->$key; - } - - return null; - } - - /** - * Parse the raw response into the parsed_data array for access - */ - protected function _parseData() - { - //An alternative would be to use Zend_Json::decode(...) 
- $data = json_decode($this->_rawResponse); - - //if we're configured to collapse single valued arrays or to convert them to Apache_Solr_Document objects - //and we have response documents, then try to collapse the values and / or convert them now - if (($this->_createDocuments || $this->_collapseSingleValueArrays) && isset($data->response) && is_array($data->response->docs)) - { - $documents = array(); - - foreach ($data->response->docs as $originalDocument) - { - if ($this->_createDocuments) - { - $document = new Apache_Solr_Document(); - } - else - { - $document = $originalDocument; - } - - foreach ($originalDocument as $key => $value) - { - //If a result is an array with only a single - //value then its nice to be able to access - //it as if it were always a single value - if ($this->_collapseSingleValueArrays && is_array($value) && count($value) <= 1) - { - $value = array_shift($value); - } - - $document->$key = $value; - } + /** + * Holds the raw response used in construction + * + * @var string + */ + protected $_rawResponse; + + /** + * Parsed values from the passed in http headers + * + * @var string + */ + protected $_httpStatus, $_httpStatusMessage, $_type, $_encoding; + + /** + * Whether the raw response has been parsed + * + * @var boolean + */ + protected $_isParsed = false; + + /** + * Parsed representation of the data + * + * @var mixed + */ + protected $_parsedData; + + /** + * Data parsing flags. Determines what extra processing should be done + * after the data is initially converted to a data structure. + * + * @var boolean + */ + protected $_createDocuments = true, + $_collapseSingleValueArrays = true; + + /** + * Constructor. 
Takes the raw HTTP response body and the exploded HTTP headers + * + * @param string $rawResponse + * @param array $httpHeaders + * @param boolean $createDocuments Whether to convert the documents json_decoded as stdClass instances to Apache_Solr_Document instances + * @param boolean $collapseSingleValueArrays Whether to make multivalued fields appear as single values + */ + public function __construct($rawResponse, $httpHeaders = array(), $createDocuments = true, $collapseSingleValueArrays = true) + { + //Assume 0, 'Communication Error', utf-8, and text/plain + $status = 0; + $statusMessage = 'Communication Error'; + $type = 'text/plain'; + $encoding = 'UTF-8'; + + //iterate through headers for real status, type, and encoding + if (is_array($httpHeaders) && count($httpHeaders) > 0) + { + //look at the first headers for the HTTP status code + //and message (errors are usually returned this way) + // + //HTTP 100 Continue response can also be returned before + //the REAL status header, so we need look until we find + //the last header starting with HTTP + // + //the spec: http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.1 + // + //Thanks to Daniel Andersson for pointing out this oversight + while (isset($httpHeaders[0]) && substr($httpHeaders[0], 0, 4) == 'HTTP') + { + $parts = split(' ', substr($httpHeaders[0], 9), 2); + + $status = $parts[0]; + $statusMessage = trim($parts[1]); + + array_shift($httpHeaders); + } + + //Look for the Content-Type response header and determine type + //and encoding from it (if possible - such as 'Content-Type: text/plain; charset=UTF-8') + foreach ($httpHeaders as $header) + { + if (strncasecmp($header, 'Content-Type:', 13) == 0) + { + //split content type value into two parts if possible + $parts = split(';', substr($header, 13), 2); + + $type = trim($parts[0]); + + if ($parts[1]) + { + //split the encoding section again to get the value + $parts = split('=', $parts[1], 2); + + if ($parts[1]) + { + $encoding = 
trim($parts[1]); + } + } + + break; + } + } + } + + $this->_rawResponse = $rawResponse; + $this->_type = $type; + $this->_encoding = $encoding; + $this->_httpStatus = $status; + $this->_httpStatusMessage = $statusMessage; + $this->_createDocuments = (bool) $createDocuments; + $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; + } + + /** + * Get the HTTP status code + * + * @return integer + */ + public function getHttpStatus() + { + return $this->_httpStatus; + } + + /** + * Get the HTTP status message of the response + * + * @return string + */ + public function getHttpStatusMessage() + { + return $this->_httpStatusMessage; + } + + /** + * Get content type of this Solr response + * + * @return string + */ + public function getType() + { + return $this->_type; + } + + /** + * Get character encoding of this response. Should usually be utf-8, but just in case + * + * @return string + */ + public function getEncoding() + { + return $this->_encoding; + } + + /** + * Get the raw response as it was given to this object + * + * @return string + */ + public function getRawResponse() + { + return $this->_rawResponse; + } + + /** + * Magic get to expose the parsed data and to lazily load it + * + * @param unknown_type $key + * @return unknown + */ + public function __get($key) + { + if (!$this->_isParsed) + { + $this->_parseData(); + $this->_isParsed = true; + } + + if (isset($this->_parsedData->$key)) + { + return $this->_parsedData->$key; + } + + return null; + } + + /** + * Parse the raw response into the parsed_data array for access + */ + protected function _parseData() + { + //An alternative would be to use Zend_Json::decode(...) 
+ $data = json_decode($this->_rawResponse); + + //if we're configured to collapse single valued arrays or to convert them to Apache_Solr_Document objects + //and we have response documents, then try to collapse the values and / or convert them now + if (($this->_createDocuments || $this->_collapseSingleValueArrays) && isset($data->response) && is_array($data->response->docs)) + { + $documents = array(); + + foreach ($data->response->docs as $originalDocument) + { + if ($this->_createDocuments) + { + $document = new Apache_Solr_Document(); + } + else + { + $document = $originalDocument; + } + + foreach ($originalDocument as $key => $value) + { + //If a result is an array with only a single + //value then its nice to be able to access + //it as if it were always a single value + if ($this->_collapseSingleValueArrays && is_array($value) && count($value) <= 1) + { + $value = array_shift($value); + } + + $document->$key = $value; + } - $documents[] = $document; - } + $documents[] = $document; + } - $data->response->docs = $documents; - } + $data->response->docs = $documents; + } - $this->_parsedData = $data; - } + $this->_parsedData = $data; + } } \ No newline at end of file Index: SolrPhpClient/Apache/Solr/Service.php =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/SolrPhpClient/Apache/Solr/Service.php,v retrieving revision 1.1.2.7 diff -u -p -r1.1.2.7 Service.php --- SolrPhpClient/Apache/Solr/Service.php 26 Oct 2008 15:13:07 -0000 1.1.2.7 +++ SolrPhpClient/Apache/Solr/Service.php 5 Feb 2009 21:42:05 -0000 @@ -1,27 +1,44 @@ */ -require_once('Apache/Solr/Document.php'); -require_once('Apache/Solr/Response.php'); +// See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1) +// Doesn't follow typical include path conventions, but is more convenient for users +require_once(dirname(__FILE__) . '/Document.php'); +require_once(dirname(__FILE__) . 
'/Response.php'); /** * Starting point for the Solr API. Represents a Solr server resource and has @@ -34,24 +51,24 @@ require_once('Apache/Solr/Response.php') * * if ($solr->ping()) * { - * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :) + * $solr->deleteByQuery('*:*'); //deletes ALL documents - be careful :) * - * $document = new Apache_Solr_Document(); - * $document->id = uniqid(); //or something else suitably unique + * $document = new Apache_Solr_Document(); + * $document->id = uniqid(); //or something else suitably unique * - * $document->title = 'Some Title'; - * $document->content = 'Some content for this wonderful document. Blah blah blah.'; + * $document->title = 'Some Title'; + * $document->content = 'Some content for this wonderful document. Blah blah blah.'; * - * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments - * //with an array of documents is faster + * $solr->addDocument($document); //if you're going to be adding documents in bulk using addDocuments + * //with an array of documents is faster * - * $solr->commit(); //commit to see the deletes and the document - * $solr->optimize(); //merges multiple segments into one + * $solr->commit(); //commit to see the deletes and the document + * $solr->optimize(); //merges multiple segments into one * - * //and the one we all care about, search! - * //any other common or custom parameters to the request handler can go in the - * //optional 4th array argument. - * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc')); + * //and the one we all care about, search! + * //any other common or custom parameters to the request handler can go in the + * //optional 4th array argument. + * $solr->search('content:blah', 0, 10, array('sort' => 'timestamp desc')); * } * ... 
* @@ -61,825 +78,849 @@ require_once('Apache/Solr/Response.php') */ class Apache_Solr_Service { - /** - * Response version we support - */ - const SOLR_VERSION = '1.2'; - - /** - * Response writer we support - * - * @todo Solr 1.3 release may change this to SerializedPHP or PHP implementation - */ - const SOLR_WRITER = 'json'; - - /** - * NamedList Treatment constants - */ - const NAMED_LIST_FLAT = 'flat'; - const NAMED_LIST_MAP = 'map'; - - /** - * Servlet mappings - */ - const PING_SERVLET = 'admin/ping'; - const UPDATE_SERVLET = 'update'; - const SEARCH_SERVLET = 'select'; - const THREADS_SERVLET = 'admin/threads'; - - /** - * Server identification strings - * - * @var string - */ - protected $_host, $_port, $_path; - - /** - * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in - * the returned parsed data - * - * @var boolean - */ - protected $_createDocuments = true; - - /** - * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value - * collapsed to appear as a single value would. - * - * @var boolean - */ - protected $_collapseSingleValueArrays = true; - - /** - * How NamedLists should be formatted in the output. This specifically effects facet counts. Valid values - * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}. - * - * @var string - */ - protected $_namedListTreatment = self::NAMED_LIST_MAP; - - /** - * Query delimiters. Someone might want to be able to change - * these (to use & instead of & for example), so I've provided them. 
- * - * @var string - */ - protected $_queryDelimiter = '?', $_queryStringDelimiter = '&'; - - /** - * Constructed servlet full path URLs - * - * @var string - */ - protected $_updateUrl, $_searchUrl, $_threadsUrl; - - /** - * Keep track of whether our URLs have been constructed - * - * @var boolean - */ - protected $_urlsInited = false; - - /** - * Stream context for posting - * - * @var resource - */ - protected $_postContext; - - /** - * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. - * - * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead - * - * @param string $value - * @return string - */ - static public function escape($value) - { - //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters - $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/'; - $replace = '\\\$1'; - - return preg_replace($pattern, $replace, $value); - } - - /** - * Escape a value meant to be contained in a phrase for special query characters - * - * @param string $value - * @return string - */ - static public function escapePhrase($value) - { - $pattern = '/("|\\\)/'; - $replace = '\\\$1'; - - return preg_replace($pattern, $replace, $value); - } - - /** - * Convenience function for creating phrase syntax from a value - * - * @param string $value - * @return string - */ - static public function phrase($value) - { - return '"' . self::escapePhrase($value) . '"'; - } - - /** - * Constructor. All parameters are optional and will take on default values - * if not specified. 
- * - * @param string $host - * @param string $port - * @param string $path - */ - public function __construct($host = 'localhost', $port = 8180, $path = '/solr/') - { - $this->setHost($host); - $this->setPort($port); - $this->setPath($path); - - $this->_initUrls(); - - //set up the stream context for posting with file_get_contents - $contextOpts = array( - 'http' => array( - 'method' => 'POST', - 'header' => "Content-Type: text/xml; charset=UTF-8\r\n" //php.net example showed \r\n at the end - ) - ); - - $this->_postContext = stream_context_create($contextOpts); - } - - /** - * Return a valid http URL given this server's host, port and path and a provided servlet name - * - * @param string $servlet - * @return string - */ - protected function _constructUrl($servlet, $params = array()) - { - if (count($params)) - { - //escape all parameters appropriately for inclusion in the query string - $escapedParams = array(); - - foreach ($params as $key => $value) - { - $escapedParams[] = urlencode($key) . '=' . urlencode($value); - } - - $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams); - } - else - { - $queryString = ''; - } - - return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . 
$queryString; - } - - /** - * Construct the Full URLs for the three servlets we reference - */ - protected function _initUrls() - { - //Initialize our full servlet URLs now that we have server information - $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER )); - $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET); - $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER )); - - $this->_urlsInited = true; - } - - /** - * Central method for making a get operation against this Solr Server - * - * @param string $url - * @param float $timeout Read timeout in seconds - * @return Apache_Solr_Response - * - * @todo implement timeout ability - * @throws Exception If a non 200 response status is returned - */ - protected function _sendRawGet($url, $timeout = FALSE) - { - //$http_response_header is set by file_get_contents - $http_response_header = NULL; - $response = new Apache_Solr_Response(@file_get_contents($url), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays); - - if ($response->getHttpStatus() != 200) - { - throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus()); - } - - return $response; - } - - /** - * Central method for making a post operation against this Solr Server - * - * @param string $url - * @param string $rawPost - * @param float $timeout Read timeout in seconds - * @param string $contentType - * @return Apache_Solr_Response - * - * @throws Exception If a non 200 response status is returned - */ - protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') - { - //ensure content type is correct - stream_context_set_option($this->_postContext, 'http', 'header', 'Content-Type: ' . 
$contentType); - - //set the read timeout if specified - if ($timeout !== FALSE) - { - stream_context_set_option($this->_postContext, 'http', 'timeout', $timeout); - } - - //set the content - stream_context_set_option($this->_postContext, 'http', 'content', $rawPost); - - //$http_response_header is set by file_get_contents - $response = new Apache_Solr_Response(@file_get_contents($url, false, $this->_postContext), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays); - - if ($response->getHttpStatus() != 200) - { - throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus()); - } - - return $response; - } - - /** - * Returns the set host - * - * @return string - */ - public function getHost() - { - return $this->_host; - } - - /** - * Set the host used. If empty will fallback to constants - * - * @param string $host - */ - public function setHost($host) - { - //Use the provided host or use the default - if (empty($host)) - { - throw new Exception('Host parameter is empty'); - } - else - { - $this->_host = $host; - } - - if ($this->_urlsInited) - { - $this->_initUrls(); - } - } - - /** - * Get the set port - * - * @return integer - */ - public function getPort() - { - return $this->_port; - } - - /** - * Set the port used. If empty will fallback to constants - * - * @param integer $port - */ - public function setPort($port) - { - //Use the provided port or use the default - $port = (int) $port; - - if ($port <= 0) - { - throw new Exception('Port is not a valid port number'); - } - else - { - $this->_port = $port; - } - - if ($this->_urlsInited) - { - $this->_initUrls(); - } - } - - /** - * Get the set path. - * - * @return string - */ - public function getPath() - { - return $this->_path; - } - - /** - * Set the path used. 
If empty will fallback to constants - * - * @param string $path - */ - public function setPath($path) - { - $path = trim($path, '/'); - - $this->_path = '/' . $path . '/'; - - if ($this->_urlsInited) - { - $this->_initUrls(); - } - } - - /** - * Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will - * parse the response and create {@link Apache_Solr_Document} instances in place. - * - * @param unknown_type $createDocuments - */ - public function setCreateDocuments($createDocuments) - { - $this->_createDocuments = (bool) $createDocuments; - } - - /** - * Get the current state of teh create documents flag. - * - * @return boolean - */ - public function getCreateDocuments() - { - return $this->_createDocuments; - } - - /** - * Set the collapse single value arrays flag. - * - * @param boolean $collapseSingleValueArrays - */ - public function setCollapseSingleValueArrays($collapseSingleValueArrays) - { - $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; - } - - /** - * Get the current state of the collapse single value arrays flag. - * - * @return boolean - */ - public function getCollapseSingleValueArrays() - { - return $this->_collapseSingleValueArrays; - } - - /** - * Set how NamedLists should be formatted in the response data. This mainly effects - * the facet counts format. - * - * @param string $namedListTreatment - * @throws Exception If invalid option is set - */ - public function setNamedListTreatmet($namedListTreatment) - { - switch ((string) $namedListTreatment) - { - case Apache_Solr_Service::NAMED_LIST_FLAT: - $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_FLAT; - break; - - case Apache_Solr_Service::NAMED_LIST_MAP: - $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_MAP; - break; - - default: - throw new Exception('Not a valid named list treatement option'); - } - } - - /** - * Get the current setting for named list treatment. 
- * - * @return string - */ - public function getNamedListTreatment() - { - return $this->_namedListTreatment; - } - - - /** - * Set the string used to separate the path form the query string. - * Defaulted to '?' - * - * @param string $queryDelimiter - */ - public function setQueryDelimiter($queryDelimiter) - { - $this->_queryDelimiter = $queryDelimiter; - } - - /** - * Set the string used to separate the parameters in thequery string - * Defaulted to '&' - * - * @param string $queryStringDelimiter - */ - public function setQueryStringDelimiter($queryStringDelimiter) - { - $this->_queryStringDelimiter = $queryStringDelimiter; - } - - /** - * Call the /admin/ping servlet, can be used to quickly tell if a connection to the - * server is able to be made. - * - * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2) - * @return float Actual time taken to ping the server, FALSE if timeout occurs - */ - public function ping($timeout = 2) - { - $timeout = (float) $timeout; - - if ($timeout <= 0) - { - $timeout = -1; - } - - $start = microtime(true); - - //to prevent strict errors - $errno = 0; - $errstr = ''; - - //try to connect to the host with timeout - $fp = fsockopen($this->_host, $this->_port, $errno, $errstr, $timeout); - - if ($fp) - { - //If we have a timeout set, then determine the amount of time we have left - //in the request and set the stream timeout for the write operation - if ($timeout > 0) - { - //do the calculation - $writeTimeout = $timeout - (microtime(true) - $start); - - //check if we're out of time - if ($writeTimeout <= 0) - { - fclose($fp); - return false; - } - - //convert to microseconds and set the stream timeout - $writeTimeoutInMicroseconds = (int) $writeTimeout * 1000000; - stream_set_timeout($fp, 0, $writeTimeoutInMicroseconds); - } - - $request = 'HEAD ' . $this->_path . self::PING_SERVLET . ' HTTP/1.1' . "\r\n" . - 'host: ' . $this->_host . "\r\n" . - 'Connection: close' . "\r\n" . 
- "\r\n"; - - fwrite($fp, $request); - - //check the stream meta data to see if we timed out during the operation - $metaData = stream_get_meta_data($fp); - - if (isset($metaData['timeout']) && $metaData['timeout']) - { - fclose($fp); - return false; - } - - - //if we have a timeout set and have made it this far, determine the amount of time - //still remaining and set the timeout appropriately before the read operation - if ($timeout > 0) - { - //do the calculation - $readTimeout = $timeout - (microtime(true) - $start); - - //check if we've run out of time - if ($readTimeout <= 0) - { - fclose($fp); - return false; - } - - //convert to microseconds and set the stream timeout - $readTimeoutInMicroseconds = $readTimeout * 1000000; - stream_set_timeout($fp, 0, $readTimeoutInMicroseconds); - } - - //at the very least we should get a response header line of - //HTTP/1.1 200 OK - $response = fread($fp, 15); - - //check the stream meta data to see if we timed out during the operation - $metaData = stream_get_meta_data($fp); - fclose($fp); //we're done with the connection - ignore the rest - - if (isset($metaData['timeout']) && $metaData['timeout']) - { - return false; - } - - //finally, check the response header line - if ($response != 'HTTP/1.1 200 OK') - { - return false; - } - - //we made it, return the approximate ping time - return microtime(true) - $start; - } - - //we weren't able to make a connection - return false; - } - - /** - * Call the /admin/threads servlet and retrieve information about all threads in the - * Solr servlet's thread group. Useful for diagnostics. - * - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function threads() - { - return $this->_sendRawGet($this->_threadsUrl); - } - - /** - * Raw Add Method. Takes a raw post body and sends it to the update service. Post body - * should be a complete and well formed "add" xml document. 
- * - * @param string $rawPost - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function add($rawPost) - { - return $this->_sendRawPost($this->_updateUrl, $rawPost); - } - - /** - * Add a Solr Document to the index - * - * @param Apache_Solr_Document $document - * @param boolean $allowDups - * @param boolean $overwritePending - * @param boolean $overwriteCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) - { - $dupValue = $allowDups ? 'true' : 'false'; - $pendingValue = $overwritePending ? 'true' : 'false'; - $committedValue = $overwriteCommitted ? 'true' : 'false'; - - $rawPost = ''; - $rawPost .= $this->_documentToXmlFragment($document); - $rawPost .= ''; - - return $this->add($rawPost); - } - - /** - * Add an array of Solr Documents to the index all at once - * - * @param array $documents Should be an array of Apache_Solr_Document instances - * @param boolean $allowDups - * @param boolean $overwritePending - * @param boolean $overwriteCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) - { - $dupValue = $allowDups ? 'true' : 'false'; - $pendingValue = $overwritePending ? 'true' : 'false'; - $committedValue = $overwriteCommitted ? 
'true' : 'false'; - - $rawPost = ''; - - foreach ($documents as $document) - { - if ($document instanceof Apache_Solr_Document) - { - $rawPost .= $this->_documentToXmlFragment($document); - } - } - - $rawPost .= ''; - - return $this->add($rawPost); - } - - /** - * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call - * - * @return string - */ - protected function _documentToXmlFragment(Apache_Solr_Document $document) - { - $xml = ''; - - foreach ($document as $key => $value) - { - $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8'); - - if (is_array($value)) - { - foreach ($value as $multivalue) - { - $multivalue = htmlspecialchars($multivalue, ENT_NOQUOTES, 'UTF-8'); - - $xml .= '' . $multivalue . ''; - } - } - else - { - $value = htmlspecialchars($value, ENT_NOQUOTES, 'UTF-8'); - - $xml .= '' . $value . ''; - } - } - - $xml .= ''; - - return $xml; - } - - /** - * Send a commit command. Will be synchronous unless both wait parameters are set to false. - * - * @param boolean $optimize Defaults to true - * @param boolean $waitFlush Defaults to true - * @param boolean $waitSearcher Defaults to true - * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600) - { - $optimizeValue = $optimize ? 'true' : 'false'; - $flushValue = $waitFlush ? 'true' : 'false'; - $searcherValue = $waitSearcher ? 'true' : 'false'; - - $rawPost = ''; - - return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); - } - - /** - * Raw Delete Method. Takes a raw post body and sends it to the update service. 
Body should be - * a complete and well formed "delete" xml document - * - * @param string $rawPost Expected to be utf-8 encoded xml document - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function delete($rawPost) - { - return $this->_sendRawPost($this->_updateUrl, $rawPost); - } - - /** - * Create a delete document based on document ID - * - * @param string $id Expected to be utf-8 encoded - * @param boolean $fromPending - * @param boolean $fromCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function deleteById($id, $fromPending = true, $fromCommitted = true) - { - $pendingValue = $fromPending ? 'true' : 'false'; - $committedValue = $fromCommitted ? 'true' : 'false'; - - //escape special xml characters - $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); - - $rawPost = '' . $id . ''; - - return $this->delete($rawPost); - } - - /** - * Create a delete document based on a query and submit it - * - * @param string $rawQuery Expected to be utf-8 encoded - * @param boolean $fromPending - * @param boolean $fromCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true) - { - $pendingValue = $fromPending ? 'true' : 'false'; - $committedValue = $fromCommitted ? 'true' : 'false'; - - // escape special xml characters - $rawQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8'); - - $rawPost = '' . $rawQuery . ''; - - return $this->delete($rawPost); - } - - /** - * Send an optimize command. Will be synchronous unless both wait parameters are set - * to false. 
- * - * @param boolean $waitFlush - * @param boolean $waitSearcher - * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception) - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) - { - $flushValue = $waitFlush ? 'true' : 'false'; - $searcherValue = $waitSearcher ? 'true' : 'false'; - - $rawPost = ''; - - return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); - } - - /** - * Simple Search interface - * - * @param string $query The raw query string - * @param int $offset The starting offset for result documents - * @param int $limit The maximum number of result documents to return - * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field) - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function search($query, $offset = 0, $limit = 10, $params = array()) - { - if (!is_array($params)) - { - $params = array(); - } - - // construct our full parameters - // sending the version is important in case the format changes - $params['version'] = self::SOLR_VERSION; - - // common parameters in this interface - $params['wt'] = self::SOLR_WRITER; - $params['json.nl'] = $this->_namedListTreatment; - - $params['q'] = $query; - $params['start'] = $offset; - $params['rows'] = $limit; - - // use http_build_query to encode our arguments because its faster - // than urlencoding all the parts ourselves in a loop - $queryString = http_build_query($params, null, $this->_queryStringDelimiter); - - // because http_build_query treats arrays differently than we want to, correct the query - // string by changing foo[#]=bar (# being an actual number) parameter strings to just - // multiple foo=bar 
strings. This regex should always work since '=' will be urlencoded - // anywhere else the regex isn't expecting it - $queryString = preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString); + /** + * Response version we support + */ + const SOLR_VERSION = '1.2'; + + /** + * Response writer we support + * + * @todo Solr 1.3 release may change this to SerializedPHP or PHP implementation + */ + const SOLR_WRITER = 'json'; + + /** + * NamedList Treatment constants + */ + const NAMED_LIST_FLAT = 'flat'; + const NAMED_LIST_MAP = 'map'; + + /** + * Servlet mappings + */ + const PING_SERVLET = 'admin/ping'; + const UPDATE_SERVLET = 'update'; + const SEARCH_SERVLET = 'select'; + const THREADS_SERVLET = 'admin/threads'; + + /** + * Server identification strings + * + * @var string + */ + protected $_host, $_port, $_path; + + /** + * Whether {@link Apache_Solr_Response} objects should create {@link Apache_Solr_Document}s in + * the returned parsed data + * + * @var boolean + */ + protected $_createDocuments = true; + + /** + * Whether {@link Apache_Solr_Response} objects should have multivalue fields with only a single value + * collapsed to appear as a single value would. + * + * @var boolean + */ + protected $_collapseSingleValueArrays = true; + + /** + * How NamedLists should be formatted in the output. This specifically effects facet counts. Valid values + * are {@link Apache_Solr_Service::NAMED_LIST_MAP} (default) or {@link Apache_Solr_Service::NAMED_LIST_FLAT}. + * + * @var string + */ + protected $_namedListTreatment = self::NAMED_LIST_MAP; + + /** + * Query delimiters. Someone might want to be able to change + * these (to use & instead of & for example), so I've provided them. 
+ * + * @var string + */ + protected $_queryDelimiter = '?', $_queryStringDelimiter = '&'; + + /** + * Constructed servlet full path URLs + * + * @var string + */ + protected $_updateUrl, $_searchUrl, $_threadsUrl; + + /** + * Keep track of whether our URLs have been constructed + * + * @var boolean + */ + protected $_urlsInited = false; + + /** + * Stream context for posting + * + * @var resource + */ + protected $_postContext; + + /** + * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. + * + * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead + * + * @param string $value + * @return string + */ + static public function escape($value) + { + //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters + $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/'; + $replace = '\\\$1'; + + return preg_replace($pattern, $replace, $value); + } + + /** + * Escape a value meant to be contained in a phrase for special query characters + * + * @param string $value + * @return string + */ + static public function escapePhrase($value) + { + $pattern = '/("|\\\)/'; + $replace = '\\\$1'; + + return preg_replace($pattern, $replace, $value); + } + + /** + * Convenience function for creating phrase syntax from a value + * + * @param string $value + * @return string + */ + static public function phrase($value) + { + return '"' . self::escapePhrase($value) . '"'; + } + + /** + * Constructor. All parameters are optional and will take on default values + * if not specified. 
+ * + * @param string $host + * @param string $port + * @param string $path + */ + public function __construct($host = 'localhost', $port = 8180, $path = '/solr/') + { + $this->setHost($host); + $this->setPort($port); + $this->setPath($path); + + $this->_initUrls(); + + //set up the stream context for posting with file_get_contents + $contextOpts = array( + 'http' => array( + 'method' => 'POST', + 'header' => "Content-Type: text/xml; charset=UTF-8\r\n" //php.net example showed \r\n at the end + ) + ); + + $this->_postContext = stream_context_create($contextOpts); + } + + /** + * Return a valid http URL given this server's host, port and path and a provided servlet name + * + * @param string $servlet + * @return string + */ + protected function _constructUrl($servlet, $params = array()) + { + if (count($params)) + { + //escape all parameters appropriately for inclusion in the query string + $escapedParams = array(); + + foreach ($params as $key => $value) + { + $escapedParams[] = urlencode($key) . '=' . urlencode($value); + } + + $queryString = $this->_queryDelimiter . implode($this->_queryStringDelimiter, $escapedParams); + } + else + { + $queryString = ''; + } + + return 'http://' . $this->_host . ':' . $this->_port . $this->_path . $servlet . 
$queryString; + } + + /** + * Construct the Full URLs for the three servlets we reference + */ + protected function _initUrls() + { + //Initialize our full servlet URLs now that we have server information + $this->_updateUrl = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => self::SOLR_WRITER )); + $this->_searchUrl = $this->_constructUrl(self::SEARCH_SERVLET); + $this->_threadsUrl = $this->_constructUrl(self::THREADS_SERVLET, array('wt' => self::SOLR_WRITER )); + + $this->_urlsInited = true; + } + + /** + * Central method for making a get operation against this Solr Server + * + * @param string $url + * @param float $timeout Read timeout in seconds + * @return Apache_Solr_Response + * + * @todo implement timeout ability + * @throws Exception If a non 200 response status is returned + */ + protected function _sendRawGet($url, $timeout = FALSE) + { + //$http_response_header is set by file_get_contents + $response = new Apache_Solr_Response(@file_get_contents($url), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays); + + if ($response->getHttpStatus() != 200) + { + throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus()); + } + + return $response; + } + + /** + * Central method for making a post operation against this Solr Server + * + * @param string $url + * @param string $rawPost + * @param float $timeout Read timeout in seconds + * @param string $contentType + * @return Apache_Solr_Response + * + * @throws Exception If a non 200 response status is returned + */ + protected function _sendRawPost($url, $rawPost, $timeout = FALSE, $contentType = 'text/xml; charset=UTF-8') + { + //ensure content type is correct + stream_context_set_option($this->_postContext, 'http', 'header', 'Content-Type: ' . 
$contentType); + + //set the read timeout if specified + if ($timeout !== FALSE) + { + stream_context_set_option($this->_postContext, 'http', 'timeout', $timeout); + } + + //set the content + stream_context_set_option($this->_postContext, 'http', 'content', $rawPost); + + //$http_response_header is set by file_get_contents + $response = new Apache_Solr_Response(@file_get_contents($url, false, $this->_postContext), $http_response_header, $this->_createDocuments, $this->_collapseSingleValueArrays); + + if ($response->getHttpStatus() != 200) + { + throw new Exception('"' . $response->getHttpStatus() . '" Status: ' . $response->getHttpStatusMessage(), $response->getHttpStatus()); + } + + return $response; + } + + /** + * Returns the set host + * + * @return string + */ + public function getHost() + { + return $this->_host; + } + + /** + * Set the host used. If empty will fallback to constants + * + * @param string $host + */ + public function setHost($host) + { + //Use the provided host or use the default + if (empty($host)) + { + throw new Exception('Host parameter is empty'); + } + else + { + $this->_host = $host; + } + + if ($this->_urlsInited) + { + $this->_initUrls(); + } + } + + /** + * Get the set port + * + * @return integer + */ + public function getPort() + { + return $this->_port; + } + + /** + * Set the port used. If empty will fallback to constants + * + * @param integer $port + */ + public function setPort($port) + { + //Use the provided port or use the default + $port = (int) $port; + + if ($port <= 0) + { + throw new Exception('Port is not a valid port number'); + } + else + { + $this->_port = $port; + } + + if ($this->_urlsInited) + { + $this->_initUrls(); + } + } + + /** + * Get the set path. + * + * @return string + */ + public function getPath() + { + return $this->_path; + } + + /** + * Set the path used. 
If empty will fallback to constants + * + * @param string $path + */ + public function setPath($path) + { + $path = trim($path, '/'); + + $this->_path = '/' . $path . '/'; + + if ($this->_urlsInited) + { + $this->_initUrls(); + } + } + + /** + * Set the create documents flag. This determines whether {@link Apache_Solr_Response} objects will + * parse the response and create {@link Apache_Solr_Document} instances in place. + * + * @param unknown_type $createDocuments + */ + public function setCreateDocuments($createDocuments) + { + $this->_createDocuments = (bool) $createDocuments; + } + + /** + * Get the current state of teh create documents flag. + * + * @return boolean + */ + public function getCreateDocuments() + { + return $this->_createDocuments; + } + + /** + * Set the collapse single value arrays flag. + * + * @param boolean $collapseSingleValueArrays + */ + public function setCollapseSingleValueArrays($collapseSingleValueArrays) + { + $this->_collapseSingleValueArrays = (bool) $collapseSingleValueArrays; + } + + /** + * Get the current state of the collapse single value arrays flag. + * + * @return boolean + */ + public function getCollapseSingleValueArrays() + { + return $this->_collapseSingleValueArrays; + } + + /** + * Set how NamedLists should be formatted in the response data. This mainly effects + * the facet counts format. + * + * @param string $namedListTreatment + * @throws Exception If invalid option is set + */ + public function setNamedListTreatmet($namedListTreatment) + { + switch ((string) $namedListTreatment) + { + case Apache_Solr_Service::NAMED_LIST_FLAT: + $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_FLAT; + break; + + case Apache_Solr_Service::NAMED_LIST_MAP: + $this->_namedListTreatment = Apache_Solr_Service::NAMED_LIST_MAP; + break; + + default: + throw new Exception('Not a valid named list treatement option'); + } + } + + /** + * Get the current setting for named list treatment. 
+ * + * @return string + */ + public function getNamedListTreatment() + { + return $this->_namedListTreatment; + } + + + /** + * Set the string used to separate the path form the query string. + * Defaulted to '?' + * + * @param string $queryDelimiter + */ + public function setQueryDelimiter($queryDelimiter) + { + $this->_queryDelimiter = $queryDelimiter; + } + + /** + * Set the string used to separate the parameters in thequery string + * Defaulted to '&' + * + * @param string $queryStringDelimiter + */ + public function setQueryStringDelimiter($queryStringDelimiter) + { + $this->_queryStringDelimiter = $queryStringDelimiter; + } + + /** + * Call the /admin/ping servlet, can be used to quickly tell if a connection to the + * server is able to be made. + * + * @param float $timeout maximum time to wait for ping in seconds, -1 for unlimited (default is 2) + * @return float Actual time taken to ping the server, FALSE if timeout occurs + */ + public function ping($timeout = 2) + { + $timeout = (float) $timeout; + + if ($timeout <= 0) + { + $timeout = -1; + } + + $start = microtime(true); + + //to prevent strict errors + $errno = 0; + $errstr = ''; + + //try to connect to the host with timeout + $fp = fsockopen($this->_host, $this->_port, $errno, $errstr, $timeout); + + if ($fp) + { + //If we have a timeout set, then determine the amount of time we have left + //in the request and set the stream timeout for the write operation + if ($timeout > 0) + { + //do the calculation + $writeTimeout = $timeout - (microtime(true) - $start); + + //check if we're out of time + if ($writeTimeout <= 0) + { + fclose($fp); + return false; + } + + //convert to microseconds and set the stream timeout + $writeTimeoutInMicroseconds = (int) $writeTimeout * 1000000; + stream_set_timeout($fp, 0, $writeTimeoutInMicroseconds); + } + + $request = 'HEAD ' . $this->_path . self::PING_SERVLET . ' HTTP/1.1' . "\r\n" . + 'host: ' . $this->_host . "\r\n" . + 'Connection: close' . "\r\n" . 
+ "\r\n"; + + fwrite($fp, $request); + + //check the stream meta data to see if we timed out during the operation + $metaData = stream_get_meta_data($fp); + + if (isset($metaData['timeout']) && $metaData['timeout']) + { + fclose($fp); + return false; + } + + + //if we have a timeout set and have made it this far, determine the amount of time + //still remaining and set the timeout appropriately before the read operation + if ($timeout > 0) + { + //do the calculation + $readTimeout = $timeout - (microtime(true) - $start); + + //check if we've run out of time + if ($readTimeout <= 0) + { + fclose($fp); + return false; + } + + //convert to microseconds and set the stream timeout + $readTimeoutInMicroseconds = $readTimeout * 1000000; + stream_set_timeout($fp, 0, $readTimeoutInMicroseconds); + } + + //at the very least we should get a response header line of + //HTTP/1.1 200 OK + $response = fread($fp, 15); + + //check the stream meta data to see if we timed out during the operation + $metaData = stream_get_meta_data($fp); + fclose($fp); //we're done with the connection - ignore the rest + + if (isset($metaData['timeout']) && $metaData['timeout']) + { + return false; + } + + //finally, check the response header line + if ($response != 'HTTP/1.1 200 OK') + { + return false; + } + + //we made it, return the approximate ping time + return microtime(true) - $start; + } + + //we weren't able to make a connection + return false; + } + + /** + * Call the /admin/threads servlet and retrieve information about all threads in the + * Solr servlet's thread group. Useful for diagnostics. + * + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function threads() + { + return $this->_sendRawGet($this->_threadsUrl); + } + + /** + * Raw Add Method. Takes a raw post body and sends it to the update service. Post body + * should be a complete and well formed "add" xml document. 
+ * + * @param string $rawPost + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function add($rawPost) + { + return $this->_sendRawPost($this->_updateUrl, $rawPost); + } + + /** + * Add a Solr Document to the index + * + * @param Apache_Solr_Document $document + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) + { + $dupValue = $allowDups ? 'true' : 'false'; + $pendingValue = $overwritePending ? 'true' : 'false'; + $committedValue = $overwriteCommitted ? 'true' : 'false'; + + $rawPost = ''; + $rawPost .= $this->_documentToXmlFragment($document); + $rawPost .= ''; + + return $this->add($rawPost); + } + + /** + * Add an array of Solr Documents to the index all at once + * + * @param array $documents Should be an array of Apache_Solr_Document instances + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) + { + $dupValue = $allowDups ? 'true' : 'false'; + $pendingValue = $overwritePending ? 'true' : 'false'; + $committedValue = $overwriteCommitted ? 
'true' : 'false'; + + $rawPost = ''; + + foreach ($documents as $document) + { + if ($document instanceof Apache_Solr_Document) + { + $rawPost .= $this->_documentToXmlFragment($document); + } + } + + $rawPost .= ''; + + return $this->add($rawPost); + } + + /** + * Create an XML fragment from a {@link Apache_Solr_Document} instance appropriate for use inside a Solr add call + * + * @return string + */ + protected function _documentToXmlFragment(Apache_Solr_Document $document) + { + $xml = 'getBoost() !== false) + { + $xml .= ' boost="' . $document->getBoost() . '"'; + } + + $xml .= '>'; + + foreach ($document as $key => $value) + { + $key = htmlspecialchars($key, ENT_QUOTES, 'UTF-8'); + $fieldBoost = $document->getFieldBoost($key); + + if (is_array($value)) + { + foreach ($value as $multivalue) + { + $xml .= ''; + } + } + else + { + $xml .= ''; + } + } + + $xml .= ''; + + return $xml; + } + + /** + * Send a commit command. Will be synchronous unless both wait parameters are set to false. + * + * @param boolean $optimize Defaults to true + * @param boolean $waitFlush Defaults to true + * @param boolean $waitSearcher Defaults to true + * @param float $timeout Maximum expected duration (in seconds) of the commit operation on the server (otherwise, will throw a communication exception). Defaults to 1 hour + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $optimizeValue = $optimize ? 'true' : 'false'; + $flushValue = $waitFlush ? 'true' : 'false'; + $searcherValue = $waitSearcher ? 'true' : 'false'; + + $rawPost = ''; + + return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); + } + + /** + * Raw Delete Method. Takes a raw post body and sends it to the update service. 
Body should be + * a complete and well formed "delete" xml document + * + * @param string $rawPost Expected to be utf-8 encoded xml document + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function delete($rawPost) + { + return $this->_sendRawPost($this->_updateUrl, $rawPost); + } + + /** + * Create a delete document based on document ID + * + * @param string $id Expected to be utf-8 encoded + * @param boolean $fromPending + * @param boolean $fromCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function deleteById($id, $fromPending = true, $fromCommitted = true) + { + $pendingValue = $fromPending ? 'true' : 'false'; + $committedValue = $fromCommitted ? 'true' : 'false'; + + //escape special xml characters + $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); + + $rawPost = '' . $id . ''; + + return $this->delete($rawPost); + } + + /** + * Create a delete document based on a query and submit it + * + * @param string $rawQuery Expected to be utf-8 encoded + * @param boolean $fromPending + * @param boolean $fromCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true) + { + $pendingValue = $fromPending ? 'true' : 'false'; + $committedValue = $fromCommitted ? 'true' : 'false'; + + // escape special xml characters + $rawQuery = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8'); + + $rawPost = '' . $rawQuery . ''; + + return $this->delete($rawPost); + } + + /** + * Send an optimize command. Will be synchronous unless both wait parameters are set + * to false. 
+ * + * @param boolean $waitFlush + * @param boolean $waitSearcher + * @param float $timeout Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception) + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function optimize($waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $flushValue = $waitFlush ? 'true' : 'false'; + $searcherValue = $waitSearcher ? 'true' : 'false'; + + $rawPost = ''; + + return $this->_sendRawPost($this->_updateUrl, $rawPost, $timeout); + } + + /** + * Simple Search interface + * + * @param string $query The raw query string + * @param int $offset The starting offset for result documents + * @param int $limit The maximum number of result documents to return + * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field) + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function search($query, $offset = 0, $limit = 10, $params = array()) + { + if (!is_array($params)) + { + $params = array(); + } + + // construct our full parameters + // sending the version is important in case the format changes + $params['version'] = self::SOLR_VERSION; + + // common parameters in this interface + $params['wt'] = self::SOLR_WRITER; + $params['json.nl'] = $this->_namedListTreatment; + + $params['q'] = $query; + $params['start'] = $offset; + $params['rows'] = $limit; + + // use http_build_query to encode our arguments because its faster + // than urlencoding all the parts ourselves in a loop + $queryString = http_build_query($params, null, $this->_queryStringDelimiter); + + // because http_build_query treats arrays differently than we want to, correct the query + // string by changing foo[#]=bar (# being an actual number) parameter strings to just + // multiple foo=bar 
strings. This regex should always work since '=' will be urlencoded + // anywhere else the regex isn't expecting it + $queryString = preg_replace('/%5B(?:[0-9]|[1-9][0-9]+)%5D=/', '=', $queryString); - return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString); - } + return $this->_sendRawGet($this->_searchUrl . $this->_queryDelimiter . $queryString); + } } \ No newline at end of file Index: SolrPhpClient/Apache/Solr/Service/Balancer.php =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/SolrPhpClient/Apache/Solr/Service/Balancer.php,v retrieving revision 1.1.2.3 diff -u -p -r1.1.2.3 Balancer.php --- SolrPhpClient/Apache/Solr/Service/Balancer.php 23 Oct 2008 21:12:00 -0000 1.1.2.3 +++ SolrPhpClient/Apache/Solr/Service/Balancer.php 5 Feb 2009 21:42:05 -0000 @@ -1,746 +1,763 @@ , Dan Wolfe */ -require_once('Apache/Solr/Service.php'); +// See Issue #1 (http://code.google.com/p/solr-php-client/issues/detail?id=1) +// Doesn't follow typical include path conventions, but is more convenient for users +require_once(dirname(dirname(__FILE__)) . '/Service.php'); /** * Reference Implementation for using multiple Solr services in a distribution. 
Functionality * includes: - * routing of read / write operations - * failover (on selection) for multiple read servers + * routing of read / write operations + * failover (on selection) for multiple read servers */ class Apache_Solr_Service_Balancer { - protected $_createDocuments = true; + protected $_createDocuments = true; + + protected $_readableServices = array(); + protected $_writeableServices = array(); + + protected $_currentReadService = null; + protected $_currentWriteService = null; - protected $_readableServices = array(); - protected $_writeableServices = array(); + protected $_readPingTimeout = 2; + protected $_writePingTimeout = 4; + + // Configuration for server selection backoff intervals + protected $_useBackoff = false; // Set to true to use more resillient write server selection + protected $_backoffLimit = 600; // 10 minute default maximum + protected $_backoffEscalation = 2.0; // Rate at which to increase backoff period + protected $_defaultBackoff = 2.0; // Default backoff interval + + /** + * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. + * + * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead + * + * @param string $value + * @return string + */ + static public function escape($value) + { + return Apache_Solr_Service::escape($value); + } + + /** + * Escape a value meant to be contained in a phrase for special query characters + * + * @param string $value + * @return string + */ + static public function escapePhrase($value) + { + return Apache_Solr_Service::escapePhrase($value); + } + + /** + * Convenience function for creating phrase syntax from a value + * + * @param string $value + * @return string + */ + static public function phrase($value) + { + return Apache_Solr_Service::phrase($value); + } + + /** + * Constructor. 
Takes arrays of read and write service instances or descriptions + * + * @param array $readableServices + * @param array $writeableServices + */ + public function __construct($readableServices = array(), $writeableServices = array()) + { + //setup readable services + foreach ($readableServices as $service) + { + $this->addReadService($service); + } + + //setup writeable services + foreach ($writeableServices as $service) + { + $this->addWriteService($service); + } + } + + public function setReadPingTimeout($timeout) + { + $this->_readPingTimeout = $timeout; + } + + public function setWritePingTimeout($timeout) + { + $this->_writePingTimeout = $timeout; + } + + public function setUseBackoff($enable) + { + $this->_useBackoff = $enable; + } + + /** + * Generates a service ID + * + * @param string $host + * @param integer $port + * @param string $path + * @return string + */ + protected function _getServiceId($host, $port, $path) + { + return $host . ':' . $port . $path; + } + + /** + * Adds a service instance or service descriptor (if it is already + * not added) + * + * @param mixed $service + * + * @throws Exception If service descriptor is not valid + */ + public function addReadService($service) + { + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + + $this->_readableServices[$id] = $service; + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + + $this->_readableServices[$id] = $service; + } + else + { + throw new Exception('A Readable Service description array does not have all required elements of host, port, and path'); + } + } + } + + /** + * Removes a service instance or descriptor from the available services + * + * @param mixed $service + * + * @throws Exception If service descriptor 
is not valid + */ + public function removeReadService($service) + { + $id = ''; + + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + } + else + { + throw new Exception('A Readable Service description array does not have all required elements of host, port, and path'); + } + } + else if (is_string($service)) + { + $id = $service; + } + + if ($id && isset($this->_readableServices[$id])) + { + unset($this->_readableServices[$id]); + } + } + + /** + * Adds a service instance or service descriptor (if it is already + * not added) + * + * @param mixed $service + * + * @throws Exception If service descriptor is not valid + */ + public function addWriteService($service) + { + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); + + $this->_writeableServices[$id] = $service; + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + + $this->_writeableServices[$id] = $service; + } + else + { + throw new Exception('A Writeable Service description array does not have all required elements of host, port, and path'); + } + } + } + + /** + * Removes a service instance or descriptor from the available services + * + * @param mixed $service + * + * @throws Exception If service descriptor is not valid + */ + public function removeWriteService($service) + { + $id = ''; + + if ($service instanceof Apache_Solr_Service) + { + $id = $this->_getServiceId($service->getHost(), $service->getPort(), 
$service->getPath()); + } + else if (is_array($service)) + { + if (isset($service['host']) && isset($service['port']) && isset($service['path'])) + { + $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); + } + else + { + throw new Exception('A Readable Service description array does not have all required elements of host, port, and path'); + } + } + else if (is_string($service)) + { + $id = $service; + } + + if ($id && isset($this->_writeableServices[$id])) + { + unset($this->_writeableServices[$id]); + } + } + + /** + * Iterate through available read services and select the first with a ping + * that satisfies configured timeout restrictions (or the default) + * + * @return Apache_Solr_Service + * + * @throws Exception If there are no read services that meet requirements + */ + protected function _selectReadService($forceSelect = false) + { + if (!$this->_currentReadService || !isset($this->_readableServices[$this->_currentReadService]) || $forceSelect) + { + if ($this->_currentReadService && isset($this->_readableServices[$this->_currentReadService]) && $forceSelect) + { + // we probably had a communication error, ping the current read service, remove it if it times out + if ($this->_readableServices[$this->_currentReadService]->ping($this->_readPingTimeout) === false) + { + $this->removeReadService($this->_currentReadService); + } + } + + if (count($this->_readableServices)) + { + // select one of the read services at random + $ids = array_keys($this->_readableServices); + + $id = $ids[rand(0, count($ids) - 1)]; + $service = $this->_readableServices[$id]; + + if (is_array($service)) + { + //convert the array definition to a client object + $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); + $this->_readableServices[$id] = $service; + } + + $service->setCreateDocuments($this->_createDocuments); + $this->_currentReadService = $id; + } + else + { + throw new Exception('No 
read services were available'); + } + } + + return $this->_readableServices[$this->_currentReadService]; + } + + /** + * Iterate through available write services and select the first with a ping + * that satisfies configured timeout restrictions (or the default) + * + * @return Apache_Solr_Service + * + * @throws Exception If there are no write services that meet requirements + */ + protected function _selectWriteService($forceSelect = false) + { + if($this->_useBackoff) + { + return $this->_selectWriteServiceSafe($forceSelect); + } + + if (!$this->_currentWriteService || !isset($this->_writeableServices[$this->_currentWriteService]) || $forceSelect) + { + if ($this->_currentWriteService && isset($this->_writeableServices[$this->_currentWriteService]) && $forceSelect) + { + // we probably had a communication error, ping the current read service, remove it if it times out + if ($this->_writeableServices[$this->_currentWriteService]->ping($this->_writePingTimeout) === false) + { + $this->removeWriteService($this->_currentWriteService); + } + } + + if (count($this->_writeableServices)) + { + // select one of the read services at random + $ids = array_keys($this->_writeableServices); + + $id = $ids[rand(0, count($ids) - 1)]; + $service = $this->_writeableServices[$id]; + + if (is_array($service)) + { + //convert the array definition to a client object + $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); + $this->_writeableServices[$id] = $service; + } + + $this->_currentWriteService = $id; + } + else + { + throw new Exception('No write services were available'); + } + } + + return $this->_writeableServices[$this->_currentWriteService]; + } + + /** + * Iterate through available write services and select the first with a ping + * that satisfies configured timeout restrictions (or the default). The + * timeout period will increase until a connection is made or the limit is + * reached. 
This will allow for increased reliability with heavily loaded + * server(s). + * + * @return Apache_Solr_Service + * + * @throws Exception If there are no write services that meet requirements + */ + + protected function _selectWriteServiceSafe($forceSelect = false) + { + if (!$this->_currentWriteService || !isset($this->_writeableServices[$this->_currentWriteService]) || $forceSelect) + { + if (count($this->_writeableServices)) + { + $backoff = $this->_defaultBackoff; + + do { + // select one of the read services at random + $ids = array_keys($this->_writeableServices); + + $id = $ids[rand(0, count($ids) - 1)]; + $service = $this->_writeableServices[$id]; + + if (is_array($service)) + { + //convert the array definition to a client object + $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); + $this->_writeableServices[$id] = $service; + } + + $this->_currentWriteService = $id; + + $backoff *= $this->_backoffEscalation; + + if($backoff > $this->_backoffLimit) + { + throw new Exception('No write services were available. All timeouts exceeded.'); + } + + } while($this->_writeableServices[$this->_currentWriteService]->ping($backoff) === false); + } + else + { + throw new Exception('No write services were available'); + } + } + + return $this->_writeableServices[$this->_currentWriteService]; + } + + public function setCreateDocuments($createDocuments) + { + $this->_createDocuments = (bool) $createDocuments; + + if ($this->_currentReadService) + { + $service = $this->_selectReadService(); + $service->setCreateDocuments($createDocuments); + } + } + + public function getCreateDocuments() + { + return $this->_createDocuments; + } + + /** + * Raw Add Method. Takes a raw post body and sends it to the update service. Post body + * should be a complete and well formed "add" xml document. 
+ * + * @param string $rawPost + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function add($rawPost) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->add($rawPost); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Add a Solr Document to the index + * + * @param Apache_Solr_Document $document + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->addDocument($document, $allowDups, $overwritePending, $overwriteCommitted); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Add an array of Solr Documents to the index all at once + * + * @param array $documents Should be an array of Apache_Solr_Document instances + * @param boolean $allowDups + * @param boolean $overwritePending + * @param boolean $overwriteCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->addDocuments($documents, $allowDups, $overwritePending, $overwriteCommitted); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT 
COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Send a commit command. Will be synchronous unless both wait parameters are set + * to false. + * + * @param boolean $waitFlush + * @param boolean $waitSearcher + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->commit($optimize, $waitFlush, $waitSearcher, $timeout); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be + * a complete and well formed "delete" xml document + * + * @param string $rawPost + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function delete($rawPost) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->delete($rawPost); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Create a delete document based on document ID + * + * @param string $id + * @param boolean $fromPending + * @param boolean $fromCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function deleteById($id, $fromPending = true, $fromCommitted = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->deleteById($id, $fromPending, $fromCommitted); + } + catch (Exception $e) + 
{ + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Create a delete document based on a query and submit it + * + * @param string $rawQuery + * @param boolean $fromPending + * @param boolean $fromCommitted + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->deleteByQuery($rawQuery, $fromPending, $fromCommitted); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Send an optimize command. Will be synchronous unless both wait parameters are set + * to false. + * + * @param boolean $waitFlush + * @param boolean $waitSearcher + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function optimize($waitFlush = true, $waitSearcher = true) + { + $service = $this->_selectWriteService(); + + do + { + try + { + return $service->optimize($waitFlush, $waitSearcher); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } + + $service = $this->_selectWriteService(true); + } while ($service); + + return false; + } + + /** + * Simple Search interface + * + * @param string $query The raw query string + * @param int $offset The starting offset for result documents + * @param int $limit The maximum number of result documents to return + * @param array $params key / value pairs for query parameters, use arrays for multivalued parameters + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public 
function search($query, $offset = 0, $limit = 10, $params = array()) + { + $service = $this->_selectReadService(); + + do + { + try + { + return $service->search($query, $offset, $limit, $params); + } + catch (Exception $e) + { + if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR + { + throw $e; + } + } - protected $_currentReadService = null; - protected $_currentWriteService = null; + $service = $this->_selectReadService(true); + } while ($service); - protected $_readPingTimeout = 2; - protected $_writePingTimeout = 4; - - // Configuration for server selection backoff intervals - protected $_useBackoff = false; // Set to true to use more resillient write server selection - protected $_backoffLimit = 600; // 10 minute default maximum - protected $_backoffEscalation = 2.0; // Rate at which to increase backoff period - protected $_defaultBackoff = 2.0; // Default backoff interval - - /** - * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc. - * - * NOTE: inside a phrase fewer characters need escaped, use {@link Apache_Solr_Service::escapePhrase()} instead - * - * @param string $value - * @return string - */ - static public function escape($value) - { - return Apache_Solr_Service::escape($value); - } - - /** - * Escape a value meant to be contained in a phrase for special query characters - * - * @param string $value - * @return string - */ - static public function escapePhrase($value) - { - return Apache_Solr_Service::escapePhrase($value); - } - - /** - * Convenience function for creating phrase syntax from a value - * - * @param string $value - * @return string - */ - static public function phrase($value) - { - return Apache_Solr_Service::phrase($value); - } - - /** - * Constructor. 
Takes arrays of read and write service instances or descriptions - * - * @param array $readableServices - * @param array $writeableServices - */ - public function __construct($readableServices = array(), $writeableServices = array()) - { - //setup readable services - foreach ($readableServices as $service) - { - $this->addReadService($service); - } - - //setup writeable services - foreach ($writeableServices as $service) - { - $this->addWriteService($service); - } - } - - public function setReadPingTimeout($timeout) - { - $this->_readPingTimeout = $timeout; - } - - public function setWritePingTimeout($timeout) - { - $this->_writePingTimeout = $timeout; - } - - public function setUseBackoff($enable) - { - $this->_useBackoff = $enable; - } - - /** - * Generates a service ID - * - * @param string $host - * @param integer $port - * @param string $path - * @return string - */ - protected function _getServiceId($host, $port, $path) - { - return $host . ':' . $port . $path; - } - - /** - * Adds a service instance or service descriptor (if it is already - * not added) - * - * @param mixed $service - * - * @throws Exception If service descriptor is not valid - */ - public function addReadService($service) - { - if ($service instanceof Apache_Solr_Service) - { - $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); - - $this->_readableServices[$id] = $service; - } - else if (is_array($service)) - { - if (isset($service['host']) && isset($service['port']) && isset($service['path'])) - { - $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); - - $this->_readableServices[$id] = $service; - } - else - { - throw new Exception('A Readable Service description array does not have all required elements of host, port, and path'); - } - } - } - - /** - * Removes a service instance or descriptor from the available services - * - * @param mixed $service - * - * @throws Exception If service descriptor 
is not valid - */ - public function removeReadService($service) - { - $id = ''; - - if ($service instanceof Apache_Solr_Service) - { - $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); - } - else if (is_array($service)) - { - if (isset($service['host']) && isset($service['port']) && isset($service['path'])) - { - $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); - } - else - { - throw new Exception('A Readable Service description array does not have all required elements of host, port, and path'); - } - } - else if (is_string($service)) - { - $id = $service; - } - - if ($id && isset($this->_readableServices[$id])) - { - unset($this->_readableServices[$id]); - } - } - - /** - * Adds a service instance or service descriptor (if it is already - * not added) - * - * @param mixed $service - * - * @throws Exception If service descriptor is not valid - */ - public function addWriteService($service) - { - if ($service instanceof Apache_Solr_Service) - { - $id = $this->_getServiceId($service->getHost(), $service->getPort(), $service->getPath()); - - $this->_writeableServices[$id] = $service; - } - else if (is_array($service)) - { - if (isset($service['host']) && isset($service['port']) && isset($service['path'])) - { - $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); - - $this->_writeableServices[$id] = $service; - } - else - { - throw new Exception('A Writeable Service description array does not have all required elements of host, port, and path'); - } - } - } - - /** - * Removes a service instance or descriptor from the available services - * - * @param mixed $service - * - * @throws Exception If service descriptor is not valid - */ - public function removeWriteService($service) - { - $id = ''; - - if ($service instanceof Apache_Solr_Service) - { - $id = $this->_getServiceId($service->getHost(), $service->getPort(), 
$service->getPath()); - } - else if (is_array($service)) - { - if (isset($service['host']) && isset($service['port']) && isset($service['path'])) - { - $id = $this->_getServiceId((string)$service['host'], (int)$service['port'], (string)$service['path']); - } - else - { - throw new Exception('A Readable Service description array does not have all required elements of host, port, and path'); - } - } - else if (is_string($service)) - { - $id = $service; - } - - if ($id && isset($this->_writeableServices[$id])) - { - unset($this->_writeableServices[$id]); - } - } - - /** - * Iterate through available read services and select the first with a ping - * that satisfies configured timeout restrictions (or the default) - * - * @return Apache_Solr_Service - * - * @throws Exception If there are no read services that meet requirements - */ - protected function _selectReadService($forceSelect = false) - { - if (!$this->_currentReadService || !isset($this->_readableServices[$this->_currentReadService]) || $forceSelect) - { - if ($this->_currentReadService && isset($this->_readableServices[$this->_currentReadService]) && $forceSelect) - { - // we probably had a communication error, ping the current read service, remove it if it times out - if ($this->_readableServices[$this->_currentReadService]->ping($this->_readPingTimeout) === false) - { - $this->removeReadService($this->_currentReadService); - } - } - - if (count($this->_readableServices)) - { - // select one of the read services at random - $ids = array_keys($this->_readableServices); - - $id = $ids[rand(0, count($ids) - 1)]; - $service = $this->_readableServices[$id]; - - if (is_array($service)) - { - //convert the array definition to a client object - $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); - $this->_readableServices[$id] = $service; - } - - $service->setCreateDocuments($this->_createDocuments); - $this->_currentReadService = $id; - } - else - { - throw new Exception('No 
read services were available'); - } - } - - return $this->_readableServices[$this->_currentReadService]; - } - - /** - * Iterate through available write services and select the first with a ping - * that satisfies configured timeout restrictions (or the default) - * - * @return Apache_Solr_Service - * - * @throws Exception If there are no write services that meet requirements - */ - protected function _selectWriteService($forceSelect = false) - { - if($this->_useBackoff) - { - return $this->_selectWriteServiceSafe($forceSelect); - } - - if (!$this->_currentWriteService || !isset($this->_writeableServices[$this->_currentWriteService]) || $forceSelect) - { - if ($this->_currentWriteService && isset($this->_writeableServices[$this->_currentWriteService]) && $forceSelect) - { - // we probably had a communication error, ping the current read service, remove it if it times out - if ($this->_writeableServices[$this->_currentWriteService]->ping($this->_writePingTimeout) === false) - { - $this->removeWriteService($this->_currentWriteService); - } - } - - if (count($this->_writeableServices)) - { - // select one of the read services at random - $ids = array_keys($this->_writeableServices); - - $id = $ids[rand(0, count($ids) - 1)]; - $service = $this->_writeableServices[$id]; - - if (is_array($service)) - { - //convert the array definition to a client object - $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); - $this->_writeableServices[$id] = $service; - } - - $this->_currentWriteService = $id; - } - else - { - throw new Exception('No write services were available'); - } - } - - return $this->_writeableServices[$this->_currentWriteService]; - } - - /** - * Iterate through available write services and select the first with a ping - * that satisfies configured timeout restrictions (or the default). The - * timeout period will increase until a connection is made or the limit is - * reached. 
This will allow for increased reliability with heavily loaded - * server(s). - * - * @return Apache_Solr_Service - * - * @throws Exception If there are no write services that meet requirements - */ - - protected function _selectWriteServiceSafe($forceSelect = false) - { - if (!$this->_currentWriteService || !isset($this->_writeableServices[$this->_currentWriteService]) || $forceSelect) - { - if (count($this->_writeableServices)) - { - $backoff = $this->_defaultBackoff; - - do { - // select one of the read services at random - $ids = array_keys($this->_writeableServices); - - $id = $ids[rand(0, count($ids) - 1)]; - $service = $this->_writeableServices[$id]; - - if (is_array($service)) - { - //convert the array definition to a client object - $service = new Apache_Solr_Service($service['host'], $service['port'], $service['path']); - $this->_writeableServices[$id] = $service; - } - - $this->_currentWriteService = $id; - - $backoff *= $this->_backoffEscalation; - - if($backoff > $this->_backoffLimit) - { - throw new Exception('No write services were available. All timeouts exceeded.'); - } - - } while($this->_writeableServices[$this->_currentWriteService]->ping($backoff) === false); - } - else - { - throw new Exception('No write services were available'); - } - } - - return $this->_writeableServices[$this->_currentWriteService]; - } - - public function setCreateDocuments($createDocuments) - { - $this->_createDocuments = (bool) $createDocuments; - - if ($this->_currentReadService) - { - $service = $this->_selectReadService(); - $service->setCreateDocuments($createDocuments); - } - } - - public function getCreateDocuments() - { - return $this->_createDocuments; - } - - /** - * Raw Add Method. Takes a raw post body and sends it to the update service. Post body - * should be a complete and well formed "add" xml document. 
- * - * @param string $rawPost - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function add($rawPost) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->add($rawPost); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Add a Solr Document to the index - * - * @param Apache_Solr_Document $document - * @param boolean $allowDups - * @param boolean $overwritePending - * @param boolean $overwriteCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function addDocument(Apache_Solr_Document $document, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->addDocument($document, $allowDups, $overwritePending, $overwriteCommitted); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Add an array of Solr Documents to the index all at once - * - * @param array $documents Should be an array of Apache_Solr_Document instances - * @param boolean $allowDups - * @param boolean $overwritePending - * @param boolean $overwriteCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function addDocuments($documents, $allowDups = false, $overwritePending = true, $overwriteCommitted = true) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->addDocuments($documents, $allowDups, $overwritePending, $overwriteCommitted); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT 
COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Send a commit command. Will be synchronous unless both wait parameters are set - * to false. - * - * @param boolean $waitFlush - * @param boolean $waitSearcher - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function commit($optimize = true, $waitFlush = true, $waitSearcher = true, $timeout = 3600) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->commit($optimize, $waitFlush, $waitSearcher, $timeout); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Raw Delete Method. Takes a raw post body and sends it to the update service. Body should be - * a complete and well formed "delete" xml document - * - * @param string $rawPost - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function delete($rawPost) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->delete($rawPost); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Create a delete document based on document ID - * - * @param string $id - * @param boolean $fromPending - * @param boolean $fromCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function deleteById($id, $fromPending = true, $fromCommitted = true) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->deleteById($id, $fromPending, $fromCommitted); - } - catch (Exception $e) - 
{ - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Create a delete document based on a query and submit it - * - * @param string $rawQuery - * @param boolean $fromPending - * @param boolean $fromCommitted - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function deleteByQuery($rawQuery, $fromPending = true, $fromCommitted = true) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->deleteByQuery($rawQuery, $fromPending, $fromCommitted); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Send an optimize command. Will be synchronous unless both wait parameters are set - * to false. - * - * @param boolean $waitFlush - * @param boolean $waitSearcher - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public function optimize($waitFlush = true, $waitSearcher = true) - { - $service = $this->_selectWriteService(); - - do - { - try - { - return $service->optimize($waitFlush, $waitSearcher); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectWriteService(true); - } while ($service); - - return false; - } - - /** - * Simple Search interface - * - * @param string $query The raw query string - * @param int $offset The starting offset for result documents - * @param int $limit The maximum number of result documents to return - * @param array $params key / value pairs for query parameters, use arrays for multivalued parameters - * @return Apache_Solr_Response - * - * @throws Exception If an error occurs during the service call - */ - public 
function search($query, $offset = 0, $limit = 10, $params = array()) - { - $service = $this->_selectReadService(); - - do - { - try - { - return $service->search($query, $offset, $limit, $params); - } - catch (Exception $e) - { - if ($e->getCode() != 0) //IF NOT COMMUNICATION ERROR - { - throw $e; - } - } - - $service = $this->_selectReadService(true); - } while ($service); - - return false; - } + return false; + } } Index: contrib/apachesolr_attachments/apachesolr_attachments.info =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_attachments/apachesolr_attachments.info,v retrieving revision 1.1.4.2 diff -u -p -r1.1.4.2 apachesolr_attachments.info --- contrib/apachesolr_attachments/apachesolr_attachments.info 14 Aug 2008 04:01:18 -0000 1.1.4.2 +++ contrib/apachesolr_attachments/apachesolr_attachments.info 5 Feb 2009 21:42:05 -0000 @@ -1,4 +1,4 @@ -name = Apache Solr for Attachments -description = Searching file attachments with Apache Solr -dependencies = apachesolr -package = ApacheSolr +name = Apache Solr for attachments +description = Searching file attachments with Solr +dependencies = apachesolr +package = Apache Solr Index: contrib/apachesolr_attachments/apachesolr_attachments.install =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_attachments/apachesolr_attachments.install,v retrieving revision 1.1.4.2 diff -u -p -r1.1.4.2 apachesolr_attachments.install --- contrib/apachesolr_attachments/apachesolr_attachments.install 14 Aug 2008 04:01:18 -0000 1.1.4.2 +++ contrib/apachesolr_attachments/apachesolr_attachments.install 5 Feb 2009 21:42:05 -0000 @@ -1,5 +1,5 @@ $change, 'last_nid' => $nid); + variable_set('apachesolr_index_last', $stored); + } + variable_del('apachesolr_attachment_last_change'); + variable_del('apachesolr_attachment_last_nid'); + return array(); 
+} Index: contrib/apachesolr_attachments/apachesolr_attachments.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_attachments/apachesolr_attachments.module,v retrieving revision 1.1.4.4 diff -u -p -r1.1.4.4 apachesolr_attachments.module --- contrib/apachesolr_attachments/apachesolr_attachments.module 9 Dec 2008 19:19:19 -0000 1.1.4.4 +++ contrib/apachesolr_attachments/apachesolr_attachments.module 5 Feb 2009 21:42:05 -0000 @@ -1,29 +1,26 @@ 'admin/settings/apachesolr/attachments', - 'title' => t('Apache Solr Attachments Settings'), - 'description' => t('Administer Apache Solr Attachments'), - 'callback' => 'drupal_get_form', - 'callback arguments' => 'apachesolr_attachments_settings', - 'access' => user_access('administer site configuration'), - ); - } +function apachesolr_attachments_menu() { + $items = array(); + $items['admin/settings/apachesolr/attachments'] = array( + 'title' => 'Apache Solr Attachments Settings', + 'description' => 'Administer Apache Solr Attachments.', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('apachesolr_attachments_settings'), + 'access callback' => 'user_access', + 'access arguments' => array('administer site configuration'), + ); return $items; } @@ -72,19 +69,14 @@ function apachesolr_attachments_settings function apachesolr_attachments_search($op = 'search', $keys = NULL) { switch ($op) { - case 'name': - // We dont want a tab - return ''; - + return ''; // We dont want a tab case 'reset': - ApacheSolrUpdate::reset(SOLR_ATTACHMENT_NS); + apachesolr_clear_last_index('apachesolr_attachments'); return; - case 'status': - // Figure out a way to know how many are left to update, or expose it as part of the apachesolr module - return; - + // TODO: Figure out a way to know how many actual files are left to update. 
+ return apachesolr_index_status('apachesolr_attachments'); case 'search': return apachesolr_search_search($op, $keys); } @@ -97,76 +89,65 @@ function apachesolr_attachments_search($ * attachments. */ function apachesolr_attachments_update_index() { + // TODO - should probably have a separate variable. + $cron_limit = variable_get('search_cron_limit', 100); + $result = apachesolr_get_nodes_to_index('apachesolr_attachments', $cron_limit); + apachesolr_index_nodes($result, 'apachesolr_attachments', 'apachesolr_attachments_add_documents'); +} - $result = ApacheSolrUpdate::getNodesToIndex(SOLR_ATTACHMENT_NS); - while ($row = db_fetch_object($result)) { - // Variables to track the last item changed. - $solr_last_change = $row->last_change; - $solr_last_id = $row->nid; - - $node = node_load($row->nid); - if ($node->nid) { - // Since there is no notification for an attachment being unassociated with a - // node (but that action will trigger it to be indexed again), lets remove - // all indexed attachments then add all attached (if any) - _asa_remove_attachments_from_index($node->nid); - - $files = _asa_get_indexable_files($node); - if(!empty($files)) { - // Update solr index. 
- try { - - foreach ($files as $file) { - // Some are arrays others are objects, treat them all as objects - $file = (object) $file; - - $text = _asa_get_attachment_text($file); - $text = trim($text); - - if (!empty($text)) { - $document = new Apache_Solr_Document(); - - $site = url(NULL, NULL, NULL, TRUE); - $hash = md5($site); - $document->site = $site; - $document->hash = $hash; - $document->url = file_create_url($file->filepath); - $document->id = $file->fid; - $document->nid = $node->nid; - $document->title = $file->filename; - $document->changed = $node->changed; - $document->uid = $node->uid; - $document->body = $text; - $document->text = "{$file->description} {$file->filename} $text"; - $document->type = $node->type; - $document->bsfield_isfile = TRUE; - _as_configure_taxonomy($document, $node); - - // Let modules add to the document - foreach (module_implements('apachesolr_attachments_update_index') as $module) { - $function = $module .'_apachesolr_attachments_update_index'; - $function($document, $node, $file); - } - - $documents[] = $document; - } - } - } - catch (Exception $e) { - watchdog(SOLR_ATTACHMENT_WD, $e->getMessage(), WATCHDOG_ERROR); +/** + * Callback for apachesolr_index_nodes(). + * + * Adds a document for each indexable file attachment for the given node ID. 
+ */ +function apachesolr_attachments_add_documents(&$documents, $nid) { + $node = node_load($nid, NULL, TRUE); + if (!empty($node->nid)) { + + $hash = apachesolr_site_hash(); + + // Since there is no notification for an attachment being unassociated with a + // node (but that action will trigger it to be indexed again), lets remove + // all indexed attachments then add all attached (if any) + _asa_remove_attachments_from_index($node->nid); + + $files = _asa_get_indexable_files($node); + if(!empty($files)) { + foreach ($files as $file) { + // Some are arrays others are objects, treat them all as objects + $file = (object) $file; + + $text = _asa_get_attachment_text($file); + $text = trim($text); + + if ($text) { + $document = new Apache_Solr_Document(); + $document->id = apachesolr_document_id($file->fid, 'file'); + $document->url = file_create_url($file->filepath); + $document->hash = $hash; + $document->nid = $node->nid; + $document->title = $file->filename; + $document->changed = apachesolr_date_iso($node->changed); + $document->uid = $node->uid; + $document->name = $node->name; + $document->body = $text; + $document->text = "{$file->description} {$file->filename} $text"; + $document->type = $node->type; + $document->bsfield_isfile = TRUE; + _as_configure_taxonomy($document, $node); + + drupal_alter('apachesolr_attachment', $document, $node, $file); + + $documents[] = $document; } } - ApacheSolrUpdate::success(SOLR_ATTACHMENT_NS, $solr_last_change, $solr_last_id); } } - - _as_index_documents($documents); } /** * Implementation of hook_nodeapi(). * - * For a search result: Parse the nid and fid for a search result for potential use later. * For a delete: Remove all associated attachments from the Solr store. */ function apachesolr_attachments_nodeapi($node, $op) { @@ -185,7 +166,8 @@ function apachesolr_attachments_nodeapi( * the link and type wont be configured correctly if it is a file attachement, so override * those values here if needed. 
*/ -function apachesolr_attachments_apachesolr_process_results($results) { +function apachesolr_attachments_apachesolr_process_results($results) { + if (is_array($results)) { foreach ($results as &$item) { if (isset($item['node']->bsfield_isfile) && $item['node']->bsfield_isfile === TRUE) { @@ -202,11 +184,11 @@ function apachesolr_attachments_apacheso */ function _asa_get_indexable_files($node) { $files = array(); - + if(!empty($node->files)) { $files = array_merge($files, $node->files); } - + $fields = _asa_get_cck_file_fields(); foreach ($fields as $field) { if(!empty($node->$field)) { @@ -225,7 +207,7 @@ function _asa_get_cck_file_fields() { if(module_exists('filefield')) { $fields = content_fields(); foreach($fields as $key => $values){ - if($values['type'] == 'file') { + if($values['type'] == 'filefield') { $file_fields[] = $key; } } @@ -254,10 +236,10 @@ function _asa_get_attachment_text($file) // Strip anything that might make the Solr integration barf. // Wierd control characters make things behave wierd, especially in XML $cleaned_text = iconv("utf-8", "utf-8//IGNORE", $text); - + // As per robertDouglass - http://drupal.org/node/335871 - // Bad control character. Do we need to make a hook for text cleanup? - $cleaned_text = preg_replace('/\x0C/', '', $cleaned_text); + // Strip bad control characters. Do we need to make a hook for text cleanup? + $cleaned_text = apachesolr_strip_ctl_chars($cleaned_text); return $cleaned_text; } @@ -267,7 +249,7 @@ function _asa_get_attachment_text($file) */ function _asa_remove_attachments_from_index($nid) { try { - $solr = _get_solr_instance(); + $solr = apachesolr_get_solr(); $solr->deleteByQuery("nid:{$nid} AND bsfield_isfile:true"); $solr->commit(); } @@ -277,17 +259,6 @@ function _asa_remove_attachments_from_in } /** - * For a provided fid, get the file path. 
- */ -function _asa_get_file_url($fid) { - if (!empty($fid) && is_numeric($fid)) { - $result = db_query('SELECT * FROM {files} WHERE fid = %d', $fid); - $file = db_fetch_array($result); - return $file['filepath']; - } -} - -/** * Get the command to parse text out of a particular mime type. */ function _asa_get_file_helper_command($type) { @@ -309,80 +280,34 @@ function _asa_get_file_helper_command($t return $cmd; } -/** - * Get a reference to the Solr service. - */ -function _asa_get_solr_instance() { - try { - return _get_solr_instance(); - } - catch (Exception $e) { - watchdog(SOLR_ATTACHMENT_WD, $e->getMessage(), WATCHDOG_ERROR); - } - - return FALSE; -} - - - /***************************************************************************************/ /** The following functions should become part of the Apache Solr module API **/ /***************************************************************************************/ /** - * Get a reference to the Solr service. This consolidates cal to varaible_get, etc. - */ -function _get_solr_instance() { - $host = variable_get('apachesolr_host', 'localhost'); - $port = variable_get('apachesolr_port', 8983); - $path = variable_get('apachesolr_path', '/solr'); - $solr =& apachesolr_get_solr($host, $port, $path); - if (!$solr->ping()) { - throw new Exception(t('No Solr instance available')); - } - - return $solr; -} - - -/** * Add taxonomy from the node to the solr document for the attachment. + * + * This is a direct copy from the apachesolr.module, we should refactor to + * make it reusable */ -function _as_configure_taxonomy($document, $node) { +function _as_configure_taxonomy(&$document, $node) { + if (is_array($node->taxonomy)) { foreach ($node->taxonomy as $term) { - $document->setMultiValue('tid', $term->tid); - // Double indexing of tids lets us do effecient searches (on tid) // and do accurate per-vocabulary faceting. - $document->setMultiValue('imfield_vid' . 
$term->vid, $term->tid); - - $document->setMultiValue('vid', $term->vid); - $document->setMultiValue('taxonomy_name', $term->name); - } - } -} - -/** - * Take the full list of Docs to submit to Solr and add them in batches. - */ -function _as_index_documents($documents) { - $solr = _asa_get_solr_instance(); - - if (is_object($solr) && count($documents) > 0) { - watchdog(SOLR_ATTACHMENT_WD, t("Adding @count documents to Solr", array('@count' => count($documents)))); - try { - // Chunk the adds by 50s - $docs_chunk = array_chunk($documents, 50); - foreach ($docs_chunk as $docs) { - $solr->addDocuments($docs); + // By including the ancestors to a term in the index we make + // sure that searches for general categories match specific + // categories, e.g. Fruit -> apple, a search for fruit will find + // content categorized with apple. + $ancestors = taxonomy_get_parents_all($term->tid); + foreach ($ancestors as $ancestor) { + $document->setMultiValue('tid', $ancestor->tid); + $document->setMultiValue('imfield_vid'. 
$ancestor->vid, $ancestor->tid); + $document->setMultiValue('vid', $ancestor->vid); + $document->setMultiValue('taxonomy_name', $ancestor->name); } - $solr->commit(); - $solr->optimize(FALSE, FALSE); - } - catch (Exception $e) { - watchdog(SOLR_ATTACHMENT_WD, $e->getMessage(), WATCHDOG_ERROR); } } -} \ No newline at end of file +} Index: contrib/apachesolr_image/apachesolr_image.info =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_image/apachesolr_image.info,v retrieving revision 1.1.2.1 diff -u -p -r1.1.2.1 apachesolr_image.info --- contrib/apachesolr_image/apachesolr_image.info 29 Jan 2008 18:05:50 -0000 1.1.2.1 +++ contrib/apachesolr_image/apachesolr_image.info 5 Feb 2009 21:42:05 -0000 @@ -1,5 +1,5 @@ ; $Id$ -name = ApacheSolr Image integration -description = Integrates the ApacheSolr and Image modules +name = Apache Solr image module integration +description = Integrates the Apache Solr and Image modules dependencies = image apachesolr -package = ApacheSolr +package = Apache Solr Index: contrib/apachesolr_image/apachesolr_image.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_image/apachesolr_image.module,v retrieving revision 1.1.2.3 diff -u -p -r1.1.2.3 apachesolr_image.module --- contrib/apachesolr_image/apachesolr_image.module 4 Sep 2008 23:13:10 -0000 1.1.2.3 +++ contrib/apachesolr_image/apachesolr_image.module 5 Feb 2009 21:42:05 -0000 @@ -1,62 +1,68 @@ type == 'image') { $areas = array(); + // A problem here - small images do not get a derived thumbnail. 
$sizes = image_get_derivative_sizes($node->images['_original']); foreach ($sizes as $name => $info) { $areas[$name] = $info['width'] * $info['height']; } asort($areas); - $image = FALSE; + $image_path = FALSE; foreach ($areas as $preset => $size) { - $image = $node->images[$preset]; + $image_path = $node->images[$preset]; break; } - if ($image) { - $index_key = 'ssfield_imagemodule_image'; - $document->$index_key = $image; + if ($image_path) { + $document->ssfield_image_relative = $image_path; + // TODO - support multi-site too. + // $document->ssfield_image_absolute = file_create_url($image_path); } } } /** - * Format a single result entry of a search query. This function is normally - * called by theme_search_page() or hook_search_page(). - * - * @param $item - * A single search result as returned by hook_search(). The result should be - * an array with keys "link", "title", "type", "user", "date", and "snippet". - * Optionally, "extra" can be an array of extra info to show along with the - * result. - * @param $type - * The type of item found, such as "user" or "contentsearch". - * - * @ingroup themeable + * Implementation of hook_apachesolr_modify_query(). */ -function phptemplate_search_item($item, $type) { - $output = '
'. check_plain($item['title']) .'
'; - $info = array(); - if ($item['type']) { - $info[] = check_plain($item['type']); - } - if ($item['user']) { - $info[] = $item['user']; - } - if ($item['date']) { - $info[] = format_date($item['date'], 'small'); - } - if (is_array($item['extra'])) { - $info = array_merge($info, $item['extra']); - } - $break = ''; - if ($node = $item['node']) { - if ($path = $node->ssfield_imagemodule_image) { - $item['snippet'] = ''. theme('image', $path, '', '', array('align' => 'left')) .''. $item['snippet']; - $break = '
'; +function apachesolr_image_apachesolr_modify_query(&$query, &$params) { + // Also retrieve image thumbnail links. + $params['fl'] .= ',ssfield_image_relative'; +} + +/** + * Implementation of hook_apachesolr_process_results(). + */ +function apachesolr_image_apachesolr_process_results(&$results) { + foreach ($results as $index => $item) { + if ($item['node']->type == 'image' && !empty($item['node']->ssfield_image_relative)) { + $results[$index]['snippet'] = theme('apachesolr_image_snippet', $item); } } - $output .= '
'. ($item['snippet'] ? '

'. $item['snippet'] . $break .'

' : '') .'

'. implode(' - ', $info) .'

'; - return $output; +} + +function theme_apachesolr_image_snippet($item){ + return ''. theme('image', $item['node']->ssfield_image_relative, $item['title'], $item['title'], array('align' => 'left')) .''. $item['snippet'] . '
'; +} + +/** + * Implementation of hook_theme(). + */ +function apachesolr_image_theme() { + return array( + 'apachesolr_image_snippet' => array( + 'arguments' => array('item' => NULL), + ), + ); +} + +/** + * Implementation of hook_enable(). + */ +function apachesolr_image_enable() { + drupal_set_message(t('The Apache Solr image integration module will not have any apparent effect until Image type nodes are indexed or re-indexed.')); } Index: contrib/apachesolr_mlt/apachesolr_mlt.info =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_mlt/Attic/apachesolr_mlt.info,v retrieving revision 1.1.2.1 diff -u -p -r1.1.2.1 apachesolr_mlt.info --- contrib/apachesolr_mlt/apachesolr_mlt.info 23 Oct 2008 20:39:16 -0000 1.1.2.1 +++ contrib/apachesolr_mlt/apachesolr_mlt.info 5 Feb 2009 21:42:05 -0000 @@ -1,5 +1,5 @@ -; $Id: apachesolr_mlt.info,v 1.1.2.1 2008/10/23 20:39:16 robertDouglass Exp $ -name = ApacheSolr More Like This -description = Use Solr to make content recommendations using Solr Search +; $Id: apachesolr_mlt.info,v 1.1.4.5 2009/01/27 21:32:35 pwolanin Exp $ +name = Apache Solr more like this +description = Use Solr to make content recommendations dependencies = apachesolr -package = ApacheSolr +package = Apache Solr Index: contrib/apachesolr_mlt/apachesolr_mlt.install =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_mlt/Attic/apachesolr_mlt.install,v retrieving revision 1.1.2.1 diff -u -p -r1.1.2.1 apachesolr_mlt.install --- contrib/apachesolr_mlt/apachesolr_mlt.install 23 Oct 2008 20:39:16 -0000 1.1.2.1 +++ contrib/apachesolr_mlt/apachesolr_mlt.install 5 Feb 2009 21:42:05 -0000 @@ -1,32 +1,36 @@ t('Tracks custom content recommendation blocks.'), + 'fields' => array( + 'id' => array( + 'description' => t('The primary identifier for a custom block.'), + 'type' => 
'serial', + 'unsigned' => TRUE, + 'not null' => TRUE), + 'data' => array( + 'description' => t('The serialized data for a block.'), + 'type' => 'text', + 'size' => 'big', + 'not null' => TRUE, + ), + ), + 'primary key' => array('id'), + ); + return $schema; +} + +function apachesolr_mlt_uninstall() { + // Remove tables. + drupal_uninstall_schema('apachesolr_mlt'); } \ No newline at end of file Index: contrib/apachesolr_mlt/apachesolr_mlt.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_mlt/Attic/apachesolr_mlt.module,v retrieving revision 1.1.2.4 diff -u -p -r1.1.2.4 apachesolr_mlt.module --- contrib/apachesolr_mlt/apachesolr_mlt.module 28 Nov 2008 18:08:38 -0000 1.1.2.4 +++ contrib/apachesolr_mlt/apachesolr_mlt.module 5 Feb 2009 21:42:06 -0000 @@ -1,50 +1,60 @@ 'admin/settings/apachesolr_mlt', - 'title' => t('Apache Solr More Like This'), - 'description' => t('Configure content recommendation blocks using the ApacheSolr "More Like This" handler.'), - 'callback' => 'apachesolr_mlt_settings', - 'access' => user_access('administer search'), + + $items['admin/settings/apachesolr_mlt'] = array( + 'title' => 'Apache Solr - More Like This', + 'description' => 'Configure content recommendation blocks using the Apache Solr "More Like This" handler.', + 'page callback' => 'apachesolr_mlt_settings', + 'access arguments' => array('administer search'), + ); + $items['admin/settings/apachesolr_mlt/configure_block'] = array( + 'type' => MENU_CALLBACK, + 'page callback' => 'drupal_get_form', + 'page arguments' => array('apachesolr_mlt_block_form', 4), + 'access arguments' => array('administer search'), ); - $items[] = array( - 'path' => 'admin/settings/apachesolr_mlt/configure_block', - 'type' => MENU_CALLBACK, - 'callback' => 'drupal_get_form', - 'callback arguments' => array('apachesolr_mlt_block_form', arg(4)), - 'access' => user_access('administer search'), - ); - 
$items[] = array( - 'path' => 'admin/settings/apachesolr_mlt/delete_block', - 'type' => MENU_CALLBACK, - 'callback' => 'drupal_get_form', - 'callback arguments' => array('apachesolr_mlt_delete_block_form', arg(4)), - 'access' => user_access('administer search'), - ); - - } + $items['admin/settings/apachesolr_mlt/delete_block'] = array( + 'type' => MENU_CALLBACK, + 'page callback' => 'drupal_get_form', + 'page arguments' => array('apachesolr_mlt_delete_block_form', 4), + 'access arguments' => array('administer search'), + ); return $items; } /** * Implementation of hook_block */ -function apachesolr_mlt_block ($op = 'list', $delta = 0, $edit = array()) { +function apachesolr_mlt_block($op = 'list', $delta = 0, $edit = array()) { + static $access; + if ($op == 'list') { //return all of the moreLikeThis blocks that the user has created $blocks = apachesolr_mlt_list_blocks(); return $blocks; } - else if ($op == 'view' && !empty($delta)) { + else if ($op == 'view' && !empty($delta) && (arg(0) == 'node')) { //return the content of the block, based on the delta - return apachesolr_mlt_suggestions($delta); + $nid = arg(1); + if (is_numeric($nid) && (!arg(2) || arg(2) == 'view')) { + // Determine whether the user can view the current node. + if (!isset($access)) { + $node = node_load($nid); + $access = $node && node_access('view', $node); + } + if ($access) { + $block = apachesolr_mlt_suggestions($delta, $nid); + $configure_link = user_access('administer search') ? l(t('Configure this block'),'admin/settings/apachesolr_mlt/configure_block/' . $delta) : ''; + $block['content'] .= $configure_link; + return $block; + } + } } } @@ -52,62 +62,76 @@ function apachesolr_mlt_block ($op = 'li * function apachesolr_mlt_suggestions() * This function loads a the parameters for each moreLikeThis query, performs * the query, and returns a list of linked node titles. 
- * + * * @param int $block_id A block ID for loading the suggestions * * @return array An array to be returned to hook_block */ -function apachesolr_mlt_suggestions($block_id) { - if (arg(0) == 'node' && is_numeric(arg(1))) { - $nid = check_plain(arg(1)); - $solr = & apachesolr_get_solr ( variable_get ( 'apachesolr_host', 'localhost' ), variable_get ( 'apachesolr_port', 8983 ), variable_get ( 'apachesolr_path', '/solr' ) ); - - try { - $fields = array('mlt.mintf', 'mlt.mindf', 'mlt.minwl', 'mlt.maxwl', 'mlt.maxqt', 'mlt.boost', 'mlt.qf'); - $block = apachesolr_mlt_load_block($block_id); - $params = array( - 'mlt' => 'true', - 'fl' => 'nid,title', - 'mlt.fl' => check_plain(implode(',', $block['mlt_fl'])), - ); - unset($block['mlt_fl']); - foreach ($fields as $field) { - $drupal_fieldname = str_replace('.', '_', $field); - if (!empty($block[$drupal_fieldname])) { - $params[$field] = check_plain($block[$drupal_fieldname]); - } +function apachesolr_mlt_suggestions($block_id, $nid) { + + try { + $solr = apachesolr_get_solr(); + $fields = array('mlt.mintf', 'mlt.mindf', 'mlt.minwl', 'mlt.maxwl', 'mlt.maxqt', 'mlt.boost', 'mlt.qf'); + $block = apachesolr_mlt_load_block($block_id); + + $params = array( + 'qt' => 'mlt', + 'fl' => 'nid,title,url', + 'mlt.fl' => implode(',', $block['mlt_fl']), + ); + + foreach ($fields as $field) { + $drupal_fieldname = str_replace('.', '_', $field); + if (!empty($block[$drupal_fieldname])) { + $params[$field] = check_plain($block[$drupal_fieldname]); } - $response = $solr->search('nid:' . $nid, 0, 10, $params); - - $r = (array) end($response->moreLikeThis); - //TODO: Figure out why this works + } + $query = apachesolr_drupal_query('id:' . apachesolr_document_id($nid)); + if (empty($query)) { + // An error contacting the server. + return; + } + // This hook allows modules to modify the query and params objects. 
+ apachesolr_modify_query($query, $params); + + $response = $solr->search($query->get_query_basic(), 0, $block['num_results'], $params); + $suggestions = array(); + if ($response->response) { + $r = (array) end($response->response); $links = array(); - - if (is_array($r['docs'])) { - foreach ($r['docs'] as $doc) { - $links[] = l($doc->title, 'node/' . $doc->nid); + + if (is_array($r)) { + foreach ($r as $result) { + $links[] = l($result->title, $result->url); } - } + } - $suggestions = array(); if (count($links) > 0) { $suggestions['subject'] = $block['name']; $suggestions['content'] = theme('apachesolr_mlt_recommendation_block', $links); } - return $suggestions; - } catch ( Exception $e ) { - watchdog ( 'Apache Solr', $e->getMessage (), WATCHDOG_ERROR ); } + return $suggestions; + } catch ( Exception $e ) { + watchdog ( 'Apache Solr', $e->getMessage (), WATCHDOG_ERROR ); } } +function apachesolr_mlt_theme($existing, $type, $theme, $path) { + return array( + 'apachesolr_mlt_recommendation_block' => array( + 'arguments' => array('links' => NULL), + ), + ); +} + function theme_apachesolr_mlt_recommendation_block($links) { return theme('item_list', $links); } /** * function apachesolr_mlt_settings() - * returns the settings page. + * Returns the settings page. */ function apachesolr_mlt_settings() { $query = "SELECT * FROM {apachesolr_mlt}"; @@ -118,11 +142,11 @@ function apachesolr_mlt_settings() { $rows[] = array( $block->id, $block->data['name'], - l('Edit', 'admin/settings/apachesolr_mlt/configure_block/' . check_plain($block->id)) .' | ' . l('Delete', 'admin/settings/apachesolr_mlt/delete_block/'.check_plain($block->id)), + l('Edit', 'admin/settings/apachesolr_mlt/configure_block/' . check_plain($block->id)) .' | ' . l('Delete', 'admin/settings/apachesolr_mlt/delete_block/' . 
check_plain($block->id)), ); } - $header = array('Id', 'name', 'options'); - $output = l('Add block', 'admin/settings/apachesolr_mlt/configure_block'); + $header = array(t('Id'), t('Name'), t('Options')); + $output = l(t('Add block'), 'admin/settings/apachesolr_mlt/configure_block'); $output .= theme('table', $header, $rows); return $output; } @@ -134,17 +158,18 @@ function apachesolr_mlt_settings() { * * @return array The form used for editing. * TODO: - * Add term boost settings + * Add term boost settings. * Enable the user to specify a query, rather then forcing suggestions based * on the node id. * */ -function apachesolr_mlt_block_form($block_id = NULL) { - //if editing, load the current settings for the block. - if($block_id && is_numeric($block_id)) { +function apachesolr_mlt_block_form(&$form_state, $block_id = NULL) { + $block = array(); + // If editing, load the current settings for the block. + if ($block_id && is_numeric($block_id)) { $block = apachesolr_mlt_load_block($block_id); $form['block_id'] = array( - '#type' => 'hidden', + '#type' => 'value', '#value' => $block_id, ); } @@ -152,20 +177,20 @@ function apachesolr_mlt_block_form($bloc $form['name'] = array( '#type' => 'textfield', '#title' => t('Block Name'), - '#description' => t('Please enter then block name. This will only be seen by the administratior'), - '#default_value' => check_plain($block['name']), + '#description' => t('The block name displayed to site users.'), + '#default_value' => isset($block['name']) ? check_plain($block['name']) : '', '#weight' => '-2', ); - $form['mlt.count'] = array( + $form['num_results'] = array( '#type' => 'textfield', '#title' => t('Maximum number of results'), - '#default_value' => $block['mlt_count'] ? $block['mlt_count'] : 5, + '#default_value' => isset($block['num_results']) ? 
$block['num_results'] : 5, '#weight' => -1, ); - + $form['comparison'] = array( '#type' => 'fieldset', - '#title' => t('Comparison settings'), + '#title' => t('Comparison fields'), '#weight' => 0, '#collapsible' => TRUE, '#collapsed' => FALSE, @@ -173,9 +198,9 @@ function apachesolr_mlt_block_form($bloc $form['comparison']['mlt_fl'] = array( '#type' => 'checkboxes', '#title' => t('Fields for comparison'), - '#description' => t('The fields to be used in caclulating similarity.'), + '#description' => t('Select fields to be used in calculating similarity. The default combination of "taxonomy_names" and "title" will provide relevant results for typical sites.'), '#options' => apachesolr_mlt_get_fields(), - '#default_value' => isset($block['mlt_fl']) ? $block['mlt_fl'] : array('title', 'taxonomy_name'), + '#default_value' => isset($block['mlt_fl']) ? $block['mlt_fl'] : array('title', 'taxonomy_names'), ); $form['advanced'] = array( @@ -183,37 +208,37 @@ function apachesolr_mlt_block_form($bloc '#title' => t('Advanced Configuration'), '#weight' => '1', '#collapsible' => TRUE, - '#collapsed' => FALSE, + '#collapsed' => TRUE, ); $form['advanced']['mlt_mintf'] = array( '#type' => 'textfield', '#title' => t('Minimum Term Frequency'), - '#description' => t('The frequency below which terms will be ignored in the source document.'), - '#default_value' => $block['mlt_mintf'] ? (int) $block['mlt_mintf'] : 1, + '#description' => t('A word must appear this many times in any given document before the document is considered relevant for comparison.'), + '#default_value' => isset($block['mlt_mintf']) ? (int) $block['mlt_mintf'] : 1, ); $form['advanced']['mlt_mindf'] = array( '#type' => 'textfield', '#title' => t('Minimum Document Frequency'), - '#description' => t('The frequency at which words will be ignored which do not occur in at least this many documents.'), - '#default_value' => $block['mlt_mindf'] ? 
(int) $block['mlt_mindf'] : 1, + '#description' => t('A word must occur in at least this many documents before it will be used for similarity comparison.'), + '#default_value' => isset($block['mlt_mindf']) ? (int) $block['mlt_mindf'] : 1, ); $form['advanced']['mlt_minwl'] = array( '#type' => 'textfield', '#title' => t('Minimum Word Length'), - '#description' => 'Words must be at least this long or they will be ignored.', - '#default_value' => $block['mlt_minwl'] ? (int) $block['mlt_minwl'] : 3, + '#description' => 'You can use this to eliminate short words such as "the" and "it" from similarity comparisons. Words must be at least this number of characters or they will be ignored.', + '#default_value' => isset($block['mlt_minwl']) ? (int) $block['mlt_minwl'] : 3, ); $form['advanced']['mlt_maxwl'] = array( '#type' => 'textfield', '#title' => t('Maximum World Length'), - '#description' => t('Words above this length will be ignored.'), - '#default_value' => $block['mlt_maxwl'] ? (int) $block['mlt_maxwl'] : 15, + '#description' => t('You can use this to eliminate very long words from similarity comparisons. Words of more than this number of characters will be ignored.'), + '#default_value' => isset($block['mlt_maxwl']) ? (int) $block['mlt_maxwl'] : 15, ); $form['advanced']['mlt_maxqt'] = array( '#type' => 'textfield', '#title' => t('Maximum number of query terms'), - '#description' => t('The maximum number of query terms that will be included in any generated query. Lower numbers will result in fewer recommendations but perform better.'), - '#default_value' => $block['mlt_maxqt'] ? (int) $block['mlt_maxqt'] : 30, + '#description' => t('The maximum number of query terms that will be included in any query. Lower numbers will result in fewer recommendations but will get results faster. 
If a content recommendation is not returning any recommendations, you can either check more "Comparison fields" checkboxes or increase the maximum number of query terms here.'), + '#default_value' => isset($block['mlt_maxqt']) ? (int) $block['mlt_maxqt'] : 30, ); $form['submit'] = array( @@ -230,14 +255,14 @@ function apachesolr_mlt_block_form($bloc /** * function apachesolr_mlt_block_validate() * - * Perform basic form field validation on the morelikethis fields - * + * Perform basic form field validation on the morelikethis fields. + * * @param string $form_id the form ID * @param array $form_values an array of from values */ -function apachesolr_mlt_block_form_validate($form_id, $form_values) { - if ($form_id == 'apachesolr_mlt_block_form') { - foreach ($form_values as $key => $value) { +function apachesolr_mlt_block_form_validate($form, &$form_state) { + if ($form_state['values']['form_id'] == 'apachesolr_mlt_block_form') { + foreach ($form_state['values'] as $key => $value) { //make sure the user inputed a number, accept for the field list if (strpos($key, 'mlt_') === 0 && $key != 'mlt_fl') { if (!empty($value) && !is_numeric($value)) { @@ -253,10 +278,10 @@ function apachesolr_mlt_block_form_valid * @param string $form_id the form ID * @param array $form_values an array of from values */ -function apachesolr_mlt_block_form_submit($form_id, $form_values) { - if ($form_id == 'apachesolr_mlt_block_form') { - $form_values['mlt_fl'] = array_diff($form_values['mlt_fl'], array(0)); - apachesolr_mlt_save_block($form_values, $form_values['block_id']); +function apachesolr_mlt_block_form_submit($form, &$form_state) { + if ($form_state['values']['form_id'] == 'apachesolr_mlt_block_form') { + $form_state['values']['mlt_fl'] = array_diff($form_state['values']['mlt_fl'], array(0)); + apachesolr_mlt_save_block($form_state['values'], isset($form_state['values']['block_id']) ? 
$form_state['values']['block_id'] : NULL); } } @@ -268,7 +293,8 @@ function apachesolr_mlt_block_form_submi */ function apachesolr_mlt_get_fields() { include_once drupal_get_path('module', 'apachesolr') .'/Solr_Base_Query.php'; - $fields = Solr_Base_Query::get_fields_in_index(); + $solr = apachesolr_get_solr(); + $fields = $solr->getFields(); $rows = array(); foreach ($fields as $field_name => $field) { if ($field->schema{4} == 'V') @@ -281,9 +307,9 @@ function apachesolr_mlt_get_fields() { /** * function apachesolr_mlt_load_block() - * A loader function for the apachesolr more like this module. If the function + * A loader function for the apachesolr more like this module. If the function * is passed a proper block id, the function will return the settings for the - * moreLikeThis request. If the block id is invalid the function returns an + * moreLikeThis request. If the block id is invalid the function returns an * empty array. * * @param int $block_id the id of the block you wish to load @@ -306,7 +332,7 @@ function apachesolr_mlt_load_block($bloc /** * function apachesolr_mlt_save_block() * A helper function save the block data to the database. If passed a valid - * block id, the function will update block settings in the database. If it is + * block id, the function will update block settings in the database. If it is * not passed a block id, the function will create a new block. * * @param array $block_settings An array containing the settings required to form @@ -325,19 +351,19 @@ function apachesolr_mlt_save_block($bloc /** * function apachesolr_mlt_list_blocks() - * returns a list of blocks. used by hook_block + * Returns a list of blocks. Used by hook_block */ function apachesolr_mlt_list_blocks() { $block_results = db_query("SELECT * FROM {apachesolr_mlt}"); $blocks = array(); while ($block = db_fetch_object($block_results)) { $block->data = unserialize($block->data); - $blocks[$block->id] = array('info' => t('ApacheSolr recommendations: ') . 
$block->data['name']); + $blocks[$block->id] = array('info' => t('Apache Solr recommendations: ') . $block->data['name'], 'cache' => BLOCK_CACHE_PER_PAGE); } return $blocks; } -function apachesolr_mlt_delete_block_form($block_id = NULL) { +function apachesolr_mlt_delete_block_form(&$form_values, $block_id = NULL) { if (is_numeric($block_id)) { $block = apachesolr_mlt_load_block($block_id); $form['block_id'] = array( @@ -346,13 +372,13 @@ function apachesolr_mlt_delete_block_for ); $form['#redirect'] = 'admin/settings/apachesolr_mlt'; return confirm_form($form, - t('Are you sure you want to delete the ApacheSolr content recommendation block %name?', array('%name' => $block['name'])), + t('Are you sure you want to delete the Apache Solr content recommendation block %name?', array('%name' => $block['name'])), 'admin/settings/apachesolr_mlt', t('The block will be deleted. This action cannot be undone.'), t('Delete'), t('Cancel')); } } -function apachesolr_mlt_delete_block_form_submit($form_id, $form_values) { - db_query('DELETE FROM {apachesolr_mlt} WHERE id = %d', $form_values['block_id']['#value']); +function apachesolr_mlt_delete_block_form_submit($form, &$form_state) { + db_query('DELETE FROM {apachesolr_mlt} WHERE id = %d', $form_state['values']['block_id']['#value']); } Index: contrib/apachesolr_multisitesearch/apachesolr_multisitesearch.info =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_multisitesearch/Attic/apachesolr_multisitesearch.info,v retrieving revision 1.1.2.1 diff -u -p -r1.1.2.1 apachesolr_multisitesearch.info --- contrib/apachesolr_multisitesearch/apachesolr_multisitesearch.info 4 Nov 2008 14:29:55 -0000 1.1.2.1 +++ contrib/apachesolr_multisitesearch/apachesolr_multisitesearch.info 5 Feb 2009 21:42:06 -0000 @@ -1,5 +1,5 @@ -; $Id: apachesolr_multisitesearch.info,v 1.1.2.1 2008/11/04 14:29:55 robertDouglass Exp $ +; $Id: 
apachesolr_multisitesearch.info,v 1.1.4.4 2009/01/27 21:32:35 pwolanin Exp $ name = Apache Solr multisite search -description = Search across multiple Drupal sites with solr -dependencies = apachesolr -package = ApacheSolr +description = Search multiple Drupal sites with Solr +dependencies = search apachesolr +package = Apache Solr Index: contrib/apachesolr_multisitesearch/apachesolr_multisitesearch.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/contrib/apachesolr_multisitesearch/Attic/apachesolr_multisitesearch.module,v retrieving revision 1.1.2.1 diff -u -p -r1.1.2.1 apachesolr_multisitesearch.module --- contrib/apachesolr_multisitesearch/apachesolr_multisitesearch.module 4 Nov 2008 14:29:55 -0000 1.1.2.1 +++ contrib/apachesolr_multisitesearch/apachesolr_multisitesearch.module 5 Feb 2009 21:42:06 -0000 @@ -28,7 +28,7 @@ function apachesolr_multisitesearch_sear // This is the object that knows about the query coming from the user. $query =& apachesolr_drupal_query($keys); $results = array(); - + try { $params = array( //'qt' => 'standard', @@ -38,7 +38,7 @@ function apachesolr_multisitesearch_sear 'facet.mincount' => 1, 'facet.sort' => 'true' ); - + // We have to add the site explicitly because it is needed in conjunction // with the hash when doing multisite faceting. $params['facet.field'][] = 'site'; @@ -52,7 +52,7 @@ function apachesolr_multisitesearch_sear $result = call_user_func_array($function, array()); if (isset($result) && is_array($result)) { foreach ($result as $facet) { - $params['facet.field'][] = $facet; + $params['facet.field'][] = $facet; } } } @@ -80,7 +80,7 @@ function apachesolr_multisitesearch_sear } $page = isset($_GET['page']) ? $_GET['page'] : 0; $params['start'] = $page * $params['rows']; - + /** * This hook allows modules to modify the query are params objects. 
* @@ -90,7 +90,7 @@ function apachesolr_multisitesearch_sear * function my_module_apachesolr_modify_query(&$query, &$params) { * // I only want to see articles by the admin! * $query->add_field("uid", 1); - * + * * } * */ @@ -129,7 +129,7 @@ function apachesolr_multisitesearch_sear 'score' => $doc->score, 'snippet' => $snippet); } - + // Hook to allow modifications of the retrieved results foreach (module_implements('apachesolr_process_results') as $module) { $function = $module .'_apachesolr_process_results'; @@ -142,7 +142,7 @@ function apachesolr_multisitesearch_sear } // try catch (Exception $e) { - watchdog('Apache Solr', $e->getMessage(), NULL, WATCHDOG_ERROR); + watchdog('Apache Solr', $e->getMessage(), WATCHDOG_ERROR); apachesolr_failure(t('Search'), $query->get_query()); } break; @@ -208,7 +208,7 @@ function apachesolr_multisitesearch_bloc case 'hash': if (is_object($response->facet_counts->facet_fields->$delta)) { $contains_active = FALSE; - + // Calculate the hashes of the sites for lookukp. This is why // we ask for the site facet in addition to the hash facet, and // we trust that they are the same. @@ -216,7 +216,7 @@ function apachesolr_multisitesearch_bloc foreach ($response->facet_counts->facet_fields->site as $site => $count) { $sites[md5($site)] = $site; } - + $hashes = array(); foreach ($response->facet_counts->facet_fields->$delta as $hash => $count) { $unclick_link = ''; @@ -243,14 +243,14 @@ function apachesolr_multisitesearch_bloc } if (count($hashes) > 0) { ksort($hashes); - $facet_display_limit = isset($facet_display_limits[$delta]) ? $facet_display_limits[$delta] : 10; + $facet_display_limit = isset($facet_display_limits[$delta]) ? $facet_display_limits[$delta] : variable_get('apachesolr_facet_query_limit_default', 10); $hashes = array_slice($hashes, 0, ($facet_display_limit == -1 ? 
NULL : $facet_display_limit)); $output = theme('apachesolr_facet_list', $hashes); return array('subject' => t('Filter by site'), 'content' => $output); } } break; - + default: break; } @@ -273,9 +273,19 @@ function apachesolr_multisitesearch_bloc } /** + * Implementation of hook_theme(). + */ +function apachesolr_multisitesearch_theme() { + return array( + 'apachesolr_breadcrumb_hash' => array( + 'arguments' => array('hash' => NULL), + ), + ); +} + +/** * Return the site from $hash */ function theme_apachesolr_breadcrumb_hash($hash) { return apachesolr_multisitesearch_block('get site', $hash); } - Index: tests/solr_base_query.test =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/tests/solr_base_query.test,v retrieving revision 1.1.2.2 diff -u -p -r1.1.2.2 solr_base_query.test --- tests/solr_base_query.test 4 Sep 2008 23:17:03 -0000 1.1.2.2 +++ tests/solr_base_query.test 5 Feb 2009 21:42:06 -0000 @@ -1,11 +1,25 @@ 'Solr/query handling', - 'desc' => 'Throw various queries at the query object and make sure they all parse correctly.', - 'group' => 'Apache Solr tests'); + +/*** + * JacobSingh: Warning! This test is totally broken because of the addition of the "hash" in apachesolr_search.module. + * Needs to be moved out. 
+ * + */ + + +class DrupalSolrQueryTests extends DrupalWebTestCase { + function getInfo() { + return array( + 'name' => 'Query handling', + 'description' => 'Throw various queries at the query object and make sure they all parse correctly.', + 'group' => 'ApacheSolr' + ); + } + + function setUp() { + parent::setUp('search', 'apachesolr'); } private $queries = array( @@ -24,67 +38,47 @@ class DrupalSolrQueryTests extends Drupa ); function testParseSimple() { - $result = TRUE; foreach ($this->queries as $string) { - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); // force the query to be rebuilt without removing any fields. $query->remove_field('fake-field-name'); - if (!$this->assertEqual($string, $query->get_query())) { - $result = FALSE; - } + $this->assertEqual($string, $query->get_query()); } - return $result; } function testAddTerm() { - $result = TRUE; foreach ($this->queries as $string) { - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->add_field('wham', '1'); - if (!$this->assertEqual($string .' wham:1', $query->get_query())) { - $result = FALSE; - } + $this->assertEqual($string . 
' wham:1', $query->get_query()); } - return $result; } function testRemoveTerm() { - $result = TRUE; $string = 'foo'; - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->remove_field('', 'foo'); - if (!$this->assertEqual('foo', $query->get_query())) { - $result = FALSE; - } + $this->assertEqual('foo', $query->get_query()); $string = 'foo bar'; - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->remove_field('', 'foo'); - if (!$this->assertEqual('foo bar', $query->get_query())) { - $result = FALSE; - } + $this->assertEqual('foo bar', $query->get_query()); $string = 'foo uid:1 bar'; - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->remove_field('uid', '1'); - if (!$this->assertEqual('foo bar', $query->get_query())) { - $result = FALSE; - } + $this->assertEqual('foo bar', $query->get_query()); $string = 'foo uid:1 bar'; - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->remove_field('uid'); - if (!$this->assertEqual('foo bar', $query->get_query())) { - $result = FALSE; - } + $this->assertEqual('foo bar', $query->get_query()); $string = 'foo uid:1 bar uid:2 tid:3'; - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->remove_field('uid', '1'); - /*// May not work because query doesn't necessarily get rebuilt in same order - if (!$this->assertEqual('foo bar uid:2 tid:3', $query->get_query())) { - $result = FALSE; - }*/ + // Not very beautiful, but probably best way there is: $pass = TRUE; $components = array('foo', 'bar', 'uid:2', 'tid:3'); @@ -95,7 +89,7 @@ class DrupalSolrQueryTests extends Drupa else { foreach ($components as $s) { if (strpos($q, $s) === FALSE) { - $pass = false; + $pass = FALSE; break; } } @@ -105,17 +99,11 @@ class DrupalSolrQueryTests extends Drupa } else { 
$this->assertEqual('foo bar tid:3 uid:2', $q); - $result = FALSE; } - $string = 'foo uid:1 bar uid:2 tid:3'; - $query =& apachesolr_drupal_query($string, TRUE); + $query = apachesolr_drupal_query($string, TRUE); $query->remove_field('uid'); - if (!$this->assertEqual('foo bar tid:3', $query->get_query())) { - $result = FALSE; - } - - return $result; + $this->assertEqual('foo bar tid:3', $query->get_query()); } }