Index: CHANGELOG.txt =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/CHANGELOG.txt,v retrieving revision 1.1.2.112.2.6 diff -u -p -r1.1.2.112.2.6 CHANGELOG.txt --- CHANGELOG.txt 23 Oct 2009 10:18:16 -0000 1.1.2.112.2.6 +++ CHANGELOG.txt 23 Oct 2009 11:12:29 -0000 @@ -1,10 +1,11 @@ -// $Id: CHANGELOG.txt,v 1.1.2.112.2.6 2009/10/23 10:18:16 claudiucristea Exp $ +// $Id: CHANGELOG.txt,v 1.1.2.112.2.5 2009/10/13 06:56:23 claudiucristea Exp $ -Apache Solr integration x.x-x.x, xxxx-xx-xx +Apache Solr Search integration x.x-x.x, xxxx-xx-xx ------------------------------ -Apache Solr integration 5.x-2.x, 2009-09-22 +Apache Solr Search integration 5.x-2.x, 2009-09-22 ------------------------------ +#561082 by pwolanin, consolidate Solr delete queries on cron. #580404 by pwolanin, allow per-node-type comment exclusion. #597174 by Frando, add hook_apachesolr_prepare_query() to enable custom sorts. #551582 by claudiu.cristea Add callbacks to lookup values based on key for CCK fields. @@ -12,13 +13,13 @@ Apache Solr integration 5.x-2.x, 2009-09 #584902 by cpliakas, robertDouglass, make $extra a keyed array and prevent PHP warnings. #557152 by claudiu.cristea Backporting 6.x-1.x to 5.x-2.x -Apache Solr integration 6.x-1.x, xxxx-xx-xx +Apache Solr Search integration 6.x-1.x, xxxx-xx-xx ------------------------------ #557382 by Josh Waihi, Scott Reynolds mlt blocks were double encoding titles. #543226 by drunken monkey validate port on settings form. #502976 by Scott Reynolds followup to Other GET parameters ignored by Apache Solr Facet Blocks -Apache Solr integration 6.x-1.0-RC2, 2009-07-29 +Apache Solr Search integration 6.x-1.0-RC2, 2009-07-29 ------------------------------ #530196 by pwolanin, fix facecount form function calls in apachesolr_og. #530910 by Damien Tournoud, correct offset for determining indexed fields. 
@@ -32,7 +33,7 @@ Apache Solr integration 6.x-1.0-RC2, 200 #508548 by pwolanin, Don't implode params['fq'] if it's not set. #528002 by janusman, Add RSS discovery to taxonomy hijack page -Apache Solr integration 6.x-1.0-RC1, 2009-07-02 +Apache Solr Search integration 6.x-1.0-RC1, 2009-07-02 ------------------------------ #502976 by pwolanin, Scott Reynolds, robertDouglass facet links and form submissions respect non ApacheSolr $_GET parameters. Note that this changes the interface API: get_url_querystring is now get_url_queryvalues and returns and array instead of a string. #507708 by pwolanin, fix sort parameters to use field aliases, validate in query object. @@ -50,7 +51,7 @@ Apache Solr integration 6.x-1.0-RC1, 200 #358166 by David Lesieur, janusman, cptnCauliflower, and pwolanin, search for just facet(s). #489654 by JacobSingh, and pwolanin, allow users to set their index as "read only". -Apache Solr integration 6.x-1.0-beta11, 2009-06-11 +Apache Solr Search integration 6.x-1.0-beta11, 2009-06-11 ------------------------------ #348218 by David Lesieur, janusman, and pwolanin, retain filters for next search. #401046 by pwolanin, revist urlencoding of query strings. @@ -60,7 +61,7 @@ Apache Solr integration 6.x-1.0-beta11, #464758 by pwolanin, 4th param to htmlspecialchars breaks PHP < 5.2.3. #466328 by pwolanin, fix classes for sort links. -Apache Solr integration 6.x-1.0-beta10, 2009-05-14 +Apache Solr Search integration 6.x-1.0-beta10, 2009-05-14 ------------------------------ #449414 by pwolanin, aufumy, & Scott Reynolds, refactor apachesolr_search_search(). #462836 by pwolanin, catch fatal error in _nodeaccess if no solr. @@ -73,7 +74,7 @@ Apache Solr integration 6.x-1.0-beta10, #448298 by JacobSingh and pwolanin, use a confirm form for index deletion. #454352 by Damien Tournoud, make optimize interval configurable, document variables. 
-Apache Solr integration 6.x-1.0-beta9, 2009-04-30 +Apache Solr Search integration 6.x-1.0-beta9, 2009-04-30 ------------------------------ #435924 by pwolanin, only clear cache on cron after updates and if the server is available. #405780 by blackdog and pwolanin, skip excluded node types during counting and indexing. @@ -87,7 +88,7 @@ Apache Solr integration 6.x-1.0-beta9, 2 #337737 by David Lesieur, mikejoconnor and Scott Reynolds, localize arg(1) dependence. Changes query get_path() to facilitate generating facets outside the search page. -Apache Solr integration 6.x-1.0-beta8, 2009-04-16 +Apache Solr Search integration 6.x-1.0-beta8, 2009-04-16 ------------------------------ #343252 by pwolanin, fix nodeaccess for method name changes, make multi-site aware. #432946 by pwolanin, query class and sort cleanups. @@ -96,7 +97,7 @@ Apache Solr integration 6.x-1.0-beta8, 2 #348029 by pwolanin, Handle negative filters and improve date facet block code. #254565 by drunken monkey and Scott Reynolds, change the query class to enable Views integration. -Apache Solr integration 6.x-1.0-beta7, 2009-04-03 +Apache Solr Search integration 6.x-1.0-beta7, 2009-04-03 ------------------------------ #410330 by pwolanin and bhuga, return more information for error 0. #293989 by bjaspan and vladimir.dolgopolov, add date facets for created and changed dates. @@ -105,7 +106,7 @@ Apache Solr integration 6.x-1.0-beta7, 2 #383478 by pwolanin and JacobSingh, provide more information about autocommit lag, pending deletes. #339490 by aufumy, pwolanin, and JacobSingh, Organic groups Apachesolr integration, new _alter hook. -Apache Solr integration 6.x-1.0-beta6, 2009-03-20 +Apache Solr Search integration 6.x-1.0-beta6, 2009-03-20 ------------------------------ #305370 by pwolanin, Handle failed delete requests so unpublished/deleted content doesn't stay in the index. #407570 by pwolanin and moshe weitzman, _alter for sort links, hide for < 2 results. 
@@ -117,14 +118,14 @@ Apache Solr integration 6.x-1.0-beta6, 2 #400882 by mkalkbrenner, fix faceting bug due to static counter in method add_field. #382358 by pwolanin, use tokenizer solr.CharStreamAwareWhitespaceTokenizerFactory to fix highlighting. -Apache Solr integration 6.x-1.0-beta5, 2009-02-27 +Apache Solr Search integration 6.x-1.0-beta5, 2009-02-27 ------------------------------ #305370 by pwolanin, don't delete from apachesolr table if Solr query fails. #385348 by moshe weitzman, use key in sort links array. #385362 by pwolanin, Shorten hash from 32 chars to 12. #383804 by JacobSingh, fix query building that broke nodeaccess. -Apache Solr integration 6.x-1.0-beta4, 2009-02-23 +Apache Solr Search integration 6.x-1.0-beta4, 2009-02-23 ------------------------------ #380670 by pwolanin, only add a bq param for a node-type boost > 'Normal'. #379518 by pwolanin, correct mismatch in default boost between @@ -133,7 +134,7 @@ Apache Solr integration 6.x-1.0-beta4, 2 #380644 by JacobSingh, Backwards compatability for old sort fields. #380538 by pwolanin, fix code to find vid for taxonomy facet blocks. -Apache Solr integration 6.x-1.0-beta3, 2009-02-20 +Apache Solr Search integration 6.x-1.0-beta3, 2009-02-20 ------------------------------ #378222 by janusman and pwolanin, add boost settings for "sticky" and "promote". #378566 by pwolanin, nodeaccess not correctly marking single nodes for re-indexing. @@ -166,7 +167,7 @@ Apache Solr integration 6.x-1.0-beta3, 2 #366957 Add a "configure" link to the more like this block by JacobSingh. #365901 Add a bias on node type (and node-type exclusion) by Damien Tournoud and pwolanin. 
-Apache Solr integration 6.x-1.0-beta2, 2009-01-28 (changes since 6.x-1.0-alpha6 2009-Jan-08) +Apache Solr Search integration 6.x-1.0-beta2, 2009-01-28 (changes since 6.x-1.0-alpha6 2009-Jan-08) ------------------------------ #365684 Get PHP library from new svn home by pwolanin #365620 clear stale data on hook_enable by pwolanin Index: Drupal_Apache_Solr_Service.php =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/Drupal_Apache_Solr_Service.php,v retrieving revision 1.1.2.20.4.1 diff -u -p -r1.1.2.20.4.1 Drupal_Apache_Solr_Service.php --- Drupal_Apache_Solr_Service.php 22 Sep 2009 15:26:15 -0000 1.1.2.20.4.1 +++ Drupal_Apache_Solr_Service.php 23 Oct 2009 11:12:29 -0000 @@ -288,4 +288,23 @@ class Drupal_Apache_Solr_Service extends } return array($result->data, $headers); } + + /** + * Create and post a delete document based on multiple document IDs. + * + * @param array $ids Expected to be utf-8 encoded strings + * @return Apache_Solr_Response + * + * @throws Exception If an error occurs during the service call + */ + public function deleteMultipleById($ids) { + $rawPost = "<delete>\n"; + foreach ($ids as $id) { + // Escape special xml characters + $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8'); + $rawPost .= '<id>' . $id . "</id>\n"; + } + $rawPost .= '</delete>'; + return $this->delete($rawPost); + } } Index: README.txt =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/README.txt,v retrieving revision 1.1.2.1.2.25.2.5 diff -u -p -r1.1.2.1.2.25.2.5 README.txt --- README.txt 23 Oct 2009 10:18:16 -0000 1.1.2.1.2.25.2.5 +++ README.txt 23 Oct 2009 11:12:29 -0000 @@ -1,4 +1,4 @@ -/* $Id: README.txt,v 1.1.2.1.2.25.2.5 2009/10/23 10:18:16 claudiucristea Exp $ */ +/* $Id: README.txt,v 1.1.2.1.2.25.2.4 2009/10/13 06:56:23 claudiucristea Exp $ */ This module integrates Drupal with the Apache Solr search platform.
Solr search can be used as a replacement for core content search and boasts both extra @@ -146,6 +146,10 @@ behavior: - apachesolr_query_class: the default query class to use. + - apachesolr_cron_mass_limit: update or delete at most this many documents in + each Solr request, such as when making {apachesolr_search_node} consistent + with {node}. + Troubleshooting -------------- Problem: Index: apachesolr.index.inc =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.index.inc,v retrieving revision 1.1.2.6.4.5 diff -u -p -r1.1.2.6.4.5 apachesolr.index.inc --- apachesolr.index.inc 23 Oct 2009 10:18:16 -0000 1.1.2.6.4.5 +++ apachesolr.index.inc 23 Oct 2009 11:12:29 -0000 @@ -1,5 +1,5 @@ nid] = $node; + if (count($nodes) == $limit) { + $node_lists[] = $nodes; + $nodes = array(); + } + } + // Queue the last, partially filled batch (fewer than $limit nodes). + if (count($nodes)) { + $node_lists[] = $nodes; + } + foreach ($node_lists as $nodes) { + watchdog('Apache Solr', t('On cron running apachesolr_nodeapi_mass_update() on nids @nids', array('@nids' => implode(',', array_keys($nodes)))), WATCHDOG_WARNING); + if (!apachesolr_nodeapi_mass_update($nodes)) { + // Solr query failed - so stop trying. + break; + } + } + // Check for deleted content that wasn't deleted from the index. + $result = db_query("SELECT asn.nid FROM {apachesolr_search_node} asn LEFT JOIN {node} n ON n.nid = asn.nid WHERE n.nid IS NULL"); + $node_lists = array(); + $nodes = array(); + while ($node = db_fetch_object($result)) { + $nodes[$node->nid] = $node; + if (count($nodes) == $limit) { + $node_lists[] = $nodes; + $nodes = array(); + } + } + // Queue the last, partially filled batch (fewer than $limit nodes).
+ if (count($nodes)) { + $node_lists[] = $nodes; + } + foreach ($node_lists as $nodes) { + watchdog('Apache Solr', t('On cron running apachesolr_nodeapi_mass_delete() on nids @nids', array('@nids' => implode(',', array_keys($nodes)))), WATCHDOG_WARNING); + if (!apachesolr_nodeapi_mass_delete($nodes)) { + // Solr query failed - so stop trying. + break; + } + } +} + +function apachesolr_nodeapi_mass_update($nodes) { + if (empty($nodes)) { + return TRUE; + } + $ids = array(); + foreach ($nodes as $node) { + if (!$node->status) { + $ids[] = apachesolr_document_id($node->nid); + } + } + $time = time(); + try { + $solr = apachesolr_get_solr(); + $solr->deleteMultipleById($ids); + apachesolr_index_updated($time); + foreach ($nodes as $node) { + // There was no exception, so update the table. + db_query("UPDATE {apachesolr_search_node} SET changed = %d, status = %d WHERE nid = %d", $time, $node->status, $node->nid); + } + return TRUE; + } + catch (Exception $e) { + watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), WATCHDOG_ERROR); + return FALSE; + } +} + +function apachesolr_nodeapi_mass_delete($nodes) { + if (empty($nodes)) { + return TRUE; + } + $ids = array(); + $nids = array(); + foreach ($nodes as $node) { + $ids[] = apachesolr_document_id($node->nid); + $nids[] = $node->nid; + } + try { + $solr = apachesolr_get_solr(); + $solr->deleteMultipleById($ids); + apachesolr_index_updated(time()); + // There was no exception, so update the table. + db_query("DELETE FROM {apachesolr_search_node} WHERE nid IN (" . db_placeholders($nids) . 
")", $nids); + return TRUE; + } + catch (Exception $e) { + watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), WATCHDOG_ERROR); + return FALSE; + } +} Index: apachesolr.module =================================================================== RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.module,v retrieving revision 1.1.2.12.2.161.2.4 diff -u -p -r1.1.2.12.2.161.2.4 apachesolr.module --- apachesolr.module 23 Oct 2009 10:18:16 -0000 1.1.2.12.2.161.2.4 +++ apachesolr.module 23 Oct 2009 11:12:29 -0000 @@ -349,7 +349,7 @@ function apachesolr_index_nodes($rows, $ watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), WATCHDOG_ERROR); return FALSE; } - include_once(drupal_get_path('module', 'apachesolr') .'/apachesolr.index.inc') ; + include_once(drupal_get_path('module', 'apachesolr') .'/apachesolr.index.inc'); $documents = array(); $old_position = apachesolr_get_last_index($namespace); $position = $old_position; @@ -403,6 +403,10 @@ function apachesolr_date_iso($date_times } function apachesolr_delete_node_from_index($node) { + static $failed = FALSE; + if ($failed) { + return FALSE; + } try { $solr = apachesolr_get_solr(); $solr->deleteById(apachesolr_document_id($node->nid)); @@ -411,6 +415,8 @@ function apachesolr_delete_node_from_ind } catch (Exception $e) { watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), WATCHDOG_ERROR); + // Don't keep trying queries if they are failing. + $failed = TRUE; return FALSE; } } @@ -434,21 +440,11 @@ function apachesolr_index_updated($updat * Implementation of hook_cron(). */ function apachesolr_cron() { + // Mass update and delete functions are in the include file. + include_once(drupal_get_path('module', 'apachesolr') .'/apachesolr.index.inc'); + apachesolr_cron_check_node_table(); try { $solr = apachesolr_get_solr(); - - // Check for unpublished content that wasn't deleted from the index. 
- $result = db_query("SELECT n.nid, n.status FROM {apachesolr_search_node} asn INNER JOIN {node} n ON n.nid = asn.nid WHERE asn.status != n.status"); - while ($node = db_fetch_object($result)) { - _apachesolr_nodeapi_update($node, FALSE); - } - - // Check for deleted content that wasn't deleted from the index. - $result = db_query("SELECT asn.nid FROM {apachesolr_search_node} asn LEFT JOIN {node} n ON n.nid = asn.nid WHERE n.nid IS NULL"); - while ($node = db_fetch_object($result)) { - _apachesolr_nodeapi_delete($node, FALSE); - } - // Optimize the index (by default once a day). $optimize_interval = variable_get('apachesolr_optimize_interval', 60 * 60 * 24); $last = variable_get('apachesolr_last_optimize', 0); @@ -498,7 +494,7 @@ function apachesolr_nodeapi(&$node, $op, } /** - * Helper function for hook_nodeapi() and hook_cron(). + * Helper function for hook_nodeapi(). */ function _apachesolr_nodeapi_delete($node, $set_message = TRUE) { if (apachesolr_delete_node_from_index($node)) { @@ -511,7 +507,7 @@ function _apachesolr_nodeapi_delete($nod } /** - * Helper function for hook_nodeapi() and hook_cron(). + * Helper function for hook_nodeapi(). */ function _apachesolr_nodeapi_update($node, $set_message = TRUE) { // Check if the node has gone from published to unpublished.