Index: apachesolr.index.inc
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.index.inc,v
retrieving revision 1.1.2.12
diff -u -p -r1.1.2.12 apachesolr.index.inc
--- apachesolr.index.inc	14 Oct 2009 13:28:40 -0000	1.1.2.12
+++ apachesolr.index.inc	14 Oct 2009 14:12:16 -0000
@@ -245,3 +245,129 @@ function apachesolr_add_tags_to_document
   }
 }
 
+/**
+ * Additional index utility functions
+ */
+
+/**
+ * hook_cron() helper to try to make {apachesolr_search_node} consistent with {node}.
+ */
+function apachesolr_cron_check_node_table() {
+  // Check for unpublished content that wasn't deleted from the index.
+  $result = db_query("SELECT n.nid, n.status FROM {apachesolr_search_node} asn INNER JOIN {node} n ON n.nid = asn.nid WHERE asn.status != n.status");
+  $node_lists = array();
+  $nodes = array();
+  // Update or delete at most this many in each Solr query.
+  $limit = variable_get('apachesolr_cron_mass_limit', 500);
+  while ($node = db_fetch_object($result)) {
+    $nodes[$node->nid] = $node;
+    if (count($nodes) == $limit) {
+      $node_lists[] = $nodes;
+      $nodes = array();
+    }
+  }
+  // Any remaining ones if the limit is not reached.
+  if (count($nodes)) {
+    $node_lists[] = $nodes;
+  }
+  foreach ($node_lists as $nodes) {
+    watchdog('Apache Solr', 'On cron running apachesolr_nodeapi_mass_update() on nids @nids', array('@nids' => implode(',', array_keys($nodes))), WATCHDOG_WARNING);
+    if (!apachesolr_nodeapi_mass_update($nodes)) {
+      // Solr query failed - so stop trying.
+      break;
+    }
+  }
+  // Check for deleted content that wasn't deleted from the index.
+  $result = db_query("SELECT asn.nid FROM {apachesolr_search_node} asn LEFT JOIN {node} n ON n.nid = asn.nid WHERE n.nid IS NULL");
+  $node_lists = array();
+  $nodes = array();
+  while ($node = db_fetch_object($result)) {
+    $nodes[$node->nid] = $node;
+    if (count($nodes) == $limit) {
+      $node_lists[] = $nodes;
+      $nodes = array();
+    }
+  }
+  // Any remaining ones if the limit is not reached.
+  if (count($nodes)) {
+    $node_lists[] = $nodes;
+  }
+  foreach ($node_lists as $nodes) {
+    watchdog('Apache Solr', 'On cron running apachesolr_nodeapi_mass_delete() on nids @nids', array('@nids' => implode(',', array_keys($nodes))), WATCHDOG_WARNING);
+    if (!apachesolr_nodeapi_mass_delete($nodes)) {
+      // Solr query failed - so stop trying.
+      break;
+    }
+  }
+}
+
+/**
+ * Delete unpublished nodes from the Solr index in one request, then record
+ * the new status of every passed node in {apachesolr_search_node}.
+ *
+ * @param $nodes
+ *   Array of objects having nid and status properties.
+ * @return
+ *   TRUE if $nodes is empty or no exception was thrown, FALSE otherwise.
+ */
+function apachesolr_nodeapi_mass_update($nodes) {
+  if (empty($nodes)) {
+    return TRUE;
+  }
+  $ids = array();
+  foreach ($nodes as $node) {
+    if (!$node->status) {
+      $ids[] = apachesolr_document_id($node->nid);
+    }
+  }
+  $time = time();
+  try {
+    $solr = apachesolr_get_solr();
+    $solr->deleteMultipleById($ids);
+    apachesolr_index_updated($time);
+    foreach ($nodes as $node) {
+      // There was no exception, so update the table.
+      db_query("UPDATE {apachesolr_search_node} SET changed = %d, status = %d WHERE nid = %d", $time, $node->status, $node->nid);
+    }
+    return TRUE;
+  }
+  catch (Exception $e) {
+    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
+    return FALSE;
+  }
+}
+
+/**
+ * Delete the passed nodes from the Solr index in one request, then remove
+ * their rows from {apachesolr_search_node}.
+ *
+ * @param $nodes
+ *   Array of objects having a nid property.
+ * @return
+ *   TRUE if $nodes is empty or no exception was thrown, FALSE otherwise.
+ */
+function apachesolr_nodeapi_mass_delete($nodes) {
+  if (empty($nodes)) {
+    return TRUE;
+  }
+  $ids = array();
+  $nids = array();
+  foreach ($nodes as $node) {
+    $ids[] = apachesolr_document_id($node->nid);
+    $nids[] = $node->nid;
+  }
+  // Taken before the Solr request, as in apachesolr_nodeapi_mass_update().
+  $time = time();
+  try {
+    $solr = apachesolr_get_solr();
+    $solr->deleteMultipleById($ids);
+    apachesolr_index_updated($time);
+    // There was no exception, so update the table.
+    db_query("DELETE FROM {apachesolr_search_node} WHERE nid IN (" . db_placeholders($nids) . ")", $nids);
+    return TRUE;
+  }
+  catch (Exception $e) {
+    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
+    return FALSE;
+  }
+}
Index: apachesolr.module
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/apachesolr.module,v
retrieving revision 1.1.2.12.2.166
diff -u -p -r1.1.2.12.2.166 apachesolr.module
--- apachesolr.module	14 Oct 2009 13:28:40 -0000	1.1.2.12.2.166
+++ apachesolr.module	14 Oct 2009 14:12:17 -0000
@@ -341,7 +341,7 @@ function apachesolr_index_nodes($rows, $
     watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
     return FALSE;
   }
-  include_once(drupal_get_path('module', 'apachesolr') .'/apachesolr.index.inc') ;
+  include_once(drupal_get_path('module', 'apachesolr') .'/apachesolr.index.inc');
   $documents = array();
   $old_position = apachesolr_get_last_index($namespace);
   $position = $old_position;
@@ -396,6 +396,10 @@ function apachesolr_date_iso($date_times
 }
 
 function apachesolr_delete_node_from_index($node) {
+  static $failed = FALSE;
+  if ($failed) {
+    return FALSE;
+  }
   try {
     $solr = apachesolr_get_solr();
     $solr->deleteById(apachesolr_document_id($node->nid));
@@ -404,6 +408,8 @@ function apachesolr_delete_node_from_ind
   }
   catch (Exception $e) {
     watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
+    // Don't keep trying queries if they are failing.
+    $failed = TRUE;
     return FALSE;
   }
 }
@@ -427,21 +433,11 @@ function apachesolr_index_updated($updat
  * Implementation of hook_cron().
  */
 function apachesolr_cron() {
+  // Mass update and delete functions are in the include file.
+  include_once(drupal_get_path('module', 'apachesolr') .'/apachesolr.index.inc');
+  apachesolr_cron_check_node_table();
   try {
     $solr = apachesolr_get_solr();
-
-    // Check for unpublished content that wasn't deleted from the index.
-    $result = db_query("SELECT n.nid, n.status FROM {apachesolr_search_node} asn INNER JOIN {node} n ON n.nid = asn.nid WHERE asn.status != n.status");
-    while ($node = db_fetch_object($result)) {
-      _apachesolr_nodeapi_update($node, FALSE);
-    }
-
-    // Check for deleted content that wasn't deleted from the index.
-    $result = db_query("SELECT asn.nid FROM {apachesolr_search_node} asn LEFT JOIN {node} n ON n.nid = asn.nid WHERE n.nid IS NULL");
-    while ($node = db_fetch_object($result)) {
-      _apachesolr_nodeapi_delete($node, FALSE);
-    }
-
     // Optimize the index (by default once a day).
     $optimize_interval = variable_get('apachesolr_optimize_interval', 60 * 60 * 24);
     $last = variable_get('apachesolr_last_optimize', 0);
@@ -491,7 +487,7 @@ function apachesolr_nodeapi(&$node, $op,
 }
 
 /**
- * Helper function for hook_nodeapi() and hook_cron().
+ * Helper function for hook_nodeapi().
  */
 function _apachesolr_nodeapi_delete($node, $set_message = TRUE) {
   if (apachesolr_delete_node_from_index($node)) {
@@ -504,7 +500,7 @@ function _apachesolr_nodeapi_delete($nod
 }
 
 /**
- * Helper function for hook_nodeapi() and hook_cron().
+ * Helper function for hook_nodeapi().
  */
 function _apachesolr_nodeapi_update($node, $set_message = TRUE) {
   // Check if the node has gone from published to unpublished.
Index: Drupal_Apache_Solr_Service.php
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/Drupal_Apache_Solr_Service.php,v
retrieving revision 1.1.2.20
diff -u -p -r1.1.2.20 Drupal_Apache_Solr_Service.php
--- Drupal_Apache_Solr_Service.php	12 Jun 2009 18:16:31 -0000	1.1.2.20
+++ Drupal_Apache_Solr_Service.php	14 Oct 2009 14:12:17 -0000
@@ -288,4 +288,23 @@ class Drupal_Apache_Solr_Service extends
     }
     return array($result->data, $headers);
   }
+
+  /**
+   * Create and post a delete document based on multiple document IDs.
+   *
+   * @param array $ids Expected to be utf-8 encoded strings
+   * @return Apache_Solr_Response
+   *
+   * @throws Exception If an error occurs during the service call
+   */
+  public function deleteMultipleById($ids) {
+    $rawPost = "<delete>\n";
+    foreach ($ids as $id) {
+      // Escape special xml characters
+      $id = htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8');
+      $rawPost .= '<id>' . $id . "</id>\n";
+    }
+    $rawPost .= '</delete>';
+    return $this->delete($rawPost);
+  }
 }
Index: README.txt
===================================================================
RCS file: /cvs/drupal-contrib/contributions/modules/apachesolr/README.txt,v
retrieving revision 1.1.2.1.2.29
diff -u -p -r1.1.2.1.2.29 README.txt
--- README.txt	13 Oct 2009 20:43:17 -0000	1.1.2.1.2.29
+++ README.txt	14 Oct 2009 14:12:17 -0000
@@ -146,6 +146,10 @@ behavior:
 
  - apachesolr_query_class: the default query class to use.
 
+ - apachesolr_cron_mass_limit: update or delete at most this many documents in
+   each Solr request, such as when making {apachesolr_search_node} consistent
+   with {node}.
+
 Troubleshooting
 --------------
 Problem: