';
+ }
+
+ return $output;
+}
+
+/**
+ * Render a generic search form.
+ *
+ * This form must be usable not only within "http://example.com/search", but also
+ * as a simple search box (without "Restrict search to", help text, etc.), in the
+ * theme's header, and so forth. This means we must provide options to
+ * conditionally render certain parts of this form.
+ *
+ * @param $action
+ * Form action. Defaults to "search".
+ * @param $keys
+ * The search string entered by the user, containing keywords for the search.
+ * @param $options
+ * Whether to render the optional form fields and text ("Restrict search
+ * to", help text, etc.).
+ * @return
+ * An HTML string containing the search form.
+ */
+function search_form($action = '', $keys = '', $options = FALSE) {
+ $edit = $_POST['edit'];
+
+ if (!$action) {
+ $action = url('search');
+ }
+
+ $output = '
'. $node->body;
-
- // Fetch comments
- if (module_exist('comment')) {
- $comments = db_query('SELECT subject, comment, format FROM {comments} WHERE nid = %d AND status = 0', $node->nid);
- while ($comment = db_fetch_object($comments)) {
- $text .= '
'. $comment->subject .'
'. check_output($comment->comment, $comment->format);
- }
- }
-
- // Update index
- search_index($node->nid, 'node', $text);
- }
+ return array('last_update' => 'node_cron_last',
+ 'node_type' => 'node',
+ 'select' => "SELECT n.nid as lno, n.title as text1, n.body as text2 FROM {node} n WHERE n.status = 1 AND moderate = 0 and (created > " . variable_get('node_cron_last', 1) . " or changed > " . variable_get('node_cron_last', 1) . ")");
}
/**
diff -ur ./modules/search.module /root/drupal-4.5.1/modules/search.module
--- ./modules/search.module Mon Dec 13 19:41:48 2004
+++ /root/drupal-4.5.1/modules/search.module Thu Oct 28 19:09:41 2004
@@ -7,54 +7,20 @@
*/
/**
- * Unicode character classes to include in the index.
- * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
- *
- * Lu Letter, Uppercase
- * Ll Letter, Lowercase
- * Lt Letter, Titlecase
- * Lm Letter, Modifier
- * Lo Letter, Other
- * Mn Mark, Nonspacing
- * Mc Mark, Spacing Combining
- * Nd Number, Decimal Digit
- * Nl Number, Letter
- * No Number, Other
- * Sm Symbol, Math
- * Sc Symbol, Currency
- * Sk Symbol, Modifier
- * So Symbol, Other
- *
- * Matches all character classes not in the list above (enclosing marks, punctuation and control codes, spacers).
- * 'Me', 'Pc', 'Pd', 'Ps', 'Pe', 'Pi', 'Pf', 'Po', 'Zs', 'Zl', 'Zp', 'Cc', 'Cf', 'Cs', 'Co'
- */
-define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');
-
-/**
- * Matches all 'N' character classes (numbers)
- */
-define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');
-
-/**
- * Matches all 'P' character classes (punctuation)
- */
-define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');
-
-/**
* Implementation of hook_help().
*/
function search_help($section = 'admin/help#search') {
switch ($section) {
+ case 'admin/help#search':
+ return t("
+ Search guidelines
+
The search page allows you to search the web site's content. You can specify multiple words, and they will all be searched for. You can also use wildcards, so 'walk*' will match 'walk', 'walking', 'walker', 'walkable' and so on. Furthermore, searches are not case sensitive so searching for 'walk', 'Walk' or 'WALK' will yield exactly the same results.
+ Words excluded from the search
+
Words that frequently occur, typically called 'noise words', are ignored. Example words are 'a', 'at', 'and', 'are', 'as', 'how', 'where', etc. Words shorter than %number letters are also ignored.
", array('%number' => variable_get('minimum_word_size', 2)));
case 'admin/modules#description':
return t('Enables site-wide keyword searching.');
- case 'admin/search':
- return t('
The search engine works by maintaining an index of words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that indexing requires cron to be set up correctly.
Changes to these settings will only apply to content that is indexed after the change. If you want them to apply to everything, you need to wipe the index with the button below.
');
- case 'search#noresults':
- return t('
-
Check if your spelling is correct.
-
Try using wildcards: walk* matches walker, walking, ...
-
Use longer words (words shorter than %number letters are ignored).
-
', array('%number' => variable_get('minimum_word_size', 3)));
+ case 'admin/settings/search':
+ return t('The search engine works by keeping an index of "interesting" words. To make sure we only get "interesting" words you need to set the following.');
}
}
@@ -92,12 +58,7 @@
$items[] = array('path' => 'search/help', 'title' => t('search help'),
'callback' => 'search_help_page',
'access' => user_access('search content'),
- 'type' => MENU_NORMAL_ITEM);
- }
- if ($_POST['op'] == t('Wipe index')) {
- search_wipe();
- drupal_set_message(t('The search index has been wiped.'));
- drupal_goto('admin/settings/search');
+ 'type' => MENU_SUGGESTED_ITEM);
}
return $items;
@@ -109,657 +70,332 @@
function search_settings() {
// Indexing settings:
- $group = form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 3), 3, 3, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
- $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 3), 3, 3, t('The number of characters a word has to be to be searched for, including wildcard characters.'));
- $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));
- $group .= form_select(t('Items to index per cron run'), 'search_cron_limit', variable_get('search_cron_limit', 100), $items, t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
+ $group = form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 2), 10, 10, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
+ $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 0), 10, 10, t('The number of characters a word has to be to be searched for.'));
+ $group .= form_textarea(t('Noise words'), 'noisewords', variable_get('noisewords', ''), 70, 10, t('These words will not be indexed. Enter a comma separated list; linebreaks and whitespace do not matter. Example: and, or, not, a, to, I, it, ...'));
$output = form_group(t('Indexing settings'), $group);
// Visual settings:
- // Collect some stats
- $estimate = variable_get('search_indexed', 0);
- $nodes = max(1, db_result(db_query('SELECT COUNT(*) FROM {node}')));
- $percentage = ((int)min(100, 100 * $estimate / $nodes)) . '%';
-
- $status = '
'. t('Approximately %percentage of the site has been indexed.', array('%percentage' => $percentage)) .'
';
- $status .= '
'. form_button(t('Wipe index')) . '
';
-
- $output .= form_group('Indexing status', $status);
+ $group = form_radios(t('Help text position'), 'help_pos', variable_get('help_pos', 1), array('1' => t('Above search output'), '2' => t('Below search output'), '3' => t('Link from above search output'), '4' => t('Link from below search output')), t('Where to show the help text for users on the search page.'));
$output .= form_group(t('Viewing options'), $group);
return $output;
-}
-/**
- * Wipes a part of or the entire search index.
- *
- * @param $sid
- * (optional) The SID of the item to wipe. If specified, $type must be passed
- * too.
- * @param $type
- * (optional) The type of item to wipe.
- *
- */
-function search_wipe($sid = NULL, $type = NULL) {
- if ($type == NULL && $sid == NULL) {
- db_query('DELETE FROM {search_index}');
- db_query('DELETE FROM {search_total}');
- module_invoke_all('search', 'reset');
- }
- else {
- db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
- db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
- }
-}
-
-
-/**
- * Marks a word as dirty (or retrieves the list of dirty words). This is used
- * during indexing (cron). Words which are dirty have outdated total counts in
- * the search_total table, and need to be recounted.
- */
-function search_dirty($word = null) {
- static $dirty = array();
- if ($word !== null) {
- $dirty[$word] = true;
- }
- else {
- return $dirty;
- }
}
/**
* Implementation of hook_cron().
*
- * Fires hook_update_index() in all modules and cleans up dirty words (see
- * search_dirty).
+ * Fires hook_update_index() in all modules and uses the results to make
+ * the search index current.
*/
function search_cron() {
- // Update word index
foreach (module_list() as $module) {
- module_invoke($module, 'update_index');
- }
- // Update word counts for new/changed words
- foreach (search_dirty() as $word => $dummy) {
- $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
- db_query("UPDATE {search_total} SET count = %d WHERE word = '%s'", $total, $word);
- if (!db_affected_rows()) {
- db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
+ $module_array = module_invoke($module, 'update_index');
+ if ($module_array) {
+ update_index($module_array);
}
+ $module_array = null;
}
- // Find words that were deleted from search_index, but are still in
- // search_total. We use a LEFT JOIN between the two tables and keep only the
- // rows which fail to join.
- $result = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
- while ($word = db_fetch_object($result)) {
- db_query("DELETE FROM {search_total} WHERE word = '%s'", $word->realword);
- }
-
- // Count indexed items (for administration screen)
- $indexed = db_result(db_query('SELECT COUNT(DISTINCT sid) FROM {search_index}'));
- variable_set('search_indexed', $indexed);
-}
-
- /**
- * Splits a string into component words according to indexing rules.
- */
-function search_keywords_split($text) {
- static $last = null;
- static $lastsplit = null;
-
- if ($last == $text) {
- return $lastsplit;
- }
-
- // Decode entities to UTF-8
- $text = decode_entities($text);
-
- // Call an external processor for word handling.
- search_preprocess($text);
-
- // To improve searching for numerical data such as dates, IP addresses
- // or version numbers, we consider a group of numerical characters
- // separated only by punctuation characters to be one piece.
- // This also means that searching for e.g. '20/03/1984' also returns
- // results with '20-03-1984' in them.
- // Readable regexp: ([number]+)[punctuation]+(?=[number])
- $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);
-
- // The dot, underscore and dash are simply removed. This allows meaningful
- // search behaviour with acronyms and URLs.
- $text = preg_replace('/[._-]+/', '', $text);
-
- // With the exception of the rules above, we consider all punctuation,
- // marks, spacers, etc, to be a word boundary.
- $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);
-
- // Process words
- $words = explode(' ', $text);
-
- // Save last keyword result
- $last = $text;
- $lastsplit = $words;
-
- return $words;
+ return;
}
/**
- * Invokes hook_search_preprocess() in modules.
- */
-function search_preprocess(&$text) {
- static $modules = null;
- // Cache list of modules which implement this hook. This function gets called
- // a lot during reindexing.
- if (!is_array($modules)) {
- $modules = array();
- foreach (module_list() as $module) {
- if (module_hook($module, 'search_preprocess')) {
- $modules[] = $module;
- }
- }
- }
- // Process $text
- if (count($modules) > 0) {
- foreach ($modules as $module) {
- $text = module_invoke($module, 'search_preprocess', $text);
- }
- }
- }
-
-
- /**
- * Update the search index for a particular item.
- *
- * @param $sid
- * A number identifying this particular item (e.g. node id).
- *
- * @param $type
- * A string defining this type of item (e.g. 'node')
- *
- * @param $text
- * The content of this item. Must be a piece of HTML text.
- */
-function search_index($sid, $type, $text) {
- $minimum_word_size = variable_get('minimum_word_size', 3);
-
- global $base_url;
- $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';
-
- // Multipliers for scores of words inside certain HTML tags.
- // Note: 'a' must be included for link ranking to work.
- $tags = array('h1' => 21,
- 'h2' => 18,
- 'h3' => 15,
- 'h4' => 12,
- 'h5' => 9,
- 'h6' => 6,
- 'u' => 5,
- 'b' => 5,
- 'strong' => 5,
- 'em' => 5,
- 'a' => 10);
-
- // Strip off all ignored tags to speed up processing, but insert space before/after
- // them to keep word boundaries.
- $text = str_replace(array('<', '>'), array(' <', '> '), $text);
- $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');
-
- // Split HTML tags from plain text.
- $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
- // Note: PHP ensures the array consists of alternating delimiters and literals
- // and begins and ends with a literal (inserting $null as required).
-
- $tag = false; // Odd/even counter. Tag or no tag.
- $link = false; // State variable for link analyser
- $score = 1; // Starting score per word
-
- $results = array(0 => array());
-
- foreach ($split as $value) {
- if ($tag) {
- // Increase or decrease score per word based on tag
- list($tagname) = explode(' ', $value, 2);
- $tagname = strtolower($tagname);
- if ($tagname{0} == '/') {
- $score -= $tags[substr($tagname, 1)];
- if ($score < 1) { // possible due to bad HTML
- $score = 1;
- }
- if ($tagname == '/a') {
- $link = false;
- }
- }
- else {
- if ($tagname == 'a') {
- // Check if link points to a node on this site
- if (preg_match($node_regexp, $value, $match)) {
- $path = drupal_get_normal_path($match[1]);
- if (preg_match('!(node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
- $linknid = $match[1];
- if ($linknid > 0) {
- $link = true;
- }
- }
- }
- }
- $score += $tags[$tagname];
- }
- }
- else {
- // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
- if ($value != '') {
- $words = search_keywords_split($value);
- foreach ($words as $word) {
- // Check wordlength
- if (string_length($word) >= $minimum_word_size) {
- $word = strtolower($word);
- if ($link) {
- if (!isset($results[$linknid])) {
- $results[$linknid] = array();
- }
- $results[$linknid][$word] += $score;
- }
- else {
- $results[0][$word] += $score;
- }
- }
- }
- }
- }
- $tag = !$tag;
- }
-
- db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
- search_wipe($sid, $type);
-
- // Insert results into search index
- foreach ($results[0] as $word => $score) {
- db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
- search_dirty($word);
- }
- unset($results[0]);
-
- // Now insert links to nodes
- foreach ($results as $nid => $words) {
- foreach ($words as $word => $score) {
- db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
- search_dirty($word);
- }
- }
- }
-
- /**
* Perform a search on a word or words.
*
- * This function is called by each module that supports the indexed search.
+ * This function is called by each node that supports the indexed search.
*
- * The end result is an SQL select on the search_index table. As a guide for
- * writing the optional extra SQL fragments (see below), use this query:
- *
- * SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score
- * FROM {search_index} i
- * $join INNER JOIN {search_total} t ON i.word = t.word
- * WHERE $where AND (i.word = '...' OR ...)
- * GROUP BY i.type, i.sid
- * ORDER BY score DESC";
- *
- * @param $keys
- * A search string as entered by the user.
- *
- * @param $type
- * A string identifying the calling module.
- *
- * @param $join
- * (optional) A string to be inserted into the JOIN part of the SQL query.
- * For example "INNER JOIN {node} n ON n.nid = i.sid".
- *
- * @param $where
- * (optional) A string to be inserted into the WHERE part of the SQL query.
- * For example "(n.status > 0)".
+ * @param $search_array
+ * An array as returned from hook_search(). The format of this array is
+ * array('keys' => ..., 'type' => ..., 'select' => ...). See the hook_search()
+ * documentation for an explanation of the array values.
*
* @return
- * An array of SIDs for the search results.
- */
-function do_search($keys, $type, $join = '', $where = '1') {
- // Note, we replace the wildcards with U+FFFD (Replacement character) to pass
- // through the keyword extractor.
- $keys = str_replace('*', '�', $keys);
-
- // Split into words
- $keys = search_keywords_split($keys);
- // Lowercase
- foreach ($keys as $k => $v) {
- $keys[$k] = strtolower($v);
- }
-
- $words = array();
- $arguments = array();
- // Build WHERE clause
- foreach ($keys as $word) {
- if (string_length($word) < variable_get('remove_short', 3)) {
- continue;
- }
- if (strpos($word, '�') !== false) {
- $words[] = "i.word LIKE '%s'";
- $arguments[] = str_replace('�', '%', $word);
- }
- else {
- $words[] = "i.word = '%s'";
- $arguments[] = $word;
- }
- }
- if (count($words) == 0) {
- return array();
- }
- $where .= ' AND ('. implode(' OR ', $words) .')';
+ * An array of search results, of which each element is an array with the
+ * keys "count", "title", "link", "user" (name), "date", and "keywords".
+ */
+function do_search($search_array) {
- // Get result count (for pager)
- $count = db_result(db_query("SELECT COUNT(DISTINCT i.sid, i.type) FROM {search_index} i $join WHERE $where", $arguments));
- if ($count == 0) {
- return array();
- }
- $count_query = "SELECT $count";
+ $keys = strtolower($search_array['keys']);
+ $type = $search_array['type'];
+ $select = $search_array['select'];
- // Do pager query
- $query = "SELECT i.type, i.sid, i.word, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $where GROUP BY i.type, i.sid ORDER BY score DESC";
- $arguments = array_merge(array($query, 15, 0, $count_query), $arguments);
- $result = call_user_func_array('pager_query', $arguments);
-
- $results = array();
- while ($item = db_fetch_object($result)) {
- $results[] = $item->sid;
- }
+ // Remove punctuation/special characters (same rule as update_index()).
+ $keys = preg_replace("'(!|%|,|:|;|\(|\)|\&|\"|\'|\.|-|\/|\?|\\\)'", '', $keys);
- return $results;
-}
+ // Replace wildcards with MySQL wildcards.
+ $keys = str_replace('*', '%', $keys);
-/**
- * Menu callback; presents the search form and/or search results.
- */
-function search_view() {
- $keys = isset($_GET['keys']) ? $_GET['keys'] : $_POST['edit']['keys'];
- $type = isset($_GET['type']) ? $_GET['type'] : ($_POST['edit']['type'] ? $_POST['edit']['type'] : 'node');
+ // Split the words entered into an array.
+ $words = explode(' ', $keys);
- if (user_access('search content')) {
+ foreach ($words as $word) {
- // Only perform search if there is non-whitespace search term:
- if (trim($keys)) {
- // Log the search keys:
- watchdog('search',
- t('Search: %keys (%type).', array('%keys' => "$keys", '%type' => module_invoke($type, 'search', 'name'))),
- l(t('results'), 'search', NULL, 'keys='. urlencode($keys) . '&type='. urlencode($type))
- );
+ // If the word is too short, and we've got it set to skip them, loop.
+ if (strlen($word) < variable_get('remove_short', 0)) {
+ continue;
+ }
- // Collect the search results:
- $results = search_data($keys, $type);
+ // Put the next search word into the query and do the query.
+ $query = str_replace("'%'", "'". check_query($word) ."'", $select);
+ $result = db_query($query);
+
+ if (db_num_rows($result) != 0) {
+ // At least one matching record was found.
+ $found = 1;
+
+ // Create an in memory array of the results.
+ while ($row = db_fetch_array($result)) {
+ $lno = $row['lno'];
+ $nid = $row['nid'];
+ $title = $row['title'];
+ $created = $row['created'];
+ $uid = $row['uid'];
+ $name = $row['name'];
+ $count = $row['count'];
+
+ // Build reduction variable.
+ $reduction[$lno][$word] = true;
+
+ // Check whether the just-fetched row is already in the table.
+ if ($results[$lno]['lno'] != $lno) {
+ $results[$lno]['count'] = $count;
+
+ $results[$lno]['lno'] = $lno;
+ $results[$lno]['nid'] = $nid;
+ $results[$lno]['title'] = $title;
+ $results[$lno]['created'] = $created;
+ $results[$lno]['uid'] = $uid;
+ $results[$lno]['name'] = $name;
+ }
+ else {
+ // Different word, but existing "lno". Increase the count of
+ // matches against this "lno" by the number of times this
+ // word appears in the text.
+ $results[$lno]['count'] = $results[$lno]['count'] + $count;
+ }
+ }
+ }
+ }
- if ($results) {
- $results = theme('box', t('Search results'), $results);
+ if ($found) {
+ foreach ($results as $lno => $values) {
+ $pass = true;
+ foreach ($words as $word) {
+ if (!$reduction[$lno][$word]) {
+ $pass = false;
+ }
}
- else {
- $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
+ if ($pass) {
+ $fullresults[$lno] = $values;
}
}
- else if (isset($_POST['edit'])) {
- form_set_error('keys', t('Please enter some keywords.'));
+ $results = $fullresults;
+ if (!is_array($results)) {
+ $found = 0;
+ }
+ }
+ if ($found) {
+ // Black magic here to sort the results.
+ array_multisort($results, SORT_DESC);
+
+ // Now, output the results.
+ foreach ($results as $key => $value) {
+ $lno = $value['lno'];
+ $nid = $value['nid'];
+ $title = $value['title'];
+ $created = $value['created'];
+ $uid = $value['uid'];
+ $name = $value['name'];
+ $count = $value['count'];
+ switch ($type) {
+ case 'node':
+ $find[$i++] = array('count' => $count, 'title' => $title, 'link' => url("node/$lno"), 'user' => $name, 'date' => $created, 'keywords' => implode('|', $words));
+ break;
+ case 'comment':
+ $find[$i++] = array('count' => $count, 'title' => $title, 'link' => (strstr(request_uri(), 'admin') ? url("admin/comment/edit/$lno") : url("node/$nid", NULL, "comment-$lno")), 'user' => $name, 'date' => $created, 'keywords' => implode('|', $words));
+ break;
+ break;
+ }
}
-
- // Construct the search form.
- // Note, we do this last because of the form_set_error() above.
- $output = search_form(NULL, $keys, $type, TRUE);
-
- $output .= $results;
-
- print theme('page', $output, t('Search'));
- }
- else {
- drupal_access_denied();
}
+ return $find;
}
/**
- * Menu callback; prints the search engine help page.
- */
-function search_help_page() {
- print theme('page', search_help());
-}
-
-/**
- * @defgroup search Search interface
- * @{
- * The Drupal search interface manages a global search mechanism.
+ * Update the search_index table.
*
- * Modules may plug into this system to provide searches of different types of
- * data. Most of the system is handled by search.module, so this must be enabled
- * for all of the search features to work.
+ * @param $search_array
+ * An array as returned from hook_update_index().
*/
+function update_index($search_array) {
+ $last_update = variable_get($search_array['last_update'], 1);
+ $node_type = $search_array['node_type'];
+ $select = $search_array['select'];
+ $minimum_word_size = variable_get('minimum_word_size', 2);
-/**
- * Render a search form.
- *
- * This form must be usable not only within "http://example.com/search", but also
- * as a simple search box (without "Restrict search to", help text, etc.), in the
- * theme's header, and so forth. This means we must provide options to
- * conditionally render certain parts of this form.
- *
- * @param $action
- * Form action. Defaults to "search".
- * @param $keys
- * The search string entered by the user, containing keywords for the search.
- * @param $options
- * Whether to render the optional form fields and text ("Restrict search
- * to", help text, etc.).
- * @return
- * An HTML string containing the search form.
- */
-function search_form($action = '', $keys = '', $type = null, $options = FALSE) {
- $edit = $_POST['edit'];
+ //watchdog('user', "$last_update $node_type $select");
- if (!$action) {
- $action = url('search');
- }
+ $result = db_query($select);
- $output = '
';
- $output .= form_item(t('Enter your keywords'), $box);
+ if (db_num_rows($result)) {
+ // Results were found. Look through the nodes we just selected.
+ while ($node = db_fetch_array ($result)) {
- if ($options) {
- $output .= '
'. t('Search for') .': ';
+ // Trash any existing entries in the search index for this node,
+ // in case it is a modified node.
+ db_query("DELETE from {search_index} WHERE lno = '". $node['lno'] ."' AND type = '". $node_type ."'");
- if (!isset($edit['type'])) {
- $edit['type'] = $type;
- }
+ // Build the word list (teaser not included, as it would give a
+ // false count of the number of hits).
+ $wordlist = $node['text1'] .' '. $node['text2'];
- foreach (module_list() as $name) {
- if (module_hook($name, 'search')) {
- $output .= form_radio(module_invoke($name, 'search', 'name'), 'type', $name, $edit['type'] == $name);
- }
- }
- $output .= '