'admin/settings/swish/main',
    'title' => 'swish settings',
    'callback' => 'swish_settings',
    'access' => user_access('administer swish-e'),
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10
  );
  $items[] = array(
    'path' => 'admin/settings/swish/index',
    'title' => 'swish indexer',
    'callback' => 'swish_index',
    'access' => user_access('administer swish-e'),
    'type' => MENU_LOCAL_TASK
  );
  return $items;
}

/**
 * Returns the configured path of one of the external programs.
 *
 * Every caller in this module goes through this helper so that the settings
 * form, the validator, the indexer and the search all agree on the default
 * used while the variable has not been saved yet.
 *
 * @param $name
 *   Variable name, e.g. 'swish_path' or 'catdoc_path'.
 * @return
 *   The stored value, or the built-in default when nothing is stored.
 */
function _swish_get_path($name) {
  $defaults = array(
    'swish_path' => '/usr/local/bin/swish-e',
    'catdoc_path' => '/usr/local/bin/catdoc',
    'xls2csv_path' => '/usr/local/bin/xls2csv',
    'ppthtml_path' => '/usr/local/bin/ppthtml',
    'pdftotext_path' => '/usr/local/bin/pdftotext',
    'unrtf_path' => '/usr/bin/unrtf',
  );
  return variable_get($name, isset($defaults[$name]) ? $defaults[$name] : '');
}

/**
 * Returns the full path of the swish-e index file.
 *
 * The index lives inside the files directory (relative to the current
 * working directory). Both the indexer and the search use this helper so
 * they always read and write the same file.
 */
function _swish_index_file() {
  return getcwd() . DIRECTORY_SEPARATOR
    . variable_get('file_directory_path', 'files')
    . DIRECTORY_SEPARATOR .'my_swish_index';
}

/**
 * Builds one "path to a program" textfield of the settings form.
 *
 * @param $name
 *   Variable name the field edits.
 * @param $title
 *   Already translated field title.
 * @param $description
 *   Already translated field description.
 */
function _swish_path_field($name, $title, $description) {
  return array(
    '#type' => 'textfield',
    '#title' => $title,
    '#default_value' => _swish_get_path($name),
    '#size' => 60,
    '#maxlength' => 120,
    '#description' => $description,
  );
}

/**
 * swish_settings callback function.
 *
 * Validates the currently stored paths (which flags broken ones on the
 * form) and returns the form with one textfield per external program.
 */
function swish_settings(){
  swish_validate_paths();

  $form = array();
  $form['swish_path'] = _swish_path_field('swish_path',
    t('Swish-E Path'),
    t('The absolute path of the swish-e binary.'));
  $form['catdoc_path'] = _swish_path_field('catdoc_path',
    t('M$ Word Filter Path'),
    t('The absolute path of the M$ Word filter.'));
  $form['xls2csv_path'] = _swish_path_field('xls2csv_path',
    t('M$ Excel Filter Path'),
    t('The absolute path of the M$ Excel filter'));
  // The description used to say "Excel" here (copy/paste slip).
  $form['ppthtml_path'] = _swish_path_field('ppthtml_path',
    t('M$ Powerpoint Filter Path'),
    t('The absolute path of the M$ Powerpoint filter.'));
  $form['pdftotext_path'] = _swish_path_field('pdftotext_path',
    t('PDF Filter Path'),
    t('The absolute path of the PDF filter.'));
  $form['unrtf_path'] = _swish_path_field('unrtf_path',
    t('RTF Filter Path'),
    t('The absolute path of the Rich Text Format filter.'));

  return $form;
}

/**
 * swish_index callback function.
 *
 * Shows the "Index Now" button. When the form was submitted and the module
 * settings are valid, runs the indexer and reports either the swish-e error
 * or the last lines of its output.
 */
function swish_index() {
  $attributes = array();

  if (swish_validate_paths()) {
    drupal_set_message('Swish module settings errors exist. Fix it '. l('here.', 'admin/settings/swish'), 'error');
    // The HTML attribute is "disabled"; "disable" is ignored by browsers.
    $attributes['disabled'] = 'disabled';
  }
  else if (!empty($_POST['op'])) {
    $result = _swish_do_index();
    if (isset($result[0]) && substr($result[0], 0, 5) == 'err: ') {
      drupal_set_message('Indexing aborted. '. $result[0], 'error');
    }
    else {
      // Include only the last 3 lines, most recent first. array_slice()
      // copes with fewer than 3 lines instead of reading negative indices.
      $msg = '';
      foreach (array_reverse(array_slice($result, -3)) as $line) {
        $msg .= $line ."\n";
      }
      drupal_set_message($msg);
    }
  }

  $form['index'] = array(
    '#type' => 'submit',
    '#value' => 'Index Now',
    '#attributes' => $attributes,
  );
  $output = drupal_get_form('Swish Indexer', $form);
  return print theme('page', $output, 'Swish Indexer');
}

/**
 * This will be invoked by the search module.
 * Returns the Header, and array of items found, if any.
 *
 * @param $op
 *   'name', 'reset' or 'search'.
 * @param $keys
 *   The search words entered by the user (only used for 'search').
 * @return
 *   For 'name' the tab title; for 'search' an array of items, each with
 *   'link', 'title' and 'extra' (rank, file_size, file_type). An empty
 *   array is returned when nothing is found or swish-e cannot be run.
 */
function swish_search ($op = 'search', $keys = null){
  switch ($op) {
    case 'name':
      return t('uploaded files');

    case 'reset':
      return;

    case 'search':
      $find = array();

      $keys = trim($keys);
      if (strlen($keys) == 0) {
        return $find;
      }

      $swish_bin = _swish_get_path('swish_path');
      $swish_index = _swish_index_file();

      // Check that the swish-e binary is executable and the index files are
      // readable. If ANY of them is missing, log it and exit quietly; we do
      // not want to disturb the end user.
      if (!is_executable($swish_bin) || !is_readable($swish_index) || !is_readable($swish_index.'.prop')) {
        watchdog('swish', 'Search: Swish binary is not executable or doesnt exist. Or, index files are not readable or do not exist. Fix it '. l('here.', 'admin/settings/swish'));
        return array();
      }

      // $keys is untrusted user input: hand it to the shell as one quoted
      // argument so swish-e receives the query exactly as typed.
      $swish_command = $swish_bin .' -m 50 -f '. escapeshellarg($swish_index) .' -w '. escapeshellarg($keys);
      $results = array();
      exec($swish_command, $results, $return_val);

      foreach ($results as $result) {
        $first_char = substr($result, 0, 1);
        if ($first_char == '#' || $first_char == '.') {
          //TODO: parse swish result comments
          continue;
        }

        if (substr($result, 0, 5) == 'err: ') {
          // If swish-e reports an error, log it and show "no document found".
          watchdog('swish', 'Swish-e Barfed: '. $result ."\nCommand: ". $swish_command);
          return array();
        }

        // A result line looks like: rank path "title" size
        $k = strpos($result, ' ');   // first space, ends the rank
        $i = strpos($result, ' "');  // start of the quoted title
        $j = strpos($result, '" ');  // end of the quoted title
        if ($k === false || $i === false || $j === false) {
          // Not a result line we understand; skip it rather than emit junk.
          continue;
        }

        $basename = basename(substr($result, $k + 1, $i - $k - 1));
        $extra = array(
          'rank' => substr($result, 0, $k),
          'file_size' => substr($result, $j + 2),
          'file_type' => substr($basename, -3),
        );
        $find[] = array(
          'link' => file_create_url("files/". $basename),
          'title' => substr($result, $i + 2, $j - $i - 2),
          'extra' => $extra,
        );
      }
      return $find;
  }
}

/**
 * Overrides the _search_item hook.
 * Returns the output for one found item: its title followed by the rank
 * and the file size in KB.
 *
 * @param $item
 *   One entry of the array returned by swish_search().
 */
function swish_search_item($item){
  $extra = $item['extra'];
  $output = '
 '. $item['title'] .'
';
  $output .= 'Rank: '. $extra['rank'] .'   File Size: ' .round($extra['file_size']/1024, 1) .' KB
';
  return $output;
}

/**
 * This will be invoked by _cron hook. Will update our index.
 *
 * Logs the swish-e output to the watchdog, or an error entry when the
 * module settings are not valid.
 */
function swish_cron(){
  if (swish_validate_paths()) {
    watchdog('swish', 'Cron: Swish module settings errors exist. Fix it '. l('here.', 'admin/settings/swish'), 'error');
    return;
  }

  $results = _swish_do_index();
  watchdog('swish', implode("\n", $results));
}

/**
 * The SWISH-E indexer. Calls the swish-e indexer commandline via exec.
 *
 * Builds an on-the-fly config file in the temp folder, runs swish-e with
 * it and removes the config file afterwards.
 *
 * @return
 *   The standard output of swish-e as an array of lines. When the temporary
 *   config file cannot be created, a single line starting with 'err: ' is
 *   returned, which is the same convention swish-e uses for its own errors.
 */
function _swish_do_index(){
  $sep = DIRECTORY_SEPARATOR;
  $getcwd = getcwd();
  $conf_dir = $getcwd . $sep .'modules'. $sep .'swish'. $sep .'conf';
  $index_dir = $getcwd . $sep . variable_get('file_directory_path', 'files');

  $tmpfname = tempnam(variable_get('file_directory_temp', '/tmp'), 'swish');
  $handle = $tmpfname ? fopen($tmpfname, 'w') : false;
  if (!$handle) {
    if ($tmpfname && file_exists($tmpfname)) {
      unlink($tmpfname);
    }
    return array('err: Unable to create the temporary swish-e configuration file.');
  }

  // Include our site-wide configuration settings.
  $config = 'IncludeConfigFile '. $conf_dir . $sep ."common.conf\n";
  // Index only this directory.
  $config .= "IndexDir $index_dir\n";
  // Ignore these words. Could be put in the settings in the future.
  $config .= 'IgnoreWords file: '. $conf_dir . $sep .'stopwords'. $sep ."english.txt\n";
  // Dont index these directories/files. Could be put in the settings in the future.
  $config .= "FileRules pathname contains stopwords CVS database images includes misc modules scripts themes tmp uploadimage\n";
  // And don't index php modules, inc, etc.
  $config .= "FileRules filename contains .php .inc .module .sql index.\n";

  // Based on the admin settings, include and filter these document types.
  // variable name => (file extension, filter program options)
  $filters = array(
    'catdoc_path' => array('.doc', '-s8859-1 -d8859-1 %p'),
    'xls2csv_path' => array('.xls', '-fw %p'),
    'ppthtml_path' => array('.ppt', '%p'),
    'pdftotext_path' => array('.pdf', '%p -'),
    'unrtf_path' => array('.rtf', '%p'),
  );
  $indexonly = 'IndexOnly .txt';
  $filefilter = "\n";
  foreach ($filters as $variable => $filter_info) {
    $filter = _swish_get_path($variable);
    // An empty setting means the filter is switched off.
    if ($filter) {
      $indexonly .= ' '. $filter_info[0];
      // e.g. FileFilter .doc /usr/local/bin/catdoc "-s8859-1 -d8859-1 %p"
      $filefilter .= 'FileFilter '. $filter_info[0] .' '. $filter .' "'. $filter_info[1] .'"'. "\n";
    }
  }
  $config .= $indexonly ."\n". $filefilter ."\n";

  fwrite($handle, $config);
  // Close (and thereby flush) the config file BEFORE swish-e reads it;
  // otherwise swish-e may see an empty or truncated file.
  fclose($handle);

  // Assemble the swish-e index command: swish-e -c <config> -f <index>.
  // The index is saved in the files directory as my_swish_index. Paths are
  // quoted so that directories containing spaces keep working.
  $swish_indx_cmd = _swish_get_path('swish_path')
    .' -c '. escapeshellarg($tmpfname)
    .' -f '. escapeshellarg(_swish_index_file());

  // Although there's a tweak to make swish-e index incrementally, it is
  // complicated. We just rely on swish-e's fast indexing instead.
  $results = array();
  exec($swish_indx_cmd, $results, $rv);

  // Delete the temp conf file.
  unlink($tmpfname);
  return $results;
}

/**
 * Check the various paths needed by the swish-e engine before performing
 * any swish operation.
 *
 * Program paths are checked for being executable (an empty setting is
 * skipped) and the Drupal files and temp folders for being writable. The
 * same defaults as the rest of the module are applied, so what is validated
 * is what will actually be used.
 *
 * @return
 *   TRUE when at least one problem was found, FALSE otherwise. Problems are
 *   reported through form_set_error() and drupal_set_message().
 */
function swish_validate_paths(){
  $file_paths = array(
    'swish_path' => 'SWISH-E PATH',
    'catdoc_path' => 'catdoc, M$ Word filter',
    'xls2csv_path' => 'xls2csv, M$ Excel filter',
    'ppthtml_path' => 'ppthtml, M$ Powerpoint filter',
    'pdftotext_path' => 'pdftotext, PDF filter',
    'unrtf_path' => 'unrtf, RTF filter'
  );
  // variable name => (label, default used elsewhere in this module)
  $drupal_files_folder = array(
    'file_directory_path' => array('Files ', 'files'),
    'file_directory_temp' => array('Temp', '/tmp')
  );

  $errors = false;

  foreach ($file_paths as $name => $label) {
    $value = _swish_get_path($name);
    if ($value && !is_executable($value)) {
      form_set_error($name, "$label does not exist or not executable.");
      $errors = true;
    }
  }

  foreach ($drupal_files_folder as $name => $folder) {
    $label = $folder[0];
    if (!is_writable(variable_get($name, $folder[1]))) {
      drupal_set_message("$label folder does not exist or not writable. Fix it ". l('here.', 'admin/settings'), 'error');
      $errors = true;
    }
  }

  return $errors;
}
?>