--- apachesolr_stats.info +++ apachesolr_stats.info @@ -0,0 +1,6 @@ +; $Id$ +name = Apache Solr statistics +description = Keeps and reports statistics about Apache Solr usage and performance. +dependencies[] = apachesolr +package = Apache Solr +core = "6.x" --- apachesolr_stats.install +++ apachesolr_stats.install @@ -0,0 +1,130 @@ + t('Table that contains a log of Apache Solr queries and performace.'), + 'fields' => array( + 'qid' => array( + 'type' => 'serial', + 'not null' => TRUE, + 'description' => 'Primary Key: Unique log ID.', + ), + 'timestamp' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'Unix timestamp of when query occurred.', + ), + 'numfound' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'Number of results.', + ), + 'total_time' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'Total query time (miliseconds).', + ), + 'prepare_time' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'Time taken by Solr prepare phase for this query (miliseconds).', + ), + 'process_time' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'Time taken by Solr process phase for this query (miliseconds).', + ), + 'uid' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'The {users}.uid of the user who triggered the query.', + ), + 'sid' => array( + 'type' => 'varchar', + 'length' => 64, + 'not null' => TRUE, + 'default' => '', + 'description' => 'Session ID of user who triggered the query.', + ), + 'showed_suggestions' => array( + 'type' => 'int', + 'not null' => TRUE, + 'default' => 0, + 'description' => 'Indicates whether a spelling suggestion was shown.', + ), + 'page' => array( + 'type' => 'varchar', + 'length' => 10, + 'not null' => TRUE, + 'default' => '', + 'description' => 'Current results page.', + ), + 'keywords' => array( + 'type' => 'varchar', + 'length' => 128, + 'not null' => TRUE, + 'default' => '', + 'description' => 'Query keywords arguments.', + ), + 'filters' => array( + 'type' => 'varchar', + 'length' => 128, + 'not null' => TRUE, + 'default' => '', + 'description' => 'Query filter arguments.', + ), + 'sort' => array( + 'type' => 'varchar', + 'length' => 128, + 'not null' => TRUE, + 'default' => '', + 'description' => 'Query sort arguments.', + ), + 'params' => array( + 'type' => 'text', + 'not null' => TRUE, + 'size' => 'big', + 'description' => "Query object's complete parameters.", + ), + ), + 'primary key' => array('qid'), + ); + + return $schema; +} --- apachesolr_stats.module +++ apachesolr_stats.module @@ -0,0 +1,348 @@ + array( + 'title' => 'Statistics', + 'description' => 'Apache Solr Statistics settings to measure usage and performance.', + 'page callback' => 'drupal_get_form', + 'page arguments' => array('apachesolr_stats_admin'), + 'access arguments' => array('administer search'), + ), + 'admin/reports/apachesolr/stats' => array( + 'title' => 'Statistics', + 'description' => 'Report of Apache Solr usage and performance.', + 'page callback' => 'apachesolr_stats_report', + 'page arguments' => array(), + 'access arguments' => array('administer search'), + ), + ); +} + +/** + * Build a settings form. + */ +function apachesolr_stats_admin() { + $form = array(); + $options = array('1' => t('Enabled'), '0' => t('Disabled')); + $form['access'] = array( + '#type' => 'fieldset', + '#title' => t('Apache Solr query log settings')); + $form['access']['apachesolr_stats_enabled'] = array( + '#type' => 'radios', + '#title' => t('Enable access log'), + '#default_value' => variable_get('apachesolr_stats_enabled', 0), + '#options' => $options, + '#description' => t('Log each query to Apache Solr.')); + $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800), 'format_interval'); + $form['access']['apachesolr_stats_flush_log_timer'] = array( + '#type' => 'select', + '#title' => t('Discard query logs older than'), + '#default_value' => variable_get('apachesolr_stats_flush_log_timer', 259200), + '#options' => $period, + '#description' => t('Older query log entries will be automatically discarded. (Requires a correctly configured cron maintenance task.)', array('@cron' => url('admin/reports/status')))); + + $form = system_settings_form($form); + return $form; +} + +/** + * Implementation of hook_apachesolr_modify_query(). + */ +function apachesolr_stats_apachesolr_modify_query(&$query, &$params, $caller) { + if (variable_get('apachesolr_stats_enabled', 0)) { + // Add the debug query argument. See: http://wiki.apache.org/solr/CommonQueryParameters#head-f45a9396425956a4db8d6478ed6029adfb7b0858 + if ($caller == 'apachesolr_search') { + $params['debugQuery'] = 'true'; + } + } +} + +/** + * Implementation of apachesolr_stats_apachesolr_process_results(). + */ +function apachesolr_stats_apachesolr_process_results(&$results) { + if (!variable_get('apachesolr_stats_enabled', 0)) { + return; + } + + global $user; + $response = apachesolr_static_response_cache(); + $query = apachesolr_current_query(); + $url_queryvalues = $query->get_url_queryvalues(); + + db_query("INSERT INTO {apachesolr_stats} + (timestamp, uid, sid, numfound, showed_suggestions, total_time, prepare_time, process_time, page, keywords, filters, sort, params) + VALUES + (%d, %d, '%s', %d, %d, %d, %d, %d, '%s', '%s','%s','%s','%s')", + time(), + $user->uid, + $user->sid, + $response->response->numFound, + (int) get_object_vars($response->spellcheck->suggestions), + $response->debug->timing->time, + $response->debug->timing->prepare->time, + $response->debug->timing->process->time, + isset($_GET['page']) ? $_GET['page'] : '', + $query->get_query_basic(), + $url_queryvalues['filters'], + $url_queryvalues['sort'], + serialize($response->responseHeader->params)); + + return; + + /* + $times = array(); + $times['total']['total'] = $response->debug->timing->time; + foreach (array('prepare', 'process') as $phase) { + foreach($response->debug->timing->prepare as $key => $value) { + if (is_object($value)) { + $times[$phase][$key] = (int) $value->time; + } else { + $times[$phase]['total'] = (int) $value; + } + } + } + dsm($times); + return; + */ +} + +/** + * Callback for admin/reports/apachesolr/stats. + * + * TODO: Hardcoded to display queries per minute; this will very likely break with large logfiles. + */ +function apachesolr_stats_report() { + if (! variable_get('apachesolr_stats_enabled', 0)) { + return t('Logging is disabled in the !link. Enable it to log Apache Solr queries.', array('!link' => l('module configuration page', 'admin/settings/apachesolr/stats'))); + } + + // Decide what granularity to use: minute, hour or day + $g = "hour"; + $granularities = array( + 'minute' => array( + 'name' => t('minute'), + 'div' => 60, + ), + 'hour' => array( + 'name' => t('hour'), + 'div' => 60*60, + ), + 'day' => array( + 'name' => t('day'), + 'div' => 60*60*24, + ), + ); + $granularity = $granularities[$g]; + + // Process log + $suggestions = 0; + $queries = 0; + $users = array(); + $sessions = array(); + $result = db_query_range("SELECT * FROM {apachesolr_stats} ORDER BY timestamp DESC", 0, 1000); + $start_timeslot = 0; + $last_timeslot = 0; + while ($record = db_fetch_object($result)) { + $timeslot = intval($record->timestamp / $granularity['div']); + if ($last_timeslot == 0) { $last_timeslot = $timeslot; } + $users[$record->uid]++; + $sessions[$record->sid]++; + if ($record->suggestions) { + $suggestions++; + } + $total_queries++; + $time['total'] += $record->total_time; + $time['prepare'] += $record->prepare_time; + $time['process'] += $record->process_time; + + #dsm($record-> + // Field usage; only when on first results page (meaning it's a fresh search) + if ($record->page == "") { + if (trim($record->keywords) != "") { + $field_usage['keyword']++; + } + foreach (explode(' ', $record->filters) as $filter) { + list($fieldname, $value) = explode(':', $filter); + if ($fieldname) { + $field_usage[$fieldname]++; + } + } + } + + // Group some stats into timeslots (minutes, hours) to show trends + if (empty($user_slot[$record->uid][$timeslot])) { + $data_per_granularity['users_per_slot'][$timeslot]++; + $user_slot[$record->uid][$timeslot] = TRUE; + } + $data_per_granularity['queries'][$timeslot]++; + $count_per_granularity[$timeslot]++; + $data_per_granularity['total_time'][$timeslot] += $record->total_time; + } + $start_timeslot = $timeslot; + #dsm($queries_per_granularity); + + if (sizeof($sessions) == 0 || sizeof($users) == 0 || $total_queries == 0) { + return t('There are no logged queries yet.'); + } + + $s[t('Queries')] = $total_queries; + $s[t('Spellchecker suggestions')] = $suggestions; + $s[t('Sessions')] = sizeof($sessions); + $s[t('Unique users')] = sizeof($users); + $s[t('Average queries per user')] = sprintf("%.1f", $total_queries / sizeof($users)); + $s[t('Average queries per session')] = sprintf("%.1f", $total_queries / sizeof($sessions)); + $s[t('Average time per query')] = sprintf("%.3f sec", $time['total'] / $total_queries / 1000); + $s[t('Average time per query: prepare')] = sprintf("%.3f sec", $time['prepare'] / $total_queries / 1000); + $s[t('Average time per query: process')] = sprintf("%.3f sec", $time['process'] / $total_queries / 1000); + + // Chart for queries per timeslot + $chart = _apachesolr_stats_chart($data_per_granularity['queries'], $start_timeslot, $last_timeslot, $total_queries, $total_queries / ($last_timeslot-$start_timeslot+1)); + $s[t('Total queries, by @granularity', array('@granularity' => $granularity['name']))] = $chart; + + // Chart for average time per timeslot + $data = array(); + foreach ($data_per_granularity['total_time'] as $timeslot => $value) { + $data[$timeslot] = $value / $count_per_granularity[$timeslot]; + } + // Call with average_empty = FALSE + $chart = _apachesolr_stats_chart($data, $start_timeslot, $last_timeslot, $total_queries, $time['total'] / $total_queries); + $s[t('Average time per query, by @granularity (miliseconds)', array('@granularity' => $granularity['name']))] = $chart; + + // Chart for users per timeslot + $chart = _apachesolr_stats_chart($data_per_granularity['users_per_slot'], $start_timeslot, $last_timeslot, $total_queries, sizeof($users) / ($last_timeslot-$start_timeslot+1)); + $s[t('Total unique users executing queries, by @granularity', array('@granularity' => $granularity['name']))] = $chart; + + // Chart for field usage + foreach ($field_usage as $fieldname => $count) { + $chd = ""; + } + $chl = implode('|', array_keys($field_usage)); + $chd = implode(',', $field_usage); + $height = 30+sizeof($field_usage)*35; + $chart = ""; + $s[t('Field usage', array('@granularity' => $granularity['name']))] = $chart; + + // Create the output HTML. + $output = t('This is an overview of Apache Solr usage and performance.'); + foreach ($s as $head => $value) { + // Table data + $rows[] = array( + "data" => array( + array('data' => $head, 'header' => true, 'style' => 'width:33%'), + array('data' => $value), + ) + ); + } + $output .= theme('table', array(), $rows); + return $output; +} + +/* +function _apachesolr_stats_chart($data, $start_timeslot, $last_timeslot, $total_queries, $average = FALSE) { + #$chart_prefix = "http://chart.apis.google.com/chart?cht=lc&chm=o,FF9900,0,-1,5.0&chs=350x100&chdlp=b&chma=10,10,10,10&chd=t:"; + $chart_prefix = "http://chart.apis.google.com/chart?cht=lc&chs=350x100&chdlp=b&chma=30,100,20,20&chd=t:"; + unset($chd); + $chd_min = 9999999; + $chd_max = 0; + $total = 0; + for ($t = $start_timeslot; $t<=$last_timeslot; $t++) { + $num = $data[$t]+0; + $chd_min = ($chd_min > $num) ? $num : $chd_min; + $chd_max = ($chd_max < $num) ? $num : $chd_max; + $chd[] = $num; + } + $chd = array_reverse($chd); + if ($count > 0) { + $chd_avg = $total / $count; + } else { + $chd_avg = $num; + } + $image_url = $chart_prefix . implode(",", $chd) . "&chds=$chd_min,$chd_max"; #&chdl=". t('Queries per minute'); + // Add labels + $image_url .= "&chxl=0:|". intval($chd_min) ."|". intval($chd_max); + if ($average !== FALSE) { + $image_url .=sprintf("|1:|%s=%.2f", t('average'), $average); + $image_url .= "&chxp=1," . intval($average/$chd_max*100); + $chxt = "y,r"; + } else { + $chxt = "y"; + } + $image_url .= "&chxt=$chxt"; + return ""; +} +*/ + + +function _apachesolr_stats_chart($data, $start_timeslot, $last_timeslot, $total_queries, $average = FALSE) { + #$chart_prefix = "http://chart.apis.google.com/chart?cht=lc&chm=o,FF9900,0,-1,5.0&chs=350x100&chdlp=b&chma=10,10,10,10&chd=t:"; + $chart_prefix = "http://chart.apis.google.com/chart?cht=lc&chs=350x100&chdlp=b&chma=30,100,20,20&chd=s:"; + unset($chd); + $chd_min = 9999999; + $chd_max = 0; + $total = 0; + for ($t = $start_timeslot; $t<=$last_timeslot; $t++) { + $num = $data[$t]+0; + $chd_min = ($chd_min > $num) ? $num : $chd_min; + $chd_max = ($chd_max < $num) ? $num : $chd_max; + $chd[] = $num; + } + $chd = array_reverse($chd); + if ($count > 0) { + $chd_avg = $total / $count; + } else { + $chd_avg = $num; + } + // Encode data using Chart's simple encoding: http://code.google.com/apis/chart/formats.html#simple + $encoder_string = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'; + $encoded_values = ''; + foreach ($chd as $value) { + $encoded_values .= substr($encoder_string, (($value-$chd_min) / $chd_max)*61, 1); + } + + $image_url = $chart_prefix . $encoded_values; #&chdl=". t('Queries per minute'); + // Add labels + $image_url .= "&chxl=0:|". intval($chd_min) ."|". intval($chd_max); + if ($average !== FALSE) { + $image_url .=sprintf("|1:|%s=%.2f", t('average'), $average); + $image_url .= "&chxp=1," . intval($average/$chd_max*100); + $chxt = "y,r"; + } else { + $chxt = "y"; + } + $image_url .= "&chxt=$chxt"; + return ""; +} + + +/** + * Implementation of hook_cron(). + * + * Remove expired log messages. + */ +function apachesolr_stats_cron() { + db_query('DELETE FROM {apachesolr_stats} WHERE timestamp < %d', time() - variable_get('apachesolr_stats_flush_log_timer', 604800)); +} + +/** + * Implementation of apachesolr_search_result_ater(). + */ + /* +function apachesolr_stats_apachesolr_search_result_alter($doc) { +} +*/