array(
      'title' => 'Statistics',
      'description' => 'Apache Solr Statistics settings to measure usage and performance.',
      'page callback' => 'drupal_get_form',
      'page arguments' => array('apachesolr_stats_admin'),
      'access arguments' => array('administer search'),
    ),
    'admin/reports/apachesolr/stats' => array(
      'title' => 'Statistics',
      'description' => 'Report of Apache Solr usage and performance.',
      'page callback' => 'apachesolr_stats_report',
      'page arguments' => array(),
      'access arguments' => array('administer search'),
    ),
  );
}

/**
 * Build the settings form.
 *
 * Offers two settings: whether queries are logged at all, and how long log
 * entries are kept before hook_cron() discards them.
 *
 * @return
 *   The form array, wrapped by system_settings_form() so that both values are
 *   stored as variables.
 */
function apachesolr_stats_admin() {
  $form = array();
  $options = array('1' => t('Enabled'), '0' => t('Disabled'));

  $form['access'] = array(
    '#type' => 'fieldset',
    '#title' => t('Apache Solr query log settings'),
  );
  $form['access']['apachesolr_stats_enabled'] = array(
    '#type' => 'radios',
    '#title' => t('Enable access log'),
    '#default_value' => variable_get('apachesolr_stats_enabled', 0),
    '#options' => $options,
    '#description' => t('Log each query to Apache Solr.'),
  );

  $period = drupal_map_assoc(array(3600, 10800, 21600, 32400, 43200, 86400, 172800, 259200, 604800, 1209600, 2419200, 4838400, 9676800), 'format_interval');
  $form['access']['apachesolr_stats_flush_log_timer'] = array(
    '#type' => 'select',
    '#title' => t('Discard query logs older than'),
    // Same fallback as apachesolr_stats_cron(), so the value displayed before
    // the form is first saved is the value cron really applies.
    '#default_value' => variable_get('apachesolr_stats_flush_log_timer', 604800),
    '#options' => $period,
    // The @cron placeholder is referenced by the link so that the argument
    // passed to t() is actually used.
    '#description' => t('Older query log entries will be automatically discarded. (Requires a correctly configured <a href="@cron">cron maintenance task</a>.)', array('@cron' => url('admin/reports/status'))),
  );

  return system_settings_form($form);
}

/**
 * Implementation of hook_apachesolr_modify_query().
 *
 * When logging is enabled, asks Solr for debug output on queries issued by
 * apachesolr_search; the timing figures logged by
 * apachesolr_stats_apachesolr_process_results() are read from that output.
 */
function apachesolr_stats_apachesolr_modify_query(&$query, &$params, $caller) {
  if (!variable_get('apachesolr_stats_enabled', 0)) {
    return;
  }
  // Add the debug query argument.
  // See: http://wiki.apache.org/solr/CommonQueryParameters#head-f45a9396425956a4db8d6478ed6029adfb7b0858
  if ($caller == 'apachesolr_search') {
    $params['debugQuery'] = 'true';
  }
}

/**
 * Implementation of hook_apachesolr_process_results().
 *
 * Writes one row to {apachesolr_stats} describing the query that was just
 * executed: who ran it, how many documents were found, whether the response
 * carried spellchecker suggestions, timing figures and the query parameters.
 *
 * Parts of the response that are absent (for instance the debug section when
 * the query was not issued with debugQuery) are logged as 0 instead of
 * raising notices.
 */
function apachesolr_stats_apachesolr_process_results(&$results) {
  if (!variable_get('apachesolr_stats_enabled', 0)) {
    return;
  }
  global $user;

  $response = apachesolr_static_response_cache();
  $query = apachesolr_current_query();
  if (!is_object($response) || !is_object($query)) {
    // Nothing to log without both a response and the query that produced it.
    return;
  }
  $url_queryvalues = $query->get_url_queryvalues();

  // 1 when the response carries at least one spellchecker suggestion.
  $showed_suggestions = 0;
  if (isset($response->spellcheck->suggestions)) {
    $suggestion_list = (array) $response->spellcheck->suggestions;
    $showed_suggestions = empty($suggestion_list) ? 0 : 1;
  }

  $timing = isset($response->debug->timing) ? $response->debug->timing : NULL;
  $total_time = isset($timing->time) ? $timing->time : 0;
  $prepare_time = isset($timing->prepare->time) ? $timing->prepare->time : 0;
  $process_time = isset($timing->process->time) ? $timing->process->time : 0;

  db_query("INSERT INTO {apachesolr_stats} (timestamp, uid, sid, numfound, showed_suggestions, total_time, prepare_time, process_time, page, keywords, filters, sort, params) VALUES (%d, %d, '%s', %d, %d, %d, %d, %d, '%s', '%s','%s','%s','%s')",
    time(),
    $user->uid,
    $user->sid,
    isset($response->response->numFound) ? $response->response->numFound : 0,
    $showed_suggestions,
    $total_time,
    $prepare_time,
    $process_time,
    isset($_GET['page']) ? $_GET['page'] : '',
    $query->get_query_basic(),
    isset($url_queryvalues['filters']) ? $url_queryvalues['filters'] : '',
    isset($url_queryvalues['sort']) ? $url_queryvalues['sort'] : '',
    serialize(isset($response->responseHeader->params) ? $response->responseHeader->params : NULL)
  );
}

/**
 * Adds $amount to $counters[$key], creating the entry when it is missing.
 *
 * @param $counters
 *   Array of counters, modified in place.
 * @param $key
 *   The counter to update.
 * @param $amount
 *   The value to add; defaults to 1.
 */
function _apachesolr_stats_increment(&$counters, $key, $amount = 1) {
  if (!isset($counters[$key])) {
    $counters[$key] = 0;
  }
  $counters[$key] += $amount;
}

/**
 * Callback for admin/reports/apachesolr/stats.
 *
 * Reads the recent rows of {apachesolr_stats}, aggregates them overall and
 * per timeslot (minute, hour or day, chosen with the "granularity" query
 * argument) and renders the result as a two-column table.
 *
 * TODO: every matching log row is loaded and processed in PHP; this will very
 * likely break with large logfiles.
 *
 * @return
 *   The report HTML, or a short message when logging is disabled or there is
 *   nothing logged yet.
 */
function apachesolr_stats_report() {
  if (!variable_get('apachesolr_stats_enabled', 0)) {
    return t('Logging is disabled in the !link. Enable it to log Apache Solr queries.', array('!link' => l(t('module configuration page'), 'admin/settings/apachesolr/stats')));
  }

  $granularities = array(
    'minute' => array(
      'name' => t('minute'),
      'timespan' => 60,
      // One day before.
      'time_before' => 60 * 60 * 24,
    ),
    'hour' => array(
      'name' => t('hour'),
      'timespan' => 60 * 60,
      // One week before.
      'time_before' => 60 * 60 * 24 * 7,
    ),
    'day' => array(
      'name' => t('day'),
      'timespan' => 60 * 60 * 24,
      // 3 weeks before.
      'time_before' => 60 * 60 * 24 * 7 * 3,
    ),
  );

  // Decide what granularity to use: minute, hour or day. Anything that is not
  // a known key (missing, array-valued, unknown name) falls back to "hour".
  $picked_granularity = 'hour';
  if (isset($_GET['granularity']) && is_string($_GET['granularity']) && isset($granularities[$_GET['granularity']])) {
    $picked_granularity = $_GET['granularity'];
  }
  $granularity = $granularities[$picked_granularity];

  // Accumulators, all initialised so an empty or sparse log raises no notices.
  $total_queries = 0;
  $suggestions = 0;
  $users = array();
  $sessions = array();
  $time = array('total' => 0, 'prepare' => 0, 'process' => 0);
  $field_usage = array();
  $sort_usage = array();
  $queries_per_slot = array();
  $users_per_slot = array();
  $time_per_slot = array();
  $user_slot = array();
  $start_timeslot = 0;
  $last_timeslot = 0;

  // Process latest log entries, newest first.
  $result = db_query("SELECT * FROM {apachesolr_stats} WHERE timestamp > %d ORDER BY timestamp DESC", time() - $granularity['time_before']);

  // Scan the logfile and build statistics arrays.
  while ($record = db_fetch_object($result)) {
    $timeslot = intval($record->timestamp / $granularity['timespan']);
    if ($last_timeslot == 0) {
      // Rows arrive newest first, so the first one seen is the latest slot.
      $last_timeslot = $timeslot;
    }
    // ...and the last row seen ends up being the earliest slot.
    $start_timeslot = $timeslot;

    _apachesolr_stats_increment($users, $record->uid);
    _apachesolr_stats_increment($sessions, $record->sid);
    // The column written by the logger is "showed_suggestions".
    if (!empty($record->showed_suggestions)) {
      $suggestions++;
    }
    $total_queries++;
    $time['total'] += $record->total_time;
    $time['prepare'] += $record->prepare_time;
    $time['process'] += $record->process_time;

    // Field and sort usage; only when on the first results page (meaning it
    // is a fresh search).
    if ($record->page == "") {
      if (trim($record->keywords) != "") {
        _apachesolr_stats_increment($field_usage, 'keyword');
      }
      foreach (explode(' ', $record->filters) as $filter) {
        // Everything before the first colon is the field name; a filter
        // without a colon is counted under its whole text.
        $parts = explode(':', $filter, 2);
        if ($parts[0]) {
          _apachesolr_stats_increment($field_usage, $parts[0]);
        }
      }
      _apachesolr_stats_increment($sort_usage, ($record->sort) ? $record->sort : "relevance");
    }

    // Group some stats into timeslots (minutes, hours) to show trends.
    if (empty($user_slot[$record->uid][$timeslot])) {
      _apachesolr_stats_increment($users_per_slot, $timeslot);
      $user_slot[$record->uid][$timeslot] = TRUE;
    }
    _apachesolr_stats_increment($queries_per_slot, $timeslot);
    _apachesolr_stats_increment($time_per_slot, $timeslot, $record->total_time);
  }

  if ($total_queries == 0) {
    return t('There are no logged queries yet.');
  }

  $earliest_timestamp = $start_timeslot * $granularity['timespan'];
  $slot_count = $last_timeslot - $start_timeslot + 1;
  $by_granularity = array('@granularity' => $granularity['name']);

  // Summary figures. Stored times are divided by 1000 for display in seconds.
  $s = array();
  $s[t('Queries')] = $total_queries;
  $s[t('Spellchecker suggestions')] = $suggestions;
  $s[t('Sessions')] = sizeof($sessions);
  $s[t('Unique users')] = sizeof($users);
  $s[t('Average queries per user')] = sprintf("%.1f", $total_queries / sizeof($users));
  $s[t('Average queries per session')] = sprintf("%.1f", $total_queries / sizeof($sessions));
  $s[t('Average time per query')] = sprintf("%.3f sec", $time['total'] / $total_queries / 1000);
  $s[t('Average time per query: prepare')] = sprintf("%.3f sec", $time['prepare'] / $total_queries / 1000);
  $s[t('Average time per query: process')] = sprintf("%.3f sec", $time['process'] / $total_queries / 1000);

  // Chart for queries per timeslot.
  $s[t('Total queries, by @granularity', $by_granularity)] = apachesolr_stats_chart($queries_per_slot, $start_timeslot, $last_timeslot, $total_queries, $total_queries / $slot_count);

  // Chart for average time per timeslot.
  $average_time_per_slot = array();
  foreach ($time_per_slot as $slot => $slot_time) {
    $average_time_per_slot[$slot] = $slot_time / $queries_per_slot[$slot];
  }
  $s[t('Average time per query, by @granularity (miliseconds)', $by_granularity)] = apachesolr_stats_chart($average_time_per_slot, $start_timeslot, $last_timeslot, $total_queries, $time['total'] / $total_queries);

  // Chart for users per timeslot.
  $s[t('Total unique users executing queries, by @granularity', $by_granularity)] = apachesolr_stats_chart($users_per_slot, $start_timeslot, $last_timeslot, $total_queries, sizeof($users) / $slot_count);

  // Chart for field usage.
  // NOTE(review): labels, values and a height are prepared here but the chart
  // markup itself is an empty string, so this row renders blank. The image
  // markup looks like it is missing from this revision -- confirm and restore.
  $chl = implode('|', array_keys($field_usage));
  $chd = implode(',', $field_usage);
  $height = 30 + sizeof($field_usage) * 35;
  $chart = "";
  $s[t('Field usage')] = $chart;

  // Chart for sort usage.
  // NOTE(review): same situation as the field usage chart above.
  $chl = implode('|', array_keys($sort_usage));
  $chd = implode(',', $sort_usage);
  $chart = "";
  $s[t('Sort usage')] = $chart;

  // Create the output HTML.
  // Granularity picker: the active granularity is emphasised, the others link
  // back to this report with the matching "granularity" query argument.
  $output = "\nReport granularity: ";
  foreach ($granularities as $name => $info) {
    $output .= "   ";
    if ($name !== $picked_granularity) {
      $output .= l($info['name'], 'admin/reports/apachesolr/stats', array('query' => array('granularity' => $name)));
    }
    else {
      $output .= '<strong>' . $info['name'] . '</strong>';
    }
  }
  $output .= "\n\n";

  $output .= t('This is an overview of Apache Solr usage and performance since @startdate.', array('@startdate' => format_date($earliest_timestamp)));

  // Table data: one row per statistic, label on the left, value on the right.
  $rows = array();
  foreach ($s as $head => $value) {
    $rows[] = array(
      'data' => array(
        array('data' => $head, 'header' => TRUE, 'style' => 'width:33%'),
        array('data' => $value),
      ),
    );
  }
  $output .= theme('table', array(), $rows);

  return $output;
}

/**
 * Generate an IMG tag with the URL to generate a chart using Google Charts API.
 *
 * Sample URL:
 * http://chart.apis.google.com/chart?cht=lc&chs=350x100&chdlp=b&chma=10,10,10,10&chd=s:[encoded chart data]
 *
 * @param $data
 *   The array of data to chart, keyed by timeslot index. Timeslots without an
 *   entry are charted as 0.
 * @param $start_timeslot
 *   The index of the first data element to chart.
 * @param $last_timeslot
 *   The index of the last data element to chart.
 * @param $total_queries
 *   Integer with the total number of queries included in this chart. Not
 *   used by the chart itself; kept so existing callers keep working.
 * @param $average
 *   FALSE to show no average, or a number that is printed as an "average"
 *   label positioned on the right-hand axis.
 *
 * @return
 *   An IMG tag whose source is the chart URL.
 */
function apachesolr_stats_chart($data, $start_timeslot, $last_timeslot, $total_queries, $average = FALSE) {
  $chart_prefix = 'http://chart.apis.google.com/chart?cht=lc&chs=350x100';
  $chart_prefix .= '&chdlp=b&chma=30,100,20,20&chd=s:';

  // One value per timeslot in the requested range, gaps filled with 0.
  $chd = array();
  for ($t = $start_timeslot; $t <= $last_timeslot; $t++) {
    $chd[] = isset($data[$t]) ? $data[$t] + 0 : 0;
  }
  $chd_min = $chd ? min($chd) : 0;
  $chd_max = $chd ? max(0, max($chd)) : 0;
  $chd = array_reverse($chd);

  // Generate basic image URL.
  $image_url = $chart_prefix . apachesolr_stats_encodedata($chd, $chd_min, $chd_max);

  // Add labels; skipped for an all-zero chart, which also keeps the division
  // below safe.
  if ($chd_max > 0) {
    $image_url .= '&chxl=0:|' . intval($chd_min) . '|' . intval($chd_max);
    if ($average !== FALSE) {
      // Show average value in a label. The translated word is URL-encoded so
      // that translations containing spaces or non-ASCII text stay valid.
      $image_url .= sprintf('|1:|%s=%.2f', rawurlencode(t('average')), $average);
      $image_url .= '&chxp=1,' . intval($average / $chd_max * 100);
      $chxt = 'y,r';
    }
    else {
      $chxt = 'y';
    }
    $image_url .= "&chxt=$chxt";
  }

  // Return the image tag; the URL is escaped for use inside an attribute.
  return '<img src="' . htmlspecialchars($image_url, ENT_QUOTES) . '" alt="" />';
}

/**
 * Encode data using Chart's simple encoding.
 * See http://code.google.com/apis/chart/formats.html#simple
 *
 * Each value is mapped to one of 62 characters by scaling
 * ($value - $chd_min) / $chd_max onto the index range 0..61.
 *
 * @param $chd
 *   An array of numeric values to encode. Anything that is not an array
 *   encodes to an empty string.
 * @param $chd_min
 *   The smallest value in $chd.
 * @param $chd_max
 *   The greatest value in $chd. When it is 0 or less every value is encoded
 *   as the lowest character instead of dividing by zero.
 *
 * @return
 *   A string with one character per value in $chd.
 */
function apachesolr_stats_encodedata($chd, $chd_min, $chd_max) {
  $encoder_string = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  $top_index = strlen($encoder_string) - 1;
  $encoded_values = '';
  if (!is_array($chd)) {
    return $encoded_values;
  }
  foreach ($chd as $value) {
    $index = 0;
    if ($chd_max > 0) {
      $index = (int) floor((($value - $chd_min) / $chd_max) * $top_index);
      // Keep the offset inside the alphabet whatever the inputs are.
      $index = max(0, min($top_index, $index));
    }
    $encoded_values .= $encoder_string[$index];
  }
  return $encoded_values;
}

/**
 * Implementation of hook_cron().
 *
 * Remove expired log messages: rows older than the configured retention
 * period (one week when the setting has never been saved).
 */
function apachesolr_stats_cron() {
  db_query('DELETE FROM {apachesolr_stats} WHERE timestamp < %d', time() - variable_get('apachesolr_stats_flush_log_timer', 604800));
}

/**
 * Implementation of apachesolr_search_result_alter().
 */
/* TODO: change results here to track clickthrus?
function apachesolr_stats_apachesolr_search_result_alter($doc) {
  dprint_r($doc);
}
*/