uid); ////////////////////////////////////////////////////////////////////////////// // Global variables //$GLOBALS['_boost_path'] = ''; //$GLOBALS['_boost_query'] = ''; //$GLOBALS['_boost_message_count'] = ''; //$GLOBALS['_boost_cache_this'] = ''; //$GLOBALS['_boost_max_execution_time'] = ''; //$GLOBALS['_boost_output_buffering'] = ''; //$GLOBALS['_boost_default_socket_timeout'] = ''; //$GLOBALS['_boost_router_item'] = ''; ////////////////////////////////////////////////////////////////////////////// // Core API hooks /** * Implementation of hook_help(). Provides online user help. */ function boost_help($path, $arg) { switch ($path) { case 'admin/help#boost': if (file_exists($file = drupal_get_path('module', 'boost') . '/README.txt')) { return '
' . implode("\n", array_slice(explode("\n", @file_get_contents($file)), 2)) . '
'; } break; case 'admin/settings/performance/boost': return '

' . t('') . '

'; // TODO: add help text. } //hack to get drupal_get_messages before they are destroyed. $GLOBALS['_boost_message_count'] = count(drupal_get_messages(NULL, FALSE)); } /** * Implementation of hook_init(). Performs page setup tasks if page not cached. */ function boost_init() { // Disable all caches when nocache is set if (isset($_GET['nocache'])) { $GLOBALS['conf']['cache'] = CACHE_DISABLED; $GLOBALS['_boost_cache_this'] = FALSE; return; } global $user, $base_path; //set variables $GLOBALS['_boost_router_item'] = _boost_get_menu_router(); $GLOBALS['_boost_path'] = $_REQUEST['q']; // Make the proper filename for our query $GLOBALS['_boost_query'] = BOOST_CHAR; foreach ($_GET as $key => $val) { if ($key != 'q' && $key != 'destination') { $GLOBALS['_boost_query'] .= (($GLOBALS['_boost_query'] == BOOST_CHAR) ? '' : '&') . $key . '=' . $val; } } if (!empty($user->uid)) { boost_set_cookie($user); if (BOOST_DISABLE_CLEAN_URL) { $GLOBALS['conf']['clean_url'] = 0; db_query('TRUNCATE {cache_filter}'); db_query('TRUNCATE {cache_menu}'); cache_clear_all('*', 'cache_menu'); cache_clear_all('*', 'cache_filter'); } } // Make sure the page is/should be cached according to our current configuration if ( strpos($_SERVER['SCRIPT_FILENAME'], 'index.php') === FALSE || variable_get('site_offline', 0) || $_SERVER['REQUEST_METHOD'] != 'GET' || $_SERVER['SERVER_SOFTWARE'] === 'PHP CLI' || !BOOST_ENABLED || !boost_is_cacheable($GLOBALS['_boost_path']) || $GLOBALS['_boost_router_item']['page_callback'] == 'search404_page' ) { $GLOBALS['_boost_cache_this'] = FALSE; return; } // We only generate cached pages for anonymous visitors. if (empty($user->uid)) { if (BOOST_ENABLED != CACHE_AGGRESSIVE) { $GLOBALS['conf']['cache'] = CACHE_DISABLED; } $GLOBALS['_boost_cache_this'] = TRUE; register_shutdown_function('_boost_ob_handler'); ob_start(); } } /** * Implementation of hook_exit(). Performs cleanup tasks. 
*
 * For POST requests by anonymous visitors, this adds a dummy query string
 * to any URL being redirected to using drupal_goto().
 *
 * This is pretty much a hack that assumes a bit too much familiarity with
 * what happens under the hood of the Drupal core function drupal_goto().
 *
 * It's necessary, though, in order for any session messages set on form
 * submission to actually show up on the next page if that page has been
 * cached by Boost.
 *
 * @param $destination
 *   URL about to be redirected to, or NULL when no redirect is in progress.
 */
function boost_exit($destination = NULL) {
  // Only act when hook_exit() was invoked with a redirect target for a POST
  // request.
  if (empty($destination) || $_SERVER['REQUEST_METHOD'] != 'POST') {
    return;
  }

  // Only anonymous visitors with session messages set during this request
  // need the detour.
  global $user;
  if (!empty($user->uid) || !drupal_set_message()) {
    return;
  }

  // FIXME: call any remaining exit hooks since we're about to terminate?
  $query_parts = parse_url($destination);
  if (!is_array($query_parts)) {
    // parse_url() returns FALSE for seriously malformed URLs; leave the
    // redirect to the caller instead of building a broken Location header.
    return;
  }

  // Add a nocache parameter to query. Such pages will never be cached.
  // The 'query' key is absent when the URL has no query string, so build the
  // value explicitly instead of appending to a possibly undefined index.
  $existing_query = empty($query_parts['query']) ? '' : $query_parts['query'] . '&';
  $query_parts['query'] = $existing_query . 'nocache=1';

  // Rebuild the URL with the new query string. Do not use url() since
  // destination has presumably already been run through url().
  $destination = boost_glue_url($query_parts);

  // Do what drupal_goto() would do if we were to return to it:
  header('Location: ' . $destination);
  exit();
}

/**
 * Implementation of hook_menu().
*/
function boost_menu() {
  $items = array();
  $boost_path = drupal_get_path('module', 'boost');

  // Default local task so the core performance page keeps its own tab.
  $items['admin/settings/performance/default'] = array(
    'title' => 'Performance',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'file path' => drupal_get_path('module', 'system'),
  );
  $items['admin/settings/performance/boost'] = array(
    'title' => 'Boost Settings',
    'description' => 'Advanced boost configuration.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('boost_admin_boost_performance_page'),
    'access arguments' => array('administer site configuration'),
    'weight' => 10,
    'type' => MENU_LOCAL_TASK,
    'file' => 'boost.admin.inc',
  );
  $items['admin/settings/performance/boost-rules'] = array(
    'title' => 'Boost htaccess rules generation',
    'description' => 'htaccess boost rules.',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('boost_admin_htaccess_page'),
    'access arguments' => array('administer site configuration'),
    'weight' => 12,
    'type' => MENU_LOCAL_TASK,
    'file' => 'boost.admin.inc',
  );
  // Statistics callback.
  $items['boost_stats.php'] = array(
    'page callback' => 'boost_stats_ajax_callback',
    'type' => MENU_CALLBACK,
    'access callback' => 1,
    'access arguments' => array('access content'),
    'file path' => $boost_path,
    'file' => 'stats/boost_stats.ajax.inc',
  );
  // Crawler entry point.
  $items['boost-crawler'] = array(
    'page callback' => 'boost_crawler_run',
    'type' => MENU_CALLBACK,
    'access callback' => 1,
    'access arguments' => array('access content'),
    'file path' => $boost_path,
  );

  return $items;
}

/**
 * Implementation of hook_form_alter(). Performs alterations before a form
 * is rendered.
 *
 * Attaches Boost submit handlers to the core performance, site maintenance,
 * modules and themes forms.
 *
 * @param $form
 *   Form array, altered in place.
 * @param $form_state
 *   Current form state (unused).
 * @param $form_id
 *   Identifier of the form being built.
 */
function boost_form_alter(&$form, $form_state, $form_id) {
  switch ($form_id) {
    // Alter Drupal's system performance settings form by hiding the default
    // cache enabled/disabled control (which will now always default to
    // CACHE_DISABLED), and inject our own settings in its stead.
    case 'system_performance_settings':
      module_load_include('inc', 'boost', 'boost.admin');
      $form['page_cache'] = boost_admin_performance_page($form['page_cache']);
      $form['#submit'][] = 'boost_admin_performance_page_submit';
      $form['clear_cache']['clear']['#submit'][0] = 'boost_admin_clear_cache_submit';
      break;

    // Alter Drupal's site maintenance settings form in order to ensure that
    // the static page cache gets wiped if the administrator decides to take
    // the site offline.
    case 'system_site_maintenance_settings':
      module_load_include('inc', 'boost', 'boost.admin');
      $form['#submit'][] = 'boost_admin_site_offline_submit';
      break;

    // Alter Drupal's modules build form in order to ensure that
    // the static page cache gets wiped if the administrator decides to
    // change enabled modules
    case 'system_modules':
      module_load_include('inc', 'boost', 'boost.admin');
      $form['#submit'][] = 'boost_admin_modules_submit';
      break;

    // Alter Drupal's theme build form in order to ensure that
    // the static page cache gets wiped if the administrator decides to
    // change theme
    case 'system_themes_form':
      module_load_include('inc', 'boost', 'boost.admin');
      $form['#submit'][] = 'boost_admin_themes_submit';
      // Added below due to this bug: http://drupal.org/node/276615
      // version_compare() is used instead of a float comparison: as floats,
      // "6.9" sorts above "6.13" and "6.20" collapses to 6.2, so both were
      // classified wrongly. The cache clear stays last in the condition so it
      // only runs on affected cores with CSS preprocessing enabled.
      if (variable_get('preprocess_css', FALSE) == TRUE
        && version_compare(VERSION, '6.14', '<')
        && boost_cache_clear_all()
      ) {
        drupal_set_message(t('Boost: Static page cache cleared. See http://drupal.org/node/276615 for reason why (core bug that is fixed in 6.14+).'), 'warning');
      }
      break;
  }
}

/**
 * Implementation of hook_cron(). Performs periodic actions.
*/
function boost_cron() {
  if (!BOOST_ENABLED) {
    return;
  }

  // Optionally skip expiration when the site reports no changes.
  $expire = TRUE;
  if (BOOST_CHECK_BEFORE_CRON_EXPIRE) {
    $expire = boost_has_site_changed();
  }

  // Expire old content
  if (!BOOST_LOOPBACK_BYPASS && variable_get('boost_expire_cron', TRUE) && $expire && boost_cache_db_expire()) {
    if (BOOST_VERBOSE >= 5) {
      watchdog('boost', 'Expired stale files from static page cache.', array(), WATCHDOG_NOTICE);
    }
  }

  // Update Stats
  if (module_exists('statistics') && variable_get('boost_block_show_stats', FALSE)) {
    $block = module_invoke('statistics', 'block', 'view', 0);
    variable_set('boost_statistics_html', $block['content']);
  }

  // Crawl Site
  if (BOOST_CRAWL_ON_CRON && !variable_get('site_offline', 0)) {
    boost_crawler_run((int)$expire);
  }
}

/**
 * Implementation of hook_flush_caches(). Deletes all static files.
 *
 * The static cache is only cleared outside of cron runs (no cron semaphore
 * set) and when CSS or JS preprocessing is enabled.
 *
 * @return
 *   An empty array: this implementation adds no cache table names of its own.
 */
function boost_flush_caches() {
  $cron_running = variable_get('cron_semaphore', FALSE) != FALSE;
  $aggregation = variable_get('preprocess_css', FALSE) == TRUE || variable_get('preprocess_js', FALSE) == TRUE;
  if (!$cron_running && $aggregation) {
    boost_cache_clear_all();
  }
  // hook_flush_caches() implementations return an array of cache table names;
  // return an empty list rather than NULL.
  return array();
}

/**
 * Implementation of hook_comment(). Acts on comment modification.
 *
 * @param $comment
 *   Read as an array for 'insert' and 'update', and as an object for
 *   'publish', 'unpublish' and 'delete'.
 * @param $op
 *   Operation being performed on the comment.
 */
function boost_comment($comment, $op) {
  if (!BOOST_ENABLED) {
    return;
  }

  switch ($op) {
    case 'insert':
    case 'update':
      // Expire the relevant node page from the static page cache to prevent serving stale content:
      if (!empty($comment['nid'])) {
        $node = node_load($comment['nid']);
        boost_expire_node($node);
      }
      break;

    case 'publish':
    case 'unpublish':
    case 'delete':
      if (!empty($comment->nid)) {
        $node = node_load($comment->nid);
        boost_expire_node($node);
      }
      break;
  }
}

/**
 * Implementation of hook_nodeapi(). Acts on nodes defined by other modules.
 *
 * Expires the node's cached pages when it is inserted, updated or deleted.
 */
function boost_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
  if (!BOOST_ENABLED) {
    return;
  }

  switch ($op) {
    case 'insert':
    case 'update':
    case 'delete':
      boost_expire_node($node);
      break;
  }
}

/**
 * Implementation of hook_votingapi_insert().
*
 * @param $votes
 *   array of votes
 */
function boost_votingapi_insert($votes) {
  if (!BOOST_ENABLED) {
    return;
  }
  _boost_expire_voted_nodes($votes);
}

/**
 * Implementation of hook_votingapi_delete().
 *
 * @param $votes
 *   array of votes
 */
function boost_votingapi_delete($votes) {
  if (!BOOST_ENABLED) {
    return;
  }
  _boost_expire_voted_nodes($votes);
}

/**
 * Expires the cached pages of every node referenced by a list of votes.
 *
 * Shared by the votingapi insert and delete hooks.
 *
 * @param $votes
 *   array of votes; each vote is read for 'content_id' and, when present,
 *   'content_type'.
 */
function _boost_expire_voted_nodes($votes) {
  foreach ($votes as $vote) {
    // A vote that declares a non-node content type (e.g. a comment) carries
    // an id that is not a nid; loading it as a node would expire an unrelated
    // page. Votes without a declared type keep the previous behaviour.
    if (isset($vote['content_type']) && $vote['content_type'] != 'node') {
      continue;
    }
    $node = node_load($vote['content_id']);
    boost_expire_node($node);
  }
}

/**
 * Expires a node from the cache.
 *
 * Expires the front page when the node is promoted, every cached file whose
 * path starts with node/NID, and (when BOOST_FLUSH_NODE_TERMS is set) the
 * files recorded for the node's taxonomy terms.
 *
 * @param $node
 *   node object
 */
function boost_expire_node($node) {
  // Nothing to expire without a node id (e.g. node_load() failed); this also
  // avoids running the taxonomy queries below for a non-existent node.
  if (empty($node->nid)) {
    return;
  }

  // Expire all relevant node pages from the static page cache to prevent serving stale content:
  if ($node->promote == 1) {
    boost_cache_expire_derivative('');
  }
  boost_cache_expire_derivative('node/' . $node->nid, TRUE);

  // get terms and flush their page
  if (module_exists('taxonomy') && BOOST_FLUSH_NODE_TERMS) {
    $filenames = array();
    foreach (boost_taxonomy_node_get_tids($node->nid) as $tid) {
      $filenames = array_merge($filenames, boost_get_db_term($tid));
    }
    foreach ($filenames as $filename) {
      boost_cache_kill($filename);
    }
  }
}

/**
 * Return taxonomy terms given a nid.
 *
 * Needed because of a weird bug with CCK & node_load()
 * http://drupal.org/node/545922
 *
 * @param $nid
 *   Node id.
 *
 * @return
 *   Array of term ids attached to the revision stored in {node}.vid, ordered
 *   by vocabulary weight, term weight and term name.
 */
function boost_taxonomy_node_get_tids($nid) {
  $vid = db_result(db_query('SELECT vid FROM {node} WHERE nid = %d', $nid));
  $result = db_query(db_rewrite_sql('SELECT t.tid FROM {term_node} r INNER JOIN {term_data} t ON r.tid = t.tid INNER JOIN {vocabulary} v ON t.vid = v.vid WHERE r.vid = %d ORDER BY v.weight, t.weight, t.name', 't', 'tid'), $vid);
  $tids = array();
  while ($term = db_result($result)) {
    $tids[] = $term;
  }
  return $tids;
}

/**
 * Implementation of hook_taxonomy(). Acts on taxonomy changes.
*/
function boost_taxonomy($op, $type, $term = NULL) {
  if (!BOOST_ENABLED) {
    return;
  }

  // Placeholder: no operation ('insert', 'update' or 'delete') triggers any
  // action yet.
  // TODO: Expire all relevant taxonomy pages from the static page cache to prevent serving stale content.
}

/**
 * Implementation of hook_user(). Acts on user account actions.
 *
 * Sets the Boost cookie on login, sets it again with a timestamp one day in
 * the past on logout, and expires the cached user page when an account is
 * deleted.
 */
function boost_user($op, &$edit, &$account, $category = NULL) {
  if (!BOOST_ENABLED) {
    return;
  }

  global $user;

  if ($op == 'login') {
    // Set a special cookie to prevent authenticated users getting served
    // pages from the static page cache.
    boost_set_cookie($user);
  }
  elseif ($op == 'logout') {
    // Same cookie, stamped one day in the past.
    boost_set_cookie($user, BOOST_TIME - 86400);
  }
  elseif ($op == 'insert') {
    // TODO: create user-specific cache directory.
  }
  elseif ($op == 'delete') {
    // Expire the relevant user page from the static page cache to prevent serving stale content:
    if (!empty($account->uid)) {
      boost_cache_expire_derivative('user/' . $account->uid);
    }
    // TODO: recursively delete user-specific cache directory.
  }
}

/**
 * Implementation of hook_block().
*/ function boost_block($op = 'list', $delta = 0, $edit = array()) { global $user; switch ($op) { case 'list': return array( 'status' => array( 'info' => t('Boost: Pages cache status'), 'region' => 'right', 'weight' => 10, 'cache' => BLOCK_NO_CACHE, ), 'config' => array( 'info' => t('Boost: Pages cache configuration'), 'region' => 'right', 'weight' => 10, 'cache' => BLOCK_NO_CACHE, ), 'stats' => array( 'info' => t('Boost: AJAX core statistics'), 'region' => 'right', 'weight' => 10, 'cache' => BLOCK_NO_CACHE, ), ); case 'configure': if ($delta == 'stats') { $form['boost_block_show_stats'] = array( '#type' => 'checkbox', '#title' => t('Display Statistics.'), '#default_value' => variable_get('boost_block_show_stats', FALSE), '#description' => t('If false, uses Javascript to hide the block via "parent().parent().hide()".'), ); $form['boost_block_cache_stats_block'] = array( '#type' => 'checkbox', '#title' => t('Cache Statistics Block'), '#default_value' => variable_get('boost_block_cache_stats_block', FALSE), ); return $form; } case 'save': if ($delta == 'stats') { variable_set('boost_block_show_stats', $edit['boost_block_show_stats']); variable_set('boost_block_cache_stats_block', $edit['boost_block_cache_stats_block']); } case 'view': $block = array(); switch ($delta) { case 'status': // Don't show the block to anonymous users, nor on any pages that // aren't even cacheable to begin with (e.g. admin/*). if (!empty($user->uid) && boost_is_cacheable($GLOBALS['_boost_path'])) { $output = t('This page is being served live to anonymous visitors, as it is not currently in the static page cache.'); if (boost_is_cached($GLOBALS['_boost_path'])) { $ttl = boost_db_get_ttl(boost_file_path($GLOBALS['_boost_path'])); $output = t('This page is being served to anonymous visitors from the static page cache.') . ' '; $output .= t($ttl < 0 ? 'The cached copy expired %interval ago.' 
: 'The cached copy will expire in %interval.', array('%interval' => format_interval(abs($ttl)))); $output .= drupal_get_form('boost_block_flush_form'); } $error = FALSE; if (function_exists('error_get_last')) { $error = error_get_last(); } $drupal_msg = max(count(drupal_get_messages(NULL, FALSE)), $GLOBALS['_boost_message_count']); if ((BOOST_HALT_ON_ERRORS || BOOST_HALT_ON_MESSAGES) && ($error || $drupal_msg != 0)) { $output = t('There are php errors or drupal messages on this page, preventing boost from caching.'); if ($error) { $output .= t(' ERROR:
%error
!link
!performance', array('%error' => boost_print_r($error, TRUE), '!link' => l(t('Lookup Error Type'), 'http://php.net/errorfunc.constants'), '!performance' => l(t('Turn Off Error Checking'), 'admin/settings/performance'))); } if ($drupal_msg != 0) { $output .= t(' MESSAGES: %msg
!performance', array('%msg' => $drupal_msg, '!performance' => l(t('Turn Off Error Checking'), 'admin/settings/performance'))); } } $block['subject'] = ''; $block['content'] = theme('boost_cache_status', isset($ttl) ? $ttl : -1, $output); } break; case 'config': // Don't show the block to anonymous users, nor on any pages that // aren't even cacheable to begin with (e.g. admin/*). if (!empty($user->uid) && boost_is_cacheable($GLOBALS['_boost_path'])) { $block['subject'] = ''; $block['content'] = theme('boost_cache_status', -1, drupal_get_form('boost_block_db_settings_form')); } break; case 'stats': $filename = 'boost_stats.php'; $block = module_invoke('statistics', 'block', 'view', 0); variable_set('boost_statistics_html', $block['content']); if (!( strpos($_SERVER['SCRIPT_FILENAME'], 'index.php') === FALSE || variable_get('site_offline', 0) || $_SERVER['REQUEST_METHOD'] != 'GET' || $_SERVER['SERVER_SOFTWARE'] === 'PHP CLI' || !BOOST_ENABLED || isset($_GET['nocache']) || !boost_is_cacheable($GLOBALS['_boost_path']) || !empty($user->uid) || !module_exists('statistics') )) { $block = array(); $block['subject'] = 'Popular content'; $block['content'] = '
' . boost_stats_generate($filename); } elseif (!variable_get('boost_block_show_stats', FALSE)) { $block['content'] .= '
'; drupal_add_js('$("#boost-stats").parent().parent().hide();', 'inline', 'footer'); } break; } return $block; } } function boost_block_flush_form() { $form['boost_clear']['path'] = array( '#type' => 'hidden', '#value' => $GLOBALS['_boost_path'], ); $form['boost_cache']['clear'] = array( '#type' => 'submit', '#value' => t('Flush Page'), '#submit' => array('boost_block_form_flush_submit'), ); return ($form); } function boost_block_form_flush_submit(&$form_state, $form) { boost_cache_expire_derivative($form['values']['path'], TRUE); } function boost_block_db_settings_form() { // set info $period = drupal_map_assoc(array(-1, 0, 60, 180, 300, 600, 900, 1800, 2700, 3600, 10800, 21600, 32400, 43200, 64800, 86400, 2*86400, 3*86400, 4*86400, 5*86400, 6*86400, 604800, 2*604800, 3*604800, 4*604800, 8*604800, 16*604800, 52*604800), 'format_interval'); $period[0] = '<' . t('none') . '>'; $period[-1] = t('default'); //$info = boost_get_db(boost_file_path($GLOBALS['_boost_path'])); $router_item = isset($GLOBALS['_boost_router_item']) ? $GLOBALS['_boost_router_item'] : _boost_get_menu_router(); $settings = boost_get_settings_db($router_item); $default = 0; foreach ($settings as $key => $value) { if ($value != NULL) { $info = $value; $default = $key; break; } } if (!isset($info)) { $info['lifetime'] = -1; $info['push'] = -1; } // create form $form['boost_db_settings']['lifetime'] = array( '#type' => 'select', '#title' => t('Minimum cache lifetime'), '#default_value' => $info['lifetime'], '#options' => $period, '#description' => t('Default: %default', array('%default' => format_interval(BOOST_CACHE_LIFETIME))), ); $form['boost_db_settings']['push'] = array( '#type' => 'select', '#title' => t('Preemptive Cache'), '#default_value' => $info['push'], '#options' => array( -1 => 'default', 0 => 'No', 1 => 'Yes', ), ); $form['boost_db_settings']['selection'] = array( '#type' => 'select', '#title' => t('Scope'), '#default_value' => $default, '#options' => array( 0 => 'Page ID: ' . 
$router_item['page_id'], 1 => 'Content Type: ' . $router_item['page_arguments'], 2 => 'Content Container: ' . $router_item['page_callback'], ), ); $form['boost_db_settings']['send'] = array( '#type' => 'submit', '#value' => t('Set Configuration'), '#submit' => array('boost_block_db_settings_form_submit'), ); $form['boost_db_rm_settings']['id'] = array( '#type' => 'checkbox', '#title' => t('Page ID'), '#default_value' => $settings[0] != NULL ? FALSE : TRUE, '#disabled' => $settings[0] != NULL ? FALSE : TRUE, '#description' => $period[$settings[0]['lifetime']] . ' - ' . $router_item['page_id'], ); $form['boost_db_rm_settings']['id_value'] = array( '#type' => 'hidden', '#title' => t('id_value'), '#default_value' => $settings[0] != NULL ? $settings[0]['csid'] : FALSE, '#disabled' => $settings[0] != NULL ? FALSE : TRUE, ); $form['boost_db_rm_settings']['type'] = array( '#type' => 'checkbox', '#title' => t('Content Type'), '#default_value' => $settings[1] != NULL ? FALSE : TRUE, '#disabled' => $settings[1] != NULL ? FALSE : TRUE, '#description' => $period[$settings[1]['lifetime']] . ' - ' . $router_item['page_arguments'], ); $form['boost_db_rm_settings']['type_value'] = array( '#type' => 'hidden', '#title' => t('type_value'), '#default_value' => $settings[1] != NULL ? $settings[1]['csid'] : FALSE, '#disabled' => $settings[1] != NULL ? FALSE : TRUE, ); $form['boost_db_rm_settings']['container'] = array( '#type' => 'checkbox', '#title' => t('Content Container'), '#default_value' => $settings[2] != NULL ? FALSE : TRUE, '#disabled' => $settings[2] != NULL ? FALSE : TRUE, '#description' => $period[$settings[2]['lifetime']] . ' - ' . $router_item['page_callback'], ); $form['boost_db_rm_settings']['container_value'] = array( '#type' => 'hidden', '#title' => t('container_value'), '#default_value' => $settings[2] != NULL ? $settings[2]['csid'] : FALSE, '#disabled' => $settings[2] != NULL ? 
FALSE : TRUE, ); $form['boost_db_rm_settings']['send'] = array( '#type' => 'submit', '#value' => t('Delete Configuration'), '#submit' => array('boost_block_db_rm_settings_form_submit'), '#description' => t('Check the box to delete it'), ); return $form; } /** * Sets page specific settings in the boost cache database. */ function boost_block_db_settings_form_submit(&$form_state, $form) { boost_set_db_page_settings($form['values']['lifetime'], $form['values']['push'], $form['values']['selection']); } /** * Sets page specific settings in the boost cache database. */ function boost_block_db_rm_settings_form_submit(&$form_state, $form) { if ($form['values']['id']) { boost_remove_settings_db($form['values']['id_value']); } if ($form['values']['type']) { boost_remove_settings_db($form['values']['type_value']); } if ($form['values']['container']) { boost_remove_settings_db($form['values']['container_value']); } } /** * Generate js/html for boost stat counter. * * NOTE HTML code could be added to the $buffer directly. Would prevent 2x * counts on first view. Would be hard to do though. * * @param $filename * Name of boost's statistics php file. */ function boost_stats_generate($filename) { Global $base_path; // is node & node count enabled. if (arg(0) == 'node' && is_numeric(arg(1)) && arg(2) == '' && variable_get('statistics_count_content_views', 0)) { $nid = arg(1);; } else { $nid = 'NULL'; } // access log enabled. if ((variable_get('statistics_enable_access_log', 0)) && (module_invoke('throttle', 'status') == 0)) { $title = drupal_urlencode(strip_tags(drupal_get_title())); $q = $_GET['q']; } else { $title = 'NULL'; $q = 'NULL'; } $page_js = array( 'boost' => array( 'nid' => $nid, 'q' => $q, 'title' => $title, ), ); $site_js = <<'; return $page_ns; } /** * Implementation of hook_theme(). 
*/ function boost_theme() { return array( 'boost_cache_status' => array( 'arguments' => array('ttl' => NULL, 'text' => NULL), ), ); } function theme_boost_cache_status($ttl, $text) { return '' . $text . ''; } ////////////////////////////////////////////////////////////////////////////// // Output buffering callback /** * PHP output buffering callback for static page caching. */ function _boost_ob_handler() { $buffer = ob_get_contents(); // Ensure we're in the correct working directory, since some web servers (e.g. Apache) mess this up here. chdir(dirname($_SERVER['SCRIPT_FILENAME'])); if (function_exists('error_get_last')) { if (BOOST_HALT_ON_ERRORS && $error = error_get_last()) { switch ($error['type']) { case E_NOTICE: //Ignore run-time notices case E_USER_NOTICE: //Ignore user-generated notice message //case E_DEPRECATED: //Ignore run-time notices //case E_USER_DEPRECATED: //Ignore user-generated notice message break; default: //Do not cache page on all other errors if (BOOST_VERBOSE >= 3) { watchdog('boost', 'There are php errors on this page, preventing boost from caching. ERROR:
%error
!link
!performance', array('%error' => boost_print_r($error, TRUE), '!link' => l(t('Lookup Error Type'), 'http://php.net/errorfunc.constants'), '!performance' => l(t('Turn Off Error Checking'), 'admin/settings/performance')), WATCHDOG_WARNING); } echo $buffer; return FALSE; } } } if (BOOST_HALT_ON_MESSAGES && $GLOBALS['_boost_message_count'] != 0) { if (BOOST_VERBOSE >= 3) { watchdog('boost', 'There are drupal messages on this page, preventing boost from caching. MESSAGES: %msg
!performance', array('%msg' => $GLOBALS['_boost_message_count'], '!performance' => l(t('Turn Off Error Checking'), 'admin/settings/performance')), WATCHDOG_WARNING); } echo $buffer; return FALSE; } // Check the currently set content type and the HTTP response code. We're // going to be exceedingly conservative here and only cache 'text' pages that // were output with a 200 OK status. If it didn't get a 200 then remove that // entry from the cache. if ($GLOBALS['_boost_cache_this'] && strlen($buffer) > 0) { switch (_boost_get_content_type()) { case 'text/html': if (boost_check_http_status()) { boost_cache_set($GLOBALS['_boost_path'], $buffer, BOOST_FILE_EXTENSION); boost_cache_css_js_files($buffer); } else { // boost_cache_expire_derivative($path); // $filename = boost_file_path($path, TRUE, BOOST_FILE_EXTENSION); // if ($filename) { // boost_cache_kill($filename); // boost_remove_db($filename); // } } break; case 'application/rss': case 'text/xml': case 'application/rss+xml': if (boost_check_http_status()) { boost_cache_set($GLOBALS['_boost_path'], $buffer, BOOST_XML_EXTENSION); } else { // boost_cache_expire_derivative($path); // $filename = boost_file_path($path, TRUE, BOOST_XML_EXTENSION); // if ($filename) { // boost_cache_kill($filename); // boost_remove_db($filename); // } } break; case 'text/javascript': if (boost_check_http_status()) { boost_cache_set($GLOBALS['_boost_path'], $buffer, BOOST_JSON_EXTENSION); } else { // boost_cache_expire_derivative($path); // $filename = boost_file_path($path, TRUE, BOOST_JSON_EXTENSION); // if ($filename) { // boost_cache_kill($filename); // boost_remove_db($filename); // } } break; } } if ($output_needed) { echo $buffer; } } /** * See's if this http status was returned. 
*
 * @param $status
 *   HTTP status code to look for; defaults to 200.
 *
 * @return
 *   TRUE when both the regular status line and the Pressflow-style ":status:"
 *   header resolve to $status, FALSE otherwise.
 */
function boost_check_http_status($status = 200) {
  return _boost_get_http_status() == $status
    && _boost_get_pressflow_http_status() == $status;
}

/**
 * Determines the MIME content type of the current page response based on
 * the currently set Content-Type HTTP header.
 *
 * This should normally return the string 'text/html' unless another module
 * has overridden the content type.
 *
 * @param $default
 *   Return this value if it can't be found
 */
function _boost_get_content_type($default = NULL) {
  static $regex = '!^Content-Type:\s*([\w\d\/\-]+)!i';
  return _boost_get_http_header($regex, $default);
}

/**
 * Determines the HTTP response code that the current page request will be
 * returning by examining the HTTP headers that have been output so far.
 *
 * @param $default
 *   If none found, return 200
 *
 * @return
 *   Status code as an integer.
 */
function _boost_get_http_status($default = 200) {
  // Match any "HTTP/x.y" status line: the previous pattern left the dot
  // unescaped and only recognised HTTP/1.1, so a status sent with another
  // protocol version fell through to the default.
  static $regex = '!^HTTP/\d\.\d\s+(\d+)!';
  return (int)_boost_get_http_header($regex, $default);
}

/**
 * Determines the HTTP response code that the current page request will be
 * returning by examining the HTTP headers that have been output so far.
*
 * @param $default
 *   If none found, return 200
 */
function _boost_get_pressflow_http_status($default = 200) {
  static $regex = '!^:status:\s+(\d+)!';
  return (int)_boost_get_http_header($regex, $default);
}

/**
 * Get HTTP header
 *
 * @param $regex
 *   Regular expression to get HTTP Header Line
 * @param $default
 *   Return this value if it can't be found
 *
 * @return
 *   First capture group of the last header line matching $regex, or $default.
 */
function _boost_get_http_header($regex, $default = NULL) {
  // The last header is the one that counts:
  $headers = preg_grep($regex, explode("\n", drupal_get_headers()));
  if (!empty($headers) && preg_match($regex, array_pop($headers), $matches)) {
    return $matches[1]; // found it
  }
  return $default; // no such luck
}

//////////////////////////////////////////////////////////////////////////////
// Boost API implementation

/**
 * Determines whether a given url can be cached or not by boost.
 *
 * To avoid potentially troublesome situations, the user login page is never
 * cached, nor are any admin pages.
 *
 * @param $path
 *   Current URL
 *
 * $path = $GLOBALS['_boost_path'] most of the time
 * uses $GLOBALS['_boost_query'] as well
 *
 * @return
 *   FALSE when the path must not be cached, otherwise the result of matching
 *   the path against the configured cacheability pages.
 */
function boost_is_cacheable($path) {
  $path = (empty($path)) ? variable_get('site_frontpage', 'node') : $path;
  $normal_path = drupal_get_normal_path($path); // normalize path
  // Read the query global once; it is not set when boost_init() returned
  // early (e.g. for nocache requests).
  $query = isset($GLOBALS['_boost_query']) ? $GLOBALS['_boost_query'] : '';
  $full = $normal_path . '-' . $query;

  // Never cache
  //  the user login/registration/password pages
  //  any admin pages
  //  comment reply pages
  //  node add page
  //  URL variables that contain / or \
  //  if incoming URL contains '..' or null bytes
  //  Limit the maximum directory nesting depth of the path
  // All strpos() checks compare against FALSE so that a match at offset 0 is
  // not mistaken for "not found".
  if (   $normal_path == 'user'
      || preg_match('!^user/(login|register|password)!', $normal_path)
      || preg_match('!^admin!', $normal_path)
      || preg_match('!comment/reply$!', $normal_path)
      || preg_match('!^node/add!', $normal_path)
      || strpos($query, '/') !== FALSE
      || strpos($query, '\\') !== FALSE
      || strpos($full, '..') !== FALSE
      || strpos($full, "\0") !== FALSE
      || count(explode('/', $path)) > BOOST_MAX_PATH_DEPTH
    ) {
    return FALSE;
  }

  if (!BOOST_CACHE_XML && (preg_match('!/feed$!', $normal_path) || preg_match('!\.xml$!', $normal_path))) {
    return FALSE;
  }

  if (!BOOST_CACHE_QUERY && $query != BOOST_CHAR) {
    return FALSE;
  }

  // Don't cache path if it can't be served by apache.
  if (BOOST_ONLY_ASCII_PATH) {
    if (preg_match('@[^/a-z0-9_\-&=,\.:]@i', $path)) {
      return FALSE;
    }
  }

  // Check for reserved characters if on windows
  // http://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
  // " * : < > |
  $chars = '"*:<>|';
  if (stristr(PHP_OS, 'WIN') && preg_match("/[".$chars."]/", $full)) {
    return FALSE;
  }

  // See http://api.drupal.org/api/function/block_list/6
  // Match the user's cacheability settings against the path
  if (BOOST_CACHEABILITY_PAGES) {
    if (BOOST_CACHEABILITY_OPTION < 2) {
      $page_match = drupal_match_path($path, BOOST_CACHEABILITY_PAGES);
      if ($path != $_GET['q']) {
        $page_match = $page_match || drupal_match_path($_GET['q'], BOOST_CACHEABILITY_PAGES);
      }
      // When BOOST_CACHEABILITY_OPTION has a value of 0, boost will cache
      // all pages except those listed in BOOST_CACHEABILITY_PAGES. When set
      // to 1, boost will cache only on those pages listed in BOOST_CACHEABILITY_PAGES.
      $page_match = !(BOOST_CACHEABILITY_OPTION xor $page_match);
    }
    else {
      // NOTE(review): evaluates the configured value as PHP code; make sure
      // only trusted administrators can edit this setting.
      $page_match = drupal_eval(BOOST_CACHEABILITY_PAGES);
    }
  }
  else {
    $page_match = TRUE;
  }

  return $page_match;
}

/**
 * Determines whether a given Drupal page is currently cached or not.
*
 * @param $path
 *   Current URL
 *
 * @return
 *   TRUE when the file computed by boost_file_path() exists.
 */
function boost_is_cached($path) {
  // Callers are expected to have dealt with an empty path already, and path
  // aliases are used as-is: the internal path (node/56) is not needed here.
  $cache_file = boost_file_path($path);
  return file_exists($cache_file);
}

/**
 * Deletes all files currently in the cache.
 *
 * @return
 *   TRUE when the cache was flushed, FALSE when BOOST_IGNORE_FLUSH is
 *   non-zero and the flush was therefore skipped.
 */
function boost_cache_clear_all() {
  if (BOOST_IGNORE_FLUSH != 0) {
    return FALSE;
  }

  boost_cache_clear_all_db();
  boost_cache_delete(TRUE);
  if (BOOST_VERBOSE >= 5) {
    watchdog('boost', 'Flushed ALL files from static page cache.', array(), WATCHDOG_NOTICE);
  }
  return TRUE;
}

/**
 * Deletes all expired static files currently in the cache.
 * OLD FUNCTION
 *
 * @return
 *   Always TRUE.
 */
function boost_cache_expire_all() {
  boost_cache_delete(FALSE);
  return TRUE;
}

/**
 * Resets all entries in database.
 *
 * Sets expire to 0 on every {boost_cache} row whose base_dir is
 * BOOST_FILE_PATH.
 */
function boost_cache_clear_all_db() {
  db_query("UPDATE {boost_cache} SET expire = %d WHERE base_dir = '%s'", 0, BOOST_FILE_PATH);
}

/**
 * Deletes files in the cache.
 *
 * The cache directories and their root id files are (re)created both before
 * and after the removal pass.
 *
 * @param $flush
 *   If true clear the entire cache directory.
 */
function boost_cache_delete($flush = FALSE) {
  clearstatcache();

  // Normal and gzip cache roots, always processed in this order.
  $cache_roots = array(BOOST_FILE_PATH, BOOST_GZIP_FILE_PATH);

  // Recreate dirs.
  foreach ($cache_roots as $root) {
    _boost_mkdir_p($root);
  }
  // Add in .boost root id file.
  foreach ($cache_roots as $root) {
    file_put_contents($root . '/' . BOOST_ROOT_FILE, $root);
  }

  // Flush Cache.
  foreach ($cache_roots as $root) {
    if (file_exists($root)) {
      _boost_rmdir_rf($root, $flush, TRUE);
    }
  }

  // Recreate dirs.
  foreach ($cache_roots as $root) {
    _boost_mkdir_p($root);
  }
  // Add in .boost root id file.
  foreach ($cache_roots as $root) {
    file_put_contents($root . '/' . BOOST_ROOT_FILE, $root);
  }
}

/**
 * Finds all possible paths/redirects/aliases given the root path.
*
 * @param $path
 *   Current URL
 * @param $wildcard
 *   If true get all cached files that start with this path.
 */
function boost_cache_expire_derivative($path, $wildcard = FALSE) {
  global $base_path;

  // Path alias, with the Drupal base path segments stripped off.
  $path_alias = url($path, array('absolute' => FALSE));
  if ($base_path != '/') {
    $path_alias = implode('/', array_diff_assoc(array_filter(explode('/', $path_alias)), array_filter(explode('/', $base_path))));
  }

  // Path redirects pointing at this path.
  if (module_exists('path_redirect')) {
    $path_redirects = boost_path_redirect_load(array('redirect' => $path));
  }

  // Flush the alias, every redirect source and finally the path itself.
  boost_cache_expire($path_alias, $wildcard);
  if (isset($path_redirects)) {
    foreach ($path_redirects as $path_redirect) {
      boost_cache_expire($path_redirect['path'], $wildcard);
    }
  }
  boost_cache_expire($path, $wildcard);
}

/**
 * Expires the static file cache for a given page, or multiple pages
 * matching a wildcard.
 *
 * Only files with the HTML and XML extensions are matched.
 *
 * @param $path
 *   Current URL
 * @param $wildcard
 *   If true get all cached files that start with this path.
 *
 * @return
 *   TRUE if at least one file was found and passed to boost_cache_kill(),
 *   FALSE if the path is rejected by boost_file_path() or nothing matched.
 *
 * TODO: Replace glob() with a database operation.
 */
function boost_cache_expire($path, $wildcard = FALSE) {
  // Sanity check; boost_file_path() rejects a path regardless of extension,
  // so a single call covers both patterns below.
  $base = boost_file_path($path, FALSE, NULL);
  if ($base === FALSE) {
    return FALSE;
  }
  $pattern = $base . ($wildcard ? '*' : '');

  // Get list of related files. glob() may return FALSE or an empty array.
  $filenames = array();
  foreach (array(BOOST_FILE_EXTENSION, BOOST_XML_EXTENSION) as $extension) {
    $matches = glob($pattern . $extension, GLOB_NOSORT);
    if (!empty($matches)) {
      $filenames = array_merge($filenames, $matches);
    }
  }
  if (empty($filenames)) {
    return FALSE;
  }

  // Flush expired files
  foreach ($filenames as $filename) {
    boost_cache_kill($filename);
  }
  return TRUE;
}

/**
 * Deletes cached page from file system
 *
 * Also sets the matching {boost_cache} row to expire = 0 and removes the
 * gzip copy. Does nothing when BOOST_IGNORE_FLUSH is 3 or more, or when
 * $filename does not contain BOOST_FILE_PATH.
 *
 * @param $filename
 *   Name of cached file; primary key in database
 */
function boost_cache_kill($filename) {
  if (BOOST_IGNORE_FLUSH < 3 && strstr($filename, BOOST_FILE_PATH)) {
    db_query("UPDATE {boost_cache} SET expire = 0 WHERE filename = '%s'", $filename);
    if (file_exists($filename)) {
      @unlink($filename);
    }
    $gz_filename = str_replace(BOOST_FILE_PATH, BOOST_GZIP_FILE_PATH, $filename) . BOOST_GZIP_EXTENSION;
    if (file_exists($gz_filename)) {
      @unlink($gz_filename);
    }
  }
}

/**
 * Flushes all expired pages from database
 *
 * @return
 *   TRUE if the pass ran, FALSE if BOOST_IGNORE_FLUSH is 2 or more.
 *
 * TODO del empty dirs if enabled
 */
function boost_cache_db_expire() {
  if (BOOST_IGNORE_FLUSH < 2) {
    // expire = 0 marks an already flushed row, hence the lower bound of 1.
    $result = db_query('SELECT filename FROM {boost_cache} WHERE expire BETWEEN 1 AND %d', BOOST_TIME);
    while ($boost = db_fetch_array($result)) {
      boost_cache_kill($boost['filename']);
    }
    if (BOOST_FLUSH_DIR) {
      // TO-DO: del empty dirs.
    }
    return TRUE;
  }
  return FALSE;
}

/**
 * Returns the cached contents of the specified page, if available.
 *
 * @param $path
 *   Current URL
 *
 * @return
 *   The file contents, or NULL if there is no readable cache file.
 */
function boost_cache_get($path) {
  if (($filename = boost_file_path($path))) {
    if (file_exists($filename) && is_readable($filename)) {
      return file_get_contents($filename);
    }
  }
  return NULL;
}

/**
 * Replaces/Sets the cached contents of the specified page, if stale.
 *
 * @param $path
 *   Current URL
 * @param $data
 *   URL's contents
 * @param $extension
 *   File extension for this mime type
 *
 * @return
 *   TRUE if the static file was written, FALSE otherwise.
 */
function boost_cache_set($path, $data = '', $extension = BOOST_FILE_EXTENSION) {
  // Code commenting style based on what is being cached.
  switch ($extension) {
    case BOOST_FILE_EXTENSION:
      $comment_start = '<!-- ';
      $comment_end = " -->\n";
      $expire = BOOST_CACHE_LIFETIME;
      break;
    case BOOST_XML_EXTENSION:
      $comment_start = '<!-- ';
      $comment_end = " -->\n";
      $expire = BOOST_CACHE_XML_LIFETIME;
      break;
    case BOOST_JSON_EXTENSION:
      $comment_start = '/* ';
      $comment_end = " */\n";
      $expire = BOOST_CACHE_JSON_LIFETIME;
      break;
    default:
      // Unknown content type: there is no comment syntax known to be safe,
      // so no footer is appended and the page lifetime is used.
      $comment_start = NULL;
      $comment_end = NULL;
      $expire = BOOST_CACHE_LIFETIME;
      break;
  }

  // Append the Boost footer with the relevant timestamps
  if (isset($comment_start)) {
    $cached_at = date('Y-m-d H:i:s', BOOST_TIME);
    $expires_at = date('Y-m-d H:i:s', BOOST_TIME + $expire);
    $data = rtrim($data) . "\n" . $comment_start . str_replace(array('%cached_at', '%expires_at'), array($cached_at, $expires_at), BOOST_BANNER) . $comment_end;
  }

  // Invoke hook_boost_preprocess($data); a non-NULL return replaces $data.
  foreach (module_implements('boost_preprocess') as $module) {
    if (($result = module_invoke($module, 'boost_preprocess', $data)) != NULL) {
      $data = $result;
    }
  }

  // Execute the pre-process function if one has been defined
  if (function_exists(BOOST_PRE_PROCESS_FUNCTION)) {
    $data = call_user_func(BOOST_PRE_PROCESS_FUNCTION, $data);
  }
  db_set_active();

  // Final check, make sure this page should be cached. Allow for the preprocess
  // function to have a final say in if this page should be cached.
  if (!$GLOBALS['_boost_cache_this']) {
    return FALSE;
  }

  // Create or update the static files as needed
  if (($filename = boost_file_path($path, TRUE, $extension)) && (BOOST_OVERWRITE_FILE || !file_exists($filename) || boost_db_is_expired($filename))) {
    // Special handling of the front page for aggressive gzip test
    if ($path == '' && BOOST_AGGRESSIVE_GZIP) {
      _boost_generate_gzip_test();
      boost_cache_write($filename, _boost_gzip_test_inject_iframe($data));
    }
    else {
      boost_cache_write($filename, $data);
    }
    if (BOOST_GZIP) {
      boost_cache_write(str_replace(BOOST_FILE_PATH, BOOST_GZIP_FILE_PATH, $filename) . BOOST_GZIP_EXTENSION, gzencode($data, 9));
    }
    boost_db_prep($filename, $extension, BOOST_TIME + $expire);
    return TRUE;
  }
  else {
    return FALSE;
  }
}

/**
 * Figure out what is going in the database & put it in
 *
 * @param $filename
 *   Name of cached file; primary key in database
 * @param $extension
 *   Filename extension: Used for content types.
 * @param $expire
 *   Cache expiration time in seconds (UNIX time).
 */
function boost_db_prep($filename, $extension, $expire) {
  $router_item = isset($GLOBALS['_boost_router_item']) ? $GLOBALS['_boost_router_item'] : _boost_get_menu_router();
  $timer = timer_read('page');
  $timer_average = $timer;
  $lifetime = -1;
  $push = -1;

  // The settings come back most specific first; use the first one found.
  $settings = boost_get_settings_db($router_item);
  foreach ($settings as $value) {
    if ($value != NULL) {
      $boost_settings_db = $value;
      break;
    }
  }

  // If this page has been cached before, average the stored build time
  // with the current one.
  $boost_db = boost_get_db($filename);
  if ($boost_db) {
    $timer_average = ($boost_db['timer_average'] + $timer)/2;
  }

  // A matching settings row overrides lifetime and push; a lifetime other
  // than -1 also overrides the expiration time.
  if (isset($boost_settings_db)) {
    $expire = $boost_settings_db['lifetime'] != -1 ? $boost_settings_db['lifetime'] + BOOST_TIME : $expire;
    $lifetime = $boost_settings_db['lifetime'];
    $push = $boost_settings_db['push'];
  }
  boost_put_db($filename, $expire, $lifetime, $push, $router_item, $timer, $timer_average, $extension);
}

/**
 * Puts boost info into database.
 *
 * @param $filename
 *   Name of cached file; primary key in database
 * @param $expire
 *   Expiration time
 * @param $lifetime
 *   Default lifetime
 * @param $push
 *   Pre-cache this file
 * @param $router_item
 *   Array containing page_callback, page_arguments & page_id.
 * @param $timer
 *   Time it took drupal to build this page.
 * @param $timer_average
 *   Average time Drupal has spent building this page.
* @param $extension
 *   Filename extension: Used for content types.
 */
function boost_put_db($filename, $expire, $lifetime, $push, $router_item, $timer, $timer_average, $extension) {
  global $base_root;
  $url = $base_root . request_uri();

  // Column values shared by the UPDATE and the INSERT, in column order.
  $values = array(
    $expire,
    $lifetime,
    $push,
    $router_item['page_callback'],
    $router_item['page_arguments'],
    $timer,
    $timer_average,
    BOOST_FILE_PATH,
    $router_item['page_id'],
    $extension,
    $url,
  );

  // Try to update the existing row; the filename is the last placeholder.
  db_query("UPDATE {boost_cache} SET expire = %d, lifetime = %d, push = %d, page_callback = '%s', page_arguments = '%s', timer = %d, timer_average = %d, base_dir = '%s', page_id = %d, extension = '%s', url = '%s' WHERE filename = '%s'", array_merge($values, array($filename)));

  // No row was affected: insert one; the filename is the first placeholder.
  if (!db_affected_rows()) {
    db_query("INSERT INTO {boost_cache} (filename, expire, lifetime, push, page_callback, page_arguments, timer, timer_average, base_dir, page_id, extension, url) VALUES ('%s', %d, %d, %d, '%s', '%s', %d, %d, '%s', %d, '%s', '%s')", array_merge(array($filename), $values));
  }
}

/**
 * Removes info from database. Use on 404 or 403.
 *
 * Does nothing unless $filename contains BOOST_FILE_PATH.
 *
 * @param $filename
 *   Name of cached file; primary key in database
 */
function boost_remove_db($filename) {
  if (!strstr($filename, BOOST_FILE_PATH)) {
    return;
  }
  db_query("DELETE FROM {boost_cache} WHERE filename = '%s'", $filename);
}

/**
 * Stores a lifetime/push setting in the {boost_cache_settings} table.
 *
 * @param $lifetime
 *   Default lifetime
 * @param $push
 *   Pre-cache this file
 * @param $router_item
 *   Array containing page_callback, page_arguments & page_id.
 * @param $scope
 *   How much of $router_item the setting is keyed on:
 *   0 = page_callback, page_arguments and page_id;
 *   1 = page_callback and page_arguments (page_id stored as 0);
 *   2 = page_callback only (page_arguments stored as '0', page_id as 0).
*/
function boost_put_settings_db($lifetime, $push, $router_item, $scope) {
  // The scope decides which page_arguments/page_id pair keys the settings
  // row: 0 = this exact page, 1 = callback + arguments, 2 = callback only.
  switch ($scope) {
    case 0:
      $page_arguments = $router_item['page_arguments'];
      $page_id = $router_item['page_id'];
      break;
    case 1:
      $page_arguments = $router_item['page_arguments'];
      $page_id = 0;
      break;
    case 2:
      $page_arguments = '0';
      $page_id = 0;
      break;
    default:
      // Unknown scope: nothing to store.
      return;
  }

  // Update the existing row for this key, if there is one.
  db_query("UPDATE {boost_cache_settings} SET lifetime = %d, push = %d WHERE page_callback = '%s' AND page_arguments = '%s' AND base_dir = '%s' AND page_id = %d", $lifetime, $push, $router_item['page_callback'], $page_arguments, BOOST_FILE_PATH, $page_id);

  // Otherwise insert it. Errors are suppressed on purpose, as in the
  // original code.
  if (!db_affected_rows()) {
    @db_query("INSERT INTO {boost_cache_settings} (csid, lifetime, push, page_callback, page_arguments, base_dir, page_id) VALUES (NULL, %d, %d, '%s', '%s', '%s', %d)", $lifetime, $push, $router_item['page_callback'], $page_arguments, BOOST_FILE_PATH, $page_id);
  }
}

/**
 * Removes info from boost database.
*
 * @param $csid
 *   Value of the csid column of the {boost_cache_settings} row to delete.
 */
function boost_remove_settings_db($csid) {
  db_query("DELETE FROM {boost_cache_settings} WHERE csid = %d", $csid);
}

/**
 * Sets per page configuration.
 *
 * Expires the cached files of the current page ($GLOBALS['_boost_path'])
 * and stores the new settings for the current router item.
 *
 * @param $lifetime
 *   Default lifetime
 * @param $push
 *   Pre-cache this file
 * @param $scope
 *   Passed through to boost_put_settings_db(); selects how much of the
 *   router item the settings row is keyed on.
 */
function boost_set_db_page_settings($lifetime, $push, $scope) {
  $router_item = isset($GLOBALS['_boost_router_item']) ? $GLOBALS['_boost_router_item'] : _boost_get_menu_router();

  // Clear old files so they acquire the new settings.
  boost_cache_expire_derivative($GLOBALS['_boost_path']);
  boost_put_settings_db($lifetime, $push, $router_item, $scope);
}

/**
 * Gets boost info from cache database.
 *
 * @param $filename
 *   Filename to be looked up in the database
 *
 * @return
 *   The {boost_cache} row as an array, or FALSE if there is none.
 */
function boost_get_db($filename) {
  return db_fetch_array(db_query_range("SELECT * FROM {boost_cache} WHERE filename = '%s'", $filename, 0, 1));
}

/**
 * Gets boost settings from cache settings database.
 *
 * @param $router_item
 *   Array containing page_callback, page_arguments & page_id.
*/
function boost_get_settings_db($router_item) {
  $sql = "SELECT * FROM {boost_cache_settings} WHERE page_callback = '%s' AND page_arguments = '%s' AND base_dir = '%s' AND page_id = %d";

  // (page_arguments, page_id) pairs to look up, most specific first:
  // the exact page, then the content type, then the content container
  // (node, view, term, etc.).
  $lookups = array(
    array($router_item['page_arguments'], $router_item['page_id']),
    array($router_item['page_arguments'], 0),
    array(0, 0),
  );

  $settings = array();
  foreach ($lookups as $lookup) {
    list($page_arguments, $page_id) = $lookup;
    // Each entry is a row array, or FALSE when nothing matched.
    $settings[] = db_fetch_array(db_query_range($sql, $router_item['page_callback'], $page_arguments, BOOST_FILE_PATH, $page_id, 0, 1));
  }
  return $settings;
}

/**
 * Returns all cached pages associated with the taxonomy term.
 *
 * @param $term
 *   Value matched against the page_id column.
 *
 * @return
 *   Array of filenames of unexpired cache entries for the term.
 */
function boost_get_db_term($term) {
  $result = db_query("SELECT filename FROM {boost_cache} WHERE expire > 0 AND page_id = %d AND page_callback = 'taxonomy'", $term);

  $filenames = array();
  while ($row = db_fetch_array($result)) {
    $filenames[] = $row['filename'];
  }
  return $filenames;
}

/**
 * Checks various timestamps in the database.
 *
 * @return bool
 *   Returns TRUE if the site has changed since the last time this function was called.
*/
function boost_has_site_changed() {
  // Get timestamps from the database
  $node_revisions = boost_get_time('node_revisions', 'timestamp');
  $history = boost_get_time('history', 'timestamp');
  $files = boost_get_time('files', 'timestamp');
  $comments = boost_get_time('comments', 'timestamp');
  $node = boost_get_time('node', 'changed');
  $last_comment_timestamp = boost_get_time('node_comment_statistics', 'last_comment_timestamp');
  $voteapi_vote = boost_get_time('votingapi_vote', 'timestamp');

  $max = max($node_revisions, $history, $files, $comments, $node, $last_comment_timestamp, $voteapi_vote);
  if ($max != BOOST_MAX_TIMESTAMP) {
    // Remember the newest timestamp for the next comparison.
    variable_set('boost_max_timestamp', (int)$max);
    return TRUE;
  }
  else {
    return FALSE;
  }
}

/**
 * Checks various timestamps in the database.
 *
 * @param $table
 *   Database table name
 * @param $column
 *   Column containing the time stamp
 * @return int
 *   Returns largest time in the table, or 0 if the table does not exist.
 */
function boost_get_time($table, $column) {
  // db_table_exists() applies the configured table prefix and is not tied
  // to MySQL, unlike a raw SHOW TABLES on the unprefixed name.
  if (!db_table_exists($table)) {
    return 0;
  }
  return (int)db_result(db_query_range("SELECT %s FROM {%s} ORDER BY %s DESC", $column, $table, $column, 0, 1));
}

/**
 * Writes data to filename in an atomic operation thats compatible with older
 * versions of php (php < 5.2.4 file_put_contents() doesn't lock correctly).
 *
 * The data is written to a temp file named after the process id, which is
 * then renamed onto $filename.
 *
 * @param $filename
 *   Name of file to be written
 * @param $data
 *   Contents of file
 *
 * @return
 *   TRUE on success, FALSE if the temp file could not be written or renamed.
 */
function boost_cache_write($filename, $data = '') {
  // A failed mkdir is only logged; the write below is still attempted.
  if (!_boost_mkdir_p(dirname($filename))) {
    if (BOOST_VERBOSE >= 3) {
      watchdog('boost', 'Unable to create directory: %dir
Group ID: %gid
User ID: %uid
Current script owner: %user
', array('%dir' => dirname($filename), '%gid' => getmygid(), '%uid' => getmyuid(), '%user' => get_current_user()), WATCHDOG_WARNING);
    }
  }
  $tempfile = $filename . getmypid();
  if (@file_put_contents($tempfile, $data) === FALSE) {
    if (BOOST_VERBOSE >= 3) {
      watchdog('boost', 'Unable to write temp file: %file
Group ID: %gid
User ID: %uid
Current script owner: %user
', array('%file' => $tempfile, '%gid' => getmygid(), '%uid' => getmyuid(), '%user' => get_current_user()), WATCHDOG_WARNING);
    }
    return FALSE;
  }
  else {
    if (is_numeric(BOOST_PERMISSIONS_FILE)) {
      @chmod($tempfile, octdec(BOOST_PERMISSIONS_FILE));
    }
    // Erase old file
    if (BOOST_OVERWRITE_FILE) {
      @unlink($filename);
    }
    // Put temp file in its final location
    if (@rename($tempfile, $filename) === FALSE) {
      if (BOOST_VERBOSE >= 5) {
        watchdog('boost', 'Unable to rename file: %temp to %file
Group ID: %gid
User ID: %uid
Current script owner: %user
', array('%temp' => $tempfile, '%file' => $filename, '%gid' => getmygid(), '%uid' => getmyuid(), '%user' => get_current_user()), WATCHDOG_WARNING);
      }
      // Do not leave the temp file behind.
      @unlink($tempfile);
      return FALSE;
    }
  }
  return TRUE;
}

/**
 * Returns the full directory path to the static file cache directory.
 *
 * @param $host
 *   Host name. Example: example.com
 * @param $absolute
 *   Give path from system root if true. If false give path from web root.
 *
 * @return
 *   The cache root, BOOST_NORMAL_DIR, host and sub directory joined with
 *   '/', prefixed with getcwd() when $absolute is true.
 */
function boost_cache_directory($host = NULL, $absolute = TRUE) {
  global $base_url;

  if ($base_url == "http://") {
    if (BOOST_VERBOSE >= 1) {
      watchdog('boost', 'base_url is not set in your settings.php file. Please read #7 in boosts INSTALL.txt file.', array(), WATCHDOG_NOTICE);
    }
    if (!BOOST_MULTISITE_SINGLE_DB) {
      // NOTE(review): the default passed to variable_get() is evaluated
      // first and calls this function again while $base_url is still
      // "http://" - this looks like unbounded recursion; confirm.
      $base_url = $base_url . str_replace(BOOST_ROOT_CACHE_DIR . '/', '', variable_get('boost_file_path', boost_cache_directory(NULL, FALSE)));
    }
  }
  $parts = parse_url($base_url);
  $host = !empty($host) ? $host : $parts['host'];
  $parts['path'] = isset($parts['path']) ? $parts['path'] : '/';
  // NOTE(review): $base_path is not declared global in this function, so
  // the first branch is never taken and $parts['path'] is always used.
  $subdir = implode('/', array_filter(explode('/', (!empty($base_path)) ? $base_path : $parts['path'])));
  return implode('/', !$absolute ?
    array_filter(array(BOOST_ROOT_CACHE_DIR, BOOST_NORMAL_DIR, $host, $subdir)) :
    array_filter(array(getcwd(), BOOST_ROOT_CACHE_DIR, BOOST_NORMAL_DIR, $host, $subdir)));
}

/**
 * Returns the static file path for a Drupal page.
 *
 * @param $path
 *   path to convert to boost's file naming convention
 * @param $query
 *   add query to path
 * @param $extension
 *   add extension to end of filename
 *
 * @return
 *   The file path below BOOST_FILE_PATH, or FALSE if $path is rejected.
 *
 * $path = $GLOBALS['_boost_path'] most of the time
 */
function boost_file_path($path, $query = TRUE, $extension = BOOST_FILE_EXTENSION) {
  //handling of url variables
  if ($GLOBALS['_boost_query'] != BOOST_CHAR) {
    if ($query) {
      $path .= $GLOBALS['_boost_query'];
    }
  }
  else {
    // An empty query (just BOOST_CHAR) is always appended.
    $path .= $GLOBALS['_boost_query'];
  }

  // Under no circumstances should the incoming path contain '..' or null
  // bytes; we also limit the maximum directory nesting depth of the path
  if (   strpos($path, '..') !== FALSE
      || strpos($path, "\0") !== FALSE
      || count(explode('/', $path)) > BOOST_MAX_PATH_DEPTH
      ) {
    return FALSE;
  }

  return implode('/', array(BOOST_FILE_PATH, $path . (is_null($extension) ? '' : $extension)));
}

/**
 * Returns the age of a cached file, measured in seconds since it was last
 * updated.
 * @param $filename
 *   Name of cached file
 */
function boost_file_get_age($filename) {
  return BOOST_TIME - filemtime($filename);
}

/**
 * Returns the stored expiration time of a cached file.
 *
 * Despite the name this is the expire column of {boost_cache}, not an age.
 *
 * @param $filename
 *   Name of cached file
 *
 * @return
 *   The expire value, or FALSE if it is 0 or there is no database row.
 */
function boost_db_get_age($filename) {
  $boost_db = boost_get_db($filename);
  // No row: same result as before, without indexing into FALSE.
  if (!$boost_db) {
    return FALSE;
  }
  return $boost_db['expire'] != 0 ? $boost_db['expire'] : FALSE;
}

/**
 * Returns the remaining time-to-live for a cached file, measured in
 * seconds.
 * @param $filename
 *   Name of cached file
 */
function boost_file_get_ttl($filename) {
  return BOOST_CACHE_LIFETIME - boost_file_get_age($filename);
}

/**
 * Returns the remaining time-to-live according to the database.
 *
 * @param $filename
 *   Name of cached file
 *
 * @return
 *   Stored expiration time minus BOOST_TIME.
 */
function boost_db_get_ttl($filename) {
  return boost_db_get_age($filename) - BOOST_TIME;
}

/**
 * Determines whether a cached file has expired, i.e. whether its age
 * exceeds the maximum cache lifetime as defined by Drupal's system
 * settings.
 * @param $filename
 *   Name of cached file
 */
function boost_file_is_expired($filename) {
  return boost_file_get_age($filename) > BOOST_CACHE_LIFETIME;
}

/**
 * Determines whether a cached file has expired according to the database.
 *
 * @param $filename
 *   Name of cached file
 *
 * @return
 *   TRUE if the stored expiration time lies before BOOST_TIME; this is
 *   also the case when no expiration time is stored.
 */
function boost_db_is_expired($filename) {
  return boost_db_get_age($filename) < BOOST_TIME;
}

/**
 * Sets a special cookie preventing authenticated users getting served pages
 * from the static page cache.
 *
 * Also switches caching off for the current request.
 *
 * @param $user
 *   User Object
 * @param $expires
 *   Expiration time. When empty the cookie carries the user id and uses
 *   the session cookie lifetime; otherwise the cookie value is FALSE and
 *   $expires is used as given.
 */
function boost_set_cookie($user, $expires = NULL) {
  if (!$expires) {
    $expires = ini_get('session.cookie_lifetime');
    $expires = (!empty($expires) && is_numeric($expires)) ? BOOST_TIME + (int)$expires : 0;
    setcookie(BOOST_COOKIE, $user->uid, $expires, ini_get('session.cookie_path'), ini_get('session.cookie_domain'), ini_get('session.cookie_secure') == '1');
  }
  else {
    setcookie(BOOST_COOKIE, FALSE, $expires, ini_get('session.cookie_path'), ini_get('session.cookie_domain'), ini_get('session.cookie_secure') == '1');
  }
  $GLOBALS['_boost_cache_this'] = FALSE;
}

/**
 * Retrieve a specific URL redirect from the database.
 * http://drupal.org/node/451790
 *
 * @param $where
 *   Array containing 'redirect' => $path, or a numeric rid.
 * @param $args
 *   Query arguments; the values of $where are appended to it.
 * @param $sort
 *   Array of ORDER BY expressions.
 *
 * @return
 *   Array of matching {path_redirect} rows; empty if there are no
 *   conditions or no matches.
 */
function boost_path_redirect_load($where = array(), $args = array(), $sort = array()) {
  $redirects = array();
  if (is_numeric($where)) {
    $where = array('rid' => $where);
  }

  // Turn each 'column' => value pair into a placeholder condition.
  foreach ($where as $key => $value) {
    if (is_string($key)) {
      $args[] = $value;
      $where[$key] = $key .' = '. (is_numeric($value) ? '%d' : "'%s'");
    }
  }

  if ($where && $args) {
    $sql = "SELECT * FROM {path_redirect} WHERE ". implode(' AND ', $where);
    if ($sort) {
      $sql .= ' ORDER BY '. implode(' ,', $sort);
    }
    $result = db_query($sql, $args);
    while ($redirect = db_fetch_array($result)) {
      $redirects[] = $redirect;
    }
  }
  // Always an array, so callers can iterate over the result.
  return $redirects;
}

/**
 * Cache css and or js files.
 *
 * Parse the html file so we get all css/js files. drupal_get_js/css isn't 100%.
 *
 * @param $buffer
 *   String containing documents html.
 */
function boost_cache_css_js_files($buffer) { if (BOOST_CACHE_CSS) { // Extract external css files from html document $css_files = explode(' $value) { // Extract css filename $temp = explode(base_path(), array_pop(explode('//', array_pop(explode('href="', array_shift(explode('" />', $value))))))); array_shift($temp); $css_files[$key] = array_shift(explode('"', array_shift(explode('?', implode('/', $temp))))); } _boost_copy_css_files($css_files); } if (BOOST_CACHE_JS) { $js_files = explode('

It works!

ETO; _boost_mkdir_p(dirname($filename)); file_put_contents($filename, gzencode($string, 9)); } } /** * Inject iframe gzip cookie test. */ function _boost_gzip_test_inject_iframe($data) { return str_replace('', '' . "\n" . '', $data); } /** * Attempts to set the PHP maximum execution time. * See http://api.drupal.org/api/function/drupal_set_time_limit/7 * * This function is a wrapper around the PHP function set_time_limit(). * When called, set_time_limit() restarts the timeout counter from zero. * In other words, if the timeout is the default 30 seconds, and 25 seconds * into script execution a call such as set_time_limit(20) is made, the * script will run for a total of 45 seconds before timing out. * * It also means that it is possible to decrease the total time limit if * the sum of the new time limit and the current time spent running the * script is inferior to the original time limit. It is inherent to the way * set_time_limit() works, it should rather be called with an appropriate * value every time you need to allocate a certain amount of time * to execute a task than only once at the beginning of the script. * * Before calling set_time_limit(), we check if this function is available * because it could be disabled by the server administrator. We also hide all * the errors that could occur when calling set_time_limit(), because it is * not possible to reliably ensure that PHP or a security extension will * not issue a warning/error if they prevent the use of this function. * * @param $time_limit * An integer specifying the new time limit, in seconds. A value of 0 * indicates unlimited execution time. 
*/ function _boost_set_time_limit($time_limit) { if (function_exists('set_time_limit')) { @set_time_limit($time_limit); } } ////////////////////////////////////////////////////////////////////////////// // PHP 4.x compatibility if (!function_exists('file_put_contents')) { function file_put_contents($filename, $data) { if ($fp = fopen($filename, 'wb')) { fwrite($fp, $data); fclose($fp); return filesize($filename); } return FALSE; } } ////////////////////////////////////////////////////////////////////////////// // Crawler Code /** * The brains of the crawler. * * @param $expire * Has the site changed, if so get expire column */ function boost_crawler_run($expire = -1) { Global $base_url; $this_thread = (isset($_GET['thread']) && is_numeric($_GET['thread'])) ? $_GET['thread'] : NULL; $total_threads = (isset($_GET['total']) && is_numeric($_GET['total'])) ? $_GET['total'] : NULL; $expire = ($expire == -1 && isset($_GET['expire']) && is_numeric($_GET['expire'])) ? $_GET['expire'] : $expire; $self = BOOST_CRAWLER_SELF; $GLOBALS['_boost_max_execution_time'] = ini_get('max_execution_time');; $GLOBALS['_boost_output_buffering'] = ini_get('output_buffering'); if ($_GET['q'] == 'boost-crawler') { // if not called via cron, require key to be present in url if ($_GET['key'] != variable_get('boost_crawler_key', FALSE)) { drupal_access_denied(); exit; } // Test for access on status page if ($_GET['test']) { echo '

OK

'; exit; } // Stop button code if (_boost_variable_get('boost_crawler_stopped')) { // Wait 0 to 0.1 seconds before grabbing number of threads. usleep(mt_rand(0, 100000)); db_lock_table('variable'); $threads = _boost_variable_get('boost_crawler_number_of_threads'); _boost_variable_set('boost_crawler_number_of_threads', (int)$threads-1); // Clock out _boost_variable_set('boost_crawler_thread_num_' . $this_thread , 0); db_unlock_tables(); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Thread %num stopped.', array('%num' => $this_thread)); } ini_set('max_execution_time', $GLOBALS['_boost_max_execution_time']); ini_set('output_buffering', $GLOBALS['_boost_output_buffering']); exit; } // Kill this thread if it doesn't have a thread number assigned to it. if (!isset($this_thread)) { if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Rogue thread killed.'); } exit; } // Try to prevent crawler from stalling. ini_set('max_execution_time', 600); // Return html so connection closes boost_async_opp('async'); // Turn off output buffer. ini_set('output_buffering', 'off'); // Fetch the cron semaphore $semaphore = variable_get('cron_semaphore', FALSE); // Wait 15 seconds if cron still running and try again (let cron finish); if longer then 5 minutes stop stalling and start crawling. if ($semaphore == TRUE && BOOST_TIME - $semaphore < 300) { if (_boost_variable_get('boost_crawler_sleeping')) { // Kill this thread; multiple crawlers sleeping. ini_set('max_execution_time', $GLOBALS['_boost_max_execution_time']); ini_set('output_buffering', $GLOBALS['_boost_output_buffering']); exit; } _boost_variable_set('boost_crawler_sleeping', TRUE); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler Sleep for 15 seconds'); } sleep(15); _boost_variable_set('boost_crawler_sleeping', FALSE); boost_async_call_crawler($self, 1, NULL, $expire); exit; } // Crawler was forced to stop last run, wait extra time before starting up again. 
if (variable_get('boost_crawler_stopped', FALSE) && !isset($this_thread) && !isset($total_threads)) { if (_boost_variable_get('boost_crawler_sleeping')) { ini_set('max_execution_time', $GLOBALS['_boost_max_execution_time']); ini_set('output_buffering', $GLOBALS['_boost_output_buffering']); exit; } _boost_variable_set('boost_crawler_sleeping', TRUE); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler sleeping for @x seconds, do to forced shutdown.', array('@x' => 2 * BOOST_CRAWLER_THREADS * BOOST_CRAWLER_BATCH_SIZE)); } $i = BOOST_CRAWLER_BATCH_SIZE; while ($i > 0) { _boost_set_time_limit(0); sleep(2 * BOOST_CRAWLER_THREADS); $i--; } variable_set('boost_crawler_stopped', FALSE); _boost_variable_set('boost_crawler_sleeping', FALSE); boost_async_call_crawler($self, 1, NULL, $expire); exit; } // Add URL's to crawler table, call self and exit if (!boost_crawler_seed_tables($expire)) { boost_async_call_crawler($self, $this_thread, _boost_variable_get('boost_crawler_number_of_threads'), $expire); exit; } // Calc Threads $total = boost_crawler_total_count() - BOOST_CRAWLER_BATCH_SIZE; $threads = _boost_variable_get('boost_crawler_number_of_threads'); $threads = $threads > 0 ? $threads : BOOST_CRAWLER_THREADS; if ($total/BOOST_CRAWLER_BATCH_SIZE < BOOST_CRAWLER_THREADS) { $threads = floor($total/BOOST_CRAWLER_BATCH_SIZE); } // Start the clock on first run if (!_boost_variable_get('boost_crawler_start_time')) { _boost_variable_set('boost_crawler_start_time', BOOST_TIME); _boost_variable_set('boost_crawler_number_of_threads', (int)$threads); // Clock in _boost_variable_set('boost_crawler_thread_num_' . $this_thread , BOOST_TIME); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Thread @num of @total started', array('@num' => 1, '@total' => $threads)); } } // Spin up threads on demand while ($threads > 0 && $this_thread == 1) { db_lock_table('variable'); $thread_time = _boost_variable_get('boost_crawler_thread_num_' . 
$threads); if (!$thread_time || $thread_time + BOOST_MAX_THREAD_TIME < BOOST_TIME) { _boost_variable_set('boost_crawler_thread_num_' . $threads, BOOST_TIME); db_unlock_tables(); boost_async_call_crawler($self, $threads, _boost_variable_get('boost_crawler_number_of_threads'), $expire); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Thread @num of @total started', array('@num' => $threads, '@total' => _boost_variable_get('boost_crawler_number_of_threads'))); } _boost_set_time_limit(0); } db_unlock_tables(); $threads--; } // Make sure this thread is supposed to be running. $thread = _boost_variable_get('boost_crawler_number_of_threads'); if ($thread >= 1 && $this_thread > $thread) { // Clock out if (isset($this_thread)) { _boost_variable_set('boost_crawler_thread_num_' . $this_thread , 0); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Thread %num of %total Killed.', array('%num' => $this_thread, '%total' => $total_threads)); } } // elseif (BOOST_VERBOSE >= 5) { // watchdog('boost', 'Crawler - Extra Thread Killed.'); // } if ( !boost_crawler_threads_alive() && _boost_variable_get('boost_crawler_number_of_tries') < 3 && boost_crawler_verify($expire) ) { variable_set('boost_crawler_number_of_tries', (int)_boost_variable_get('boost_crawler_number_of_tries') + 1); _boost_variable_set('boost_crawler_number_of_threads', 1); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Restarting with 1 thread, to try & get the stubborn urls cached.'); } boost_async_call_crawler($self, 1, 1, $expire); exit; } ini_set('max_execution_time', $GLOBALS['_boost_max_execution_time']); ini_set('output_buffering', $GLOBALS['_boost_output_buffering']); exit; } // Clock in _boost_variable_set('boost_crawler_thread_num_' . $this_thread , BOOST_TIME); // Wait 0 to 0.1 seconds before grabbing DB position counter. 
usleep(mt_rand(0, 100000)); db_lock_table('variable'); $from = _boost_variable_get('boost_crawler_position'); _boost_variable_set('boost_crawler_position', $from + BOOST_CRAWLER_BATCH_SIZE); db_unlock_tables(); $results = db_query_range("SELECT url FROM {boost_crawler} ORDER BY id ASC", $from, BOOST_CRAWLER_BATCH_SIZE); $url = db_result($results); if (!$url) { // We Are Done // Wait 0 to 0.1 seconds before grabbing number of threads. usleep(mt_rand(0, 100000)); db_lock_table('variable'); $threads = _boost_variable_get('boost_crawler_number_of_threads'); _boost_variable_set('boost_crawler_number_of_threads', (int)$threads-1); // Clock out _boost_variable_set('boost_crawler_thread_num_' . $this_thread , 0); db_unlock_tables(); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Thread %num of %total Done.', array('%num' => $this_thread, '%total' => $total_threads)); } // Re init crawler if it missed some, try 5 times if ( !boost_crawler_threads_alive() && _boost_variable_get('boost_crawler_number_of_tries') < 3 && boost_crawler_verify($expire) ) { variable_set('boost_crawler_number_of_tries', (int)_boost_variable_get('boost_crawler_number_of_tries') + 1); _boost_variable_set('boost_crawler_number_of_threads', 1); if (BOOST_VERBOSE >= 5) { watchdog('boost', 'Crawler - Restarting with 1 thread, to try & get the stubborn urls cached.'); } boost_async_call_crawler($self, 1, 1, $expire); exit; } return TRUE; } else { drupal_http_request($url); if (BOOST_CRAWLER_THROTTLE) { usleep(BOOST_CRAWLER_THROTTLE); } _boost_set_time_limit(0); } while ($url = db_result($results)) { drupal_http_request($url); if (BOOST_CRAWLER_THROTTLE) { usleep(BOOST_CRAWLER_THROTTLE); } _boost_set_time_limit(0); } // Crawler for this round done, call self and exit boost_async_call_crawler($self, $this_thread, _boost_variable_get('boost_crawler_number_of_threads'), $expire); exit; } elseif (boost_crawler_threads_alive() || _boost_variable_get('boost_crawler_sleeping')) { if (BOOST_VERBOSE >= 3) { 
watchdog('boost', 'Crawler already running');
    }
    drupal_set_message(t('Boost: Crawler is already running. Attempt to start crawler failed.'), 'warning');
  }
  elseif (!BOOST_CRAWL_ON_CRON) {
    // Crawler Not Enabled
    return FALSE;
  }
  elseif (variable_get('cron_semaphore', FALSE) == TRUE) {
    // This function called from cron; reset & call self.
    if (BOOST_VERBOSE >= 5) {
      watchdog('boost', 'Crawler Start ' . $self);
    }
    db_query('TRUNCATE {boost_crawler}');
    variable_set('boost_crawler_position', 0);
    variable_set('boost_crawler_loaded_count' . BOOST_FILE_EXTENSION, 0);
    variable_set('boost_crawler_loaded_count' . BOOST_XML_EXTENSION, 0);
    variable_set('boost_crawler_loaded_count' . BOOST_JSON_EXTENSION, 0);
    variable_set('boost_crawler_loaded_count_alias', 0);
    variable_set('boost_crawler_number_of_tries', 0);
    variable_set('boost_crawler_number_of_threads', 0);
    variable_set('boost_crawler_sleeping', FALSE);
    variable_set('boost_crawler_average_generation', max(1, db_result(db_query("SELECT AVG(timer_average) FROM {boost_cache}"))));
    variable_set('boost_crawler_start_time', FALSE);
    $threads = BOOST_MAX_THREADS;
    while ($threads > 0) {
      variable_set('boost_crawler_thread_num_' . $threads, 0);
      $threads--;
    }
    boost_async_call_crawler($self, 1, NULL, $expire);
    return TRUE;
  }
}

/**
 * Output text & set php in async mode.
 *
 * Throws away any open output buffers, sends $output (or a fixed 43 byte GIF
 * when the content type is image/gif) together with "Connection: close" and
 * an explicit Content-Length, and flushes it. The function then returns so
 * the caller can keep working after the response has been pushed out.
 *
 * @param $output
 *   string - Text to output to open connection.
 * @param $wait
 *   bool - Wait 1 second?
 * @param $content_type
 *   string - Content type header.
 */
function boost_async_opp($output, $wait = TRUE, $content_type = "text/html") {
  // Prime php for background operations
  while (ob_get_level()) {
    ob_end_clean();
  }
  header("Connection: close");
  // Called without an argument, ignore_user_abort() only reports the current
  // setting; TRUE is required to actually keep running after a disconnect.
  ignore_user_abort(TRUE);

  // Output text
  ob_start();
  header("Content-type: " . $content_type);
  header("Expires: Sun, 19 Nov 1978 05:00:00 GMT");
  // header() replaces an earlier header of the same name by default, so both
  // directives have to travel in a single Cache-Control header.
  header("Cache-Control: no-cache, must-revalidate");
  if ($content_type == 'image/gif') {
    header("Content-Length: 43");
  }
  else {
    // Advertise the full body; a shorter length makes the client drop the
    // last byte, and an empty $output would yield a negative length.
    header("Content-Length: " . strlen($output));
  }
  header("Connection: close");
  if ($content_type == 'image/gif') {
    // The 43 bytes of a GIF89a image, emitted byte by byte.
    printf("%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",71,73,70,56,57,97,1,0,1,0,128,255,0,192,192,192,0,0,0,33,249,4,1,0,0,0,0,44,0,0,0,0,1,0,1,0,0,2,2,68,1,0,59);
  }
  else {
    print($output);
  }
  ob_end_flush();
  flush();

  // wait for 1 second
  if ($wait) {
    sleep(1);
  }

  // text returned and connection closed.
  // Do background processing. Time taken after should not effect page load times.
}

/**
 * Call a URL with a timeout of 3 seconds.
 *
 * Appends the thread, total and expire values to $self as "&name=value"
 * pairs (so $self is presumably expected to already carry a query string -
 * verify against callers), POSTs to it through boost_drupal_http_request()
 * and then restores the ini settings remembered in $GLOBALS.
 *
 * @param $self
 *   URL to restart the loop.
 * @param $this_thread
 *   Value for the "thread" argument; omitted from the URL when NULL.
 * @param $total_threads
 *   Value for the "total" argument; omitted from the URL when NULL.
 * @param $expire
 *   Has the site changed, if so get expire column. Cast to int for the URL;
 *   omitted when NULL.
 */
function boost_async_call_crawler($self, $this_thread = NULL, $total_threads = NULL, $expire = NULL) {
  $self .= isset($this_thread) ? '&thread=' . $this_thread : '';
  $self .= isset($total_threads) ? '&total=' . $total_threads : '';
  $self .= isset($expire) ? '&expire=' . (int)$expire : '';

  // Shorten the socket timeout only for the duration of this request.
  $GLOBALS['_boost_default_socket_timeout'] = ini_get('default_socket_timeout');
  ini_set('default_socket_timeout', 3);
  boost_drupal_http_request($self, 3, 'POST');
  ini_set('default_socket_timeout', $GLOBALS['_boost_default_socket_timeout']);
  // NOTE(review): these two globals are not assigned in this function; they
  // are assumed to have been populated earlier in the request - confirm.
  ini_set('max_execution_time', $GLOBALS['_boost_max_execution_time']);
  ini_set('output_buffering', $GLOBALS['_boost_output_buffering']);
}

/**
 * Add URL's to the boost_crawler table.
 *
 * Each call copies at most one batch of URLs from {boost_cache} into
 * {boost_crawler}, starting at the offset remembered in the
 * 'boost_crawler_loaded_count' . $extension variable, and then advances that
 * offset by the batch size.
 *
 * @param $push_setting
 *   Default crawler setting for the content type
 * @param $extension
 *   File extension, controls the content type DB lookup
 * @param $expire
 *   Has the site changed, if so get expire column
 *
 * @return
 *   FALSE when a batch was queued by this call, TRUE when the remembered
 *   offset has already reached the number of matching rows.
 */
function boost_crawler_add_to_table($push_setting, $extension, $expire) {
  // Insert batch of URL's into boost_crawler table
  $count = 10000;
  $total = boost_crawler_count($push_setting, $extension, $expire);
  $loaded = variable_get('boost_crawler_loaded_count' . $extension, 0);
  if ($total > $loaded) {
    if ($push_setting) {
      // Any row whose push column is not 0 qualifies.
      if ($expire && BOOST_LOOPBACK_BYPASS) {
        db_query_range("INSERT INTO {boost_crawler} (url) SELECT url FROM {boost_cache} WHERE push != 0 AND extension = '%s' AND expire BETWEEN 0 AND %d", $extension, BOOST_TIME, $loaded, $count);
      }
      else {
        db_query_range("INSERT INTO {boost_crawler} (url) SELECT url FROM {boost_cache} WHERE push != 0 AND extension = '%s' AND expire = 0", $extension, $loaded, $count);
      }
    }
    else {
      // Only rows whose push column is exactly 1 qualify.
      if ($expire && BOOST_LOOPBACK_BYPASS) {
        db_query_range("INSERT INTO {boost_crawler} (url) SELECT url FROM {boost_cache} WHERE push = 1 AND extension = '%s' AND expire BETWEEN 0 AND %d", $extension, BOOST_TIME, $loaded, $count);
      }
      else {
        db_query_range("INSERT INTO {boost_crawler} (url) SELECT url FROM {boost_cache} WHERE push = 1 AND extension = '%s' AND expire = 0", $extension, $loaded, $count);
      }
    }
    variable_set('boost_crawler_loaded_count' . $extension, $loaded + $count);
    return FALSE;
  }
  else {
    return TRUE;
  }
}

/**
 * Count the number of URL's in the boost_cache table.
*
 * @param $push_setting
 *   Default crawler setting for the content type
 * @param $extension
 *   File extension, controls the content type DB lookup
 * @param $expire
 *   Has the site changed, if so get expire column
 *
 * @return
 *   The COUNT(*) value reported by the database for the matching rows.
 */
function boost_crawler_count($push_setting, $extension, $expire) {
  // Rows with an expire value between 0 and BOOST_TIME are only included
  // when $expire is set and BOOST_LOOPBACK_BYPASS is on; otherwise only rows
  // with expire = 0 are counted.
  $with_expired = ($expire && BOOST_LOOPBACK_BYPASS);

  // A truthy $push_setting accepts every non-zero push value, a falsy one
  // accepts push = 1 only.
  if ($push_setting && $with_expired) {
    $rows = db_query("SELECT COUNT(*) FROM {boost_cache} WHERE push != 0 AND extension = '%s' AND expire BETWEEN 0 AND %d", $extension, BOOST_TIME);
  }
  elseif ($push_setting) {
    $rows = db_query("SELECT COUNT(*) FROM {boost_cache} WHERE push != 0 AND extension = '%s' AND expire = 0", $extension);
  }
  elseif ($with_expired) {
    $rows = db_query("SELECT COUNT(*) FROM {boost_cache} WHERE push = 1 AND extension = '%s' AND expire BETWEEN 0 AND %d", $extension, BOOST_TIME);
  }
  else {
    $rows = db_query("SELECT COUNT(*) FROM {boost_cache} WHERE push = 1 AND extension = '%s' AND expire = 0", $extension);
  }

  return db_result($rows);
}

/**
 * Logic to get boost_crawler table ready.
*
 * Runs the loaders in a fixed order (HTML, XML, JSON, URL aliases). The &&
 * chain stops at the first loader that returns FALSE, so a later loader is
 * not called until every earlier one returns TRUE.
 *
 * @param $expire
 *   Has the site changed, if so get expire column
 *
 * @return
 *   TRUE when every loader returned TRUE, FALSE otherwise.
 */
function boost_crawler_seed_tables($expire) {
  // TRUE means: all URL's added to boost_crawler table; start hitting URL's
  return boost_crawler_add_to_table(BOOST_PUSH_HTML, BOOST_FILE_EXTENSION, $expire)
    && boost_crawler_add_to_table(BOOST_PUSH_XML, BOOST_XML_EXTENSION, $expire)
    && boost_crawler_add_to_table(BOOST_PUSH_JSON, BOOST_JSON_EXTENSION, $expire)
    && boost_crawler_add_alias_to_table();
}

/**
 * Get URLs from url alias table
 *
 * Each call copies at most one batch of aliases from {url_alias} into
 * {boost_crawler}, prefixed with $base_url, starting at the offset stored in
 * the 'boost_crawler_loaded_count_alias' variable.
 *
 * @return
 *   TRUE when alias crawling is disabled or the stored offset has reached the
 *   number of aliases; FALSE when a batch was queued by this call.
 */
function boost_crawler_add_alias_to_table() {
  // Insert batch of html URL's into boost_crawler table
  global $base_url;

  if (!variable_get('boost_crawl_url_alias', FALSE)) {
    return TRUE;
  }
  $count = 1000;
  // db_query() returns a result handle, not the number; db_result() is needed
  // to read the COUNT(*) value before comparing it with the offset.
  $total = db_result(db_query("SELECT COUNT(*) FROM {url_alias}"));
  $loaded = variable_get('boost_crawler_loaded_count_alias', 0);
  if ($total > $loaded) {
    $list = db_query_range("SELECT dst FROM {url_alias}", $loaded, $count);
    while ($url = db_result($list)) {
      // Errors from a single insert are deliberately silenced so that one
      // failing row does not abort the batch.
      @db_query("INSERT INTO {boost_crawler} (url) VALUES ('%s')", $base_url . '/' . $url);
    }
    variable_set('boost_crawler_loaded_count_alias', $loaded + $count);
    return FALSE;
  }
  else {
    return TRUE;
  }
}

/**
 * Get count of boost_crawler table.
 *
 * @return
 *   The COUNT(*) value for {boost_crawler}.
 */
function boost_crawler_total_count() {
  return db_result(db_query("SELECT COUNT(*) FROM {boost_crawler}"));
}

/**
 * Reload any url's that did not get cached.
*
 * Selects the URLs present in both {boost_crawler} and {boost_cache} whose
 * cache row still matches the expire condition, empties {boost_crawler},
 * resets the crawler position and re-inserts the selected URLs.
 *
 * @param $expire
 *   Has the site changed, if so get expire column
 *
 * @return
 *   TRUE when at least one URL was put back into {boost_crawler}, FALSE
 *   otherwise.
 */
function boost_crawler_verify($expire) {
  if ($expire && BOOST_LOOPBACK_BYPASS) {
    $list = db_query("SELECT bcrawler.url FROM {boost_cache} bcache INNER JOIN {boost_crawler} bcrawler ON bcache.url=bcrawler.url WHERE bcache.expire BETWEEN 0 AND %d", BOOST_TIME);
  }
  else {
    $list = db_query("SELECT bcrawler.url FROM {boost_cache} bcache INNER JOIN {boost_crawler} bcrawler ON bcache.url=bcrawler.url WHERE bcache.expire = 0");
  }
  // NOTE(review): the table is truncated before $list is iterated; this
  // assumes the result set is already buffered on the client - confirm for
  // the database driver in use.
  db_query('TRUNCATE {boost_crawler}');
  variable_set('boost_crawler_position', 0);
  $recrawl = FALSE;
  while ($url = db_result($list)) {
    db_query("INSERT INTO {boost_crawler} (url) VALUES ('%s')", $url);
    $recrawl = TRUE;
  }
  return $recrawl;
}

/**
 * Check for any dead threads.
 *
 * @return
 *   TRUE when at least one boost_crawler_thread_num_* variable holds a
 *   non-zero time that is less than BOOST_MAX_THREAD_TIME old relative to
 *   BOOST_TIME; FALSE otherwise.
 */
function boost_crawler_threads_alive() {
  // Load all thread times
  $result = db_query("SELECT * FROM {variable} WHERE name LIKE 'boost_crawler_thread_num_%'");
  while ($variable = db_fetch_object($result)) {
    // Values are read straight from the table, so they are still serialized.
    $time = unserialize($variable->value);
    if ($time != 0 && $time + BOOST_MAX_THREAD_TIME > BOOST_TIME) {
      return TRUE;
    }
  }
  return FALSE;
}

/**
 * Return average page generation time.
 *
 * @return
 *   The 'boost_crawler_average_generation' variable (default 5000) divided
 *   by 1000.
 */
function boost_average_time() {
  return variable_get('boost_crawler_average_generation', 5000)/1000;
}

/**
 * Perform an HTTP request.
 *
 * Opens a socket to the host in $url, writes an HTTP/1.0 request without a
 * body, reads whatever comes back until end of stream and closes the socket.
 * The response itself is not parsed or kept.
 *
 * @see drupal_http_request()
 *
 * @param $url
 *   A string containing a fully qualified URI.
 * @param $timeout
 *   How many seconds before giving up on request. Passed to fsockopen() as
 *   the connection timeout.
 * @param $method
 *   HTTP request method.
 *
 * @return
 *   An object. On failure it carries ->error (and ->code, the negated socket
 *   error number, when the socket could not be opened). On success it
 *   carries ->request, the raw request text that was sent.
 */
function boost_drupal_http_request($url, $timeout = 3, $method = 'GET') {
  global $db_prefix;

  $headers = array();
  // No request body is ever sent; an empty string keeps strlen() and the
  // concatenation below well defined.
  $data = '';
  $result = new stdClass();

  // Parse the URL and make sure we can handle the schema.
  $uri = parse_url($url);

  if ($uri == FALSE) {
    $result->error = 'unable to parse URL';
    return $result;
  }

  if (!isset($uri['scheme'])) {
    $result->error = 'missing schema';
    return $result;
  }

  switch ($uri['scheme']) {
    case 'http':
      $port = isset($uri['port']) ? $uri['port'] : 80;
      $host = $uri['host'] . ($port != 80 ? ':'. $port : '');
      $fp = @fsockopen($uri['host'], $port, $errno, $errstr, $timeout);
      break;
    case 'https':
      // Note: Only works for PHP 4.3 compiled with OpenSSL.
      $port = isset($uri['port']) ? $uri['port'] : 443;
      $host = $uri['host'] . ($port != 443 ? ':'. $port : '');
      $fp = @fsockopen('ssl://'. $uri['host'], $port, $errno, $errstr, $timeout);
      break;
    default:
      $result->error = 'invalid schema '. $uri['scheme'];
      return $result;
  }

  // Make sure the socket opened properly.
  if (!$fp) {
    // When a network error occurs, we use a negative number so it does not
    // clash with the HTTP status codes.
    $result->code = -$errno;
    $result->error = trim($errstr);

    // Mark that this request failed. This will trigger a check of the web
    // server's ability to make outgoing HTTP requests the next time that
    // requirements checking is performed.
    // @see system_requirements()
    variable_set('drupal_http_request_fails', TRUE);

    return $result;
  }

  // Construct the path to act on.
  $path = isset($uri['path']) ? $uri['path'] : '/';
  if (isset($uri['query'])) {
    $path .= '?'. $uri['query'];
  }

  // Create HTTP request.
  $defaults = array(
    // RFC 2616: "non-standard ports MUST, default ports MAY be included".
    // We don't add the port to prevent from breaking rewrite rules checking the
    // host that do not take into account the port number.
    'Host' => "Host: $host",
    'User-Agent' => 'User-Agent: Drupal (+http://drupal.org/)',
  );

  // Only add Content-Length if we actually have any content or if it is a POST
  // or PUT request. Some non-standard servers get confused by Content-Length in
  // at least HEAD/GET requests, and Squid always requires Content-Length in
  // POST/PUT requests.
  $content_length = strlen($data);
  if ($content_length > 0 || $method == 'POST' || $method == 'PUT') {
    $defaults['Content-Length'] = 'Content-Length: '. $content_length;
  }

  // If the server url has a user then attempt to use basic authentication
  if (isset($uri['user'])) {
    $defaults['Authorization'] = 'Authorization: Basic '. base64_encode($uri['user'] . (!empty($uri['pass']) ? ":". $uri['pass'] : ''));
  }

  // If the database prefix is being used by SimpleTest to run the tests in a copied
  // database then set the user-agent header to the database prefix so that any
  // calls to other Drupal pages will run the SimpleTest prefixed database. The
  // user-agent is used to ensure that multiple testing sessions running at the
  // same time won't interfere with each other as they would if the database
  // prefix were stored statically in a file or database variable.
  if (is_string($db_prefix) && preg_match("/^simpletest\d+$/", $db_prefix, $matches)) {
    $defaults['User-Agent'] = 'User-Agent: ' . $matches[0];
  }

  foreach ($headers as $header => $value) {
    $defaults[$header] = $header .': '. $value;
  }

  $request = $method .' '. $path ." HTTP/1.0\r\n";
  $request .= implode("\r\n", $defaults);
  $request .= "\r\n\r\n";
  $request .= $data;

  $result->request = $request;

  fwrite($fp, $request);

  // Fetch response. It is read until end of stream (or until a read yields
  // nothing) and discarded, since nothing in this function uses it.
  while (!feof($fp) && fread($fp, 1024)) {
    // Intentionally empty.
  }
  fclose($fp);

  // Hand back the same object the error paths return, so callers always get
  // a result they can inspect.
  return $result;
}