diff --git a/.htaccess b/.htaccess
index 9494b53..8386e98 100644
--- a/.htaccess
+++ b/.htaccess
@@ -103,6 +103,10 @@ DirectoryIndex index.php index.html index.htm
   # uncomment the following line:
   # RewriteBase /
 
+  # Pass requests for robots.txt over to index.php, even if a robots.txt file
+  # already exists on disk.
+  RewriteRule ^robots\.txt$ index.php [L]
+
   # Pass all requests not referring directly to files in the filesystem to
   # index.php. Clean URLs are handled in drupal_environment_initialize().
   RewriteCond %{REQUEST_FILENAME} !-f
diff --git a/includes/common.inc b/includes/common.inc
index b6ea297..a9e126a 100644
--- a/includes/common.inc
+++ b/includes/common.inc
@@ -218,5 +218,30 @@ function drupal_get_profile() {
   return $profile;
 }
 
+/**
+ * Gets the contents of the robots.txt file.
+ *
+ * Collects lines from every hook_robotstxt() implementation, lets modules
+ * alter them, and joins them with newlines. The result is statically cached
+ * for subsequent calls.
+ *
+ * @return
+ *   The robots.txt contents as a single string.
+ *
+ * @see hook_robotstxt()
+ * @see hook_robotstxt_alter()
+ */
+function drupal_get_robotstxt() {
+  $cache = &drupal_static(__FUNCTION__);
+  // Use isset() rather than empty() so that a legitimately empty result is
+  // not rebuilt on every call.
+  if (!isset($cache)) {
+    $lines = module_invoke_all('robotstxt');
+    drupal_alter('robotstxt', $lines);
+    $cache = implode("\n", $lines);
+  }
+  return $cache;
+}
+
 /**
  * Set the breadcrumb trail for the current page.
@@ -2543,6 +2568,137 @@ function drupal_deliver_html_page($page_callback_result) {
 }
 
 /**
+ * Package and send the result of a page callback to the browser as text.
+ *
+ * Mirrors drupal_deliver_html_page(), but sends a text/plain Content-Type and
+ * renders every response through the 'txt' theme wrapper.
+ *
+ * @param $page_callback_result
+ *   The result of a page callback. Can be one of:
+ *   - NULL: to indicate no content.
+ *   - An integer menu status constant: to indicate an error condition.
+ *   - A string of content.
+ *   - A renderable array of content.
+ *
+ * @see drupal_deliver_page()
+ */
+function drupal_deliver_txt_page($page_callback_result) {
+  // Emit the correct charset HTTP header, but not if the page callback
+  // result is NULL, since that likely indicates that it printed something
+  // in which case, no further headers may be sent, and not if code running
+  // for this page request has already set the content type header.
+  if (isset($page_callback_result) && is_null(drupal_get_http_header('Content-Type'))) {
+    drupal_add_http_header('Content-Type', 'text/plain; charset=utf-8');
+  }
+
+  // Menu status constants are integers; page content is a string or array.
+  if (is_int($page_callback_result)) {
+    switch ($page_callback_result) {
+      case MENU_NOT_FOUND:
+        // Print a 404 page.
+        drupal_add_http_header('Status', '404 Not Found');
+
+        watchdog('page not found', check_plain($_GET['q']), NULL, WATCHDOG_WARNING);
+
+        // Keep old path for reference, and to allow forms to redirect to it.
+        if (!isset($_GET['destination'])) {
+          $_GET['destination'] = $_GET['q'];
+        }
+
+        $return = NULL;
+        $path = drupal_get_normal_path(variable_get('site_404', ''));
+        if ($path && $path != $_GET['q']) {
+          // Custom 404 handler. Set the active item in case there are tabs to
+          // display, or other dependencies on the path.
+          menu_set_active_item($path);
+          $return = menu_execute_active_handler($path, FALSE);
+        }
+
+        // Compare strictly so that string content beginning with a digit is
+        // not mistaken for a menu status constant.
+        if (empty($return) || $return === MENU_NOT_FOUND || $return === MENU_ACCESS_DENIED) {
+          // Standard 404 handler.
+          drupal_set_title(t('Page not found'));
+          $return = t('The requested page could not be found.');
+        }
+
+        print _drupal_render_txt_page($return);
+        break;
+
+      case MENU_ACCESS_DENIED:
+        // Print a 403 page.
+        drupal_add_http_header('Status', '403 Forbidden');
+        watchdog('access denied', check_plain($_GET['q']), NULL, WATCHDOG_WARNING);
+
+        // Keep old path for reference, and to allow forms to redirect to it.
+        if (!isset($_GET['destination'])) {
+          $_GET['destination'] = $_GET['q'];
+        }
+
+        $return = NULL;
+        $path = drupal_get_normal_path(variable_get('site_403', ''));
+        if ($path && $path != $_GET['q']) {
+          // Custom 403 handler. Set the active item in case there are tabs to
+          // display or other dependencies on the path.
+          menu_set_active_item($path);
+          $return = menu_execute_active_handler($path, FALSE);
+        }
+
+        if (empty($return) || $return === MENU_NOT_FOUND || $return === MENU_ACCESS_DENIED) {
+          // Standard 403 handler.
+          drupal_set_title(t('Access denied'));
+          $return = t('You are not authorized to access this page.');
+        }
+
+        print _drupal_render_txt_page($return);
+        break;
+
+      case MENU_SITE_OFFLINE:
+        // Print a 503 page.
+        drupal_maintenance_theme();
+        drupal_add_http_header('Status', '503 Service unavailable');
+        drupal_set_title(t('Site under maintenance'));
+        $return = variable_get('maintenance_mode_message', t('@site is currently under maintenance. We should be back shortly. Thank you for your patience.', array('@site' => variable_get('site_name', 'Drupal'))));
+
+        print _drupal_render_txt_page($return);
+        break;
+    }
+  }
+  elseif (isset($page_callback_result)) {
+    // Print anything besides a menu constant, assuming it's not NULL or
+    // undefined. Going through the helper keeps the 'txt' wrapper in place;
+    // a bare drupal_render_page() call would not apply it.
+    print _drupal_render_txt_page($page_callback_result);
+  }
+
+  // Perform end-of-request tasks.
+  drupal_page_footer();
+}
+
+/**
+ * Renders content as a page wrapped in the 'txt' theme wrapper.
+ *
+ * @param $content
+ *   A string or renderable array to use as the main page content, or a
+ *   complete renderable array of #type 'page'.
+ *
+ * @return
+ *   The rendered page output.
+ */
+function _drupal_render_txt_page($content) {
+  if (is_array($content) && isset($content['#type']) && $content['#type'] == 'page') {
+    // The caller already built a full page; only the wrapper is swapped.
+    $page = $content;
+  }
+  else {
+    drupal_set_page_content($content);
+    $page = element_info('page');
+  }
+  $page['#theme_wrappers'] = array('txt');
+  return drupal_render_page($page);
+}
+
+/**
  * Perform end-of-request tasks.
  *
  * This function sets the page cache if appropriate, and allows modules to
diff --git a/modules/system/system.api.php b/modules/system/system.api.php
index 22ad7a6..d514e24 100644
--- a/modules/system/system.api.php
+++ b/modules/system/system.api.php
@@ -3728,6 +3728,35 @@ function hook_archiver_info_alter(&$info) {
 }
 
 /**
+ * Adds lines to the robots.txt output.
+ *
+ * @return
+ *   An array of strings, one per line, to append to the robots.txt output.
+ *
+ * @see drupal_get_robotstxt()
+ */
+function hook_robotstxt() {
+  return array('Disallow: /tmp/');
+}
+
+/**
+ * Alters the robots.txt lines collected from hook_robotstxt().
+ *
+ * @param $robotstxt
+ *   An array of robots.txt lines, passed by reference. Modify it in place;
+ *   drupal_get_robotstxt() does not use a return value.
+ *
+ * @see drupal_get_robotstxt()
+ */
+function hook_robotstxt_alter(&$robotstxt) {
+  foreach ($robotstxt as $index => $line) {
+    if ($line === 'Disallow: /tmp/') {
+      $robotstxt[$index] = 'Disallow: /temp/';
+    }
+  }
+}
+
+/**
  * Define additional date types.
  *
  * Next to the 'long', 'medium' and 'short' date types defined in core, any
diff --git a/modules/system/system.module b/modules/system/system.module
index 21b23f4..726f256 100644
--- a/modules/system/system.module
+++ b/modules/system/system.module
@@ -1055,6 +1055,14 @@ function system_menu() {
     'type' => MENU_CALLBACK,
     'file' => 'system.admin.inc',
   );
+
+  // Search engine control.
+  $items['robots.txt'] = array(
+    'page callback' => 'drupal_get_robotstxt',
+    'access callback' => TRUE,
+    'type' => MENU_CALLBACK,
+    'delivery callback' => 'drupal_deliver_txt_page',
+  );
   return $items;
 }
 
@@ -3903,6 +3911,33 @@ function system_archiver_info() {
 }
 
 /**
+ * Implements hook_robotstxt().
+ *
+ * Provides the lines of the robots.txt file found in the Drupal root, so
+ * that the file on disk remains the baseline of the generated output.
+ */
+function system_robotstxt() {
+  // Cache the robots.txt content from the file system.
+  $robotstxt = &drupal_static(__FUNCTION__);
+  if (!isset($robotstxt)) {
+    $cache = cache_get(__FUNCTION__);
+    if ($cache && is_array($cache->data)) {
+      $robotstxt = $cache->data;
+    }
+    else {
+      // Resolve the file against the Drupal root rather than the current
+      // working directory, and fall back to no lines when the file is
+      // missing or unreadable instead of passing FALSE to file().
+      $file = DRUPAL_ROOT . '/robots.txt';
+      $lines = is_readable($file) ? file($file, FILE_IGNORE_NEW_LINES) : FALSE;
+      $robotstxt = is_array($lines) ? $lines : array();
+      cache_set(__FUNCTION__, $robotstxt);
+    }
+  }
+  return $robotstxt;
+}
+
+/**
  * Returns HTML for a confirmation form.
  *
  * By default this does not alter the appearance of a form at all,
diff --git a/modules/system/txt.tpl.php b/modules/system/txt.tpl.php
new file mode 100644
index 0000000..d0d6291
--- /dev/null
+++ b/modules/system/txt.tpl.php
@@ -0,0 +1,24 @@
+