Index: includes/database.mysql-common.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database.mysql-common.inc,v
retrieving revision 1.13
diff -u -p -r1.13 database.mysql-common.inc
--- includes/database.mysql-common.inc	2 Oct 2007 16:15:56 -0000	1.13
+++ includes/database.mysql-common.inc	15 Oct 2007 01:58:35 -0000
@@ -530,3 +530,32 @@ function db_change_field(&$ret, $table, 
 function db_last_insert_id($table, $field) {
   return db_result(db_query('SELECT LAST_INSERT_ID()'));
 }
+
+/**
+ * @} End of "ingroup schemaapi".
+ */
+
+/**
+ * Constructs a query fragment to split a database field according to a
+ * delimiter, and to fetch the specified substring.
+ *
+ * @param $string
+ *   The database field to be split.
+ * @param $delimiter
+ *   The string value to use as a delimiter.
+ * @param $field
+ *   The number of the field to return. Must be in the range 1 ... x, where x
+ *   is the number of fields in the string.
+ * @return
+ *   An SQL query fragment, suitable for forming part of a string that gets
+ *   passed to db_query().
+ */
+function db_sql_explode($string, $delimiter, $field) {
+  $sql = "SUBSTRING_INDEX($string, '$delimiter', $field)";
+  if ($field > 1) {
+    // The inner call keeps fields 1 ... $field; a count of -1 then keeps only
+    // what follows the last delimiter, i.e. field number $field itself.
+    $sql = "SUBSTRING_INDEX($sql, '$delimiter', -1)";
+  }
+  return $sql;
+}
Index: includes/database.pgsql.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database.pgsql.inc,v
retrieving revision 1.62
diff -u -p -r1.62 database.pgsql.inc
--- includes/database.pgsql.inc	2 Oct 2007 16:15:56 -0000	1.62
+++ includes/database.pgsql.inc	15 Oct 2007 01:58:35 -0000
@@ -940,3 +940,21 @@ function db_change_field(&$ret, $table, 
  * @} End of "ingroup schemaapi".
  */
 
+/**
+ * Builds the SQL expression that splits a column on a delimiter and yields
+ * one of the resulting pieces.
+ *
+ * @param $string
+ *   Name of the database field whose value is split.
+ * @param $delimiter
+ *   Literal string on which the value is split.
+ * @param $field
+ *   One-based position of the piece to return, between 1 and the number of
+ *   pieces the value splits into.
+ * @return
+ *   A fragment of SQL built on SPLIT_PART(), meant to be embedded in a query
+ *   string handed to db_query().
+ */
+function db_sql_explode($string, $delimiter, $field) {
+  return 'SPLIT_PART('. $string .", '". $delimiter ."', ". $field .')';
+}
Index: includes/path.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/path.inc,v
retrieving revision 1.16
diff -u -p -r1.16 path.inc
--- includes/path.inc	18 Jun 2007 06:59:11 -0000	1.16
+++ includes/path.inc	15 Oct 2007 01:58:36 -0000
@@ -52,42 +52,75 @@ function drupal_lookup_path($action, $pa
 
   // Use $count to avoid looking up paths in subsequent calls if there simply are no aliases
   if (!isset($count)) {
-    $count = db_result(db_query('SELECT COUNT(pid) FROM {url_alias}'));
+    $count = (bool)db_result(db_query_range('SELECT pid FROM {url_alias}', 0, 1));
   }
 
-  if ($action == 'wipe') {
-    $map = array();
-    $no_src = array();
-  }
-  elseif ($count > 0 && $path != '') {
-    if ($action == 'alias') {
-      if (isset($map[$path_language][$path])) {
-        return $map[$path_language][$path];
+  // Load the whitelist of top level path components that have an alias.
+  $whitelist = variable_get('alias_whitelist', array());
+  // Derive the top level component of the path, i.e. the part before the
+  // first '/', the same way db_sql_explode('src', '/', 1) does in SQL.
+  $pos = strpos($path, '/');
+  $top_level = ($pos !== FALSE) ? substr($path, 0, $pos) : $path;
+
+  switch ($action) {
+    case 'wipe':
+      $map = array();
+      $no_src = array();
+      // Unset the cached count so the next call checks the table again;
+      // leaving it at 0 would disable all lookups for the rest of the request.
+      $count = NULL;
+
+      // Rebuild the whitelist from the top level component of every src in
+      // {url_alias}. GROUP BY is faster than DISTINCT, at least for MyISAM.
+      $whitelist = array();
+      $result = db_query("SELECT ". db_sql_explode('src', '/', 1) ." AS path FROM {url_alias} GROUP BY path");
+      while ($row = db_fetch_object($result)) {
+        $whitelist[$row->path] = TRUE;
       }
-      // Get the most fitting result falling back with alias without language
-      $alias = db_result(db_query("SELECT dst FROM {url_alias} WHERE src = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language));
-      $map[$path_language][$path] = $alias;
-      return $alias;
-    }
-    // Check $no_src for this $path in case we've already determined that there
-    // isn't a path that has this alias
-    elseif ($action == 'source' && !isset($no_src[$path_language][$path])) {
-      // Look for the value $path within the cached $map
-      $src = '';
-      if (!isset($map[$path_language]) || !($src = array_search($path, $map[$path_language]))) {
-        // Get the most fitting result falling back with alias without language
-        if ($src = db_result(db_query("SELECT src FROM {url_alias} WHERE dst = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language))) {
-          $map[$path_language][$src] = $path;
+
+      variable_set('alias_whitelist', $whitelist);
+      break;
+
+    case 'alias':
+      if ($count > 0 && $path != '') {
+        if (isset($map[$path_language][$path])) {
+          return $map[$path_language][$path];
         }
-        else {
-          // We can't record anything into $map because we do not have a valid
-          // index and there is no need because we have not learned anything
-          // about any Drupal path. Thus cache to $no_src.
-          $no_src[$path_language][$path] = TRUE;
+        // Check the whitelist: if the top level component is not in it, no
+        // src in the database starts with it, so the query can be skipped.
+        if (!isset($whitelist[$top_level])) {
+          return FALSE;
+        }
+        // Get the most fitting result falling back with alias without language
+        $alias = db_result(db_query_range("SELECT dst FROM {url_alias} WHERE src = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language, 0, 1));
+        $map[$path_language][$path] = $alias;
+        return $alias;
+      }
+      break;
+
+    case 'source':
+      if ($count > 0 && $path != '') {
+        // Check $no_src for this $path in case we've already determined that there
+        // isn't a path that has this alias
+        if (!isset($no_src[$path_language][$path])) {
+          // Look for the value $path within the cached $map
+          $src = '';
+          if (!isset($map[$path_language]) || !($src = array_search($path, $map[$path_language]))) {
+            // Get the most fitting result falling back with alias without language
+            if ($src = db_result(db_query("SELECT src FROM {url_alias} WHERE dst = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language))) {
+              $map[$path_language][$src] = $path;
+            }
+            else {
+              // We can't record anything into $map because we do not have a valid
+              // index and there is no need because we have not learned anything
+              // about any Drupal path. Thus cache to $no_src.
+              $no_src[$path_language][$path] = TRUE;
+            }
+          }
+          return $src;
         }
       }
-      return $src;
-    }
+      break;
   }
 
   return FALSE;
Index: modules/system/system.install
===================================================================
RCS file: /cvs/drupal/drupal/modules/system/system.install,v
retrieving revision 1.162
diff -u -p -r1.162 system.install
--- modules/system/system.install	12 Oct 2007 10:41:47 -0000	1.162
+++ modules/system/system.install	15 Oct 2007 01:58:39 -0000
@@ -4456,6 +4456,15 @@ function system_update_6034() {
 }
 
 /**
+ * Build an initial whitelist of top level URL alias paths.
+ */
+function system_update_6035() {
+  drupal_lookup_path('wipe');
+  // No SQL statements to report, but update.php expects an array back.
+  return array();
+}
+
+/**
  * @} End of "defgroup updates-5.x-to-6.x"
  * The next series of updates should start at 7000.
  */
