? drupal_lookup_path_optimization_2.patch
? lookup_whitelist.patch
Index: includes/database.mysql-common.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database.mysql-common.inc,v
retrieving revision 1.17
diff -u -p -r1.17 database.mysql-common.inc
--- includes/database.mysql-common.inc 30 Jan 2008 14:34:29 -0000 1.17
+++ includes/database.mysql-common.inc 5 Feb 2008 10:52:24 -0000
@@ -531,3 +531,32 @@ function db_change_field(&$ret, $table,
 function db_last_insert_id($table, $field) {
   return db_result(db_query('SELECT LAST_INSERT_ID()'));
 }
+
+/**
+ * @} End of "ingroup schemaapi".
+ */
+
+/**
+ * Constructs a query fragment to split a database field according to a
+ * delimiter, and to fetch the specified substring.
+ *
+ * @param $string
+ *   The database field to be split.
+ * @param $delimiter
+ *   The string value to use as a delimiter.
+ * @param $field
+ *   The number of the field to return. Must be in the range 1 ... x, where x
+ *   is the number of fields in the string.
+ * @return
+ *   An SQL query fragment, suitable for forming part of a string that gets
+ *   passed to db_query().
+ */
+function db_sql_explode($string, $delimiter, $field) {
+  $sql = "SUBSTRING_INDEX($string, '$delimiter', $field)";
+  if ($field > 1) {
+    // The inner call keeps fields 1..$field; -1 keeps only the last of them.
+    $sql = "SUBSTRING_INDEX($sql, '$delimiter', -1)";
+  }
+
+  return $sql;
+}
Index: includes/database.pgsql.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/database.pgsql.inc,v
retrieving revision 1.68
diff -u -p -r1.68 database.pgsql.inc
--- includes/database.pgsql.inc 4 Jan 2008 09:31:48 -0000 1.68
+++ includes/database.pgsql.inc 5 Feb 2008 10:52:24 -0000
@@ -924,3 +924,21 @@ function db_change_field(&$ret, $table,
  * @} End of "ingroup schemaapi".
  */
 
+/**
+ * Constructs a query fragment to split a database field according to a
+ * delimiter, and to fetch the specified substring.
+ *
+ * @param $string
+ *   The database field to be split.
+ * @param $delimiter
+ *   The string value to use as a delimiter.
+ * @param $field
+ *   The number of the field to return. Must be in the range 1 ... x, where x
+ *   is the number of fields in the string.
+ * @return
+ *   An SQL query fragment, suitable for forming part of a string that gets
+ *   passed to db_query().
+ */
+function db_sql_explode($string, $delimiter, $field) {
+  return "SPLIT_PART($string, '$delimiter', $field)";
+}
Index: includes/path.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/path.inc,v
retrieving revision 1.19
diff -u -p -r1.19 path.inc
--- includes/path.inc 4 Nov 2007 16:42:45 -0000 1.19
+++ includes/path.inc 5 Feb 2008 10:52:24 -0000
@@ -52,42 +52,76 @@ function drupal_lookup_path($action, $pa
 
   // Use $count to avoid looking up paths in subsequent calls if there simply are no aliases
   if (!isset($count)) {
-    $count = db_result(db_query('SELECT COUNT(pid) FROM {url_alias}'));
+    $count = (bool) db_result(db_query_range('SELECT pid FROM {url_alias}', 0, 1));
   }
 
-  if ($action == 'wipe') {
-    $map = array();
-    $no_src = array();
-  }
-  elseif ($count > 0 && $path != '') {
-    if ($action == 'alias') {
-      if (isset($map[$path_language][$path])) {
-        return $map[$path_language][$path];
+  // Load the whitelist
+  $whitelist = variable_get('alias_whitelist', array());
+  // And derive the top level component of the path
+  $pos = strpos($path, '/');
+  $top_level = ($pos) ? substr($path, 0, $pos) : $path;
+
+  switch ($action) {
+    case 'wipe':
+      $map = array();
+      $no_src = array();
+      // Unset the cached flag so the next call re-checks whether aliases exist.
+      $count = NULL;
+
+      // Rebuild a whitelist of top level paths, depending on what
+      // is stored in the url_alias table for this site.
+      $whitelist = array();
+
+      // For each alias in the database, get the top level (i.e. the portion before the first /).
+      // Using GROUP BY is faster than DISTINCT, at least for MyISAM.
+      $result = db_query("SELECT ". db_sql_explode('src', '/', 1) ." AS path FROM {url_alias} GROUP BY path");
+      while ($row = db_fetch_object($result)) {
+        $whitelist[$row->path] = TRUE;
       }
-      // Get the most fitting result falling back with alias without language
-      $alias = db_result(db_query("SELECT dst FROM {url_alias} WHERE src = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language));
-      $map[$path_language][$path] = $alias;
-      return $alias;
-    }
-    // Check $no_src for this $path in case we've already determined that there
-    // isn't a path that has this alias
-    elseif ($action == 'source' && !isset($no_src[$path_language][$path])) {
-      // Look for the value $path within the cached $map
-      $src = '';
-      if (!isset($map[$path_language]) || !($src = array_search($path, $map[$path_language]))) {
-        // Get the most fitting result falling back with alias without language
-        if ($src = db_result(db_query("SELECT src FROM {url_alias} WHERE dst = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language))) {
-          $map[$path_language][$src] = $path;
+
+      variable_set('alias_whitelist', $whitelist);
+      return FALSE;
+
+    case 'alias':
+      if ($count > 0 && $path != '') {
+        if (isset($map[$path_language][$path])) {
+          return $map[$path_language][$path];
         }
-        else {
-          // We can't record anything into $map because we do not have a valid
-          // index and there is no need because we have not learned anything
-          // about any Drupal path. Thus cache to $no_src.
-          $no_src[$path_language][$path] = TRUE;
+        // Check the whitelist, if the top_level is not in it, then
+        // no need to do anything further, it is not in the database
+        if (!isset($whitelist[$top_level])) {
+          return FALSE;
+        }
+        // Get the most fitting result falling back with alias without language
+        $alias = db_result(db_query_range("SELECT dst FROM {url_alias} WHERE src = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language, 0, 1));
+        $map[$path_language][$path] = $alias;
+        return $alias;
+      }
+      return FALSE;
+
+    case 'source':
+      if ($count > 0 && $path != '') {
+        // Check $no_src for this $path in case we've already determined that there
+        // isn't a path that has this alias
+        if (!isset($no_src[$path_language][$path])) {
+          // Look for the value $path within the cached $map
+          $src = '';
+          if (!isset($map[$path_language]) || !($src = array_search($path, $map[$path_language]))) {
+            // Get the most fitting result falling back with alias without language
+            if ($src = db_result(db_query("SELECT src FROM {url_alias} WHERE dst = '%s' AND language IN('%s', '') ORDER BY language DESC", $path, $path_language))) {
+              $map[$path_language][$src] = $path;
+            }
+            else {
+              // We can't record anything into $map because we do not have a valid
+              // index and there is no need because we have not learned anything
+              // about any Drupal path. Thus cache to $no_src.
+              $no_src[$path_language][$path] = TRUE;
+            }
+          }
+          return $src;
         }
       }
-      return $src;
-    }
+      break;
   }
 
   return FALSE;
Index: modules/system/system.install
===================================================================
RCS file: /cvs/drupal/drupal/modules/system/system.install,v
retrieving revision 1.238
diff -u -p -r1.238 system.install
--- modules/system/system.install 30 Jan 2008 20:30:35 -0000 1.238
+++ modules/system/system.install 5 Feb 2008 10:52:24 -0000
@@ -2471,3 +2471,22 @@ function system_update_6047() {
  * @} End of "defgroup updates-5.x-to-6.x"
  * The next series of updates should start at 7000.
  */
+
+/**
+ * @defgroup updates-6.x-to-7.x System updates from 6.x to 7.x
+ * @{
+ */
+
+/**
+ * Build an initial whitelist of top level URL alias paths.
+ */
+function system_update_7000() {
+  $ret = array();
+  drupal_lookup_path('wipe');
+  return $ret;
+}
+
+/**
+ * @} End of "defgroup updates-6.x-to-7.x"
+ * The next series of updates should start at 8000.
+ */