Index: includes/language.inc
===================================================================
RCS file: /cvs/drupal/drupal/includes/language.inc,v
retrieving revision 1.14.2.2
diff -u -p -r1.14.2.2 language.inc
--- includes/language.inc	22 Mar 2010 12:18:04 -0000	1.14.2.2
+++ includes/language.inc	7 Oct 2010 14:50:44 -0000
@@ -70,32 +70,75 @@ function language_initialize() {
  * Identify language from the Accept-language HTTP header we got.
  */
 function language_from_browser() {
-  // Specified by the user via the browser's Accept Language setting
+  if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
+    return;
+  }
+
+  // Only load the enabled languages once we know there is a header to parse.
+  $languages = language_list('enabled');
+  $languages = $languages['1'];
+
+  // RFC 2616 (section 14.4) defines the Accept-Language header as follows:
+  //   Accept-Language = "Accept-Language" ":"
+  //                  1#( language-range [ ";" "q" "=" qvalue ] )
+  //   language-range  = ( ( 1*8ALPHA *( "-" 1*8ALPHA ) ) | "*" )
   // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
   $browser_langs = array();
-
-  if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
-    $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);
-    for ($i = 0; $i < count($browser_accept); $i++) {
-      // The language part is either a code or a code with a quality.
-      // We cannot do anything with a * code, so it is skipped.
-      // If the quality is missing, it is assumed to be 1 according to the RFC.
-      if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($browser_accept[$i]), $found)) {
-        $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0);
-      }
+  if (preg_match_all('@([a-zA-Z-]+|\*)(?:;q=([0-9.]+))?(?:$|\s*,\s*)@', $_SERVER['HTTP_ACCEPT_LANGUAGE'], $matches, PREG_SET_ORDER)) {
+    foreach ($matches as $match) {
+      // We can safely use strtolower() here, tags are ASCII.
+      // RFC 2616 limits qvalues to three decimals, so scale by 1000 and round
+      // (floats are inexact) to compare integers instead of floats.
+      $langcode = strtolower($match[1]);
+      $qvalue = isset($match[2]) ? (float) $match[2] : 1;
+      $browser_langs[$langcode] = (int) round($qvalue * 1000);
     }
   }
 
-  // Order the codes by quality
+  // Some browsers (especially some versions of Internet Explorer) send a
+  // specific language tag (fr-CA) without the corresponding generic tag (fr).
+  // In that case the generic tag gets the lowest qvalue of its specific tags;
+  // the ascending sort ensures the lowest one is seen first.
-  arsort($browser_langs);
+  asort($browser_langs);
+  foreach ($browser_langs as $langcode => $qvalue) {
+    $generic_tag = strtok($langcode, '-');
+    if (!isset($browser_langs[$generic_tag])) {
+      $browser_langs[$generic_tag] = $qvalue;
+    }
+  }
 
-  // Try to find the first preferred language we have
-  $languages = language_list('enabled');
-  foreach ($browser_langs as $langcode => $q) {
-    if (isset($languages['1'][$langcode])) {
-      return $languages['1'][$langcode];
+  // Find the enabled language with the greatest qvalue, following the rules
+  // of RFC 2616 (section 14.4). If several languages have the same qvalue,
+  // the one that comes first in the enabled language list wins.
+  $best_match = NULL;
+  $max_qvalue = 0;
+  foreach ($languages as $langcode => $language) {
+    // Language tags are case insensitive (RFC2616, sec 3.10).
+    $langcode = strtolower($langcode);
+
+    // If nothing matches below, the default qvalue is the one of the wildcard
+    // language, if set, or is 0 (which will never match).
+    $qvalue = isset($browser_langs['*']) ? $browser_langs['*'] : 0;
+
+    // Find the longest possible prefix of the browser-supplied language
+    // ('the language-range') that matches this site language ('the language tag').
+    $prefix = $langcode;
+    do {
+      if (isset($browser_langs[$prefix])) {
+        $qvalue = $browser_langs[$prefix];
+        break;
+      }
+    }
+    while ($prefix = substr($prefix, 0, strrpos($prefix, '-')));
+
+    // Keep the language object itself; it stays NULL when nothing matches.
+    if ($qvalue > $max_qvalue) {
+      $best_match = $language;
+      $max_qvalue = $qvalue;
     }
   }
+
+  return $best_match;
 }
 
 /**
