Fix language parsing and matching in language_from_browser().

From: damz <damz@dev.local.local>


---

 bootstrap.inc      |   10 +++++-
 language.inc       |   56 ++++++++++++++++++++++-------------
 locale/locale.test |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 128 insertions(+), 22 deletions(-)

diff --git includes/bootstrap.inc includes/bootstrap.inc
index 334ef64..64addec 100644
--- includes/bootstrap.inc
+++ includes/bootstrap.inc
@@ -1278,14 +1278,20 @@ function drupal_init_language() {
  * Get a list of languages set up indexed by the specified key
  *
  * @param $field The field to index the list with.
- * @param $reset Boolean to request a reset of the list.
+ * @param $reset Boolean to request a reset of the list, or (internal use
+ *  only) an array of languages that replaces the static language list.
  */
 function language_list($field = 'language', $reset = FALSE) {
   static $languages = NULL;
 
   // Reset language list
   if ($reset) {
-    $languages = NULL;
+    if (is_array($reset)) {
+      $languages = $reset;
+    }
+    else {
+      $languages = NULL;
+    }
   }
 
   // Init language list
diff --git includes/language.inc includes/language.inc
index 29207b5..d3c5ab1 100644
--- includes/language.inc
+++ includes/language.inc
@@ -70,32 +70,48 @@ function language_initialize() {
  * Identify language from the Accept-language HTTP header we got.
  */
 function language_from_browser() {
-  // Specified by the user via the browser's Accept Language setting
-  // Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
-  $browser_langs = array();
-
-  if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
-    $browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);
-    for ($i = 0; $i < count($browser_accept); $i++) {
-      // The language part is either a code or a code with a quality.
-      // We cannot do anything with a * code, so it is skipped.
-      // If the quality is missing, it is assumed to be 1 according to the RFC.
-      if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($browser_accept[$i]), $found)) {
-        $browser_langs[$found[1]] = (isset($found[3]) ? (float) $found[3] : 1.0);
-      }
-    }
+  if (!isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
+    return;
   }
 
-  // Order the codes by quality
-  arsort($browser_langs);
+  $browser_langs = array();
+  if (preg_match_all('@([a-zA-Z]{1,8}(?:-[a-zA-Z]{1,8})?|\*)(?:\s*;\s*q\s*=\s*(1(?:\.0{0,3})?|0(?:\.[0-9]{0,3})?))?\s*,?\s*@', $_SERVER['HTTP_ACCEPT_LANGUAGE'], $matches, PREG_SET_ORDER)) {
+    foreach ($matches as $match) {
+      // Tags are ASCII, so strtolower() is safe. Qvalues have at most three
+      // decimals (RFC2616), so scale them to integers; round() because the
+      // float product is not guaranteed to be exactly integral.
+      $browser_langs[strtolower($match[1])] = isset($match[2]) ? (int) round((float) $match[2] * 1000) : 1000;
+    }
+  }
 
-  // Try to find the first preferred language we have
+  // Pick the enabled language with the greatest qvalue (RFC2616, section
+  // 14.4). On a tie the language that comes first in the enabled list wins,
+  // and a qvalue of 0 ("not acceptable") never matches. NULL if none match.
   $languages = language_list('enabled');
-  foreach ($browser_langs as $langcode => $q) {
-    if (isset($languages['1'][$langcode])) {
-      return $languages['1'][$langcode];
+  $best_match = NULL;
+  $max_qvalue = 0;
+  foreach ($languages['1'] as $langcode => $language) {
+    // Language tags are case insensitive (RFC2616, sec 3.10).
+    $langcode = strtolower($langcode);
+    $qvalue = NULL;
+    if (isset($browser_langs[$langcode])) {
+      $qvalue = $browser_langs[$langcode];
+    }
+    else if ((($prefix = strtok($langcode, '-')) !== $langcode) && isset($browser_langs[$prefix])) {
+      $qvalue = $browser_langs[$prefix];
+    }
+    else if (isset($browser_langs['*'])) {
+      $qvalue = $browser_langs['*'];
+    }
+
+    if (!is_null($qvalue)) {
+      if ($qvalue > $max_qvalue) {
+        $best_match = $language;
+        $max_qvalue = $qvalue;
+      }
     }
   }
+  return $best_match;
 }
 
 /**
diff --git modules/locale/locale.test modules/locale/locale.test
index f8e0f9e..e925587 100644
--- modules/locale/locale.test
+++ modules/locale/locale.test
@@ -1059,6 +1059,90 @@ class LanguageSwitchingFunctionalTest extends DrupalWebTestCase {
     $this->assertIdentical($links, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language list item is marked as active on the language switcher block.'));
     $this->assertIdentical($anchors, array('active' => array('en'), 'inactive' => array('fr')), t('Only the current language anchor is marked as active on the language switcher block.'));
   }
+
+  /**
+   * Checks language_from_browser() against fixed Accept-Language headers.
+   */
+  function testLanguageFromBrowser() {
+    $languages = array(
+      'language' => array(
+        // In our test case, 'en' has priority over 'en-US'.
+        'en' => (object) array(
+          'language' => 'en',
+          'enabled' => 1,
+          'weight' => 1,
+        ),
+        'en-US' => (object) array(
+          'language' => 'en-US',
+          'enabled' => 1,
+          'weight' => 0.6,
+        ),
+        // But 'fr-CA' has priority over 'fr'.
+        'fr-CA' => (object) array(
+          'language' => 'fr-CA',
+          'enabled' => 1,
+          'weight' => 0.5,
+        ),
+        'fr' => (object) array(
+          'language' => 'fr',
+          'enabled' => 1,
+          'weight' => 0.4,
+        ),
+        // And 'es-MX' is alone.
+        'es-MX' => (object) array(
+          'language' => 'es-MX',
+          'enabled' => 1,
+          'weight' => 0.3,
+        ),
+      )
+    );
+
+    drupal_function_exists('language_from_browser');
+
+    // Replace language_list()'s static list with the fake languages above.
+    language_list('language', $languages);
+
+    $test_cases = array(
+      // Equal qvalue for each language, choose the site preferred one.
+      'en,en-US,fr-CA,fr,es-MX' => 'en',
+      'fr,en' => 'en',
+      'en,fr' => 'en',
+      'en-US,fr' => 'en-US',
+      'fr,en-US' => 'en-US',
+      'fr' => 'fr-CA',
+      'fr,es-MX' => 'fr-CA',
+      'fr,es' => 'fr-CA',
+      'es,fr' => 'fr-CA',
+      'es-MX,de' => 'es-MX',
+      'de,es-MX' => 'es-MX',
+
+      // Different qvalues.
+      'en-US,en;q=0.5,fr;q=0.25' => 'en-US',
+      'fr,en;q=0.5' => 'fr-CA',
+      'fr,en;q=0.5,fr-CA;q=0.25' => 'fr',
+
+      // These are equivalent, because a range also matches its sub-languages.
+      'es-MX,en;q=0.5' => 'es-MX',
+      'es,en;q=0.5' => 'es-MX',
+
+      // Silly wildcards are also valid.
+      '*,fr-CA;q=0.5' => 'en',
+      '*,en;q=0.25' => 'fr-CA',
+      'en,en-US;q=0.5,fr;q=0.25' => 'en',
+      '*;q=0.5,en-US' => 'en-US',
+
+      // Unresolvable cases (fixed inputs keep the test deterministic).
+      '' => NULL,
+      'de,pl' => NULL,
+      '#1234-5678;0.9' => NULL,
+    );
+
+    foreach ($test_cases as $accept_language => $expected_result) {
+      $_SERVER['HTTP_ACCEPT_LANGUAGE'] = $accept_language;
+      $result = language_from_browser();
+      $this->assertIdentical(isset($result->language) ? $result->language : NULL, $expected_result, t("Language selection '@accept-language' selects '@result', result = '@actual'", array('@accept-language' => $accept_language, '@result' => $expected_result, '@actual' => isset($result->language) ? $result->language : 'none')));
+    }
+  }
 }
 
 /**
