diff -u b/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php b/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php --- b/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php +++ b/core/lib/Drupal/Component/Transliteration/PHPTransliteration.php @@ -14,7 +14,8 @@ * transliteration database, which comes from two sources. Language-dependent * character transliterations are stored in class variable $languageOverrides, * which is initialized in the base Translation class constructor by reading in - * file language_overrides.php in the $dataDirectory directory. + * a data file for the language in the $dataDirectory directory (named the same + * as the language code, with a .php extension). * * The language-dependent values take precedence over generic * language-independent transliterations, which are read in as needed in @@ -176,8 +177,8 @@ */ protected function replace($ord) { // See if there is a language-specific override for this character. - if (isset(self::$languageOverrides[$this->langcode][$ord])) { - return self::$languageOverrides[$this->langcode][$ord]; + if (isset($this->languageOverrides[$this->langcode][$ord])) { + return $this->languageOverrides[$this->langcode][$ord]; } // Read in the file containing generic transliteration tables, if it diff -u b/core/lib/Drupal/Component/Transliteration/Transliteration.php b/core/lib/Drupal/Component/Transliteration/Transliteration.php --- b/core/lib/Drupal/Component/Transliteration/Transliteration.php +++ b/core/lib/Drupal/Component/Transliteration/Transliteration.php @@ -45,12 +45,13 @@ * are the transliterations of those characters to US-ASCII. * * If unset, the constructor for the Transliteration base class sets this by - * loading file $dataDirectory/language_overrides.php and using the resulting - * value of the global-scope variable $overrides. 
+ * loading a file in $dataDirectory that is named for the language code with + * a .php extension, and using the value of the $overrides variable that + * the file defines. * * @var array */ - protected static $languageOverrides = array(); + protected $languageOverrides = array(); /** * Constructs the transliteration object. @@ -63,16 +64,24 @@ * to '?'. */ public function __construct($langcode = 'en', $unknown_character = '?') { + // We are using $langcode in a file name, so sanitize it! + $langcode = preg_replace('/[^a-zA-Z\-]/', '', $langcode); $this->langcode = $langcode; $this->unknownCharacter = $unknown_character; - // Initialize the language override tables, if not already set before - // this constructor is called. + // Initialize the data directory and read in language overrides. $this->dataDirectory = dirname(__FILE__) . '/data'; - if (empty(self::$languageOverrides)) { - include $this->dataDirectory . '/language_overrides.php'; - self::$languageOverrides = $overrides; + $file = $this->dataDirectory . '/' . $langcode . '.php'; + if (is_file($file)) { + // Sets $overrides; plain include so each new instance re-reads it. + include $file; } + else { + $overrides = array(); + } + + drupal_alter('transliteration_overrides', $overrides, $langcode); + $this->languageOverrides = $overrides; } /** --- b/core/lib/Drupal/Component/Transliteration/data/language_overrides.php +++ /dev/null @@ -1,79 +0,0 @@ - 'Aa', - 0xC6 => 'Ae', - 0xD8 => 'Oe', - 0xE5 => 'aa', - 0xE6 => 'ae', - 0xF8 => 'oe', -); - -// Overrides for German input. -$overrides['de'] = array( - 0xC4 => 'Ae', - 0xD6 => 'Oe', - 0xDC => 'Ue', - 0xE4 => 'ae', - 0xF6 => 'oe', - 0xFC => 'ue', - 0xDF => 'ss', -); - -// Overrides for Spanish input. -$overrides['es'] = array( - 0xE1 => 'a', - 0xE9 => 'e', - 0xED => 'i', - 0xF3 => 'o', - 0xFA => 'u', - 0xF1 => 'n', -); - -// Overrides for Esperanto input. 
-$overrides['eo'] = array( - 0x18 => 'Cx', - 0x19 => 'cx', - 0x11C => 'Gx', - 0x11D => 'gx', - 0x124 => 'Hx', - 0x125 => 'hx', - 0x134 => 'Jx', - 0x135 => 'jx', - 0x15C => 'Sx', - 0x15D => 'sx', - 0x16C => 'Ux', - 0x16D => 'ux', -); - -// Overrides for Kyrgyz input. -$overrides['kg'] = array( - 0x41 => 'E', - 0x416 => 'C', - 0x419 => 'J', - 0x425 => 'X', - 0x426 => 'TS', - 0x429 => 'SCH', - 0x42E => 'JU', - 0x42F => 'JA', - 0x436 => 'c', - 0x439 => 'j', - 0x445 => 'x', - 0x446 => 'ts', - 0x449 => 'sch', - 0x44E => 'ju', - 0x44F => 'ja', - 0x451 => 'e', - 0x4A2 => 'H', - 0x4A3 => 'h', - 0x4AE => 'W', - 0x4AF => 'w', - 0x4E8 => 'Q', - 0x4E9 => 'q', -); diff -u b/core/lib/Drupal/Component/Transliteration/data/x00.php b/core/lib/Drupal/Component/Transliteration/data/x00.php --- b/core/lib/Drupal/Component/Transliteration/data/x00.php +++ b/core/lib/Drupal/Component/Transliteration/data/x00.php @@ -11,7 +11,7 @@ 0x90 => '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 0xA0 => ' ', '!', 'C/', 'PS', '$?', 'Y=', '|', 'SS', '"', '(c)', 'a', '<<', '!', '', '(r)', '-', 0xB0 => 'deg', '+-', '2', '3', '\'', 'u', 'P', '*', ',', '1', 'o', '>>', '1/4', '1/2', '3/4', '?', - 0xC0 => 'A', 'A', 'A', 'A', 'A', 'A', 'AE', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', + 0xC0 => 'A', 'A', 'A', 'A', 'A', 'A', 'Ae', 'C', 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', 0xD0 => 'D', 'N', 'O', 'O', 'O', 'O', 'O', 'x', 'O', 'U', 'U', 'U', 'U', 'Y', 'Th', 'ss', 0xE0 => 'a', 'a', 'a', 'a', 'a', 'a', 'ae', 'c', 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i', 0xF0 => 'd', 'n', 'o', 'o', 'o', 'o', 'o', '/', 'o', 'u', 'u', 'u', 'u', 'y', 'th', 'y', diff -u b/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php b/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php --- b/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php +++ b/core/modules/system/lib/Drupal/system/Tests/Transliteration/TransliterationTest.php 
@@ -13,9 +13,7 @@ /** * Tests the transliteration classes. * - * @todo This should be a UnitTestBase not WebTestBase, but you cannot - * use a factory in WebTestBase, because system_list() requires a Drupal - * installation. + * @todo Add tests for the language overrides alter hook. */ class TransliterationTest extends UnitTestBase { public static function getInfo() { @@ -39,7 +37,7 @@ $transliterator = drupal_container()->get('transliteration')->get('en', '?'); // Verify that transliterating a plain ASCII string returns the string. - $before = $this->randomString(200); + $before = $this->randomName(200); $after = $transliterator->transliterate($before); $this->assertEqual($before, $after, "Factory class transliterates an ASCII string correctly"); } @@ -48,7 +46,7 @@ * Tests the PHPTransliteration class. */ public function testPHPTransliteration() { - $random = $this->randomString(10); + $random = $this->randomName(10); $cases = array( // Each test case is (language code, input, output). // Test ASCII in English. @@ -56,9 +54,9 @@ // Test ASCII in some other language. array('fr', $random, $random), // Test language overrides. - array('es', 'tácuañía las ción música', 'tacuania las cion musica'), - // Test generic European characters. 
- array('fr', 'ça va', 'ca va'), + array('de', 'Ä Ö Ü Å Ø äöüåøhello', 'Ae Oe Ue A O aeoeueaohello'), + array('dk', 'Ä Ö Ü Å Ø äöüåøhello', 'A O U Aa Oe aouaaoehello'), + array('en', 'Ä Ö Ü Å Ø äöüåøhello', 'A O U A O aouaohello'), ); foreach($cases as $case) { only in patch2: --- /dev/null +++ b/core/lib/Drupal/Component/Transliteration/data/de.php @@ -0,0 +1,15 @@ + 'Ae', + 0xD6 => 'Oe', + 0xDC => 'Ue', + 0xE4 => 'ae', + 0xF6 => 'oe', + 0xFC => 'ue', +); only in patch2: --- /dev/null +++ b/core/lib/Drupal/Component/Transliteration/data/dk.php @@ -0,0 +1,13 @@ + 'Aa', + 0xD8 => 'Oe', + 0xE5 => 'aa', + 0xF8 => 'oe', +); only in patch2: --- /dev/null +++ b/core/lib/Drupal/Component/Transliteration/data/eo.php @@ -0,0 +1,21 @@ + 'Cx', + 0x109 => 'cx', + 0x11C => 'Gx', + 0x11D => 'gx', + 0x124 => 'Hx', + 0x125 => 'hx', + 0x134 => 'Jx', + 0x135 => 'jx', + 0x15C => 'Sx', + 0x15D => 'sx', + 0x16C => 'Ux', + 0x16D => 'ux', +); only in patch2: --- /dev/null +++ b/core/lib/Drupal/Component/Transliteration/data/kg.php @@ -0,0 +1,31 @@ + 'E', + 0x416 => 'C', + 0x419 => 'J', + 0x425 => 'X', + 0x426 => 'TS', + 0x429 => 'SCH', + 0x42E => 'JU', + 0x42F => 'JA', + 0x436 => 'c', + 0x439 => 'j', + 0x445 => 'x', + 0x446 => 'ts', + 0x449 => 'sch', + 0x44E => 'ju', + 0x44F => 'ja', + 0x451 => 'e', + 0x4A2 => 'H', + 0x4A3 => 'h', + 0x4AE => 'W', + 0x4AF => 'w', + 0x4E8 => 'Q', + 0x4E9 => 'q', +); only in patch2: --- a/core/modules/system/language.api.php +++ b/core/modules/system/language.api.php @@ -183,2 +183,22 @@ + +/** + * Provide language overrides for transliteration. + * + * @param array $overrides + * Associative array of language overrides. The outermost key is the language + * code, and the corresponding value is an array whose keys are integer + * Unicode character codes, and whose values are the transliterations of those + * characters in the given language, to override default transliterations. 
+ * @param string $langcode + * The code for the language that is being transliterated. + */ +function hook_transliteration_overrides_alter(&$overrides, $langcode) { + // Provide special overrides for German for a custom site. + if ($langcode == 'de') { + // The core-provided transliteration of Ä is Ae, but we want just A. + $overrides['de'][0xC4] = 'A'; + } +} + /**