diff -r 4f145ac18162 smartypants.php --- a/smartypants.php Wed Oct 26 14:38:42 2011 +0200 +++ b/smartypants.php Wed Oct 26 21:18:43 2011 +0200 @@ -23,7 +23,7 @@ // A global variable to keep track of our current SmartyPants // configuration setting. global $_typogrify_smartypants_attr; -$_typogrify_smartypants_attr = "1"; # Change this to configure. +$_typogrify_smartypants_attr = "1Q"; # Change this to configure. # 1 => "--" for em-dashes; no en-dash support # 2 => "---" for em-dashes; "--" for en-dashes # 3 => "--" for em-dashes; "---" for en-dashes @@ -56,8 +56,8 @@ # 3 : set all, using inverted old school en and em- dash shortcuts # # q : quotes - # b : backtick quotes (``double'' only) - # B : backtick quotes (``double'' and `single') + # b : backtick quotes (``double'' and ,,double`` only) + # B : backtick quotes (``double'', ,,double``, ,single` and `single') # d : dashes # D : old school dashes # i : inverted old school dashes @@ -70,21 +70,21 @@ } else if ($attr == "1") { # Do everything, turn all options on. - $do_quotes = 1; + $do_quotes = 2; $do_backticks = 1; $do_dashes = 1; $do_ellipses = 1; } else if ($attr == "2") { # Do everything, turn all options on, use old school dash shorthand. - $do_quotes = 1; + $do_quotes = 2; $do_backticks = 1; $do_dashes = 2; $do_ellipses = 1; } else if ($attr == "3") { # Do everything, turn all options on, use inverted old school dash shorthand. - $do_quotes = 1; + $do_quotes = 2; $do_backticks = 1; $do_dashes = 3; $do_ellipses = 1; @@ -97,7 +97,8 @@ $chars = preg_split('//', $attr); foreach ($chars as $c){ if ($c == "q") { $do_quotes = 1; } - else if ($c == "b") { $do_backticks = 1; } + else if ($c == "Q") { $do_quotes = 2; } + else if ($c == "b") { $do_backticks = 1; } else if ($c == "B") { $do_backticks = 2; } else if ($c == "d") { $do_dashes = 1; } else if ($c == "D") { $do_dashes = 2; } @@ -151,8 +152,7 @@ $t = EducateBackticks($t); if ($do_backticks == 2) $t = EducateSingleBackticks($t); } - - if ($do_quotes) { + if ($do_quotes == 1) { if ($t == "'") { # Special case: single-character ' token if (preg_match('/\S/', $prev_token_last_char)) { @@ -175,6 +175,34 @@ # Normal case: $t = EducateQuotes($t); } + } else if ($do_quotes == 2) { + if ($t == "'") { + # Special case: single-character ' token + if (preg_match('/\S/', $prev_token_last_char)) { + $t = "‘"; + } + else { + $t = "‚"; + } + } + else if ($t == '"') { + # Special case: single-character " token + if (preg_match('/\S/', $prev_token_last_char)) { + $t = "“"; + } + else { + $t = "„"; + } + } + else { + # Normal case: + if ($ctx['langcode'] == 'de') { + $t = EducateGermanQuotes($t); + } + else { + $t = EducateQuotes($t); + } + } } if ($do_stupefy) $t = StupefyEntities($t); @@ -457,6 +485,93 @@ } +function EducateGermanQuotes($_) { +# +# Parameter: String. +# +# Returns: The string, with "educated" curly quote HTML entities. +# +# Example input: "Isn't this fun?" +# Example output: „Isn’t this fun?“ +# + # Make our own "punctuation" character class, because the POSIX-style + # [:PUNCT:] is only available in Perl 5.6 or later: + $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]"; + + # Special case if the very first character is a quote + # followed by punctuation at a non-word-break. Close the quotes by brute force: + $_ = preg_replace( + array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"), + array('’', '“'), $_); + + # Special case for double sets of quotes, e.g.: + #
He said, "'Quoted' words in a larger quote."
+ $_ = preg_replace( + array("/\"'(?=\w)/", "/'\"(?=\w)/"), + array('„‚', '‚„'), $_); + + # Special case for decade abbreviations (the '80s): + $_ = preg_replace("/'(?=\\d{2}s)/", '', $_); + + $close_class = '[^\ \t\r\n\[\{\(\-]'; + $dec_dashes = '&\#8211;|&\#8212;'; + + # Get most opening single quotes: + $_ = preg_replace("{ + ( + \\s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + $dec_dashes | # or decimal entities + &\\#x201[34]; # or hex + ) + ' # the quote + (?=\\w) # followed by a word character + }x", '\1‚', $_); + # Single closing quotes: + $_ = preg_replace("{ + ($close_class)? + ' + (?(1)| # If $1 captured, then do nothing; + (?=\\s | s\\b) # otherwise, positive lookahead for a whitespace + ) # char or an 's' at a word ending position. This + # is a special case to handle something like: + # \"Custer's Last Stand.\" + }xi", '\1‘', $_); + + # Any remaining single quotes should be opening ones: + $_ = str_replace("'", '‚', $_); + + + # Get most opening double quotes: + $_ = preg_replace("{ + ( + \\s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + $dec_dashes | # or decimal entities + &\\#x201[34]; # or hex + ) + \" # the quote + (?=\\w) # followed by a word character + }x", '\1„', $_); + + # Double closing quotes: + $_ = preg_replace("{ + ($close_class)? + \" + (?(1)|(?=\\s)) # If $1 captured, then do nothing; + # if not, then make sure the next char is whitespace. + }x", '\1“', $_); + + # Any remaining quotes should be opening ones. + $_ = str_replace('"', '„', $_); + + return $_; +} + function EducateBackticks($_) { # # Parameter: String. @@ -467,8 +582,8 @@ # Example output: “Isn't this fun?” # - $_ = str_replace(array("``", "''",), - array('“', '”'), $_); + $_ = str_replace(array("``", "''", ",,"), + array('“', '”', '„'), $_); return $_; } @@ -483,8 +598,8 @@ # Example output: ‘Isn’t this fun?’ # - $_ = str_replace(array("`", "'",), - array('‘', '’'), $_); + $_ = str_replace(array("`", "'", ","), + array('‘', '’', '‚'), $_); return $_; } @@ -572,10 +687,10 @@ array('-', '--'), $_); # single quote open close - $_ = str_replace(array('‘', '’'), "'", $_); + $_ = str_replace(array('‘', '’','‚'), "'", $_); # double quote open close - $_ = str_replace(array('“', '”'), '"', $_); + $_ = str_replace(array('“', '”','„'), '"', $_); $_ = str_replace('…', '...', $_); # ellipsis @@ -598,10 +713,11 @@ # \. . # \- - # \` ` +# \, , # $_ = str_replace( - array('\\\\', '\"', "\'", '\.', '\-', '\`'), - array('\', '"', ''', '.', '-', '`'), $_); + array('\\\\', '\"', "\'", '\.', '\-', '\`', '\,',), + array('\', '"', ''', '.', '-', '`', ','), $_); return $_; } diff -r 4f145ac18162 typogrify.module --- a/typogrify.module Wed Oct 26 14:38:42 2011 +0200 +++ b/typogrify.module Wed Oct 26 21:18:43 2011 +0200 @@ -71,6 +71,8 @@ */ function _typogrify_process($text, $filter, $format, $langcode, $cache, $cache_id) { $characters_to_convert = array(); + $ctx = array(); + $ctx['langcode'] = $langcode; // Load Helpers. module_load_include('class.php', 'typogrify'); module_load_include('php', 'typogrify', 'unicode-conversion'); @@ -90,7 +92,7 @@ if ($filter->settings['smartypants_enabled']) { global $_typogrify_smartypants_attr; $_typogrify_smartypants_attr = $filter->settings['smartypants_hyphens']; - $text = SmartyPants($text); + $text = SmartyPants($text, NULL, $ctx); } // Wrap caps. @@ -166,7 +168,7 @@ ); // Smartypants hyphenation settings. - // Uses the same values as the parse attributes in the SmartyPants + // Uses the same values as the parse attributes in the SmartyPants // function (@see SmartyPants in smartypants.php) $form['smartypants_hyphens'] = array( '#type' => 'select', @@ -206,7 +208,7 @@ '#title' => t('Wrap quotation marks'), '#default_value' => $defaults->settings['wrap_initial_quotes'], ); - + // Ligature conversion settings. $ligature_options = array(); foreach (unicode_conversion_map('ligature') as $ascii => $unicode) { @@ -239,8 +241,9 @@ $version_strings[] = t('PHP Typogrify Version: !version', array('!version' => l('1.0', 'http://blog.hamstu.com/'))); $form['info']['typogrify_status'] = array( - '#type' => 'markup', - '#value' => theme('item_list', $version_strings, t('Version Information')) + '#type' => 'item', + '#title' => t('Versions'), + '#markup' => theme('item_list', array('items' => $version_strings)) ); return $form;