SPACE U+00A0 NO BREAK SPACE U+1680 <9A><80> OGHAM SPACE MARK U+180E <8E> MONGOLIAN VOWEL SEPARATOR U+2000-U+200A <80><80>-<80><8A> U+2028 <80> LINE SEPARATOR U+2029 <80> PARAGRAPH SEPARATOR U+202F <80> NARROW NO-BREAK SPACE U+205F <81><9F> MEDIUM MATHEMATICAL SPACE U+3000 <80><80> IDEOGRAPHIC SPACE */

/**
 * Truncates a UTF-8 string to at most $len characters.
 *
 * A "character" is one UTF-8 sequence whose width is taken from its lead
 * byte; continuation bytes are not validated. Bytes that cannot start a
 * sequence (0x80-0xC1 and 0xF5-0xFF) are left out of the result but still
 * count towards $len.
 *
 * @param string $string
 *   The UTF-8 encoded input.
 * @param int $len
 *   Maximum number of characters to keep.
 * @param bool $wordsafe
 *   When TRUE and the input had to be cut, the result ends just before the
 *   last separator found within the first $len characters. If no separator
 *   was found the plain truncation is returned. Input that fits within $len
 *   is returned without cutting.
 * @param bool $dots
 *   Accepted for interface compatibility; currently has no effect.
 *
 * @return string
 *   The truncated string.
 */
function truncate_to_words($string, $len, $wordsafe = FALSE, $dots = FALSE) {
  // Separators keyed by their raw UTF-8 byte sequence. Keying by the bytes
  // themselves lets a sequence cut from the input be looked up directly,
  // without any string/integer comparison.
  static $separators = array(
    "\x20" => TRUE,         // U+0020 SPACE
    "\xC2\xA0" => TRUE,     // U+00A0 NO-BREAK SPACE
    "\xE1\x9A\x80" => TRUE, // U+1680 OGHAM SPACE MARK
    "\xE1\xA0\x8E" => TRUE, // U+180E MONGOLIAN VOWEL SEPARATOR
    "\xE2\x80\x80" => TRUE, // U+2000
    "\xE2\x80\x81" => TRUE, // U+2001
    "\xE2\x80\x82" => TRUE, // U+2002
    "\xE2\x80\x83" => TRUE, // U+2003
    "\xE2\x80\x84" => TRUE, // U+2004
    "\xE2\x80\x85" => TRUE, // U+2005
    "\xE2\x80\x86" => TRUE, // U+2006
    "\xE2\x80\x87" => TRUE, // U+2007
    "\xE2\x80\x88" => TRUE, // U+2008
    "\xE2\x80\x89" => TRUE, // U+2009
    "\xE2\x80\x8A" => TRUE, // U+200A
    "\xE2\x80\xA8" => TRUE, // U+2028 LINE SEPARATOR
    "\xE2\x80\xA9" => TRUE, // U+2029 PARAGRAPH SEPARATOR
    "\xE2\x80\xAF" => TRUE, // U+202F NARROW NO-BREAK SPACE
    "\xE2\x81\x9F" => TRUE, // U+205F MEDIUM MATHEMATICAL SPACE
    "\xE3\x80\x80" => TRUE, // U+3000 IDEOGRAPHIC SPACE
  );

  $length = strlen($string);
  $result = '';
  // Text collected before the most recent separator; NULL until one is seen.
  $boundary = NULL;
  $index = 0;

  // Stop at $len characters or at the end of the input, whichever comes
  // first, so the string is never read past its last byte.
  for ($count = 0; $count < $len && $index < $length; $count++) {
    $lead = ord($string[$index]);

    if ($lead < 0x80) {
      $width = 1;
    }
    elseif ($lead >= 0xC2 && $lead <= 0xDF) {
      $width = 2;
    }
    elseif ($lead >= 0xE0 && $lead <= 0xEF) {
      $width = 3;
    }
    elseif ($lead >= 0xF0 && $lead <= 0xF4) {
      $width = 4;
    }
    else {
      // Not a valid lead byte: skip it, but let it count as a character.
      $index++;
      continue;
    }

    // substr() clamps at the end of the input, so a sequence cut short by
    // the end of the string is copied as far as it goes.
    $char = substr($string, $index, $width);
    $index += $width;

    if (isset($separators[$char])) {
      $boundary = $result;
    }
    $result .= $char;
  }

  // Fall back to the plain result when word-safety was not requested, when
  // the whole input was consumed (nothing was cut), or when there is no
  // separator to cut at.
  if (!$wordsafe || $index >= $length || $boundary === NULL) {
    return $result;
  }

  return $boundary;
}

echo truncate_to_words('áéó űúí', 5, TRUE);
echo "!\n";
echo truncate_to_words('áéó űúí', 5, FALSE);
echo "!\n";