$str) {
// Tokenize the string
$tokens[$key] = preg_split('/(?<=\W)|(?=\W)/u', trim($str));
}
// Remove identical prefix.
while ($tokens[0] && $tokens[1] && reset($tokens[0]) === reset($tokens[1])) {
$prefix .= array_shift($tokens[0]);
array_shift($tokens[1]);
}
// Remove identical suffixes.
while ($tokens[0] && $tokens[1] && end($tokens[0]) === end($tokens[1])) {
$suffix = array_pop($tokens[0]) . $suffix;
array_pop($tokens[1]);
}
// Calculate the length of the token arrays.
foreach (array_keys($tokens) as $key) {
$length[$key] = count($tokens[$key]);
}
return array($tokens, $length, $prefix, $suffix);
}
function _worddiff_lcs($tokens, $length) {
$matrix = array();
// Calculate the matrix.
for ($i = 0; $i < $length[0]; $i++) {
for ($j = 0; $j < $length[1]; $j++) {
if ($tokens[0][$i] === $tokens[1][$j]) {
$matrix[$i][$j] = (isset($matrix[$i - 1][$j - 1]) ? $matrix[$i - 1][$j - 1] : 0) + 1;
}
else {
$matrix[$i][$j] = max(
isset($matrix[$i][$j - 1]) ? $matrix[$i][$j - 1] : 0,
isset($matrix[$i - 1][$j]) ? $matrix[$i - 1][$j] : 0
);
}
}
}
return $matrix;
}
function _worddiff_expand_result($str) {
return array(-1 => '', 0 => $str, 1 => '');
}
function _worddiff_changeset($tokens, $matrix, $length) {
// Setup work.
$index = 0;
$mode = NULL;
$result = array_map('_worddiff_expand_result', $tokens[0]);
for ($i = $length[0] - 1, $j = $length[1] - 1; $i >= 0; $i--, $j--) {
if ($j < 0 || $tokens[0][$i] !== $tokens[1][$j]) {
if ($j < 0 || ($j > 0 && ($matrix[$i][$j - 1] < $matrix[$i - 1][$j]))) {
// Delete substring.
if ($mode === -1) {
// If we're already in delete mode, prepend it to the previous delete.
$result[$index][-1] = $result[$i][0] . $result[$index][-1];
unset($result[$i]);
}
else {
// Otherwise, move this token's string to the delete part.
$result[$i][-1] = $result[$i][0];
$result[$i][0] = '';
$index = $i;
$mode = -1;
}
$j++;
}
else {
// Add a new substring to the last token.
$result[$i][1] = $tokens[1][$j] . $result[$i][1];
$i++;
$mode = 1;
}
}
else {
// Preserve substring.
if ($mode === 0) {
// Merge this token into the previous one to minimize the amount of tokens.
$result[$index][0] = $result[$i][0] . $result[$index][0];
unset($result[$i]);
}
else {
$index = $i;
$mode = 0;
}
}
}
return array_values($result);
}
function _worddiff_find_prefix($a, $b) {
$prefix = '';
while (mb_strlen($a) && mb_strlen($b) && mb_substr($a, 0, 1) === mb_substr($b, 0, 1)) {
$prefix .= mb_substr($a, 0, 1);
$a = mb_substr($a, 1);
$b = mb_substr($b, 1);
}
return array($prefix, $a, $b);
}
function _worddiff_aggregate($diff) {
for ($i = 0; isset($diff[$i]); $i++) {
if ($diff[$i][-1] == '' && mb_strlen($diff[$i][0]) < 3 && $diff[$i][1] == '' &&
isset($diff[$i - 1]) && $diff[$i - 1][0] == '' &&
isset($diff[$i + 1]) && $diff[$i + 1][0] == '') {
$diff[$i + 1] = array(
-1 => $diff[$i - 1][-1] . $diff[$i][0] . $diff[$i + 1][-1],
0 => '',
1 => $diff[$i - 1][1] . $diff[$i][0] . $diff[$i + 1][1],
);
unset($diff[$i - 1], $diff[$i]);
}
}
// Find common prefixes.
$diff = array_values($diff);
for ($i = 0; isset($diff[$i]); $i++) {
if ($diff[$i][0] == '' && isset($diff[$i - 1]) && $diff[$i - 1][1] == '') {
list($prefix, $diff[$i][-1], $diff[$i][1]) = _worddiff_find_prefix($diff[$i][-1], $diff[$i][1]);
$diff[$i - 1][0] .= $prefix;
}
}
return $diff;
}
function worddiff($old, $new) {
// Preparation stuff.
list($tokens, $length, $prefix, $suffix) = _worddiff_tokenize(array($old, $new));
$matrix = _worddiff_lcs($tokens, $length);
// Generate the changeset.
$result = _worddiff_changeset($tokens, $matrix, $length);
// Add the previously trimmed prefix and suffix.
array_unshift($result, array(-1 => '', 0 => $prefix, 1 => ''));
array_push($result, array(-1 => '', 0 => $suffix, 1 => ''));
// Merge small changes together.
$result = _worddiff_aggregate($result);
return $result;
}
function _worddiff_render_deletions($changes) {
$value = '';
if (!empty($changes[-1])) {
$value .= ''. $changes[-1] .'';
}
if (!empty($changes[0])) {
$value .= $changes[0];
}
return $value;
}
function _worddiff_render_insertions($changes) {
$value = '';
if (!empty($changes[0])) {
$value .= $changes[0];
}
if (!empty($changes[1])) {
$value .= ''. $changes[1] .'';
}
return $value;
}
function worddiff_render($result) {
return array(
'old' => implode('', array_map('_worddiff_render_deletions', $result)),
'new' => implode('', array_map('_worddiff_render_insertions', $result))
);
}