-
+ 329353CED261CE4D9AE48736630D9B7EDF3A947074FCA6906C9C994C8ABAFB29F0F4ACC97F360506F8CCAD382A56D0906FEF00F076C99BEB0E789116FE0FA11D
mp-wp/wp-includes/Text/Diff/Engine/native.php
(0 . 0)(1 . 437)
69089 <?php
69090 /**
69091 * $Horde: framework/Text_Diff/Diff/Engine/native.php,v 1.10 2008/01/04 10:27:53 jan Exp $
69092 *
69093 * Class used internally by Text_Diff to actually compute the diffs. This
69094 * class is implemented using native PHP code.
69095 *
69096 * The algorithm used here is mostly lifted from the perl module
69097 * Algorithm::Diff (version 1.06) by Ned Konz, which is available at:
69098 * http://www.perl.com/CPAN/authors/id/N/NE/NEDKONZ/Algorithm-Diff-1.06.zip
69099 *
69100 * More ideas are taken from: http://www.ics.uci.edu/~eppstein/161/960229.html
69101 *
69102 * Some ideas (and a bit of code) are taken from analyze.c, of GNU
69103 * diffutils-2.7, which can be found at:
69104 * ftp://gnudist.gnu.org/pub/gnu/diffutils/diffutils-2.7.tar.gz
69105 *
69106 * Some ideas (subdivision by NCHUNKS > 2, and some optimizations) are from
69107 * Geoffrey T. Dairiki <dairiki@dairiki.org>. The original PHP version of this
69108 * code was written by him, and is used/adapted with his permission.
69109 *
69110 * Copyright 2004-2008 The Horde Project (http://www.horde.org/)
69111 *
69112 * See the enclosed file COPYING for license information (LGPL). If you did
69113 * not receive this file, see http://opensource.org/licenses/lgpl-license.php.
69114 *
69115 * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
69116 * @package Text_Diff
69117 */
69118 class Text_Diff_Engine_native {
69119
69120 function diff($from_lines, $to_lines)
69121 {
69122 array_walk($from_lines, array('Text_Diff', 'trimNewlines'));
69123 array_walk($to_lines, array('Text_Diff', 'trimNewlines'));
69124
69125 $n_from = count($from_lines);
69126 $n_to = count($to_lines);
69127
69128 $this->xchanged = $this->ychanged = array();
69129 $this->xv = $this->yv = array();
69130 $this->xind = $this->yind = array();
69131 unset($this->seq);
69132 unset($this->in_seq);
69133 unset($this->lcs);
69134
69135 // Skip leading common lines.
69136 for ($skip = 0; $skip < $n_from && $skip < $n_to; $skip++) {
69137 if ($from_lines[$skip] !== $to_lines[$skip]) {
69138 break;
69139 }
69140 $this->xchanged[$skip] = $this->ychanged[$skip] = false;
69141 }
69142
69143 // Skip trailing common lines.
69144 $xi = $n_from; $yi = $n_to;
69145 for ($endskip = 0; --$xi > $skip && --$yi > $skip; $endskip++) {
69146 if ($from_lines[$xi] !== $to_lines[$yi]) {
69147 break;
69148 }
69149 $this->xchanged[$xi] = $this->ychanged[$yi] = false;
69150 }
69151
69152 // Ignore lines which do not exist in both files.
69153 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
69154 $xhash[$from_lines[$xi]] = 1;
69155 }
69156 for ($yi = $skip; $yi < $n_to - $endskip; $yi++) {
69157 $line = $to_lines[$yi];
69158 if (($this->ychanged[$yi] = empty($xhash[$line]))) {
69159 continue;
69160 }
69161 $yhash[$line] = 1;
69162 $this->yv[] = $line;
69163 $this->yind[] = $yi;
69164 }
69165 for ($xi = $skip; $xi < $n_from - $endskip; $xi++) {
69166 $line = $from_lines[$xi];
69167 if (($this->xchanged[$xi] = empty($yhash[$line]))) {
69168 continue;
69169 }
69170 $this->xv[] = $line;
69171 $this->xind[] = $xi;
69172 }
69173
69174 // Find the LCS.
69175 $this->_compareseq(0, count($this->xv), 0, count($this->yv));
69176
69177 // Merge edits when possible.
69178 $this->_shiftBoundaries($from_lines, $this->xchanged, $this->ychanged);
69179 $this->_shiftBoundaries($to_lines, $this->ychanged, $this->xchanged);
69180
69181 // Compute the edit operations.
69182 $edits = array();
69183 $xi = $yi = 0;
69184 while ($xi < $n_from || $yi < $n_to) {
69185 assert($yi < $n_to || $this->xchanged[$xi]);
69186 assert($xi < $n_from || $this->ychanged[$yi]);
69187
69188 // Skip matching "snake".
69189 $copy = array();
69190 while ($xi < $n_from && $yi < $n_to
69191 && !$this->xchanged[$xi] && !$this->ychanged[$yi]) {
69192 $copy[] = $from_lines[$xi++];
69193 ++$yi;
69194 }
69195 if ($copy) {
69196 $edits[] = &new Text_Diff_Op_copy($copy);
69197 }
69198
69199 // Find deletes & adds.
69200 $delete = array();
69201 while ($xi < $n_from && $this->xchanged[$xi]) {
69202 $delete[] = $from_lines[$xi++];
69203 }
69204
69205 $add = array();
69206 while ($yi < $n_to && $this->ychanged[$yi]) {
69207 $add[] = $to_lines[$yi++];
69208 }
69209
69210 if ($delete && $add) {
69211 $edits[] = &new Text_Diff_Op_change($delete, $add);
69212 } elseif ($delete) {
69213 $edits[] = &new Text_Diff_Op_delete($delete);
69214 } elseif ($add) {
69215 $edits[] = &new Text_Diff_Op_add($add);
69216 }
69217 }
69218
69219 return $edits;
69220 }
69221
69222 /**
69223 * Divides the Largest Common Subsequence (LCS) of the sequences (XOFF,
69224 * XLIM) and (YOFF, YLIM) into NCHUNKS approximately equally sized
69225 * segments.
69226 *
69227 * Returns (LCS, PTS). LCS is the length of the LCS. PTS is an array of
69228 * NCHUNKS+1 (X, Y) indexes giving the diving points between sub
69229 * sequences. The first sub-sequence is contained in (X0, X1), (Y0, Y1),
69230 * the second in (X1, X2), (Y1, Y2) and so on. Note that (X0, Y0) ==
69231 * (XOFF, YOFF) and (X[NCHUNKS], Y[NCHUNKS]) == (XLIM, YLIM).
69232 *
69233 * This function assumes that the first lines of the specified portions of
69234 * the two files do not match, and likewise that the last lines do not
69235 * match. The caller must trim matching lines from the beginning and end
69236 * of the portions it is going to specify.
69237 */
69238 function _diag ($xoff, $xlim, $yoff, $ylim, $nchunks)
69239 {
69240 $flip = false;
69241
69242 if ($xlim - $xoff > $ylim - $yoff) {
69243 /* Things seems faster (I'm not sure I understand why) when the
69244 * shortest sequence is in X. */
69245 $flip = true;
69246 list ($xoff, $xlim, $yoff, $ylim)
69247 = array($yoff, $ylim, $xoff, $xlim);
69248 }
69249
69250 if ($flip) {
69251 for ($i = $ylim - 1; $i >= $yoff; $i--) {
69252 $ymatches[$this->xv[$i]][] = $i;
69253 }
69254 } else {
69255 for ($i = $ylim - 1; $i >= $yoff; $i--) {
69256 $ymatches[$this->yv[$i]][] = $i;
69257 }
69258 }
69259
69260 $this->lcs = 0;
69261 $this->seq[0]= $yoff - 1;
69262 $this->in_seq = array();
69263 $ymids[0] = array();
69264
69265 $numer = $xlim - $xoff + $nchunks - 1;
69266 $x = $xoff;
69267 for ($chunk = 0; $chunk < $nchunks; $chunk++) {
69268 if ($chunk > 0) {
69269 for ($i = 0; $i <= $this->lcs; $i++) {
69270 $ymids[$i][$chunk - 1] = $this->seq[$i];
69271 }
69272 }
69273
69274 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $chunk) / $nchunks);
69275 for (; $x < $x1; $x++) {
69276 $line = $flip ? $this->yv[$x] : $this->xv[$x];
69277 if (empty($ymatches[$line])) {
69278 continue;
69279 }
69280 $matches = $ymatches[$line];
69281 reset($matches);
69282 while (list(, $y) = each($matches)) {
69283 if (empty($this->in_seq[$y])) {
69284 $k = $this->_lcsPos($y);
69285 assert($k > 0);
69286 $ymids[$k] = $ymids[$k - 1];
69287 break;
69288 }
69289 }
69290 while (list(, $y) = each($matches)) {
69291 if ($y > $this->seq[$k - 1]) {
69292 assert($y <= $this->seq[$k]);
69293 /* Optimization: this is a common case: next match is
69294 * just replacing previous match. */
69295 $this->in_seq[$this->seq[$k]] = false;
69296 $this->seq[$k] = $y;
69297 $this->in_seq[$y] = 1;
69298 } elseif (empty($this->in_seq[$y])) {
69299 $k = $this->_lcsPos($y);
69300 assert($k > 0);
69301 $ymids[$k] = $ymids[$k - 1];
69302 }
69303 }
69304 }
69305 }
69306
69307 $seps[] = $flip ? array($yoff, $xoff) : array($xoff, $yoff);
69308 $ymid = $ymids[$this->lcs];
69309 for ($n = 0; $n < $nchunks - 1; $n++) {
69310 $x1 = $xoff + (int)(($numer + ($xlim - $xoff) * $n) / $nchunks);
69311 $y1 = $ymid[$n] + 1;
69312 $seps[] = $flip ? array($y1, $x1) : array($x1, $y1);
69313 }
69314 $seps[] = $flip ? array($ylim, $xlim) : array($xlim, $ylim);
69315
69316 return array($this->lcs, $seps);
69317 }
69318
69319 function _lcsPos($ypos)
69320 {
69321 $end = $this->lcs;
69322 if ($end == 0 || $ypos > $this->seq[$end]) {
69323 $this->seq[++$this->lcs] = $ypos;
69324 $this->in_seq[$ypos] = 1;
69325 return $this->lcs;
69326 }
69327
69328 $beg = 1;
69329 while ($beg < $end) {
69330 $mid = (int)(($beg + $end) / 2);
69331 if ($ypos > $this->seq[$mid]) {
69332 $beg = $mid + 1;
69333 } else {
69334 $end = $mid;
69335 }
69336 }
69337
69338 assert($ypos != $this->seq[$end]);
69339
69340 $this->in_seq[$this->seq[$end]] = false;
69341 $this->seq[$end] = $ypos;
69342 $this->in_seq[$ypos] = 1;
69343 return $end;
69344 }
69345
69346 /**
69347 * Finds LCS of two sequences.
69348 *
69349 * The results are recorded in the vectors $this->{x,y}changed[], by
69350 * storing a 1 in the element for each line that is an insertion or
69351 * deletion (ie. is not in the LCS).
69352 *
69353 * The subsequence of file 0 is (XOFF, XLIM) and likewise for file 1.
69354 *
69355 * Note that XLIM, YLIM are exclusive bounds. All line numbers are
69356 * origin-0 and discarded lines are not counted.
69357 */
69358 function _compareseq ($xoff, $xlim, $yoff, $ylim)
69359 {
69360 /* Slide down the bottom initial diagonal. */
69361 while ($xoff < $xlim && $yoff < $ylim
69362 && $this->xv[$xoff] == $this->yv[$yoff]) {
69363 ++$xoff;
69364 ++$yoff;
69365 }
69366
69367 /* Slide up the top initial diagonal. */
69368 while ($xlim > $xoff && $ylim > $yoff
69369 && $this->xv[$xlim - 1] == $this->yv[$ylim - 1]) {
69370 --$xlim;
69371 --$ylim;
69372 }
69373
69374 if ($xoff == $xlim || $yoff == $ylim) {
69375 $lcs = 0;
69376 } else {
69377 /* This is ad hoc but seems to work well. $nchunks =
69378 * sqrt(min($xlim - $xoff, $ylim - $yoff) / 2.5); $nchunks =
69379 * max(2,min(8,(int)$nchunks)); */
69380 $nchunks = min(7, $xlim - $xoff, $ylim - $yoff) + 1;
69381 list($lcs, $seps)
69382 = $this->_diag($xoff, $xlim, $yoff, $ylim, $nchunks);
69383 }
69384
69385 if ($lcs == 0) {
69386 /* X and Y sequences have no common subsequence: mark all
69387 * changed. */
69388 while ($yoff < $ylim) {
69389 $this->ychanged[$this->yind[$yoff++]] = 1;
69390 }
69391 while ($xoff < $xlim) {
69392 $this->xchanged[$this->xind[$xoff++]] = 1;
69393 }
69394 } else {
69395 /* Use the partitions to split this problem into subproblems. */
69396 reset($seps);
69397 $pt1 = $seps[0];
69398 while ($pt2 = next($seps)) {
69399 $this->_compareseq ($pt1[0], $pt2[0], $pt1[1], $pt2[1]);
69400 $pt1 = $pt2;
69401 }
69402 }
69403 }
69404
69405 /**
69406 * Adjusts inserts/deletes of identical lines to join changes as much as
69407 * possible.
69408 *
69409 * We do something when a run of changed lines include a line at one end
69410 * and has an excluded, identical line at the other. We are free to
69411 * choose which identical line is included. `compareseq' usually chooses
69412 * the one at the beginning, but usually it is cleaner to consider the
69413 * following identical line to be the "change".
69414 *
69415 * This is extracted verbatim from analyze.c (GNU diffutils-2.7).
69416 */
69417 function _shiftBoundaries($lines, &$changed, $other_changed)
69418 {
69419 $i = 0;
69420 $j = 0;
69421
69422 assert('count($lines) == count($changed)');
69423 $len = count($lines);
69424 $other_len = count($other_changed);
69425
69426 while (1) {
69427 /* Scan forward to find the beginning of another run of
69428 * changes. Also keep track of the corresponding point in the
69429 * other file.
69430 *
69431 * Throughout this code, $i and $j are adjusted together so that
69432 * the first $i elements of $changed and the first $j elements of
69433 * $other_changed both contain the same number of zeros (unchanged
69434 * lines).
69435 *
69436 * Furthermore, $j is always kept so that $j == $other_len or
69437 * $other_changed[$j] == false. */
69438 while ($j < $other_len && $other_changed[$j]) {
69439 $j++;
69440 }
69441
69442 while ($i < $len && ! $changed[$i]) {
69443 assert('$j < $other_len && ! $other_changed[$j]');
69444 $i++; $j++;
69445 while ($j < $other_len && $other_changed[$j]) {
69446 $j++;
69447 }
69448 }
69449
69450 if ($i == $len) {
69451 break;
69452 }
69453
69454 $start = $i;
69455
69456 /* Find the end of this run of changes. */
69457 while (++$i < $len && $changed[$i]) {
69458 continue;
69459 }
69460
69461 do {
69462 /* Record the length of this run of changes, so that we can
69463 * later determine whether the run has grown. */
69464 $runlength = $i - $start;
69465
69466 /* Move the changed region back, so long as the previous
69467 * unchanged line matches the last changed one. This merges
69468 * with previous changed regions. */
69469 while ($start > 0 && $lines[$start - 1] == $lines[$i - 1]) {
69470 $changed[--$start] = 1;
69471 $changed[--$i] = false;
69472 while ($start > 0 && $changed[$start - 1]) {
69473 $start--;
69474 }
69475 assert('$j > 0');
69476 while ($other_changed[--$j]) {
69477 continue;
69478 }
69479 assert('$j >= 0 && !$other_changed[$j]');
69480 }
69481
69482 /* Set CORRESPONDING to the end of the changed run, at the
69483 * last point where it corresponds to a changed run in the
69484 * other file. CORRESPONDING == LEN means no such point has
69485 * been found. */
69486 $corresponding = $j < $other_len ? $i : $len;
69487
69488 /* Move the changed region forward, so long as the first
69489 * changed line matches the following unchanged one. This
69490 * merges with following changed regions. Do this second, so
69491 * that if there are no merges, the changed region is moved
69492 * forward as far as possible. */
69493 while ($i < $len && $lines[$start] == $lines[$i]) {
69494 $changed[$start++] = false;
69495 $changed[$i++] = 1;
69496 while ($i < $len && $changed[$i]) {
69497 $i++;
69498 }
69499
69500 assert('$j < $other_len && ! $other_changed[$j]');
69501 $j++;
69502 if ($j < $other_len && $other_changed[$j]) {
69503 $corresponding = $i;
69504 while ($j < $other_len && $other_changed[$j]) {
69505 $j++;
69506 }
69507 }
69508 }
69509 } while ($runlength != $i - $start);
69510
69511 /* If possible, move the fully-merged run of changes back to a
69512 * corresponding run in the other file. */
69513 while ($corresponding < $i) {
69514 $changed[--$start] = 1;
69515 $changed[--$i] = 0;
69516 assert('$j > 0');
69517 while ($other_changed[--$j]) {
69518 continue;
69519 }
69520 assert('$j >= 0 && !$other_changed[$j]');
69521 }
69522 }
69523 }
69524
69525 }