raw
vtools_genesis          1 
vtools_genesis 2 #include "diff.h"
vtools_genesis 3 #include <cmpbuf.h>
vtools_genesis 4 #include <error.h>
vtools_genesis 5 #include <xalloc.h>
vtools_genesis 6 #include <stdlib.h>
vtools_genesis 7
vtools_genesis 8 /* The core of the Diff algorithm. */
vtools_genesis 9 #define ELEMENT lin
vtools_genesis 10 #define EQUAL(x, y) ((x) == (y))
vtools_genesis 11 #define OFFSET lin
vtools_genesis 12 #define EXTRA_CONTEXT_FIELDS /* none */
vtools_genesis 13 #define NOTE_DELETE(c, xoff) (files[0].changed[files[0].realindexes[xoff]] = 1)
vtools_genesis 14 #define NOTE_INSERT(c, yoff) (files[1].changed[files[1].realindexes[yoff]] = 1)
vtools_genesis 15 #define USE_HEURISTIC 1
vtools_genesis 16 #include <limits.h>
vtools_genesis 17 #include <diffseq.h>
vtools_genesis 18 #include <sys/stat.h>
vtools_genesis 19
vtools_genesis 20 /* Discard lines from one file that have no matches in the other file.
vtools_genesis 21
vtools_genesis 22 A line which is discarded will not be considered by the actual
vtools_genesis 23 comparison algorithm; it will be as if that line were not in the
vtools_genesis 24 file. The file's |realindexes| table maps virtual line numbers
vtools_genesis 25 (which don't count the discarded lines) into real line numbers;
vtools_genesis 26 this is how the actual comparison algorithm produces results that
vtools_genesis 27 are comprehensible when the discarded lines are counted.
vtools_genesis 28
vtools_genesis 29 When we discard a line, we also mark it as a deletion or insertion
vtools_genesis 30 so that it will be printed in the output. */
vtools_genesis 31
vtools_genesis 32 static void
vtools_genesis 33 discard_confusing_lines(struct file_data filevec[]) {
vtools_genesis 34 int f;
vtools_genesis 35 lin i;
vtools_genesis 36 char *discarded[2];
vtools_genesis 37 lin *equiv_count[2];
vtools_genesis 38 lin *p;
vtools_genesis 39
vtools_genesis 40 /* Allocate our results. */
vtools_genesis 41 p = xmalloc((filevec[0].buffered_lines + filevec[1].buffered_lines)
vtools_genesis 42 * (2 * sizeof *p));
vtools_genesis 43 for (f = 0; f < 2; f++) {
vtools_genesis 44 filevec[f].undiscarded = p;
vtools_genesis 45 p += filevec[f].buffered_lines;
vtools_genesis 46 filevec[f].realindexes = p;
vtools_genesis 47 p += filevec[f].buffered_lines;
vtools_genesis 48 }
vtools_genesis 49
vtools_genesis 50 /* Set up |equiv_count[f][i]| as the number of lines in file |f|
vtools_genesis 51 that fall in equivalence class |i|. */
vtools_genesis 52
vtools_genesis 53 p = zalloc(filevec[0].equiv_max * (2 * sizeof *p));
vtools_genesis 54 equiv_count[0] = p;
vtools_genesis 55 equiv_count[1] = p + filevec[0].equiv_max;
vtools_genesis 56
vtools_genesis 57 for (i = 0; i < filevec[0].buffered_lines; ++i)
vtools_genesis 58 ++equiv_count[0][filevec[0].equivs[i]];
vtools_genesis 59 for (i = 0; i < filevec[1].buffered_lines; ++i)
vtools_genesis 60 ++equiv_count[1][filevec[1].equivs[i]];
vtools_genesis 61
vtools_genesis 62 /* Set up tables of which lines are going to be discarded. */
vtools_genesis 63
vtools_genesis 64 discarded[0] = zalloc(filevec[0].buffered_lines
vtools_genesis 65 + filevec[1].buffered_lines);
vtools_genesis 66 discarded[1] = discarded[0] + filevec[0].buffered_lines;
vtools_genesis 67
vtools_genesis 68 /* Mark to be discarded each line that matches no line of the
vtools_genesis 69 other file. If a line matches many lines, mark it as
vtools_genesis 70 provisionally discardable. */
vtools_genesis 71
vtools_genesis 72 for (f = 0; f < 2; f++) {
vtools_genesis 73 size_t end = filevec[f].buffered_lines;
vtools_genesis 74 char *discards = discarded[f];
vtools_genesis 75 lin *counts = equiv_count[1 - f];
vtools_genesis 76 lin *equivs = filevec[f].equivs;
vtools_genesis 77 size_t many = 5;
vtools_genesis 78 size_t tem = end / 64;
vtools_genesis 79
vtools_genesis 80 /* Multiply |many| by approximate square root of number of
vtools_genesis 81 lines. That is the threshold for provisionally discardable
vtools_genesis 82 lines. */
vtools_genesis 83 while ((tem = tem >> 2) > 0)
vtools_genesis 84 many *= 2;
vtools_genesis 85
vtools_genesis 86 for (i = 0; i < end; i++) {
vtools_genesis 87 lin nmatch;
vtools_genesis 88 if (equivs[i] == 0)
vtools_genesis 89 continue;
vtools_genesis 90 nmatch = counts[equivs[i]];
vtools_genesis 91 if (nmatch == 0)
vtools_genesis 92 discards[i] = 1;
vtools_genesis 93 else if (nmatch > many)
vtools_genesis 94 discards[i] = 2;
vtools_genesis 95 }
vtools_genesis 96 }
vtools_genesis 97
vtools_genesis 98 /* Don't really discard the provisional lines except when they
vtools_genesis 99 occur in a run of discardables, with nonprovisionals at the
vtools_genesis 100 beginning and end. */
vtools_genesis 101
vtools_genesis 102 for (f = 0; f < 2; f++) {
vtools_genesis 103 lin end = filevec[f].buffered_lines;
vtools_genesis 104 register char *discards = discarded[f];
vtools_genesis 105
vtools_genesis 106 for (i = 0; i < end; i++) {
vtools_genesis 107 /* Cancel provisional discards not in middle of run of
vtools_genesis 108 discards. */
vtools_genesis 109 if (discards[i] == 2)
vtools_genesis 110 discards[i] = 0;
vtools_genesis 111 else if (discards[i] != 0) {
vtools_genesis 112 /* We have found a nonprovisional discard. */
vtools_genesis 113 register lin j;
vtools_genesis 114 lin length;
vtools_genesis 115 lin provisional = 0;
vtools_genesis 116
vtools_genesis 117 /* Find end of this run of discardable lines. Count
vtools_genesis 118 how many are provisionally discardable. */
vtools_genesis 119 for (j = i; j < end; j++) {
vtools_genesis 120 if (discards[j] == 0)
vtools_genesis 121 break;
vtools_genesis 122 if (discards[j] == 2)
vtools_genesis 123 ++provisional;
vtools_genesis 124 }
vtools_genesis 125
vtools_genesis 126 /* Cancel provisional discards at end, and shrink the
vtools_genesis 127 run. */
vtools_genesis 128 while (j > i && discards[j - 1] == 2)
vtools_genesis 129 discards[--j] = 0, --provisional;
vtools_genesis 130
vtools_genesis 131 /* Now we have the length of a run of discardable
vtools_genesis 132 lines whose first and last are not provisional. */
vtools_genesis 133 length = j - i;
vtools_genesis 134
vtools_genesis 135 /* If $1/4$ of the lines in the run are provisional,
vtools_genesis 136 cancel discarding of all provisional lines in the
vtools_genesis 137 run. */
vtools_genesis 138 if (provisional * 4 > length) {
vtools_genesis 139 while (j > i)
vtools_genesis 140 if (discards[--j] == 2)
vtools_genesis 141 discards[j] = 0;
vtools_genesis 142 } else {
vtools_genesis 143 register lin consec;
vtools_genesis 144 lin minimum = 1;
vtools_genesis 145 lin tem = length >> 2;
vtools_genesis 146
vtools_genesis 147 /* |minimum| is approximate square root of
vtools_genesis 148 |length/4|. A subrun of two or more
vtools_genesis 149 provisionals can stand when |length| is at
vtools_genesis 150 least 16. A subrun of 4 or more can stand when
vtools_genesis 151 |LENGTH >= 64|. */
vtools_genesis 152 while (0 < (tem >>= 2))
vtools_genesis 153 minimum <<= 1;
vtools_genesis 154 minimum++;
vtools_genesis 155
vtools_genesis 156 /* Cancel any subrun of |minimum| or more
vtools_genesis 157 provisionals within the larger run. */
vtools_genesis 158 for (j = 0, consec = 0; j < length; j++)
vtools_genesis 159 if (discards[i + j] != 2)
vtools_genesis 160 consec = 0;
vtools_genesis 161 else if (minimum == ++consec)
vtools_genesis 162 /* Back up to start of subrun, to cancel
vtools_genesis 163 it all. */
vtools_genesis 164 j -= consec;
vtools_genesis 165 else if (minimum < consec)
vtools_genesis 166 discards[i + j] = 0;
vtools_genesis 167
vtools_genesis 168 /* Scan from beginning of run until we find 3 or
vtools_genesis 169 more nonprovisionals in a row or until the
vtools_genesis 170 first nonprovisional at least 8 lines in.
vtools_genesis 171 Until that point, cancel any provisionals. */
vtools_genesis 172 for (j = 0, consec = 0; j < length; j++) {
vtools_genesis 173 if (j >= 8 && discards[i + j] == 1)
vtools_genesis 174 break;
vtools_genesis 175 if (discards[i + j] == 2)
vtools_genesis 176 consec = 0, discards[i + j] = 0;
vtools_genesis 177 else if (discards[i + j] == 0)
vtools_genesis 178 consec = 0;
vtools_genesis 179 else
vtools_genesis 180 consec++;
vtools_genesis 181 if (consec == 3)
vtools_genesis 182 break;
vtools_genesis 183 }
vtools_genesis 184
vtools_genesis 185 /* I advances to the last line of the run. */
vtools_genesis 186 i += length - 1;
vtools_genesis 187
vtools_genesis 188 /* Same thing, from end. */
vtools_genesis 189 for (j = 0, consec = 0; j < length; j++) {
vtools_genesis 190 if (j >= 8 && discards[i - j] == 1)
vtools_genesis 191 break;
vtools_genesis 192 if (discards[i - j] == 2)
vtools_genesis 193 consec = 0, discards[i - j] = 0;
vtools_genesis 194 else if (discards[i - j] == 0)
vtools_genesis 195 consec = 0;
vtools_genesis 196 else
vtools_genesis 197 consec++;
vtools_genesis 198 if (consec == 3)
vtools_genesis 199 break;
vtools_genesis 200 }
vtools_genesis 201 }
vtools_genesis 202 }
vtools_genesis 203 }
vtools_genesis 204 }
vtools_genesis 205
vtools_genesis 206 /* Actually discard the lines. */
vtools_genesis 207 for (f = 0; f < 2; f++) {
vtools_genesis 208 char *discards = discarded[f];
vtools_genesis 209 lin end = filevec[f].buffered_lines;
vtools_genesis 210 lin j = 0;
vtools_genesis 211 for (i = 0; i < end; ++i)
vtools_genesis 212 if (minimal || discards[i] == 0) {
vtools_genesis 213 filevec[f].undiscarded[j] = filevec[f].equivs[i];
vtools_genesis 214 filevec[f].realindexes[j++] = i;
vtools_genesis 215 } else
vtools_genesis 216 filevec[f].changed[i] = 1;
vtools_genesis 217 filevec[f].nondiscarded_lines = j;
vtools_genesis 218 }
vtools_genesis 219
vtools_genesis 220 free(discarded[0]);
vtools_genesis 221 free(equiv_count[0]);
vtools_genesis 222 }
vtools_genesis 223
vtools_genesis 224 /* Adjust inserts/deletes of identical lines to join changes as much
vtools_genesis 225 as possible.
vtools_genesis 226
vtools_genesis 227 We do something when a run of changed lines include a line at one
vtools_genesis 228 end and have an excluded, identical line at the other. We are free
vtools_genesis 229 to choose which identical line is included. |compareseq| usually
vtools_genesis 230 chooses the one at the beginning, but usually it is cleaner to
vtools_genesis 231 consider the following identical line to be the "change". */
vtools_genesis 232
vtools_genesis 233 static void
vtools_genesis 234 shift_boundaries(struct file_data filevec[]) {
vtools_genesis 235 int f;
vtools_genesis 236
vtools_genesis 237 for (f = 0; f < 2; f++) {
vtools_genesis 238 char *changed = filevec[f].changed;
vtools_genesis 239 char *other_changed = filevec[1 - f].changed;
vtools_genesis 240 lin const *equivs = filevec[f].equivs;
vtools_genesis 241 lin i = 0;
vtools_genesis 242 lin j = 0;
vtools_genesis 243 lin i_end = filevec[f].buffered_lines;
vtools_genesis 244
vtools_genesis 245 while (1) {
vtools_genesis 246 lin runlength, start, corresponding;
vtools_genesis 247
vtools_genesis 248 /* Scan forwards to find beginning of another run of
vtools_genesis 249 changes. Also keep track of the corresponding point in
vtools_genesis 250 the other file. */
vtools_genesis 251
vtools_genesis 252 while (i < i_end && !changed[i]) {
vtools_genesis 253 while (other_changed[j++])
vtools_genesis 254 continue;
vtools_genesis 255 i++;
vtools_genesis 256 }
vtools_genesis 257
vtools_genesis 258 if (i == i_end)
vtools_genesis 259 break;
vtools_genesis 260
vtools_genesis 261 start = i;
vtools_genesis 262
vtools_genesis 263 /* Find the end of this run of changes. */
vtools_genesis 264
vtools_genesis 265 while (changed[++i])
vtools_genesis 266 continue;
vtools_genesis 267 while (other_changed[j])
vtools_genesis 268 j++;
vtools_genesis 269
vtools_genesis 270 do {
vtools_genesis 271 /* Record the length of this run of changes, so that
vtools_genesis 272 we can later determine whether the run has grown. */
vtools_genesis 273 runlength = i - start;
vtools_genesis 274
vtools_genesis 275 /* Move the changed region back, so long as the
vtools_genesis 276 previous unchanged line matches the last changed one.
vtools_genesis 277 This merges with previous changed regions. */
vtools_genesis 278
vtools_genesis 279 while (start && equivs[start - 1] == equivs[i - 1]) {
vtools_genesis 280 changed[--start] = 1;
vtools_genesis 281 changed[--i] = 0;
vtools_genesis 282 while (changed[start - 1])
vtools_genesis 283 start--;
vtools_genesis 284 while (other_changed[--j])
vtools_genesis 285 continue;
vtools_genesis 286 }
vtools_genesis 287
vtools_genesis 288 /* Set |corresponding| to the end of the changed run,
vtools_genesis 289 at the last point where it corresponds to a changed run
vtools_genesis 290 in the other file. |corresponding == i_end| means no
vtools_genesis 291 such point has been found. */
vtools_genesis 292 corresponding = other_changed[j - 1] ? i : i_end;
vtools_genesis 293
vtools_genesis 294 /* Move the changed region forward, so long as the
vtools_genesis 295 first changed line matches the following unchanged one.
vtools_genesis 296 This merges with following changed regions. Do this
vtools_genesis 297 second, so that if there are no merges, the changed
vtools_genesis 298 region is moved forward as far as possible. */
vtools_genesis 299
vtools_genesis 300 while (i != i_end && equivs[start] == equivs[i]) {
vtools_genesis 301 changed[start++] = 0;
vtools_genesis 302 changed[i++] = 1;
vtools_genesis 303 while (changed[i])
vtools_genesis 304 i++;
vtools_genesis 305 while (other_changed[++j])
vtools_genesis 306 corresponding = i;
vtools_genesis 307 }
vtools_genesis 308 } while (runlength != i - start);
vtools_genesis 309
vtools_genesis 310 /* If possible, move the fully-merged run of changes back
vtools_genesis 311 to a corresponding run in the other file. */
vtools_genesis 312
vtools_genesis 313 while (corresponding < i) {
vtools_genesis 314 changed[--start] = 1;
vtools_genesis 315 changed[--i] = 0;
vtools_genesis 316 while (other_changed[--j])
vtools_genesis 317 continue;
vtools_genesis 318 }
vtools_genesis 319 }
vtools_genesis 320 }
vtools_genesis 321 }
vtools_genesis 322
vtools_genesis 323 /* Cons an additional entry onto the front of an edit script |old|.
vtools_genesis 324 |line0| and |line1| are the first affected lines in the two files
vtools_genesis 325 (origin 0). |deleted| is the number of lines deleted here from
vtools_genesis 326 file 0. |inserted| is the number of lines inserted here in file 1.
vtools_genesis 327
vtools_genesis 328 If |deleted| is 0 then |line0| is the number of the line before
vtools_genesis 329 which the insertion was done; vice versa for |inserted| and
vtools_genesis 330 |line1|. */
vtools_genesis 331
vtools_genesis 332 static struct change *
vtools_genesis 333 add_change(lin line0, lin line1, lin deleted, lin inserted,
vtools_genesis 334 struct change *old) {
vtools_genesis 335 struct change *new = xmalloc(sizeof *new);
vtools_genesis 336
vtools_genesis 337 new->line0 = line0;
vtools_genesis 338 new->line1 = line1;
vtools_genesis 339 new->inserted = inserted;
vtools_genesis 340 new->deleted = deleted;
vtools_genesis 341 new->link = old;
vtools_genesis 342 return new;
vtools_genesis 343 }
vtools_genesis 344
vtools_genesis 345 /* Scan the tables of which lines are inserted and deleted, producing
vtools_genesis 346 an edit script in reverse order. */
vtools_genesis 347
vtools_genesis 348 static struct change *
vtools_genesis 349 build_reverse_script(struct file_data const filevec[]) {
vtools_genesis 350 struct change *script = 0;
vtools_genesis 351 char *changed0 = filevec[0].changed;
vtools_genesis 352 char *changed1 = filevec[1].changed;
vtools_genesis 353 lin len0 = filevec[0].buffered_lines;
vtools_genesis 354 lin len1 = filevec[1].buffered_lines;
vtools_genesis 355
vtools_genesis 356 /* Note that |changedN[lenN]| does exist, and is 0. */
vtools_genesis 357
vtools_genesis 358 lin i0 = 0, i1 = 0;
vtools_genesis 359
vtools_genesis 360 while (i0 < len0 || i1 < len1) {
vtools_genesis 361 if (changed0[i0] | changed1[i1]) {
vtools_genesis 362 lin line0 = i0, line1 = i1;
vtools_genesis 363
vtools_genesis 364 /* Find \# lines changed here in each file. */
vtools_genesis 365 while (changed0[i0]) ++i0;
vtools_genesis 366 while (changed1[i1]) ++i1;
vtools_genesis 367
vtools_genesis 368 /* Record this change. */
vtools_genesis 369 script = add_change(line0, line1, i0 - line0, i1 - line1, script);
vtools_genesis 370 }
vtools_genesis 371
vtools_genesis 372 /* We have reached lines in the two files that match each
vtools_genesis 373 other. */
vtools_genesis 374 i0++, i1++;
vtools_genesis 375 }
vtools_genesis 376
vtools_genesis 377 return script;
vtools_genesis 378 }
vtools_genesis 379
vtools_genesis 380 /* Scan the tables of which lines are inserted and deleted, producing
vtools_genesis 381 an edit script in forward order. */
vtools_genesis 382
vtools_genesis 383 static struct change *
vtools_genesis 384 build_script(struct file_data const filevec[]) {
vtools_genesis 385 struct change *script = 0;
vtools_genesis 386 char *changed0 = filevec[0].changed;
vtools_genesis 387 char *changed1 = filevec[1].changed;
vtools_genesis 388 lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
vtools_genesis 389
vtools_genesis 390 /* Note that |changedN[-1]| does exist, and is 0. */
vtools_genesis 391
vtools_genesis 392 while (i0 >= 0 || i1 >= 0) {
vtools_genesis 393 if (changed0[i0 - 1] | changed1[i1 - 1]) {
vtools_genesis 394 lin line0 = i0, line1 = i1;
vtools_genesis 395
vtools_genesis 396 /* Find \# lines changed here in each file. */
vtools_genesis 397 while (changed0[i0 - 1]) --i0;
vtools_genesis 398 while (changed1[i1 - 1]) --i1;
vtools_genesis 399
vtools_genesis 400 /* Record this change. */
vtools_genesis 401 script = add_change(i0, i1, line0 - i0, line1 - i1, script);
vtools_genesis 402 }
vtools_genesis 403
vtools_genesis 404 /* We have reached lines in the two files that match each
vtools_genesis 405 other. */
vtools_genesis 406 i0--, i1--;
vtools_genesis 407 }
vtools_genesis 408
vtools_genesis 409 return script;
vtools_genesis 410 }
vtools_genesis 411
vtools_genesis 412 /* If |changes|, briefly report that two files differed. */
vtools_genesis 413 static void
vtools_genesis 414 briefly_report(int changes, struct file_data const filevec[]) {
vtools_genesis 415 if (changes)
vtools_genesis 416 message((brief
vtools_genesis 417 ? "Files %s and %s differ\n"
vtools_genesis 418 : "Binary files %s and %s differ\n"),
vtools_genesis 419 file_label[0] ? file_label[0] : filevec[0].name,
vtools_genesis 420 file_label[1] ? file_label[1] : filevec[1].name);
vtools_genesis 421 }
vtools_genesis 422
vtools_genesis 423 /* Report the differences of two files. */
vtools_genesis 424 int
vtools_genesis 425 diff_2_files(struct comparison *cmp) {
vtools_genesis 426 int f;
vtools_genesis 427 struct change *e, *p;
vtools_genesis 428 struct change *script;
vtools_genesis 429 int changes;
vtools_genesis 430
vdiff_keccak 431 cmp->file[0].hash_context = keccak_begin();
vdiff_keccak 432 cmp->file[1].hash_context = keccak_begin();
vtools_genesis 433
vtools_genesis 434 /* If we have detected that either file is binary, compare the two
vtools_genesis 435 files as binary. This can happen only when the first chunk is
vtools_genesis 436 read. */
vtools_genesis 437
vtools_genesis 438 if (read_files(cmp->file, files_can_be_treated_as_binary)) {
vtools_genesis 439 /* Files with different lengths must be different. */
vtools_genesis 440 if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
vtools_genesis 441 && 0 < cmp->file[0].stat.st_size
vtools_genesis 442 && 0 < cmp->file[1].stat.st_size
vtools_genesis 443 && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
vtools_genesis 444 && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
vtools_genesis 445 changes = 1;
vtools_genesis 446
vtools_genesis 447 /* Standard input equals itself. */
vtools_genesis 448 else if (cmp->file[0].desc == cmp->file[1].desc)
vtools_genesis 449 changes = 0;
vtools_genesis 450
vtools_genesis 451 else
vtools_genesis 452 /* Scan both files, a buffer at a time, looking for a
vtools_genesis 453 difference. */
vtools_genesis 454 {
vtools_genesis 455 /* Allocate same-sized buffers for both files. */
vtools_genesis 456 size_t lcm_max = PTRDIFF_MAX - 1;
vtools_genesis 457 size_t buffer_size =
vtools_genesis 458 buffer_lcm(sizeof(word),
vtools_genesis 459 buffer_lcm(STAT_BLOCKSIZE (cmp->file[0].stat),
vtools_genesis 460 STAT_BLOCKSIZE (cmp->file[1].stat),
vtools_genesis 461 lcm_max),
vtools_genesis 462 lcm_max);
vtools_genesis 463 for (f = 0; f < 2; f++)
vtools_genesis 464 cmp->file[f].buffer = xrealloc(cmp->file[f].buffer, buffer_size);
vtools_genesis 465
vtools_genesis 466 for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0) {
vtools_genesis 467 /* Read a buffer's worth from both files. */
vtools_genesis 468 for (f = 0; f < 2; f++)
vtools_genesis 469 if (0 <= cmp->file[f].desc)
vtools_genesis 470 file_block_read(&cmp->file[f],
vtools_genesis 471 buffer_size - cmp->file[f].buffered);
vtools_genesis 472
vtools_genesis 473 /* If the buffers differ, the files differ. */
vtools_genesis 474 if (cmp->file[0].buffered != cmp->file[1].buffered
vtools_genesis 475 || memcmp(cmp->file[0].buffer,
vtools_genesis 476 cmp->file[1].buffer,
vtools_genesis 477 cmp->file[0].buffered)) {
vtools_genesis 478 changes = 1;
vtools_genesis 479 break;
vtools_genesis 480 }
vtools_genesis 481
vtools_genesis 482 /* If we reach end of file, the files are the
vtools_genesis 483 same. */
vtools_genesis 484 if (cmp->file[0].buffered != buffer_size) {
vtools_genesis 485 changes = 0;
vtools_genesis 486 break;
vtools_genesis 487 }
vtools_genesis 488 }
vtools_genesis 489 }
vtools_genesis 490
vtools_genesis 491 briefly_report(changes, cmp->file);
vtools_genesis 492 } else {
vtools_genesis 493 struct context ctxt;
vtools_genesis 494 lin diags;
vtools_genesis 495 lin too_expensive;
vtools_genesis 496
vtools_genesis 497 /* Allocate vectors for the results of comparison: a flag for
vtools_genesis 498 each line of each file, saying whether that line is an
vtools_genesis 499 insertion or deletion. Allocate an extra element, always 0, at
vtools_genesis 500 each end of each vector. */
vtools_genesis 501
vtools_genesis 502 size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
vtools_genesis 503 char *flag_space = zalloc(s);
vtools_genesis 504 cmp->file[0].changed = flag_space + 1;
vtools_genesis 505 cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
vtools_genesis 506
vtools_genesis 507 /* Some lines are obviously insertions or deletions because
vtools_genesis 508 they don't match anything. Detect them now, and avoid even
vtools_genesis 509 thinking about them in the main comparison algorithm. */
vtools_genesis 510
vtools_genesis 511 discard_confusing_lines(cmp->file);
vtools_genesis 512
vtools_genesis 513 /* Now do the main comparison algorithm, considering just the
vtools_genesis 514 undiscarded lines. */
vtools_genesis 515
vtools_genesis 516 ctxt.xvec = cmp->file[0].undiscarded;
vtools_genesis 517 ctxt.yvec = cmp->file[1].undiscarded;
vtools_genesis 518 diags = (cmp->file[0].nondiscarded_lines
vtools_genesis 519 + cmp->file[1].nondiscarded_lines + 3);
vtools_genesis 520 ctxt.fdiag = xmalloc(diags * (2 * sizeof *ctxt.fdiag));
vtools_genesis 521 ctxt.bdiag = ctxt.fdiag + diags;
vtools_genesis 522 ctxt.fdiag += cmp->file[1].nondiscarded_lines + 1;
vtools_genesis 523 ctxt.bdiag += cmp->file[1].nondiscarded_lines + 1;
vtools_genesis 524
vtools_genesis 525 ctxt.heuristic = speed_large_files;
vtools_genesis 526
vtools_genesis 527 /* Set |too_expensive| to be the approximate square root of
vtools_genesis 528 the input size, bounded below by 4096. 4096 seems to be good
vtools_genesis 529 for circa-2016 CPUs; see |Bug#16848| and |Bug#24715|. */
vtools_genesis 530 too_expensive = 1;
vtools_genesis 531 for (; diags != 0; diags >>= 2)
vtools_genesis 532 too_expensive <<= 1;
vtools_genesis 533 ctxt.too_expensive = MAX (4096, too_expensive);
vtools_genesis 534
vtools_genesis 535 files[0] = cmp->file[0];
vtools_genesis 536 files[1] = cmp->file[1];
vtools_genesis 537
vtools_genesis 538 compareseq(0, cmp->file[0].nondiscarded_lines,
vtools_genesis 539 0, cmp->file[1].nondiscarded_lines, minimal, &ctxt);
vtools_genesis 540
vtools_genesis 541 free(ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1));
vtools_genesis 542
vtools_genesis 543 /* Modify the results slightly to make them prettier in cases
vtools_genesis 544 where that can validly be done. */
vtools_genesis 545
vtools_genesis 546 shift_boundaries(cmp->file);
vtools_genesis 547
vtools_genesis 548 /* Get the results of comparison in the form of a chain of
vtools_genesis 549 |struct change|s -- an edit script. */
vtools_genesis 550
vtools_genesis 551 script = build_script(cmp->file);
vtools_genesis 552
vtools_genesis 553 /* Set |changes| if we had any diffs. If some changes are
vtools_genesis 554 ignored, we must scan the script to decide. */
vtools_genesis 555 changes = (script != 0);
vtools_genesis 556
vtools_genesis 557 if (brief)
vtools_genesis 558 briefly_report(changes, cmp->file);
vtools_genesis 559 else {
vtools_genesis 560 if (changes) {
vtools_genesis 561 /* Record info for starting up output, to be used if
vtools_genesis 562 and when we have some output to print. */
vtools_genesis 563 setup_output(file_label[0] ? file_label[0] : cmp->file[0].name,
vtools_genesis 564 file_label[1] ? file_label[1] : cmp->file[1].name,
vtools_genesis 565 cmp->parent != 0);
vtools_genesis 566 print_context_script(script);
vtools_genesis 567 finish_output();
vtools_genesis 568 }
vtools_genesis 569 }
vtools_genesis 570
vtools_genesis 571 free(cmp->file[0].undiscarded);
vtools_genesis 572
vtools_genesis 573 free(flag_space);
vtools_genesis 574
vtools_genesis 575 for (f = 0; f < 2; f++) {
vtools_genesis 576 free(cmp->file[f].equivs);
vtools_genesis 577 free(cmp->file[f].linbuf + cmp->file[f].linbuf_base);
vtools_genesis 578 }
vtools_genesis 579
vtools_genesis 580 for (e = script; e; e = p) {
vtools_genesis 581 p = e->link;
vtools_genesis 582 free(e);
vtools_genesis 583 }
vtools_genesis 584 }
vtools_genesis 585
vtools_genesis 586 if (cmp->file[0].buffer != cmp->file[1].buffer)
vtools_genesis 587 free(cmp->file[0].buffer);
vtools_genesis 588 free(cmp->file[1].buffer);
vtools_genesis 589
vtools_genesis 590 return changes;
vtools_genesis 591 }