vtools_genesis

all signers: phf

antecedents:

press order:

patch:

-
+ AE3D7FE005038F18760EF9C78E0AA83BEAD91FE39020238FF712E46CD52682D0DA8DDF2067CDC77247C1466564CAF7089B5670CB7EA684B0076F7AA125945351
vtools/Makefile

(0 . 0)(1 . 37)
   5 
   6 CC:=gcc
   7 LDFLAGS:=-static
   8 
   9 SOURCES=lib/cmpbuf.c \
  10 	lib/dirname.c \
  11 	lib/error.c \
  12 	lib/filenamecat.c \
  13 	lib/filetype.c \
  14 	lib/hash.c \
  15 	lib/progname.c \
  16 	lib/xalloc.c \
  17 	src/analyze.c \
  18 	src/context.c \
  19 	src/diff.c \
  20 	src/dir.c \
  21 	src/io.c \
  22 	src/util.c
  23 HEADERS=lib/cmpbuf.h \
  24 	lib/diffseq.h \
  25 	lib/dirname.h \
  26 	lib/error.h \
  27 	lib/filenamecat.h \
  28 	lib/filetype.h \
  29 	lib/hash.h \
  30 	lib/progname.h \
  31 	lib/xalloc.h \
  32 	src/diff.h \
  33 	src/system.h
  34 OBJECTS=$(SRCS:.c=.o)
  35 
  36 vdiff: $(SOURCES) $(HEADERS)
  37 	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -std=c99 -Ilib/ -Isrc/ -o $@ $(SOURCES)
  38 
  39 clean:
  40 	@rm -f vdiff
  41

-
+ 48454218170DAD22CB7BD9088EC90F7F808FCDE914962D0FCA20B3331BEF95AFE749D1A218D21AE02F00588C8DC05607B35C1B7C7CD3B852D58AC0D504DD3C30
vtools/lib/cmpbuf.c

(0 . 0)(1 . 57)
  46 #include <errno.h>
  47 #include <limits.h>
  48 #include <signal.h>
  49 #include <unistd.h>
  50 #include <stdint.h>
  51 #include "system.h"
  52 #include "cmpbuf.h"
  53 
  54 /* Read |nbytes| bytes from descriptor |fd| into |buf|.  |nbytes| must
  55    not be |SIZE_MAX|.  Return the number of characters successfully
  56    read.  On error, return |SIZE_MAX|, setting |errno|.  The number
  57    returned is always |nbytes| unless end-of-file or error.  */
  58 
  59 size_t
  60 block_read(int fd, char *buf, size_t nbytes) {
  61     char *bp = buf;
  62     char const *buflim = buf + nbytes;
  63     size_t readlim = MIN (SSIZE_MAX, SIZE_MAX);
  64 
  65     do {
  66         size_t bytes_remaining = buflim - bp;
  67         size_t bytes_to_read = MIN (bytes_remaining, readlim);
  68         ssize_t nread = read(fd, bp, bytes_to_read);
  69         if (nread <= 0) {
  70             if (nread == 0)
  71                 break;
  72             return SIZE_MAX;
  73         }
  74         bp += nread;
  75     } while (bp < buflim);
  76 
  77     return bp - buf;
  78 }
  79 
  80 /* Least common multiple of two buffer sizes |a| and |b|.  However, if
  81    either |a| or |b| is zero, or if the multiple is greater than
  82    |lcm_max|, return a reasonable buffer size. */
  83 
  84 size_t
  85 buffer_lcm(size_t a, size_t b, size_t lcm_max) {
  86     size_t lcm, m, n, q, r;
  87 
  88     /* Yield reasonable values if buffer sizes are zero.  */
  89     if (!a)
  90         return b ? b : 8 * 1024;
  91     if (!b)
  92         return a;
  93 
  94     /* |n = gcd (a, b)| */
  95     for (m = a, n = b; (r = m % n) != 0; m = n, n = r)
  96         continue;
  97 
  98     /* Yield a if there is an overflow.  */
  99     q = a / n;
 100     lcm = q * b;
 101     return lcm <= lcm_max && lcm / b == q ? lcm : a;
 102 }

-
+ E52FB69AF603972D2409D2AA52D2FA4C7A5AA143778B1B2738207678FB27C38BA97BCB1200E32A6CAB425AD90E3FFB4A0720ED47D8D8A123484082EB9D0A1EFC
vtools/lib/cmpbuf.h

(0 . 0)(1 . 4)
 107 
 108 size_t block_read(int, char *, size_t);
 109 
 110 size_t buffer_lcm(size_t, size_t, size_t);

-
+ 8802BDF2CCDB11A2618988AB2791E89185379A02892642599CD00559C57C383AD85BFA5DAA5261508372143E5FF34054010E72C01D52063CDA88F4D3F97CCBBA
vtools/lib/diffseq.h

(0 . 0)(1 . 478)
 115 
 116 /* The basic idea is to consider two vectors as similar if, when
 117    transforming the first vector into the second vector through a
 118    sequence of edits (inserts and deletes of one element each), this
 119    sequence is short - or equivalently, if the ordered list of
 120    elements that are untouched by these edits is long.  For a good
 121    introduction to the subject, read about the ``Levenshtein
 122    distance'' in Wikipedia.
 123 
 124    The basic algorithm is described in: ``An O(ND) Difference
 125    Algorithm and its Variations'', Eugene W. Myers, Algorithmica
 126    Vol. 1, 1986, pp. 251-266,
 127    \url{http://dx.doi.org/10.1007/BF01840446}.  See especially section
 128    4.2, which describes the variation used below.
 129 
 130    The basic algorithm was independently discovered as described in:
 131    ``Algorithms for Approximate String Matching'', Esko Ukkonen,
 132    Information and Control Vol. 64, 1985, pp. 100-118,
 133    \url{http://dx.doi.org/10.1016/S0019-9958(85)80046-2}.
 134 
 135    Unless the |find_minimal| flag is set, this code uses the
 136    |TOO_EXPENSIVE| heuristic, by Paul Eggert, to limit the cost to
 137    $O(N^1.5 \log N)$ at the price of producing suboptimal output for
 138    large inputs with many differences.  */
 139 
 140 /* Before including this file, you need to define:
 141      |ELEMENT|                   The element type of the vectors being compared.
 142      |EQUAL|                     A two-argument macro that tests two elements for
 143                                equality.
 144      |OFFSET|                    A signed integer type sufficient to hold the
 145                                difference between two indices.  Usually
 146                                something like |ptrdiff_t|.
 147      |EXTRA_CONTEXT_FIELDS|    Declarations of fields for 'struct context'.
 148      |NOTE_DELETE(ctxt, xoff)| Record the removal of the object xvec[xoff].
 149      |NOTE_INSERT(ctxt, yoff)| Record the insertion of the object yvec[yoff].
 150      |EARLY_ABORT(ctxt)|       (Optional) A boolean expression that triggers an
 151                                early abort of the computation.
 152      |USE_HEURISTIC|          (Optional) Define if you want to support the
 153                                heuristic for large vectors.
 154    It is also possible to use this file with abstract arrays.  In this case,
 155    xvec and yvec are not represented in memory.  They only exist conceptually.
 156    In this case, the list of defines above is amended as follows:
 157      |ELEMENT|                 Undefined.
 158      |EQUAL|                   Undefined.
 159      |XVECREF_YVECREF_EQUAL(ctxt, xoff, yoff)|
 160                              A three-argument macro: References xvec[xoff] and
 161                              yvec[yoff] and tests these elements for equality.
 162    Before including this file, you also need to include:
 163      |#include <limits.h>
 164      #include <stdbool.h>|
 165  */
 166 
 167 /* Maximum value of type |OFFSET|.  */
 168 #define OFFSET_MAX \
 169   ((((OFFSET)1 << (sizeof (OFFSET) * CHAR_BIT - 2)) - 1) * 2 + 1)
 170 
 171 /* Default to no early abort.  */
 172 #ifndef EARLY_ABORT
 173 # define EARLY_ABORT(ctxt) false
 174 #endif
 175 
 176 /* Use this to suppress gcc's ``...may be used before initialized''
 177    warnings.  Beware: The Code argument must not contain commas.  */
 178 #ifndef IF_LINT
 179 # if defined GCC_LINT || defined lint
 180 #  define IF_LINT(Code) Code
 181 # else
 182 #  define IF_LINT(Code) /* empty */
 183 # endif
 184 #endif
 185 
 186 /* As above, but when Code must contain one comma. */
 187 #ifndef IF_LINT2
 188 # if defined GCC_LINT || defined lint
 189 #  define IF_LINT2(Code1, Code2) Code1, Code2
 190 # else
 191 #  define IF_LINT2(Code1, Code2) /* empty */
 192 # endif
 193 #endif
 194 
 195 /*
 196  * Context of comparison operation.
 197  */
 198 struct context {
 199 #ifdef ELEMENT
 200     /* Vectors being compared.  */
 201     ELEMENT const *xvec;
 202     ELEMENT const *yvec;
 203 #endif
 204 
 205     /* Extra fields.  */
 206     EXTRA_CONTEXT_FIELDS
 207 
 208     /* Vector, indexed by diagonal, containing 1 + the X coordinate of
 209        the point furthest along the given diagonal in the forward
 210        search of the edit matrix.  */
 211     OFFSET *fdiag;
 212 
 213     /* Vector, indexed by diagonal, containing the X coordinate of the
 214        point furthest along the given diagonal in the backward search
 215        of the edit matrix.  */
 216     OFFSET *bdiag;
 217 
 218 #ifdef USE_HEURISTIC
 219     /* This corresponds to the |diff --speed-large-files| flag.  With
 220        this heuristic, for vectors with a constant small density of
 221        changes, the algorithm is linear in the vector size.  */
 222     bool heuristic;
 223 #endif
 224 
 225     /* Edit scripts longer than this are too expensive to compute.  */
 226     OFFSET too_expensive;
 227 
 228     /* Snakes bigger than this are considered "big".  */
 229 #define SNAKE_LIMIT 20
 230 };
 231 
 232 struct partition {
 233     /* Midpoints of this partition.  */
 234     OFFSET xmid;
 235     OFFSET ymid;
 236 
 237     /* True if low half will be analyzed minimally.  */
 238     bool lo_minimal;
 239 
 240     /* Likewise for high half.  */
 241     bool hi_minimal;
 242 };
 243 
 244 
 245 /* Find the midpoint of the shortest edit script for a specified
 246    portion of the two vectors.
 247 
 248    Scan from the beginnings of the vectors, and simultaneously from
 249    the ends, doing a breadth-first search through the space of
 250    edit-sequence.  When the two searches meet, we have found the
 251    midpoint of the shortest edit sequence.
 252 
 253    If |find_minimal| is true, find the minimal edit script regardless
 254    of expense.  Otherwise, if the search is too expensive, use
 255    heuristics to stop the search and report a suboptimal answer.
 256 
 257    Set |part->(xmid,ymid)| to the midpoint |(xmid,ymid)|.  The
 258    diagonal number |xmid - ymid| equals the number of inserted
 259    elements minus the number of deleted elements (counting only
 260    elements before the midpoint).
 261 
 262    Set |part->lo_minimal| to true iff the minimal edit script for the
 263    left half of the partition is known; similarly for
 264    |part->hi_minimal|.
 265 
 266    This function assumes that the first elements of the specified
 267    portions of the two vectors do not match, and likewise that the
 268    last elements do not match.  The caller must trim matching elements
 269    from the beginning and end of the portions it is going to specify.
 270 
 271    If we return the "wrong" partitions, the worst this can do is cause
 272    suboptimal diff output.  It cannot cause incorrect diff output.  */
 273 
 274 static void
 275 diag(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
 276      struct partition *part, struct context *ctxt) {
 277     OFFSET *const fd = ctxt->fdiag;       /* Give the compiler a chance. */
 278     OFFSET *const bd = ctxt->bdiag;       /* Additional help for the compiler. */
 279 #ifdef ELEMENT
 280     ELEMENT const *const xv = ctxt->xvec; /* Still more help for the compiler. */
 281     ELEMENT const *const yv = ctxt->yvec; /* And more and more . . . */
 282 #define XREF_YREF_EQUAL(x, y)  EQUAL (xv[x], yv[y])
 283 #else
 284 #define XREF_YREF_EQUAL(x,y)  XVECREF_YVECREF_EQUAL (ctxt, x, y)
 285 #endif
 286     const OFFSET dmin = xoff - ylim;      /* Minimum valid diagonal. */
 287     const OFFSET dmax = xlim - yoff;      /* Maximum valid diagonal. */
 288     const OFFSET fmid = xoff - yoff;      /* Center diagonal of top-down search. */
 289     const OFFSET bmid = xlim - ylim;      /* Center diagonal of bottom-up search. */
 290     OFFSET fmin = fmid;
 291     OFFSET fmax = fmid;           /* Limits of top-down search. */
 292     OFFSET bmin = bmid;
 293     OFFSET bmax = bmid;           /* Limits of bottom-up search. */
 294     OFFSET c;                     /* Cost. */
 295     bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
 296                                    diagonal with respect to the northwest. */
 297 
 298     fd[fmid] = xoff;
 299     bd[bmid] = xlim;
 300 
 301     for (c = 1;; ++c) {
 302         OFFSET d;                 /* Active diagonal. */
 303         bool big_snake = false;
 304 
 305         /* Extend the top-down search by an edit step in each diagonal. */
 306         if (fmin > dmin)
 307             fd[--fmin - 1] = -1;
 308         else
 309             ++fmin;
 310         if (fmax < dmax)
 311             fd[++fmax + 1] = -1;
 312         else
 313             --fmax;
 314         for (d = fmax; d >= fmin; d -= 2) {
 315             OFFSET x;
 316             OFFSET y;
 317             OFFSET tlo = fd[d - 1];
 318             OFFSET thi = fd[d + 1];
 319             OFFSET x0 = tlo < thi ? thi : tlo + 1;
 320 
 321             for (x = x0, y = x0 - d;
 322                  x < xlim && y < ylim && XREF_YREF_EQUAL (x, y);
 323                  x++, y++)
 324                 continue;
 325             if (x - x0 > SNAKE_LIMIT)
 326                 big_snake = true;
 327             fd[d] = x;
 328             if (odd && bmin <= d && d <= bmax && bd[d] <= x) {
 329                 part->xmid = x;
 330                 part->ymid = y;
 331                 part->lo_minimal = part->hi_minimal = true;
 332                 return;
 333             }
 334         }
 335 
 336         /* Similarly extend the bottom-up search.  */
 337         if (bmin > dmin)
 338             bd[--bmin - 1] = OFFSET_MAX;
 339         else
 340             ++bmin;
 341         if (bmax < dmax)
 342             bd[++bmax + 1] = OFFSET_MAX;
 343         else
 344             --bmax;
 345         for (d = bmax; d >= bmin; d -= 2) {
 346             OFFSET x;
 347             OFFSET y;
 348             OFFSET tlo = bd[d - 1];
 349             OFFSET thi = bd[d + 1];
 350             OFFSET x0 = tlo < thi ? tlo : thi - 1;
 351 
 352             for (x = x0, y = x0 - d;
 353                  xoff < x && yoff < y && XREF_YREF_EQUAL (x - 1, y - 1);
 354                  x--, y--)
 355                 continue;
 356             if (x0 - x > SNAKE_LIMIT)
 357                 big_snake = true;
 358             bd[d] = x;
 359             if (!odd && fmin <= d && d <= fmax && x <= fd[d]) {
 360                 part->xmid = x;
 361                 part->ymid = y;
 362                 part->lo_minimal = part->hi_minimal = true;
 363                 return;
 364             }
 365         }
 366 
 367         if (find_minimal)
 368             continue;
 369 
 370 #ifdef USE_HEURISTIC
 371         /* Heuristic: check occasionally for a diagonal that has made
 372            lots of progress compared with the edit distance.  If we
 373            have any such, find the one that has made the most progress
 374            and return it as if it had succeeded.
 375 
 376            With this heuristic, for vectors with a constant small
 377            density of changes, the algorithm is linear in the vector
 378            size.  */
 379 
 380         if (200 < c && big_snake && ctxt->heuristic) {
 381             {
 382                 OFFSET best = 0;
 383 
 384                 for (d = fmax; d >= fmin; d -= 2) {
 385                     OFFSET dd = d - fmid;
 386                     OFFSET x = fd[d];
 387                     OFFSET y = x - d;
 388                     OFFSET v = (x - xoff) * 2 - dd;
 389 
 390                     if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
 391                         if (v > best
 392                             && xoff + SNAKE_LIMIT <= x && x < xlim
 393                             && yoff + SNAKE_LIMIT <= y && y < ylim) {
 394                             /* We have a good enough best diagonal;
 395                                now insist that it end with a
 396                                significant snake.  */
 397                             int k;
 398 
 399                             for (k = 1; XREF_YREF_EQUAL (x - k, y - k); k++)
 400                                 if (k == SNAKE_LIMIT) {
 401                                     best = v;
 402                                     part->xmid = x;
 403                                     part->ymid = y;
 404                                     break;
 405                                 }
 406                         }
 407                     }
 408                 }
 409                 if (best > 0) {
 410                     part->lo_minimal = true;
 411                     part->hi_minimal = false;
 412                     return;
 413                 }
 414             }
 415 
 416             {
 417                 OFFSET best = 0;
 418 
 419                 for (d = bmax; d >= bmin; d -= 2) {
 420                     OFFSET dd = d - bmid;
 421                     OFFSET x = bd[d];
 422                     OFFSET y = x - d;
 423                     OFFSET v = (xlim - x) * 2 + dd;
 424 
 425                     if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
 426                         if (v > best
 427                             && xoff < x && x <= xlim - SNAKE_LIMIT
 428                             && yoff < y && y <= ylim - SNAKE_LIMIT) {
 429                             /* We have a good enough best diagonal;
 430                                now insist that it end with a
 431                                significant snake.  */
 432                             int k;
 433 
 434                             for (k = 0; XREF_YREF_EQUAL (x + k, y + k); k++)
 435                                 if (k == SNAKE_LIMIT - 1) {
 436                                     best = v;
 437                                     part->xmid = x;
 438                                     part->ymid = y;
 439                                     break;
 440                                 }
 441                         }
 442                     }
 443                 }
 444                 if (best > 0) {
 445                     part->lo_minimal = false;
 446                     part->hi_minimal = true;
 447                     return;
 448                 }
 449             }
 450         }
 451 #endif
 452 
 453         /* Heuristic: if we've gone well beyond the call of duty, give
 454            up and report halfway between our best results so far.  */
 455         if (c >= ctxt->too_expensive) {
 456             OFFSET fxybest;
 457             OFFSET fxbest IF_LINT (= 0);
 458             OFFSET bxybest;
 459             OFFSET bxbest IF_LINT (= 0);
 460 
 461             /* Find forward diagonal that maximizes |X + Y|.  */
 462             fxybest = -1;
 463             for (d = fmax; d >= fmin; d -= 2) {
 464                 OFFSET x = MIN (fd[d], xlim);
 465                 OFFSET y = x - d;
 466                 if (ylim < y) {
 467                     x = ylim + d;
 468                     y = ylim;
 469                 }
 470                 if (fxybest < x + y) {
 471                     fxybest = x + y;
 472                     fxbest = x;
 473                 }
 474             }
 475 
 476             /* Find backward diagonal that minimizes |X + Y|.  */
 477             bxybest = OFFSET_MAX;
 478             for (d = bmax; d >= bmin; d -= 2) {
 479                 OFFSET x = MAX (xoff, bd[d]);
 480                 OFFSET y = x - d;
 481                 if (y < yoff) {
 482                     x = yoff + d;
 483                     y = yoff;
 484                 }
 485                 if (x + y < bxybest) {
 486                     bxybest = x + y;
 487                     bxbest = x;
 488                 }
 489             }
 490 
 491             /* Use the better of the two diagonals.  */
 492             if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) {
 493                 part->xmid = fxbest;
 494                 part->ymid = fxybest - fxbest;
 495                 part->lo_minimal = true;
 496                 part->hi_minimal = false;
 497             } else {
 498                 part->xmid = bxbest;
 499                 part->ymid = bxybest - bxbest;
 500                 part->lo_minimal = false;
 501                 part->hi_minimal = true;
 502             }
 503             return;
 504         }
 505     }
 506 #undef XREF_YREF_EQUAL
 507 }
 508 
 509 
 510 /* Compare in detail contiguous subsequences of the two vectors which
 511    are known, as a whole, to match each other.
 512 
 513    The subsequence of vector 0 is |[xoff, xlim)| and likewise for
 514    vector 1.
 515 
 516    Note that |xlim, ylim| are exclusive bounds.  All indices into the
 517    vectors are origin-0.
 518 
 519    If |find_minimal|, find a minimal difference no matter how
 520    expensive it is.
 521 
 522    The results are recorded by invoking |note_delete| and
 523    |note_insert|.
 524 
 525    Return false if terminated normally, or true if terminated through
 526    early abort.  */
 527 
 528 static bool
 529 compareseq(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
 530            bool find_minimal, struct context *ctxt) {
 531 #ifdef ELEMENT
 532     ELEMENT const *xv = ctxt->xvec; /* Help the compiler.  */
 533     ELEMENT const *yv = ctxt->yvec;
 534 #define XREF_YREF_EQUAL(x, y)  EQUAL (xv[x], yv[y])
 535 #else
 536 #define XREF_YREF_EQUAL(x,y)  XVECREF_YVECREF_EQUAL (ctxt, x, y)
 537 #endif
 538 
 539     /* Slide down the bottom initial diagonal.  */
 540     while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xoff, yoff)) {
 541         xoff++;
 542         yoff++;
 543     }
 544 
 545     /* Slide up the top initial diagonal. */
 546     while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xlim - 1, ylim - 1)) {
 547         xlim--;
 548         ylim--;
 549     }
 550 
 551     /* Handle simple cases. */
 552     if (xoff == xlim)
 553         while (yoff < ylim) {
 554             NOTE_INSERT (ctxt, yoff);
 555             if (EARLY_ABORT (ctxt))
 556                 return true;
 557             yoff++;
 558         }
 559     else if (yoff == ylim)
 560         while (xoff < xlim) {
 561             NOTE_DELETE (ctxt, xoff);
 562             if (EARLY_ABORT (ctxt))
 563                 return true;
 564             xoff++;
 565         }
 566     else {
 567         struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 });
 568 
 569         /* Find a point of correspondence in the middle of the vectors.  */
 570         diag(xoff, xlim, yoff, ylim, find_minimal, &part, ctxt);
 571 
 572         /* Use the partitions to split this problem into subproblems.  */
 573         if (compareseq(xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt))
 574             return true;
 575         if (compareseq(part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt))
 576             return true;
 577     }
 578 
 579     return false;
 580 #undef XREF_YREF_EQUAL
 581 }
 582 
 583 #undef ELEMENT
 584 #undef EQUAL
 585 #undef OFFSET
 586 #undef EXTRA_CONTEXT_FIELDS
 587 #undef NOTE_DELETE
 588 #undef NOTE_INSERT
 589 #undef EARLY_ABORT
 590 #undef USE_HEURISTIC
 591 #undef XVECREF_YVECREF_EQUAL
 592 #undef OFFSET_MAX

-
+ 1FBC6B96E09DF1B2470978CED1AF30DEFF7BE71E76AAF15873C60095E883C549309C7C5DAE5F608C0004F54B69884677D5FD1184B2224B453848D8879511F0FF
vtools/lib/dirname.c

(0 . 0)(1 . 41)
 597 #include "dirname.h"
 598 #include <string.h>
 599 #include <stdbool.h>
 600 
 601 /* Return the address of the last file name component of |name|.  If
 602    |name| has no relative file name components because it is a file
 603    system root, return the empty string.  */
 604 
 605 char *
 606 last_component(char const *name) {
 607     char const *base = name + 0;
 608     char const *p;
 609     bool saw_slash = false;
 610 
 611     while (((*base) == '/'))
 612         base++;
 613 
 614     for (p = base; *p; p++) {
 615         if (((*p) == '/'))
 616             saw_slash = true;
 617         else if (saw_slash) {
 618             base = p;
 619             saw_slash = false;
 620         }
 621     }
 622 
 623     return (char *) base;
 624 }
 625 
 626 /* Return the length of the basename |name|.  Typically |name| is the
 627    value returned by |base_name| or |last_component|.  Act like
 628    |strlen(name)|, except omit all trailing slashes.  */
 629 
 630 size_t
 631 base_len(char const *name) {
 632     size_t len;
 633 
 634     for (len = strlen(name); 1 < len && ((name[len - 1]) == '/'); len--)
 635         continue;
 636     return len;
 637 }

-
+ 5AC7F4190EFF243F401B3EB9E0AF4404BF391AE851FA443BB17A4C76BFAF9108239441352D5C7B90FA72EEA272965B5B54E8E72F1E42B2C99461D4EF2EF095A2
vtools/lib/dirname.h

(0 . 0)(1 . 10)
 642 #ifndef DIRNAME_H_
 643 #define DIRNAME_H_ 1
 644 
 645 #include <stddef.h>
 646 
 647 size_t base_len(char const *file);
 648 
 649 char *last_component(char const *file);
 650 
 651 #endif

-
+ 347F7CC6CC2D4AE711BAFCD7E08FEC2D76F741562B452CC8EEFBDAB6EE2A14781D162E4C4B597D528FD29D3F8295675A3636E0CB0C7F432778E56E79B1786A89
vtools/lib/error.c

(0 . 0)(1 . 78)
 656 /* Written by David MacKenzie <djm@gnu.ai.mit.edu>.  */
 657 
 658 #include "system.h"
 659 #include "error.h"
 660 #include "progname.h"
 661 #include <stdarg.h>
 662 #include <stdio.h>
 663 #include <stdlib.h>
 664 #include <string.h>
 665 #include <fcntl.h>
 666 
 667 /* If |NULL|, error will flush |stdout|, then print on |stderr| the
 668    program name, a colon and a space.  Otherwise, error will call this
 669    function without parameters instead.  */
 670 void (*error_print_progname)(void);
 671 
 672 /* This variable is incremented each time |error| is called.  */
 673 unsigned int error_message_count;
 674 
 675 
 676 /* Return non-zero if |fd| is open.  */
 677 static int
 678 is_open(int fd) {
 679     return 0 <= fcntl(fd, F_GETFL);
 680 }
 681 
 682 static void
 683 flush_stdout(void) {
 684     if (is_open(1))
 685         fflush(stdout);
 686 }
 687 
 688 static void
 689 print_errno_message(int errnum) {
 690     char const *s;
 691     char errbuf[1024];
 692     if (strerror_r(errnum, errbuf, sizeof errbuf) == 0)
 693         s = errbuf;
 694     else
 695         s = 0;
 696     if (!s)
 697         s = "Unknown system error";
 698     fprintf(stderr, ": %s", s);
 699 }
 700 
 701 static void
 702 error_tail(int status, int errnum, const char *message, va_list args) {
 703     vfprintf(stderr, message, args);
 704     va_end (args);
 705 
 706     ++error_message_count;
 707     if (errnum)
 708         print_errno_message(errnum);
 709     putc ('\n', stderr);
 710     fflush(stderr);
 711     if (status)
 712         exit(status);
 713 }
 714 
 715 
 716 /* Print the program name and error message |message|, which is a
 717    printf-style format string with optional args.  If |errnum| is
 718    nonzero, print its corresponding system error message.  Exit with
 719    status |status| if it is nonzero.  */
 720 void
 721 error(int status, int errnum, const char *message, ...) {
 722     va_list args;
 723     flush_stdout();
 724     if (error_print_progname)
 725         (*error_print_progname)();
 726     else {
 727         fprintf(stderr, "%s: ", get_program_name());
 728     }
 729 
 730     va_start (args, message);
 731     error_tail(status, errnum, message, args);
 732 }
 733

-
+ D8B7E0F6AEA24BE22BCFA208F4A3A115B6F30E9C38A3E5DCF7386BF8781684286D1368703CEA7D51AE15EE29AA9D5D7524C4D6F7A487A2877053B90F85CD64BF
vtools/lib/error.h

(0 . 0)(1 . 25)
 738 #ifndef _ERROR_H
 739 #define _ERROR_H 1
 740 
 741 /* Print a message with |fprintf(stderr, format, ...);| if |errnum| is
 742    nonzero, follow it with ": " and |strerror(errnum)|.  If |status|
 743    is nonzero, terminate the program with |exit(status)|.  */
 744 
 745 extern void error(int __status, int __errnum, const char *__format, ...);
 746 
 747 extern void error_at_line(int __status, int __errnum, const char *__fname,
 748                           unsigned int __lineno, const char *__format, ...);
 749 
 750 /* If |NULL|, error will flush |stdout|, then print on |stderr| the
 751    program name, a colon and a space.  Otherwise, error will call this
 752    function without parameters instead.  */
 753 extern void (*error_print_progname)(void);
 754 
 755 /* This variable is incremented each time |error| is called.  */
 756 extern unsigned int error_message_count;
 757 
 758 /* Sometimes we want to have at most one error per line.  This
 759    variable controls whether this mode is selected or not.  */
 760 extern int error_one_per_line;
 761 
 762 #endif

-
+ 06209A17B6C66FDBBF324A004AC8853D54FD3C2A69F985A27DBD8DC65E5310C4228C1F9B41D7C639D24B47515641413AA57E35460B6246F41C8C961C0A5889FA
vtools/lib/filenamecat.c

(0 . 0)(1 . 83)
 767 
 768 /* Written by Jim Meyering.  */
 769 
 770 /* Specification.  */
 771 #include "filenamecat.h"
 772 
 773 #include <stdlib.h>
 774 #include <string.h>
 775 #include "xalloc.h"
 776 
 777 #include "dirname.h"
 778 
 779 #if !HAVE_MEMPCPY && !defined mempcpy
 780 # define mempcpy(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N)))
 781 #endif
 782 
 783 /* Return the longest suffix of |f| that is a relative file name.  If
 784    it has no such suffix, return the empty string.  */
 785 
 786 static char const *
 787 longest_relative_suffix(char const *f) {
 788     for (f += 0; ((*f) == '/'); f++)
 789         continue;
 790     return f;
 791 }
 792 
 793 /* Concatenate two file name components, |dir| and |abase|, in
 794    newly-allocated storage and return the result.  The resulting file
 795    name |f| is such that the commands |ls F| and |(cd DIR; ls BASE)|
 796    refer to the same file, where |base| is |abase| with any file
 797    system prefixes and leading separators removed.  Arrange for a
 798    directory separator if necessary between |dir| and |base| in the
 799    result, removing any redundant separators.  In any case, if
 800    |base_in_result| is non-|NULL|, set |*base_in_result| to point to
 801    the copy of |abase| in the returned concatenation.  However, if
 802    |abase| begins with more than one slash, set |*base_in_result| to
 803    point to the sole corresponding slash that is copied into the
 804    result buffer.
 805 
 806    Return |NULL| if |malloc| fails.  */
 807 
 808 char *
 809 mfile_name_concat(char const *dir, char const *abase, char **base_in_result) {
 810     char const *dirbase = last_component(dir);
 811     size_t dirbaselen = base_len(dirbase);
 812     size_t dirlen = dirbase - dir + dirbaselen;
 813     size_t needs_separator = (dirbaselen && !((dirbase[dirbaselen - 1]) == '/'));
 814 
 815     char const *base = longest_relative_suffix(abase);
 816     size_t baselen = strlen(base);
 817 
 818     char *p_concat = malloc(dirlen + needs_separator + baselen + 1);
 819     char *p;
 820 
 821     if (p_concat == NULL)
 822         return NULL;
 823 
 824     p = mempcpy (p_concat, dir, dirlen);
 825     *p = '/';
 826     p += needs_separator;
 827 
 828     if (base_in_result)
 829         *base_in_result = p - (abase[0] == '/');
 830 
 831     p = mempcpy (p, base, baselen);
 832     *p = '\0';
 833 
 834     return p_concat;
 835 }
 836 
 837 /* Written by Jim Meyering.  */
 838 
 839 /* Just like |mfile_name_concat| (|filenamecat-lgpl.c|), except,
 840    rather than returning |NULL| upon |malloc| failure, here, we report
 841    the "memory exhausted" condition and exit.  */
 842 
 843 char *
 844 file_name_concat(char const *dir, char const *abase, char **base_in_result) {
 845     char *p = mfile_name_concat(dir, abase, base_in_result);
 846     if (p == NULL)
 847         xalloc_die();
 848     return p;
 849 }

-
+ F462FD0AC792B50B17CA4AB2BFE91037CABCCE21BF7BA1D44EDFF9DFCA42F930547262163D7795EADFF22FB05F4F591936A2476B9E1CB619E4B54FC177AD6A17
vtools/lib/filenamecat.h

(0 . 0)(1 . 7)
 854 /* Written by Jim Meyering.  */
 855 
 856 char *file_name_concat(char const *dir, char const *base,
 857                        char **base_in_result);
 858 
 859 char *mfile_name_concat(char const *dir, char const *base,
 860                         char **base_in_result);

-
+ 44E3DF4102126EABBC8E4C0B424A650BD2AA11B1E1A84BD9169019A77DA0C3F4005D0A7708A8DBA5F005A642FAB96C0D9F02E788CCF6EEBF89246C8ADA039C73
vtools/lib/filetype.c

(0 . 0)(1 . 87)
 865 /* Written by Paul Eggert.  */
 866 
 867 #include "filetype.h"
 868 
 869 char const *
 870 file_type(struct stat const *st) {
 871     /* To keep diagnostics grammatical in English, the returned string
 872        must start with a consonant.  */
 873 
 874     /* Do these three first, as they're the most common.  */
 875 
 876     if (S_ISREG (st->st_mode))
 877         return st->st_size == 0 ? ("regular empty file") : ("regular file");
 878 
 879     if (S_ISDIR (st->st_mode))
 880         return ("directory");
 881 
 882     if (S_ISLNK (st->st_mode))
 883         return ("symbolic link");
 884 
 885 #if 0
 886     /* Do the |S_TYPEIS*| macros next, as they may be implemented in
 887        terms of |S_ISNAM|, and we want the more-specialized
 888        interpretation.  */
 889 
 890     if (S_TYPEISMQ (st))
 891         return ("message queue");
 892 
 893     if (S_TYPEISSEM (st))
 894         return ("semaphore");
 895 
 896     if (S_TYPEISSHM (st))
 897         return ("shared memory object");
 898 
 899     if (S_TYPEISTMO (st))
 900         return ("typed memory object");
 901 
 902     /* The remaining are in alphabetical order.  */
 903 
 904     if (S_ISBLK (st->st_mode))
 905         return ("block special file");
 906 
 907     if (S_ISCHR (st->st_mode))
 908         return ("character special file");
 909 
 910     if (S_ISCTG (st->st_mode))
 911         return ("contiguous data");
 912 
 913     if (S_ISFIFO (st->st_mode))
 914         return ("fifo");
 915 
 916     if (S_ISDOOR (st->st_mode))
 917         return ("door");
 918 
 919     if (S_ISMPB (st->st_mode))
 920         return ("multiplexed block special file");
 921 
 922     if (S_ISMPC (st->st_mode))
 923         return ("multiplexed character special file");
 924 
 925     if (S_ISMPX (st->st_mode))
 926         return ("multiplexed file");
 927 
 928     if (S_ISNAM (st->st_mode))
 929         return ("named file");
 930 
 931     if (S_ISNWK (st->st_mode))
 932         return ("network special file");
 933 
 934     if (S_ISOFD (st->st_mode))
 935         return ("migrated file with data");
 936 
 937     if (S_ISOFL (st->st_mode))
 938         return ("migrated file without data");
 939 
 940     if (S_ISPORT (st->st_mode))
 941         return ("port");
 942 
 943     if (S_ISSOCK (st->st_mode))
 944         return ("socket");
 945 
 946     if (S_ISWHT (st->st_mode))
 947         return ("whiteout");
 948 #endif
 949 
 950     return ("weird file");
 951 }

-
+ 2EBF923F7C2DC3578DEFD13F195F5CBF919B510468A5F523B462BC85E48BB3E8B66E9A4A08C1998BECB0A165BB3B8C4D75D81F0A2C32725DA1593F1EFEDCE6F1
vtools/lib/filetype.h

(0 . 0)(1 . 11)
 956 /* Written by Paul Eggert and Jim Meyering.  */
 957 
 958 #ifndef FILE_TYPE_H
 959 #define FILE_TYPE_H 1
 960 
 961 #include <sys/types.h>
 962 #include <sys/stat.h>
 963 
 964 char const *file_type(struct stat const *);
 965 
 966 #endif

-
+ 17F92FC88844A70C26B1CCD4CC068F81EB9320557802E744A3190C944EAB08E5DBCB58CAF37008DE8031261D3BA6E584F9795F24B11B3B47B2243FAEA18F180B
vtools/lib/hash.c

(0 . 0)(1 . 1152)
 971 /* A generic hash table package.  */
 972 
 973 /* Define |USE_OBSTACK| to 1 if you want the allocator to use obstacks
 974    instead of |malloc|.  If you change |USE_OBSTACK|, you have to
 975    recompile!  */
 976 
 977 #include "hash.h"
 978 #include "system.h"
 979 
 980 #include <stdlib.h>
 981 #include <limits.h>
 982 
 983 #if USE_OBSTACK
 984 # include "obstack.h"
 985 # ifndef obstack_chunk_alloc
 986 #  define obstack_chunk_alloc malloc
 987 # endif
 988 # ifndef obstack_chunk_free
 989 #  define obstack_chunk_free free
 990 # endif
 991 #endif
 992 
 993 struct hash_entry {
 994     void *data;
 995     struct hash_entry *next;
 996 };
 997 
 998 struct hash_table {
 999     /* The array of buckets starts at |bucket| and extends to
1000        |bucket_limit-1|, for a possibility of |n_buckets|.  Among
1001        those, |n_buckets_used| buckets are not empty, there are
1002        |n_entries| active entries in the table.  */
1003     struct hash_entry *bucket;
1004     struct hash_entry const *bucket_limit;
1005     size_t n_buckets;
1006     size_t n_buckets_used;
1007     size_t n_entries;
1008 
1009     /* Tuning arguments, kept in a physically separate structure.  */
1010     const Hash_tuning *tuning;
1011 
1012     /* Three functions are given to |hash_initialize|, see the
1013        documentation block for this function.  In a word, |hasher|
1014        randomizes a user entry into a number up from 0 up to some
1015        maximum minus 1; |comparator| returns true if two user entries
1016        compare equally; and |data_freer| is the cleanup function for a
1017        user entry.  */
1018     Hash_hasher hasher;
1019     Hash_comparator comparator;
1020     Hash_data_freer data_freer;
1021 
1022     /* A linked list of freed struct |hash_entry| structs.  */
1023     struct hash_entry *free_entry_list;
1024 
1025 #if USE_OBSTACK
1026     /* Whenever obstacks are used, it is possible to allocate all
1027        overflowed entries into a single stack, so they all can be
1028        freed in a single operation.  It is not clear if the speedup is
1029        worth the trouble.  */
1030     struct obstack entry_stack;
1031 #endif
1032 };
1033 
1034 /* A hash table contains many internal entries, each holding a pointer
1035    to some user-provided data (also called a user entry).  An entry
1036    indistinctly refers to both the internal entry and its associated
1037    user entry.  A user entry contents may be hashed by a randomization
1038    function (the hashing function, or just "hasher" for short) into a
1039    number (or "slot") between 0 and the current table size.  At each
1040    slot position in the hash table, starts a linked chain of entries
1041    for which the user data all hash to this slot.  A bucket is the
1042    collection of all entries hashing to the same slot.
1043 
1044    A good "hasher" function will distribute entries rather evenly in
1045    buckets.  In the ideal case, the length of each bucket is roughly
1046    the number of entries divided by the table size.  Finding the slot
1047    for a data is usually done in constant time by the "hasher", and
1048    the later finding of a precise entry is linear in time with the
1049    size of the bucket.  Consequently, a larger hash table size (that
1050    is, a larger number of buckets) is prone to yielding shorter
1051    chains, {\bf given} the "hasher" function behaves properly.
1052 
1053    Long buckets slow down the lookup algorithm.  One might use big
1054    hash table sizes in hope to reduce the average length of buckets,
1055    but this might become inordinate, as unused slots in the hash table
1056    take some space.  The best bet is to make sure you are using a good
1057    "hasher" function (beware that those are not that easy to write!
1058    :-), and to use a table size larger than the actual number of
1059    entries.  */
1060 
1061 /* If an insertion makes the ratio of nonempty buckets to table size
1062    larger than the growth threshold (a number between 0.0 and 1.0),
1063    then increase the table size by multiplying by the growth factor (a
1064    number greater than 1.0).  The growth threshold defaults to 0.8,
1065    and the growth factor defaults to 1.414, meaning that the table
1066    will have doubled its size every second time 80\% of the buckets
1067    get used.  */
1068 #define DEFAULT_GROWTH_THRESHOLD 0.8f
1069 #define DEFAULT_GROWTH_FACTOR 1.414f
1070 
1071 /* If a deletion empties a bucket and causes the ratio of used buckets
1072    to table size to become smaller than the shrink threshold (a number
1073    between 0.0 and 1.0), then shrink the table by multiplying by the
1074    shrink factor (a number greater than the shrink threshold but
1075    smaller than 1.0).  The shrink threshold and factor default to 0.0
1076    and 1.0, meaning that the table never shrinks.  */
1077 #define DEFAULT_SHRINK_THRESHOLD 0.0f
1078 #define DEFAULT_SHRINK_FACTOR 1.0f
1079 
1080 /* Use this to initialize or reset a |tuning| structure to some
1081    sensible values. */
1082 static const Hash_tuning default_tuning =
1083         {
1084                 DEFAULT_SHRINK_THRESHOLD,
1085                 DEFAULT_SHRINK_FACTOR,
1086                 DEFAULT_GROWTH_THRESHOLD,
1087                 DEFAULT_GROWTH_FACTOR,
1088                 false
1089         };
1090 
1091 /* Information and lookup.  */
1092 
1093 /* The following few functions provide information about the overall
1094    hash table organization: the number of entries, number of buckets
1095    and maximum length of buckets.  */
1096 
1097 /* Return the number of buckets in the hash table.  The table size,
1098    the total number of buckets (used plus unused), or the maximum
1099    number of slots, are the same quantity.  */
1100 
1101 size_t
1102 hash_get_n_buckets(const Hash_table *table) {
1103     return table->n_buckets;
1104 }
1105 
1106 /* Return the number of slots in use (non-empty buckets).  */
1107 
1108 size_t
1109 hash_get_n_buckets_used(const Hash_table *table) {
1110     return table->n_buckets_used;
1111 }
1112 
1113 /* Return the number of active entries.  */
1114 
1115 size_t
1116 hash_get_n_entries(const Hash_table *table) {
1117     return table->n_entries;
1118 }
1119 
1120 /* Return the length of the longest chain (bucket).  */
1121 
1122 size_t
1123 hash_get_max_bucket_length(const Hash_table *table) {
1124     struct hash_entry const *bucket;
1125     size_t max_bucket_length = 0;
1126 
1127     for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1128         if (bucket->data) {
1129             struct hash_entry const *cursor = bucket;
1130             size_t bucket_length = 1;
1131 
1132             while (cursor = cursor->next, cursor)
1133                 bucket_length++;
1134 
1135             if (bucket_length > max_bucket_length)
1136                 max_bucket_length = bucket_length;
1137         }
1138     }
1139 
1140     return max_bucket_length;
1141 }
1142 
1143 /* Do a mild validation of a hash table, by traversing it and checking
1144    two statistics.  */
1145 
1146 bool
1147 hash_table_ok(const Hash_table *table) {
1148     struct hash_entry const *bucket;
1149     size_t n_buckets_used = 0;
1150     size_t n_entries = 0;
1151 
1152     for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1153         if (bucket->data) {
1154             struct hash_entry const *cursor = bucket;
1155 
1156             /* Count bucket head.  */
1157             n_buckets_used++;
1158             n_entries++;
1159 
1160             /* Count bucket overflow.  */
1161             while (cursor = cursor->next, cursor)
1162                 n_entries++;
1163         }
1164     }
1165 
1166     if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
1167         return true;
1168 
1169     return false;
1170 }
1171 
1172 void
1173 hash_print_statistics(const Hash_table *table, FILE *stream) {
1174     size_t n_entries = hash_get_n_entries(table);
1175     size_t n_buckets = hash_get_n_buckets(table);
1176     size_t n_buckets_used = hash_get_n_buckets_used(table);
1177     size_t max_bucket_length = hash_get_max_bucket_length(table);
1178 
1179     fprintf(stream, "# entries:         %lu\n", (unsigned long int) n_entries);
1180     fprintf(stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
1181     fprintf(stream, "# buckets used:    %lu (%.2f%%)\n",
1182             (unsigned long int) n_buckets_used,
1183             (100.0 * n_buckets_used) / n_buckets);
1184     fprintf(stream, "max bucket length: %lu\n",
1185             (unsigned long int) max_bucket_length);
1186 }
1187 
1188 /* Hash |key| and return a pointer to the selected bucket.  If
1189    |table->hasher| misbehaves, abort.  */
1190 static struct hash_entry *
1191 safe_hasher(const Hash_table *table, const void *key) {
1192     size_t n = table->hasher(key, table->n_buckets);
1193     if (!(n < table->n_buckets))
1194         abort();
1195     return table->bucket + n;
1196 }
1197 
1198 /* If |entry| matches an entry already in the hash table, return the
1199    entry from the table.  Otherwise, return |NULL|.  */
1200 
1201 void *
1202 hash_lookup(const Hash_table *table, const void *entry) {
1203     struct hash_entry const *bucket = safe_hasher(table, entry);
1204     struct hash_entry const *cursor;
1205 
1206     if (bucket->data == NULL)
1207         return NULL;
1208 
1209     for (cursor = bucket; cursor; cursor = cursor->next)
1210         if (entry == cursor->data || table->comparator(entry, cursor->data))
1211             return cursor->data;
1212 
1213     return NULL;
1214 }
1215 
1216 /* Walking.  */
1217 
1218 /* The functions in this page traverse the hash table and process the
1219    contained entries.  For the traversal to work properly, the hash
1220    table should not be resized nor modified while any particular entry
1221    is being processed.  In particular, entries should not be added,
1222    and an entry may be removed only if there is no shrink threshold
1223    and the entry being removed has already been passed to
1224    |hash_get_next|.  */
1225 
1226 /* Return the first data in the table, or |NULL| if the table is
1227    empty.  */
1228 
1229 void *
1230 hash_get_first(const Hash_table *table) {
1231     struct hash_entry const *bucket;
1232 
1233     if (table->n_entries == 0)
1234         return NULL;
1235 
1236     for (bucket = table->bucket;; bucket++)
1237         if (!(bucket < table->bucket_limit))
1238             abort();
1239         else if (bucket->data)
1240             return bucket->data;
1241 }
1242 
1243 /* Return the user data for the entry following |entry|, where |entry|
1244    has been returned by a previous call to either |hash_get_first| or
1245    |hash_get_next|.  Return |NULL| if there are no more entries.  */
1246 
1247 void *
1248 hash_get_next(const Hash_table *table, const void *entry) {
1249     struct hash_entry const *bucket = safe_hasher(table, entry);
1250     struct hash_entry const *cursor;
1251 
1252     /* Find next entry in the same bucket.  */
1253     cursor = bucket;
1254     do {
1255         if (cursor->data == entry && cursor->next)
1256             return cursor->next->data;
1257         cursor = cursor->next;
1258     } while (cursor != NULL);
1259 
1260     /* Find first entry in any subsequent bucket.  */
1261     while (++bucket < table->bucket_limit)
1262         if (bucket->data)
1263             return bucket->data;
1264 
1265     /* None found.  */
1266     return NULL;
1267 }
1268 
1269 /* Fill |buffer| with pointers to active user entries in the hash
1270    table, then return the number of pointers copied.  Do not copy more
1271    than |buffer_size| pointers.  */
1272 
1273 size_t
1274 hash_get_entries(const Hash_table *table, void **buffer,
1275                  size_t buffer_size) {
1276     size_t counter = 0;
1277     struct hash_entry const *bucket;
1278     struct hash_entry const *cursor;
1279 
1280     for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1281         if (bucket->data) {
1282             for (cursor = bucket; cursor; cursor = cursor->next) {
1283                 if (counter >= buffer_size)
1284                     return counter;
1285                 buffer[counter++] = cursor->data;
1286             }
1287         }
1288     }
1289 
1290     return counter;
1291 }
1292 
1293 /* Call a |processor| function for each entry of a hash table, and
1294    return the number of entries for which the processor function
1295    returned success.  A pointer to some |processor_data| which will be
1296    made available to each call to the processor function.  The
1297    |processor| accepts two arguments: the first is the user entry
1298    being walked into, the second is the value of |processor_data| as
1299    received.  The walking continue for as long as the |processor|
1300    function returns nonzero.  When it returns zero, the walking is
1301    interrupted.  */
1302 
1303 size_t
1304 hash_do_for_each(const Hash_table *table, Hash_processor processor,
1305                  void *processor_data) {
1306     size_t counter = 0;
1307     struct hash_entry const *bucket;
1308     struct hash_entry const *cursor;
1309 
1310     for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1311         if (bucket->data) {
1312             for (cursor = bucket; cursor; cursor = cursor->next) {
1313                 if (!processor(cursor->data, processor_data))
1314                     return counter;
1315                 counter++;
1316             }
1317         }
1318     }
1319 
1320     return counter;
1321 }
1322 
1323 /* Allocation and clean-up.  */
1324 
1325 /* Return a hash index for a |NULL|-terminated |string| between |0| and
1326    |n_buckets-1|.  This is a convenience routine for constructing
1327    other hashing functions.  */
1328 
1329 #if USE_DIFF_HASH
1330 
1331 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01:
1332    ``Please see B. J. McKenzie, R. Harries \& T. Bell, Selecting a
1333    hashing algorithm, Software--practice \& experience 20, 2 (Feb
1334    1990), 209-224.  Good hash algorithms tend to be domain-specific,
1335    so what's good for [diffutils'] |io.c| may not be good for your
1336    application.''  */
1337 
1338 size_t
1339 hash_string (const char *string, size_t n_buckets)
1340 {
1341 # define HASH_ONE_CHAR(Value, Byte) \
1342   ((Byte) + rotl_sz (Value, 7))
1343 
1344   size_t value = 0;
1345   unsigned char ch;
1346 
1347   for (; (ch = *string); string++)
1348     value = HASH_ONE_CHAR (value, ch);
1349   return value % n_buckets;
1350 
1351 # undef HASH_ONE_CHAR
1352 }
1353 
1354 #else
1355 
1356 /* This one comes from |recode|, and performs a bit better than the
1357    above as per a few experiments.  It is inspired from a hashing
1358    routine found in the very old Cyber `snoop', itself written in
1359    typical Greg Mansfield style.  (By the way, what happened to this
1360    excellent man?  Is he still alive?)  */
1361 
1362 size_t
1363 hash_string(const char *string, size_t n_buckets) {
1364     size_t value = 0;
1365     unsigned char ch;
1366 
1367     for (; (ch = *string); string++)
1368         value = (value * 31 + ch) % n_buckets;
1369     return value;
1370 }
1371 
1372 #endif
1373 
1374 /* Return true if |candidate| is a prime number.  |candidate| should
1375    be an odd number at least equal to 11.  */
1376 
1377 static bool
1378 is_prime(size_t candidate) {
1379     size_t divisor = 3;
1380     size_t square = divisor * divisor;
1381 
1382     while (square < candidate && (candidate % divisor)) {
1383         divisor++;
1384         square += 4 * divisor;
1385         divisor++;
1386     }
1387 
1388     return (candidate % divisor ? true : false);
1389 }
1390 
1391 /* Round a given |candidate| number up to the nearest prime, and
1392    return that prime.  Primes lower than 10 are merely skipped.  */
1393 
1394 static size_t
1395 next_prime(size_t candidate) {
1396     /* Skip small primes.  */
1397     if (candidate < 10)
1398         candidate = 10;
1399 
1400     /* Make it definitely odd.  */
1401     candidate |= 1;
1402 
1403     while (SIZE_MAX != candidate && !is_prime(candidate))
1404         candidate += 2;
1405 
1406     return candidate;
1407 }
1408 
1409 void
1410 hash_reset_tuning(Hash_tuning *tuning) {
1411     *tuning = default_tuning;
1412 }
1413 
1414 /* Given a |size_t| argument |x|, return the value corresponding to
1415    rotating the bits |n| steps to the right.  |n| must be between 1 to
1416    |(CHAR_BIT * sizeof (size_t) - 1)| inclusive.  */
1417 static inline size_t
1418 rotr_sz(size_t x, int n) {
1419     return ((x >> n) | (x << ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
1420 }
1421 
1422 /* If the user passes a |NULL| hasher, we hash the raw pointer.  */
1423 static size_t
1424 raw_hasher(const void *data, size_t n) {
1425     /* When hashing unique pointers, it is often the case that they
1426        were generated by malloc and thus have the property that the
1427        low-order bits are 0.  As this tends to give poorer performance
1428        with small tables, we rotate the pointer value before
1429        performing division, in an attempt to improve hash quality.  */
1430     size_t val = rotr_sz((size_t) data, 3);
1431     return val % n;
1432 }
1433 
1434 /* If the user passes a |NULL| comparator, we use pointer
1435    comparison.  */
1436 static bool
1437 raw_comparator(const void *a, const void *b) {
1438     return a == b;
1439 }
1440 
1441 
1442 /* For the given hash |table|, check the user supplied tuning
1443    structure for reasonable values, and return true if there is no
1444    gross error with it.  Otherwise, definitively reset the |tuning|
1445    field to some acceptable default in the hash table (that is, the
1446    user loses the right of further modifying tuning arguments), and
1447    return false.  */
1448 
1449 static bool
1450 check_tuning(Hash_table *table) {
1451     const Hash_tuning *tuning = table->tuning;
1452     float epsilon;
1453     if (tuning == &default_tuning)
1454         return true;
1455 
1456     /* Be a bit stricter than mathematics would require, so that
1457        rounding errors in size calculations do not cause allocations
1458        to fail to grow or shrink as they should.  The smallest
1459        allocation is 11 (due to |next_prime|'s algorithm), so an
1460        epsilon of 0.1 should be good enough.  */
1461     epsilon = 0.1f;
1462 
1463     if (epsilon < tuning->growth_threshold
1464         && tuning->growth_threshold < 1 - epsilon
1465         && 1 + epsilon < tuning->growth_factor
1466         && 0 <= tuning->shrink_threshold
1467         && tuning->shrink_threshold + epsilon < tuning->shrink_factor
1468         && tuning->shrink_factor <= 1
1469         && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
1470         return true;
1471 
1472     table->tuning = &default_tuning;
1473     return false;
1474 }
1475 
1476 /* Compute the size of the bucket array for the given |candidate| and
1477    |tuning|, or return 0 if there is no possible way to allocate that
1478    many entries.  */
1479 
1480 static size_t
1481 compute_bucket_size(size_t candidate, const Hash_tuning *tuning) {
1482     if (!tuning->is_n_buckets) {
1483         float new_candidate = candidate / tuning->growth_threshold;
1484         if (SIZE_MAX <= new_candidate)
1485             return 0;
1486         candidate = new_candidate;
1487     }
1488     candidate = next_prime(candidate);
1489     if (xalloc_oversized (candidate, sizeof(struct hash_entry *)))
1490         return 0;
1491     return candidate;
1492 }
1493 
1494 /* Allocate and return a new hash table, or |NULL| upon failure.  The
1495    initial number of buckets is automatically selected so as to {\it
1496    guarantee} that you may insert at least |candidate| different user
1497    entries before any growth of the hash table size occurs.  So, if
1498    have a reasonably tight a-priori upper bound on the number of
1499    entries you intend to insert in the hash table, you may save some
1500    table memory and insertion time, by specifying it here.  If the
1501    |is_n_buckets| field of the |tuning| structure is true, the
1502    |candidate| argument has its meaning changed to the wanted number
1503    of buckets.
1504 
1505    |tuning| points to a structure of user-supplied values, in case
1506    some fine tuning is wanted over the default behavior of the hasher.
1507    If |tuning| is |NULL|, the default tuning parameters are used
1508    instead.  If |tuning| is provided but the values requested are out
1509    of bounds or might cause rounding errors, return |NULL|.
1510 
1511    The user-supplied |hasher| function, when not |NULL|, accepts two
1512    arguments |entry| and |table_size|.  It computes, by hashing
1513    |entry| contents, a slot number for that entry which should be in
1514    the range |0..table_size-1|.  This slot number is then returned.
1515 
1516    The user-supplied |comparator| function, when not |NULL|, accepts
1517    two arguments pointing to user data, it then returns true for a
1518    pair of entries that compare equal, or false otherwise.  This
1519    function is internally called on entries which are already known to
1520    hash to the same bucket index, but which are distinct pointers.
1521 
1522    The user-supplied |data_freer| function, when not |NULL|, may be
1523    later called with the user data as an argument, just before the
1524    entry containing the data gets freed.  This happens from within
1525    |hash_free| or |hash_clear|.  You should specify this function only
1526    if you want these functions to free all of your |data| data.  This
1527    is typically the case when your data is simply an auxiliary struct
1528    that you have |malloc|'d to aggregate several values.  */
1529 
1530 Hash_table *
1531 hash_initialize(size_t candidate, const Hash_tuning *tuning,
1532                 Hash_hasher hasher, Hash_comparator comparator,
1533                 Hash_data_freer data_freer) {
1534     Hash_table *table;
1535 
1536     if (hasher == NULL)
1537         hasher = raw_hasher;
1538     if (comparator == NULL)
1539         comparator = raw_comparator;
1540 
1541     table = malloc(sizeof *table);
1542     if (table == NULL)
1543         return NULL;
1544 
1545     if (!tuning)
1546         tuning = &default_tuning;
1547     table->tuning = tuning;
1548     if (!check_tuning(table)) {
1549         /* Fail if the tuning options are invalid.  This is the only
1550            occasion when the user gets some feedback about it.  Once
1551            the table is created, if the user provides invalid tuning
1552            options, we silently revert to using the defaults, and
1553            ignore further request to change the tuning options.  */
1554         goto fail;
1555     }
1556 
1557     table->n_buckets = compute_bucket_size(candidate, tuning);
1558     if (!table->n_buckets)
1559         goto fail;
1560 
1561     table->bucket = calloc(table->n_buckets, sizeof *table->bucket);
1562     if (table->bucket == NULL)
1563         goto fail;
1564     table->bucket_limit = table->bucket + table->n_buckets;
1565     table->n_buckets_used = 0;
1566     table->n_entries = 0;
1567 
1568     table->hasher = hasher;
1569     table->comparator = comparator;
1570     table->data_freer = data_freer;
1571 
1572     table->free_entry_list = NULL;
1573 #if USE_OBSTACK
1574     obstack_init (&table->entry_stack);
1575 #endif
1576     return table;
1577 
1578     fail:
1579     free(table);
1580     return NULL;
1581 }
1582 
1583 /* Make all buckets empty, placing any chained entries on the free
1584    list.  Apply the user-specified function |data_freer| (if any) to
1585    the datas of any affected entries.  */
1586 
1587 void
1588 hash_clear(Hash_table *table) {
1589     struct hash_entry *bucket;
1590 
1591     for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1592         if (bucket->data) {
1593             struct hash_entry *cursor;
1594             struct hash_entry *next;
1595 
1596             /* Free the bucket overflow.  */
1597             for (cursor = bucket->next; cursor; cursor = next) {
1598                 if (table->data_freer)
1599                     table->data_freer(cursor->data);
1600                 cursor->data = NULL;
1601 
1602                 next = cursor->next;
1603                 /* Relinking is done one entry at a time, as it is to
1604                    be expected that overflows are either rare or
1605                    short.  */
1606                 cursor->next = table->free_entry_list;
1607                 table->free_entry_list = cursor;
1608             }
1609 
1610             /* Free the bucket head.  */
1611             if (table->data_freer)
1612                 table->data_freer(bucket->data);
1613             bucket->data = NULL;
1614             bucket->next = NULL;
1615         }
1616     }
1617 
1618     table->n_buckets_used = 0;
1619     table->n_entries = 0;
1620 }
1621 
1622 /* Reclaim all storage associated with a hash table.  If a
1623    |data_freer| function has been supplied by the user when the hash
1624    table was created, this function applies it to the data of each
1625    entry before freeing that entry.  */
1626 
1627 void
1628 hash_free(Hash_table *table) {
1629     struct hash_entry *bucket;
1630     struct hash_entry *cursor;
1631     struct hash_entry *next;
1632 
1633     /* Call the user |data_freer| function.  */
1634     if (table->data_freer && table->n_entries) {
1635         for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1636             if (bucket->data) {
1637                 for (cursor = bucket; cursor; cursor = cursor->next)
1638                     table->data_freer(cursor->data);
1639             }
1640         }
1641     }
1642 
1643 #if USE_OBSTACK
1644 
1645     obstack_free (&table->entry_stack, NULL);
1646 
1647 #else
1648 
1649     /* Free all bucket overflowed entries.  */
1650     for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1651         for (cursor = bucket->next; cursor; cursor = next) {
1652             next = cursor->next;
1653             free(cursor);
1654         }
1655     }
1656 
1657     /* Also reclaim the internal list of previously freed entries.  */
1658     for (cursor = table->free_entry_list; cursor; cursor = next) {
1659         next = cursor->next;
1660         free(cursor);
1661     }
1662 
1663 #endif
1664 
1665     /* Free the remainder of the hash table structure.  */
1666     free(table->bucket);
1667     free(table);
1668 }
1669 
1670 /* Insertion and deletion.  */
1671 
1672 /* Get a new hash entry for a bucket overflow, possibly by recycling a
1673    previously freed one.  If this is not possible, allocate a new
1674    one.  */
1675 
1676 static struct hash_entry *
1677 allocate_entry(Hash_table *table) {
1678     struct hash_entry *new;
1679 
1680     if (table->free_entry_list) {
1681         new = table->free_entry_list;
1682         table->free_entry_list = new->next;
1683     } else {
1684 #if USE_OBSTACK
1685         new = obstack_alloc (&table->entry_stack, sizeof *new);
1686 #else
1687         new = malloc(sizeof *new);
1688 #endif
1689     }
1690 
1691     return new;
1692 }
1693 
1694 /* Free a hash entry which was part of some bucket overflow, saving it
1695    for later recycling.  */
1696 
1697 static void
1698 free_entry(Hash_table *table, struct hash_entry *entry) {
1699     entry->data = NULL;
1700     entry->next = table->free_entry_list;
1701     table->free_entry_list = entry;
1702 }
1703 
1704 /* This private function is used to help with insertion and deletion.
1705    When |entry| matches an entry in the table, return a pointer to the
1706    corresponding user data and set |*bucket_head| to the head of the
1707    selected bucket.  Otherwise, return |NULL|.  When |delete| is true
1708    and |entry| matches an entry in the table, unlink the matching
1709    entry.  */
1710 
1711 static void *
1712 hash_find_entry(Hash_table *table, const void *entry,
1713                 struct hash_entry **bucket_head, bool delete) {
1714     struct hash_entry *bucket = safe_hasher(table, entry);
1715     struct hash_entry *cursor;
1716 
1717     *bucket_head = bucket;
1718 
1719     /* Test for empty bucket.  */
1720     if (bucket->data == NULL)
1721         return NULL;
1722 
1723     /* See if the entry is the first in the bucket.  */
1724     if (entry == bucket->data || table->comparator(entry, bucket->data)) {
1725         void *data = bucket->data;
1726 
1727         if (delete) {
1728             if (bucket->next) {
1729                 struct hash_entry *next = bucket->next;
1730 
1731                 /* Bump the first overflow entry into the bucket head, then save
1732                    the previous first overflow entry for later recycling.  */
1733                 *bucket = *next;
1734                 free_entry(table, next);
1735             } else {
1736                 bucket->data = NULL;
1737             }
1738         }
1739 
1740         return data;
1741     }
1742 
1743     /* Scan the bucket overflow.  */
1744     for (cursor = bucket; cursor->next; cursor = cursor->next) {
1745         if (entry == cursor->next->data
1746             || table->comparator(entry, cursor->next->data)) {
1747             void *data = cursor->next->data;
1748 
1749             if (delete) {
1750                 struct hash_entry *next = cursor->next;
1751 
1752                 /* Unlink the entry to delete, then save the freed entry for later
1753                    recycling.  */
1754                 cursor->next = next->next;
1755                 free_entry(table, next);
1756             }
1757 
1758             return data;
1759         }
1760     }
1761 
1762     /* No entry found.  */
1763     return NULL;
1764 }
1765 
1766 /* Internal helper, to move entries from |src| to |dst|.  Both tables
1767    must share the same free entry list.  If |safe|, only move overflow
1768    entries, saving bucket heads for later, so that no allocations will
1769    occur.  Return false if the free entry list is exhausted and an
1770    allocation fails.  */
1771 
1772 static bool
1773 transfer_entries(Hash_table *dst, Hash_table *src, bool safe) {
1774     struct hash_entry *bucket;
1775     struct hash_entry *cursor;
1776     struct hash_entry *next;
1777     for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
1778         if (bucket->data) {
1779             void *data;
1780             struct hash_entry *new_bucket;
1781 
1782             /* Within each bucket, transfer overflow entries first and
1783                then the bucket head, to minimize memory pressure.
1784                After all, the only time we might allocate is when
1785                moving the bucket head, but moving overflow entries
1786                first may create free entries that can be recycled by
1787                the time we finally get to the bucket head.  */
1788             for (cursor = bucket->next; cursor; cursor = next) {
1789                 data = cursor->data;
1790                 new_bucket = safe_hasher(dst, data);
1791 
1792                 next = cursor->next;
1793 
1794                 if (new_bucket->data) {
1795                     /* Merely relink an existing entry, when moving
1796                        from a bucket overflow into a bucket
1797                        overflow.  */
1798                     cursor->next = new_bucket->next;
1799                     new_bucket->next = cursor;
1800                 } else {
1801                     /* Free an existing entry, when moving from a
1802                        bucket overflow into a bucket header.  */
1803                     new_bucket->data = data;
1804                     dst->n_buckets_used++;
1805                     free_entry(dst, cursor);
1806                 }
1807             }
1808             /* Now move the bucket head.  Be sure that if we fail due
1809                to allocation failure that the src table is in a
1810                consistent state.  */
1811             data = bucket->data;
1812             bucket->next = NULL;
1813             if (safe)
1814                 continue;
1815             new_bucket = safe_hasher(dst, data);
1816 
1817             if (new_bucket->data) {
1818                 /* Allocate or recycle an entry, when moving from a
1819                    bucket header into a bucket overflow.  */
1820                 struct hash_entry *new_entry = allocate_entry(dst);
1821 
1822                 if (new_entry == NULL)
1823                     return false;
1824 
1825                 new_entry->data = data;
1826                 new_entry->next = new_bucket->next;
1827                 new_bucket->next = new_entry;
1828             } else {
1829                 /* Move from one bucket header to another.  */
1830                 new_bucket->data = data;
1831                 dst->n_buckets_used++;
1832             }
1833             bucket->data = NULL;
1834             src->n_buckets_used--;
1835         }
1836     return true;
1837 }
1838 
1839 /* For an already existing hash table, change the number of buckets
1840    through specifying |candidate|.  The contents of the hash table are
1841    preserved.  The new number of buckets is automatically selected so
1842    as to {\it guarantee} that the table may receive at least
1843    |candidate| different user entries, including those already in the
1844    table, before any other growth of the hash table size occurs.  If
1845    |tuning->is_n_buckets| is true, then |candidate| specifies the
1846    exact number of buckets desired.  Return true iff the rehash
1847    succeeded.  */
1848 
1849 bool
1850 hash_rehash(Hash_table *table, size_t candidate) {
1851     Hash_table storage;
1852     Hash_table *new_table;
1853     size_t new_size = compute_bucket_size(candidate, table->tuning);
1854 
1855     if (!new_size)
1856         return false;
1857     if (new_size == table->n_buckets)
1858         return true;
1859     new_table = &storage;
1860     new_table->bucket = calloc(new_size, sizeof *new_table->bucket);
1861     if (new_table->bucket == NULL)
1862         return false;
1863     new_table->n_buckets = new_size;
1864     new_table->bucket_limit = new_table->bucket + new_size;
1865     new_table->n_buckets_used = 0;
1866     new_table->n_entries = 0;
1867     new_table->tuning = table->tuning;
1868     new_table->hasher = table->hasher;
1869     new_table->comparator = table->comparator;
1870     new_table->data_freer = table->data_freer;
1871 
1872     /* In order for the transfer to successfully complete, we need
1873        additional overflow entries when distinct buckets in the old
1874        table collide into a common bucket in the new table.  The worst
1875        case possible is a hasher that gives a good spread with the old
1876        size, but returns a constant with the new size; if we were to
1877        guarantee |table->n_buckets_used-1| free entries in advance, then
1878        the transfer would be guaranteed to not allocate memory.
1879        However, for large tables, a guarantee of no further allocation
1880        introduces a lot of extra memory pressure, all for an unlikely
1881        corner case (most rehashes reduce, rather than increase, the
1882        number of overflow entries needed).  So, we instead ensure that
1883        the transfer process can be reversed if we hit a memory
1884        allocation failure mid-transfer.  */
1885 
1886     /* Merely reuse the extra old space into the new table.  */
1887 #if USE_OBSTACK
1888     new_table->entry_stack = table->entry_stack;
1889 #endif
1890     new_table->free_entry_list = table->free_entry_list;
1891 
1892     if (transfer_entries(new_table, table, false)) {
1893         /* Entries transferred successfully; tie up the loose ends.  */
1894         free(table->bucket);
1895         table->bucket = new_table->bucket;
1896         table->bucket_limit = new_table->bucket_limit;
1897         table->n_buckets = new_table->n_buckets;
1898         table->n_buckets_used = new_table->n_buckets_used;
1899         table->free_entry_list = new_table->free_entry_list;
1900         /* |table->n_entries| and |table->entry_stack| already hold their value.  */
1901         return true;
1902     }
1903 
1904     /* We've allocated |new_table->bucket| (and possibly some
1905        entries), exhausted the free list, and moved some but not all
1906        entries into |new_table|.  We must undo the partial move before
1907        returning failure.  The only way to get into this situation is
1908        if |new_table| uses fewer buckets than the old table, so we
1909        will reclaim some free entries as overflows in the new table
1910        are put back into distinct buckets in the old table.
1911 
1912        There are some pathological cases where a single pass through
1913        the table requires more intermediate overflow entries than
1914        using two passes.  Two passes give worse cache performance and
1915        takes longer, but at this point, we're already out of memory,
1916        so slow and safe is better than failure.  */
1917     table->free_entry_list = new_table->free_entry_list;
1918     if (!(transfer_entries(table, new_table, true)
1919           && transfer_entries(table, new_table, false)))
1920         abort();
1921     /* |table->n_entries| already holds its value.  */
1922     free(new_table->bucket);
1923     return false;
1924 }
1925 
1926 /* Insert |entry| into hash |table| if there is not already a matching
1927    entry.
1928 
1929    Return -1 upon memory allocation failure.
1930    Return 1 if insertion succeeded.
1931    Return 0 if there is already a matching entry in the table, and in
1932    that case, if |matched_ent| is non-|NULL|, set |*matched_ent| to
1933    that entry.
1934 
1935    This interface is easier to use than |hash_insert| when you must
1936    distinguish between the latter two cases.  More importantly,
1937    |hash_insert| is unusable for some types of |entry| values.  When
1938    using |hash_insert|, the only way to distinguish those cases is to
1939    compare the return value and |entry|.  That works only when you can
1940    have two different |entry| values that point to data that compares
1941    "equal".  Thus, when the |entry| value is a simple scalar, you must
1942    use |hash_insert_if_absent|.  |entry| must not be |NULL|.  */
1943 int
1944 hash_insert_if_absent(Hash_table *table, void const *entry,
1945                       void const **matched_ent) {
1946     void *data;
1947     struct hash_entry *bucket;
1948 
1949     /* The caller cannot insert a |NULL| entry, since |hash_lookup|
1950        returns |NULL| to indicate "not found", and |hash_find_entry|
1951        uses |bucket->data == NULL| to indicate an empty bucket.  */
1952     if (!entry)
1953         abort();
1954 
1955     /* If there's a matching entry already in the table, return that.  */
1956     if ((data = hash_find_entry(table, entry, &bucket, false)) != NULL) {
1957         if (matched_ent)
1958             *matched_ent = data;
1959         return 0;
1960     }
1961 
1962     /* If the growth threshold of the buckets in use has been reached,
1963        increase the table size and rehash.  There's no point in
1964        checking the number of entries: if the hashing function is
1965        ill-conditioned, rehashing is not likely to improve it.  */
1966 
1967     if (table->n_buckets_used
1968         > table->tuning->growth_threshold * table->n_buckets) {
1969         /* Check more fully, before starting real work.  If tuning
1970            arguments became invalid, the second check will rely on
1971            proper defaults.  */
1972         check_tuning(table);
1973         if (table->n_buckets_used
1974             > table->tuning->growth_threshold * table->n_buckets) {
1975             const Hash_tuning *tuning = table->tuning;
1976             float candidate =
1977                     (tuning->is_n_buckets
1978                      ? (table->n_buckets * tuning->growth_factor)
1979                      : (table->n_buckets * tuning->growth_factor
1980                         * tuning->growth_threshold));
1981 
1982             if (SIZE_MAX <= candidate)
1983                 return -1;
1984 
1985             /* If the rehash fails, arrange to return |NULL|.  */
1986             if (!hash_rehash(table, candidate))
1987                 return -1;
1988 
1989             /* Update the bucket we are interested in.  */
1990             if (hash_find_entry(table, entry, &bucket, false) != NULL)
1991                 abort();
1992         }
1993     }
1994 
1995     /* |entry| is not matched, it should be inserted.  */
1996 
1997     if (bucket->data) {
1998         struct hash_entry *new_entry = allocate_entry(table);
1999 
2000         if (new_entry == NULL)
2001             return -1;
2002 
2003         /* Add |entry| in the overflow of the bucket.  */
2004 
2005         new_entry->data = (void *) entry;
2006         new_entry->next = bucket->next;
2007         bucket->next = new_entry;
2008         table->n_entries++;
2009         return 1;
2010     }
2011 
2012     /* Add |entry| right in the bucket head.  */
2013 
2014     bucket->data = (void *) entry;
2015     table->n_entries++;
2016     table->n_buckets_used++;
2017 
2018     return 1;
2019 }
2020 
2021 /* If |entry| matches an entry already in the hash table, return the
2022    pointer to the entry from the table.  Otherwise, insert |entry| and
2023    return |entry|.  Return |NULL| if the storage required for
2024    insertion cannot be allocated.  This implementation does not
2025    support duplicate entries or insertion of |NULL|.  */
2026 
2027 void *
2028 hash_insert(Hash_table *table, void const *entry) {
2029     void const *matched_ent;
2030     int err = hash_insert_if_absent(table, entry, &matched_ent);
2031     return (err == -1
2032             ? NULL
2033             : (void *) (err == 0 ? matched_ent : entry));
2034 }
2035 
2036 /* If |entry| is already in the table, remove it and return the
2037    just-deleted data (the user may want to deallocate its storage).
2038    If |entry| is not in the table, don't modify the table and return
2039    |NULL|.  */
2040 
2041 void *
2042 hash_delete(Hash_table *table, const void *entry) {
2043     void *data;
2044     struct hash_entry *bucket;
2045 
2046     data = hash_find_entry(table, entry, &bucket, true);
2047     if (!data)
2048         return NULL;
2049 
2050     table->n_entries--;
2051     if (!bucket->data) {
2052         table->n_buckets_used--;
2053 
2054         /* If the shrink threshold of the buckets in use has been
2055            reached, rehash into a smaller table.  */
2056 
2057         if (table->n_buckets_used
2058             < table->tuning->shrink_threshold * table->n_buckets) {
2059             /* Check more fully, before starting real work.  If tuning
2060                arguments became invalid, the second check will rely on
2061                proper defaults.  */
2062             check_tuning(table);
2063             if (table->n_buckets_used
2064                 < table->tuning->shrink_threshold * table->n_buckets) {
2065                 const Hash_tuning *tuning = table->tuning;
2066                 size_t candidate =
2067                         (tuning->is_n_buckets
2068                          ? table->n_buckets * tuning->shrink_factor
2069                          : (table->n_buckets * tuning->shrink_factor
2070                             * tuning->growth_threshold));
2071 
2072                 if (!hash_rehash(table, candidate)) {
2073                     /* Failure to allocate memory in an attempt to
2074                        shrink the table is not fatal.  But since
2075                        memory is low, we can at least be kind and free
2076                        any spare entries, rather than keeping them
2077                        tied up in the free entry list.  */
2078 #if !USE_OBSTACK
2079                     struct hash_entry *cursor = table->free_entry_list;
2080                     struct hash_entry *next;
2081                     while (cursor) {
2082                         next = cursor->next;
2083                         free(cursor);
2084                         cursor = next;
2085                     }
2086                     table->free_entry_list = NULL;
2087 #endif
2088                 }
2089             }
2090         }
2091     }
2092 
2093     return data;
2094 }
2095 
2096 /* Testing.  */
2097 
2098 #if 0
2099 
2100 void
2101 hash_print (const Hash_table *table)
2102 {
2103   struct hash_entry *bucket = (struct hash_entry *) table->bucket;
2104 
2105   for ( ; bucket < table->bucket_limit; bucket++)
2106     {
2107       struct hash_entry *cursor;
2108 
2109       if (bucket)
2110         printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
2111 
2112       for (cursor = bucket; cursor; cursor = cursor->next)
2113         {
2114           char const *s = cursor->data;
2115           /* FIXME */
2116           if (s)
2117             printf ("  %s\n", s);
2118         }
2119     }
2120 }
2121 
2122 #endif

-
+ 861F7244DB30BFBA8540A2E1D615A4226E0CB4E7D177B1850149E6829B23D84B56482EF4DD5D04DB6AA6F8DDA409EF8043A7612670DF5F87A89BD7B3F46F8037
vtools/lib/hash.h

(0 . 0)(1 . 87)
2127 
2128 /* A generic hash table package.  */
2129 
2130 /* Make sure |use_obstack| is defined to 1 if you want the allocator
2131    to use obstacks instead of malloc, and recompile |hash.c| with same
2132    setting.  */
2133 
2134 #ifndef HASH_H_
2135 # define HASH_H_
2136 
2137 # include <stdio.h>
2138 # include <stdbool.h>
2139 
2140 typedef size_t (*Hash_hasher)(const void *, size_t);
2141 
2142 typedef bool (*Hash_comparator)(const void *, const void *);
2143 
2144 typedef void (*Hash_data_freer)(void *);
2145 
2146 typedef bool (*Hash_processor)(void *, void *);
2147 
2148 struct hash_tuning {
2149     /* This structure is mainly used for |hash_initialize|, see the
2150        block documentation of |hash_reset_tuning| for more complete
2151        comments.  */
2152 
2153     float shrink_threshold;     /* ratio of used buckets to trigger a shrink */
2154     float shrink_factor;        /* ratio of new smaller size to original size */
2155     float growth_threshold;     /* ratio of used buckets to trigger a growth */
2156     float growth_factor;        /* ratio of new bigger size to original size */
2157     bool is_n_buckets;          /* if |candidate| really means table size */
2158 };
2159 
2160 typedef struct hash_tuning Hash_tuning;
2161 
2162 struct hash_table;
2163 
2164 typedef struct hash_table Hash_table;
2165 
2166 /* Information and lookup.  */
2167 size_t hash_get_n_buckets(const Hash_table *);
2168 
2169 size_t hash_get_n_buckets_used(const Hash_table *);
2170 
2171 size_t hash_get_n_entries(const Hash_table *);
2172 
2173 size_t hash_get_max_bucket_length(const Hash_table *);
2174 
2175 bool hash_table_ok(const Hash_table *);
2176 
2177 void hash_print_statistics(const Hash_table *, FILE *);
2178 
2179 void *hash_lookup(const Hash_table *, const void *);
2180 
2181 /* Walking.  */
2182 void *hash_get_first(const Hash_table *);
2183 
2184 void *hash_get_next(const Hash_table *, const void *);
2185 
2186 size_t hash_get_entries(const Hash_table *, void **, size_t);
2187 
2188 size_t hash_do_for_each(const Hash_table *, Hash_processor, void *);
2189 
2190 /* Allocation and clean-up.  */
2191 size_t hash_string(const char *, size_t);
2192 
2193 void hash_reset_tuning(Hash_tuning *);
2194 
2195 Hash_table *hash_initialize(size_t, const Hash_tuning *,
2196                             Hash_hasher, Hash_comparator,
2197                             Hash_data_freer);
2198 
2199 void hash_clear(Hash_table *);
2200 
2201 void hash_free(Hash_table *);
2202 
2203 /* Insertion and deletion.  */
2204 bool hash_rehash(Hash_table *, size_t);
2205 
2206 void *hash_insert(Hash_table *, const void *);
2207 
2208 int hash_insert_if_absent(Hash_table *table, const void *entry,
2209                           const void **matched_ent);
2210 
2211 void *hash_delete(Hash_table *, const void *);
2212 
2213 #endif

-
+ C93FDF3DDE65200053B084BF2FE5963801DDA892F384A6737E9D898465DE9F7C964470D068088CE67C3FEAB269A6C4A5D346F05031464FF16E04C0BB163D3C7C
vtools/lib/progname.c

(0 . 0)(1 . 79)
2218 
2219 /* Specification.  */
2220 #include "progname.h"
2221 
2222 #include <stdio.h>
2223 #include <stdlib.h>
2224 #include <string.h>
2225 
2226 
2227 /* String containing name the program is called with.  To be
2228    initialized by |main()|.  */
2229 const char *program_name = NULL;
2230 
2231 /* Set |program_name|, based on |argv[0]|.  |argv0| must be a string
2232    allocated with indefinite extent, and must not be modified after
2233    this call.  */
2234 void
2235 set_program_name(const char *argv0) {
2236     /* libtool creates a temporary executable whose name is sometimes
2237        prefixed with "lt-" (depends on the platform).  It also makes
2238        |argv[0]| absolute.  But the name of the temporary executable
2239        is a detail that should not be visible to the end user and to
2240        the test suite.  Remove this |"<dirname>/.libs/"| or
2241        |"<dirname>/.libs/lt-"| prefix here.  */
2242     const char *slash;
2243     const char *base;
2244 
2245     /* Sanity check.  POSIX requires the invoking process to pass a
2246        non-|NULL| |argv[0]|.  */
2247     if (argv0 == NULL) {
2248         /* It's a bug in the invoking program.  Help diagnosing
2249            it.  */
2250         fputs("A NULL argv[0] was passed through an exec system call.\n",
2251               stderr);
2252         abort();
2253     }
2254 
2255     slash = strrchr(argv0, '/');
2256     base = (slash != NULL ? slash + 1 : argv0);
2257     if (base - argv0 >= 7 && strncmp(base - 7, "/.libs/", 7) == 0) {
2258         argv0 = base;
2259         if (strncmp(base, "lt-", 3) == 0) {
2260             argv0 = base + 3;
2261             /* On glibc systems, remove the "lt-" prefix from the
2262                variable |program_invocation_short_name|.  */
2263 #if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
2264             program_invocation_short_name = (char *) argv0;
2265 #endif
2266         }
2267     }
2268 
2269     /* But don't strip off a leading |<dirname>/| in general, because
2270        when the user runs
2271 
2272            |/some/hidden/place/bin/cp foo foo|
2273 
2274        he should get the error message
2275 
2276            |/some/hidden/place/bin/cp: `foo' and `foo' are the same file|
2277 
2278        not
2279 
2280           | cp: `foo' and `foo' are the same file|
2281      */
2282 
2283     program_name = argv0;
2284 
2285     /* On glibc systems, the |error()| function comes from libc and
2286        uses the variable |program_invocation_name|, not
2287        |program_name|.  So set this variable as well.  */
2288 #if HAVE_DECL_PROGRAM_INVOCATION_NAME
2289     program_invocation_name = (char *) argv0;
2290 #endif
2291 }
2292 
2293 const char *
2294 get_program_name(void) {
2295     return program_name;
2296 }

-
+ C6915DC61245B2A3953757373428554EEC0020588350347EDB2BD0EC9C541C1CABF9B15A1264C936D7D5CF5268CA185797CA0E0D8A0644404C9147CAC3DF449F
vtools/lib/progname.h

(0 . 0)(1 . 20)
2301 
2302 #ifndef _PROGNAME_H
2303 #define _PROGNAME_H
2304 
2305 /* Programs using this file should do the following in |main()|:
2306      |set_program_name (argv[0]);| */
2307 
2308 /* String containing name the program is called with.  */
2309 extern const char *program_name;
2310 
2311 /* Set |program_name|, based on |argv[0]|.  |argv0| must be a string
2312    allocated with indefinite extent, and must not be modified after
2313    this call.  */
2314 extern void set_program_name(const char *argv0);
2315 
2316 /* Return |program_name|. This is similar to getprogname for systems
2317    that don't support it. */
2318 extern const char *get_program_name(void);
2319 
2320 #endif

-
+ 8361650753226E8F2780AA6A9AFF24223B456FF648590333B6AE2D37281FAE485C572119DD2B66B4DC1B2DB46DFBE8168443CA72ABF1072C0446265CC576EFAD
vtools/lib/xalloc.c

(0 . 0)(1 . 107)
2325 #include <stdlib.h>
2326 #include <string.h>
2327 #include "xalloc.h"
2328 #include "error.h"
2329 
2330 /* 1 if |calloc| is known to be compatible with GNU |calloc|.  This
2331    matters if we are not also using the |calloc| module, which defines
2332    |HAVE_CALLOC_GNU| and supports the GNU API even on non-GNU
2333    platforms.  */
2334 #if defined HAVE_CALLOC_GNU || (defined __GLIBC__ && !defined __UCLIBC__)
2335 enum { HAVE_GNU_CALLOC = 1 };
2336 #else
2337 enum { HAVE_GNU_CALLOC = 0 };
2338 #endif
2339 
2340 void
2341 xalloc_die(void) {
2342     error(2, 0, "%s", "memory exhausted");
2343 
2344     /* |_Noreturn| cannot be given to error, since it may return if
2345        its first argument is 0.  To help compilers understand the
2346        |xalloc_die| does not return, call abort.  Also, the abort is a
2347        safety feature if |exit_failure| is 0 (which shouldn't
2348        happen).  */
2349     abort();
2350 }
2351 
2352 /* Allocate |n| bytes of memory dynamically, with error checking.  */
2353 
2354 void *
2355 xmalloc(size_t n) {
2356     void *p = malloc(n);
2357     if (!p && n != 0)
2358         xalloc_die();
2359     return p;
2360 }
2361 
2362 /* Change the size of an allocated block of memory |p| to |n| bytes,
2363    with error checking.  */
2364 
2365 void *
2366 xrealloc(void *p, size_t n) {
2367     if (!n && p) {
2368         /* The GNU and C99 realloc behaviors disagree here.  Act like
2369            GNU, even if the underlying realloc is C99.  */
2370         free(p);
2371         return NULL;
2372     }
2373 
2374     p = realloc(p, n);
2375     if (!p && n)
2376         xalloc_die();
2377     return p;
2378 }
2379 
2380 /* If |p| is null, allocate a block of at least |*pn| bytes;
2381    otherwise, reallocate |p| so that it contains more than |*pn|
2382    bytes.  |*PN| must be nonzero unless |p| is |NULL|.  Set |*pn| to
2383    the new block's size, and return the pointer to the new block.
2384    |*pn| is never set to zero, and the returned pointer is never
2385    |NULL|.  */
2386 
2387 void *
2388 x2realloc(void *p, size_t *pn) {
2389     return x2nrealloc(p, pn, 1);
2390 }
2391 
2392 /* Allocate |s| bytes of zeroed memory dynamically, with error
2393    checking.  There's no need for |xnzalloc (n, s)|, since it would be
2394    equivalent to |xcalloc (n, s)|.  */
2395 
2396 void *
2397 xzalloc(size_t s) {
2398     return memset (xmalloc(s), 0, s);
2399 }
2400 
2401 /* Allocate zeroed memory for |n| elements of |s| bytes, with error
2402    checking.  |s| must be nonzero.  */
2403 
2404 void *
2405 xcalloc(size_t n, size_t s) {
2406     void *p;
2407     /* Test for overflow, since objects with size greater than
2408        |PTRDIFF_MAX| cause pointer subtraction to go awry.  Omit
2409        size-zero tests if |HAVE_GNU_CALLOC|, since GNU calloc never
2410        returns |NULL| if successful.  */
2411     if (xalloc_oversized (n, s)
2412         || (!(p = calloc(n, s)) && (HAVE_GNU_CALLOC || n != 0)))
2413         xalloc_die();
2414     return p;
2415 }
2416 
2417 /* Clone an object |p| of size |s|, with error checking.  There's no
2418    need for |xnmemdup (p, n, s)|, since |xmemdup (p, n * s)| works
2419    without any need for an arithmetic overflow check.  */
2420 
2421 void *
2422 xmemdup(void const *p, size_t s) {
2423     return memcpy (xmalloc(s), p, s);
2424 }
2425 
2426 /* Clone |string|.  */
2427 
2428 char *
2429 xstrdup(char const *string) {
2430     return xmemdup(string, strlen(string) + 1);
2431 }

-
+ 2146359188E16E7186846AE0B1026DC41BB7EC2C58FCE67D3F02F99BDB851F9325483084B55FFD5BCB4B0D36C08799BF3198BAAEDF7595B6E0EE85A042806446
vtools/lib/xalloc.h

(0 . 0)(1 . 170)
2436 #ifndef XALLOC_H_
2437 #define XALLOC_H_
2438 
2439 #include <stddef.h>
2440 #include <stdint.h>
2441 #include "system.h"
2442 
2443 /* This function is always triggered when memory is exhausted.  It
2444    must be defined by the application, either explicitly or by using
2445    gnulib's xalloc-die module.  This is the function to call when one
2446    wants the program to die because of a memory allocation
2447    failure.  */
2448 extern _Noreturn void xalloc_die(void);
2449 
2450 void *xmalloc(size_t s);
2451 void *xzalloc(size_t s);
2452 void *xcalloc(size_t n, size_t s);
2453 void *xrealloc(void *p, size_t s);
2454 void *x2realloc(void *p, size_t *pn);
2455 void *xmemdup(void const *p, size_t s);
2456 char *xstrdup(char const *str);
2457 
2458 /* In the following macros, |t| must be an elementary or
2459    structure/union or typedef'ed type, or a pointer to such a type.
2460    To apply one of the following macros to a function pointer or array
2461    type, you need to typedef it first and use the typedef name.  */
2462 
2463 /* Allocate an object of type |t| dynamically, with error checking. */
2464 #define XMALLOC(t) ((t *) xmalloc (sizeof (t)))
2465 
2466 /* Allocate memory for |n| elements of type |t|, with error
2467    checking. */
2468 #define XNMALLOC(n, t) \
2469    ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t))))
2470 
2471 /* Allocate an object of type |t| dynamically, with error checking,
2472    and zero it.  */
2473 #define XZALLOC(t) ((t *) xzalloc (sizeof (t)))
2474 
2475 /* Allocate memory for |n| elements of type |t|, with error checking,
2476    and zero it.  */
2477 #define XCALLOC(n, t) \
2478    ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t))))
2479 
2480 
2481 /* Allocate an array of |n| objects, each with |s| bytes of memory,
2482    dynamically, with error checking.  |s| must be nonzero.  */
2483 
2484 inline void *xnmalloc(size_t n, size_t s);
2485 
2486 inline void *
2487 xnmalloc(size_t n, size_t s) {
2488     if (xalloc_oversized (n, s))
2489         xalloc_die();
2490     return xmalloc(n * s);
2491 }
2492 
2493 /* Change the size of an allocated block of memory |p| to an array of
2494    |n| objects each of |s| bytes, with error checking.  |s| must be
2495    nonzero.  */
2496 
2497 inline void *xnrealloc(void *p, size_t n, size_t s);
2498 
2499 inline void *
2500 xnrealloc(void *p, size_t n, size_t s) {
2501     if (xalloc_oversized (n, s))
2502         xalloc_die();
2503     return xrealloc(p, n * s);
2504 }
2505 
2506 /* If |p| is |NULL|, allocate a block of at least |*pn| such objects;
2507    otherwise, reallocate |p| so that it contains more than |*pn|
2508    objects each of |s| bytes.  |s| must be nonzero.  Set |*pn| to the
2509    new number of objects, and return the pointer to the new block.
2510    |*PN| is never set to zero, and the returned pointer is never |NULL|.
2511 
2512    Repeated reallocations are guaranteed to make progress, either by
2513    allocating an initial block with a nonzero size, or by allocating a
2514    larger block.
2515 
2516    In the following implementation, nonzero sizes are increased by a
2517    factor of approximately 1.5 so that repeated reallocations have
2518    $O(N)$ overall cost rather than $O(N^2)$ cost, but the
2519    specification for this function does not guarantee that rate.
2520 
2521    Here is an example of use:
2522 
2523      |int *p = NULL;
2524      size_t used = 0;
2525      size_t allocated = 0;
2526 
2527      void
2528      append_int (int value)
2529        {
2530          if (used == allocated)
2531            p = x2nrealloc (p, &allocated, sizeof *p);
2532          p[used++] = value;
2533        }|
2534 
2535    This causes |x2nrealloc| to allocate a block of some nonzero size
2536    the first time it is called.
2537 
2538    To have finer-grained control over the initial size, set |*pn| to a
2539    nonzero value before calling this function with |p == NULL|.  For
2540    example:
2541 
2542      |int *p = NULL;
2543      size_t used = 0;
2544      size_t allocated = 0;
2545      size_t allocated1 = 1000;
2546 
2547      void
2548      append_int (int value)
2549        {
2550          if (used == allocated)
2551            {
2552              p = x2nrealloc (p, &allocated1, sizeof *p);
2553              allocated = allocated1;
2554            }
2555          p[used++] = value;
2556        }|
2557 
2558    */
2559 
2560 static inline void *
2561 x2nrealloc(void *p, size_t *pn, size_t s) {
2562     size_t n = *pn;
2563 
2564     if (!p) {
2565         if (!n) {
2566             /* The approximate size to use for initial small
2567                allocation requests, when the invoking code specifies
2568                an old size of zero.  This is the largest "small"
2569                request for the GNU C library malloc.  */
2570             enum {
2571                 DEFAULT_MXFAST = 64 * sizeof(size_t) / 4
2572             };
2573 
2574             n = DEFAULT_MXFAST / s;
2575             n += !n;
2576         }
2577         if (xalloc_oversized (n, s))
2578             xalloc_die();
2579     } else {
2580         /* Set $n = \lfloor 1.5 * n \rfloor + 1$ so that progress is
2581            made even if $n = 0$.  Check for overflow, so that $n * s$
2582            stays in both |ptrdiff_t| and |size_t| range.  The check
2583            may be slightly conservative, but an exact check isn't
2584            worth the trouble.  */
2585         if ((PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX) / 3 * 2 / s
2586             <= n)
2587             xalloc_die();
2588         n += n / 2 + 1;
2589     }
2590 
2591     *pn = n;
2592     return xrealloc(p, n * s);
2593 }
2594 
2595 /* Return a pointer to a new buffer of |n| bytes.  This is like
2596    xmalloc, except it returns |char *|.  */
2597 
2598 static inline char *xcharalloc(size_t n);
2599 
2600 static inline char *
2601 xcharalloc(size_t n) {
2602     return XNMALLOC (n, char);
2603 }
2604 
2605 #endif

-
+ AAB77CCF9C4FA86348F84D661509CF482F958D1A1ED4A55F608683510A8C5AB3E3A12E6C9FA61B6DDF31A493D5BE586EF7362D00E1D91542A130B27EC0DC5AD3
vtools/manifest

(0 . 0)(1 . 1)
2610 510300 phf vtools_genesis Initial release of vtools. Genesis contains stripped down version of diffutils 3.6, with most of functionality not relevant to vpatch removed.

-
+ 7C13EDE685F2C92F24A015E44B9609494E4D5DA9BC6A192B62A9CAB92F36C2543F5092FDADD7436FEAC45CA0AB54D19EA0692848C5C94F9819B885BEEC08E179
vtools/src/analyze.c

(0 . 0)(1 . 597)
2615 
2616 #include "diff.h"
2617 #include <cmpbuf.h>
2618 #include <error.h>
2619 #include <xalloc.h>
2620 #include <stdlib.h>
2621 
2622 /* The core of the Diff algorithm.  */
2623 #define ELEMENT lin
2624 #define EQUAL(x, y) ((x) == (y))
2625 #define OFFSET lin
2626 #define EXTRA_CONTEXT_FIELDS /* none */
2627 #define NOTE_DELETE(c, xoff) (files[0].changed[files[0].realindexes[xoff]] = 1)
2628 #define NOTE_INSERT(c, yoff) (files[1].changed[files[1].realindexes[yoff]] = 1)
2629 #define USE_HEURISTIC 1
2630 #include <limits.h>
2631 #include <diffseq.h>
2632 #include <sys/stat.h>
2633 
2634 /* Discard lines from one file that have no matches in the other file.
2635 
2636    A line which is discarded will not be considered by the actual
2637    comparison algorithm; it will be as if that line were not in the
2638    file.  The file's |realindexes| table maps virtual line numbers
2639    (which don't count the discarded lines) into real line numbers;
2640    this is how the actual comparison algorithm produces results that
2641    are comprehensible when the discarded lines are counted.
2642 
2643    When we discard a line, we also mark it as a deletion or insertion
2644    so that it will be printed in the output.  */
2645 
2646 static void
2647 discard_confusing_lines(struct file_data filevec[]) {
2648     int f;
2649     lin i;
2650     char *discarded[2];
2651     lin *equiv_count[2];
2652     lin *p;
2653 
2654     /* Allocate our results.  */
2655     p = xmalloc((filevec[0].buffered_lines + filevec[1].buffered_lines)
2656                 * (2 * sizeof *p));
2657     for (f = 0; f < 2; f++) {
2658         filevec[f].undiscarded = p;
2659         p += filevec[f].buffered_lines;
2660         filevec[f].realindexes = p;
2661         p += filevec[f].buffered_lines;
2662     }
2663 
2664     /* Set up |equiv_count[f][i]| as the number of lines in file |f|
2665        that fall in equivalence class |i|.  */
2666 
2667     p = zalloc(filevec[0].equiv_max * (2 * sizeof *p));
2668     equiv_count[0] = p;
2669     equiv_count[1] = p + filevec[0].equiv_max;
2670 
2671     for (i = 0; i < filevec[0].buffered_lines; ++i)
2672         ++equiv_count[0][filevec[0].equivs[i]];
2673     for (i = 0; i < filevec[1].buffered_lines; ++i)
2674         ++equiv_count[1][filevec[1].equivs[i]];
2675 
2676     /* Set up tables of which lines are going to be discarded.  */
2677 
2678     discarded[0] = zalloc(filevec[0].buffered_lines
2679                           + filevec[1].buffered_lines);
2680     discarded[1] = discarded[0] + filevec[0].buffered_lines;
2681 
2682     /* Mark to be discarded each line that matches no line of the
2683        other file.  If a line matches many lines, mark it as
2684        provisionally discardable.  */
2685 
2686     for (f = 0; f < 2; f++) {
2687         size_t end = filevec[f].buffered_lines;
2688         char *discards = discarded[f];
2689         lin *counts = equiv_count[1 - f];
2690         lin *equivs = filevec[f].equivs;
2691         size_t many = 5;
2692         size_t tem = end / 64;
2693 
2694         /* Multiply |many| by approximate square root of number of
2695        lines.  That is the threshold for provisionally discardable
2696        lines.  */
2697         while ((tem = tem >> 2) > 0)
2698             many *= 2;
2699 
2700         for (i = 0; i < end; i++) {
2701             lin nmatch;
2702             if (equivs[i] == 0)
2703                 continue;
2704             nmatch = counts[equivs[i]];
2705             if (nmatch == 0)
2706                 discards[i] = 1;
2707             else if (nmatch > many)
2708                 discards[i] = 2;
2709         }
2710     }
2711 
2712     /* Don't really discard the provisional lines except when they
2713        occur in a run of discardables, with nonprovisionals at the
2714        beginning and end.  */
2715 
2716     for (f = 0; f < 2; f++) {
2717         lin end = filevec[f].buffered_lines;
2718         register char *discards = discarded[f];
2719 
2720         for (i = 0; i < end; i++) {
2721             /* Cancel provisional discards not in middle of run of
2722                discards.  */
2723             if (discards[i] == 2)
2724                 discards[i] = 0;
2725             else if (discards[i] != 0) {
2726                 /* We have found a nonprovisional discard.  */
2727                 register lin j;
2728                 lin length;
2729                 lin provisional = 0;
2730 
2731                 /* Find end of this run of discardable lines.  Count
2732                    how many are provisionally discardable.  */
2733                 for (j = i; j < end; j++) {
2734                     if (discards[j] == 0)
2735                         break;
2736                     if (discards[j] == 2)
2737                         ++provisional;
2738                 }
2739 
2740                 /* Cancel provisional discards at end, and shrink the
2741                    run.  */
2742                 while (j > i && discards[j - 1] == 2)
2743                     discards[--j] = 0, --provisional;
2744 
2745                 /* Now we have the length of a run of discardable
2746                    lines whose first and last are not provisional.  */
2747                 length = j - i;
2748 
2749                 /* If $1/4$ of the lines in the run are provisional,
2750                    cancel discarding of all provisional lines in the
2751                    run.  */
2752                 if (provisional * 4 > length) {
2753                     while (j > i)
2754                         if (discards[--j] == 2)
2755                             discards[j] = 0;
2756                 } else {
2757                     register lin consec;
2758                     lin minimum = 1;
2759                     lin tem = length >> 2;
2760 
2761                     /* |minimum| is approximate square root of
2762                        |length/4|.  A subrun of two or more
2763                        provisionals can stand when |length| is at
2764                        least 16.  A subrun of 4 or more can stand when
2765                        |LENGTH >= 64|.  */
2766                     while (0 < (tem >>= 2))
2767                         minimum <<= 1;
2768                     minimum++;
2769 
2770                     /* Cancel any subrun of |minimum| or more
2771                        provisionals within the larger run.  */
2772                     for (j = 0, consec = 0; j < length; j++)
2773                         if (discards[i + j] != 2)
2774                             consec = 0;
2775                         else if (minimum == ++consec)
2776                             /* Back up to start of subrun, to cancel
2777                                it all.  */
2778                             j -= consec;
2779                         else if (minimum < consec)
2780                             discards[i + j] = 0;
2781 
2782                     /* Scan from beginning of run until we find 3 or
2783                        more nonprovisionals in a row or until the
2784                        first nonprovisional at least 8 lines in.
2785                        Until that point, cancel any provisionals.  */
2786                     for (j = 0, consec = 0; j < length; j++) {
2787                         if (j >= 8 && discards[i + j] == 1)
2788                             break;
2789                         if (discards[i + j] == 2)
2790                             consec = 0, discards[i + j] = 0;
2791                         else if (discards[i + j] == 0)
2792                             consec = 0;
2793                         else
2794                             consec++;
2795                         if (consec == 3)
2796                             break;
2797                     }
2798 
2799                     /* I advances to the last line of the run.  */
2800                     i += length - 1;
2801 
2802                     /* Same thing, from end.  */
2803                     for (j = 0, consec = 0; j < length; j++) {
2804                         if (j >= 8 && discards[i - j] == 1)
2805                             break;
2806                         if (discards[i - j] == 2)
2807                             consec = 0, discards[i - j] = 0;
2808                         else if (discards[i - j] == 0)
2809                             consec = 0;
2810                         else
2811                             consec++;
2812                         if (consec == 3)
2813                             break;
2814                     }
2815                 }
2816             }
2817         }
2818     }
2819 
2820     /* Actually discard the lines. */
2821     for (f = 0; f < 2; f++) {
2822         char *discards = discarded[f];
2823         lin end = filevec[f].buffered_lines;
2824         lin j = 0;
2825         for (i = 0; i < end; ++i)
2826             if (minimal || discards[i] == 0) {
2827                 filevec[f].undiscarded[j] = filevec[f].equivs[i];
2828                 filevec[f].realindexes[j++] = i;
2829             } else
2830                 filevec[f].changed[i] = 1;
2831         filevec[f].nondiscarded_lines = j;
2832     }
2833 
2834     free(discarded[0]);
2835     free(equiv_count[0]);
2836 }
2837 
2838 /* Adjust inserts/deletes of identical lines to join changes as much
2839    as possible.
2840 
2841    We do something when a run of changed lines include a line at one
2842    end and have an excluded, identical line at the other.  We are free
2843    to choose which identical line is included.  |compareseq| usually
2844    chooses the one at the beginning, but usually it is cleaner to
2845    consider the following identical line to be the "change".  */
2846 
2847 static void
2848 shift_boundaries(struct file_data filevec[]) {
2849     int f;
2850 
2851     for (f = 0; f < 2; f++) {
2852         char *changed = filevec[f].changed;
2853         char *other_changed = filevec[1 - f].changed;
2854         lin const *equivs = filevec[f].equivs;
2855         lin i = 0;
2856         lin j = 0;
2857         lin i_end = filevec[f].buffered_lines;
2858 
2859         while (1) {
2860             lin runlength, start, corresponding;
2861 
2862             /* Scan forwards to find beginning of another run of
2863                changes.  Also keep track of the corresponding point in
2864                the other file.  */
2865 
2866             while (i < i_end && !changed[i]) {
2867                 while (other_changed[j++])
2868                     continue;
2869                 i++;
2870             }
2871 
2872             if (i == i_end)
2873                 break;
2874 
2875             start = i;
2876 
2877             /* Find the end of this run of changes.  */
2878 
2879             while (changed[++i])
2880                 continue;
2881             while (other_changed[j])
2882                 j++;
2883 
2884             do {
2885                 /* Record the length of this run of changes, so that
2886                we can later determine whether the run has grown.  */
2887                 runlength = i - start;
2888 
2889                 /* Move the changed region back, so long as the
2890                previous unchanged line matches the last changed one.
2891                This merges with previous changed regions.  */
2892 
2893                 while (start && equivs[start - 1] == equivs[i - 1]) {
2894                     changed[--start] = 1;
2895                     changed[--i] = 0;
2896                     while (changed[start - 1])
2897                         start--;
2898                     while (other_changed[--j])
2899                         continue;
2900                 }
2901 
2902                 /* Set |corresponding| to the end of the changed run,
2903                at the last point where it corresponds to a changed run
2904                in the other file.  |corresponding == i_end| means no
2905                such point has been found.  */
2906                 corresponding = other_changed[j - 1] ? i : i_end;
2907 
2908                 /* Move the changed region forward, so long as the
2909                first changed line matches the following unchanged one.
2910                This merges with following changed regions.  Do this
2911                second, so that if there are no merges, the changed
2912                region is moved forward as far as possible.  */
2913 
2914                 while (i != i_end && equivs[start] == equivs[i]) {
2915                     changed[start++] = 0;
2916                     changed[i++] = 1;
2917                     while (changed[i])
2918                         i++;
2919                     while (other_changed[++j])
2920                         corresponding = i;
2921                 }
2922             } while (runlength != i - start);
2923 
2924             /* If possible, move the fully-merged run of changes back
2925                to a corresponding run in the other file.  */
2926 
2927             while (corresponding < i) {
2928                 changed[--start] = 1;
2929                 changed[--i] = 0;
2930                 while (other_changed[--j])
2931                     continue;
2932             }
2933         }
2934     }
2935 }
2936 
2937 /* Cons an additional entry onto the front of an edit script |old|.
2938    |line0| and |line1| are the first affected lines in the two files
2939    (origin 0).  |deleted| is the number of lines deleted here from
2940    file 0.  |inserted| is the number of lines inserted here in file 1.
2941 
2942    If |deleted| is 0 then |line0| is the number of the line before
2943    which the insertion was done; vice versa for |inserted| and
2944    |line1|.  */
2945 
2946 static struct change *
2947 add_change(lin line0, lin line1, lin deleted, lin inserted,
2948            struct change *old) {
2949     struct change *new = xmalloc(sizeof *new);
2950 
2951     new->line0 = line0;
2952     new->line1 = line1;
2953     new->inserted = inserted;
2954     new->deleted = deleted;
2955     new->link = old;
2956     return new;
2957 }
2958 
2959 /* Scan the tables of which lines are inserted and deleted, producing
2960    an edit script in reverse order.  */
2961 
2962 static struct change *
2963 build_reverse_script(struct file_data const filevec[]) {
2964     struct change *script = 0;
2965     char *changed0 = filevec[0].changed;
2966     char *changed1 = filevec[1].changed;
2967     lin len0 = filevec[0].buffered_lines;
2968     lin len1 = filevec[1].buffered_lines;
2969 
2970     /* Note that |changedN[lenN]| does exist, and is 0.  */
2971 
2972     lin i0 = 0, i1 = 0;
2973 
2974     while (i0 < len0 || i1 < len1) {
2975         if (changed0[i0] | changed1[i1]) {
2976             lin line0 = i0, line1 = i1;
2977 
2978             /* Find \# lines changed here in each file.  */
2979             while (changed0[i0]) ++i0;
2980             while (changed1[i1]) ++i1;
2981 
2982             /* Record this change.  */
2983             script = add_change(line0, line1, i0 - line0, i1 - line1, script);
2984         }
2985 
2986         /* We have reached lines in the two files that match each
2987            other.  */
2988         i0++, i1++;
2989     }
2990 
2991     return script;
2992 }
2993 
2994 /* Scan the tables of which lines are inserted and deleted, producing
2995    an edit script in forward order.  */
2996 
2997 static struct change *
2998 build_script(struct file_data const filevec[]) {
2999     struct change *script = 0;
3000     char *changed0 = filevec[0].changed;
3001     char *changed1 = filevec[1].changed;
3002     lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
3003 
3004     /* Note that |changedN[-1]| does exist, and is 0.  */
3005 
3006     while (i0 >= 0 || i1 >= 0) {
3007         if (changed0[i0 - 1] | changed1[i1 - 1]) {
3008             lin line0 = i0, line1 = i1;
3009 
3010             /* Find \# lines changed here in each file.  */
3011             while (changed0[i0 - 1]) --i0;
3012             while (changed1[i1 - 1]) --i1;
3013 
3014             /* Record this change.  */
3015             script = add_change(i0, i1, line0 - i0, line1 - i1, script);
3016         }
3017 
3018         /* We have reached lines in the two files that match each
3019            other.  */
3020         i0--, i1--;
3021     }
3022 
3023     return script;
3024 }
3025 
3026 /* If |changes|, briefly report that two files differed.  */
3027 static void
3028 briefly_report(int changes, struct file_data const filevec[]) {
3029     if (changes)
3030         message((brief
3031                  ? "Files %s and %s differ\n"
3032                  : "Binary files %s and %s differ\n"),
3033                 file_label[0] ? file_label[0] : filevec[0].name,
3034                 file_label[1] ? file_label[1] : filevec[1].name);
3035 }
3036 
3037 /* Report the differences of two files.  */
3038 int
3039 diff_2_files(struct comparison *cmp) {
3040     int f;
3041     struct change *e, *p;
3042     struct change *script;
3043     int changes;
3044 
3045 
3046     /* If we have detected that either file is binary, compare the two
3047        files as binary.  This can happen only when the first chunk is
3048        read. */
3049 
3050     if (read_files(cmp->file, files_can_be_treated_as_binary)) {
3051         /* Files with different lengths must be different.  */
3052         if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
3053             && 0 < cmp->file[0].stat.st_size
3054             && 0 < cmp->file[1].stat.st_size
3055             && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
3056             && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
3057             changes = 1;
3058 
3059             /* Standard input equals itself.  */
3060         else if (cmp->file[0].desc == cmp->file[1].desc)
3061             changes = 0;
3062 
3063         else
3064             /* Scan both files, a buffer at a time, looking for a
3065                difference.  */
3066         {
3067             /* Allocate same-sized buffers for both files.  */
3068             size_t lcm_max = PTRDIFF_MAX - 1;
3069             size_t buffer_size =
3070                     buffer_lcm(sizeof(word),
3071                                buffer_lcm(STAT_BLOCKSIZE (cmp->file[0].stat),
3072                                           STAT_BLOCKSIZE (cmp->file[1].stat),
3073                                           lcm_max),
3074                                lcm_max);
3075             for (f = 0; f < 2; f++)
3076                 cmp->file[f].buffer = xrealloc(cmp->file[f].buffer, buffer_size);
3077 
3078             for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0) {
3079                 /* Read a buffer's worth from both files.  */
3080                 for (f = 0; f < 2; f++)
3081                     if (0 <= cmp->file[f].desc)
3082                         file_block_read(&cmp->file[f],
3083                                         buffer_size - cmp->file[f].buffered);
3084 
3085                 /* If the buffers differ, the files differ.  */
3086                 if (cmp->file[0].buffered != cmp->file[1].buffered
3087                     || memcmp(cmp->file[0].buffer,
3088                               cmp->file[1].buffer,
3089                               cmp->file[0].buffered)) {
3090                     changes = 1;
3091                     break;
3092                 }
3093 
3094                 /* If we reach end of file, the files are the
3095                    same.  */
3096                 if (cmp->file[0].buffered != buffer_size) {
3097                     changes = 0;
3098                     break;
3099                 }
3100             }
3101         }
3102 
3103         briefly_report(changes, cmp->file);
3104     } else {
3105         struct context ctxt;
3106         lin diags;
3107         lin too_expensive;
3108 
3109         /* Allocate vectors for the results of comparison: a flag for
3110        each line of each file, saying whether that line is an
3111        insertion or deletion.  Allocate an extra element, always 0, at
3112        each end of each vector.  */
3113 
3114         size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
3115         char *flag_space = zalloc(s);
3116         cmp->file[0].changed = flag_space + 1;
3117         cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
3118 
3119         /* Some lines are obviously insertions or deletions because
3120        they don't match anything.  Detect them now, and avoid even
3121        thinking about them in the main comparison algorithm.  */
3122 
3123         discard_confusing_lines(cmp->file);
3124 
3125         /* Now do the main comparison algorithm, considering just the
3126        undiscarded lines.  */
3127 
3128         ctxt.xvec = cmp->file[0].undiscarded;
3129         ctxt.yvec = cmp->file[1].undiscarded;
3130         diags = (cmp->file[0].nondiscarded_lines
3131                  + cmp->file[1].nondiscarded_lines + 3);
3132         ctxt.fdiag = xmalloc(diags * (2 * sizeof *ctxt.fdiag));
3133         ctxt.bdiag = ctxt.fdiag + diags;
3134         ctxt.fdiag += cmp->file[1].nondiscarded_lines + 1;
3135         ctxt.bdiag += cmp->file[1].nondiscarded_lines + 1;
3136 
3137         ctxt.heuristic = speed_large_files;
3138 
3139         /* Set |too_expensive| to be the approximate square root of
3140        the input size, bounded below by 4096.  4096 seems to be good
3141        for circa-2016 CPUs; see |Bug#16848| and |Bug#24715|.  */
3142         too_expensive = 1;
3143         for (; diags != 0; diags >>= 2)
3144             too_expensive <<= 1;
3145         ctxt.too_expensive = MAX (4096, too_expensive);
3146 
3147         files[0] = cmp->file[0];
3148         files[1] = cmp->file[1];
3149 
3150         compareseq(0, cmp->file[0].nondiscarded_lines,
3151                    0, cmp->file[1].nondiscarded_lines, minimal, &ctxt);
3152 
3153         free(ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1));
3154 
3155         /* Modify the results slightly to make them prettier in cases
3156        where that can validly be done.  */
3157 
3158         shift_boundaries(cmp->file);
3159 
3160         /* Get the results of comparison in the form of a chain of
3161        |struct change|s -- an edit script.  */
3162 
3163         script = build_script(cmp->file);
3164 
3165         /* Set |changes| if we had any diffs.  If some changes are
3166        ignored, we must scan the script to decide.  */
3167         changes = (script != 0);
3168 
3169         if (brief)
3170             briefly_report(changes, cmp->file);
3171         else {
3172             if (changes) {
3173                 /* Record info for starting up output, to be used if
3174                and when we have some output to print.  */
3175                 setup_output(file_label[0] ? file_label[0] : cmp->file[0].name,
3176                              file_label[1] ? file_label[1] : cmp->file[1].name,
3177                              cmp->parent != 0);
3178                 print_context_script(script);
3179                 finish_output();
3180             }
3181         }
3182 
3183         free(cmp->file[0].undiscarded);
3184 
3185         free(flag_space);
3186 
3187         for (f = 0; f < 2; f++) {
3188             free(cmp->file[f].equivs);
3189             free(cmp->file[f].linbuf + cmp->file[f].linbuf_base);
3190         }
3191 
3192         for (e = script; e; e = p) {
3193             p = e->link;
3194             free(e);
3195         }
3196 
3197         for (f = 0; f < 2; ++f)
3198             if (cmp->file[f].missing_newline) {
3199                 error(0, 0, "%s: %s\n",
3200                       file_label[f] ? file_label[f] : cmp->file[f].name,
3201                       "No newline at end of file");
3202                 changes = 2;
3203             }
3204     }
3205 
3206     if (cmp->file[0].buffer != cmp->file[1].buffer)
3207         free(cmp->file[0].buffer);
3208     free(cmp->file[1].buffer);
3209 
3210     return changes;
3211 }

-
+ F5B3E5C736BE0C6C4E4FDCC50E2A436C17D3E84FE47E6125DD0FA775C9CFFCC625A98FE7091A036593E21CDAFA1E533FD8A04640649CD6ED89DA499C94333668
vtools/src/context.c

(0 . 0)(1 . 211)
3216 
3217 #include <stdlib.h>
3218 #include "diff.h"
3219 
3220 /* Print a label for a context diff, with a file name and date or a
3221    label.  */
3222 
3223 static void
3224 print_context_label(char const *mark,
3225                     struct file_data *inf,
3226                     char const *name,
3227                     char const *label) {
3228     if (label)
3229         fprintf(outfile, "%s %s\n", mark, label);
3230     else {
3231         fprintf(outfile, "%s %s false\n", mark, name);
3232     }
3233 }
3234 
3235 /* Print a header for a context diff, with the file names and dates.  */
3236 
3237 void
3238 print_context_header(struct file_data inf[], char const *const *names) {
3239     print_context_label("---", &inf[0], names[0], file_label[0]);
3240     print_context_label("+++", &inf[1], names[1], file_label[1]);
3241 }
3242 
3243 /* Print an edit script in context format.  */
3244 
3245 void
3246 print_context_script(struct change *script) {
3247     struct change *e;
3248     for (e = script; e; e = e->link)
3249         e->ignore = false;
3250 
3251     print_script(script);
3252 }
3253 
3254 /* Print a pair of line numbers with a comma, translated for file
3255    |file|.  If the second number is smaller, use the first in place of
3256    it.  If the numbers are equal, print just one number.
3257 
3258    Args |a| and |b| are internal line numbers.  We print the
3259    translated (real) line numbers.  */
3260 
3261 static void
3262 print_unidiff_number_range(struct file_data const *file, lin a, lin b) {
3263     printint trans_a, trans_b;
3264     translate_range(file, a, b, &trans_a, &trans_b);
3265 
3266     /* We can have |b < a| in the case of a range of no lines.  In
3267        this case, we print the line number before the range, which is
3268        |b|.  It would be more logical to print |a|, but `patch'
3269        expects |b| in order to detect diffs against empty files.  */
3270     if (trans_b <= trans_a)
3271         fprintf(outfile, trans_b < trans_a ? "%"pI"d,0" : "%"pI"d", trans_b);
3272     else
3273         fprintf(outfile, "%"pI"d,%"pI"d", trans_a, trans_b - trans_a + 1);
3274 }
3275 
3276 /* Print a portion of an edit script in unidiff format.  |hunk| is the
3277    beginning of the portion to be printed.  The end is marked by a
3278    |link| that has been nulled out.
3279 
3280    Prints out lines from both files, and precedes each line with the
3281    appropriate flag-character.  */
3282 
3283 void
3284 pr_unidiff_hunk(struct change *hunk) {
3285     lin first0, last0, first1, last1;
3286     lin i, j, k;
3287     struct change *next;
3288     FILE *out;
3289 
3290     /* Determine range of line numbers involved in each file.  */
3291 
3292     if (!analyze_hunk(hunk, &first0, &last0, &first1, &last1))
3293         return;
3294 
3295     /* Include a context's width before and after.  */
3296 
3297     i = -files[0].prefix_lines;
3298     first0 = MAX (first0 - context, i);
3299     first1 = MAX (first1 - context, i);
3300     if (last0 < files[0].valid_lines - context)
3301         last0 += context;
3302     else
3303         last0 = files[0].valid_lines - 1;
3304     if (last1 < files[1].valid_lines - context)
3305         last1 += context;
3306     else
3307         last1 = files[1].valid_lines - 1;
3308 
3309     begin_output();
3310     out = outfile;
3311 
3312     fputs("@@ -", out);
3313     print_unidiff_number_range(&files[0], first0, last0);
3314     fputs(" +", out);
3315     print_unidiff_number_range(&files[1], first1, last1);
3316     fputs(" @@", out);
3317 
3318     putc('\n', out);
3319 
3320     next = hunk;
3321     i = first0;
3322     j = first1;
3323 
3324     while (i <= last0 || j <= last1) {
3325 
3326         /* If the line isn't a difference, output the context from
3327            file 0. */
3328 
3329         if (!next || i < next->line0) {
3330             char const *const *line = &files[0].linbuf[i++];
3331             if (!(suppress_blank_empty && **line == '\n'))
3332                 putc(' ', out);
3333             print_1_line(NULL, line);
3334             j++;
3335         } else {
3336             /* For each difference, first output the deleted part. */
3337 
3338             k = next->deleted;
3339             while (k--) {
3340                 char const *const *line = &files[0].linbuf[i++];
3341                 putc('-', out);
3342                 print_1_line_nl(NULL, line, true);
3343                 if (line[1][-1] == '\n')
3344                     putc('\n', out);
3345             }
3346 
3347             /* Then output the inserted part. */
3348 
3349             k = next->inserted;
3350             while (k--) {
3351                 char const *const *line = &files[1].linbuf[j++];
3352                 putc('+', out);
3353                 print_1_line_nl(NULL, line, true);
3354                 if (line[1][-1] == '\n')
3355                     putc('\n', out);
3356             }
3357 
3358             /* We're done with this hunk, so on to the next! */
3359 
3360             next = next->link;
3361         }
3362     }
3363 }
3364 
3365 /* Scan a (forward-ordered) edit script for the first place that more
3366    than |2*context| unchanged lines appear, and return a pointer to
3367    the |struct change| for the last change before those lines.  */
3368 
3369 struct change *
3370 find_hunk(struct change *start) {
3371     struct change *prev;
3372     lin top0, top1;
3373     lin thresh;
3374 
3375     /* Threshold distance is |context| if the second change is
3376        ignorable, |2 * context + 1| otherwise.  Integer overflow can't
3377        happen, due to |CONTEXT_LIM|.  */
3378     lin ignorable_threshold = context;
3379     lin non_ignorable_threshold = 2 * context + 1;
3380 
3381     do {
3382         /* Compute number of first line in each file beyond this changed.  */
3383         top0 = start->line0 + start->deleted;
3384         top1 = start->line1 + start->inserted;
3385         prev = start;
3386         start = start->link;
3387         thresh = (start && start->ignore
3388                   ? ignorable_threshold
3389                   : non_ignorable_threshold);
3390         /* It is not supposed to matter which file we check in the
3391        end-test.  If it would matter, crash.  */
3392         if (start && start->line0 - top0 != start->line1 - top1)
3393             abort();
3394     } while (start
3395              /* Keep going if less than |thresh| lines elapse before
3396             the affected line.  */
3397              && start->line0 - top0 < thresh);
3398 
3399     return prev;
3400 }
3401 
3402 /* Set the |ignore| flag properly in each change in script.  It should
3403    be 1 if all the lines inserted or deleted in that change are
3404    ignorable lines.  */
3405 
3406 static void
3407 mark_ignorable(struct change *script) {
3408     while (script) {
3409         struct change *next = script->link;
3410         lin first0, last0, first1, last1;
3411 
3412         /* Turn this change into a hunk: detach it from the others.  */
3413         script->link = NULL;
3414 
3415         /* Determine whether this change is ignorable.  */
3416         script->ignore = !analyze_hunk(script,
3417                                        &first0, &last0, &first1, &last1);
3418 
3419         /* Reconnect the chain as before.  */
3420         script->link = next;
3421 
3422         /* Advance to the following change.  */
3423         script = next;
3424     }
3425 }
3426

-
+ 908272F9A1C8A15CAC809A9C6DBF96F7CD60B40BC3EBAD0F1DF368F6D5DFA10DA38E7E6F8CD09DC994E2291B95334C7768D885633536302E3C47181452A2B839
vtools/src/diff.c

(0 . 0)(1 . 454)
3431 #define GDIFF_MAIN
3432 
3433 #include "diff.h"
3434 #include <dirname.h>
3435 #include <error.h>
3436 #include <filenamecat.h>
3437 #include <progname.h>
3438 #include <xalloc.h>
3439 #include <getopt.h>
3440 #include <fcntl.h>
3441 #include <stdlib.h>
3442 #include <inttypes.h>
3443 #include <filetype.h>
3444 
3445 #define STREQ(a, b) (strcmp (a, b) == 0)
3446 #define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9)
3447 
3448 static int compare_files(struct comparison const *, char const *, char const *);
3449 
3450 static void try_help(char const *, char const *);
3451 
3452 static void check_stdout(void);
3453 
3454 static char const shortopts[] = "0123456789aU:dHL:qurN";
3455 
3456 /* Return a string containing the command options with which diff was
3457    invoked.  Spaces appear between what were separate |argv|-elements.
3458    There is a space at the beginning but none at the end.  If there
3459    were no options, the result is an empty string.
3460 
3461    Arguments: |optionvec|, a vector containing separate
3462    |argv|-elements, and |count|, the length of that vector.
3463 
3464    todo phf: Implicitly add the -uNr since that's the defaults, and
3465    this combination of flags is what the original vdiff has.*/
3466 
3467 static char *
3468 option_list(char **optionvec, int count) {
3469     char prefix[] = " -uNr";
3470     int i;
3471     size_t size = 1 + sizeof prefix;
3472     char *result;
3473     char *p;
3474 
3475     for (i = 0; i < count; i++)
3476         size += strlen(optionvec[i]);
3477 
3478     p = result = xmalloc(size);
3479     p = stpncpy(p, prefix, sizeof prefix - 1);
3480 
3481     for (i = 0; i < count; i++) {
3482         *p++ = ' ';
3483         p = stpncpy(p, optionvec[i], strlen(optionvec[i])-1);
3484     }
3485 
3486     *p = '\0';
3487     return result;
3488 }
3489 
3490 int
3491 main(int argc, char **argv) {
3492     int exit_status = EXIT_SUCCESS;
3493     int c;
3494     int prev = -1;
3495     lin ocontext = -1;
3496     bool explicit_context = false;
3497     char const *from_file = NULL;
3498     char const *to_file = NULL;
3499     uintmax_t numval;
3500     char *numend;
3501     set_program_name(argv[0]);
3502 
3503     /* Decode the options.  */
3504 
3505     while ((c = getopt(argc, argv, shortopts)) != -1) {
3506         switch (c) {
3507             case 0:
3508                 break;
3509 
3510             case '0':
3511             case '1':
3512             case '2':
3513             case '3':
3514             case '4':
3515             case '5':
3516             case '6':
3517             case '7':
3518             case '8':
3519             case '9':
3520                 ocontext = (!ISDIGIT (prev)
3521                             ? c - '0'
3522                             : (ocontext - (c - '0' <= CONTEXT_MAX % 10)
3523                                < CONTEXT_MAX / 10)
3524                               ? 10 * ocontext + (c - '0')
3525                               : CONTEXT_MAX);
3526                 break;
3527 
3528             case 'a':
3529                 text = true;
3530                 break;
3531 
3532             case 'U': {
3533                 if (optarg) {
3534                     numval = strtoumax(optarg, &numend, 10);
3535                     if (*numend)
3536                         try_help("invalid context length '%s'", optarg);
3537                     if (CONTEXT_MAX < numval)
3538                         numval = CONTEXT_MAX;
3539                 } else
3540                     numval = 3;
3541 
3542                 if (context < numval)
3543                     context = numval;
3544                 explicit_context = true;
3545             }
3546                 break;
3547 
3548             case 'd':
3549                 minimal = true;
3550                 break;
3551 
3552             case 'H':
3553                 speed_large_files = true;
3554                 break;
3555 
3556             case 'L':
3557                 if (!file_label[0])
3558                     file_label[0] = optarg;
3559                 else if (!file_label[1])
3560                     file_label[1] = optarg;
3561                 else
3562                     fatal("too many file label options");
3563                 break;
3564 
3565             case 'q':
3566                 brief = true;
3567                 break;
3568 
3569             case 'u':
3570             case 'r':
3571             case 'N':
3572                 /* compat */
3573                 break;
3574 
3575             default:
3576                 try_help(NULL, NULL);
3577         }
3578         prev = c;
3579     }
3580 
3581     if (context < 3)
3582         context = 3;
3583     suppress_blank_empty = false;
3584 
3585     if (0 <= ocontext
3586         && (context < ocontext
3587             || (ocontext < context && !explicit_context)))
3588         context = ocontext;
3589 
3590     /* Make the horizon at least as large as the context, so that
3591        |shift_boundaries| has more freedom to shift the first and last
3592        hunks.  */
3593     if (horizon_lines < context)
3594         horizon_lines = context;
3595 
3596     files_can_be_treated_as_binary = false;
3597 
3598     switch_string = option_list(argv + 1, optind - 1);
3599 
3600     if (from_file) {
3601         if (to_file)
3602             fatal("--from-file and --to-file both specified");
3603         else
3604             for (; optind < argc; optind++) {
3605                 int status = compare_files(NULL, from_file, argv[optind]);
3606                 if (exit_status < status)
3607                     exit_status = status;
3608             }
3609     } else {
3610         if (to_file)
3611             for (; optind < argc; optind++) {
3612                 int status = compare_files(NULL, argv[optind], to_file);
3613                 if (exit_status < status)
3614                     exit_status = status;
3615             }
3616         else {
3617             if (argc - optind != 2) {
3618                 if (argc - optind < 2)
3619                     try_help("missing operand after '%s'", argv[argc - 1]);
3620                 else
3621                     try_help("extra operand '%s'", argv[optind + 2]);
3622             }
3623 
3624             exit_status = compare_files(NULL, argv[optind], argv[optind + 1]);
3625         }
3626     }
3627 
3628     check_stdout();
3629     exit(exit_status);
3630     return exit_status;
3631 }
3632 
3633 static void
3634 try_help(char const *reason_msgid, char const *operand) {
3635     if (reason_msgid)
3636         error(0, 0, reason_msgid, operand);
3637     exit(EXIT_TROUBLE);
3638 }
3639 
3640 static void
3641 check_stdout(void) {
3642     if (ferror(stdout))
3643         fatal("write failed");
3644     else if (fclose(stdout) != 0)
3645         pfatal_with_name("standard output");
3646 }
3647 
3648 /* Compare two files (or dirs) with parent comparison |parent| and
3649    names |name0| and |name1|.  (If |parent| is null, then the first
3650    name is just |name0|, etc.)  This is self-contained; it opens the
3651    files and closes them.
3652 
3653    Value is |EXIT_SUCCESS| if files are the same, |EXIT_FAILURE| if
3654    different, |EXIT_TROUBLE| if there is a problem opening them.  */
3655 
3656 static int
3657 compare_files(struct comparison const *parent,
3658               char const *name0,
3659               char const *name1) {
3660     struct comparison cmp;
3661 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
3662     register int f;
3663     int status = EXIT_SUCCESS;
3664     bool same_files;
3665     char *free0;
3666     char *free1;
3667 
3668     memset (cmp.file, 0, sizeof cmp.file);
3669     cmp.parent = parent;
3670 
3671     /* |cmp.file[f].desc| markers */
3672 #define NONEXISTENT (-1) /* nonexistent file */
3673 #define UNOPENED (-2) /* unopened file (e.g. directory) */
3674 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded |errno| value */
3675 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of |ERRNO_ENCODE| */
3676 
3677     cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
3678     cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;
3679 
3680     /* Now record the full name of each file, including nonexistent ones.  */
3681 
3682     if (!name0)
3683         name0 = name1;
3684     if (!name1)
3685         name1 = name0;
3686 
3687     if (!parent) {
3688         free0 = NULL;
3689         free1 = NULL;
3690         cmp.file[0].name = name0;
3691         cmp.file[1].name = name1;
3692     } else {
3693         cmp.file[0].name = free0
3694                 = file_name_concat(parent->file[0].name, name0, NULL);
3695         cmp.file[1].name = free1
3696                 = file_name_concat(parent->file[1].name, name1, NULL);
3697     }
3698 
3699     /* Stat the files.  */
3700 
3701     for (f = 0; f < 2; f++) {
3702         if (cmp.file[f].desc != NONEXISTENT) {
3703             if (f && file_name_cmp(cmp.file[f].name, cmp.file[0].name) == 0) {
3704                 cmp.file[f].desc = cmp.file[0].desc;
3705                 cmp.file[f].stat = cmp.file[0].stat;
3706             } else if (STREQ (cmp.file[f].name, "-")) {
3707                 cmp.file[f].desc = STDIN_FILENO;
3708                 if (fstat(STDIN_FILENO, &cmp.file[f].stat) != 0)
3709                     cmp.file[f].desc = ERRNO_ENCODE (errno);
3710                 else {
3711                     if (S_ISREG (cmp.file[f].stat.st_mode)) {
3712                         off_t pos = lseek(STDIN_FILENO, 0, SEEK_CUR);
3713                         if (pos < 0)
3714                             cmp.file[f].desc = ERRNO_ENCODE (errno);
3715                         else
3716                             cmp.file[f].stat.st_size =
3717                                     MAX (0, cmp.file[f].stat.st_size - pos);
3718                     }
3719                 }
3720             } else if (stat(cmp.file[f].name, &cmp.file[f].stat) != 0)
3721                 cmp.file[f].desc = ERRNO_ENCODE (errno);
3722         }
3723     }
3724 
3725     /* Mark files as nonexistent as needed for |-N| and |-P|, if they
3726        are inaccessible empty regular files (the kind of files that
3727        |patch| creates to indicate nonexistent backups), or if they
3728        are top-level files that do not exist but their counterparts do
3729        exist.  */
3730     for (f = 0; f < 2; f++)
3731         if (cmp.file[f].desc == UNOPENED
3732             ? (S_ISREG (cmp.file[f].stat.st_mode)
3733                && !(cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
3734                && cmp.file[f].stat.st_size == 0)
3735             : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
3736                 || cmp.file[f].desc == ERRNO_ENCODE (EBADF))
3737                && !parent
3738                && (cmp.file[1 - f].desc == UNOPENED
3739                    || cmp.file[1 - f].desc == STDIN_FILENO)))
3740             cmp.file[f].desc = NONEXISTENT;
3741 
3742     for (f = 0; f < 2; f++)
3743         if (cmp.file[f].desc == NONEXISTENT) {
3744             memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
3745             cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
3746         }
3747 
3748     for (f = 0; f < 2; f++) {
3749         int e = ERRNO_DECODE (cmp.file[f].desc);
3750         if (0 <= e) {
3751             errno = e;
3752             perror_with_name(cmp.file[f].name);
3753             status = EXIT_TROUBLE;
3754         }
3755     }
3756 
3757     if (status == EXIT_SUCCESS && !parent && DIR_P (0) != DIR_P (1)) {
3758         /* If one is a directory, and it was specified in the command
3759        line, use the file in that dir with the other file's
3760        basename.  */
3761 
3762         int fnm_arg = DIR_P (0);
3763         int dir_arg = 1 - fnm_arg;
3764         char const *fnm = cmp.file[fnm_arg].name;
3765         char const *dir = cmp.file[dir_arg].name;
3766         char const *filename = cmp.file[dir_arg].name = free0
3767                 = find_dir_file_pathname(dir, last_component(fnm));
3768 
3769         if (STREQ (fnm, "-"))
3770             fatal("cannot compare '-' to a directory");
3771 
3772         if (stat(filename, &cmp.file[dir_arg].stat)
3773             != 0) {
3774             perror_with_name(filename);
3775             status = EXIT_TROUBLE;
3776         }
3777     }
3778 
3779     if (status != EXIT_SUCCESS) {
3780         /* One of the files should exist but does not.  */
3781     } else if (cmp.file[0].desc == NONEXISTENT
3782                && cmp.file[1].desc == NONEXISTENT) {
3783         /* Neither file "exists", so there's nothing to compare.  */
3784    } else if ((same_files
3785                        = (cmp.file[0].desc != NONEXISTENT
3786                           && cmp.file[1].desc != NONEXISTENT
3787                           && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
3788                           && same_file_attributes (&cmp.file[0].stat,
3789                                                    &cmp.file[1].stat)))) {
3790       /* The two named files are actually the same physical file.  We
3791          know they are identical without actually reading them.  */
3792     } else if (DIR_P (0) & DIR_P (1)) {
3793         /* If both are directories, compare the files in them.  */
3794 
3795         status = diff_dirs(&cmp, compare_files);
3796     } else if ((DIR_P (0) | DIR_P (1))
3797                || (parent
3798                    && !((S_ISREG (cmp.file[0].stat.st_mode)
3799                          || S_ISLNK (cmp.file[0].stat.st_mode))
3800                         && (S_ISREG (cmp.file[1].stat.st_mode)
3801                             || S_ISLNK  (cmp.file[1].stat.st_mode))))) {
3802         if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) {
3803             /* We have a subdirectory that exists only in one directory.  */
3804 
3805             status = diff_dirs(&cmp, compare_files);
3806         } else {
3807             /* We have two files that are not to be compared.  */
3808 
3809             message5("File %s is a %s while file %s is a %s\n",
3810                      file_label[0] ? file_label[0] : cmp.file[0].name,
3811                      file_type(&cmp.file[0].stat),
3812                      file_label[1] ? file_label[1] : cmp.file[1].name,
3813                      file_type(&cmp.file[1].stat));
3814 
3815             /* This is a difference.  */
3816             status = EXIT_FAILURE;
3817         }
3818     } else if (S_ISLNK (cmp.file[0].stat.st_mode)
3819                || S_ISLNK (cmp.file[1].stat.st_mode)) {
3820         /* We get here only if we use |lstat()|, not |stat()|.  */
3821         status = EXIT_FAILURE;
3822     } else if (files_can_be_treated_as_binary
3823                && S_ISREG (cmp.file[0].stat.st_mode)
3824                && S_ISREG (cmp.file[1].stat.st_mode)
3825                && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
3826                && 0 < cmp.file[0].stat.st_size
3827                && 0 < cmp.file[1].stat.st_size) {
3828         message("Files %s and %s differ\n",
3829                 file_label[0] ? file_label[0] : cmp.file[0].name,
3830                 file_label[1] ? file_label[1] : cmp.file[1].name);
3831         status = EXIT_FAILURE;
3832     } else {
3833         /* Both exist and neither is a directory.  */
3834 
3835         /* Open the files and record their descriptors.  */
3836 
3837         if (cmp.file[0].desc == UNOPENED)
3838             if ((cmp.file[0].desc = open(cmp.file[0].name, O_RDONLY, 0)) < 0) {
3839                 perror_with_name(cmp.file[0].name);
3840                 status = EXIT_TROUBLE;
3841             }
3842         if (cmp.file[1].desc == UNOPENED) {
3843             if (same_files)
3844                 cmp.file[1].desc = cmp.file[0].desc;
3845             else if ((cmp.file[1].desc = open(cmp.file[1].name, O_RDONLY, 0)) < 0) {
3846                 perror_with_name(cmp.file[1].name);
3847                 status = EXIT_TROUBLE;
3848             }
3849         }
3850 
3851         /* Compare the files, if no error was found.  */
3852 
3853         if (status == EXIT_SUCCESS) {
3854             status = diff_2_files(&cmp);
3855         }
3856 
3857         /* Close the file descriptors.  */
3858 
3859         if (0 <= cmp.file[0].desc && close(cmp.file[0].desc) != 0) {
3860             perror_with_name(cmp.file[0].name);
3861             status = EXIT_TROUBLE;
3862         }
3863         if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
3864             && close(cmp.file[1].desc) != 0) {
3865             perror_with_name(cmp.file[1].name);
3866             status = EXIT_TROUBLE;
3867         }
3868     }
3869 
3870     /* Now the comparison has been done, if no error prevented it, and
3871        |status| is the value this function will return.  */
3872 
3873     if (status != EXIT_SUCCESS) {
3874         /* Flush stdout so that the user sees differences immediately.
3875        This can hurt performance, unfortunately.  */
3876         if (fflush(stdout) != 0)
3877             pfatal_with_name("standard output");
3878     }
3879 
3880     free(free0);
3881     free(free1);
3882 
3883     return status;
3884 }

-
+ 8389679F305C168B019ADB6AE495F60C1B71EFC35052D94B6672829B437E0607E77D66D457C81128110B6ECEFCFC41ADB55EFAB8AF91E9B7C76DAF5597EF08F6
vtools/src/diff.h

(0 . 0)(1 . 249)
3889 #include "system.h"
3890 #include <stdio.h>
3891 #include <string.h>
3892 #include <sys/stat.h>
3893 
3894 /* What kind of changes a hunk contains.  */
3895 enum changes {
3896     /* No changes: lines common to both files.  */
3897             UNCHANGED,
3898 
3899     /* Deletes only: lines taken from just the first file.  */
3900             OLD,
3901 
3902     /* Inserts only: lines taken from just the second file.  */
3903             NEW,
3904 
3905     /* Both deletes and inserts: a hunk containing both old and new lines.  */
3906             CHANGED
3907 };
3908 
3909 /* Variables for command line options */
3910 
3911 #ifndef GDIFF_MAIN
3912 # define XTERN extern
3913 #else
3914 # define XTERN
3915 #endif
3916 
3917 /* Number of lines of context to show in each set of diffs.  This is
3918    zero when context is not to be shown.  */
3919 XTERN lin context;
3920 
3921 /* Consider all files as text files (-a).  Don't interpret codes over
3922    0177 as implying a "binary file".  */
3923 XTERN bool text;
3924 
3925 /* Number of lines to keep in identical prefix and suffix.  */
3926 XTERN lin horizon_lines;
3927 
3928 /* Files can be compared byte-by-byte, as if they were binary.  This
3929    depends on various options.  */
3930 XTERN bool files_can_be_treated_as_binary;
3931 
3932 /* File labels for |-c| output headers (|--label|).  */
3933 XTERN char *file_label[2];
3934 
3935 /* Say only whether files differ, not how (|-q|).  */
3936 XTERN bool brief;
3937 
3938 /* Do not output an initial space or tab before the text of an empty line.  */
3939 XTERN bool suppress_blank_empty;
3940 
3941 /* In directory comparison, specify file to start with (|-S|).  This
3942    is used for resuming an aborted comparison.  All file names less
3943    than this name are ignored.  */
3944 XTERN char const *starting_file;
3945 
3946 /* String containing all the command options diff received, with
3947    spaces between and at the beginning but none at the end.  If there
3948    were no options given, this string is empty.  */
3949 XTERN char *switch_string;
3950 
3951 /* Use heuristics for better speed with large files with a small
3952    density of changes.  */
3953 XTERN bool speed_large_files;
3954 
3955 /* Don't discard lines.  This makes things slower (sometimes much
3956    slower) but will find a guaranteed minimal set of changes.  */
3957 XTERN bool minimal;
3958 
3959 /* The result of comparison is an ``edit script'': a chain of |struct
3960    change|.  Each |struct change| represents one place where some
3961    lines are deleted and some are inserted.
3962 
3963    |line0| and |line1| are the first affected lines in the two files
3964    (origin 0).  |deleted| is the number of lines deleted here from file
3965    0.  |inserted| is the number of lines inserted here in file 1.
3966 
3967    If |deleted| is 0 then |line0| is the number of the line before
3968    which the insertion was done; vice versa for |inserted| and
3969    |line1|.  */
3970 
3971 struct change {
3972     struct change *link;        /* Previous or next edit command  */
3973     lin inserted;            /* \# lines of file 1 changed here.  */
3974     lin deleted;            /* \# lines of file 0 changed here.  */
3975     lin line0;            /* Line number of 1st deleted line.  */
3976     lin line1;            /* Line number of 1st inserted line.  */
3977     bool ignore;            /* Flag used in |context.c|.  */
3978 };
3979 
3980 /* Structures that describe the input files.  */
3981 
3982 /* Data on one input file being compared.  */
3983 
3984 struct file_data {
3985     int desc;    /* File descriptor  */
3986     char const *name;    /* File name  */
3987     struct stat stat;    /* File status */
3988 
3989     /* Buffer in which text of file is read.  */
3990     word *buffer;
3991 
3992     /* Allocated size of buffer, in bytes.  Always a multiple of
3993        sizeof |*buffer|.  */
3994     size_t bufsize;
3995 
3996     /* Number of valid bytes now in the buffer.  */
3997     size_t buffered;
3998 
3999     /* Array of pointers to lines in the file.  */
4000     char const **linbuf;
4001 
4002     /* |linbuf_base <= buffered_lines <= valid_lines <= alloc_lines|.
4003        |linebuf[linbuf_base ... buffered_lines - 1]| are possibly differing.
4004        |linebuf[linbuf_base ... valid_lines - 1]| contain valid data.
4005        |linebuf[linbuf_base ... alloc_lines - 1]| are allocated.  */
4006     lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
4007 
4008     /* Pointer to end of prefix of this file to ignore when hashing.  */
4009     char const *prefix_end;
4010 
4011     /* Count of lines in the prefix.  There are this many lines in the
4012        file before |linbuf[0]|.  */
4013     lin prefix_lines;
4014 
4015     /* Pointer to start of suffix of this file to ignore when hashing.  */
4016     char const *suffix_begin;
4017 
4018     /* Vector, indexed by line number, containing an equivalence code
4019        for each line.  It is this vector that is actually compared
4020        with that of another file to generate differences.  */
4021     lin *equivs;
4022 
4023     /* Vector, like the previous one except that the elements for
4024        discarded lines have been squeezed out.  */
4025     lin *undiscarded;
4026 
4027     /* Vector mapping virtual line numbers (not counting discarded
4028        lines) to real ones (counting those lines).  Both are
4029        $origin-0$.  */
4030     lin *realindexes;
4031 
4032     /* Total number of nondiscarded lines.  */
4033     lin nondiscarded_lines;
4034 
4035     /* Vector, indexed by real $origin-0$ line number, containing 1
4036        for a line that is an insertion or a deletion.  The results of
4037        comparison are stored here.  */
4038     char *changed;
4039 
4040     /* 1 if file ends in a line with no final newline.  */
4041     bool missing_newline;
4042 
4043     /* 1 if at end of file.  */
4044     bool eof;
4045 
4046     /* 1 more than the maximum equivalence value used for this or its
4047        sibling file.  */
4048     lin equiv_max;
4049 };
4050 
4051 /* The file buffer, considered as an array of bytes rather than as an
4052    array of words.  */
4053 #define FILE_BUFFER(f) ((char *) (f)->buffer)
4054 
4055 /* Data on two input files being compared.  */
4056 
4057 struct comparison {
4058     struct file_data file[2];
4059     struct comparison const *parent;  /* parent, if a recursive comparison */
4060 };
4061 
4062 /* Describe the two files currently being compared.  */
4063 
4064 XTERN struct file_data files[2];
4065 
4066 /* Stdio stream to output diffs to.  */
4067 
4068 XTERN FILE *outfile;
4069 
4070 /* Declare various functions.  */
4071 
4072 /* analyze.c */
4073 extern int diff_2_files(struct comparison *);
4074 
4075 /* context.c */
4076 extern void print_context_header(struct file_data[], char const *const *);
4077 
4078 extern void print_context_script(struct change *);
4079 
4080 /* dir.c */
4081 extern int diff_dirs(struct comparison const *,
4082                      int (*)(struct comparison const *,
4083                              char const *, char const *));
4084 
4085 extern char *find_dir_file_pathname(char const *, char const *);
4086 
4087 /* diff.h */
4088 extern struct change *find_hunk(struct change *);
4089 
4090 extern void pr_unidiff_hunk(struct change *);
4091 
4092 /* io.c */
4093 extern void file_block_read(struct file_data *, size_t);
4094 
4095 extern bool read_files(struct file_data[], bool);
4096 
4097 /* util.c */
4098 
4099 extern bool lines_differ(char const *, char const *);
4100 
4101 extern lin translate_line_number(struct file_data const *, lin);
4102 
4103 extern struct change *find_change(struct change *);
4104 
4105 extern void *zalloc(size_t);
4106 
4107 extern enum changes analyze_hunk(struct change *, lin *, lin *, lin *, lin *);
4108 
4109 extern void begin_output(void);
4110 
4111 extern void debug_script(struct change *);
4112 
4113 extern void fatal(char const *) __attribute__((noreturn));
4114 
4115 extern void finish_output(void);
4116 
4117 extern void message(char const *, char const *, char const *);
4118 
4119 extern void message5(char const *, char const *, char const *,
4120                      char const *, char const *);
4121 
4122 extern void output_1_line(char const *, char const *);
4123 
4124 extern void perror_with_name(char const *);
4125 
4126 extern void pfatal_with_name(char const *) __attribute__((noreturn));
4127 
4128 extern void print_1_line(char const *, char const *const *);
4129 
4130 extern void print_1_line_nl(char const *, char const *const *, bool);
4131 
4132 extern void print_script(struct change *);
4133 
4134 extern void setup_output(char const *, char const *, bool);
4135 
4136 extern void translate_range(struct file_data const *, lin, lin,
4137                             printint *, printint *);

-
+ 195F6A1A811E7D09915A8596772D8E0DDA1C9B2F7C7F1FCF32F9671FADE50794BDEACBCA12874C7A8B8A909D84BC69286B27832EA83B6E208E50D8B958C685BA
vtools/src/dir.c

(0 . 0)(1 . 218)
4142 
4143 #include "diff.h"
4144 #include <error.h>
4145 #include <filenamecat.h>
4146 #include <setjmp.h>
4147 #include <xalloc.h>
4148 #include <dirent.h>
4149 #include <stdlib.h>
4150 
4151 /* Read the directory named by |dir| and store into |dirdata| a sorted
4152    vector of filenames for its contents.  |dir->desc == -1| means this
4153    directory is known to be nonexistent, so set |dirdata| to an empty
4154    vector.  Return -1 (setting |errno|) if error, 0 otherwise.  */
4155 
4156 struct dirdata {
4157     size_t nnames;    /* Number of names.  */
4158     char const **names;    /* Sorted names of files in dir, followed by 0.  */
4159     char *data;    /* Allocated storage for file names.  */
4160 };
4161 
4162 static bool dir_loop(struct comparison const *, int);
4163 
4164 
4165 /* Read a directory and get its vector of names.  */
4166 
4167 static bool
4168 dir_read(struct file_data const *dir, struct dirdata *dirdata) {
4169     register struct dirent *next;
4170     register size_t i;
4171 
4172     /* Address of block containing the files that are described.  */
4173     char const **names;
4174 
4175     /* Number of files in directory.  */
4176     size_t nnames;
4177 
4178     /* Allocated and used storage for file name data.  */
4179     char *data;
4180     size_t data_alloc, data_used;
4181 
4182     dirdata->names = 0;
4183     dirdata->data = 0;
4184     nnames = 0;
4185     data = 0;
4186 
4187     if (dir->desc != -1) {
4188         /* Open the directory and check for errors.  */
4189         register DIR *reading = opendir(dir->name);
4190         if (!reading)
4191             return false;
4192 
4193         /* Initialize the table of filenames.  */
4194 
4195         data_alloc = 512;
4196         data_used = 0;
4197         dirdata->data = data = xmalloc(data_alloc);
4198 
4199         /* Read the directory entries, and insert the subfiles
4200        into the |data| table.  */
4201 
4202         while ((errno = 0, (next = readdir(reading)) != 0)) {
4203             char *d_name = next->d_name;
4204             size_t d_size = strlen (next->d_name) + 1;
4205 
4206             /* Ignore "." and "..".  */
4207             if (d_name[0] == '.'
4208                 && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
4209                 continue;
4210 
4211             while (data_alloc < data_used + d_size) {
4212                 if (PTRDIFF_MAX / 2 <= data_alloc)
4213                     xalloc_die();
4214                 dirdata->data = data = xrealloc(data, data_alloc *= 2);
4215             }
4216 
4217             memcpy (data + data_used, d_name, d_size);
4218             data_used += d_size;
4219             nnames++;
4220         }
4221         if (errno) {
4222             int e = errno;
4223             closedir(reading);
4224             errno = e;
4225             return false;
4226         }
4227         if (closedir(reading) != 0)
4228             return false;
4229     }
4230 
4231     /* Create the |names| table from the |data| table.  */
4232     if (PTRDIFF_MAX / sizeof *names - 1 <= nnames)
4233         xalloc_die();
4234     dirdata->names = names = xmalloc((nnames + 1) * sizeof *names);
4235     dirdata->nnames = nnames;
4236     for (i = 0; i < nnames; i++) {
4237         names[i] = data;
4238         data += strlen(data) + 1;
4239     }
4240     names[nnames] = 0;
4241     return true;
4242 }
4243 
4244 /* Compare file names, returning a value compatible with strcmp.  */
4245 
4246 #define compare_names strcmp
4247 
4248 /* Compare names FILE1 and FILE2 when sorting a directory.  */
4249 
4250 static int
4251 compare_names_for_qsort(void const *file1, void const *file2) {
4252     char const *const *f1 = file1;
4253     char const *const *f2 = file2;
4254     char const *name1 = *f1;
4255     char const *name2 = *f2;
4256     return compare_names(name1, name2);
4257 }
4258 
4259 /* Compare the contents of two directories named in |cmp|.  This is a
4260    top-level routine; it does everything necessary for diff on two
4261    directories.
4262 
4263    |cmp->file[0].desc == -1| says directory |cmp->file[0]| doesn't
4264    exist, but pretend it is empty.  Likewise for |cmp->file[1]|.
4265 
4266    |handle_file| is a caller-provided subroutine called to handle each file.
4267    It gets three operands: |cmp|, name of file in dir 0, name of file in dir 1.
4268    These names are relative to the original working directory.
4269 
4270    For a file that appears in only one of the dirs, one of the name-args
4271    to |handle_file| is zero.
4272 
4273    Returns the maximum of all the values returned by |handle_file|,
4274    or |exit_trouble| if trouble is encountered in opening files.  */
4275 
4276 int
4277 diff_dirs(struct comparison const *cmp,
4278           int (*handle_file)(struct comparison const *,
4279                              char const *, char const *)) {
4280     struct dirdata dirdata[2];
4281     int volatile val = EXIT_SUCCESS;
4282     int i;
4283 
4284     if ((cmp->file[0].desc == -1 || dir_loop(cmp, 0))
4285         && (cmp->file[1].desc == -1 || dir_loop(cmp, 1))) {
4286         error(0, 0, "%s: recursive directory loop",
4287               cmp->file[cmp->file[0].desc == -1].name);
4288         return EXIT_TROUBLE;
4289     }
4290 
4291     /* Get contents of both dirs.  */
4292     for (i = 0; i < 2; i++)
4293         if (!dir_read(&cmp->file[i], &dirdata[i])) {
4294             perror_with_name(cmp->file[i].name);
4295             val = EXIT_TROUBLE;
4296         }
4297 
4298     if (val == EXIT_SUCCESS) {
4299         char const **volatile names[2];
4300         names[0] = dirdata[0].names;
4301         names[1] = dirdata[1].names;
4302 
4303         /* Sort the directories.  */
4304         for (i = 0; i < 2; i++)
4305             qsort(names[i], dirdata[i].nnames, sizeof *dirdata[i].names,
4306                   compare_names_for_qsort);
4307 
4308         /* If |-S name| was given, and this is the topmost level of
4309        comparison, ignore all file names less than the specified
4310        starting name.  */
4311 
4312         if (starting_file && !cmp->parent) {
4313             while (*names[0] && compare_names(*names[0], starting_file) < 0)
4314                 names[0]++;
4315             while (*names[1] && compare_names(*names[1], starting_file) < 0)
4316                 names[1]++;
4317         }
4318 
4319         /* Loop while files remain in one or both dirs.  */
4320         while (*names[0] || *names[1]) {
4321             /* Compare next name in dir 0 with next name in dir 1.  At
4322                the end of a dir, pretend the "next name" in that dir
4323                is very large.  */
4324             int nameorder = (!*names[0] ? 1 : !*names[1] ? -1
4325                                                          : compare_names(*names[0], *names[1]));
4326             int v1 = (*handle_file)(cmp,
4327                                     0 < nameorder ? 0 : *names[0]++,
4328                                     nameorder < 0 ? 0 : *names[1]++);
4329             if (val < v1)
4330                 val = v1;
4331         }
4332     }
4333 
4334     for (i = 0; i < 2; i++) {
4335         free(dirdata[i].names);
4336         free(dirdata[i].data);
4337     }
4338 
4339     return val;
4340 }
4341 
4342 /* Return nonzero if $cmp$ is looping recursively in argument $i$.  */
4343 
4344 static bool
4345 dir_loop(struct comparison const *cmp, int i) {
4346     struct comparison const *p = cmp;
4347     while ((p = p->parent))
4348         if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat))
4349             return true;
4350     return false;
4351 }
4352 
4353 /* Find a matching filename in a directory.  */
4354 
4355 char *
4356 find_dir_file_pathname(char const *dir, char const *file) {
4357     const char *match = file;
4358     return file_name_concat(dir, match, NULL);
4359 }

-
+ 9AB0950EFF37136222DC461537F8448EBC36DFF595E77AF294E48F9142657ED6DA78D72C232ECC890E28BA75A2160D7AB3A09E1A7AA61C50AC95C7A747EF2B7A
vtools/src/io.c

(0 . 0)(1 . 596)
4364 
4365 #include "diff.h"
4366 #include <cmpbuf.h>
4367 #include <xalloc.h>
4368 #include <string.h>
4369 #include <limits.h>
4370 #include <stdlib.h>
4371 
4372 /* Rotate an unsigned value to the left.  */
4373 #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
4374 
4375 /* Given a hash value and a new character, return a new hash
4376    value.  */
4377 #define HASH(h, c) ((c) + ROL (h, 7))
4378 
4379 /* The type of a hash value.  */
4380 typedef size_t hash_value;
4381 
4382 /* Lines are put into equivalence classes of lines that match in
4383    |lines_differ|.  Each equivalence class is represented by one of
4384    these structures, but only while the classes are being computed.
4385    Afterward, each class is represented by a number.  */
4386 struct equivclass {
4387     lin next;        /* Next item in this bucket.  */
4388     hash_value hash;    /* Hash of lines in this class.  */
4389     char const *line;    /* A line that fits this class.  */
4390     size_t length;    /* That line's length, not counting its newline.  */
4391 };
4392 
4393 /* Hash-table: array of buckets, each being a chain of equivalence
4394    classes.  |buckets[-1]| is reserved for incomplete lines.  */
4395 static lin *buckets;
4396 
4397 /* Number of buckets in the hash table array, not counting
4398    |buckets[-1]|.  */
4399 static size_t nbuckets;
4400 
4401 /* Array in which the equivalence classes are allocated.  The
4402    bucket-chains go through the elements in this array.  The number of
4403    an equivalence class is its index in this array.  */
4404 static struct equivclass *equivs;
4405 
4406 /* Index of first free element in the array |equivs|.  */
4407 static lin equivs_index;
4408 
4409 /* Number of elements allocated in the array |equivs|.  */
4410 static lin equivs_alloc;
4411 
4412 /* Read a block of data into a file buffer, checking for EOF and
4413    error.  */
4414 
4415 void
4416 file_block_read(struct file_data *current, size_t size) {
4417     if (size && !current->eof) {
4418         size_t s = block_read(current->desc,
4419                               FILE_BUFFER (current) + current->buffered, size);
4420         if (s == SIZE_MAX)
4421             pfatal_with_name(current->name);
4422 
4423         current->buffered += s;
4424         current->eof = s < size;
4425     }
4426 }
4427 
4428 /* Check for binary files and compare them for exact identity.  */
4429 
4430 /* Return 1 if |buf| contains a non text character.  |size| is the
4431    number of characters in |buf|.  */
4432 
4433 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
4434 
4435 /* Get ready to read the current file.  Return nonzero if |skip_test|
4436    is zero, and if it appears to be a binary file.  */
4437 
4438 static bool
4439 sip(struct file_data *current, bool skip_test) {
4440     /* If we have a nonexistent file at this stage, treat it as
4441        empty.  */
4442     if (current->desc < 0) {
4443         /* Leave room for a sentinel.  */
4444         current->bufsize = sizeof(word);
4445         current->buffer = xmalloc(current->bufsize);
4446     } else {
4447         current->bufsize = buffer_lcm(sizeof(word),
4448                                       STAT_BLOCKSIZE (current->stat),
4449                                       PTRDIFF_MAX - 2 * sizeof(word));
4450         current->buffer = xmalloc(current->bufsize);
4451 
4452         if (!skip_test) {
4453             /* Check first part of file to see if it's a binary file.  */
4454 
4455             off_t buffered;
4456             file_block_read(current, current->bufsize);
4457             buffered = current->buffered;
4458             return binary_file_p (current->buffer, buffered);
4459         }
4460     }
4461 
4462     current->buffered = 0;
4463     current->eof = false;
4464     return false;
4465 }
4466 
4467 /* Slurp the rest of the current file completely into memory.  */
4468 
4469 static void
4470 slurp(struct file_data *current) {
4471     size_t cc;
4472 
4473     if (current->desc < 0) {
4474         /* The file is nonexistent.  */
4475         return;
4476     }
4477 
4478     if (S_ISREG (current->stat.st_mode)) {
4479         /* It's a regular file; slurp in the rest all at once.  */
4480 
4481         /* Get the size out of the stat block.  Allocate just enough
4482        room for appended newline plus word sentinel, plus
4483        word-alignment since we want the buffer word-aligned.  */
4484         size_t file_size = current->stat.st_size;
4485         cc = file_size + 2 * sizeof(word) - file_size % sizeof(word);
4486         if (file_size != current->stat.st_size || cc < file_size
4487             || PTRDIFF_MAX <= cc)
4488             xalloc_die();
4489 
4490         if (current->bufsize < cc) {
4491             current->bufsize = cc;
4492             current->buffer = xrealloc(current->buffer, cc);
4493         }
4494 
4495         /* Try to read at least 1 more byte than the size indicates,
4496        to detect whether the file is growing.  This is a nicety for
4497        users who run 'diff' on files while they are changing.  */
4498 
4499         if (current->buffered <= file_size) {
4500             file_block_read(current, file_size + 1 - current->buffered);
4501             if (current->buffered <= file_size)
4502                 return;
4503         }
4504     }
4505 
4506     /* It's not a regular file, or it's a growing regular file; read
4507        it, growing the buffer as needed.  */
4508 
4509     file_block_read(current, current->bufsize - current->buffered);
4510 
4511     if (current->buffered) {
4512         while (current->buffered == current->bufsize) {
4513             if (PTRDIFF_MAX / 2 - sizeof(word) < current->bufsize)
4514                 xalloc_die();
4515             current->bufsize *= 2;
4516             current->buffer = xrealloc(current->buffer, current->bufsize);
4517             file_block_read(current, current->bufsize - current->buffered);
4518         }
4519 
4520         /* Allocate just enough room for appended newline plus word
4521        sentinel, plus word-alignment.  */
4522         cc = current->buffered + 2 * sizeof(word);
4523         current->bufsize = cc - cc % sizeof(word);
4524         current->buffer = xrealloc(current->buffer, current->bufsize);
4525     }
4526 }
4527 
4528 /* Split the file into lines, simultaneously computing the equivalence
4529    class for each line.  */
4530 
4531 static void
4532 find_and_hash_each_line(struct file_data *current) {
4533     char const *p = current->prefix_end;
4534     lin i, *bucket;
4535     size_t length;
4536 
4537     /* Cache often-used quantities in local variables to help the compiler.  */
4538     char const **linbuf = current->linbuf;
4539     lin alloc_lines = current->alloc_lines;
4540     lin line = 0;
4541     lin linbuf_base = current->linbuf_base;
4542     lin *cureqs = xmalloc(alloc_lines * sizeof *cureqs);
4543     struct equivclass *eqs = equivs;
4544     lin eqs_index = equivs_index;
4545     lin eqs_alloc = equivs_alloc;
4546     char const *suffix_begin = current->suffix_begin;
4547     char const *bufend = FILE_BUFFER (current) + current->buffered;
4548 
4549 
4550     while (p < suffix_begin) {
4551         char const *ip = p;
4552         hash_value h = 0;
4553         unsigned char c;
4554 
4555         /* Hash this line until we find a newline.  */
4556         while ((c = *p++) != '\n')
4557             h = HASH (h, c);
4558 
4559         bucket = &buckets[h % nbuckets];
4560         length = p - ip - 1;
4561 
4562         for (i = *bucket;; i = eqs[i].next)
4563             if (!i) {
4564                 /* Create a new equivalence class in this bucket.  */
4565                 i = eqs_index++;
4566                 if (i == eqs_alloc) {
4567                     if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
4568                         xalloc_die();
4569                     eqs_alloc *= 2;
4570                     eqs = xrealloc(eqs, eqs_alloc * sizeof *eqs);
4571                 }
4572                 eqs[i].next = *bucket;
4573                 eqs[i].hash = h;
4574                 eqs[i].line = ip;
4575                 eqs[i].length = length;
4576                 *bucket = i;
4577                 break;
4578             } else if (eqs[i].hash == h) {
4579                 char const *eqline = eqs[i].line;
4580 
4581                 /* Reuse existing class if |lines_differ| reports the
4582                    lines equal.  */
4583                 if (eqs[i].length == length) {
4584                     /* Reuse existing equivalence class if the lines
4585                        are identical.  This detects the common case of
4586                        exact identity faster than |lines_differ|
4587                        would.  */
4588                     if (memcmp(eqline, ip, length) == 0)
4589                         break;
4590                     continue;
4591                 } else
4592                     continue;
4593             }
4594 
4595         /* Maybe increase the size of the line table.  */
4596         if (line == alloc_lines) {
4597             /* Double |(alloc_lines - linbuf_base)| by adding to
4598                |alloc_lines|.  */
4599             if (PTRDIFF_MAX / 3 <= alloc_lines
4600                 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
4601                 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
4602                 xalloc_die();
4603             alloc_lines = 2 * alloc_lines - linbuf_base;
4604             cureqs = xrealloc(cureqs, alloc_lines * sizeof *cureqs);
4605             linbuf += linbuf_base;
4606             linbuf = xrealloc(linbuf,
4607                               (alloc_lines - linbuf_base) * sizeof *linbuf);
4608             linbuf -= linbuf_base;
4609         }
4610         linbuf[line] = ip;
4611         cureqs[line] = i;
4612         ++line;
4613     }
4614 
4615     current->buffered_lines = line;
4616 
4617     for (i = 0;; i++) {
4618         /* Record the line start for lines in the suffix that we care
4619        about.  Record one more line start than lines, so that we can
4620        compute the length of any buffered line.  */
4621         if (line == alloc_lines) {
4622             /* Double |(alloc_lines - linbuf_base)| by adding to
4623                |alloc_lines|.  */
4624             if (PTRDIFF_MAX / 3 <= alloc_lines
4625                 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
4626                 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
4627                 xalloc_die();
4628             alloc_lines = 2 * alloc_lines - linbuf_base;
4629             linbuf += linbuf_base;
4630             linbuf = xrealloc(linbuf,
4631                               (alloc_lines - linbuf_base) * sizeof *linbuf);
4632             linbuf -= linbuf_base;
4633         }
4634         linbuf[line] = p;
4635 
4636         if (p == bufend) {
4637             /* If the last line is incomplete and we do not silently
4638                complete lines, don't count its appended newline.  */
4639             if (current->missing_newline && false) /* our output style not robust */
4640                 linbuf[line]--;
4641             break;
4642         }
4643 
4644         line++;
4645 
4646         while (*p++ != '\n')
4647             continue;
4648     }
4649 
4650     /* Done with cache in local variables.  */
4651     current->linbuf = linbuf;
4652     current->valid_lines = line;
4653     current->alloc_lines = alloc_lines;
4654     current->equivs = cureqs;
4655     equivs = eqs;
4656     equivs_alloc = eqs_alloc;
4657     equivs_index = eqs_index;
4658 }
4659 
4660 /* Prepare the text.  Make sure the text end is initialized.  Make
4661    sure text ends in a newline, but remember that we had to add one.
4662    Strip trailing CRs, if that was requested.  */
4663 
4664 static void
4665 prepare_text(struct file_data *current) {
4666     size_t buffered = current->buffered;
4667     char *p = FILE_BUFFER (current);
4668 
4669     if (buffered == 0 || p[buffered - 1] == '\n')
4670         current->missing_newline = false;
4671     else {
4672         p[buffered++] = '\n';
4673         current->missing_newline = true;
4674     }
4675 
4676     if (!p)
4677         return;
4678 
4679     /* Don't use uninitialized storage when planting or using sentinels.  */
4680     memset (p + buffered, 0, sizeof(word));
4681 
4682     current->buffered = buffered;
4683 }
4684 
4685 /* We have found |n| lines in a buffer of size |s|; guess the
4686    proportionate number of lines that will be found in a buffer of
4687    size |t|.  However, do not guess a number of lines so large that
4688    the resulting line table might cause overflow in size
4689    calculations.  */
4690 static lin
4691 guess_lines(lin n, size_t s, size_t t) {
4692     size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
4693     lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
4694     return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof(char *) + 1) - 5) + 5;
4695 }
4696 
4697 /* Given a vector of two |file_data| objects, find the identical
4698    prefixes and suffixes of each object.  */
4699 
4700 static void
4701 find_identical_ends(struct file_data filevec[]) {
4702     word *w0, *w1;
4703     char *p0, *p1, *buffer0, *buffer1;
4704     char const *end0, *beg0;
4705     char const **linbuf0, **linbuf1;
4706     lin i, lines;
4707     size_t n0, n1;
4708     lin alloc_lines0, alloc_lines1;
4709     lin buffered_prefix, prefix_count, prefix_mask;
4710     lin middle_guess, suffix_guess;
4711 
4712     slurp(&filevec[0]);
4713     prepare_text(&filevec[0]);
4714     if (filevec[0].desc != filevec[1].desc) {
4715         slurp(&filevec[1]);
4716         prepare_text(&filevec[1]);
4717     } else {
4718         filevec[1].buffer = filevec[0].buffer;
4719         filevec[1].bufsize = filevec[0].bufsize;
4720         filevec[1].buffered = filevec[0].buffered;
4721         filevec[1].missing_newline = filevec[0].missing_newline;
4722     }
4723 
4724     /* Find identical prefix.  */
4725 
4726     w0 = filevec[0].buffer;
4727     w1 = filevec[1].buffer;
4728     p0 = buffer0 = (char *) w0;
4729     p1 = buffer1 = (char *) w1;
4730     n0 = filevec[0].buffered;
4731     n1 = filevec[1].buffered;
4732 
4733     if (p0 == p1)
4734         /* The buffers are the same; sentinels won't work.  */
4735         p0 = p1 += n1;
4736     else {
4737         /* Insert end sentinels, in this case characters that are
4738        guaranteed to make the equality test false, and thus terminate
4739        the loop.  */
4740 
4741         if (n0 < n1)
4742             p0[n0] = ~p1[n0];
4743         else
4744             p1[n1] = ~p0[n1];
4745 
4746         /* Loop until first mismatch, or to the sentinel
4747            characters.  */
4748 
4749         /* Compare a word at a time for speed.  */
4750         while (*w0 == *w1)
4751             w0++, w1++;
4752 
4753         /* Do the last few bytes of comparison a byte at a time.  */
4754         p0 = (char *) w0;
4755         p1 = (char *) w1;
4756         while (*p0 == *p1)
4757             p0++, p1++;
4758 
4759         /* Don't mistakenly count missing newline as part of
4760            prefix.  */
4761         if (false
4762             && ((buffer0 + n0 - filevec[0].missing_newline < p0)
4763                 !=
4764                 (buffer1 + n1 - filevec[1].missing_newline < p1)))
4765             p0--, p1--;
4766     }
4767 
4768     /* Now |p0| and |p1| point at the first nonmatching
4769        characters.  */
4770 
4771     /* Skip back to last line-beginning in the prefix, and then
4772        discard up to |horizon_lines| lines from the prefix.  */
4773     i = horizon_lines;
4774     while (p0 != buffer0 && (p0[-1] != '\n' || i--))
4775         p0--, p1--;
4776 
4777     /* Record the prefix.  */
4778     filevec[0].prefix_end = p0;
4779     filevec[1].prefix_end = p1;
4780 
4781     /* Find identical suffix.  */
4782 
4783     /* |p0| and |p1| point beyond the last chars not yet compared.  */
4784     p0 = buffer0 + n0;
4785     p1 = buffer1 + n1;
4786 
4787     if (filevec[0].missing_newline == filevec[1].missing_newline) {
4788         end0 = p0;    /* Addr of last char in file 0.  */
4789 
4790         /* Get value of |p0| at which we should stop scanning
4791        backward: this is when either |p0| or |p1| points just past the
4792        last char of the identical prefix.  */
4793         beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
4794 
4795         /* Scan back until chars don't match or we reach that point.  */
4796         while (p0 != beg0)
4797             if (*--p0 != *--p1) {
4798                 /* Point at the first char of the matching suffix.  */
4799                 ++p0, ++p1;
4800                 beg0 = p0;
4801                 break;
4802             }
4803 
4804         /* Are we at a line-beginning in both files?  If not, add the
4805        rest of this line to the main body.  Discard up to
4806        |horizon_lines| lines from the identical suffix.  Also, discard
4807        one extra line, because |shift_boundaries| may need it.  */
4808         i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
4809                               &&
4810                               (buffer1 == p1 || p1[-1] == '\n'));
4811         while (i-- && p0 != end0)
4812             while (*p0++ != '\n')
4813                 continue;
4814 
4815         p1 += p0 - beg0;
4816     }
4817 
4818     /* Record the suffix.  */
4819     filevec[0].suffix_begin = p0;
4820     filevec[1].suffix_begin = p1;
4821 
4822     /* Calculate number of lines of prefix to save.
4823 
4824        |prefix_count == 0| means save the whole prefix; we need this
4825        for options like |-D| that output the whole file, or for
4826        enormous contexts (to avoid worrying about arithmetic
4827        overflow).  We also need it for options like |-F| that output
4828        some preceding line; at least we will need to find the last few
4829        lines, but since we don't know how many, it's easiest to find
4830        them all.
4831 
4832        Otherwise, |prefix_count != 0|.  Save just |prefix_count| lines
4833        at start of the line buffer; they'll be moved to the proper
4834        location later.  Handle 1 more line than the context says
4835        (because we count 1 too many), rounded up to the next power of
4836        2 to speed index computation.  */
4837 
4838     prefix_count = 0;
4839     alloc_lines0 = guess_lines(0, 0, n0);
4840     prefix_mask = prefix_count - 1;
4841     lines = 0;
4842     linbuf0 = xmalloc(alloc_lines0 * sizeof *linbuf0);
4843     p0 = buffer0;
4844 
4845     /* If the prefix is needed, find the prefix lines.  */
4846     end0 = filevec[0].prefix_end;
4847     while (p0 != end0) {
4848         lin l = lines++ & prefix_mask;
4849         if (l == alloc_lines0) {
4850             if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
4851                 xalloc_die();
4852             alloc_lines0 *= 2;
4853             linbuf0 = xrealloc(linbuf0, alloc_lines0 * sizeof *linbuf0);
4854         }
4855         linbuf0[l] = p0;
4856         while (*p0++ != '\n')
4857             continue;
4858     }
4859     buffered_prefix = prefix_count && context < lines ? context : lines;
4860 
4861     /* Allocate line buffer 1.  */
4862 
4863     middle_guess = guess_lines(lines, p0 - buffer0, p1 - filevec[1].prefix_end);
4864     suffix_guess = guess_lines(lines, p0 - buffer0, buffer1 + n1 - p1);
4865     alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
4866     if (alloc_lines1 < buffered_prefix
4867         || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
4868         xalloc_die();
4869     linbuf1 = xmalloc(alloc_lines1 * sizeof *linbuf1);
4870 
4871     if (buffered_prefix != lines) {
4872         /* Rotate prefix lines to proper location.  */
4873         for (i = 0; i < buffered_prefix; i++)
4874             linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
4875         for (i = 0; i < buffered_prefix; i++)
4876             linbuf0[i] = linbuf1[i];
4877     }
4878 
4879     /* Initialize line buffer 1 from line buffer 0.  */
4880     for (i = 0; i < buffered_prefix; i++)
4881         linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
4882 
4883     /* Record the line buffer, adjusted so that
4884        |linbuf[0]| points at the first differing line.  */
4885     filevec[0].linbuf = linbuf0 + buffered_prefix;
4886     filevec[1].linbuf = linbuf1 + buffered_prefix;
4887     filevec[0].linbuf_base = filevec[1].linbuf_base = -buffered_prefix;
4888     filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
4889     filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
4890     filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
4891 }
4892 
4893 /* If $1 < k$, then $(2^k - {\rm prime\_offset}_k)$ is the largest
4894    prime less than $2^k$.  This table is derived from
4895    Chris~K.~Caldwell's list
4896    \url{http://www.utm.edu/research/primes/lists/2small/}.  */
4897 
4898 static unsigned char const prime_offset[] =
4899         {
4900                 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
4901                 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
4902                 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
4903                 55, 93, 1, 57, 25
4904         };
4905 
4906 /* Given a vector of two |file_data| objects, read the file associated
4907    with each one, and build the table of equivalence classes.  Return
4908    nonzero if either file appears to be a binary file.  If
4909    |pretend_binary| is nonzero, pretend they are binary
4910    regardless.  */
4911 
4912 bool
4913 read_files(struct file_data filevec[], bool pretend_binary) {
4914     int i;
4915     bool skip_test = text | pretend_binary;
4916     bool appears_binary = pretend_binary | sip(&filevec[0], skip_test);
4917 
4918     if (filevec[0].desc != filevec[1].desc)
4919         appears_binary |= sip(&filevec[1], skip_test | appears_binary);
4920     else {
4921         filevec[1].buffer = filevec[0].buffer;
4922         filevec[1].bufsize = filevec[0].bufsize;
4923         filevec[1].buffered = filevec[0].buffered;
4924     }
4925     if (appears_binary) {
4926         return true;
4927     }
4928 
4929     find_identical_ends(filevec);
4930 
4931     equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
4932     if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
4933         xalloc_die();
4934     equivs = xmalloc(equivs_alloc * sizeof *equivs);
4935     /* Equivalence class 0 is permanently safe for lines that were not
4936        hashed.  Real equivalence classes start at 1.  */
4937     equivs_index = 1;
4938 
4939     /* Allocate (one plus) a prime number of hash buckets.  Use a
4940        prime number between $1/3$ and $2/3$ of the value of
4941        |equiv_allocs|, approximately.  */
4942     for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
4943         continue;
4944     nbuckets = ((size_t) 1 << i) - prime_offset[i];
4945     if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
4946         xalloc_die();
4947     buckets = zalloc((nbuckets + 1) * sizeof *buckets);
4948     buckets++;
4949 
4950     for (i = 0; i < 2; i++)
4951         find_and_hash_each_line(&filevec[i]);
4952 
4953     filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
4954 
4955     free(equivs);
4956     free(buckets - 1);
4957 
4958     return false;
4959 }

-
+ E0C8C657FA184C8C6F20705B9C9FAE293A0C3BF380E60CDC65BA2449CD28D3F0964E42998601F868444C0F19752BB58934BB75DFFD7F0DCB31170CA7C6538A84
vtools/src/system.h

(0 . 0)(1 . 156)
4964 #ifndef SYSTEM_H
4965 #define SYSTEM_H
4966 
4967 #define _GNU_SOURCE
4968 
4969 #include <stddef.h>
4970 /* freebsd */
4971 #include <stdint.h>
4972 #include <unistd.h>
4973 /* linux */
4974 #include <inttypes.h>
4975 #include <sys/types.h>
4976 
4977 #define STAT_BLOCKSIZE(s) ((s).st_blksize)
4978 
4979 #define EXIT_TROUBLE 2
4980 
4981 #include <errno.h>
4982 
4983 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
4984 #define MAX(a, b) ((a) >= (b) ? (a) : /**/(b))
4985 
4986 /* Return 1 if an array of |n| objects, each of size |s|, cannot exist
4987    reliably due to size or |ptrdiff_t| arithmetic overflow.  |s| must
4988    be positive and N must be nonnegative.  This is a macro, not a
4989    function, so that it works correctly even when |SIZE_MAX < n|.
4990 
4991    True if |n * s| would overflow in a |size_t| calculation, or would
4992    generate a value larger than |PTRDIFF_MAX|.  This expands to a
4993    constant expression if |n| and |s| are both constants.  By gnulib
4994    convention, |SIZE_MAX| represents overflow in size calculations, so
4995    the conservative |size_t|-based dividend to use here is |SIZE_MAX -
4996    1|.  */
4997 
4998 #define xalloc_oversized(n, s) \
4999     ((size_t) (PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX - 1) / (s) < (n))
5000 
5001 #include <stdbool.h>
5002 
5003 /* Type used for fast comparison of several bytes at a time. This used
5004    to be |uintmax_t|, but changing it to |size_t| made plain 'cmp' 90\%
5005    faster (|GCC 4.8.1|, |x86|).  */
5006 
5007 #ifndef word
5008 # define word size_t
5009 #endif
5010 
5011 #include <signal.h> /* SA_RESTART */
5012 
5013 #include <limits.h>
5014 #ifndef SSIZE_MAX
5015 # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
5016 #endif
5017 
5018 /* The signed integer type of a line number.  Since files are read
5019    into main memory, |ptrdiff_t| should be wide enough.  */
5020 
5021 typedef ptrdiff_t lin;
5022 #define LIN_MAX PTRDIFF_MAX
5023 
5024 /* The signed integer type for printing line numbers, and its printf
5025    length modifier.  This is not simply |ptrdiff_t|, to cater to older
5026    and/or nonstandard C libraries where |"l"| works but |"ll"| and
5027    |"t"| do not, or where |long| is too narrow and |"ll"| works but
5028    |"t"| does not.  */
5029 
5030 #if LIN_MAX <= LONG_MAX
5031 typedef long int printint;
5032 # define pI "l"
5033 #elif LIN_MAX <= LLONG_MAX
5034 typedef long long int printint;
5035 # define pI "ll"
5036 #else
5037 typedef ptrdiff_t printint;
5038 # define pI "t"
5039 #endif
5040 
5041 /* Limit so that |2 * context + 1| does not overflow.  */
5042 
5043 #define CONTEXT_MAX ((LIN_MAX - 1) / 2)
5044 
5045 
5046 #define file_name_cmp strcmp
5047 
5048 /* Do struct stat |*s|, |*t| describe the same special file?  */
5049 #ifndef same_special_file
5050 # if HAVE_STRUCT_STAT_ST_RDEV && defined S_ISBLK && defined S_ISCHR
5051 #  define same_special_file(s, t) \
5052      (((S_ISBLK ((s)->st_mode) && S_ISBLK ((t)->st_mode)) \
5053        || (S_ISCHR ((s)->st_mode) && S_ISCHR ((t)->st_mode))) \
5054       && (s)->st_rdev == (t)->st_rdev)
5055 # else
5056 #  define same_special_file(s, t) 0
5057 # endif
5058 #endif
5059 
5060 /* Do |struct stat *s, *t| describe the same file?  Answer -1 if
5061    unknown.  */
5062 #ifndef same_file
5063 # define same_file(s, t) \
5064     ((((s)->st_ino == (t)->st_ino) && ((s)->st_dev == (t)->st_dev)) \
5065      || same_special_file (s, t))
5066 #endif
5067 
5068 /* Do |struct stat *s, *t| have the same file attributes?
5069 
5070    POSIX says that two files are identical if |st_ino| and |st_dev|
5071    are the same, but many file systems incorrectly assign the same
5072    (device, inode) pair to two distinct files, including:
5073 
5074    - GNU/Linux NFS servers that export all local file systems as a
5075      single NFS file system, if a local device number |(st_dev)| exceeds
5076      255, or if a local inode number |(st_ino)| exceeds 16777215.
5077 
5078    - Network Appliance NFS servers in snapshot directories; see
5079      Network Appliance bug \#195.
5080 
5081    - ClearCase MVFS; see bug id ATRia04618.
5082 
5083    Check whether two files that purport to be the same have the same
5084    attributes, to work around instances of this common bug.  Do not
5085    inspect all attributes, only attributes useful in checking for this
5086    bug.
5087 
5088    It's possible for two distinct files on a buggy file system to have
5089    the same attributes, but it's not worth slowing down all
5090    implementations (or complicating the configuration) to cater to
5091    these rare cases in buggy implementations.  */
5092 
5093 #ifndef same_file_attributes
5094 # define same_file_attributes(s, t) \
5095    ((s)->st_mode == (t)->st_mode \
5096     && (s)->st_nlink == (t)->st_nlink \
5097     && (s)->st_uid == (t)->st_uid \
5098     && (s)->st_gid == (t)->st_gid \
5099     && (s)->st_size == (t)->st_size \
5100     && (s)->st_mtime == (t)->st_mtime \
5101     && (s)->st_ctime == (t)->st_ctime)
5102 #endif
5103 
5104 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
5105 
5106 #define BB_LITTLE_ENDIAN 1
5107 
5108 #ifdef __APPLE__
5109 #include <libkern/OSByteOrder.h>
5110 #define SWAP_BE64(x) OSSwapInt64(x)
5111 #elif __FreeBSD__
5112 #include <sys/endian.h>
5113 #define SWAP_BE64(x) bswap64(x)
5114 #elif __linux__
5115 #include <byteswap.h>
5116 #define SWAP_BE64(x) bswap_64(x)
5117 #endif
5118 
5119 #endif

-
+ 79F0962254F05215DF25FF996EF028819A31E13BEAB4B0C10B9955AB48A571EA5C7714155E30EE244A6699D4CB3A8EB2588A60CC4F3F4F8342E3F2671E0A66D7
vtools/src/util.c

(0 . 0)(1 . 428)
5124 
5125 #include "diff.h"
5126 #include <error.h>
5127 #include <xalloc.h>
5128 #include <stdlib.h>
5129 
5130 /* Use when a system call returns non-zero status.  |name| should
5131    normally be the file name.  */
5132 
5133 void
5134 perror_with_name(char const *name) {
5135     error(0, errno, "%s", name);
5136 }
5137 
5138 /* Use when a system call returns non-zero status and that is
5139    fatal.  */
5140 
5141 void
5142 pfatal_with_name(char const *name) {
5143     int e = errno;
5144     fprintf(stderr, "%s", name);
5145     exit(EXIT_TROUBLE);
5146 }
5147 
5148 /* Print an error message containing |msgid|, then exit.  */
5149 
5150 void
5151 fatal(char const *msgid) {
5152     fprintf(stderr, "%s", msgid);
5153     exit(EXIT_TROUBLE);
5154 }
5155 
5156 /* Like |printf|, except if -l in effect then save the message and
5157    print later.  This is used for things like "Only in ...".  */
5158 
5159 void
5160 message(char const *format_msgid, char const *arg1, char const *arg2) {
5161     message5(format_msgid, arg1, arg2, 0, 0);
5162 }
5163 
5164 void
5165 message5(char const *format_msgid, char const *arg1, char const *arg2,
5166          char const *arg3, char const *arg4) {
5167     fprintf(stderr, format_msgid, arg1, arg2, arg3, arg4);
5168 }
5169 
5170 static char const *current_name0;
5171 static char const *current_name1;
5172 static bool currently_recursive;
5173 
5174 /* Call before outputting the results of comparing files |name0| and |name1|
5175    to set up |outfile|, the stdio stream for the output to go to.
5176 
5177    Usually, |outfile| is just stdout.  But when -l was specified we
5178    fork off a |pr| and make |outfile| a pipe to it.  'pr' then outputs
5179    to our stdout.  */
5180 
5181 void
5182 setup_output(char const *name0, char const *name1, bool recursive) {
5183     current_name0 = name0;
5184     current_name1 = name1;
5185     currently_recursive = recursive;
5186     outfile = 0;
5187 }
5188 
5189 static char c_escape_char(char c) {
5190     switch (c) {
5191         case '\a':
5192             return 'a';
5193         case '\b':
5194             return 'b';
5195         case '\t':
5196             return 't';
5197         case '\n':
5198             return 'n';
5199         case '\v':
5200             return 'v';
5201         case '\f':
5202             return 'f';
5203         case '\r':
5204             return 'r';
5205         case '"':
5206             return '"';
5207         case '\\':
5208             return '\\';
5209         default:
5210             return c < 32;
5211     }
5212 }
5213 
5214 static char *
5215 c_escape(char const *str) {
5216     char const *s;
5217     size_t plus = 0;
5218     bool must_quote = false;
5219 
5220     for (s = str; *s; s++) {
5221         char c = *s;
5222 
5223         if (c == ' ') {
5224             must_quote = true;
5225             continue;
5226         }
5227         switch (c_escape_char(*s)) {
5228             case 1:
5229                 plus += 3;
5230                 /* fall through */
5231             case 0:
5232                 break;
5233             default:
5234                 plus++;
5235                 break;
5236         }
5237     }
5238 
5239     if (must_quote || plus) {
5240         size_t s_len = s - str;
5241         char *buffer = xmalloc(s_len + plus + 3);
5242         char *b = buffer;
5243 
5244         *b++ = '"';
5245         for (s = str; *s; s++) {
5246             char c = *s;
5247             char escape = c_escape_char(c);
5248 
5249             switch (escape) {
5250                 case 0:
5251                     *b++ = c;
5252                     break;
5253                 case 1:
5254                     *b++ = '\\';
5255                     *b++ = ((c >> 6) & 03) + '0';
5256                     *b++ = ((c >> 3) & 07) + '0';
5257                     *b++ = ((c >> 0) & 07) + '0';
5258                     break;
5259                 default:
5260                     *b++ = '\\';
5261                     *b++ = escape;
5262                     break;
5263             }
5264         }
5265         *b++ = '"';
5266         *b = 0;
5267         return buffer;
5268     }
5269 
5270     return (char *) str;
5271 }
5272 
5273 void
5274 begin_output(void) {
5275     char *names[2];
5276     char *name;
5277 
5278     if (outfile != 0)
5279         return;
5280 
5281     names[0] = c_escape(current_name0);
5282     names[1] = c_escape(current_name1);
5283 
5284     /* Construct the header of this piece of diff.  */
5285     if(asprintf(&name, "diff%s %s %s", switch_string, names[0], names[1]) == -1)
5286         xalloc_die();
5287 
5288     outfile = stdout;
5289 
5290     /* If handling multiple files (because scanning a directory),
5291    print which files the following output is about.  */
5292     if (currently_recursive)
5293         printf("%s\n", name);
5294 
5295     free(name);
5296 
5297     print_context_header(files, (char const *const *) names);
5298 
5299     if (names[0] != current_name0)
5300         free(names[0]);
5301     if (names[1] != current_name1)
5302         free(names[1]);
5303 }
5304 
5305 void
5306 finish_output(void) {
5307     if (outfile != 0 && outfile != stdout) {
5308         if (ferror(outfile))
5309             fatal("write failed");
5310         if (fclose(outfile) != 0)
5311             pfatal_with_name("write failed");
5312     }
5313 
5314     outfile = 0;
5315 }
5316 
5317 /* Compare two lines (typically one from each input file) according to
5318    the command line options.  For efficiency, this is invoked only
5319    when the lines do not match exactly but an option like -i might
5320    cause us to ignore the difference.  Return nonzero if the lines
5321    differ.  */
5322 
5323 bool
5324 lines_differ(char const *s1, char const *s2) {
5325     register char const *t1 = s1;
5326     register char const *t2 = s2;
5327     size_t column = 0;
5328 
5329     while (1) {
5330         register unsigned char c1 = *t1++;
5331         register unsigned char c2 = *t2++;
5332 
5333         /* Test for exact char equality first, since it's a common
5334            case.  */
5335         if (c1 != c2) {
5336             break;
5337         }
5338         if (c1 == '\n')
5339             return false;
5340 
5341         column++;
5342     }
5343 
5344     return true;
5345 }
5346 
5347 /* Find the consecutive changes at the start of the script START.
5348    Return the last link before the first gap.  */
5349 
5350 struct change *
5351 find_change(struct change *start) {
5352     return start;
5353 }
5354 
5355 /* Divide |script| into pieces by calling |hunkfun| and print each piece
5356    with |printfun|.  Both functions take one arg, an edit script.
5357 
5358    |hunkfun| is called with the tail of the script and returns the
5359    last link that belongs together with the start of the tail.
5360 
5361    |printfun| takes a subscript which belongs together (with a null
5362    link at the end) and prints it.  */
5363 
5364 void
5365 print_script(struct change *script) {
5366     struct change *next = script;
5367 
5368     while (next) {
5369         struct change *this, *end;
5370 
5371         /* Find a set of changes that belong together.  */
5372         this = next;
5373         end = find_hunk(next);
5374 
5375         /* Disconnect them from the rest of the changes, making them a
5376        hunk, and remember the rest for next iteration.  */
5377         next = end->link;
5378         end->link = 0;
5379 #ifdef DEBUG
5380         debug_script (this);
5381 #endif
5382 
5383         /* Print this hunk.  */
5384         pr_unidiff_hunk(this);
5385 
5386         /* Reconnect the script so it will all be freed properly.  */
5387         end->link = next;
5388     }
5389 }
5390 
5391 /* Print the text of a single line |line|, flagging it with the
5392    characters in |line_flag| (which say whether the line is inserted,
5393    deleted, changed, etc.).  |line_flag| must not end in a blank,
5394    unless it is a single blank.  */
5395 
5396 void
5397 print_1_line(char const *line_flag, char const *const *line) {
5398     print_1_line_nl(line_flag, line, false);
5399 }
5400 
5401 /* Print the text of a single line |line|, flagging it with the
5402    characters in |line_flag| (which say whether the line is inserted,
5403    deleted, changed, etc.).  |line_flag| must not end in a blank,
5404    unless it is a single blank.  If |skip_nl| is set, then the final
5405    |'\n'| is not printed.  */
5406 
5407 void
5408 print_1_line_nl(char const *line_flag, char const *const *line, bool skip_nl) {
5409     char const *base = line[0], *limit = line[1]; /* Help the compiler.  */
5410     FILE *out = outfile; /* Help the compiler some more.  */
5411     char const *flag_format = 0;
5412 
5413     /* If -T was specified, use a Tab between the line-flag and the
5414        text.  Otherwise use a Space (as Unix diff does).  Print
5415        neither space nor tab if line-flags are empty.  But omit
5416        trailing blanks if requested.  */
5417 
5418     if (line_flag && *line_flag) {
5419         char const *flag_format_1 = flag_format = "%s ";
5420         char const *line_flag_1 = line_flag;
5421 
5422         if (suppress_blank_empty && **line == '\n') {
5423             flag_format_1 = "%s";
5424 
5425             /* This hack to omit trailing blanks takes advantage of
5426                the fact that the only way that |line_flag| can end in
5427                a blank is when |line_flag| consists of a single
5428                blank.  */
5429             line_flag_1 += *line_flag_1 == ' ';
5430         }
5431 
5432         fprintf(out, flag_format_1, line_flag_1);
5433     }
5434 
5435     output_1_line(base, limit - (skip_nl && limit[-1] == '\n'));
5436 
5437     if ((!line_flag || line_flag[0]) && limit[-1] != '\n') {
5438         fprintf(out, "\n\\ %s\n", "No newline at end of file");
5439     }
5440 }
5441 
5442 /* Output a line from |base| up to |limit|. */
5443 
5444 void
5445 output_1_line(char const *base, char const *limit) {
5446     const size_t MAX_CHUNK = 1024;
5447     size_t left = limit - base;
5448     while (left) {
5449         size_t to_write = MIN (left, MAX_CHUNK);
5450         size_t written = fwrite(base, sizeof(char), to_write, outfile);
5451         if (written < to_write)
5452             return;
5453         base += written;
5454         left -= written;
5455     }
5456 }
5457 
5458 /* Translate an internal line number (an index into diff's table of
5459    lines) into an actual line number in the input file.  The internal
5460    line number is |i|.  |file| points to the data on the file.
5461 
5462    Internal line numbers count from 0 starting after the prefix.
5463    Actual line numbers count from 1 within the entire file.  */
5464 
5465 lin
5466 translate_line_number(struct file_data const *file, lin i) {
5467     return i + file->prefix_lines + 1;
5468 }
5469 
5470 /* Translate a line number range.  This is always done for printing,
5471    so for convenience translate to printint rather than lin, so that
5472    the caller can use |printf| with |"%"pI"d"| without casting.  */
5473 
5474 void
5475 translate_range(struct file_data const *file,
5476                 lin a, lin b,
5477                 printint *aptr, printint *bptr) {
5478     *aptr = translate_line_number(file, a - 1) + 1;
5479     *bptr = translate_line_number(file, b + 1) - 1;
5480 }
5481 
5482 /* Look at a hunk of edit script and report the range of lines in each
5483    file that it applies to.  |hunk| is the start of the hunk, which is
5484    a chain of |struct change|.  The first and last line numbers of
5485    file 0 are stored in |*first0| and |*last0|, and likewise for file
5486    1 in |*first1| and |*last1|.  Note that these are internal line numbers
5487    that count from 0.
5488 
5489    If no lines from file 0 are deleted, then |first0| is |last0+1|.
5490 
5491    Return |UNCHANGED| if only ignorable lines are inserted or deleted,
5492    |OLD| if lines of file 0 are deleted,
5493    |NEW| if lines of file 1 are inserted,
5494    and |CHANGED| if both kinds of changes are found. */
5495 
5496 enum changes
5497 analyze_hunk(struct change *hunk,
5498              lin *first0, lin *last0,
5499              lin *first1, lin *last1) {
5500     struct change *next;
5501     lin l0, l1;
5502     lin show_from, show_to;
5503     /* If 0, ignore zero-length lines;
5504        if |SIZE_MAX|, do not ignore lines just because of their length.  */
5505 
5506     char const *const *linbuf0 = files[0].linbuf;  /* Help the compiler.  */
5507     char const *const *linbuf1 = files[1].linbuf;
5508 
5509     show_from = show_to = 0;
5510 
5511     *first0 = hunk->line0;
5512     *first1 = hunk->line1;
5513 
5514     next = hunk;
5515     do {
5516         l0 = next->line0 + next->deleted - 1;
5517         l1 = next->line1 + next->inserted - 1;
5518         show_from += next->deleted;
5519         show_to += next->inserted;
5520     } while ((next = next->link) != 0);
5521 
5522     *last0 = l0;
5523     *last1 = l1;
5524 
5525     return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
5526 }
5527 
5528 /* Yield a new block of |size| bytes, initialized to zero.  */
5529 
5530 void *
5531 zalloc(size_t size) {
5532     void *p = xmalloc(size);
5533     memset (p, 0, size);
5534     return p;
5535 }
5536 
5537 void
5538 debug_script(struct change *sp) {
5539     fflush(stdout);
5540 
5541     for (; sp; sp = sp->link) {
5542         printint line0 = sp->line0;
5543         printint line1 = sp->line1;
5544         printint deleted = sp->deleted;
5545         printint inserted = sp->inserted;
5546         fprintf(stderr, "%3"pI"d %3"pI"d delete %"pI"d insert %"pI"d\n",
5547                 line0, line1, deleted, inserted);
5548     }
5549 
5550     fflush(stderr);
5551 }