tree checksum vpatch file split hunks

all signers: phf

antecedents:

press order:

vtools_genesisphf

patch:

-
+ AE3D7FE005038F18760EF9C78E0AA83BEAD91FE39020238FF712E46CD52682D0DA8DDF2067CDC77247C1466564CAF7089B5670CB7EA684B0076F7AA125945351
vtools/Makefile
(0 . 0)(1 . 37)
5
6 CC:=gcc
7 LDFLAGS:=-static
8
9 SOURCES=lib/cmpbuf.c \
10 lib/dirname.c \
11 lib/error.c \
12 lib/filenamecat.c \
13 lib/filetype.c \
14 lib/hash.c \
15 lib/progname.c \
16 lib/xalloc.c \
17 src/analyze.c \
18 src/context.c \
19 src/diff.c \
20 src/dir.c \
21 src/io.c \
22 src/util.c
23 HEADERS=lib/cmpbuf.h \
24 lib/diffseq.h \
25 lib/dirname.h \
26 lib/error.h \
27 lib/filenamecat.h \
28 lib/filetype.h \
29 lib/hash.h \
30 lib/progname.h \
31 lib/xalloc.h \
32 src/diff.h \
33 src/system.h
34 OBJECTS=$(SRCS:.c=.o)
35
36 vdiff: $(SOURCES) $(HEADERS)
37 $(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -std=c99 -Ilib/ -Isrc/ -o $@ $(SOURCES)
38
39 clean:
40 @rm -f vdiff
41
-
+ 48454218170DAD22CB7BD9088EC90F7F808FCDE914962D0FCA20B3331BEF95AFE749D1A218D21AE02F00588C8DC05607B35C1B7C7CD3B852D58AC0D504DD3C30
vtools/lib/cmpbuf.c
(0 . 0)(1 . 57)
46 #include <errno.h>
47 #include <limits.h>
48 #include <signal.h>
49 #include <unistd.h>
50 #include <stdint.h>
51 #include "system.h"
52 #include "cmpbuf.h"
53
54 /* Read |nbytes| bytes from descriptor |fd| into |buf|. |nbytes| must
55 not be |SIZE_MAX|. Return the number of characters successfully
56 read. On error, return |SIZE_MAX|, setting |errno|. The number
57 returned is always |nbytes| unless end-of-file or error. */
58
59 size_t
60 block_read(int fd, char *buf, size_t nbytes) {
61 char *bp = buf;
62 char const *buflim = buf + nbytes;
63 size_t readlim = MIN (SSIZE_MAX, SIZE_MAX);
64
65 do {
66 size_t bytes_remaining = buflim - bp;
67 size_t bytes_to_read = MIN (bytes_remaining, readlim);
68 ssize_t nread = read(fd, bp, bytes_to_read);
69 if (nread <= 0) {
70 if (nread == 0)
71 break;
72 return SIZE_MAX;
73 }
74 bp += nread;
75 } while (bp < buflim);
76
77 return bp - buf;
78 }
79
80 /* Least common multiple of two buffer sizes |a| and |b|. However, if
81 either |a| or |b| is zero, or if the multiple is greater than
82 |lcm_max|, return a reasonable buffer size. */
83
84 size_t
85 buffer_lcm(size_t a, size_t b, size_t lcm_max) {
86 size_t lcm, m, n, q, r;
87
88 /* Yield reasonable values if buffer sizes are zero. */
89 if (!a)
90 return b ? b : 8 * 1024;
91 if (!b)
92 return a;
93
94 /* |n = gcd (a, b)| */
95 for (m = a, n = b; (r = m % n) != 0; m = n, n = r)
96 continue;
97
98 /* Yield a if there is an overflow. */
99 q = a / n;
100 lcm = q * b;
101 return lcm <= lcm_max && lcm / b == q ? lcm : a;
102 }
-
+ E52FB69AF603972D2409D2AA52D2FA4C7A5AA143778B1B2738207678FB27C38BA97BCB1200E32A6CAB425AD90E3FFB4A0720ED47D8D8A123484082EB9D0A1EFC
vtools/lib/cmpbuf.h
(0 . 0)(1 . 4)
107
108 size_t block_read(int, char *, size_t);
109
110 size_t buffer_lcm(size_t, size_t, size_t);
-
+ 8802BDF2CCDB11A2618988AB2791E89185379A02892642599CD00559C57C383AD85BFA5DAA5261508372143E5FF34054010E72C01D52063CDA88F4D3F97CCBBA
vtools/lib/diffseq.h
(0 . 0)(1 . 478)
115
116 /* The basic idea is to consider two vectors as similar if, when
117 transforming the first vector into the second vector through a
118 sequence of edits (inserts and deletes of one element each), this
119 sequence is short - or equivalently, if the ordered list of
120 elements that are untouched by these edits is long. For a good
121 introduction to the subject, read about the ``Levenshtein
122 distance'' in Wikipedia.
123
124 The basic algorithm is described in: ``An O(ND) Difference
125 Algorithm and its Variations'', Eugene W. Myers, Algorithmica
126 Vol. 1, 1986, pp. 251-266,
127 \url{http://dx.doi.org/10.1007/BF01840446}. See especially section
128 4.2, which describes the variation used below.
129
130 The basic algorithm was independently discovered as described in:
131 ``Algorithms for Approximate String Matching'', Esko Ukkonen,
132 Information and Control Vol. 64, 1985, pp. 100-118,
133 \url{http://dx.doi.org/10.1016/S0019-9958(85)80046-2}.
134
135 Unless the |find_minimal| flag is set, this code uses the
136 |TOO_EXPENSIVE| heuristic, by Paul Eggert, to limit the cost to
137 $O(N^1.5 \log N)$ at the price of producing suboptimal output for
138 large inputs with many differences. */
139
140 /* Before including this file, you need to define:
141 |ELEMENT| The element type of the vectors being compared.
142 |EQUAL| A two-argument macro that tests two elements for
143 equality.
144 |OFFSET| A signed integer type sufficient to hold the
145 difference between two indices. Usually
146 something like |ptrdiff_t|.
147 |EXTRA_CONTEXT_FIELDS| Declarations of fields for 'struct context'.
148 |NOTE_DELETE(ctxt, xoff)| Record the removal of the object xvec[xoff].
149 |NOTE_INSERT(ctxt, yoff)| Record the insertion of the object yvec[yoff].
150 |EARLY_ABORT(ctxt)| (Optional) A boolean expression that triggers an
151 early abort of the computation.
152 |USE_HEURISTIC| (Optional) Define if you want to support the
153 heuristic for large vectors.
154 It is also possible to use this file with abstract arrays. In this case,
155 xvec and yvec are not represented in memory. They only exist conceptually.
156 In this case, the list of defines above is amended as follows:
157 |ELEMENT| Undefined.
158 |EQUAL| Undefined.
159 |XVECREF_YVECREF_EQUAL(ctxt, xoff, yoff)|
160 A three-argument macro: References xvec[xoff] and
161 yvec[yoff] and tests these elements for equality.
162 Before including this file, you also need to include:
163 |#include <limits.h>
164 #include <stdbool.h>|
165 */
166
167 /* Maximum value of type |OFFSET|. */
168 #define OFFSET_MAX \
169 ((((OFFSET)1 << (sizeof (OFFSET) * CHAR_BIT - 2)) - 1) * 2 + 1)
170
171 /* Default to no early abort. */
172 #ifndef EARLY_ABORT
173 # define EARLY_ABORT(ctxt) false
174 #endif
175
176 /* Use this to suppress gcc's ``...may be used before initialized''
177 warnings. Beware: The Code argument must not contain commas. */
178 #ifndef IF_LINT
179 # if defined GCC_LINT || defined lint
180 # define IF_LINT(Code) Code
181 # else
182 # define IF_LINT(Code) /* empty */
183 # endif
184 #endif
185
186 /* As above, but when Code must contain one comma. */
187 #ifndef IF_LINT2
188 # if defined GCC_LINT || defined lint
189 # define IF_LINT2(Code1, Code2) Code1, Code2
190 # else
191 # define IF_LINT2(Code1, Code2) /* empty */
192 # endif
193 #endif
194
195 /*
196 * Context of comparison operation.
197 */
198 struct context {
199 #ifdef ELEMENT
200 /* Vectors being compared. */
201 ELEMENT const *xvec;
202 ELEMENT const *yvec;
203 #endif
204
205 /* Extra fields. */
206 EXTRA_CONTEXT_FIELDS
207
208 /* Vector, indexed by diagonal, containing 1 + the X coordinate of
209 the point furthest along the given diagonal in the forward
210 search of the edit matrix. */
211 OFFSET *fdiag;
212
213 /* Vector, indexed by diagonal, containing the X coordinate of the
214 point furthest along the given diagonal in the backward search
215 of the edit matrix. */
216 OFFSET *bdiag;
217
218 #ifdef USE_HEURISTIC
219 /* This corresponds to the |diff --speed-large-files| flag. With
220 this heuristic, for vectors with a constant small density of
221 changes, the algorithm is linear in the vector size. */
222 bool heuristic;
223 #endif
224
225 /* Edit scripts longer than this are too expensive to compute. */
226 OFFSET too_expensive;
227
228 /* Snakes bigger than this are considered "big". */
229 #define SNAKE_LIMIT 20
230 };
231
232 struct partition {
233 /* Midpoints of this partition. */
234 OFFSET xmid;
235 OFFSET ymid;
236
237 /* True if low half will be analyzed minimally. */
238 bool lo_minimal;
239
240 /* Likewise for high half. */
241 bool hi_minimal;
242 };
243
244
245 /* Find the midpoint of the shortest edit script for a specified
246 portion of the two vectors.
247
248 Scan from the beginnings of the vectors, and simultaneously from
249 the ends, doing a breadth-first search through the space of
250 edit-sequence. When the two searches meet, we have found the
251 midpoint of the shortest edit sequence.
252
253 If |find_minimal| is true, find the minimal edit script regardless
254 of expense. Otherwise, if the search is too expensive, use
255 heuristics to stop the search and report a suboptimal answer.
256
257 Set |part->(xmid,ymid)| to the midpoint |(xmid,ymid)|. The
258 diagonal number |xmid - ymid| equals the number of inserted
259 elements minus the number of deleted elements (counting only
260 elements before the midpoint).
261
262 Set |part->lo_minimal| to true iff the minimal edit script for the
263 left half of the partition is known; similarly for
264 |part->hi_minimal|.
265
266 This function assumes that the first elements of the specified
267 portions of the two vectors do not match, and likewise that the
268 last elements do not match. The caller must trim matching elements
269 from the beginning and end of the portions it is going to specify.
270
271 If we return the "wrong" partitions, the worst this can do is cause
272 suboptimal diff output. It cannot cause incorrect diff output. */
273
274 static void
275 diag(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
276 struct partition *part, struct context *ctxt) {
277 OFFSET *const fd = ctxt->fdiag; /* Give the compiler a chance. */
278 OFFSET *const bd = ctxt->bdiag; /* Additional help for the compiler. */
279 #ifdef ELEMENT
280 ELEMENT const *const xv = ctxt->xvec; /* Still more help for the compiler. */
281 ELEMENT const *const yv = ctxt->yvec; /* And more and more . . . */
282 #define XREF_YREF_EQUAL(x, y) EQUAL (xv[x], yv[y])
283 #else
284 #define XREF_YREF_EQUAL(x,y) XVECREF_YVECREF_EQUAL (ctxt, x, y)
285 #endif
286 const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */
287 const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */
288 const OFFSET fmid = xoff - yoff; /* Center diagonal of top-down search. */
289 const OFFSET bmid = xlim - ylim; /* Center diagonal of bottom-up search. */
290 OFFSET fmin = fmid;
291 OFFSET fmax = fmid; /* Limits of top-down search. */
292 OFFSET bmin = bmid;
293 OFFSET bmax = bmid; /* Limits of bottom-up search. */
294 OFFSET c; /* Cost. */
295 bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd
296 diagonal with respect to the northwest. */
297
298 fd[fmid] = xoff;
299 bd[bmid] = xlim;
300
301 for (c = 1;; ++c) {
302 OFFSET d; /* Active diagonal. */
303 bool big_snake = false;
304
305 /* Extend the top-down search by an edit step in each diagonal. */
306 if (fmin > dmin)
307 fd[--fmin - 1] = -1;
308 else
309 ++fmin;
310 if (fmax < dmax)
311 fd[++fmax + 1] = -1;
312 else
313 --fmax;
314 for (d = fmax; d >= fmin; d -= 2) {
315 OFFSET x;
316 OFFSET y;
317 OFFSET tlo = fd[d - 1];
318 OFFSET thi = fd[d + 1];
319 OFFSET x0 = tlo < thi ? thi : tlo + 1;
320
321 for (x = x0, y = x0 - d;
322 x < xlim && y < ylim && XREF_YREF_EQUAL (x, y);
323 x++, y++)
324 continue;
325 if (x - x0 > SNAKE_LIMIT)
326 big_snake = true;
327 fd[d] = x;
328 if (odd && bmin <= d && d <= bmax && bd[d] <= x) {
329 part->xmid = x;
330 part->ymid = y;
331 part->lo_minimal = part->hi_minimal = true;
332 return;
333 }
334 }
335
336 /* Similarly extend the bottom-up search. */
337 if (bmin > dmin)
338 bd[--bmin - 1] = OFFSET_MAX;
339 else
340 ++bmin;
341 if (bmax < dmax)
342 bd[++bmax + 1] = OFFSET_MAX;
343 else
344 --bmax;
345 for (d = bmax; d >= bmin; d -= 2) {
346 OFFSET x;
347 OFFSET y;
348 OFFSET tlo = bd[d - 1];
349 OFFSET thi = bd[d + 1];
350 OFFSET x0 = tlo < thi ? tlo : thi - 1;
351
352 for (x = x0, y = x0 - d;
353 xoff < x && yoff < y && XREF_YREF_EQUAL (x - 1, y - 1);
354 x--, y--)
355 continue;
356 if (x0 - x > SNAKE_LIMIT)
357 big_snake = true;
358 bd[d] = x;
359 if (!odd && fmin <= d && d <= fmax && x <= fd[d]) {
360 part->xmid = x;
361 part->ymid = y;
362 part->lo_minimal = part->hi_minimal = true;
363 return;
364 }
365 }
366
367 if (find_minimal)
368 continue;
369
370 #ifdef USE_HEURISTIC
371 /* Heuristic: check occasionally for a diagonal that has made
372 lots of progress compared with the edit distance. If we
373 have any such, find the one that has made the most progress
374 and return it as if it had succeeded.
375
376 With this heuristic, for vectors with a constant small
377 density of changes, the algorithm is linear in the vector
378 size. */
379
380 if (200 < c && big_snake && ctxt->heuristic) {
381 {
382 OFFSET best = 0;
383
384 for (d = fmax; d >= fmin; d -= 2) {
385 OFFSET dd = d - fmid;
386 OFFSET x = fd[d];
387 OFFSET y = x - d;
388 OFFSET v = (x - xoff) * 2 - dd;
389
390 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
391 if (v > best
392 && xoff + SNAKE_LIMIT <= x && x < xlim
393 && yoff + SNAKE_LIMIT <= y && y < ylim) {
394 /* We have a good enough best diagonal;
395 now insist that it end with a
396 significant snake. */
397 int k;
398
399 for (k = 1; XREF_YREF_EQUAL (x - k, y - k); k++)
400 if (k == SNAKE_LIMIT) {
401 best = v;
402 part->xmid = x;
403 part->ymid = y;
404 break;
405 }
406 }
407 }
408 }
409 if (best > 0) {
410 part->lo_minimal = true;
411 part->hi_minimal = false;
412 return;
413 }
414 }
415
416 {
417 OFFSET best = 0;
418
419 for (d = bmax; d >= bmin; d -= 2) {
420 OFFSET dd = d - bmid;
421 OFFSET x = bd[d];
422 OFFSET y = x - d;
423 OFFSET v = (xlim - x) * 2 + dd;
424
425 if (v > 12 * (c + (dd < 0 ? -dd : dd))) {
426 if (v > best
427 && xoff < x && x <= xlim - SNAKE_LIMIT
428 && yoff < y && y <= ylim - SNAKE_LIMIT) {
429 /* We have a good enough best diagonal;
430 now insist that it end with a
431 significant snake. */
432 int k;
433
434 for (k = 0; XREF_YREF_EQUAL (x + k, y + k); k++)
435 if (k == SNAKE_LIMIT - 1) {
436 best = v;
437 part->xmid = x;
438 part->ymid = y;
439 break;
440 }
441 }
442 }
443 }
444 if (best > 0) {
445 part->lo_minimal = false;
446 part->hi_minimal = true;
447 return;
448 }
449 }
450 }
451 #endif
452
453 /* Heuristic: if we've gone well beyond the call of duty, give
454 up and report halfway between our best results so far. */
455 if (c >= ctxt->too_expensive) {
456 OFFSET fxybest;
457 OFFSET fxbest IF_LINT (= 0);
458 OFFSET bxybest;
459 OFFSET bxbest IF_LINT (= 0);
460
461 /* Find forward diagonal that maximizes |X + Y|. */
462 fxybest = -1;
463 for (d = fmax; d >= fmin; d -= 2) {
464 OFFSET x = MIN (fd[d], xlim);
465 OFFSET y = x - d;
466 if (ylim < y) {
467 x = ylim + d;
468 y = ylim;
469 }
470 if (fxybest < x + y) {
471 fxybest = x + y;
472 fxbest = x;
473 }
474 }
475
476 /* Find backward diagonal that minimizes |X + Y|. */
477 bxybest = OFFSET_MAX;
478 for (d = bmax; d >= bmin; d -= 2) {
479 OFFSET x = MAX (xoff, bd[d]);
480 OFFSET y = x - d;
481 if (y < yoff) {
482 x = yoff + d;
483 y = yoff;
484 }
485 if (x + y < bxybest) {
486 bxybest = x + y;
487 bxbest = x;
488 }
489 }
490
491 /* Use the better of the two diagonals. */
492 if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) {
493 part->xmid = fxbest;
494 part->ymid = fxybest - fxbest;
495 part->lo_minimal = true;
496 part->hi_minimal = false;
497 } else {
498 part->xmid = bxbest;
499 part->ymid = bxybest - bxbest;
500 part->lo_minimal = false;
501 part->hi_minimal = true;
502 }
503 return;
504 }
505 }
506 #undef XREF_YREF_EQUAL
507 }
508
509
510 /* Compare in detail contiguous subsequences of the two vectors which
511 are known, as a whole, to match each other.
512
513 The subsequence of vector 0 is |[xoff, xlim)| and likewise for
514 vector 1.
515
516 Note that |xlim, ylim| are exclusive bounds. All indices into the
517 vectors are origin-0.
518
519 If |find_minimal|, find a minimal difference no matter how
520 expensive it is.
521
522 The results are recorded by invoking |note_delete| and
523 |note_insert|.
524
525 Return false if terminated normally, or true if terminated through
526 early abort. */
527
528 static bool
529 compareseq(OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
530 bool find_minimal, struct context *ctxt) {
531 #ifdef ELEMENT
532 ELEMENT const *xv = ctxt->xvec; /* Help the compiler. */
533 ELEMENT const *yv = ctxt->yvec;
534 #define XREF_YREF_EQUAL(x, y) EQUAL (xv[x], yv[y])
535 #else
536 #define XREF_YREF_EQUAL(x,y) XVECREF_YVECREF_EQUAL (ctxt, x, y)
537 #endif
538
539 /* Slide down the bottom initial diagonal. */
540 while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xoff, yoff)) {
541 xoff++;
542 yoff++;
543 }
544
545 /* Slide up the top initial diagonal. */
546 while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xlim - 1, ylim - 1)) {
547 xlim--;
548 ylim--;
549 }
550
551 /* Handle simple cases. */
552 if (xoff == xlim)
553 while (yoff < ylim) {
554 NOTE_INSERT (ctxt, yoff);
555 if (EARLY_ABORT (ctxt))
556 return true;
557 yoff++;
558 }
559 else if (yoff == ylim)
560 while (xoff < xlim) {
561 NOTE_DELETE (ctxt, xoff);
562 if (EARLY_ABORT (ctxt))
563 return true;
564 xoff++;
565 }
566 else {
567 struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 });
568
569 /* Find a point of correspondence in the middle of the vectors. */
570 diag(xoff, xlim, yoff, ylim, find_minimal, &part, ctxt);
571
572 /* Use the partitions to split this problem into subproblems. */
573 if (compareseq(xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt))
574 return true;
575 if (compareseq(part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt))
576 return true;
577 }
578
579 return false;
580 #undef XREF_YREF_EQUAL
581 }
582
583 #undef ELEMENT
584 #undef EQUAL
585 #undef OFFSET
586 #undef EXTRA_CONTEXT_FIELDS
587 #undef NOTE_DELETE
588 #undef NOTE_INSERT
589 #undef EARLY_ABORT
590 #undef USE_HEURISTIC
591 #undef XVECREF_YVECREF_EQUAL
592 #undef OFFSET_MAX
-
+ 1FBC6B96E09DF1B2470978CED1AF30DEFF7BE71E76AAF15873C60095E883C549309C7C5DAE5F608C0004F54B69884677D5FD1184B2224B453848D8879511F0FF
vtools/lib/dirname.c
(0 . 0)(1 . 41)
597 #include "dirname.h"
598 #include <string.h>
599 #include <stdbool.h>
600
601 /* Return the address of the last file name component of |name|. If
602 |name| has no relative file name components because it is a file
603 system root, return the empty string. */
604
605 char *
606 last_component(char const *name) {
607 char const *base = name + 0;
608 char const *p;
609 bool saw_slash = false;
610
611 while (((*base) == '/'))
612 base++;
613
614 for (p = base; *p; p++) {
615 if (((*p) == '/'))
616 saw_slash = true;
617 else if (saw_slash) {
618 base = p;
619 saw_slash = false;
620 }
621 }
622
623 return (char *) base;
624 }
625
626 /* Return the length of the basename |name|. Typically |name| is the
627 value returned by |base_name| or |last_component|. Act like
628 |strlen(name)|, except omit all trailing slashes. */
629
630 size_t
631 base_len(char const *name) {
632 size_t len;
633
634 for (len = strlen(name); 1 < len && ((name[len - 1]) == '/'); len--)
635 continue;
636 return len;
637 }
-
+ 5AC7F4190EFF243F401B3EB9E0AF4404BF391AE851FA443BB17A4C76BFAF9108239441352D5C7B90FA72EEA272965B5B54E8E72F1E42B2C99461D4EF2EF095A2
vtools/lib/dirname.h
(0 . 0)(1 . 10)
642 #ifndef DIRNAME_H_
643 #define DIRNAME_H_ 1
644
645 #include <stddef.h>
646
647 size_t base_len(char const *file);
648
649 char *last_component(char const *file);
650
651 #endif
-
+ 347F7CC6CC2D4AE711BAFCD7E08FEC2D76F741562B452CC8EEFBDAB6EE2A14781D162E4C4B597D528FD29D3F8295675A3636E0CB0C7F432778E56E79B1786A89
vtools/lib/error.c
(0 . 0)(1 . 78)
656 /* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */
657
658 #include "system.h"
659 #include "error.h"
660 #include "progname.h"
661 #include <stdarg.h>
662 #include <stdio.h>
663 #include <stdlib.h>
664 #include <string.h>
665 #include <fcntl.h>
666
667 /* If |NULL|, error will flush |stdout|, then print on |stderr| the
668 program name, a colon and a space. Otherwise, error will call this
669 function without parameters instead. */
670 void (*error_print_progname)(void);
671
672 /* This variable is incremented each time |error| is called. */
673 unsigned int error_message_count;
674
675
676 /* Return non-zero if |fd| is open. */
677 static int
678 is_open(int fd) {
679 return 0 <= fcntl(fd, F_GETFL);
680 }
681
682 static void
683 flush_stdout(void) {
684 if (is_open(1))
685 fflush(stdout);
686 }
687
688 static void
689 print_errno_message(int errnum) {
690 char const *s;
691 char errbuf[1024];
692 if (strerror_r(errnum, errbuf, sizeof errbuf) == 0)
693 s = errbuf;
694 else
695 s = 0;
696 if (!s)
697 s = "Unknown system error";
698 fprintf(stderr, ": %s", s);
699 }
700
701 static void
702 error_tail(int status, int errnum, const char *message, va_list args) {
703 vfprintf(stderr, message, args);
704 va_end (args);
705
706 ++error_message_count;
707 if (errnum)
708 print_errno_message(errnum);
709 putc ('\n', stderr);
710 fflush(stderr);
711 if (status)
712 exit(status);
713 }
714
715
716 /* Print the program name and error message |message|, which is a
717 printf-style format string with optional args. If |errnum| is
718 nonzero, print its corresponding system error message. Exit with
719 status |status| if it is nonzero. */
720 void
721 error(int status, int errnum, const char *message, ...) {
722 va_list args;
723 flush_stdout();
724 if (error_print_progname)
725 (*error_print_progname)();
726 else {
727 fprintf(stderr, "%s: ", get_program_name());
728 }
729
730 va_start (args, message);
731 error_tail(status, errnum, message, args);
732 }
733
-
+ D8B7E0F6AEA24BE22BCFA208F4A3A115B6F30E9C38A3E5DCF7386BF8781684286D1368703CEA7D51AE15EE29AA9D5D7524C4D6F7A487A2877053B90F85CD64BF
vtools/lib/error.h
(0 . 0)(1 . 25)
738 #ifndef _ERROR_H
739 #define _ERROR_H 1
740
741 /* Print a message with |fprintf(stderr, format, ...);| if |errnum| is
742 nonzero, follow it with ": " and |strerror(errnum)|. If |status|
743 is nonzero, terminate the program with |exit(status)|. */
744
745 extern void error(int __status, int __errnum, const char *__format, ...);
746
747 extern void error_at_line(int __status, int __errnum, const char *__fname,
748 unsigned int __lineno, const char *__format, ...);
749
750 /* If |NULL|, error will flush |stdout|, then print on |stderr| the
751 program name, a colon and a space. Otherwise, error will call this
752 function without parameters instead. */
753 extern void (*error_print_progname)(void);
754
755 /* This variable is incremented each time |error| is called. */
756 extern unsigned int error_message_count;
757
758 /* Sometimes we want to have at most one error per line. This
759 variable controls whether this mode is selected or not. */
760 extern int error_one_per_line;
761
762 #endif
-
+ 06209A17B6C66FDBBF324A004AC8853D54FD3C2A69F985A27DBD8DC65E5310C4228C1F9B41D7C639D24B47515641413AA57E35460B6246F41C8C961C0A5889FA
vtools/lib/filenamecat.c
(0 . 0)(1 . 83)
767
768 /* Written by Jim Meyering. */
769
770 /* Specification. */
771 #include "filenamecat.h"
772
773 #include <stdlib.h>
774 #include <string.h>
775 #include "xalloc.h"
776
777 #include "dirname.h"
778
779 #if !HAVE_MEMPCPY && !defined mempcpy
780 # define mempcpy(D, S, N) ((void *) ((char *) memcpy (D, S, N) + (N)))
781 #endif
782
783 /* Return the longest suffix of |f| that is a relative file name. If
784 it has no such suffix, return the empty string. */
785
786 static char const *
787 longest_relative_suffix(char const *f) {
788 for (f += 0; ((*f) == '/'); f++)
789 continue;
790 return f;
791 }
792
793 /* Concatenate two file name components, |dir| and |abase|, in
794 newly-allocated storage and return the result. The resulting file
795 name |f| is such that the commands |ls F| and |(cd DIR; ls BASE)|
796 refer to the same file, where |base| is |abase| with any file
797 system prefixes and leading separators removed. Arrange for a
798 directory separator if necessary between |dir| and |base| in the
799 result, removing any redundant separators. In any case, if
800 |base_in_result| is non-|NULL|, set |*base_in_result| to point to
801 the copy of |abase| in the returned concatenation. However, if
802 |abase| begins with more than one slash, set |*base_in_result| to
803 point to the sole corresponding slash that is copied into the
804 result buffer.
805
806 Return |NULL| if |malloc| fails. */
807
808 char *
809 mfile_name_concat(char const *dir, char const *abase, char **base_in_result) {
810 char const *dirbase = last_component(dir);
811 size_t dirbaselen = base_len(dirbase);
812 size_t dirlen = dirbase - dir + dirbaselen;
813 size_t needs_separator = (dirbaselen && !((dirbase[dirbaselen - 1]) == '/'));
814
815 char const *base = longest_relative_suffix(abase);
816 size_t baselen = strlen(base);
817
818 char *p_concat = malloc(dirlen + needs_separator + baselen + 1);
819 char *p;
820
821 if (p_concat == NULL)
822 return NULL;
823
824 p = mempcpy (p_concat, dir, dirlen);
825 *p = '/';
826 p += needs_separator;
827
828 if (base_in_result)
829 *base_in_result = p - (abase[0] == '/');
830
831 p = mempcpy (p, base, baselen);
832 *p = '\0';
833
834 return p_concat;
835 }
836
837 /* Written by Jim Meyering. */
838
839 /* Just like |mfile_name_concat| (|filenamecat-lgpl.c|), except,
840 rather than returning |NULL| upon |malloc| failure, here, we report
841 the "memory exhausted" condition and exit. */
842
843 char *
844 file_name_concat(char const *dir, char const *abase, char **base_in_result) {
845 char *p = mfile_name_concat(dir, abase, base_in_result);
846 if (p == NULL)
847 xalloc_die();
848 return p;
849 }
-
+ F462FD0AC792B50B17CA4AB2BFE91037CABCCE21BF7BA1D44EDFF9DFCA42F930547262163D7795EADFF22FB05F4F591936A2476B9E1CB619E4B54FC177AD6A17
vtools/lib/filenamecat.h
(0 . 0)(1 . 7)
854 /* Written by Jim Meyering. */
855
856 char *file_name_concat(char const *dir, char const *base,
857 char **base_in_result);
858
859 char *mfile_name_concat(char const *dir, char const *base,
860 char **base_in_result);
-
+ 44E3DF4102126EABBC8E4C0B424A650BD2AA11B1E1A84BD9169019A77DA0C3F4005D0A7708A8DBA5F005A642FAB96C0D9F02E788CCF6EEBF89246C8ADA039C73
vtools/lib/filetype.c
(0 . 0)(1 . 87)
865 /* Written by Paul Eggert. */
866
867 #include "filetype.h"
868
869 char const *
870 file_type(struct stat const *st) {
871 /* To keep diagnostics grammatical in English, the returned string
872 must start with a consonant. */
873
874 /* Do these three first, as they're the most common. */
875
876 if (S_ISREG (st->st_mode))
877 return st->st_size == 0 ? ("regular empty file") : ("regular file");
878
879 if (S_ISDIR (st->st_mode))
880 return ("directory");
881
882 if (S_ISLNK (st->st_mode))
883 return ("symbolic link");
884
885 #if 0
886 /* Do the |S_TYPEIS*| macros next, as they may be implemented in
887 terms of |S_ISNAM|, and we want the more-specialized
888 interpretation. */
889
890 if (S_TYPEISMQ (st))
891 return ("message queue");
892
893 if (S_TYPEISSEM (st))
894 return ("semaphore");
895
896 if (S_TYPEISSHM (st))
897 return ("shared memory object");
898
899 if (S_TYPEISTMO (st))
900 return ("typed memory object");
901
902 /* The remaining are in alphabetical order. */
903
904 if (S_ISBLK (st->st_mode))
905 return ("block special file");
906
907 if (S_ISCHR (st->st_mode))
908 return ("character special file");
909
910 if (S_ISCTG (st->st_mode))
911 return ("contiguous data");
912
913 if (S_ISFIFO (st->st_mode))
914 return ("fifo");
915
916 if (S_ISDOOR (st->st_mode))
917 return ("door");
918
919 if (S_ISMPB (st->st_mode))
920 return ("multiplexed block special file");
921
922 if (S_ISMPC (st->st_mode))
923 return ("multiplexed character special file");
924
925 if (S_ISMPX (st->st_mode))
926 return ("multiplexed file");
927
928 if (S_ISNAM (st->st_mode))
929 return ("named file");
930
931 if (S_ISNWK (st->st_mode))
932 return ("network special file");
933
934 if (S_ISOFD (st->st_mode))
935 return ("migrated file with data");
936
937 if (S_ISOFL (st->st_mode))
938 return ("migrated file without data");
939
940 if (S_ISPORT (st->st_mode))
941 return ("port");
942
943 if (S_ISSOCK (st->st_mode))
944 return ("socket");
945
946 if (S_ISWHT (st->st_mode))
947 return ("whiteout");
948 #endif
949
950 return ("weird file");
951 }
-
+ 2EBF923F7C2DC3578DEFD13F195F5CBF919B510468A5F523B462BC85E48BB3E8B66E9A4A08C1998BECB0A165BB3B8C4D75D81F0A2C32725DA1593F1EFEDCE6F1
vtools/lib/filetype.h
(0 . 0)(1 . 11)
956 /* Written by Paul Eggert and Jim Meyering. */
957
958 #ifndef FILE_TYPE_H
959 #define FILE_TYPE_H 1
960
961 #include <sys/types.h>
962 #include <sys/stat.h>
963
964 char const *file_type(struct stat const *);
965
966 #endif
-
+ 17F92FC88844A70C26B1CCD4CC068F81EB9320557802E744A3190C944EAB08E5DBCB58CAF37008DE8031261D3BA6E584F9795F24B11B3B47B2243FAEA18F180B
vtools/lib/hash.c
(0 . 0)(1 . 1152)
971 /* A generic hash table package. */
972
973 /* Define |USE_OBSTACK| to 1 if you want the allocator to use obstacks
974 instead of |malloc|. If you change |USE_OBSTACK|, you have to
975 recompile! */
976
977 #include "hash.h"
978 #include "system.h"
979
980 #include <stdlib.h>
981 #include <limits.h>
982
983 #if USE_OBSTACK
984 # include "obstack.h"
985 # ifndef obstack_chunk_alloc
986 # define obstack_chunk_alloc malloc
987 # endif
988 # ifndef obstack_chunk_free
989 # define obstack_chunk_free free
990 # endif
991 #endif
992
993 struct hash_entry {
994 void *data;
995 struct hash_entry *next;
996 };
997
998 struct hash_table {
999 /* The array of buckets starts at |bucket| and extends to
1000 |bucket_limit-1|, for a possibility of |n_buckets|. Among
1001 those, |n_buckets_used| buckets are not empty, there are
1002 |n_entries| active entries in the table. */
1003 struct hash_entry *bucket;
1004 struct hash_entry const *bucket_limit;
1005 size_t n_buckets;
1006 size_t n_buckets_used;
1007 size_t n_entries;
1008
1009 /* Tuning arguments, kept in a physically separate structure. */
1010 const Hash_tuning *tuning;
1011
1012 /* Three functions are given to |hash_initialize|, see the
1013 documentation block for this function. In a word, |hasher|
1014 randomizes a user entry into a number up from 0 up to some
1015 maximum minus 1; |comparator| returns true if two user entries
1016 compare equally; and |data_freer| is the cleanup function for a
1017 user entry. */
1018 Hash_hasher hasher;
1019 Hash_comparator comparator;
1020 Hash_data_freer data_freer;
1021
1022 /* A linked list of freed struct |hash_entry| structs. */
1023 struct hash_entry *free_entry_list;
1024
1025 #if USE_OBSTACK
1026 /* Whenever obstacks are used, it is possible to allocate all
1027 overflowed entries into a single stack, so they all can be
1028 freed in a single operation. It is not clear if the speedup is
1029 worth the trouble. */
1030 struct obstack entry_stack;
1031 #endif
1032 };
1033
1034 /* A hash table contains many internal entries, each holding a pointer
1035 to some user-provided data (also called a user entry). An entry
1036 indistinctly refers to both the internal entry and its associated
1037 user entry. A user entry contents may be hashed by a randomization
1038 function (the hashing function, or just "hasher" for short) into a
1039 number (or "slot") between 0 and the current table size. At each
1040 slot position in the hash table, starts a linked chain of entries
1041 for which the user data all hash to this slot. A bucket is the
1042 collection of all entries hashing to the same slot.
1043
1044 A good "hasher" function will distribute entries rather evenly in
1045 buckets. In the ideal case, the length of each bucket is roughly
1046 the number of entries divided by the table size. Finding the slot
1047 for a data is usually done in constant time by the "hasher", and
1048 the later finding of a precise entry is linear in time with the
1049 size of the bucket. Consequently, a larger hash table size (that
1050 is, a larger number of buckets) is prone to yielding shorter
1051 chains, {\bf given} the "hasher" function behaves properly.
1052
1053 Long buckets slow down the lookup algorithm. One might use big
1054 hash table sizes in hope to reduce the average length of buckets,
1055 but this might become inordinate, as unused slots in the hash table
1056 take some space. The best bet is to make sure you are using a good
1057 "hasher" function (beware that those are not that easy to write!
1058 :-), and to use a table size larger than the actual number of
1059 entries. */
1060
1061 /* If an insertion makes the ratio of nonempty buckets to table size
1062 larger than the growth threshold (a number between 0.0 and 1.0),
1063 then increase the table size by multiplying by the growth factor (a
1064 number greater than 1.0). The growth threshold defaults to 0.8,
1065 and the growth factor defaults to 1.414, meaning that the table
1066 will have doubled its size every second time 80\% of the buckets
1067 get used. */
1068 #define DEFAULT_GROWTH_THRESHOLD 0.8f
1069 #define DEFAULT_GROWTH_FACTOR 1.414f
1070
1071 /* If a deletion empties a bucket and causes the ratio of used buckets
1072 to table size to become smaller than the shrink threshold (a number
1073 between 0.0 and 1.0), then shrink the table by multiplying by the
1074 shrink factor (a number greater than the shrink threshold but
1075 smaller than 1.0). The shrink threshold and factor default to 0.0
1076 and 1.0, meaning that the table never shrinks. */
1077 #define DEFAULT_SHRINK_THRESHOLD 0.0f
1078 #define DEFAULT_SHRINK_FACTOR 1.0f
1079
1080 /* Use this to initialize or reset a |tuning| structure to some
1081 sensible values. */
1082 static const Hash_tuning default_tuning =
1083 {
1084 DEFAULT_SHRINK_THRESHOLD,
1085 DEFAULT_SHRINK_FACTOR,
1086 DEFAULT_GROWTH_THRESHOLD,
1087 DEFAULT_GROWTH_FACTOR,
1088 false
1089 };
1090
1091 /* Information and lookup. */
1092
1093 /* The following few functions provide information about the overall
1094 hash table organization: the number of entries, number of buckets
1095 and maximum length of buckets. */
1096
1097 /* Return the number of buckets in the hash table. The table size,
1098 the total number of buckets (used plus unused), or the maximum
1099 number of slots, are the same quantity. */
1100
1101 size_t
1102 hash_get_n_buckets(const Hash_table *table) {
1103 return table->n_buckets;
1104 }
1105
1106 /* Return the number of slots in use (non-empty buckets). */
1107
1108 size_t
1109 hash_get_n_buckets_used(const Hash_table *table) {
1110 return table->n_buckets_used;
1111 }
1112
1113 /* Return the number of active entries. */
1114
1115 size_t
1116 hash_get_n_entries(const Hash_table *table) {
1117 return table->n_entries;
1118 }
1119
1120 /* Return the length of the longest chain (bucket). */
1121
1122 size_t
1123 hash_get_max_bucket_length(const Hash_table *table) {
1124 struct hash_entry const *bucket;
1125 size_t max_bucket_length = 0;
1126
1127 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1128 if (bucket->data) {
1129 struct hash_entry const *cursor = bucket;
1130 size_t bucket_length = 1;
1131
1132 while (cursor = cursor->next, cursor)
1133 bucket_length++;
1134
1135 if (bucket_length > max_bucket_length)
1136 max_bucket_length = bucket_length;
1137 }
1138 }
1139
1140 return max_bucket_length;
1141 }
1142
1143 /* Do a mild validation of a hash table, by traversing it and checking
1144 two statistics. */
1145
1146 bool
1147 hash_table_ok(const Hash_table *table) {
1148 struct hash_entry const *bucket;
1149 size_t n_buckets_used = 0;
1150 size_t n_entries = 0;
1151
1152 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1153 if (bucket->data) {
1154 struct hash_entry const *cursor = bucket;
1155
1156 /* Count bucket head. */
1157 n_buckets_used++;
1158 n_entries++;
1159
1160 /* Count bucket overflow. */
1161 while (cursor = cursor->next, cursor)
1162 n_entries++;
1163 }
1164 }
1165
1166 if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
1167 return true;
1168
1169 return false;
1170 }
1171
1172 void
1173 hash_print_statistics(const Hash_table *table, FILE *stream) {
1174 size_t n_entries = hash_get_n_entries(table);
1175 size_t n_buckets = hash_get_n_buckets(table);
1176 size_t n_buckets_used = hash_get_n_buckets_used(table);
1177 size_t max_bucket_length = hash_get_max_bucket_length(table);
1178
1179 fprintf(stream, "# entries: %lu\n", (unsigned long int) n_entries);
1180 fprintf(stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
1181 fprintf(stream, "# buckets used: %lu (%.2f%%)\n",
1182 (unsigned long int) n_buckets_used,
1183 (100.0 * n_buckets_used) / n_buckets);
1184 fprintf(stream, "max bucket length: %lu\n",
1185 (unsigned long int) max_bucket_length);
1186 }
1187
1188 /* Hash |key| and return a pointer to the selected bucket. If
1189 |table->hasher| misbehaves, abort. */
1190 static struct hash_entry *
1191 safe_hasher(const Hash_table *table, const void *key) {
1192 size_t n = table->hasher(key, table->n_buckets);
1193 if (!(n < table->n_buckets))
1194 abort();
1195 return table->bucket + n;
1196 }
1197
1198 /* If |entry| matches an entry already in the hash table, return the
1199 entry from the table. Otherwise, return |NULL|. */
1200
1201 void *
1202 hash_lookup(const Hash_table *table, const void *entry) {
1203 struct hash_entry const *bucket = safe_hasher(table, entry);
1204 struct hash_entry const *cursor;
1205
1206 if (bucket->data == NULL)
1207 return NULL;
1208
1209 for (cursor = bucket; cursor; cursor = cursor->next)
1210 if (entry == cursor->data || table->comparator(entry, cursor->data))
1211 return cursor->data;
1212
1213 return NULL;
1214 }
1215
1216 /* Walking. */
1217
1218 /* The functions in this page traverse the hash table and process the
1219 contained entries. For the traversal to work properly, the hash
1220 table should not be resized nor modified while any particular entry
1221 is being processed. In particular, entries should not be added,
1222 and an entry may be removed only if there is no shrink threshold
1223 and the entry being removed has already been passed to
1224 |hash_get_next|. */
1225
1226 /* Return the first data in the table, or |NULL| if the table is
1227 empty. */
1228
1229 void *
1230 hash_get_first(const Hash_table *table) {
1231 struct hash_entry const *bucket;
1232
1233 if (table->n_entries == 0)
1234 return NULL;
1235
1236 for (bucket = table->bucket;; bucket++)
1237 if (!(bucket < table->bucket_limit))
1238 abort();
1239 else if (bucket->data)
1240 return bucket->data;
1241 }
1242
1243 /* Return the user data for the entry following |entry|, where |entry|
1244 has been returned by a previous call to either |hash_get_first| or
1245 |hash_get_next|. Return |NULL| if there are no more entries. */
1246
1247 void *
1248 hash_get_next(const Hash_table *table, const void *entry) {
1249 struct hash_entry const *bucket = safe_hasher(table, entry);
1250 struct hash_entry const *cursor;
1251
1252 /* Find next entry in the same bucket. */
1253 cursor = bucket;
1254 do {
1255 if (cursor->data == entry && cursor->next)
1256 return cursor->next->data;
1257 cursor = cursor->next;
1258 } while (cursor != NULL);
1259
1260 /* Find first entry in any subsequent bucket. */
1261 while (++bucket < table->bucket_limit)
1262 if (bucket->data)
1263 return bucket->data;
1264
1265 /* None found. */
1266 return NULL;
1267 }
1268
1269 /* Fill |buffer| with pointers to active user entries in the hash
1270 table, then return the number of pointers copied. Do not copy more
1271 than |buffer_size| pointers. */
1272
1273 size_t
1274 hash_get_entries(const Hash_table *table, void **buffer,
1275 size_t buffer_size) {
1276 size_t counter = 0;
1277 struct hash_entry const *bucket;
1278 struct hash_entry const *cursor;
1279
1280 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1281 if (bucket->data) {
1282 for (cursor = bucket; cursor; cursor = cursor->next) {
1283 if (counter >= buffer_size)
1284 return counter;
1285 buffer[counter++] = cursor->data;
1286 }
1287 }
1288 }
1289
1290 return counter;
1291 }
1292
1293 /* Call a |processor| function for each entry of a hash table, and
1294 return the number of entries for which the processor function
1295 returned success. A pointer to some |processor_data| which will be
1296 made available to each call to the processor function. The
1297 |processor| accepts two arguments: the first is the user entry
1298 being walked into, the second is the value of |processor_data| as
1299 received. The walking continue for as long as the |processor|
1300 function returns nonzero. When it returns zero, the walking is
1301 interrupted. */
1302
1303 size_t
1304 hash_do_for_each(const Hash_table *table, Hash_processor processor,
1305 void *processor_data) {
1306 size_t counter = 0;
1307 struct hash_entry const *bucket;
1308 struct hash_entry const *cursor;
1309
1310 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1311 if (bucket->data) {
1312 for (cursor = bucket; cursor; cursor = cursor->next) {
1313 if (!processor(cursor->data, processor_data))
1314 return counter;
1315 counter++;
1316 }
1317 }
1318 }
1319
1320 return counter;
1321 }
1322
1323 /* Allocation and clean-up. */
1324
1325 /* Return a hash index for a |NULL|-terminated |string| between |0| and
1326 |n_buckets-1|. This is a convenience routine for constructing
1327 other hashing functions. */
1328
1329 #if USE_DIFF_HASH
1330
1331 /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01:
1332 ``Please see B. J. McKenzie, R. Harries \& T. Bell, Selecting a
1333 hashing algorithm, Software--practice \& experience 20, 2 (Feb
1334 1990), 209-224. Good hash algorithms tend to be domain-specific,
1335 so what's good for [diffutils'] |io.c| may not be good for your
1336 application.'' */
1337
1338 size_t
1339 hash_string (const char *string, size_t n_buckets)
1340 {
1341 # define HASH_ONE_CHAR(Value, Byte) \
1342 ((Byte) + rotl_sz (Value, 7))
1343
1344 size_t value = 0;
1345 unsigned char ch;
1346
1347 for (; (ch = *string); string++)
1348 value = HASH_ONE_CHAR (value, ch);
1349 return value % n_buckets;
1350
1351 # undef HASH_ONE_CHAR
1352 }
1353
1354 #else
1355
1356 /* This one comes from |recode|, and performs a bit better than the
1357 above as per a few experiments. It is inspired from a hashing
1358 routine found in the very old Cyber `snoop', itself written in
1359 typical Greg Mansfield style. (By the way, what happened to this
1360 excellent man? Is he still alive?) */
1361
1362 size_t
1363 hash_string(const char *string, size_t n_buckets) {
1364 size_t value = 0;
1365 unsigned char ch;
1366
1367 for (; (ch = *string); string++)
1368 value = (value * 31 + ch) % n_buckets;
1369 return value;
1370 }
1371
1372 #endif
1373
1374 /* Return true if |candidate| is a prime number. |candidate| should
1375 be an odd number at least equal to 11. */
1376
1377 static bool
1378 is_prime(size_t candidate) {
1379 size_t divisor = 3;
1380 size_t square = divisor * divisor;
1381
1382 while (square < candidate && (candidate % divisor)) {
1383 divisor++;
1384 square += 4 * divisor;
1385 divisor++;
1386 }
1387
1388 return (candidate % divisor ? true : false);
1389 }
1390
1391 /* Round a given |candidate| number up to the nearest prime, and
1392 return that prime. Primes lower than 10 are merely skipped. */
1393
1394 static size_t
1395 next_prime(size_t candidate) {
1396 /* Skip small primes. */
1397 if (candidate < 10)
1398 candidate = 10;
1399
1400 /* Make it definitely odd. */
1401 candidate |= 1;
1402
1403 while (SIZE_MAX != candidate && !is_prime(candidate))
1404 candidate += 2;
1405
1406 return candidate;
1407 }
1408
1409 void
1410 hash_reset_tuning(Hash_tuning *tuning) {
1411 *tuning = default_tuning;
1412 }
1413
1414 /* Given a |size_t| argument |x|, return the value corresponding to
1415 rotating the bits |n| steps to the right. |n| must be between 1 to
1416 |(CHAR_BIT * sizeof (size_t) - 1)| inclusive. */
1417 static inline size_t
1418 rotr_sz(size_t x, int n) {
1419 return ((x >> n) | (x << ((CHAR_BIT * sizeof x) - n))) & SIZE_MAX;
1420 }
1421
1422 /* If the user passes a |NULL| hasher, we hash the raw pointer. */
1423 static size_t
1424 raw_hasher(const void *data, size_t n) {
1425 /* When hashing unique pointers, it is often the case that they
1426 were generated by malloc and thus have the property that the
1427 low-order bits are 0. As this tends to give poorer performance
1428 with small tables, we rotate the pointer value before
1429 performing division, in an attempt to improve hash quality. */
1430 size_t val = rotr_sz((size_t) data, 3);
1431 return val % n;
1432 }
1433
1434 /* If the user passes a |NULL| comparator, we use pointer
1435 comparison. */
1436 static bool
1437 raw_comparator(const void *a, const void *b) {
1438 return a == b;
1439 }
1440
1441
1442 /* For the given hash |table|, check the user supplied tuning
1443 structure for reasonable values, and return true if there is no
1444 gross error with it. Otherwise, definitively reset the |tuning|
1445 field to some acceptable default in the hash table (that is, the
1446 user loses the right of further modifying tuning arguments), and
1447 return false. */
1448
1449 static bool
1450 check_tuning(Hash_table *table) {
1451 const Hash_tuning *tuning = table->tuning;
1452 float epsilon;
1453 if (tuning == &default_tuning)
1454 return true;
1455
1456 /* Be a bit stricter than mathematics would require, so that
1457 rounding errors in size calculations do not cause allocations
1458 to fail to grow or shrink as they should. The smallest
1459 allocation is 11 (due to |next_prime|'s algorithm), so an
1460 epsilon of 0.1 should be good enough. */
1461 epsilon = 0.1f;
1462
1463 if (epsilon < tuning->growth_threshold
1464 && tuning->growth_threshold < 1 - epsilon
1465 && 1 + epsilon < tuning->growth_factor
1466 && 0 <= tuning->shrink_threshold
1467 && tuning->shrink_threshold + epsilon < tuning->shrink_factor
1468 && tuning->shrink_factor <= 1
1469 && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
1470 return true;
1471
1472 table->tuning = &default_tuning;
1473 return false;
1474 }
1475
1476 /* Compute the size of the bucket array for the given |candidate| and
1477 |tuning|, or return 0 if there is no possible way to allocate that
1478 many entries. */
1479
1480 static size_t
1481 compute_bucket_size(size_t candidate, const Hash_tuning *tuning) {
1482 if (!tuning->is_n_buckets) {
1483 float new_candidate = candidate / tuning->growth_threshold;
1484 if (SIZE_MAX <= new_candidate)
1485 return 0;
1486 candidate = new_candidate;
1487 }
1488 candidate = next_prime(candidate);
1489 if (xalloc_oversized (candidate, sizeof(struct hash_entry *)))
1490 return 0;
1491 return candidate;
1492 }
1493
1494 /* Allocate and return a new hash table, or |NULL| upon failure. The
1495 initial number of buckets is automatically selected so as to {\it
1496 guarantee} that you may insert at least |candidate| different user
1497 entries before any growth of the hash table size occurs. So, if
1498 have a reasonably tight a-priori upper bound on the number of
1499 entries you intend to insert in the hash table, you may save some
1500 table memory and insertion time, by specifying it here. If the
1501 |is_n_buckets| field of the |tuning| structure is true, the
1502 |candidate| argument has its meaning changed to the wanted number
1503 of buckets.
1504
1505 |tuning| points to a structure of user-supplied values, in case
1506 some fine tuning is wanted over the default behavior of the hasher.
1507 If |tuning| is |NULL|, the default tuning parameters are used
1508 instead. If |tuning| is provided but the values requested are out
1509 of bounds or might cause rounding errors, return |NULL|.
1510
1511 The user-supplied |hasher| function, when not |NULL|, accepts two
1512 arguments |entry| and |table_size|. It computes, by hashing
1513 |entry| contents, a slot number for that entry which should be in
1514 the range |0..table_size-1|. This slot number is then returned.
1515
1516 The user-supplied |comparator| function, when not |NULL|, accepts
1517 two arguments pointing to user data, it then returns true for a
1518 pair of entries that compare equal, or false otherwise. This
1519 function is internally called on entries which are already known to
1520 hash to the same bucket index, but which are distinct pointers.
1521
1522 The user-supplied |data_freer| function, when not |NULL|, may be
1523 later called with the user data as an argument, just before the
1524 entry containing the data gets freed. This happens from within
1525 |hash_free| or |hash_clear|. You should specify this function only
1526 if you want these functions to free all of your |data| data. This
1527 is typically the case when your data is simply an auxiliary struct
1528 that you have |malloc|'d to aggregate several values. */
1529
1530 Hash_table *
1531 hash_initialize(size_t candidate, const Hash_tuning *tuning,
1532 Hash_hasher hasher, Hash_comparator comparator,
1533 Hash_data_freer data_freer) {
1534 Hash_table *table;
1535
1536 if (hasher == NULL)
1537 hasher = raw_hasher;
1538 if (comparator == NULL)
1539 comparator = raw_comparator;
1540
1541 table = malloc(sizeof *table);
1542 if (table == NULL)
1543 return NULL;
1544
1545 if (!tuning)
1546 tuning = &default_tuning;
1547 table->tuning = tuning;
1548 if (!check_tuning(table)) {
1549 /* Fail if the tuning options are invalid. This is the only
1550 occasion when the user gets some feedback about it. Once
1551 the table is created, if the user provides invalid tuning
1552 options, we silently revert to using the defaults, and
1553 ignore further request to change the tuning options. */
1554 goto fail;
1555 }
1556
1557 table->n_buckets = compute_bucket_size(candidate, tuning);
1558 if (!table->n_buckets)
1559 goto fail;
1560
1561 table->bucket = calloc(table->n_buckets, sizeof *table->bucket);
1562 if (table->bucket == NULL)
1563 goto fail;
1564 table->bucket_limit = table->bucket + table->n_buckets;
1565 table->n_buckets_used = 0;
1566 table->n_entries = 0;
1567
1568 table->hasher = hasher;
1569 table->comparator = comparator;
1570 table->data_freer = data_freer;
1571
1572 table->free_entry_list = NULL;
1573 #if USE_OBSTACK
1574 obstack_init (&table->entry_stack);
1575 #endif
1576 return table;
1577
1578 fail:
1579 free(table);
1580 return NULL;
1581 }
1582
1583 /* Make all buckets empty, placing any chained entries on the free
1584 list. Apply the user-specified function |data_freer| (if any) to
1585 the datas of any affected entries. */
1586
1587 void
1588 hash_clear(Hash_table *table) {
1589 struct hash_entry *bucket;
1590
1591 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1592 if (bucket->data) {
1593 struct hash_entry *cursor;
1594 struct hash_entry *next;
1595
1596 /* Free the bucket overflow. */
1597 for (cursor = bucket->next; cursor; cursor = next) {
1598 if (table->data_freer)
1599 table->data_freer(cursor->data);
1600 cursor->data = NULL;
1601
1602 next = cursor->next;
1603 /* Relinking is done one entry at a time, as it is to
1604 be expected that overflows are either rare or
1605 short. */
1606 cursor->next = table->free_entry_list;
1607 table->free_entry_list = cursor;
1608 }
1609
1610 /* Free the bucket head. */
1611 if (table->data_freer)
1612 table->data_freer(bucket->data);
1613 bucket->data = NULL;
1614 bucket->next = NULL;
1615 }
1616 }
1617
1618 table->n_buckets_used = 0;
1619 table->n_entries = 0;
1620 }
1621
1622 /* Reclaim all storage associated with a hash table. If a
1623 |data_freer| function has been supplied by the user when the hash
1624 table was created, this function applies it to the data of each
1625 entry before freeing that entry. */
1626
1627 void
1628 hash_free(Hash_table *table) {
1629 struct hash_entry *bucket;
1630 struct hash_entry *cursor;
1631 struct hash_entry *next;
1632
1633 /* Call the user |data_freer| function. */
1634 if (table->data_freer && table->n_entries) {
1635 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1636 if (bucket->data) {
1637 for (cursor = bucket; cursor; cursor = cursor->next)
1638 table->data_freer(cursor->data);
1639 }
1640 }
1641 }
1642
1643 #if USE_OBSTACK
1644
1645 obstack_free (&table->entry_stack, NULL);
1646
1647 #else
1648
1649 /* Free all bucket overflowed entries. */
1650 for (bucket = table->bucket; bucket < table->bucket_limit; bucket++) {
1651 for (cursor = bucket->next; cursor; cursor = next) {
1652 next = cursor->next;
1653 free(cursor);
1654 }
1655 }
1656
1657 /* Also reclaim the internal list of previously freed entries. */
1658 for (cursor = table->free_entry_list; cursor; cursor = next) {
1659 next = cursor->next;
1660 free(cursor);
1661 }
1662
1663 #endif
1664
1665 /* Free the remainder of the hash table structure. */
1666 free(table->bucket);
1667 free(table);
1668 }
1669
1670 /* Insertion and deletion. */
1671
1672 /* Get a new hash entry for a bucket overflow, possibly by recycling a
1673 previously freed one. If this is not possible, allocate a new
1674 one. */
1675
1676 static struct hash_entry *
1677 allocate_entry(Hash_table *table) {
1678 struct hash_entry *new;
1679
1680 if (table->free_entry_list) {
1681 new = table->free_entry_list;
1682 table->free_entry_list = new->next;
1683 } else {
1684 #if USE_OBSTACK
1685 new = obstack_alloc (&table->entry_stack, sizeof *new);
1686 #else
1687 new = malloc(sizeof *new);
1688 #endif
1689 }
1690
1691 return new;
1692 }
1693
1694 /* Free a hash entry which was part of some bucket overflow, saving it
1695 for later recycling. */
1696
1697 static void
1698 free_entry(Hash_table *table, struct hash_entry *entry) {
1699 entry->data = NULL;
1700 entry->next = table->free_entry_list;
1701 table->free_entry_list = entry;
1702 }
1703
1704 /* This private function is used to help with insertion and deletion.
1705 When |entry| matches an entry in the table, return a pointer to the
1706 corresponding user data and set |*bucket_head| to the head of the
1707 selected bucket. Otherwise, return |NULL|. When |delete| is true
1708 and |entry| matches an entry in the table, unlink the matching
1709 entry. */
1710
1711 static void *
1712 hash_find_entry(Hash_table *table, const void *entry,
1713 struct hash_entry **bucket_head, bool delete) {
1714 struct hash_entry *bucket = safe_hasher(table, entry);
1715 struct hash_entry *cursor;
1716
1717 *bucket_head = bucket;
1718
1719 /* Test for empty bucket. */
1720 if (bucket->data == NULL)
1721 return NULL;
1722
1723 /* See if the entry is the first in the bucket. */
1724 if (entry == bucket->data || table->comparator(entry, bucket->data)) {
1725 void *data = bucket->data;
1726
1727 if (delete) {
1728 if (bucket->next) {
1729 struct hash_entry *next = bucket->next;
1730
1731 /* Bump the first overflow entry into the bucket head, then save
1732 the previous first overflow entry for later recycling. */
1733 *bucket = *next;
1734 free_entry(table, next);
1735 } else {
1736 bucket->data = NULL;
1737 }
1738 }
1739
1740 return data;
1741 }
1742
1743 /* Scan the bucket overflow. */
1744 for (cursor = bucket; cursor->next; cursor = cursor->next) {
1745 if (entry == cursor->next->data
1746 || table->comparator(entry, cursor->next->data)) {
1747 void *data = cursor->next->data;
1748
1749 if (delete) {
1750 struct hash_entry *next = cursor->next;
1751
1752 /* Unlink the entry to delete, then save the freed entry for later
1753 recycling. */
1754 cursor->next = next->next;
1755 free_entry(table, next);
1756 }
1757
1758 return data;
1759 }
1760 }
1761
1762 /* No entry found. */
1763 return NULL;
1764 }
1765
1766 /* Internal helper, to move entries from |src| to |dst|. Both tables
1767 must share the same free entry list. If |safe|, only move overflow
1768 entries, saving bucket heads for later, so that no allocations will
1769 occur. Return false if the free entry list is exhausted and an
1770 allocation fails. */
1771
1772 static bool
1773 transfer_entries(Hash_table *dst, Hash_table *src, bool safe) {
1774 struct hash_entry *bucket;
1775 struct hash_entry *cursor;
1776 struct hash_entry *next;
1777 for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
1778 if (bucket->data) {
1779 void *data;
1780 struct hash_entry *new_bucket;
1781
1782 /* Within each bucket, transfer overflow entries first and
1783 then the bucket head, to minimize memory pressure.
1784 After all, the only time we might allocate is when
1785 moving the bucket head, but moving overflow entries
1786 first may create free entries that can be recycled by
1787 the time we finally get to the bucket head. */
1788 for (cursor = bucket->next; cursor; cursor = next) {
1789 data = cursor->data;
1790 new_bucket = safe_hasher(dst, data);
1791
1792 next = cursor->next;
1793
1794 if (new_bucket->data) {
1795 /* Merely relink an existing entry, when moving
1796 from a bucket overflow into a bucket
1797 overflow. */
1798 cursor->next = new_bucket->next;
1799 new_bucket->next = cursor;
1800 } else {
1801 /* Free an existing entry, when moving from a
1802 bucket overflow into a bucket header. */
1803 new_bucket->data = data;
1804 dst->n_buckets_used++;
1805 free_entry(dst, cursor);
1806 }
1807 }
1808 /* Now move the bucket head. Be sure that if we fail due
1809 to allocation failure that the src table is in a
1810 consistent state. */
1811 data = bucket->data;
1812 bucket->next = NULL;
1813 if (safe)
1814 continue;
1815 new_bucket = safe_hasher(dst, data);
1816
1817 if (new_bucket->data) {
1818 /* Allocate or recycle an entry, when moving from a
1819 bucket header into a bucket overflow. */
1820 struct hash_entry *new_entry = allocate_entry(dst);
1821
1822 if (new_entry == NULL)
1823 return false;
1824
1825 new_entry->data = data;
1826 new_entry->next = new_bucket->next;
1827 new_bucket->next = new_entry;
1828 } else {
1829 /* Move from one bucket header to another. */
1830 new_bucket->data = data;
1831 dst->n_buckets_used++;
1832 }
1833 bucket->data = NULL;
1834 src->n_buckets_used--;
1835 }
1836 return true;
1837 }
1838
1839 /* For an already existing hash table, change the number of buckets
1840 through specifying |candidate|. The contents of the hash table are
1841 preserved. The new number of buckets is automatically selected so
1842 as to {\it guarantee} that the table may receive at least
1843 |candidate| different user entries, including those already in the
1844 table, before any other growth of the hash table size occurs. If
1845 |tuning->is_n_buckets| is true, then |candidate| specifies the
1846 exact number of buckets desired. Return true iff the rehash
1847 succeeded. */
1848
1849 bool
1850 hash_rehash(Hash_table *table, size_t candidate) {
1851 Hash_table storage;
1852 Hash_table *new_table;
1853 size_t new_size = compute_bucket_size(candidate, table->tuning);
1854
1855 if (!new_size)
1856 return false;
1857 if (new_size == table->n_buckets)
1858 return true;
1859 new_table = &storage;
1860 new_table->bucket = calloc(new_size, sizeof *new_table->bucket);
1861 if (new_table->bucket == NULL)
1862 return false;
1863 new_table->n_buckets = new_size;
1864 new_table->bucket_limit = new_table->bucket + new_size;
1865 new_table->n_buckets_used = 0;
1866 new_table->n_entries = 0;
1867 new_table->tuning = table->tuning;
1868 new_table->hasher = table->hasher;
1869 new_table->comparator = table->comparator;
1870 new_table->data_freer = table->data_freer;
1871
1872 /* In order for the transfer to successfully complete, we need
1873 additional overflow entries when distinct buckets in the old
1874 table collide into a common bucket in the new table. The worst
1875 case possible is a hasher that gives a good spread with the old
1876 size, but returns a constant with the new size; if we were to
1877 guarantee |table->n_buckets_used-1| free entries in advance, then
1878 the transfer would be guaranteed to not allocate memory.
1879 However, for large tables, a guarantee of no further allocation
1880 introduces a lot of extra memory pressure, all for an unlikely
1881 corner case (most rehashes reduce, rather than increase, the
1882 number of overflow entries needed). So, we instead ensure that
1883 the transfer process can be reversed if we hit a memory
1884 allocation failure mid-transfer. */
1885
1886 /* Merely reuse the extra old space into the new table. */
1887 #if USE_OBSTACK
1888 new_table->entry_stack = table->entry_stack;
1889 #endif
1890 new_table->free_entry_list = table->free_entry_list;
1891
1892 if (transfer_entries(new_table, table, false)) {
1893 /* Entries transferred successfully; tie up the loose ends. */
1894 free(table->bucket);
1895 table->bucket = new_table->bucket;
1896 table->bucket_limit = new_table->bucket_limit;
1897 table->n_buckets = new_table->n_buckets;
1898 table->n_buckets_used = new_table->n_buckets_used;
1899 table->free_entry_list = new_table->free_entry_list;
1900 /* |table->n_entries| and |table->entry_stack| already hold their value. */
1901 return true;
1902 }
1903
1904 /* We've allocated |new_table->bucket| (and possibly some
1905 entries), exhausted the free list, and moved some but not all
1906 entries into |new_table|. We must undo the partial move before
1907 returning failure. The only way to get into this situation is
1908 if |new_table| uses fewer buckets than the old table, so we
1909 will reclaim some free entries as overflows in the new table
1910 are put back into distinct buckets in the old table.
1911
1912 There are some pathological cases where a single pass through
1913 the table requires more intermediate overflow entries than
1914 using two passes. Two passes give worse cache performance and
1915 takes longer, but at this point, we're already out of memory,
1916 so slow and safe is better than failure. */
1917 table->free_entry_list = new_table->free_entry_list;
1918 if (!(transfer_entries(table, new_table, true)
1919 && transfer_entries(table, new_table, false)))
1920 abort();
1921 /* |table->n_entries| already holds its value. */
1922 free(new_table->bucket);
1923 return false;
1924 }
1925
1926 /* Insert |entry| into hash |table| if there is not already a matching
1927 entry.
1928
1929 Return -1 upon memory allocation failure.
1930 Return 1 if insertion succeeded.
1931 Return 0 if there is already a matching entry in the table, and in
1932 that case, if |matched_ent| is non-|NULL|, set |*matched_ent| to
1933 that entry.
1934
1935 This interface is easier to use than |hash_insert| when you must
1936 distinguish between the latter two cases. More importantly,
1937 |hash_insert| is unusable for some types of |entry| values. When
1938 using |hash_insert|, the only way to distinguish those cases is to
1939 compare the return value and |entry|. That works only when you can
1940 have two different |entry| values that point to data that compares
1941 "equal". Thus, when the |entry| value is a simple scalar, you must
1942 use |hash_insert_if_absent|. |entry| must not be |NULL|. */
1943 int
1944 hash_insert_if_absent(Hash_table *table, void const *entry,
1945 void const **matched_ent) {
1946 void *data;
1947 struct hash_entry *bucket;
1948
1949 /* The caller cannot insert a |NULL| entry, since |hash_lookup|
1950 returns |NULL| to indicate "not found", and |hash_find_entry|
1951 uses |bucket->data == NULL| to indicate an empty bucket. */
1952 if (!entry)
1953 abort();
1954
1955 /* If there's a matching entry already in the table, return that. */
1956 if ((data = hash_find_entry(table, entry, &bucket, false)) != NULL) {
1957 if (matched_ent)
1958 *matched_ent = data;
1959 return 0;
1960 }
1961
1962 /* If the growth threshold of the buckets in use has been reached,
1963 increase the table size and rehash. There's no point in
1964 checking the number of entries: if the hashing function is
1965 ill-conditioned, rehashing is not likely to improve it. */
1966
1967 if (table->n_buckets_used
1968 > table->tuning->growth_threshold * table->n_buckets) {
1969 /* Check more fully, before starting real work. If tuning
1970 arguments became invalid, the second check will rely on
1971 proper defaults. */
1972 check_tuning(table);
1973 if (table->n_buckets_used
1974 > table->tuning->growth_threshold * table->n_buckets) {
1975 const Hash_tuning *tuning = table->tuning;
1976 float candidate =
1977 (tuning->is_n_buckets
1978 ? (table->n_buckets * tuning->growth_factor)
1979 : (table->n_buckets * tuning->growth_factor
1980 * tuning->growth_threshold));
1981
1982 if (SIZE_MAX <= candidate)
1983 return -1;
1984
1985 /* If the rehash fails, arrange to return |NULL|. */
1986 if (!hash_rehash(table, candidate))
1987 return -1;
1988
1989 /* Update the bucket we are interested in. */
1990 if (hash_find_entry(table, entry, &bucket, false) != NULL)
1991 abort();
1992 }
1993 }
1994
1995 /* |entry| is not matched, it should be inserted. */
1996
1997 if (bucket->data) {
1998 struct hash_entry *new_entry = allocate_entry(table);
1999
2000 if (new_entry == NULL)
2001 return -1;
2002
2003 /* Add |entry| in the overflow of the bucket. */
2004
2005 new_entry->data = (void *) entry;
2006 new_entry->next = bucket->next;
2007 bucket->next = new_entry;
2008 table->n_entries++;
2009 return 1;
2010 }
2011
2012 /* Add |entry| right in the bucket head. */
2013
2014 bucket->data = (void *) entry;
2015 table->n_entries++;
2016 table->n_buckets_used++;
2017
2018 return 1;
2019 }
2020
2021 /* If |entry| matches an entry already in the hash table, return the
2022 pointer to the entry from the table. Otherwise, insert |entry| and
2023 return |entry|. Return |NULL| if the storage required for
2024 insertion cannot be allocated. This implementation does not
2025 support duplicate entries or insertion of |NULL|. */
2026
2027 void *
2028 hash_insert(Hash_table *table, void const *entry) {
2029 void const *matched_ent;
2030 int err = hash_insert_if_absent(table, entry, &matched_ent);
2031 return (err == -1
2032 ? NULL
2033 : (void *) (err == 0 ? matched_ent : entry));
2034 }
2035
2036 /* If |entry| is already in the table, remove it and return the
2037 just-deleted data (the user may want to deallocate its storage).
2038 If |entry| is not in the table, don't modify the table and return
2039 |NULL|. */
2040
2041 void *
2042 hash_delete(Hash_table *table, const void *entry) {
2043 void *data;
2044 struct hash_entry *bucket;
2045
2046 data = hash_find_entry(table, entry, &bucket, true);
2047 if (!data)
2048 return NULL;
2049
2050 table->n_entries--;
2051 if (!bucket->data) {
2052 table->n_buckets_used--;
2053
2054 /* If the shrink threshold of the buckets in use has been
2055 reached, rehash into a smaller table. */
2056
2057 if (table->n_buckets_used
2058 < table->tuning->shrink_threshold * table->n_buckets) {
2059 /* Check more fully, before starting real work. If tuning
2060 arguments became invalid, the second check will rely on
2061 proper defaults. */
2062 check_tuning(table);
2063 if (table->n_buckets_used
2064 < table->tuning->shrink_threshold * table->n_buckets) {
2065 const Hash_tuning *tuning = table->tuning;
2066 size_t candidate =
2067 (tuning->is_n_buckets
2068 ? table->n_buckets * tuning->shrink_factor
2069 : (table->n_buckets * tuning->shrink_factor
2070 * tuning->growth_threshold));
2071
2072 if (!hash_rehash(table, candidate)) {
2073 /* Failure to allocate memory in an attempt to
2074 shrink the table is not fatal. But since
2075 memory is low, we can at least be kind and free
2076 any spare entries, rather than keeping them
2077 tied up in the free entry list. */
2078 #if !USE_OBSTACK
2079 struct hash_entry *cursor = table->free_entry_list;
2080 struct hash_entry *next;
2081 while (cursor) {
2082 next = cursor->next;
2083 free(cursor);
2084 cursor = next;
2085 }
2086 table->free_entry_list = NULL;
2087 #endif
2088 }
2089 }
2090 }
2091 }
2092
2093 return data;
2094 }
2095
2096 /* Testing. */
2097
2098 #if 0
2099
2100 void
2101 hash_print (const Hash_table *table)
2102 {
2103 struct hash_entry *bucket = (struct hash_entry *) table->bucket;
2104
2105 for ( ; bucket < table->bucket_limit; bucket++)
2106 {
2107 struct hash_entry *cursor;
2108
2109 if (bucket)
2110 printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
2111
2112 for (cursor = bucket; cursor; cursor = cursor->next)
2113 {
2114 char const *s = cursor->data;
2115 /* FIXME */
2116 if (s)
2117 printf (" %s\n", s);
2118 }
2119 }
2120 }
2121
2122 #endif
-
+ 861F7244DB30BFBA8540A2E1D615A4226E0CB4E7D177B1850149E6829B23D84B56482EF4DD5D04DB6AA6F8DDA409EF8043A7612670DF5F87A89BD7B3F46F8037
vtools/lib/hash.h
(0 . 0)(1 . 87)
2127
2128 /* A generic hash table package. */
2129
2130 /* Make sure |use_obstack| is defined to 1 if you want the allocator
2131 to use obstacks instead of malloc, and recompile |hash.c| with same
2132 setting. */
2133
2134 #ifndef HASH_H_
2135 # define HASH_H_
2136
2137 # include <stdio.h>
2138 # include <stdbool.h>
2139
2140 typedef size_t (*Hash_hasher)(const void *, size_t);
2141
2142 typedef bool (*Hash_comparator)(const void *, const void *);
2143
2144 typedef void (*Hash_data_freer)(void *);
2145
2146 typedef bool (*Hash_processor)(void *, void *);
2147
2148 struct hash_tuning {
2149 /* This structure is mainly used for |hash_initialize|, see the
2150 block documentation of |hash_reset_tuning| for more complete
2151 comments. */
2152
2153 float shrink_threshold; /* ratio of used buckets to trigger a shrink */
2154 float shrink_factor; /* ratio of new smaller size to original size */
2155 float growth_threshold; /* ratio of used buckets to trigger a growth */
2156 float growth_factor; /* ratio of new bigger size to original size */
2157 bool is_n_buckets; /* if |candidate| really means table size */
2158 };
2159
2160 typedef struct hash_tuning Hash_tuning;
2161
2162 struct hash_table;
2163
2164 typedef struct hash_table Hash_table;
2165
2166 /* Information and lookup. */
2167 size_t hash_get_n_buckets(const Hash_table *);
2168
2169 size_t hash_get_n_buckets_used(const Hash_table *);
2170
2171 size_t hash_get_n_entries(const Hash_table *);
2172
2173 size_t hash_get_max_bucket_length(const Hash_table *);
2174
2175 bool hash_table_ok(const Hash_table *);
2176
2177 void hash_print_statistics(const Hash_table *, FILE *);
2178
2179 void *hash_lookup(const Hash_table *, const void *);
2180
2181 /* Walking. */
2182 void *hash_get_first(const Hash_table *);
2183
2184 void *hash_get_next(const Hash_table *, const void *);
2185
2186 size_t hash_get_entries(const Hash_table *, void **, size_t);
2187
2188 size_t hash_do_for_each(const Hash_table *, Hash_processor, void *);
2189
2190 /* Allocation and clean-up. */
2191 size_t hash_string(const char *, size_t);
2192
2193 void hash_reset_tuning(Hash_tuning *);
2194
2195 Hash_table *hash_initialize(size_t, const Hash_tuning *,
2196 Hash_hasher, Hash_comparator,
2197 Hash_data_freer);
2198
2199 void hash_clear(Hash_table *);
2200
2201 void hash_free(Hash_table *);
2202
2203 /* Insertion and deletion. */
2204 bool hash_rehash(Hash_table *, size_t);
2205
2206 void *hash_insert(Hash_table *, const void *);
2207
2208 int hash_insert_if_absent(Hash_table *table, const void *entry,
2209 const void **matched_ent);
2210
2211 void *hash_delete(Hash_table *, const void *);
2212
2213 #endif
-
+ C93FDF3DDE65200053B084BF2FE5963801DDA892F384A6737E9D898465DE9F7C964470D068088CE67C3FEAB269A6C4A5D346F05031464FF16E04C0BB163D3C7C
vtools/lib/progname.c
(0 . 0)(1 . 79)
2218
2219 /* Specification. */
2220 #include "progname.h"
2221
2222 #include <stdio.h>
2223 #include <stdlib.h>
2224 #include <string.h>
2225
2226
2227 /* String containing name the program is called with. To be
2228 initialized by |main()|. */
2229 const char *program_name = NULL;
2230
2231 /* Set |program_name|, based on |argv[0]|. |argv0| must be a string
2232 allocated with indefinite extent, and must not be modified after
2233 this call. */
2234 void
2235 set_program_name(const char *argv0) {
2236 /* libtool creates a temporary executable whose name is sometimes
2237 prefixed with "lt-" (depends on the platform). It also makes
2238 |argv[0]| absolute. But the name of the temporary executable
2239 is a detail that should not be visible to the end user and to
2240 the test suite. Remove this |"<dirname>/.libs/"| or
2241 |"<dirname>/.libs/lt-"| prefix here. */
2242 const char *slash;
2243 const char *base;
2244
2245 /* Sanity check. POSIX requires the invoking process to pass a
2246 non-|NULL| |argv[0]|. */
2247 if (argv0 == NULL) {
2248 /* It's a bug in the invoking program. Help diagnosing
2249 it. */
2250 fputs("A NULL argv[0] was passed through an exec system call.\n",
2251 stderr);
2252 abort();
2253 }
2254
2255 slash = strrchr(argv0, '/');
2256 base = (slash != NULL ? slash + 1 : argv0);
2257 if (base - argv0 >= 7 && strncmp(base - 7, "/.libs/", 7) == 0) {
2258 argv0 = base;
2259 if (strncmp(base, "lt-", 3) == 0) {
2260 argv0 = base + 3;
2261 /* On glibc systems, remove the "lt-" prefix from the
2262 variable |program_invocation_short_name|. */
2263 #if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
2264 program_invocation_short_name = (char *) argv0;
2265 #endif
2266 }
2267 }
2268
2269 /* But don't strip off a leading |<dirname>/| in general, because
2270 when the user runs
2271
2272 |/some/hidden/place/bin/cp foo foo|
2273
2274 he should get the error message
2275
2276 |/some/hidden/place/bin/cp: `foo' and `foo' are the same file|
2277
2278 not
2279
2280 | cp: `foo' and `foo' are the same file|
2281 */
2282
2283 program_name = argv0;
2284
2285 /* On glibc systems, the |error()| function comes from libc and
2286 uses the variable |program_invocation_name|, not
2287 |program_name|. So set this variable as well. */
2288 #if HAVE_DECL_PROGRAM_INVOCATION_NAME
2289 program_invocation_name = (char *) argv0;
2290 #endif
2291 }
2292
2293 const char *
2294 get_program_name(void) {
2295 return program_name;
2296 }
-
+ C6915DC61245B2A3953757373428554EEC0020588350347EDB2BD0EC9C541C1CABF9B15A1264C936D7D5CF5268CA185797CA0E0D8A0644404C9147CAC3DF449F
vtools/lib/progname.h
(0 . 0)(1 . 20)
2301
2302 #ifndef _PROGNAME_H
2303 #define _PROGNAME_H
2304
2305 /* Programs using this file should do the following in |main()|:
2306 |set_program_name (argv[0]);| */
2307
2308 /* String containing name the program is called with. */
2309 extern const char *program_name;
2310
2311 /* Set |program_name|, based on |argv[0]|. |argv0| must be a string
2312 allocated with indefinite extent, and must not be modified after
2313 this call. */
2314 extern void set_program_name(const char *argv0);
2315
2316 /* Return |program_name|. This is similar to getprogname for systems
2317 that don't support it. */
2318 extern const char *get_program_name(void);
2319
2320 #endif
-
+ 8361650753226E8F2780AA6A9AFF24223B456FF648590333B6AE2D37281FAE485C572119DD2B66B4DC1B2DB46DFBE8168443CA72ABF1072C0446265CC576EFAD
vtools/lib/xalloc.c
(0 . 0)(1 . 107)
2325 #include <stdlib.h>
2326 #include <string.h>
2327 #include "xalloc.h"
2328 #include "error.h"
2329
2330 /* 1 if |calloc| is known to be compatible with GNU |calloc|. This
2331 matters if we are not also using the |calloc| module, which defines
2332 |HAVE_CALLOC_GNU| and supports the GNU API even on non-GNU
2333 platforms. */
2334 #if defined HAVE_CALLOC_GNU || (defined __GLIBC__ && !defined __UCLIBC__)
2335 enum { HAVE_GNU_CALLOC = 1 };
2336 #else
2337 enum { HAVE_GNU_CALLOC = 0 };
2338 #endif
2339
2340 void
2341 xalloc_die(void) {
2342 error(2, 0, "%s", "memory exhausted");
2343
2344 /* |_Noreturn| cannot be given to error, since it may return if
2345 its first argument is 0. To help compilers understand the
2346 |xalloc_die| does not return, call abort. Also, the abort is a
2347 safety feature if |exit_failure| is 0 (which shouldn't
2348 happen). */
2349 abort();
2350 }
2351
2352 /* Allocate |n| bytes of memory dynamically, with error checking. */
2353
2354 void *
2355 xmalloc(size_t n) {
2356 void *p = malloc(n);
2357 if (!p && n != 0)
2358 xalloc_die();
2359 return p;
2360 }
2361
2362 /* Change the size of an allocated block of memory |p| to |n| bytes,
2363 with error checking. */
2364
2365 void *
2366 xrealloc(void *p, size_t n) {
2367 if (!n && p) {
2368 /* The GNU and C99 realloc behaviors disagree here. Act like
2369 GNU, even if the underlying realloc is C99. */
2370 free(p);
2371 return NULL;
2372 }
2373
2374 p = realloc(p, n);
2375 if (!p && n)
2376 xalloc_die();
2377 return p;
2378 }
2379
2380 /* If |p| is null, allocate a block of at least |*pn| bytes;
2381 otherwise, reallocate |p| so that it contains more than |*pn|
2382 bytes. |*PN| must be nonzero unless |p| is |NULL|. Set |*pn| to
2383 the new block's size, and return the pointer to the new block.
2384 |*pn| is never set to zero, and the returned pointer is never
2385 |NULL|. */
2386
2387 void *
2388 x2realloc(void *p, size_t *pn) {
2389 return x2nrealloc(p, pn, 1);
2390 }
2391
2392 /* Allocate |s| bytes of zeroed memory dynamically, with error
2393 checking. There's no need for |xnzalloc (n, s)|, since it would be
2394 equivalent to |xcalloc (n, s)|. */
2395
2396 void *
2397 xzalloc(size_t s) {
2398 return memset (xmalloc(s), 0, s);
2399 }
2400
2401 /* Allocate zeroed memory for |n| elements of |s| bytes, with error
2402 checking. |s| must be nonzero. */
2403
2404 void *
2405 xcalloc(size_t n, size_t s) {
2406 void *p;
2407 /* Test for overflow, since objects with size greater than
2408 |PTRDIFF_MAX| cause pointer subtraction to go awry. Omit
2409 size-zero tests if |HAVE_GNU_CALLOC|, since GNU calloc never
2410 returns |NULL| if successful. */
2411 if (xalloc_oversized (n, s)
2412 || (!(p = calloc(n, s)) && (HAVE_GNU_CALLOC || n != 0)))
2413 xalloc_die();
2414 return p;
2415 }
2416
2417 /* Clone an object |p| of size |s|, with error checking. There's no
2418 need for |xnmemdup (p, n, s)|, since |xmemdup (p, n * s)| works
2419 without any need for an arithmetic overflow check. */
2420
2421 void *
2422 xmemdup(void const *p, size_t s) {
2423 return memcpy (xmalloc(s), p, s);
2424 }
2425
2426 /* Clone |string|. */
2427
2428 char *
2429 xstrdup(char const *string) {
2430 return xmemdup(string, strlen(string) + 1);
2431 }
-
+ 2146359188E16E7186846AE0B1026DC41BB7EC2C58FCE67D3F02F99BDB851F9325483084B55FFD5BCB4B0D36C08799BF3198BAAEDF7595B6E0EE85A042806446
vtools/lib/xalloc.h
(0 . 0)(1 . 170)
2436 #ifndef XALLOC_H_
2437 #define XALLOC_H_
2438
2439 #include <stddef.h>
2440 #include <stdint.h>
2441 #include "system.h"
2442
2443 /* This function is always triggered when memory is exhausted. It
2444 must be defined by the application, either explicitly or by using
2445 gnulib's xalloc-die module. This is the function to call when one
2446 wants the program to die because of a memory allocation
2447 failure. */
2448 extern _Noreturn void xalloc_die(void);
2449
2450 void *xmalloc(size_t s);
2451 void *xzalloc(size_t s);
2452 void *xcalloc(size_t n, size_t s);
2453 void *xrealloc(void *p, size_t s);
2454 void *x2realloc(void *p, size_t *pn);
2455 void *xmemdup(void const *p, size_t s);
2456 char *xstrdup(char const *str);
2457
2458 /* In the following macros, |t| must be an elementary or
2459 structure/union or typedef'ed type, or a pointer to such a type.
2460 To apply one of the following macros to a function pointer or array
2461 type, you need to typedef it first and use the typedef name. */
2462
2463 /* Allocate an object of type |t| dynamically, with error checking. */
2464 #define XMALLOC(t) ((t *) xmalloc (sizeof (t)))
2465
2466 /* Allocate memory for |n| elements of type |t|, with error
2467 checking. */
2468 #define XNMALLOC(n, t) \
2469 ((t *) (sizeof (t) == 1 ? xmalloc (n) : xnmalloc (n, sizeof (t))))
2470
2471 /* Allocate an object of type |t| dynamically, with error checking,
2472 and zero it. */
2473 #define XZALLOC(t) ((t *) xzalloc (sizeof (t)))
2474
2475 /* Allocate memory for |n| elements of type |t|, with error checking,
2476 and zero it. */
2477 #define XCALLOC(n, t) \
2478 ((t *) (sizeof (t) == 1 ? xzalloc (n) : xcalloc (n, sizeof (t))))
2479
2480
2481 /* Allocate an array of |n| objects, each with |s| bytes of memory,
2482 dynamically, with error checking. |s| must be nonzero. */
2483
2484 inline void *xnmalloc(size_t n, size_t s);
2485
2486 inline void *
2487 xnmalloc(size_t n, size_t s) {
2488 if (xalloc_oversized (n, s))
2489 xalloc_die();
2490 return xmalloc(n * s);
2491 }
2492
2493 /* Change the size of an allocated block of memory |p| to an array of
2494 |n| objects each of |s| bytes, with error checking. |s| must be
2495 nonzero. */
2496
2497 inline void *xnrealloc(void *p, size_t n, size_t s);
2498
2499 inline void *
2500 xnrealloc(void *p, size_t n, size_t s) {
2501 if (xalloc_oversized (n, s))
2502 xalloc_die();
2503 return xrealloc(p, n * s);
2504 }
2505
2506 /* If |p| is |NULL|, allocate a block of at least |*pn| such objects;
2507 otherwise, reallocate |p| so that it contains more than |*pn|
2508 objects each of |s| bytes. |s| must be nonzero. Set |*pn| to the
2509 new number of objects, and return the pointer to the new block.
2510 |*PN| is never set to zero, and the returned pointer is never |NULL|.
2511
2512 Repeated reallocations are guaranteed to make progress, either by
2513 allocating an initial block with a nonzero size, or by allocating a
2514 larger block.
2515
2516 In the following implementation, nonzero sizes are increased by a
2517 factor of approximately 1.5 so that repeated reallocations have
2518 $O(N)$ overall cost rather than $O(N^2)$ cost, but the
2519 specification for this function does not guarantee that rate.
2520
2521 Here is an example of use:
2522
2523 |int *p = NULL;
2524 size_t used = 0;
2525 size_t allocated = 0;
2526
2527 void
2528 append_int (int value)
2529 {
2530 if (used == allocated)
2531 p = x2nrealloc (p, &allocated, sizeof *p);
2532 p[used++] = value;
2533 }|
2534
2535 This causes |x2nrealloc| to allocate a block of some nonzero size
2536 the first time it is called.
2537
2538 To have finer-grained control over the initial size, set |*pn| to a
2539 nonzero value before calling this function with |p == NULL|. For
2540 example:
2541
2542 |int *p = NULL;
2543 size_t used = 0;
2544 size_t allocated = 0;
2545 size_t allocated1 = 1000;
2546
2547 void
2548 append_int (int value)
2549 {
2550 if (used == allocated)
2551 {
2552 p = x2nrealloc (p, &allocated1, sizeof *p);
2553 allocated = allocated1;
2554 }
2555 p[used++] = value;
2556 }|
2557
2558 */
2559
2560 static inline void *
2561 x2nrealloc(void *p, size_t *pn, size_t s) {
2562 size_t n = *pn;
2563
2564 if (!p) {
2565 if (!n) {
2566 /* The approximate size to use for initial small
2567 allocation requests, when the invoking code specifies
2568 an old size of zero. This is the largest "small"
2569 request for the GNU C library malloc. */
2570 enum {
2571 DEFAULT_MXFAST = 64 * sizeof(size_t) / 4
2572 };
2573
2574 n = DEFAULT_MXFAST / s;
2575 n += !n;
2576 }
2577 if (xalloc_oversized (n, s))
2578 xalloc_die();
2579 } else {
2580 /* Set $n = \lfloor 1.5 * n \rfloor + 1$ so that progress is
2581 made even if $n = 0$. Check for overflow, so that $n * s$
2582 stays in both |ptrdiff_t| and |size_t| range. The check
2583 may be slightly conservative, but an exact check isn't
2584 worth the trouble. */
2585 if ((PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX) / 3 * 2 / s
2586 <= n)
2587 xalloc_die();
2588 n += n / 2 + 1;
2589 }
2590
2591 *pn = n;
2592 return xrealloc(p, n * s);
2593 }
2594
2595 /* Return a pointer to a new buffer of |n| bytes. This is like
2596 xmalloc, except it returns |char *|. */
2597
2598 static inline char *xcharalloc(size_t n);
2599
2600 static inline char *
2601 xcharalloc(size_t n) {
2602 return XNMALLOC (n, char);
2603 }
2604
2605 #endif
-
+ AAB77CCF9C4FA86348F84D661509CF482F958D1A1ED4A55F608683510A8C5AB3E3A12E6C9FA61B6DDF31A493D5BE586EF7362D00E1D91542A130B27EC0DC5AD3
vtools/manifest
(0 . 0)(1 . 1)
2610 510300 phf vtools_genesis Initial release of vtools. Genesis contains stripped down version of diffutils 3.6, with most of functionality not relevant to vpatch removed.
-
+ 7C13EDE685F2C92F24A015E44B9609494E4D5DA9BC6A192B62A9CAB92F36C2543F5092FDADD7436FEAC45CA0AB54D19EA0692848C5C94F9819B885BEEC08E179
vtools/src/analyze.c
(0 . 0)(1 . 597)
2615
2616 #include "diff.h"
2617 #include <cmpbuf.h>
2618 #include <error.h>
2619 #include <xalloc.h>
2620 #include <stdlib.h>
2621
2622 /* The core of the Diff algorithm. */
2623 #define ELEMENT lin
2624 #define EQUAL(x, y) ((x) == (y))
2625 #define OFFSET lin
2626 #define EXTRA_CONTEXT_FIELDS /* none */
2627 #define NOTE_DELETE(c, xoff) (files[0].changed[files[0].realindexes[xoff]] = 1)
2628 #define NOTE_INSERT(c, yoff) (files[1].changed[files[1].realindexes[yoff]] = 1)
2629 #define USE_HEURISTIC 1
2630 #include <limits.h>
2631 #include <diffseq.h>
2632 #include <sys/stat.h>
2633
2634 /* Discard lines from one file that have no matches in the other file.
2635
2636 A line which is discarded will not be considered by the actual
2637 comparison algorithm; it will be as if that line were not in the
2638 file. The file's |realindexes| table maps virtual line numbers
2639 (which don't count the discarded lines) into real line numbers;
2640 this is how the actual comparison algorithm produces results that
2641 are comprehensible when the discarded lines are counted.
2642
2643 When we discard a line, we also mark it as a deletion or insertion
2644 so that it will be printed in the output. */
2645
2646 static void
2647 discard_confusing_lines(struct file_data filevec[]) {
2648 int f;
2649 lin i;
2650 char *discarded[2];
2651 lin *equiv_count[2];
2652 lin *p;
2653
2654 /* Allocate our results. */
2655 p = xmalloc((filevec[0].buffered_lines + filevec[1].buffered_lines)
2656 * (2 * sizeof *p));
2657 for (f = 0; f < 2; f++) {
2658 filevec[f].undiscarded = p;
2659 p += filevec[f].buffered_lines;
2660 filevec[f].realindexes = p;
2661 p += filevec[f].buffered_lines;
2662 }
2663
2664 /* Set up |equiv_count[f][i]| as the number of lines in file |f|
2665 that fall in equivalence class |i|. */
2666
2667 p = zalloc(filevec[0].equiv_max * (2 * sizeof *p));
2668 equiv_count[0] = p;
2669 equiv_count[1] = p + filevec[0].equiv_max;
2670
2671 for (i = 0; i < filevec[0].buffered_lines; ++i)
2672 ++equiv_count[0][filevec[0].equivs[i]];
2673 for (i = 0; i < filevec[1].buffered_lines; ++i)
2674 ++equiv_count[1][filevec[1].equivs[i]];
2675
2676 /* Set up tables of which lines are going to be discarded. */
2677
2678 discarded[0] = zalloc(filevec[0].buffered_lines
2679 + filevec[1].buffered_lines);
2680 discarded[1] = discarded[0] + filevec[0].buffered_lines;
2681
2682 /* Mark to be discarded each line that matches no line of the
2683 other file. If a line matches many lines, mark it as
2684 provisionally discardable. */
2685
2686 for (f = 0; f < 2; f++) {
2687 size_t end = filevec[f].buffered_lines;
2688 char *discards = discarded[f];
2689 lin *counts = equiv_count[1 - f];
2690 lin *equivs = filevec[f].equivs;
2691 size_t many = 5;
2692 size_t tem = end / 64;
2693
2694 /* Multiply |many| by approximate square root of number of
2695 lines. That is the threshold for provisionally discardable
2696 lines. */
2697 while ((tem = tem >> 2) > 0)
2698 many *= 2;
2699
2700 for (i = 0; i < end; i++) {
2701 lin nmatch;
2702 if (equivs[i] == 0)
2703 continue;
2704 nmatch = counts[equivs[i]];
2705 if (nmatch == 0)
2706 discards[i] = 1;
2707 else if (nmatch > many)
2708 discards[i] = 2;
2709 }
2710 }
2711
2712 /* Don't really discard the provisional lines except when they
2713 occur in a run of discardables, with nonprovisionals at the
2714 beginning and end. */
2715
2716 for (f = 0; f < 2; f++) {
2717 lin end = filevec[f].buffered_lines;
2718 register char *discards = discarded[f];
2719
2720 for (i = 0; i < end; i++) {
2721 /* Cancel provisional discards not in middle of run of
2722 discards. */
2723 if (discards[i] == 2)
2724 discards[i] = 0;
2725 else if (discards[i] != 0) {
2726 /* We have found a nonprovisional discard. */
2727 register lin j;
2728 lin length;
2729 lin provisional = 0;
2730
2731 /* Find end of this run of discardable lines. Count
2732 how many are provisionally discardable. */
2733 for (j = i; j < end; j++) {
2734 if (discards[j] == 0)
2735 break;
2736 if (discards[j] == 2)
2737 ++provisional;
2738 }
2739
2740 /* Cancel provisional discards at end, and shrink the
2741 run. */
2742 while (j > i && discards[j - 1] == 2)
2743 discards[--j] = 0, --provisional;
2744
2745 /* Now we have the length of a run of discardable
2746 lines whose first and last are not provisional. */
2747 length = j - i;
2748
2749 /* If $1/4$ of the lines in the run are provisional,
2750 cancel discarding of all provisional lines in the
2751 run. */
2752 if (provisional * 4 > length) {
2753 while (j > i)
2754 if (discards[--j] == 2)
2755 discards[j] = 0;
2756 } else {
2757 register lin consec;
2758 lin minimum = 1;
2759 lin tem = length >> 2;
2760
2761 /* |minimum| is approximate square root of
2762 |length/4|. A subrun of two or more
2763 provisionals can stand when |length| is at
2764 least 16. A subrun of 4 or more can stand when
2765 |LENGTH >= 64|. */
2766 while (0 < (tem >>= 2))
2767 minimum <<= 1;
2768 minimum++;
2769
2770 /* Cancel any subrun of |minimum| or more
2771 provisionals within the larger run. */
2772 for (j = 0, consec = 0; j < length; j++)
2773 if (discards[i + j] != 2)
2774 consec = 0;
2775 else if (minimum == ++consec)
2776 /* Back up to start of subrun, to cancel
2777 it all. */
2778 j -= consec;
2779 else if (minimum < consec)
2780 discards[i + j] = 0;
2781
2782 /* Scan from beginning of run until we find 3 or
2783 more nonprovisionals in a row or until the
2784 first nonprovisional at least 8 lines in.
2785 Until that point, cancel any provisionals. */
2786 for (j = 0, consec = 0; j < length; j++) {
2787 if (j >= 8 && discards[i + j] == 1)
2788 break;
2789 if (discards[i + j] == 2)
2790 consec = 0, discards[i + j] = 0;
2791 else if (discards[i + j] == 0)
2792 consec = 0;
2793 else
2794 consec++;
2795 if (consec == 3)
2796 break;
2797 }
2798
2799 /* I advances to the last line of the run. */
2800 i += length - 1;
2801
2802 /* Same thing, from end. */
2803 for (j = 0, consec = 0; j < length; j++) {
2804 if (j >= 8 && discards[i - j] == 1)
2805 break;
2806 if (discards[i - j] == 2)
2807 consec = 0, discards[i - j] = 0;
2808 else if (discards[i - j] == 0)
2809 consec = 0;
2810 else
2811 consec++;
2812 if (consec == 3)
2813 break;
2814 }
2815 }
2816 }
2817 }
2818 }
2819
2820 /* Actually discard the lines. */
2821 for (f = 0; f < 2; f++) {
2822 char *discards = discarded[f];
2823 lin end = filevec[f].buffered_lines;
2824 lin j = 0;
2825 for (i = 0; i < end; ++i)
2826 if (minimal || discards[i] == 0) {
2827 filevec[f].undiscarded[j] = filevec[f].equivs[i];
2828 filevec[f].realindexes[j++] = i;
2829 } else
2830 filevec[f].changed[i] = 1;
2831 filevec[f].nondiscarded_lines = j;
2832 }
2833
2834 free(discarded[0]);
2835 free(equiv_count[0]);
2836 }
2837
2838 /* Adjust inserts/deletes of identical lines to join changes as much
2839 as possible.
2840
2841 We do something when a run of changed lines include a line at one
2842 end and have an excluded, identical line at the other. We are free
2843 to choose which identical line is included. |compareseq| usually
2844 chooses the one at the beginning, but usually it is cleaner to
2845 consider the following identical line to be the "change". */
2846
2847 static void
2848 shift_boundaries(struct file_data filevec[]) {
2849 int f;
2850
2851 for (f = 0; f < 2; f++) {
2852 char *changed = filevec[f].changed;
2853 char *other_changed = filevec[1 - f].changed;
2854 lin const *equivs = filevec[f].equivs;
2855 lin i = 0;
2856 lin j = 0;
2857 lin i_end = filevec[f].buffered_lines;
2858
2859 while (1) {
2860 lin runlength, start, corresponding;
2861
2862 /* Scan forwards to find beginning of another run of
2863 changes. Also keep track of the corresponding point in
2864 the other file. */
2865
2866 while (i < i_end && !changed[i]) {
2867 while (other_changed[j++])
2868 continue;
2869 i++;
2870 }
2871
2872 if (i == i_end)
2873 break;
2874
2875 start = i;
2876
2877 /* Find the end of this run of changes. */
2878
2879 while (changed[++i])
2880 continue;
2881 while (other_changed[j])
2882 j++;
2883
2884 do {
2885 /* Record the length of this run of changes, so that
2886 we can later determine whether the run has grown. */
2887 runlength = i - start;
2888
2889 /* Move the changed region back, so long as the
2890 previous unchanged line matches the last changed one.
2891 This merges with previous changed regions. */
2892
2893 while (start && equivs[start - 1] == equivs[i - 1]) {
2894 changed[--start] = 1;
2895 changed[--i] = 0;
2896 while (changed[start - 1])
2897 start--;
2898 while (other_changed[--j])
2899 continue;
2900 }
2901
2902 /* Set |corresponding| to the end of the changed run,
2903 at the last point where it corresponds to a changed run
2904 in the other file. |corresponding == i_end| means no
2905 such point has been found. */
2906 corresponding = other_changed[j - 1] ? i : i_end;
2907
2908 /* Move the changed region forward, so long as the
2909 first changed line matches the following unchanged one.
2910 This merges with following changed regions. Do this
2911 second, so that if there are no merges, the changed
2912 region is moved forward as far as possible. */
2913
2914 while (i != i_end && equivs[start] == equivs[i]) {
2915 changed[start++] = 0;
2916 changed[i++] = 1;
2917 while (changed[i])
2918 i++;
2919 while (other_changed[++j])
2920 corresponding = i;
2921 }
2922 } while (runlength != i - start);
2923
2924 /* If possible, move the fully-merged run of changes back
2925 to a corresponding run in the other file. */
2926
2927 while (corresponding < i) {
2928 changed[--start] = 1;
2929 changed[--i] = 0;
2930 while (other_changed[--j])
2931 continue;
2932 }
2933 }
2934 }
2935 }
2936
2937 /* Cons an additional entry onto the front of an edit script |old|.
2938 |line0| and |line1| are the first affected lines in the two files
2939 (origin 0). |deleted| is the number of lines deleted here from
2940 file 0. |inserted| is the number of lines inserted here in file 1.
2941
2942 If |deleted| is 0 then |line0| is the number of the line before
2943 which the insertion was done; vice versa for |inserted| and
2944 |line1|. */
2945
2946 static struct change *
2947 add_change(lin line0, lin line1, lin deleted, lin inserted,
2948 struct change *old) {
2949 struct change *new = xmalloc(sizeof *new);
2950
2951 new->line0 = line0;
2952 new->line1 = line1;
2953 new->inserted = inserted;
2954 new->deleted = deleted;
2955 new->link = old;
2956 return new;
2957 }
2958
2959 /* Scan the tables of which lines are inserted and deleted, producing
2960 an edit script in reverse order. */
2961
2962 static struct change *
2963 build_reverse_script(struct file_data const filevec[]) {
2964 struct change *script = 0;
2965 char *changed0 = filevec[0].changed;
2966 char *changed1 = filevec[1].changed;
2967 lin len0 = filevec[0].buffered_lines;
2968 lin len1 = filevec[1].buffered_lines;
2969
2970 /* Note that |changedN[lenN]| does exist, and is 0. */
2971
2972 lin i0 = 0, i1 = 0;
2973
2974 while (i0 < len0 || i1 < len1) {
2975 if (changed0[i0] | changed1[i1]) {
2976 lin line0 = i0, line1 = i1;
2977
2978 /* Find \# lines changed here in each file. */
2979 while (changed0[i0]) ++i0;
2980 while (changed1[i1]) ++i1;
2981
2982 /* Record this change. */
2983 script = add_change(line0, line1, i0 - line0, i1 - line1, script);
2984 }
2985
2986 /* We have reached lines in the two files that match each
2987 other. */
2988 i0++, i1++;
2989 }
2990
2991 return script;
2992 }
2993
2994 /* Scan the tables of which lines are inserted and deleted, producing
2995 an edit script in forward order. */
2996
2997 static struct change *
2998 build_script(struct file_data const filevec[]) {
2999 struct change *script = 0;
3000 char *changed0 = filevec[0].changed;
3001 char *changed1 = filevec[1].changed;
3002 lin i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
3003
3004 /* Note that |changedN[-1]| does exist, and is 0. */
3005
3006 while (i0 >= 0 || i1 >= 0) {
3007 if (changed0[i0 - 1] | changed1[i1 - 1]) {
3008 lin line0 = i0, line1 = i1;
3009
3010 /* Find \# lines changed here in each file. */
3011 while (changed0[i0 - 1]) --i0;
3012 while (changed1[i1 - 1]) --i1;
3013
3014 /* Record this change. */
3015 script = add_change(i0, i1, line0 - i0, line1 - i1, script);
3016 }
3017
3018 /* We have reached lines in the two files that match each
3019 other. */
3020 i0--, i1--;
3021 }
3022
3023 return script;
3024 }
3025
3026 /* If |changes|, briefly report that two files differed. */
3027 static void
3028 briefly_report(int changes, struct file_data const filevec[]) {
3029 if (changes)
3030 message((brief
3031 ? "Files %s and %s differ\n"
3032 : "Binary files %s and %s differ\n"),
3033 file_label[0] ? file_label[0] : filevec[0].name,
3034 file_label[1] ? file_label[1] : filevec[1].name);
3035 }
3036
3037 /* Report the differences of two files. */
3038 int
3039 diff_2_files(struct comparison *cmp) {
3040 int f;
3041 struct change *e, *p;
3042 struct change *script;
3043 int changes;
3044
3045
3046 /* If we have detected that either file is binary, compare the two
3047 files as binary. This can happen only when the first chunk is
3048 read. */
3049
3050 if (read_files(cmp->file, files_can_be_treated_as_binary)) {
3051 /* Files with different lengths must be different. */
3052 if (cmp->file[0].stat.st_size != cmp->file[1].stat.st_size
3053 && 0 < cmp->file[0].stat.st_size
3054 && 0 < cmp->file[1].stat.st_size
3055 && (cmp->file[0].desc < 0 || S_ISREG (cmp->file[0].stat.st_mode))
3056 && (cmp->file[1].desc < 0 || S_ISREG (cmp->file[1].stat.st_mode)))
3057 changes = 1;
3058
3059 /* Standard input equals itself. */
3060 else if (cmp->file[0].desc == cmp->file[1].desc)
3061 changes = 0;
3062
3063 else
3064 /* Scan both files, a buffer at a time, looking for a
3065 difference. */
3066 {
3067 /* Allocate same-sized buffers for both files. */
3068 size_t lcm_max = PTRDIFF_MAX - 1;
3069 size_t buffer_size =
3070 buffer_lcm(sizeof(word),
3071 buffer_lcm(STAT_BLOCKSIZE (cmp->file[0].stat),
3072 STAT_BLOCKSIZE (cmp->file[1].stat),
3073 lcm_max),
3074 lcm_max);
3075 for (f = 0; f < 2; f++)
3076 cmp->file[f].buffer = xrealloc(cmp->file[f].buffer, buffer_size);
3077
3078 for (;; cmp->file[0].buffered = cmp->file[1].buffered = 0) {
3079 /* Read a buffer's worth from both files. */
3080 for (f = 0; f < 2; f++)
3081 if (0 <= cmp->file[f].desc)
3082 file_block_read(&cmp->file[f],
3083 buffer_size - cmp->file[f].buffered);
3084
3085 /* If the buffers differ, the files differ. */
3086 if (cmp->file[0].buffered != cmp->file[1].buffered
3087 || memcmp(cmp->file[0].buffer,
3088 cmp->file[1].buffer,
3089 cmp->file[0].buffered)) {
3090 changes = 1;
3091 break;
3092 }
3093
3094 /* If we reach end of file, the files are the
3095 same. */
3096 if (cmp->file[0].buffered != buffer_size) {
3097 changes = 0;
3098 break;
3099 }
3100 }
3101 }
3102
3103 briefly_report(changes, cmp->file);
3104 } else {
3105 struct context ctxt;
3106 lin diags;
3107 lin too_expensive;
3108
3109 /* Allocate vectors for the results of comparison: a flag for
3110 each line of each file, saying whether that line is an
3111 insertion or deletion. Allocate an extra element, always 0, at
3112 each end of each vector. */
3113
3114 size_t s = cmp->file[0].buffered_lines + cmp->file[1].buffered_lines + 4;
3115 char *flag_space = zalloc(s);
3116 cmp->file[0].changed = flag_space + 1;
3117 cmp->file[1].changed = flag_space + cmp->file[0].buffered_lines + 3;
3118
3119 /* Some lines are obviously insertions or deletions because
3120 they don't match anything. Detect them now, and avoid even
3121 thinking about them in the main comparison algorithm. */
3122
3123 discard_confusing_lines(cmp->file);
3124
3125 /* Now do the main comparison algorithm, considering just the
3126 undiscarded lines. */
3127
3128 ctxt.xvec = cmp->file[0].undiscarded;
3129 ctxt.yvec = cmp->file[1].undiscarded;
3130 diags = (cmp->file[0].nondiscarded_lines
3131 + cmp->file[1].nondiscarded_lines + 3);
3132 ctxt.fdiag = xmalloc(diags * (2 * sizeof *ctxt.fdiag));
3133 ctxt.bdiag = ctxt.fdiag + diags;
3134 ctxt.fdiag += cmp->file[1].nondiscarded_lines + 1;
3135 ctxt.bdiag += cmp->file[1].nondiscarded_lines + 1;
3136
3137 ctxt.heuristic = speed_large_files;
3138
3139 /* Set |too_expensive| to be the approximate square root of
3140 the input size, bounded below by 4096. 4096 seems to be good
3141 for circa-2016 CPUs; see |Bug#16848| and |Bug#24715|. */
3142 too_expensive = 1;
3143 for (; diags != 0; diags >>= 2)
3144 too_expensive <<= 1;
3145 ctxt.too_expensive = MAX (4096, too_expensive);
3146
3147 files[0] = cmp->file[0];
3148 files[1] = cmp->file[1];
3149
3150 compareseq(0, cmp->file[0].nondiscarded_lines,
3151 0, cmp->file[1].nondiscarded_lines, minimal, &ctxt);
3152
3153 free(ctxt.fdiag - (cmp->file[1].nondiscarded_lines + 1));
3154
3155 /* Modify the results slightly to make them prettier in cases
3156 where that can validly be done. */
3157
3158 shift_boundaries(cmp->file);
3159
3160 /* Get the results of comparison in the form of a chain of
3161 |struct change|s -- an edit script. */
3162
3163 script = build_script(cmp->file);
3164
3165 /* Set |changes| if we had any diffs. If some changes are
3166 ignored, we must scan the script to decide. */
3167 changes = (script != 0);
3168
3169 if (brief)
3170 briefly_report(changes, cmp->file);
3171 else {
3172 if (changes) {
3173 /* Record info for starting up output, to be used if
3174 and when we have some output to print. */
3175 setup_output(file_label[0] ? file_label[0] : cmp->file[0].name,
3176 file_label[1] ? file_label[1] : cmp->file[1].name,
3177 cmp->parent != 0);
3178 print_context_script(script);
3179 finish_output();
3180 }
3181 }
3182
3183 free(cmp->file[0].undiscarded);
3184
3185 free(flag_space);
3186
3187 for (f = 0; f < 2; f++) {
3188 free(cmp->file[f].equivs);
3189 free(cmp->file[f].linbuf + cmp->file[f].linbuf_base);
3190 }
3191
3192 for (e = script; e; e = p) {
3193 p = e->link;
3194 free(e);
3195 }
3196
3197 for (f = 0; f < 2; ++f)
3198 if (cmp->file[f].missing_newline) {
3199 error(0, 0, "%s: %s\n",
3200 file_label[f] ? file_label[f] : cmp->file[f].name,
3201 "No newline at end of file");
3202 changes = 2;
3203 }
3204 }
3205
3206 if (cmp->file[0].buffer != cmp->file[1].buffer)
3207 free(cmp->file[0].buffer);
3208 free(cmp->file[1].buffer);
3209
3210 return changes;
3211 }
-
+ F5B3E5C736BE0C6C4E4FDCC50E2A436C17D3E84FE47E6125DD0FA775C9CFFCC625A98FE7091A036593E21CDAFA1E533FD8A04640649CD6ED89DA499C94333668
vtools/src/context.c
(0 . 0)(1 . 211)
3216
3217 #include <stdlib.h>
3218 #include "diff.h"
3219
3220 /* Print a label for a context diff, with a file name and date or a
3221 label. */
3222
3223 static void
3224 print_context_label(char const *mark,
3225 struct file_data *inf,
3226 char const *name,
3227 char const *label) {
3228 if (label)
3229 fprintf(outfile, "%s %s\n", mark, label);
3230 else {
3231 fprintf(outfile, "%s %s false\n", mark, name);
3232 }
3233 }
3234
3235 /* Print a header for a context diff, with the file names and dates. */
3236
3237 void
3238 print_context_header(struct file_data inf[], char const *const *names) {
3239 print_context_label("---", &inf[0], names[0], file_label[0]);
3240 print_context_label("+++", &inf[1], names[1], file_label[1]);
3241 }
3242
3243 /* Print an edit script in context format. */
3244
3245 void
3246 print_context_script(struct change *script) {
3247 struct change *e;
3248 for (e = script; e; e = e->link)
3249 e->ignore = false;
3250
3251 print_script(script);
3252 }
3253
3254 /* Print a pair of line numbers with a comma, translated for file
3255 |file|. If the second number is smaller, use the first in place of
3256 it. If the numbers are equal, print just one number.
3257
3258 Args |a| and |b| are internal line numbers. We print the
3259 translated (real) line numbers. */
3260
3261 static void
3262 print_unidiff_number_range(struct file_data const *file, lin a, lin b) {
3263 printint trans_a, trans_b;
3264 translate_range(file, a, b, &trans_a, &trans_b);
3265
3266 /* We can have |b < a| in the case of a range of no lines. In
3267 this case, we print the line number before the range, which is
3268 |b|. It would be more logical to print |a|, but `patch'
3269 expects |b| in order to detect diffs against empty files. */
3270 if (trans_b <= trans_a)
3271 fprintf(outfile, trans_b < trans_a ? "%"pI"d,0" : "%"pI"d", trans_b);
3272 else
3273 fprintf(outfile, "%"pI"d,%"pI"d", trans_a, trans_b - trans_a + 1);
3274 }
3275
3276 /* Print a portion of an edit script in unidiff format. |hunk| is the
3277 beginning of the portion to be printed. The end is marked by a
3278 |link| that has been nulled out.
3279
3280 Prints out lines from both files, and precedes each line with the
3281 appropriate flag-character. */
3282
3283 void
3284 pr_unidiff_hunk(struct change *hunk) {
3285 lin first0, last0, first1, last1;
3286 lin i, j, k;
3287 struct change *next;
3288 FILE *out;
3289
3290 /* Determine range of line numbers involved in each file. */
3291
3292 if (!analyze_hunk(hunk, &first0, &last0, &first1, &last1))
3293 return;
3294
3295 /* Include a context's width before and after. */
3296
3297 i = -files[0].prefix_lines;
3298 first0 = MAX (first0 - context, i);
3299 first1 = MAX (first1 - context, i);
3300 if (last0 < files[0].valid_lines - context)
3301 last0 += context;
3302 else
3303 last0 = files[0].valid_lines - 1;
3304 if (last1 < files[1].valid_lines - context)
3305 last1 += context;
3306 else
3307 last1 = files[1].valid_lines - 1;
3308
3309 begin_output();
3310 out = outfile;
3311
3312 fputs("@@ -", out);
3313 print_unidiff_number_range(&files[0], first0, last0);
3314 fputs(" +", out);
3315 print_unidiff_number_range(&files[1], first1, last1);
3316 fputs(" @@", out);
3317
3318 putc('\n', out);
3319
3320 next = hunk;
3321 i = first0;
3322 j = first1;
3323
3324 while (i <= last0 || j <= last1) {
3325
3326 /* If the line isn't a difference, output the context from
3327 file 0. */
3328
3329 if (!next || i < next->line0) {
3330 char const *const *line = &files[0].linbuf[i++];
3331 if (!(suppress_blank_empty && **line == '\n'))
3332 putc(' ', out);
3333 print_1_line(NULL, line);
3334 j++;
3335 } else {
3336 /* For each difference, first output the deleted part. */
3337
3338 k = next->deleted;
3339 while (k--) {
3340 char const *const *line = &files[0].linbuf[i++];
3341 putc('-', out);
3342 print_1_line_nl(NULL, line, true);
3343 if (line[1][-1] == '\n')
3344 putc('\n', out);
3345 }
3346
3347 /* Then output the inserted part. */
3348
3349 k = next->inserted;
3350 while (k--) {
3351 char const *const *line = &files[1].linbuf[j++];
3352 putc('+', out);
3353 print_1_line_nl(NULL, line, true);
3354 if (line[1][-1] == '\n')
3355 putc('\n', out);
3356 }
3357
3358 /* We're done with this hunk, so on to the next! */
3359
3360 next = next->link;
3361 }
3362 }
3363 }
3364
3365 /* Scan a (forward-ordered) edit script for the first place that more
3366 than |2*context| unchanged lines appear, and return a pointer to
3367 the |struct change| for the last change before those lines. */
3368
3369 struct change *
3370 find_hunk(struct change *start) {
3371 struct change *prev;
3372 lin top0, top1;
3373 lin thresh;
3374
3375 /* Threshold distance is |context| if the second change is
3376 ignorable, |2 * context + 1| otherwise. Integer overflow can't
3377 happen, due to |CONTEXT_LIM|. */
3378 lin ignorable_threshold = context;
3379 lin non_ignorable_threshold = 2 * context + 1;
3380
3381 do {
3382 /* Compute number of first line in each file beyond this changed. */
3383 top0 = start->line0 + start->deleted;
3384 top1 = start->line1 + start->inserted;
3385 prev = start;
3386 start = start->link;
3387 thresh = (start && start->ignore
3388 ? ignorable_threshold
3389 : non_ignorable_threshold);
3390 /* It is not supposed to matter which file we check in the
3391 end-test. If it would matter, crash. */
3392 if (start && start->line0 - top0 != start->line1 - top1)
3393 abort();
3394 } while (start
3395 /* Keep going if less than |thresh| lines elapse before
3396 the affected line. */
3397 && start->line0 - top0 < thresh);
3398
3399 return prev;
3400 }
3401
3402 /* Set the |ignore| flag properly in each change in script. It should
3403 be 1 if all the lines inserted or deleted in that change are
3404 ignorable lines. */
3405
3406 static void
3407 mark_ignorable(struct change *script) {
3408 while (script) {
3409 struct change *next = script->link;
3410 lin first0, last0, first1, last1;
3411
3412 /* Turn this change into a hunk: detach it from the others. */
3413 script->link = NULL;
3414
3415 /* Determine whether this change is ignorable. */
3416 script->ignore = !analyze_hunk(script,
3417 &first0, &last0, &first1, &last1);
3418
3419 /* Reconnect the chain as before. */
3420 script->link = next;
3421
3422 /* Advance to the following change. */
3423 script = next;
3424 }
3425 }
3426
-
+ 908272F9A1C8A15CAC809A9C6DBF96F7CD60B40BC3EBAD0F1DF368F6D5DFA10DA38E7E6F8CD09DC994E2291B95334C7768D885633536302E3C47181452A2B839
vtools/src/diff.c
(0 . 0)(1 . 454)
3431 #define GDIFF_MAIN
3432
3433 #include "diff.h"
3434 #include <dirname.h>
3435 #include <error.h>
3436 #include <filenamecat.h>
3437 #include <progname.h>
3438 #include <xalloc.h>
3439 #include <getopt.h>
3440 #include <fcntl.h>
3441 #include <stdlib.h>
3442 #include <inttypes.h>
3443 #include <filetype.h>
3444
3445 #define STREQ(a, b) (strcmp (a, b) == 0)
3446 #define ISDIGIT(c) ((unsigned int) (c) - '0' <= 9)
3447
3448 static int compare_files(struct comparison const *, char const *, char const *);
3449
3450 static void try_help(char const *, char const *);
3451
3452 static void check_stdout(void);
3453
3454 static char const shortopts[] = "0123456789aU:dHL:qurN";
3455
3456 /* Return a string containing the command options with which diff was
3457 invoked. Spaces appear between what were separate |argv|-elements.
3458 There is a space at the beginning but none at the end. If there
3459 were no options, the result is an empty string.
3460
3461 Arguments: |optionvec|, a vector containing separate
3462 |argv|-elements, and |count|, the length of that vector.
3463
3464 todo phf: Implicitly add the -uNr since that's the defaults, and
3465 this combination of flags is what the original vdiff has.*/
3466
3467 static char *
3468 option_list(char **optionvec, int count) {
3469 char prefix[] = " -uNr";
3470 int i;
3471 size_t size = 1 + sizeof prefix;
3472 char *result;
3473 char *p;
3474
3475 for (i = 0; i < count; i++)
3476 size += strlen(optionvec[i]);
3477
3478 p = result = xmalloc(size);
3479 p = stpncpy(p, prefix, sizeof prefix - 1);
3480
3481 for (i = 0; i < count; i++) {
3482 *p++ = ' ';
3483 p = stpncpy(p, optionvec[i], strlen(optionvec[i])-1);
3484 }
3485
3486 *p = '\0';
3487 return result;
3488 }
3489
3490 int
3491 main(int argc, char **argv) {
3492 int exit_status = EXIT_SUCCESS;
3493 int c;
3494 int prev = -1;
3495 lin ocontext = -1;
3496 bool explicit_context = false;
3497 char const *from_file = NULL;
3498 char const *to_file = NULL;
3499 uintmax_t numval;
3500 char *numend;
3501 set_program_name(argv[0]);
3502
3503 /* Decode the options. */
3504
3505 while ((c = getopt(argc, argv, shortopts)) != -1) {
3506 switch (c) {
3507 case 0:
3508 break;
3509
3510 case '0':
3511 case '1':
3512 case '2':
3513 case '3':
3514 case '4':
3515 case '5':
3516 case '6':
3517 case '7':
3518 case '8':
3519 case '9':
3520 ocontext = (!ISDIGIT (prev)
3521 ? c - '0'
3522 : (ocontext - (c - '0' <= CONTEXT_MAX % 10)
3523 < CONTEXT_MAX / 10)
3524 ? 10 * ocontext + (c - '0')
3525 : CONTEXT_MAX);
3526 break;
3527
3528 case 'a':
3529 text = true;
3530 break;
3531
3532 case 'U': {
3533 if (optarg) {
3534 numval = strtoumax(optarg, &numend, 10);
3535 if (*numend)
3536 try_help("invalid context length '%s'", optarg);
3537 if (CONTEXT_MAX < numval)
3538 numval = CONTEXT_MAX;
3539 } else
3540 numval = 3;
3541
3542 if (context < numval)
3543 context = numval;
3544 explicit_context = true;
3545 }
3546 break;
3547
3548 case 'd':
3549 minimal = true;
3550 break;
3551
3552 case 'H':
3553 speed_large_files = true;
3554 break;
3555
3556 case 'L':
3557 if (!file_label[0])
3558 file_label[0] = optarg;
3559 else if (!file_label[1])
3560 file_label[1] = optarg;
3561 else
3562 fatal("too many file label options");
3563 break;
3564
3565 case 'q':
3566 brief = true;
3567 break;
3568
3569 case 'u':
3570 case 'r':
3571 case 'N':
3572 /* compat */
3573 break;
3574
3575 default:
3576 try_help(NULL, NULL);
3577 }
3578 prev = c;
3579 }
3580
3581 if (context < 3)
3582 context = 3;
3583 suppress_blank_empty = false;
3584
3585 if (0 <= ocontext
3586 && (context < ocontext
3587 || (ocontext < context && !explicit_context)))
3588 context = ocontext;
3589
3590 /* Make the horizon at least as large as the context, so that
3591 |shift_boundaries| has more freedom to shift the first and last
3592 hunks. */
3593 if (horizon_lines < context)
3594 horizon_lines = context;
3595
3596 files_can_be_treated_as_binary = false;
3597
3598 switch_string = option_list(argv + 1, optind - 1);
3599
3600 if (from_file) {
3601 if (to_file)
3602 fatal("--from-file and --to-file both specified");
3603 else
3604 for (; optind < argc; optind++) {
3605 int status = compare_files(NULL, from_file, argv[optind]);
3606 if (exit_status < status)
3607 exit_status = status;
3608 }
3609 } else {
3610 if (to_file)
3611 for (; optind < argc; optind++) {
3612 int status = compare_files(NULL, argv[optind], to_file);
3613 if (exit_status < status)
3614 exit_status = status;
3615 }
3616 else {
3617 if (argc - optind != 2) {
3618 if (argc - optind < 2)
3619 try_help("missing operand after '%s'", argv[argc - 1]);
3620 else
3621 try_help("extra operand '%s'", argv[optind + 2]);
3622 }
3623
3624 exit_status = compare_files(NULL, argv[optind], argv[optind + 1]);
3625 }
3626 }
3627
3628 check_stdout();
3629 exit(exit_status);
3630 return exit_status;
3631 }
3632
3633 static void
3634 try_help(char const *reason_msgid, char const *operand) {
3635 if (reason_msgid)
3636 error(0, 0, reason_msgid, operand);
3637 exit(EXIT_TROUBLE);
3638 }
3639
3640 static void
3641 check_stdout(void) {
3642 if (ferror(stdout))
3643 fatal("write failed");
3644 else if (fclose(stdout) != 0)
3645 pfatal_with_name("standard output");
3646 }
3647
3648 /* Compare two files (or dirs) with parent comparison |parent| and
3649 names |name0| and |name1|. (If |parent| is null, then the first
3650 name is just |name0|, etc.) This is self-contained; it opens the
3651 files and closes them.
3652
3653 Value is |EXIT_SUCCESS| if files are the same, |EXIT_FAILURE| if
3654 different, |EXIT_TROUBLE| if there is a problem opening them. */
3655
3656 static int
3657 compare_files(struct comparison const *parent,
3658 char const *name0,
3659 char const *name1) {
3660 struct comparison cmp;
3661 #define DIR_P(f) (S_ISDIR (cmp.file[f].stat.st_mode) != 0)
3662 register int f;
3663 int status = EXIT_SUCCESS;
3664 bool same_files;
3665 char *free0;
3666 char *free1;
3667
3668 memset (cmp.file, 0, sizeof cmp.file);
3669 cmp.parent = parent;
3670
3671 /* |cmp.file[f].desc| markers */
3672 #define NONEXISTENT (-1) /* nonexistent file */
3673 #define UNOPENED (-2) /* unopened file (e.g. directory) */
3674 #define ERRNO_ENCODE(errno) (-3 - (errno)) /* encoded |errno| value */
3675 #define ERRNO_DECODE(desc) (-3 - (desc)) /* inverse of |ERRNO_ENCODE| */
3676
3677 cmp.file[0].desc = name0 ? UNOPENED : NONEXISTENT;
3678 cmp.file[1].desc = name1 ? UNOPENED : NONEXISTENT;
3679
3680 /* Now record the full name of each file, including nonexistent ones. */
3681
3682 if (!name0)
3683 name0 = name1;
3684 if (!name1)
3685 name1 = name0;
3686
3687 if (!parent) {
3688 free0 = NULL;
3689 free1 = NULL;
3690 cmp.file[0].name = name0;
3691 cmp.file[1].name = name1;
3692 } else {
3693 cmp.file[0].name = free0
3694 = file_name_concat(parent->file[0].name, name0, NULL);
3695 cmp.file[1].name = free1
3696 = file_name_concat(parent->file[1].name, name1, NULL);
3697 }
3698
3699 /* Stat the files. */
3700
3701 for (f = 0; f < 2; f++) {
3702 if (cmp.file[f].desc != NONEXISTENT) {
3703 if (f && file_name_cmp(cmp.file[f].name, cmp.file[0].name) == 0) {
3704 cmp.file[f].desc = cmp.file[0].desc;
3705 cmp.file[f].stat = cmp.file[0].stat;
3706 } else if (STREQ (cmp.file[f].name, "-")) {
3707 cmp.file[f].desc = STDIN_FILENO;
3708 if (fstat(STDIN_FILENO, &cmp.file[f].stat) != 0)
3709 cmp.file[f].desc = ERRNO_ENCODE (errno);
3710 else {
3711 if (S_ISREG (cmp.file[f].stat.st_mode)) {
3712 off_t pos = lseek(STDIN_FILENO, 0, SEEK_CUR);
3713 if (pos < 0)
3714 cmp.file[f].desc = ERRNO_ENCODE (errno);
3715 else
3716 cmp.file[f].stat.st_size =
3717 MAX (0, cmp.file[f].stat.st_size - pos);
3718 }
3719 }
3720 } else if (stat(cmp.file[f].name, &cmp.file[f].stat) != 0)
3721 cmp.file[f].desc = ERRNO_ENCODE (errno);
3722 }
3723 }
3724
3725 /* Mark files as nonexistent as needed for |-N| and |-P|, if they
3726 are inaccessible empty regular files (the kind of files that
3727 |patch| creates to indicate nonexistent backups), or if they
3728 are top-level files that do not exist but their counterparts do
3729 exist. */
3730 for (f = 0; f < 2; f++)
3731 if (cmp.file[f].desc == UNOPENED
3732 ? (S_ISREG (cmp.file[f].stat.st_mode)
3733 && !(cmp.file[f].stat.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO))
3734 && cmp.file[f].stat.st_size == 0)
3735 : ((cmp.file[f].desc == ERRNO_ENCODE (ENOENT)
3736 || cmp.file[f].desc == ERRNO_ENCODE (EBADF))
3737 && !parent
3738 && (cmp.file[1 - f].desc == UNOPENED
3739 || cmp.file[1 - f].desc == STDIN_FILENO)))
3740 cmp.file[f].desc = NONEXISTENT;
3741
3742 for (f = 0; f < 2; f++)
3743 if (cmp.file[f].desc == NONEXISTENT) {
3744 memset (&cmp.file[f].stat, 0, sizeof cmp.file[f].stat);
3745 cmp.file[f].stat.st_mode = cmp.file[1 - f].stat.st_mode;
3746 }
3747
3748 for (f = 0; f < 2; f++) {
3749 int e = ERRNO_DECODE (cmp.file[f].desc);
3750 if (0 <= e) {
3751 errno = e;
3752 perror_with_name(cmp.file[f].name);
3753 status = EXIT_TROUBLE;
3754 }
3755 }
3756
3757 if (status == EXIT_SUCCESS && !parent && DIR_P (0) != DIR_P (1)) {
3758 /* If one is a directory, and it was specified in the command
3759 line, use the file in that dir with the other file's
3760 basename. */
3761
3762 int fnm_arg = DIR_P (0);
3763 int dir_arg = 1 - fnm_arg;
3764 char const *fnm = cmp.file[fnm_arg].name;
3765 char const *dir = cmp.file[dir_arg].name;
3766 char const *filename = cmp.file[dir_arg].name = free0
3767 = find_dir_file_pathname(dir, last_component(fnm));
3768
3769 if (STREQ (fnm, "-"))
3770 fatal("cannot compare '-' to a directory");
3771
3772 if (stat(filename, &cmp.file[dir_arg].stat)
3773 != 0) {
3774 perror_with_name(filename);
3775 status = EXIT_TROUBLE;
3776 }
3777 }
3778
3779 if (status != EXIT_SUCCESS) {
3780 /* One of the files should exist but does not. */
3781 } else if (cmp.file[0].desc == NONEXISTENT
3782 && cmp.file[1].desc == NONEXISTENT) {
3783 /* Neither file "exists", so there's nothing to compare. */
3784 } else if ((same_files
3785 = (cmp.file[0].desc != NONEXISTENT
3786 && cmp.file[1].desc != NONEXISTENT
3787 && 0 < same_file (&cmp.file[0].stat, &cmp.file[1].stat)
3788 && same_file_attributes (&cmp.file[0].stat,
3789 &cmp.file[1].stat)))) {
3790 /* The two named files are actually the same physical file. We
3791 know they are identical without actually reading them. */
3792 } else if (DIR_P (0) & DIR_P (1)) {
3793 /* If both are directories, compare the files in them. */
3794
3795 status = diff_dirs(&cmp, compare_files);
3796 } else if ((DIR_P (0) | DIR_P (1))
3797 || (parent
3798 && !((S_ISREG (cmp.file[0].stat.st_mode)
3799 || S_ISLNK (cmp.file[0].stat.st_mode))
3800 && (S_ISREG (cmp.file[1].stat.st_mode)
3801 || S_ISLNK (cmp.file[1].stat.st_mode))))) {
3802 if (cmp.file[0].desc == NONEXISTENT || cmp.file[1].desc == NONEXISTENT) {
3803 /* We have a subdirectory that exists only in one directory. */
3804
3805 status = diff_dirs(&cmp, compare_files);
3806 } else {
3807 /* We have two files that are not to be compared. */
3808
3809 message5("File %s is a %s while file %s is a %s\n",
3810 file_label[0] ? file_label[0] : cmp.file[0].name,
3811 file_type(&cmp.file[0].stat),
3812 file_label[1] ? file_label[1] : cmp.file[1].name,
3813 file_type(&cmp.file[1].stat));
3814
3815 /* This is a difference. */
3816 status = EXIT_FAILURE;
3817 }
3818 } else if (S_ISLNK (cmp.file[0].stat.st_mode)
3819 || S_ISLNK (cmp.file[1].stat.st_mode)) {
3820 /* We get here only if we use |lstat()|, not |stat()|. */
3821 status = EXIT_FAILURE;
3822 } else if (files_can_be_treated_as_binary
3823 && S_ISREG (cmp.file[0].stat.st_mode)
3824 && S_ISREG (cmp.file[1].stat.st_mode)
3825 && cmp.file[0].stat.st_size != cmp.file[1].stat.st_size
3826 && 0 < cmp.file[0].stat.st_size
3827 && 0 < cmp.file[1].stat.st_size) {
3828 message("Files %s and %s differ\n",
3829 file_label[0] ? file_label[0] : cmp.file[0].name,
3830 file_label[1] ? file_label[1] : cmp.file[1].name);
3831 status = EXIT_FAILURE;
3832 } else {
3833 /* Both exist and neither is a directory. */
3834
3835 /* Open the files and record their descriptors. */
3836
3837 if (cmp.file[0].desc == UNOPENED)
3838 if ((cmp.file[0].desc = open(cmp.file[0].name, O_RDONLY, 0)) < 0) {
3839 perror_with_name(cmp.file[0].name);
3840 status = EXIT_TROUBLE;
3841 }
3842 if (cmp.file[1].desc == UNOPENED) {
3843 if (same_files)
3844 cmp.file[1].desc = cmp.file[0].desc;
3845 else if ((cmp.file[1].desc = open(cmp.file[1].name, O_RDONLY, 0)) < 0) {
3846 perror_with_name(cmp.file[1].name);
3847 status = EXIT_TROUBLE;
3848 }
3849 }
3850
3851 /* Compare the files, if no error was found. */
3852
3853 if (status == EXIT_SUCCESS) {
3854 status = diff_2_files(&cmp);
3855 }
3856
3857 /* Close the file descriptors. */
3858
3859 if (0 <= cmp.file[0].desc && close(cmp.file[0].desc) != 0) {
3860 perror_with_name(cmp.file[0].name);
3861 status = EXIT_TROUBLE;
3862 }
3863 if (0 <= cmp.file[1].desc && cmp.file[0].desc != cmp.file[1].desc
3864 && close(cmp.file[1].desc) != 0) {
3865 perror_with_name(cmp.file[1].name);
3866 status = EXIT_TROUBLE;
3867 }
3868 }
3869
3870 /* Now the comparison has been done, if no error prevented it, and
3871 |status| is the value this function will return. */
3872
3873 if (status != EXIT_SUCCESS) {
3874 /* Flush stdout so that the user sees differences immediately.
3875 This can hurt performance, unfortunately. */
3876 if (fflush(stdout) != 0)
3877 pfatal_with_name("standard output");
3878 }
3879
3880 free(free0);
3881 free(free1);
3882
3883 return status;
3884 }
-
+ 8389679F305C168B019ADB6AE495F60C1B71EFC35052D94B6672829B437E0607E77D66D457C81128110B6ECEFCFC41ADB55EFAB8AF91E9B7C76DAF5597EF08F6
vtools/src/diff.h
(0 . 0)(1 . 249)
3889 #include "system.h"
3890 #include <stdio.h>
3891 #include <string.h>
3892 #include <sys/stat.h>
3893
3894 /* What kind of changes a hunk contains. */
3895 enum changes {
3896 /* No changes: lines common to both files. */
3897 UNCHANGED,
3898
3899 /* Deletes only: lines taken from just the first file. */
3900 OLD,
3901
3902 /* Inserts only: lines taken from just the second file. */
3903 NEW,
3904
3905 /* Both deletes and inserts: a hunk containing both old and new lines. */
3906 CHANGED
3907 };
3908
3909 /* Variables for command line options */
3910
3911 #ifndef GDIFF_MAIN
3912 # define XTERN extern
3913 #else
3914 # define XTERN
3915 #endif
3916
3917 /* Number of lines of context to show in each set of diffs. This is
3918 zero when context is not to be shown. */
3919 XTERN lin context;
3920
3921 /* Consider all files as text files (-a). Don't interpret codes over
3922 0177 as implying a "binary file". */
3923 XTERN bool text;
3924
3925 /* Number of lines to keep in identical prefix and suffix. */
3926 XTERN lin horizon_lines;
3927
3928 /* Files can be compared byte-by-byte, as if they were binary. This
3929 depends on various options. */
3930 XTERN bool files_can_be_treated_as_binary;
3931
3932 /* File labels for |-c| output headers (|--label|). */
3933 XTERN char *file_label[2];
3934
3935 /* Say only whether files differ, not how (|-q|). */
3936 XTERN bool brief;
3937
3938 /* Do not output an initial space or tab before the text of an empty line. */
3939 XTERN bool suppress_blank_empty;
3940
3941 /* In directory comparison, specify file to start with (|-S|). This
3942 is used for resuming an aborted comparison. All file names less
3943 than this name are ignored. */
3944 XTERN char const *starting_file;
3945
3946 /* String containing all the command options diff received, with
3947 spaces between and at the beginning but none at the end. If there
3948 were no options given, this string is empty. */
3949 XTERN char *switch_string;
3950
3951 /* Use heuristics for better speed with large files with a small
3952 density of changes. */
3953 XTERN bool speed_large_files;
3954
3955 /* Don't discard lines. This makes things slower (sometimes much
3956 slower) but will find a guaranteed minimal set of changes. */
3957 XTERN bool minimal;
3958
3959 /* The result of comparison is an ``edit script'': a chain of |struct
3960 change|. Each |struct change| represents one place where some
3961 lines are deleted and some are inserted.
3962
3963 |line0| and |line1| are the first affected lines in the two files
3964 (origin 0). |deleted| is the number of lines deleted here from file
3965 0. |inserted| is the number of lines inserted here in file 1.
3966
3967 If |deleted| is 0 then |line0| is the number of the line before
3968 which the insertion was done; vice versa for |inserted| and
3969 |line1|. */
3970
3971 struct change {
3972 struct change *link; /* Previous or next edit command */
3973 lin inserted; /* \# lines of file 1 changed here. */
3974 lin deleted; /* \# lines of file 0 changed here. */
3975 lin line0; /* Line number of 1st deleted line. */
3976 lin line1; /* Line number of 1st inserted line. */
3977 bool ignore; /* Flag used in |context.c|. */
3978 };
3979
3980 /* Structures that describe the input files. */
3981
3982 /* Data on one input file being compared. */
3983
3984 struct file_data {
3985 int desc; /* File descriptor */
3986 char const *name; /* File name */
3987 struct stat stat; /* File status */
3988
3989 /* Buffer in which text of file is read. */
3990 word *buffer;
3991
3992 /* Allocated size of buffer, in bytes. Always a multiple of
3993 sizeof |*buffer|. */
3994 size_t bufsize;
3995
3996 /* Number of valid bytes now in the buffer. */
3997 size_t buffered;
3998
3999 /* Array of pointers to lines in the file. */
4000 char const **linbuf;
4001
4002 /* |linbuf_base <= buffered_lines <= valid_lines <= alloc_lines|.
4003 |linebuf[linbuf_base ... buffered_lines - 1]| are possibly differing.
4004 |linebuf[linbuf_base ... valid_lines - 1]| contain valid data.
4005 |linebuf[linbuf_base ... alloc_lines - 1]| are allocated. */
4006 lin linbuf_base, buffered_lines, valid_lines, alloc_lines;
4007
4008 /* Pointer to end of prefix of this file to ignore when hashing. */
4009 char const *prefix_end;
4010
4011 /* Count of lines in the prefix. There are this many lines in the
4012 file before |linbuf[0]|. */
4013 lin prefix_lines;
4014
4015 /* Pointer to start of suffix of this file to ignore when hashing. */
4016 char const *suffix_begin;
4017
4018 /* Vector, indexed by line number, containing an equivalence code
4019 for each line. It is this vector that is actually compared
4020 with that of another file to generate differences. */
4021 lin *equivs;
4022
4023 /* Vector, like the previous one except that the elements for
4024 discarded lines have been squeezed out. */
4025 lin *undiscarded;
4026
4027 /* Vector mapping virtual line numbers (not counting discarded
4028 lines) to real ones (counting those lines). Both are
4029 $origin-0$. */
4030 lin *realindexes;
4031
4032 /* Total number of nondiscarded lines. */
4033 lin nondiscarded_lines;
4034
4035 /* Vector, indexed by real $origin-0$ line number, containing 1
4036 for a line that is an insertion or a deletion. The results of
4037 comparison are stored here. */
4038 char *changed;
4039
4040 /* 1 if file ends in a line with no final newline. */
4041 bool missing_newline;
4042
4043 /* 1 if at end of file. */
4044 bool eof;
4045
4046 /* 1 more than the maximum equivalence value used for this or its
4047 sibling file. */
4048 lin equiv_max;
4049 };
4050
4051 /* The file buffer, considered as an array of bytes rather than as an
4052 array of words. */
4053 #define FILE_BUFFER(f) ((char *) (f)->buffer)
4054
4055 /* Data on two input files being compared. */
4056
4057 struct comparison {
4058 struct file_data file[2];
4059 struct comparison const *parent; /* parent, if a recursive comparison */
4060 };
4061
4062 /* Describe the two files currently being compared. */
4063
4064 XTERN struct file_data files[2];
4065
4066 /* Stdio stream to output diffs to. */
4067
4068 XTERN FILE *outfile;
4069
4070 /* Declare various functions. */
4071
4072 /* analyze.c */
4073 extern int diff_2_files(struct comparison *);
4074
4075 /* context.c */
4076 extern void print_context_header(struct file_data[], char const *const *);
4077
4078 extern void print_context_script(struct change *);
4079
4080 /* dir.c */
4081 extern int diff_dirs(struct comparison const *,
4082 int (*)(struct comparison const *,
4083 char const *, char const *));
4084
4085 extern char *find_dir_file_pathname(char const *, char const *);
4086
4087 /* diff.h */
4088 extern struct change *find_hunk(struct change *);
4089
4090 extern void pr_unidiff_hunk(struct change *);
4091
4092 /* io.c */
4093 extern void file_block_read(struct file_data *, size_t);
4094
4095 extern bool read_files(struct file_data[], bool);
4096
4097 /* util.c */
4098
4099 extern bool lines_differ(char const *, char const *);
4100
4101 extern lin translate_line_number(struct file_data const *, lin);
4102
4103 extern struct change *find_change(struct change *);
4104
4105 extern void *zalloc(size_t);
4106
4107 extern enum changes analyze_hunk(struct change *, lin *, lin *, lin *, lin *);
4108
4109 extern void begin_output(void);
4110
4111 extern void debug_script(struct change *);
4112
4113 extern void fatal(char const *) __attribute__((noreturn));
4114
4115 extern void finish_output(void);
4116
4117 extern void message(char const *, char const *, char const *);
4118
4119 extern void message5(char const *, char const *, char const *,
4120 char const *, char const *);
4121
4122 extern void output_1_line(char const *, char const *);
4123
4124 extern void perror_with_name(char const *);
4125
4126 extern void pfatal_with_name(char const *) __attribute__((noreturn));
4127
4128 extern void print_1_line(char const *, char const *const *);
4129
4130 extern void print_1_line_nl(char const *, char const *const *, bool);
4131
4132 extern void print_script(struct change *);
4133
4134 extern void setup_output(char const *, char const *, bool);
4135
4136 extern void translate_range(struct file_data const *, lin, lin,
4137 printint *, printint *);
-
+ 195F6A1A811E7D09915A8596772D8E0DDA1C9B2F7C7F1FCF32F9671FADE50794BDEACBCA12874C7A8B8A909D84BC69286B27832EA83B6E208E50D8B958C685BA
vtools/src/dir.c
(0 . 0)(1 . 218)
4142
4143 #include "diff.h"
4144 #include <error.h>
4145 #include <filenamecat.h>
4146 #include <setjmp.h>
4147 #include <xalloc.h>
4148 #include <dirent.h>
4149 #include <stdlib.h>
4150
4151 /* Read the directory named by |dir| and store into |dirdata| a sorted
4152 vector of filenames for its contents. |dir->desc == -1| means this
4153 directory is known to be nonexistent, so set |dirdata| to an empty
4154 vector. Return -1 (setting |errno|) if error, 0 otherwise. */
4155
4156 struct dirdata {
4157 size_t nnames; /* Number of names. */
4158 char const **names; /* Sorted names of files in dir, followed by 0. */
4159 char *data; /* Allocated storage for file names. */
4160 };
4161
4162 static bool dir_loop(struct comparison const *, int);
4163
4164
4165 /* Read a directory and get its vector of names. */
4166
4167 static bool
4168 dir_read(struct file_data const *dir, struct dirdata *dirdata) {
4169 register struct dirent *next;
4170 register size_t i;
4171
4172 /* Address of block containing the files that are described. */
4173 char const **names;
4174
4175 /* Number of files in directory. */
4176 size_t nnames;
4177
4178 /* Allocated and used storage for file name data. */
4179 char *data;
4180 size_t data_alloc, data_used;
4181
4182 dirdata->names = 0;
4183 dirdata->data = 0;
4184 nnames = 0;
4185 data = 0;
4186
4187 if (dir->desc != -1) {
4188 /* Open the directory and check for errors. */
4189 register DIR *reading = opendir(dir->name);
4190 if (!reading)
4191 return false;
4192
4193 /* Initialize the table of filenames. */
4194
4195 data_alloc = 512;
4196 data_used = 0;
4197 dirdata->data = data = xmalloc(data_alloc);
4198
4199 /* Read the directory entries, and insert the subfiles
4200 into the |data| table. */
4201
4202 while ((errno = 0, (next = readdir(reading)) != 0)) {
4203 char *d_name = next->d_name;
4204 size_t d_size = strlen (next->d_name) + 1;
4205
4206 /* Ignore "." and "..". */
4207 if (d_name[0] == '.'
4208 && (d_name[1] == 0 || (d_name[1] == '.' && d_name[2] == 0)))
4209 continue;
4210
4211 while (data_alloc < data_used + d_size) {
4212 if (PTRDIFF_MAX / 2 <= data_alloc)
4213 xalloc_die();
4214 dirdata->data = data = xrealloc(data, data_alloc *= 2);
4215 }
4216
4217 memcpy (data + data_used, d_name, d_size);
4218 data_used += d_size;
4219 nnames++;
4220 }
4221 if (errno) {
4222 int e = errno;
4223 closedir(reading);
4224 errno = e;
4225 return false;
4226 }
4227 if (closedir(reading) != 0)
4228 return false;
4229 }
4230
4231 /* Create the |names| table from the |data| table. */
4232 if (PTRDIFF_MAX / sizeof *names - 1 <= nnames)
4233 xalloc_die();
4234 dirdata->names = names = xmalloc((nnames + 1) * sizeof *names);
4235 dirdata->nnames = nnames;
4236 for (i = 0; i < nnames; i++) {
4237 names[i] = data;
4238 data += strlen(data) + 1;
4239 }
4240 names[nnames] = 0;
4241 return true;
4242 }
4243
4244 /* Compare file names, returning a value compatible with strcmp. */
4245
4246 #define compare_names strcmp
4247
4248 /* Compare names FILE1 and FILE2 when sorting a directory. */
4249
4250 static int
4251 compare_names_for_qsort(void const *file1, void const *file2) {
4252 char const *const *f1 = file1;
4253 char const *const *f2 = file2;
4254 char const *name1 = *f1;
4255 char const *name2 = *f2;
4256 return compare_names(name1, name2);
4257 }
4258
4259 /* Compare the contents of two directories named in |cmp|. This is a
4260 top-level routine; it does everything necessary for diff on two
4261 directories.
4262
4263 |cmp->file[0].desc == -1| says directory |cmp->file[0]| doesn't
4264 exist, but pretend it is empty. Likewise for |cmp->file[1]|.
4265
4266 |handle_file| is a caller-provided subroutine called to handle each file.
4267 It gets three operands: |cmp|, name of file in dir 0, name of file in dir 1.
4268 These names are relative to the original working directory.
4269
4270 For a file that appears in only one of the dirs, one of the name-args
4271 to |handle_file| is zero.
4272
4273 Returns the maximum of all the values returned by |handle_file|,
4274 or |exit_trouble| if trouble is encountered in opening files. */
4275
4276 int
4277 diff_dirs(struct comparison const *cmp,
4278 int (*handle_file)(struct comparison const *,
4279 char const *, char const *)) {
4280 struct dirdata dirdata[2];
4281 int volatile val = EXIT_SUCCESS;
4282 int i;
4283
4284 if ((cmp->file[0].desc == -1 || dir_loop(cmp, 0))
4285 && (cmp->file[1].desc == -1 || dir_loop(cmp, 1))) {
4286 error(0, 0, "%s: recursive directory loop",
4287 cmp->file[cmp->file[0].desc == -1].name);
4288 return EXIT_TROUBLE;
4289 }
4290
4291 /* Get contents of both dirs. */
4292 for (i = 0; i < 2; i++)
4293 if (!dir_read(&cmp->file[i], &dirdata[i])) {
4294 perror_with_name(cmp->file[i].name);
4295 val = EXIT_TROUBLE;
4296 }
4297
4298 if (val == EXIT_SUCCESS) {
4299 char const **volatile names[2];
4300 names[0] = dirdata[0].names;
4301 names[1] = dirdata[1].names;
4302
4303 /* Sort the directories. */
4304 for (i = 0; i < 2; i++)
4305 qsort(names[i], dirdata[i].nnames, sizeof *dirdata[i].names,
4306 compare_names_for_qsort);
4307
4308 /* If |-S name| was given, and this is the topmost level of
4309 comparison, ignore all file names less than the specified
4310 starting name. */
4311
4312 if (starting_file && !cmp->parent) {
4313 while (*names[0] && compare_names(*names[0], starting_file) < 0)
4314 names[0]++;
4315 while (*names[1] && compare_names(*names[1], starting_file) < 0)
4316 names[1]++;
4317 }
4318
4319 /* Loop while files remain in one or both dirs. */
4320 while (*names[0] || *names[1]) {
4321 /* Compare next name in dir 0 with next name in dir 1. At
4322 the end of a dir, pretend the "next name" in that dir
4323 is very large. */
4324 int nameorder = (!*names[0] ? 1 : !*names[1] ? -1
4325 : compare_names(*names[0], *names[1]));
4326 int v1 = (*handle_file)(cmp,
4327 0 < nameorder ? 0 : *names[0]++,
4328 nameorder < 0 ? 0 : *names[1]++);
4329 if (val < v1)
4330 val = v1;
4331 }
4332 }
4333
4334 for (i = 0; i < 2; i++) {
4335 free(dirdata[i].names);
4336 free(dirdata[i].data);
4337 }
4338
4339 return val;
4340 }
4341
4342 /* Return nonzero if $cmp$ is looping recursively in argument $i$. */
4343
4344 static bool
4345 dir_loop(struct comparison const *cmp, int i) {
4346 struct comparison const *p = cmp;
4347 while ((p = p->parent))
4348 if (0 < same_file (&p->file[i].stat, &cmp->file[i].stat))
4349 return true;
4350 return false;
4351 }
4352
4353 /* Find a matching filename in a directory. */
4354
4355 char *
4356 find_dir_file_pathname(char const *dir, char const *file) {
4357 const char *match = file;
4358 return file_name_concat(dir, match, NULL);
4359 }
-
+ 9AB0950EFF37136222DC461537F8448EBC36DFF595E77AF294E48F9142657ED6DA78D72C232ECC890E28BA75A2160D7AB3A09E1A7AA61C50AC95C7A747EF2B7A
vtools/src/io.c
(0 . 0)(1 . 596)
4364
4365 #include "diff.h"
4366 #include <cmpbuf.h>
4367 #include <xalloc.h>
4368 #include <string.h>
4369 #include <limits.h>
4370 #include <stdlib.h>
4371
4372 /* Rotate an unsigned value to the left. */
4373 #define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
4374
4375 /* Given a hash value and a new character, return a new hash
4376 value. */
4377 #define HASH(h, c) ((c) + ROL (h, 7))
4378
4379 /* The type of a hash value. */
4380 typedef size_t hash_value;
4381
4382 /* Lines are put into equivalence classes of lines that match in
4383 |lines_differ|. Each equivalence class is represented by one of
4384 these structures, but only while the classes are being computed.
4385 Afterward, each class is represented by a number. */
4386 struct equivclass {
4387 lin next; /* Next item in this bucket. */
4388 hash_value hash; /* Hash of lines in this class. */
4389 char const *line; /* A line that fits this class. */
4390 size_t length; /* That line's length, not counting its newline. */
4391 };
4392
4393 /* Hash-table: array of buckets, each being a chain of equivalence
4394 classes. |buckets[-1]| is reserved for incomplete lines. */
4395 static lin *buckets;
4396
4397 /* Number of buckets in the hash table array, not counting
4398 |buckets[-1]|. */
4399 static size_t nbuckets;
4400
4401 /* Array in which the equivalence classes are allocated. The
4402 bucket-chains go through the elements in this array. The number of
4403 an equivalence class is its index in this array. */
4404 static struct equivclass *equivs;
4405
4406 /* Index of first free element in the array |equivs|. */
4407 static lin equivs_index;
4408
4409 /* Number of elements allocated in the array |equivs|. */
4410 static lin equivs_alloc;
4411
4412 /* Read a block of data into a file buffer, checking for EOF and
4413 error. */
4414
4415 void
4416 file_block_read(struct file_data *current, size_t size) {
4417 if (size && !current->eof) {
4418 size_t s = block_read(current->desc,
4419 FILE_BUFFER (current) + current->buffered, size);
4420 if (s == SIZE_MAX)
4421 pfatal_with_name(current->name);
4422
4423 current->buffered += s;
4424 current->eof = s < size;
4425 }
4426 }
4427
4428 /* Check for binary files and compare them for exact identity. */
4429
4430 /* Return 1 if |buf| contains a non text character. |size| is the
4431 number of characters in |buf|. */
4432
4433 #define binary_file_p(buf, size) (memchr (buf, 0, size) != 0)
4434
4435 /* Get ready to read the current file. Return nonzero if |skip_test|
4436 is zero, and if it appears to be a binary file. */
4437
4438 static bool
4439 sip(struct file_data *current, bool skip_test) {
4440 /* If we have a nonexistent file at this stage, treat it as
4441 empty. */
4442 if (current->desc < 0) {
4443 /* Leave room for a sentinel. */
4444 current->bufsize = sizeof(word);
4445 current->buffer = xmalloc(current->bufsize);
4446 } else {
4447 current->bufsize = buffer_lcm(sizeof(word),
4448 STAT_BLOCKSIZE (current->stat),
4449 PTRDIFF_MAX - 2 * sizeof(word));
4450 current->buffer = xmalloc(current->bufsize);
4451
4452 if (!skip_test) {
4453 /* Check first part of file to see if it's a binary file. */
4454
4455 off_t buffered;
4456 file_block_read(current, current->bufsize);
4457 buffered = current->buffered;
4458 return binary_file_p (current->buffer, buffered);
4459 }
4460 }
4461
4462 current->buffered = 0;
4463 current->eof = false;
4464 return false;
4465 }
4466
4467 /* Slurp the rest of the current file completely into memory. */
4468
4469 static void
4470 slurp(struct file_data *current) {
4471 size_t cc;
4472
4473 if (current->desc < 0) {
4474 /* The file is nonexistent. */
4475 return;
4476 }
4477
4478 if (S_ISREG (current->stat.st_mode)) {
4479 /* It's a regular file; slurp in the rest all at once. */
4480
4481 /* Get the size out of the stat block. Allocate just enough
4482 room for appended newline plus word sentinel, plus
4483 word-alignment since we want the buffer word-aligned. */
4484 size_t file_size = current->stat.st_size;
4485 cc = file_size + 2 * sizeof(word) - file_size % sizeof(word);
4486 if (file_size != current->stat.st_size || cc < file_size
4487 || PTRDIFF_MAX <= cc)
4488 xalloc_die();
4489
4490 if (current->bufsize < cc) {
4491 current->bufsize = cc;
4492 current->buffer = xrealloc(current->buffer, cc);
4493 }
4494
4495 /* Try to read at least 1 more byte than the size indicates,
4496 to detect whether the file is growing. This is a nicety for
4497 users who run 'diff' on files while they are changing. */
4498
4499 if (current->buffered <= file_size) {
4500 file_block_read(current, file_size + 1 - current->buffered);
4501 if (current->buffered <= file_size)
4502 return;
4503 }
4504 }
4505
4506 /* It's not a regular file, or it's a growing regular file; read
4507 it, growing the buffer as needed. */
4508
4509 file_block_read(current, current->bufsize - current->buffered);
4510
4511 if (current->buffered) {
4512 while (current->buffered == current->bufsize) {
4513 if (PTRDIFF_MAX / 2 - sizeof(word) < current->bufsize)
4514 xalloc_die();
4515 current->bufsize *= 2;
4516 current->buffer = xrealloc(current->buffer, current->bufsize);
4517 file_block_read(current, current->bufsize - current->buffered);
4518 }
4519
4520 /* Allocate just enough room for appended newline plus word
4521 sentinel, plus word-alignment. */
4522 cc = current->buffered + 2 * sizeof(word);
4523 current->bufsize = cc - cc % sizeof(word);
4524 current->buffer = xrealloc(current->buffer, current->bufsize);
4525 }
4526 }
4527
4528 /* Split the file into lines, simultaneously computing the equivalence
4529 class for each line. */
4530
4531 static void
4532 find_and_hash_each_line(struct file_data *current) {
4533 char const *p = current->prefix_end;
4534 lin i, *bucket;
4535 size_t length;
4536
4537 /* Cache often-used quantities in local variables to help the compiler. */
4538 char const **linbuf = current->linbuf;
4539 lin alloc_lines = current->alloc_lines;
4540 lin line = 0;
4541 lin linbuf_base = current->linbuf_base;
4542 lin *cureqs = xmalloc(alloc_lines * sizeof *cureqs);
4543 struct equivclass *eqs = equivs;
4544 lin eqs_index = equivs_index;
4545 lin eqs_alloc = equivs_alloc;
4546 char const *suffix_begin = current->suffix_begin;
4547 char const *bufend = FILE_BUFFER (current) + current->buffered;
4548
4549
4550 while (p < suffix_begin) {
4551 char const *ip = p;
4552 hash_value h = 0;
4553 unsigned char c;
4554
4555 /* Hash this line until we find a newline. */
4556 while ((c = *p++) != '\n')
4557 h = HASH (h, c);
4558
4559 bucket = &buckets[h % nbuckets];
4560 length = p - ip - 1;
4561
4562 for (i = *bucket;; i = eqs[i].next)
4563 if (!i) {
4564 /* Create a new equivalence class in this bucket. */
4565 i = eqs_index++;
4566 if (i == eqs_alloc) {
4567 if (PTRDIFF_MAX / (2 * sizeof *eqs) <= eqs_alloc)
4568 xalloc_die();
4569 eqs_alloc *= 2;
4570 eqs = xrealloc(eqs, eqs_alloc * sizeof *eqs);
4571 }
4572 eqs[i].next = *bucket;
4573 eqs[i].hash = h;
4574 eqs[i].line = ip;
4575 eqs[i].length = length;
4576 *bucket = i;
4577 break;
4578 } else if (eqs[i].hash == h) {
4579 char const *eqline = eqs[i].line;
4580
4581 /* Reuse existing class if |lines_differ| reports the
4582 lines equal. */
4583 if (eqs[i].length == length) {
4584 /* Reuse existing equivalence class if the lines
4585 are identical. This detects the common case of
4586 exact identity faster than |lines_differ|
4587 would. */
4588 if (memcmp(eqline, ip, length) == 0)
4589 break;
4590 continue;
4591 } else
4592 continue;
4593 }
4594
4595 /* Maybe increase the size of the line table. */
4596 if (line == alloc_lines) {
4597 /* Double |(alloc_lines - linbuf_base)| by adding to
4598 |alloc_lines|. */
4599 if (PTRDIFF_MAX / 3 <= alloc_lines
4600 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
4601 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
4602 xalloc_die();
4603 alloc_lines = 2 * alloc_lines - linbuf_base;
4604 cureqs = xrealloc(cureqs, alloc_lines * sizeof *cureqs);
4605 linbuf += linbuf_base;
4606 linbuf = xrealloc(linbuf,
4607 (alloc_lines - linbuf_base) * sizeof *linbuf);
4608 linbuf -= linbuf_base;
4609 }
4610 linbuf[line] = ip;
4611 cureqs[line] = i;
4612 ++line;
4613 }
4614
4615 current->buffered_lines = line;
4616
4617 for (i = 0;; i++) {
4618 /* Record the line start for lines in the suffix that we care
4619 about. Record one more line start than lines, so that we can
4620 compute the length of any buffered line. */
4621 if (line == alloc_lines) {
4622 /* Double |(alloc_lines - linbuf_base)| by adding to
4623 |alloc_lines|. */
4624 if (PTRDIFF_MAX / 3 <= alloc_lines
4625 || PTRDIFF_MAX / sizeof *cureqs <= 2 * alloc_lines - linbuf_base
4626 || PTRDIFF_MAX / sizeof *linbuf <= alloc_lines - linbuf_base)
4627 xalloc_die();
4628 alloc_lines = 2 * alloc_lines - linbuf_base;
4629 linbuf += linbuf_base;
4630 linbuf = xrealloc(linbuf,
4631 (alloc_lines - linbuf_base) * sizeof *linbuf);
4632 linbuf -= linbuf_base;
4633 }
4634 linbuf[line] = p;
4635
4636 if (p == bufend) {
4637 /* If the last line is incomplete and we do not silently
4638 complete lines, don't count its appended newline. */
4639 if (current->missing_newline && false) /* our output style not robust */
4640 linbuf[line]--;
4641 break;
4642 }
4643
4644 line++;
4645
4646 while (*p++ != '\n')
4647 continue;
4648 }
4649
4650 /* Done with cache in local variables. */
4651 current->linbuf = linbuf;
4652 current->valid_lines = line;
4653 current->alloc_lines = alloc_lines;
4654 current->equivs = cureqs;
4655 equivs = eqs;
4656 equivs_alloc = eqs_alloc;
4657 equivs_index = eqs_index;
4658 }
4659
4660 /* Prepare the text. Make sure the text end is initialized. Make
4661 sure text ends in a newline, but remember that we had to add one.
4662 Strip trailing CRs, if that was requested. */
4663
4664 static void
4665 prepare_text(struct file_data *current) {
4666 size_t buffered = current->buffered;
4667 char *p = FILE_BUFFER (current);
4668
4669 if (buffered == 0 || p[buffered - 1] == '\n')
4670 current->missing_newline = false;
4671 else {
4672 p[buffered++] = '\n';
4673 current->missing_newline = true;
4674 }
4675
4676 if (!p)
4677 return;
4678
4679 /* Don't use uninitialized storage when planting or using sentinels. */
4680 memset (p + buffered, 0, sizeof(word));
4681
4682 current->buffered = buffered;
4683 }
4684
4685 /* We have found |n| lines in a buffer of size |s|; guess the
4686 proportionate number of lines that will be found in a buffer of
4687 size |t|. However, do not guess a number of lines so large that
4688 the resulting line table might cause overflow in size
4689 calculations. */
4690 static lin
4691 guess_lines(lin n, size_t s, size_t t) {
4692 size_t guessed_bytes_per_line = n < 10 ? 32 : s / (n - 1);
4693 lin guessed_lines = MAX (1, t / guessed_bytes_per_line);
4694 return MIN (guessed_lines, PTRDIFF_MAX / (2 * sizeof(char *) + 1) - 5) + 5;
4695 }
4696
4697 /* Given a vector of two |file_data| objects, find the identical
4698 prefixes and suffixes of each object. */
4699
4700 static void
4701 find_identical_ends(struct file_data filevec[]) {
4702 word *w0, *w1;
4703 char *p0, *p1, *buffer0, *buffer1;
4704 char const *end0, *beg0;
4705 char const **linbuf0, **linbuf1;
4706 lin i, lines;
4707 size_t n0, n1;
4708 lin alloc_lines0, alloc_lines1;
4709 lin buffered_prefix, prefix_count, prefix_mask;
4710 lin middle_guess, suffix_guess;
4711
4712 slurp(&filevec[0]);
4713 prepare_text(&filevec[0]);
4714 if (filevec[0].desc != filevec[1].desc) {
4715 slurp(&filevec[1]);
4716 prepare_text(&filevec[1]);
4717 } else {
4718 filevec[1].buffer = filevec[0].buffer;
4719 filevec[1].bufsize = filevec[0].bufsize;
4720 filevec[1].buffered = filevec[0].buffered;
4721 filevec[1].missing_newline = filevec[0].missing_newline;
4722 }
4723
4724 /* Find identical prefix. */
4725
4726 w0 = filevec[0].buffer;
4727 w1 = filevec[1].buffer;
4728 p0 = buffer0 = (char *) w0;
4729 p1 = buffer1 = (char *) w1;
4730 n0 = filevec[0].buffered;
4731 n1 = filevec[1].buffered;
4732
4733 if (p0 == p1)
4734 /* The buffers are the same; sentinels won't work. */
4735 p0 = p1 += n1;
4736 else {
4737 /* Insert end sentinels, in this case characters that are
4738 guaranteed to make the equality test false, and thus terminate
4739 the loop. */
4740
4741 if (n0 < n1)
4742 p0[n0] = ~p1[n0];
4743 else
4744 p1[n1] = ~p0[n1];
4745
4746 /* Loop until first mismatch, or to the sentinel
4747 characters. */
4748
4749 /* Compare a word at a time for speed. */
4750 while (*w0 == *w1)
4751 w0++, w1++;
4752
4753 /* Do the last few bytes of comparison a byte at a time. */
4754 p0 = (char *) w0;
4755 p1 = (char *) w1;
4756 while (*p0 == *p1)
4757 p0++, p1++;
4758
4759 /* Don't mistakenly count missing newline as part of
4760 prefix. */
4761 if (false
4762 && ((buffer0 + n0 - filevec[0].missing_newline < p0)
4763 !=
4764 (buffer1 + n1 - filevec[1].missing_newline < p1)))
4765 p0--, p1--;
4766 }
4767
4768 /* Now |p0| and |p1| point at the first nonmatching
4769 characters. */
4770
4771 /* Skip back to last line-beginning in the prefix, and then
4772 discard up to |horizon_lines| lines from the prefix. */
4773 i = horizon_lines;
4774 while (p0 != buffer0 && (p0[-1] != '\n' || i--))
4775 p0--, p1--;
4776
4777 /* Record the prefix. */
4778 filevec[0].prefix_end = p0;
4779 filevec[1].prefix_end = p1;
4780
4781 /* Find identical suffix. */
4782
4783 /* |p0| and |p1| point beyond the last chars not yet compared. */
4784 p0 = buffer0 + n0;
4785 p1 = buffer1 + n1;
4786
4787 if (filevec[0].missing_newline == filevec[1].missing_newline) {
4788 end0 = p0; /* Addr of last char in file 0. */
4789
4790 /* Get value of |p0| at which we should stop scanning
4791 backward: this is when either |p0| or |p1| points just past the
4792 last char of the identical prefix. */
4793 beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
4794
4795 /* Scan back until chars don't match or we reach that point. */
4796 while (p0 != beg0)
4797 if (*--p0 != *--p1) {
4798 /* Point at the first char of the matching suffix. */
4799 ++p0, ++p1;
4800 beg0 = p0;
4801 break;
4802 }
4803
4804 /* Are we at a line-beginning in both files? If not, add the
4805 rest of this line to the main body. Discard up to
4806 |horizon_lines| lines from the identical suffix. Also, discard
4807 one extra line, because |shift_boundaries| may need it. */
4808 i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
4809 &&
4810 (buffer1 == p1 || p1[-1] == '\n'));
4811 while (i-- && p0 != end0)
4812 while (*p0++ != '\n')
4813 continue;
4814
4815 p1 += p0 - beg0;
4816 }
4817
4818 /* Record the suffix. */
4819 filevec[0].suffix_begin = p0;
4820 filevec[1].suffix_begin = p1;
4821
4822 /* Calculate number of lines of prefix to save.
4823
4824 |prefix_count == 0| means save the whole prefix; we need this
4825 for options like |-D| that output the whole file, or for
4826 enormous contexts (to avoid worrying about arithmetic
4827 overflow). We also need it for options like |-F| that output
4828 some preceding line; at least we will need to find the last few
4829 lines, but since we don't know how many, it's easiest to find
4830 them all.
4831
4832 Otherwise, |prefix_count != 0|. Save just |prefix_count| lines
4833 at start of the line buffer; they'll be moved to the proper
4834 location later. Handle 1 more line than the context says
4835 (because we count 1 too many), rounded up to the next power of
4836 2 to speed index computation. */
4837
4838 prefix_count = 0;
4839 alloc_lines0 = guess_lines(0, 0, n0);
4840 prefix_mask = prefix_count - 1;
4841 lines = 0;
4842 linbuf0 = xmalloc(alloc_lines0 * sizeof *linbuf0);
4843 p0 = buffer0;
4844
4845 /* If the prefix is needed, find the prefix lines. */
4846 end0 = filevec[0].prefix_end;
4847 while (p0 != end0) {
4848 lin l = lines++ & prefix_mask;
4849 if (l == alloc_lines0) {
4850 if (PTRDIFF_MAX / (2 * sizeof *linbuf0) <= alloc_lines0)
4851 xalloc_die();
4852 alloc_lines0 *= 2;
4853 linbuf0 = xrealloc(linbuf0, alloc_lines0 * sizeof *linbuf0);
4854 }
4855 linbuf0[l] = p0;
4856 while (*p0++ != '\n')
4857 continue;
4858 }
4859 buffered_prefix = prefix_count && context < lines ? context : lines;
4860
4861 /* Allocate line buffer 1. */
4862
4863 middle_guess = guess_lines(lines, p0 - buffer0, p1 - filevec[1].prefix_end);
4864 suffix_guess = guess_lines(lines, p0 - buffer0, buffer1 + n1 - p1);
4865 alloc_lines1 = buffered_prefix + middle_guess + MIN (context, suffix_guess);
4866 if (alloc_lines1 < buffered_prefix
4867 || PTRDIFF_MAX / sizeof *linbuf1 <= alloc_lines1)
4868 xalloc_die();
4869 linbuf1 = xmalloc(alloc_lines1 * sizeof *linbuf1);
4870
4871 if (buffered_prefix != lines) {
4872 /* Rotate prefix lines to proper location. */
4873 for (i = 0; i < buffered_prefix; i++)
4874 linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
4875 for (i = 0; i < buffered_prefix; i++)
4876 linbuf0[i] = linbuf1[i];
4877 }
4878
4879 /* Initialize line buffer 1 from line buffer 0. */
4880 for (i = 0; i < buffered_prefix; i++)
4881 linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
4882
4883 /* Record the line buffer, adjusted so that
4884 |linbuf[0]| points at the first differing line. */
4885 filevec[0].linbuf = linbuf0 + buffered_prefix;
4886 filevec[1].linbuf = linbuf1 + buffered_prefix;
4887 filevec[0].linbuf_base = filevec[1].linbuf_base = -buffered_prefix;
4888 filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
4889 filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
4890 filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
4891 }
4892
4893 /* If $1 < k$, then $(2^k - {\rm prime\_offset}_k)$ is the largest
4894 prime less than $2^k$. This table is derived from
4895 Chris~K.~Caldwell's list
4896 \url{http://www.utm.edu/research/primes/lists/2small/}. */
4897
4898 static unsigned char const prime_offset[] =
4899 {
4900 0, 0, 1, 1, 3, 1, 3, 1, 5, 3, 3, 9, 3, 1, 3, 19, 15, 1, 5, 1, 3, 9, 3,
4901 15, 3, 39, 5, 39, 57, 3, 35, 1, 5, 9, 41, 31, 5, 25, 45, 7, 87, 21,
4902 11, 57, 17, 55, 21, 115, 59, 81, 27, 129, 47, 111, 33, 55, 5, 13, 27,
4903 55, 93, 1, 57, 25
4904 };
4905
4906 /* Given a vector of two |file_data| objects, read the file associated
4907 with each one, and build the table of equivalence classes. Return
4908 nonzero if either file appears to be a binary file. If
4909 |pretend_binary| is nonzero, pretend they are binary
4910 regardless. */
4911
4912 bool
4913 read_files(struct file_data filevec[], bool pretend_binary) {
4914 int i;
4915 bool skip_test = text | pretend_binary;
4916 bool appears_binary = pretend_binary | sip(&filevec[0], skip_test);
4917
4918 if (filevec[0].desc != filevec[1].desc)
4919 appears_binary |= sip(&filevec[1], skip_test | appears_binary);
4920 else {
4921 filevec[1].buffer = filevec[0].buffer;
4922 filevec[1].bufsize = filevec[0].bufsize;
4923 filevec[1].buffered = filevec[0].buffered;
4924 }
4925 if (appears_binary) {
4926 return true;
4927 }
4928
4929 find_identical_ends(filevec);
4930
4931 equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
4932 if (PTRDIFF_MAX / sizeof *equivs <= equivs_alloc)
4933 xalloc_die();
4934 equivs = xmalloc(equivs_alloc * sizeof *equivs);
4935 /* Equivalence class 0 is permanently safe for lines that were not
4936 hashed. Real equivalence classes start at 1. */
4937 equivs_index = 1;
4938
4939 /* Allocate (one plus) a prime number of hash buckets. Use a
4940 prime number between $1/3$ and $2/3$ of the value of
4941 |equiv_allocs|, approximately. */
4942 for (i = 9; (size_t) 1 << i < equivs_alloc / 3; i++)
4943 continue;
4944 nbuckets = ((size_t) 1 << i) - prime_offset[i];
4945 if (PTRDIFF_MAX / sizeof *buckets <= nbuckets)
4946 xalloc_die();
4947 buckets = zalloc((nbuckets + 1) * sizeof *buckets);
4948 buckets++;
4949
4950 for (i = 0; i < 2; i++)
4951 find_and_hash_each_line(&filevec[i]);
4952
4953 filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
4954
4955 free(equivs);
4956 free(buckets - 1);
4957
4958 return false;
4959 }
-
+ E0C8C657FA184C8C6F20705B9C9FAE293A0C3BF380E60CDC65BA2449CD28D3F0964E42998601F868444C0F19752BB58934BB75DFFD7F0DCB31170CA7C6538A84
vtools/src/system.h
(0 . 0)(1 . 156)
4964 #ifndef SYSTEM_H
4965 #define SYSTEM_H
4966
4967 #define _GNU_SOURCE
4968
4969 #include <stddef.h>
4970 /* freebsd */
4971 #include <stdint.h>
4972 #include <unistd.h>
4973 /* linux */
4974 #include <inttypes.h>
4975 #include <sys/types.h>
4976
4977 #define STAT_BLOCKSIZE(s) ((s).st_blksize)
4978
4979 #define EXIT_TROUBLE 2
4980
4981 #include <errno.h>
4982
4983 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
4984 #define MAX(a, b) ((a) >= (b) ? (a) : /**/(b))
4985
4986 /* Return 1 if an array of |n| objects, each of size |s|, cannot exist
4987 reliably due to size or |ptrdiff_t| arithmetic overflow. |s| must
4988 be positive and N must be nonnegative. This is a macro, not a
4989 function, so that it works correctly even when |SIZE_MAX < n|.
4990
4991 True if |n * s| would overflow in a |size_t| calculation, or would
4992 generate a value larger than |PTRDIFF_MAX|. This expands to a
4993 constant expression if |n| and |s| are both constants. By gnulib
4994 convention, |SIZE_MAX| represents overflow in size calculations, so
4995 the conservative |size_t|-based dividend to use here is |SIZE_MAX -
4996 1|. */
4997
4998 #define xalloc_oversized(n, s) \
4999 ((size_t) (PTRDIFF_MAX < SIZE_MAX ? PTRDIFF_MAX : SIZE_MAX - 1) / (s) < (n))
5000
5001 #include <stdbool.h>
5002
5003 /* Type used for fast comparison of several bytes at a time. This used
5004 to be |uintmax_t|, but changing it to |size_t| made plain 'cmp' 90\%
5005 faster (|GCC 4.8.1|, |x86|). */
5006
5007 #ifndef word
5008 # define word size_t
5009 #endif
5010
5011 #include <signal.h> /* SA_RESTART */
5012
5013 #include <limits.h>
5014 #ifndef SSIZE_MAX
5015 # define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
5016 #endif
5017
5018 /* The signed integer type of a line number. Since files are read
5019 into main memory, |ptrdiff_t| should be wide enough. */
5020
5021 typedef ptrdiff_t lin;
5022 #define LIN_MAX PTRDIFF_MAX
5023
5024 /* The signed integer type for printing line numbers, and its printf
5025 length modifier. This is not simply |ptrdiff_t|, to cater to older
5026 and/or nonstandard C libraries where |"l"| works but |"ll"| and
5027 |"t"| do not, or where |long| is too narrow and |"ll"| works but
5028 |"t"| does not. */
5029
5030 #if LIN_MAX <= LONG_MAX
5031 typedef long int printint;
5032 # define pI "l"
5033 #elif LIN_MAX <= LLONG_MAX
5034 typedef long long int printint;
5035 # define pI "ll"
5036 #else
5037 typedef ptrdiff_t printint;
5038 # define pI "t"
5039 #endif
5040
5041 /* Limit so that |2 * context + 1| does not overflow. */
5042
5043 #define CONTEXT_MAX ((LIN_MAX - 1) / 2)
5044
5045
5046 #define file_name_cmp strcmp
5047
5048 /* Do struct stat |*s|, |*t| describe the same special file? */
5049 #ifndef same_special_file
5050 # if HAVE_STRUCT_STAT_ST_RDEV && defined S_ISBLK && defined S_ISCHR
5051 # define same_special_file(s, t) \
5052 (((S_ISBLK ((s)->st_mode) && S_ISBLK ((t)->st_mode)) \
5053 || (S_ISCHR ((s)->st_mode) && S_ISCHR ((t)->st_mode))) \
5054 && (s)->st_rdev == (t)->st_rdev)
5055 # else
5056 # define same_special_file(s, t) 0
5057 # endif
5058 #endif
5059
5060 /* Do |struct stat *s, *t| describe the same file? Answer -1 if
5061 unknown. */
5062 #ifndef same_file
5063 # define same_file(s, t) \
5064 ((((s)->st_ino == (t)->st_ino) && ((s)->st_dev == (t)->st_dev)) \
5065 || same_special_file (s, t))
5066 #endif
5067
5068 /* Do |struct stat *s, *t| have the same file attributes?
5069
5070 POSIX says that two files are identical if |st_ino| and |st_dev|
5071 are the same, but many file systems incorrectly assign the same
5072 (device, inode) pair to two distinct files, including:
5073
5074 - GNU/Linux NFS servers that export all local file systems as a
5075 single NFS file system, if a local device number |(st_dev)| exceeds
5076 255, or if a local inode number |(st_ino)| exceeds 16777215.
5077
5078 - Network Appliance NFS servers in snapshot directories; see
5079 Network Appliance bug \#195.
5080
5081 - ClearCase MVFS; see bug id ATRia04618.
5082
5083 Check whether two files that purport to be the same have the same
5084 attributes, to work around instances of this common bug. Do not
5085 inspect all attributes, only attributes useful in checking for this
5086 bug.
5087
5088 It's possible for two distinct files on a buggy file system to have
5089 the same attributes, but it's not worth slowing down all
5090 implementations (or complicating the configuration) to cater to
5091 these rare cases in buggy implementations. */
5092
5093 #ifndef same_file_attributes
5094 # define same_file_attributes(s, t) \
5095 ((s)->st_mode == (t)->st_mode \
5096 && (s)->st_nlink == (t)->st_nlink \
5097 && (s)->st_uid == (t)->st_uid \
5098 && (s)->st_gid == (t)->st_gid \
5099 && (s)->st_size == (t)->st_size \
5100 && (s)->st_mtime == (t)->st_mtime \
5101 && (s)->st_ctime == (t)->st_ctime)
5102 #endif
5103
5104 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
5105
5106 #define BB_LITTLE_ENDIAN 1
5107
5108 #ifdef __APPLE__
5109 #include <libkern/OSByteOrder.h>
5110 #define SWAP_BE64(x) OSSwapInt64(x)
5111 #elif __FreeBSD__
5112 #include <sys/endian.h>
5113 #define SWAP_BE64(x) bswap64(x)
5114 #elif __linux__
5115 #include <byteswap.h>
5116 #define SWAP_BE64(x) bswap_64(x)
5117 #endif
5118
5119 #endif
-
+ 79F0962254F05215DF25FF996EF028819A31E13BEAB4B0C10B9955AB48A571EA5C7714155E30EE244A6699D4CB3A8EB2588A60CC4F3F4F8342E3F2671E0A66D7
vtools/src/util.c
(0 . 0)(1 . 428)
5124
5125 #include "diff.h"
5126 #include <error.h>
5127 #include <xalloc.h>
5128 #include <stdlib.h>
5129
5130 /* Use when a system call returns non-zero status. |name| should
5131 normally be the file name. */
5132
5133 void
5134 perror_with_name(char const *name) {
5135 error(0, errno, "%s", name);
5136 }
5137
5138 /* Use when a system call returns non-zero status and that is
5139 fatal. */
5140
5141 void
5142 pfatal_with_name(char const *name) {
5143 int e = errno;
5144 fprintf(stderr, "%s", name);
5145 exit(EXIT_TROUBLE);
5146 }
5147
5148 /* Print an error message containing |msgid|, then exit. */
5149
5150 void
5151 fatal(char const *msgid) {
5152 fprintf(stderr, "%s", msgid);
5153 exit(EXIT_TROUBLE);
5154 }
5155
5156 /* Like |printf|, except if -l in effect then save the message and
5157 print later. This is used for things like "Only in ...". */
5158
5159 void
5160 message(char const *format_msgid, char const *arg1, char const *arg2) {
5161 message5(format_msgid, arg1, arg2, 0, 0);
5162 }
5163
5164 void
5165 message5(char const *format_msgid, char const *arg1, char const *arg2,
5166 char const *arg3, char const *arg4) {
5167 fprintf(stderr, format_msgid, arg1, arg2, arg3, arg4);
5168 }
5169
5170 static char const *current_name0;
5171 static char const *current_name1;
5172 static bool currently_recursive;
5173
5174 /* Call before outputting the results of comparing files |name0| and |name1|
5175 to set up |outfile|, the stdio stream for the output to go to.
5176
5177 Usually, |outfile| is just stdout. But when -l was specified we
5178 fork off a |pr| and make |outfile| a pipe to it. 'pr' then outputs
5179 to our stdout. */
5180
5181 void
5182 setup_output(char const *name0, char const *name1, bool recursive) {
5183 current_name0 = name0;
5184 current_name1 = name1;
5185 currently_recursive = recursive;
5186 outfile = 0;
5187 }
5188
5189 static char c_escape_char(char c) {
5190 switch (c) {
5191 case '\a':
5192 return 'a';
5193 case '\b':
5194 return 'b';
5195 case '\t':
5196 return 't';
5197 case '\n':
5198 return 'n';
5199 case '\v':
5200 return 'v';
5201 case '\f':
5202 return 'f';
5203 case '\r':
5204 return 'r';
5205 case '"':
5206 return '"';
5207 case '\\':
5208 return '\\';
5209 default:
5210 return c < 32;
5211 }
5212 }
5213
5214 static char *
5215 c_escape(char const *str) {
5216 char const *s;
5217 size_t plus = 0;
5218 bool must_quote = false;
5219
5220 for (s = str; *s; s++) {
5221 char c = *s;
5222
5223 if (c == ' ') {
5224 must_quote = true;
5225 continue;
5226 }
5227 switch (c_escape_char(*s)) {
5228 case 1:
5229 plus += 3;
5230 /* fall through */
5231 case 0:
5232 break;
5233 default:
5234 plus++;
5235 break;
5236 }
5237 }
5238
5239 if (must_quote || plus) {
5240 size_t s_len = s - str;
5241 char *buffer = xmalloc(s_len + plus + 3);
5242 char *b = buffer;
5243
5244 *b++ = '"';
5245 for (s = str; *s; s++) {
5246 char c = *s;
5247 char escape = c_escape_char(c);
5248
5249 switch (escape) {
5250 case 0:
5251 *b++ = c;
5252 break;
5253 case 1:
5254 *b++ = '\\';
5255 *b++ = ((c >> 6) & 03) + '0';
5256 *b++ = ((c >> 3) & 07) + '0';
5257 *b++ = ((c >> 0) & 07) + '0';
5258 break;
5259 default:
5260 *b++ = '\\';
5261 *b++ = escape;
5262 break;
5263 }
5264 }
5265 *b++ = '"';
5266 *b = 0;
5267 return buffer;
5268 }
5269
5270 return (char *) str;
5271 }
5272
5273 void
5274 begin_output(void) {
5275 char *names[2];
5276 char *name;
5277
5278 if (outfile != 0)
5279 return;
5280
5281 names[0] = c_escape(current_name0);
5282 names[1] = c_escape(current_name1);
5283
5284 /* Construct the header of this piece of diff. */
5285 if(asprintf(&name, "diff%s %s %s", switch_string, names[0], names[1]) == -1)
5286 xalloc_die();
5287
5288 outfile = stdout;
5289
5290 /* If handling multiple files (because scanning a directory),
5291 print which files the following output is about. */
5292 if (currently_recursive)
5293 printf("%s\n", name);
5294
5295 free(name);
5296
5297 print_context_header(files, (char const *const *) names);
5298
5299 if (names[0] != current_name0)
5300 free(names[0]);
5301 if (names[1] != current_name1)
5302 free(names[1]);
5303 }
5304
5305 void
5306 finish_output(void) {
5307 if (outfile != 0 && outfile != stdout) {
5308 if (ferror(outfile))
5309 fatal("write failed");
5310 if (fclose(outfile) != 0)
5311 pfatal_with_name("write failed");
5312 }
5313
5314 outfile = 0;
5315 }
5316
5317 /* Compare two lines (typically one from each input file) according to
5318 the command line options. For efficiency, this is invoked only
5319 when the lines do not match exactly but an option like -i might
5320 cause us to ignore the difference. Return nonzero if the lines
5321 differ. */
5322
5323 bool
5324 lines_differ(char const *s1, char const *s2) {
5325 register char const *t1 = s1;
5326 register char const *t2 = s2;
5327 size_t column = 0;
5328
5329 while (1) {
5330 register unsigned char c1 = *t1++;
5331 register unsigned char c2 = *t2++;
5332
5333 /* Test for exact char equality first, since it's a common
5334 case. */
5335 if (c1 != c2) {
5336 break;
5337 }
5338 if (c1 == '\n')
5339 return false;
5340
5341 column++;
5342 }
5343
5344 return true;
5345 }
5346
5347 /* Find the consecutive changes at the start of the script START.
5348 Return the last link before the first gap. */
5349
5350 struct change *
5351 find_change(struct change *start) {
5352 return start;
5353 }
5354
5355 /* Divide |script| into pieces by calling |hunkfun| and print each piece
5356 with |printfun|. Both functions take one arg, an edit script.
5357
5358 |hunkfun| is called with the tail of the script and returns the
5359 last link that belongs together with the start of the tail.
5360
5361 |printfun| takes a subscript which belongs together (with a null
5362 link at the end) and prints it. */
5363
5364 void
5365 print_script(struct change *script) {
5366 struct change *next = script;
5367
5368 while (next) {
5369 struct change *this, *end;
5370
5371 /* Find a set of changes that belong together. */
5372 this = next;
5373 end = find_hunk(next);
5374
5375 /* Disconnect them from the rest of the changes, making them a
5376 hunk, and remember the rest for next iteration. */
5377 next = end->link;
5378 end->link = 0;
5379 #ifdef DEBUG
5380 debug_script (this);
5381 #endif
5382
5383 /* Print this hunk. */
5384 pr_unidiff_hunk(this);
5385
5386 /* Reconnect the script so it will all be freed properly. */
5387 end->link = next;
5388 }
5389 }
5390
5391 /* Print the text of a single line |line|, flagging it with the
5392 characters in |line_flag| (which say whether the line is inserted,
5393 deleted, changed, etc.). |line_flag| must not end in a blank,
5394 unless it is a single blank. */
5395
5396 void
5397 print_1_line(char const *line_flag, char const *const *line) {
5398 print_1_line_nl(line_flag, line, false);
5399 }
5400
5401 /* Print the text of a single line |line|, flagging it with the
5402 characters in |line_flag| (which say whether the line is inserted,
5403 deleted, changed, etc.). |line_flag| must not end in a blank,
5404 unless it is a single blank. If |skip_nl| is set, then the final
5405 |'\n'| is not printed. */
5406
5407 void
5408 print_1_line_nl(char const *line_flag, char const *const *line, bool skip_nl) {
5409 char const *base = line[0], *limit = line[1]; /* Help the compiler. */
5410 FILE *out = outfile; /* Help the compiler some more. */
5411 char const *flag_format = 0;
5412
5413 /* If -T was specified, use a Tab between the line-flag and the
5414 text. Otherwise use a Space (as Unix diff does). Print
5415 neither space nor tab if line-flags are empty. But omit
5416 trailing blanks if requested. */
5417
5418 if (line_flag && *line_flag) {
5419 char const *flag_format_1 = flag_format = "%s ";
5420 char const *line_flag_1 = line_flag;
5421
5422 if (suppress_blank_empty && **line == '\n') {
5423 flag_format_1 = "%s";
5424
5425 /* This hack to omit trailing blanks takes advantage of
5426 the fact that the only way that |line_flag| can end in
5427 a blank is when |line_flag| consists of a single
5428 blank. */
5429 line_flag_1 += *line_flag_1 == ' ';
5430 }
5431
5432 fprintf(out, flag_format_1, line_flag_1);
5433 }
5434
5435 output_1_line(base, limit - (skip_nl && limit[-1] == '\n'));
5436
5437 if ((!line_flag || line_flag[0]) && limit[-1] != '\n') {
5438 fprintf(out, "\n\\ %s\n", "No newline at end of file");
5439 }
5440 }
5441
5442 /* Output a line from |base| up to |limit|. */
5443
5444 void
5445 output_1_line(char const *base, char const *limit) {
5446 const size_t MAX_CHUNK = 1024;
5447 size_t left = limit - base;
5448 while (left) {
5449 size_t to_write = MIN (left, MAX_CHUNK);
5450 size_t written = fwrite(base, sizeof(char), to_write, outfile);
5451 if (written < to_write)
5452 return;
5453 base += written;
5454 left -= written;
5455 }
5456 }
5457
5458 /* Translate an internal line number (an index into diff's table of
5459 lines) into an actual line number in the input file. The internal
5460 line number is |i|. |file| points to the data on the file.
5461
5462 Internal line numbers count from 0 starting after the prefix.
5463 Actual line numbers count from 1 within the entire file. */
5464
5465 lin
5466 translate_line_number(struct file_data const *file, lin i) {
5467 return i + file->prefix_lines + 1;
5468 }
5469
5470 /* Translate a line number range. This is always done for printing,
5471 so for convenience translate to printint rather than lin, so that
5472 the caller can use |printf| with |"%"pI"d"| without casting. */
5473
5474 void
5475 translate_range(struct file_data const *file,
5476 lin a, lin b,
5477 printint *aptr, printint *bptr) {
5478 *aptr = translate_line_number(file, a - 1) + 1;
5479 *bptr = translate_line_number(file, b + 1) - 1;
5480 }
5481
5482 /* Look at a hunk of edit script and report the range of lines in each
5483 file that it applies to. |hunk| is the start of the hunk, which is
5484 a chain of |struct change|. The first and last line numbers of
5485 file 0 are stored in |*first0| and |*last0|, and likewise for file
5486 1 in |*first1| and |*last1|. Note that these are internal line numbers
5487 that count from 0.
5488
5489 If no lines from file 0 are deleted, then |first0| is |last0+1|.
5490
5491 Return |UNCHANGED| if only ignorable lines are inserted or deleted,
5492 |OLD| if lines of file 0 are deleted,
5493 |NEW| if lines of file 1 are inserted,
5494 and |CHANGED| if both kinds of changes are found. */
5495
5496 enum changes
5497 analyze_hunk(struct change *hunk,
5498 lin *first0, lin *last0,
5499 lin *first1, lin *last1) {
5500 struct change *next;
5501 lin l0, l1;
5502 lin show_from, show_to;
5503 /* If 0, ignore zero-length lines;
5504 if |SIZE_MAX|, do not ignore lines just because of their length. */
5505
5506 char const *const *linbuf0 = files[0].linbuf; /* Help the compiler. */
5507 char const *const *linbuf1 = files[1].linbuf;
5508
5509 show_from = show_to = 0;
5510
5511 *first0 = hunk->line0;
5512 *first1 = hunk->line1;
5513
5514 next = hunk;
5515 do {
5516 l0 = next->line0 + next->deleted - 1;
5517 l1 = next->line1 + next->inserted - 1;
5518 show_from += next->deleted;
5519 show_to += next->inserted;
5520 } while ((next = next->link) != 0);
5521
5522 *last0 = l0;
5523 *last1 = l1;
5524
5525 return (show_from ? OLD : UNCHANGED) | (show_to ? NEW : UNCHANGED);
5526 }
5527
5528 /* Yield a new block of |size| bytes, initialized to zero. */
5529
5530 void *
5531 zalloc(size_t size) {
5532 void *p = xmalloc(size);
5533 memset (p, 0, size);
5534 return p;
5535 }
5536
5537 void
5538 debug_script(struct change *sp) {
5539 fflush(stdout);
5540
5541 for (; sp; sp = sp->link) {
5542 printint line0 = sp->line0;
5543 printint line1 = sp->line1;
5544 printint deleted = sp->deleted;
5545 printint inserted = sp->inserted;
5546 fprintf(stderr, "%3"pI"d %3"pI"d delete %"pI"d insert %"pI"d\n",
5547 line0, line1, deleted, inserted);
5548 }
5549
5550 fflush(stderr);
5551 }