1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23/*	  All Rights Reserved  	*/
24
25
26/*
27 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28 * Use is subject to license terms.
29 */
30
31#pragma ident	"%Z%%M%	%I%	%E% SMI"
32
33#include <fatal.h>
34#include <signal.h>
35#include <sys/types.h>
36#include <unistd.h>
37#include <stdio.h>
38#include <ctype.h>
39#include <string.h>
40#include <stdlib.h>
41#include <wait.h>
42
/* setsig() hooks signal numbers 1 through ONSIG - 1. */
#define	ONSIG	16
44
45/*
46 *	This program segments two files into pieces of <= seglim lines
47 *	(which is passed as a third argument or defaulted to some number)
48 *	and then executes diff upon the pieces. The output of
49 *	'diff' is then processed to make it look as if 'diff' had
50 *	processed the files whole. The reason for all this is that seglim
51 *	is a reasonable upper limit on the size of files that diff can
52 *	process.
53 *	NOTE -- by segmenting the files in this manner, it cannot be
54 *	guaranteed that the 'diffing' of the segments will generate
55 *	a minimal set of differences.
56 *	This process is most definitely not equivalent to 'diffing'
57 *	the files whole, assuming 'diff' could handle such large files.
58 *
59 *	'diff' is executed by a child process, generated by forking,
60 *	and communicates with this program through pipes.
61 */
62
/* Scratch buffer in which error messages are formatted before fatal(). */
static char Error[128];

static int seglim;	/* limit of size of file segment to be generated */

/* Path of the program the child process executes on each segment pair. */
static char diff[]  =  "/usr/bin/diff";
static char tempskel[] = "/tmp/bdXXXXXX"; /* used to generate temp file names */
/* Name of the temp file written by saverest() and read by putsave(). */
static char tempfile[32];
/* Names of the temp files holding the current old/new file segments. */
static char otmp[32], ntmp[32];
/* FTL* bits (see <fatal.h>) that drive the behaviour of fatal(). */
static int	fflags;
static int	fatal_num = 1;		/* exit number for fatal exit */
/*
 * Number of lines already accounted for; fixnum() adds it to every line
 * number printed by 'diff' for the current segment.
 */
static offset_t	linenum;
/* Current allocated sizes of the old-line, new-line and diff-line buffers. */
static size_t obufsiz, nbufsiz, dbufsiz;
static char *readline(char **, size_t *, FILE *);
static void addgen(char **, size_t *, FILE *);
static void delgen(char **, size_t *, FILE *);
static void fixnum(char *);
static void fatal(char *);
static void setsig(void);
static void setsig1(int);
static char *satoi(char *, offset_t *);
static FILE *maket(char *);

/* argv[0]; used as the prefix of messages printed by fatal(). */
static char *prognam;
86
87int
88main(int argc, char *argv[])
89{
90	FILE *poldfile, *pnewfile;
91	char *oline, *nline, *diffline;
92	char *olp, *nlp, *dp;
93	int otcnt, ntcnt;
94	pid_t i;
95	int pfd[2];
96	FILE *poldtemp, *pnewtemp, *pipeinp;
97	int status;
98
99	prognam = argv[0];
100	/*
101	 * Set flags for 'fatal' so that it will clean up,
102	 * produce a message, and terminate.
103	 */
104	fflags = FTLMSG | FTLCLN | FTLEXIT;
105
106	setsig();
107
108	if (argc < 3 || argc > 5)
109		fatal("arg count");
110
111	if (strcmp(argv[1], "-") == 0 && strcmp(argv[2], "-") == 0)
112		fatal("both files standard input");
113	if (strcmp(argv[1], "-") == 0)
114		poldfile = stdin;
115	else
116		if ((poldfile = fopen(argv[1], "r")) == NULL) {
117			(void) snprintf(Error, sizeof (Error),
118				"Can not open '%s'", argv[1]);
119			fatal(Error);
120		}
121	if (strcmp(argv[2], "-") == 0)
122		pnewfile = stdin;
123	else
124		if ((pnewfile = fopen(argv[2], "r")) == NULL) {
125			(void) snprintf(Error, sizeof (Error),
126				"Can not open '%s'", argv[2]);
127			fatal(Error);
128		}
129
130	seglim = 3500;
131
132	if (argc > 3) {
133		if (argv[3][0] == '-' && argv[3][1] == 's')
134			fflags &= ~FTLMSG;
135		else {
136			if ((seglim = atoi(argv[3])) == 0)
137				fatal("non-numeric limit");
138			if (argc == 5 && argv[4][0] == '-' &&
139					argv[4][1] == 's')
140				fflags &= ~FTLMSG;
141		}
142	}
143
144	linenum = 0;
145
146	/* Allocate the buffers and initialize their lengths */
147
148	obufsiz = BUFSIZ;
149	nbufsiz = BUFSIZ;
150	dbufsiz = BUFSIZ;
151
152	if ((oline = (char *)malloc(obufsiz)) == NULL ||
153	    (nline = (char *)malloc(nbufsiz)) == NULL ||
154	    (diffline = (char *)malloc(dbufsiz)) == NULL)
155		fatal("Out of memory");
156
157	/*
158	 * The following while-loop will prevent any lines
159	 * common to the beginning of both files from being
160	 * sent to 'diff'. Since the running time of 'diff' is
161	 * non-linear, this will help improve performance.
162	 * If, during this process, both files reach EOF, then
163	 * the files are equal and the program will terminate.
164	 * If either file reaches EOF before the other, the
165	 * program will generate the appropriate 'diff' output
166	 * itself, since this can be easily determined and will
167	 * avoid executing 'diff' completely.
168	 */
169	for (;;) {
170		olp = readline(&oline, &obufsiz, poldfile);
171		nlp = readline(&nline, &nbufsiz, pnewfile);
172
173		if (!olp && !nlp)	/* EOF found on both:  files equal */
174			return (0);
175
176		if (!olp) {
177			/*
178			 * The entire old file is a prefix of the
179			 * new file. Generate the appropriate "append"
180			 * 'diff'-like output, which is of the form:
181			 * 		nan, n
182			 * where 'n' represents a line-number.
183			 */
184			addgen(&nline, &nbufsiz, pnewfile);
185		}
186
187		if (!nlp) {
188			/*
189			 * The entire new file is a prefix of the
190			 * old file. Generate the appropriate "delete"
191			 * 'diff'-like output, which is of the form:
192			 * 		n, ndn
193			 * where 'n' represents a line-number.
194			 */
195			delgen(&oline, &obufsiz, poldfile);
196		}
197
198		if (strcmp(olp, nlp) == 0)
199			linenum++;
200		else
201			break;
202	}
203
204	/*
205	 * Here, first 'linenum' lines are equal.
206	 * The following while-loop segments both files into
207	 * seglim segments, forks and executes 'diff' on the
208	 * segments, and processes the resulting output of
209	 * 'diff', which is read from a pipe.
210	 */
211	for (;;) {
212		/* If both files are at EOF, everything is done. */
213		if (!olp && !nlp)	/* finished */
214			return (0);
215
216		if (!olp) {
217			/*
218			 * Generate appropriate "append"
219			 * output without executing 'diff'.
220			 */
221			addgen(&nline, &nbufsiz, pnewfile);
222		}
223
224		if (!nlp) {
225			/*
226			 * Generate appropriate "delete"
227			 * output without executing 'diff'.
228			 */
229			delgen(&oline, &obufsiz, poldfile);
230		}
231
232		/*
233		 * Create a temporary file to hold a segment
234		 * from the old file, and write it.
235		 */
236		poldtemp = maket(otmp);
237		otcnt = 0;
238		while (olp && otcnt < seglim) {
239			(void) fputs(oline, poldtemp);
240			if (ferror(poldtemp) != 0) {
241				fflags |= FTLMSG;
242				fatal("Can not write to temporary file");
243			}
244			olp = readline(&oline, &obufsiz, poldfile);
245			otcnt++;
246		}
247		(void) fclose(poldtemp);
248
249		/*
250		 * Create a temporary file to hold a segment
251		 * from the new file, and write it.
252		 */
253		pnewtemp = maket(ntmp);
254		ntcnt = 0;
255		while (nlp && ntcnt < seglim) {
256			(void) fputs(nline, pnewtemp);
257			if (ferror(pnewtemp) != 0) {
258				fflags |= FTLMSG;
259				fatal("Can not write to temporary file");
260			}
261			nlp = readline(&nline, &nbufsiz, pnewfile);
262			ntcnt++;
263		}
264		(void) fclose(pnewtemp);
265
266		/* Create pipes and fork.  */
267		if ((pipe(pfd)) == -1)
268			fatal("Can not create pipe");
269		if ((i = fork()) < (pid_t)0) {
270			(void) close(pfd[0]);
271			(void) close(pfd[1]);
272			fatal("Can not fork, try again");
273		} else if (i == (pid_t)0) {	/* child process */
274			(void) close(pfd[0]);
275			(void) close(1);
276			(void) dup(pfd[1]);
277			(void) close(pfd[1]);
278
279			/* Execute 'diff' on the segment files. */
280			(void) execlp(diff, diff, otmp, ntmp, 0);
281
282			/*
283			 * Exit code here must be > 1.
284			 * Parent process treats exit code of 1 from the child
285			 * as non-error because the child process "diff" exits
286			 * with a status of 1 when a difference is encountered.
287			 * The error here is a true error--the parent process
288			 * needs to detect it and exit with a non-zero status.
289			 */
290			(void) close(1);
291			(void) snprintf(Error, sizeof (Error),
292			    "Can not execute '%s'", diff);
293			fatal_num = 2;
294			fatal(Error);
295		} else {			/* parent process */
296			(void) close(pfd[1]);
297			pipeinp = fdopen(pfd[0], "r");
298
299			/* Process 'diff' output. */
300			while ((dp = readline(&diffline, &dbufsiz, pipeinp))) {
301				if (isdigit(*dp))
302					fixnum(diffline);
303				else
304					(void) printf("%s", diffline);
305			}
306
307			(void) fclose(pipeinp);
308
309			/* EOF on pipe. */
310			(void) wait(&status);
311			if (status&~0x100) {
312				(void) snprintf(Error, sizeof (Error),
313				    "'%s' failed", diff);
314				fatal(Error);
315			}
316		}
317		linenum += seglim;
318
319		/* Remove temporary files. */
320		(void) unlink(otmp);
321		(void) unlink(ntmp);
322	}
323}
324
325/* Routine to save remainder of a file. */
326static void
327saverest(char **linep, size_t *bufsizp, FILE *iptr)
328{
329	char *lp;
330	FILE *temptr;
331
332	temptr = maket(tempfile);
333
334	lp = *linep;
335
336	while (lp) {
337		(void) fputs(*linep, temptr);
338		linenum++;
339		lp = readline(linep, bufsizp, iptr);
340	}
341	(void) fclose(temptr);
342}
343
344/* Routine to write out data saved by 'saverest' and to remove the file. */
345static void
346putsave(char **linep, size_t *bufsizp, char type)
347{
348	FILE *temptr;
349
350	if ((temptr = fopen(tempfile, "r")) == NULL) {
351		(void) snprintf(Error, sizeof (Error),
352		    "Can not open tempfile ('%s')", tempfile); fatal(Error);
353	}
354
355	while (readline(linep, bufsizp, temptr))
356		(void) printf("%c %s", type, *linep);
357
358	(void) fclose(temptr);
359
360	(void) unlink(tempfile);
361}
362
363static void
364fixnum(char *lp)
365{
366	offset_t num;
367
368	while (*lp) {
369		switch (*lp) {
370
371		case 'a':
372		case 'c':
373		case 'd':
374		case ',':
375		case '\n':
376			(void) printf("%c", *lp);
377			lp++;
378			break;
379
380		default:
381			lp = satoi(lp, &num);
382			num += linenum;
383			(void) printf("%lld", num);
384		}
385	}
386}
387
388static void
389addgen(char **lpp, size_t *bufsizp, FILE *fp)
390{
391	offset_t oldline;
392	(void) printf("%llda%lld", linenum, linenum+1);
393
394	/* Save lines of new file. */
395	oldline = linenum + 1;
396	saverest(lpp, bufsizp, fp);
397
398	if (oldline < linenum)
399		(void) printf(",%lld\n", linenum);
400	else
401		(void) printf("\n");
402
403	/* Output saved lines, as 'diff' would. */
404	putsave(lpp, bufsizp, '>');
405
406	exit(0);
407}
408
409static void
410delgen(char **lpp, size_t *bufsizp, FILE *fp)
411{
412	offset_t savenum;
413
414	(void) printf("%lld", linenum+1);
415	savenum = linenum;
416
417	/* Save lines of old file. */
418	saverest(lpp, bufsizp, fp);
419
420	if (savenum +1 != linenum)
421		(void) printf(",%lldd%lld\n", linenum, savenum);
422	else
423		(void) printf("d%lld\n", savenum);
424
425	/* Output saved lines, as 'diff' would.  */
426	putsave(lpp, bufsizp, '<');
427
428	exit(0);
429}
430
431static void
432clean_up()
433{
434	(void) unlink(tempfile);
435	(void) unlink(otmp);
436	(void) unlink(ntmp);
437}
438
439static FILE *
440maket(char *file)
441{
442	FILE *iop;
443	int fd;
444
445	(void) strcpy(file, tempskel);
446	if ((fd = mkstemp(file)) == -1 ||
447		(iop = fdopen(fd, "w+")) == NULL) {
448		(void) snprintf(Error, sizeof (Error),
449		    "Can not open/create temp file ('%s')", file);
450		fatal(Error);
451	}
452	return (iop);
453}
454
455static void
456fatal(char *msg)
457/*
458 *	General purpose error handler.
459 *
460 *	The argument to fatal is a pointer to an error message string.
461 *	The action of this routine is driven completely from
462 *	the "fflags" global word (see <fatal.h>).
463 *
464 *	The FTLMSG bit controls the writing of the error
465 *	message on file descriptor 2.  A newline is written
466 *	after the user supplied message.
467 *
468 *	If the FTLCLN bit is on, clean_up is called.
469 */
470{
471	if (fflags & FTLMSG)
472		(void) fprintf(stderr, "%s: %s\n", prognam, msg);
473	if (fflags & FTLCLN)
474		clean_up();
475	if (fflags & FTLEXIT)
476		exit(fatal_num);
477}
478
479static void
480setsig()
481/*
482 *	General-purpose signal setting routine.
483 *	All non-ignored, non-caught signals are caught.
484 *	If a signal other than hangup, interrupt, or quit is caught,
485 *	a "user-oriented" message is printed on file descriptor 2.
486 *	If hangup, interrupt or quit is caught, that signal
487 *	is set to ignore.
488 *	Termination is like that of "fatal",
489 *	via "clean_up()"
490 */
491{
492	void (*act)(int);
493	int j;
494
495	for (j = 1; j < ONSIG; j++) {
496		act = signal(j, setsig1);
497		if (act == SIG_ERR)
498			continue;
499		if (act == SIG_DFL)
500			continue;
501		(void) signal(j, act);
502	}
503}
504
/*
 * Signal handler installed by setsig().
 *
 * Ignores further deliveries of the same signal, removes the temporary
 * files and terminates the process with status 1.
 *
 * Termination uses _exit() rather than exit(): exit() flushes stdio
 * streams, which is not async-signal-safe and can deadlock or corrupt
 * output if the signal arrives while the main flow is inside a stdio
 * call.  As a consequence, output still sitting in stdio buffers is
 * discarded when the program is killed by a signal.
 */
static void
setsig1(int sig)
{
	(void) signal(sig, SIG_IGN);
	clean_up();
	_exit(1);
}
513
/*
 * Convert the run of decimal digits at the start of p to a number.
 *
 * p:	string to scan; it is not modified.
 * ip:	receives the converted value, or 0 when p does not start with a
 *	digit.
 *
 * Returns a pointer to the first character after the digits (p itself
 * when there are none).  No overflow check is made.
 */
static char *
satoi(char *p, offset_t *ip)
{
	offset_t sum = 0;

	/*
	 * The <ctype.h> functions are only defined for values representable
	 * as unsigned char (or EOF), so a possibly negative plain char has
	 * to be converted first.
	 */
	while (isdigit((unsigned char)*p)) {
		sum = sum * 10 + (*p - '0');
		p++;
	}
	*ip = sum;
	return (p);
}
525
/*
 * Read one line of data from a file, growing the caller's buffer as
 * needed.
 *
 * bufferp:	address of the pointer to the line buffer (allocated with
 *		malloc); updated if the buffer has to be reallocated.
 * bufsizp:	address of the buffer's size in bytes (at least 2);
 *		updated together with *bufferp.
 * filep:	stream to read from.
 *
 * If the current buffer is not large enough to contain the line, the
 * buffer is doubled and reading continues until the whole line has been
 * read or end-of-file is reached.  Running out of memory, or a failed
 * read while extending a line, is reported through fatal().
 *
 * Returns *bufferp when a line (or a final line without a newline) was
 * read, or NULL when the first read obtains nothing.
 */

static char *
readline(char **bufferp, size_t *bufsizp, FILE *filep)
{
	char *bufp;
	size_t newsize;		/* number of bytes to make buffer */
	size_t oldsize;

	/*
	 * Plant sentinels in the last two bytes.  If fgets() fills the
	 * buffer completely, it overwrites the last byte with '\0' and the
	 * one before it with the final character it stored.
	 */
	(*bufferp)[*bufsizp - 1] = '\t'; /* arbitrary non-zero character */
	(*bufferp)[*bufsizp - 2] = ' ';	/* arbitrary non-newline char */
	bufp = fgets(*bufferp, *bufsizp, filep);
	if (bufp == NULL)
		return (bufp);

	/*
	 * Buffer full and no newline at its end: the line continues.  The
	 * strlen() test rules out a short line that merely carries a NUL
	 * byte in its data.
	 */
	while ((*bufferp)[*bufsizp - 1] == '\0' &&
	    (*bufferp)[*bufsizp - 2] != '\n' &&
	    strlen(*bufferp) == *bufsizp - 1) {
		newsize = 2 * (*bufsizp);
		bufp = realloc(*bufferp, newsize);
		if (bufp == NULL)
			fatal("Out of memory");
		oldsize = *bufsizp;
		*bufsizp = newsize;
		*bufferp = bufp;
		(*bufferp)[*bufsizp - 1] = '\t';
		(*bufferp)[*bufsizp - 2] = ' ';

		/* Continue on top of the terminator of the previous read. */
		bufp = fgets(*bufferp + oldsize - 1, oldsize + 1, filep);
		if (bufp == NULL) {
			/*
			 * Ask the stream through the standard feof()
			 * interface instead of looking inside FILE.
			 */
			if (feof(filep)) {
				/* Last line of the file had no newline. */
				bufp = *bufferp;
				break;
			} else
				fatal("Read error");
		} else
			bufp = *bufferp;
	}
	return (bufp);
}
569