xref: /illumos-gate/usr/src/contrib/ast/src/lib/libcmd/cmp.c (revision b30d1939)
1 /*
2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /***********************************************************************
6 *                                                                      *
7 *               This software is part of the ast package               *
8 *          Copyright (c) 1992-2012 AT&T Intellectual Property          *
9 *                      and is licensed under the                       *
10 *                 Eclipse Public License, Version 1.0                  *
11 *                    by AT&T Intellectual Property                     *
12 *                                                                      *
13 *                A copy of the License is available at                 *
14 *          http://www.eclipse.org/org/documents/epl-v10.html           *
15 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
16 *                                                                      *
17 *              Information and Software Systems Research               *
18 *                            AT&T Research                             *
19 *                           Florham Park NJ                            *
20 *                                                                      *
21 *                 Glenn Fowler <gsf@research.att.com>                  *
22 *                  David Korn <dgk@research.att.com>                   *
23 *                                                                      *
24 ***********************************************************************/
25 #pragma prototyped
26 /*
27  * David Korn
28  * Glenn Fowler
29  * AT&T Bell Laboratories
30  *
31  * cmp
32  */
33 
34 static const char usage[] =
35 "[-?\n@(#)$Id: cmp (AT&T Research) 2010-04-11 $\n]"
36 USAGE_LICENSE
37 "[+NAME?cmp - compare two files]"
38 "[+DESCRIPTION?\bcmp\b compares two files \afile1\a and \afile2\a. "
39     "\bcmp\b writes no output if the files are the same. By default, if the "
40     "files differ, the byte and line number at which the first difference "
41     "occurred are written to standard output. Bytes and lines are numbered "
42     "beginning with 1.]"
43 "[+?If \askip1\a or \askip2\a are specified, or the \b-i\b option is "
44     "specified, initial bytes of the corresponding file are skipped before "
45     "beginning the compare. The skip values are in bytes or can have a "
46     "suffix of \bk\b for kilobytes or \bm\b for megabytes.]"
47 "[+?If either \afile1\a or \afiles2\a is \b-\b, \bcmp\b uses standard "
48     "input starting at the current location.]"
49 "[b:print-bytes?Print differing bytes as 3 digit octal values.]"
50 "[c:print-chars?Print differing bytes as follows: non-space printable "
51     "characters as themselves; space and control characters as \b^\b "
52     "followed by a letter of the alphabet; and characters with the high bit "
53     "set as the lower 7 bit character prefixed by \bM^\b for 7 bit space and "
54     "non-printable characters and \bM-\b for all other characters. If the 7 "
55     "bit character encoding is not ASCII then the characters are converted "
56     "to ASCII to determine \ahigh bit set\a, and if set it is cleared and "
57     "converted back to the native encoding. Multibyte characters in the "
58     "current locale are treated as printable characters.]"
59 "[d:differences?Print at most \adifferences\a differences using "
60     "\b--verbose\b output format. \b--differences=0\b is equivalent to "
61     "\b--silent\b.]#[differences]"
62 "[i:ignore-initial|skip?Skip the the first \askip1\a bytes in \afile1\a "
63     "and the first \askip2\a bytes in \afile2\a. If \askip2\a is omitted "
64     "then \askip1\a is used.]:[skip1[::skip2]]:=0::0]"
65 "[l:verbose?Write the decimal byte number and the differing bytes (in "
66     "octal) for each difference.]"
67 "[n:count|bytes?Compare at most \acount\a bytes.]#[count]"
68 "[s:quiet|silent?Write nothing for differing files; return non-zero exit "
69     "status only.]"
70 "\n"
71 "\nfile1 file2 [skip1 [skip2]]\n"
72 "\n"
73 "[+EXIT STATUS?]"
74     "{"
75         "[+0?The files or portions compared are identical.]"
76         "[+1?The files are different.]"
77         "[+>1?An error occurred.]"
78     "}"
79 "[+SEE ALSO?\bcomm\b(1), \bdiff\b(1), \bcat\b(1)]"
80 ;
81 
82 #include <cmd.h>
83 #include <ls.h>
84 #include <ctype.h>
85 #include <ccode.h>
86 
/*
 * Bits of the `flags' word built by b_cmp() from the command line and
 * consulted by cmp() and pretty().
 */
#define CMP_VERBOSE	(1 << 0)	/* -l, also set by -d: list each difference */
#define CMP_SILENT	(1 << 1)	/* -s: no output, exit status only */
#define CMP_CHARS	(1 << 2)	/* -c: show differing bytes as characters */
#define CMP_BYTES	(1 << 3)	/* -b: show differing bytes in octal */
91 
92 static void
pretty(Sfio_t * out,int o,int delim,int flags)93 pretty(Sfio_t *out, int o, int delim, int flags)
94 {
95 	int	c;
96 	int	m;
97 	char*	s;
98 	char	buf[10];
99 
100 	s = buf;
101 	if ((flags & CMP_BYTES) || !(flags & CMP_CHARS))
102 	{
103 		*s++ = ' ';
104 		if ((flags & CMP_CHARS) && delim != -1)
105 			*s++ = ' ';
106 		*s++ = '0' + ((o >> 6) & 07);
107 		*s++ = '0' + ((o >> 3) & 07);
108 		*s++ = '0' + (o & 07);
109 	}
110 	if (flags & CMP_CHARS)
111 	{
112 		*s++ = ' ';
113 		c = ccmapc(o, CC_NATIVE, CC_ASCII);
114 		if (c & 0x80)
115 		{
116 			m = 1;
117 			*s++ = 'M';
118 			c &= 0x7f;
119 			o = ccmapc(c, CC_ASCII, CC_NATIVE);
120 		}
121 		else
122 			m = 0;
123 		if (isspace(o) || !isprint(o))
124 		{
125 			if (!m)
126 				*s++ = ' ';
127 			*s++ = '^';
128 			c ^= 0x40;
129 			o = ccmapc(c, CC_ASCII, CC_NATIVE);
130 		}
131 		else if (m)
132 			*s++ = '-';
133 		else
134 		{
135 			*s++ = ' ';
136 			*s++ = ' ';
137 		}
138 		*s++ = o;
139 	}
140 	*s = 0;
141 	sfputr(out, buf, delim);
142 }
143 
144 /*
145  * compare two files
146  */
147 
148 static int
cmp(const char * file1,Sfio_t * f1,const char * file2,Sfio_t * f2,int flags,Sfoff_t count,Sfoff_t differences)149 cmp(const char* file1, Sfio_t* f1, const char* file2, Sfio_t* f2, int flags, Sfoff_t count, Sfoff_t differences)
150 {
151 	register int		c1;
152 	register int		c2;
153 	register unsigned char*	p1 = 0;
154 	register unsigned char*	p2 = 0;
155 	register Sfoff_t	lines = 1;
156 	register unsigned char*	e1 = 0;
157 	register unsigned char*	e2 = 0;
158 	Sfoff_t			pos = 0;
159 	int			n1 = 0;
160 	int			ret = 0;
161 	unsigned char*		last;
162 
163 	for (;;)
164 	{
165 		if ((c1 = e1 - p1) <= 0)
166 		{
167 			if (count > 0 && !(count -= n1))
168 				return ret;
169 			if (!(p1 = (unsigned char*)sfreserve(f1, SF_UNBOUND, 0)) || (c1 = sfvalue(f1)) <= 0)
170 			{
171 				if (sferror(f1)) {
172 					error(ERROR_exit(2),
173 					    "read error on %s", file1);
174 				}
175 				if ((e2 - p2) > 0 || sfreserve(f2, SF_UNBOUND, 0) && sfvalue(f2) > 0)
176 				{
177 					ret = 1;
178 					if (!(flags & CMP_SILENT))
179 						error(ERROR_exit(1), "EOF on %s", file1);
180 				}
181 				if (sferror(f2)) {
182 					error(ERROR_exit(2),
183 					    "read error on %s", file2);
184 				}
185 				return ret;
186 			}
187 			if (count > 0 && c1 > count)
188 				c1 = (int)count;
189 			e1 = p1 + c1;
190 			n1 = c1;
191 		}
192 		if ((c2 = e2 - p2) <= 0)
193 		{
194 			if (!(p2 = (unsigned char*)sfreserve(f2, SF_UNBOUND, 0)) || (c2 = sfvalue(f2)) <= 0)
195 			{
196 				if (sferror(f2)) {
197 					error(ERROR_exit(2),
198 					    "read error on %s", file2);
199 				}
200 				if (!(flags & CMP_SILENT))
201 					error(ERROR_exit(1), "EOF on %s", file2);
202 				return 1;
203 			}
204 			e2 = p2 + c2;
205 		}
206 		if (c1 > c2)
207 			c1 = c2;
208 		pos += c1;
209 		if (flags & CMP_SILENT)
210 		{
211 			if (memcmp(p1, p2, c1))
212 				return 1;
213 			p1 += c1;
214 			p2 += c1;
215 		}
216 		else
217 		{
218 			last = p1 + c1;
219 			while (p1 < last)
220 			{
221 				if ((c1 = *p1++) != *p2++)
222 				{
223 					if (differences >= 0)
224 					{
225 						if (!differences)
226 							return 1;
227 						differences--;
228 					}
229 #if 0
230 					if (!flags)
231 						sfprintf(sfstdout, "%s %s differ: char %I*d, line %I*u\n", file1, file2, sizeof(pos), pos - (last - p1), sizeof(lines), lines);
232 					else
233 					{
234 						sfprintf(sfstdout, "%6I*d", sizeof(pos), pos - (last - p1));
235 						pretty(sfstdout, c1, -1, flags);
236 						pretty(sfstdout, *(p2-1), '\n', flags);
237 					}
238 #else
239 					if (flags & CMP_VERBOSE)
240 						sfprintf(sfstdout, "%6I*d", sizeof(pos), pos - (last - p1));
241 					else
242 						sfprintf(sfstdout, "%s %s differ: char %I*d, line %I*u", file1, file2, sizeof(pos), pos - (last - p1), sizeof(lines), lines);
243 					if (flags & (CMP_BYTES|CMP_CHARS|CMP_VERBOSE))
244 					{
245 						sfputc(sfstdout, (flags & CMP_VERBOSE) ? ' ' : ',');
246 						pretty(sfstdout, c1, -1, flags);
247 						pretty(sfstdout, *(p2-1), '\n', flags);
248 					}
249 					else
250 						sfputc(sfstdout, '\n');
251 #endif
252 					if (!differences || differences < 0 && !(flags & CMP_VERBOSE))
253 						return 1;
254 					ret = 1;
255 				}
256 				if (c1 == '\n')
257 					lines++;
258 			}
259 		}
260 	}
261 }
262 
263 int
b_cmp(int argc,register char ** argv,Shbltin_t * context)264 b_cmp(int argc, register char** argv, Shbltin_t* context)
265 {
266 	char*		s;
267 	char*		e;
268 	char*		file1;
269 	char*		file2;
270 	int		n;
271 	struct stat	s1;
272 	struct stat	s2;
273 
274 	Sfio_t*		f1 = 0;
275 	Sfio_t*		f2 = 0;
276 	Sfoff_t		o1 = 0;
277 	Sfoff_t		o2 = 0;
278 	Sfoff_t		count = -1;
279 	Sfoff_t		differences = -1;
280 	int		flags = 0;
281 
282 	NoP(argc);
283 	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
284 	for (;;)
285 	{
286 		switch (optget(argv, usage))
287 		{
288 		case 'b':
289 			flags |= CMP_BYTES;
290 			continue;
291 		case 'c':
292 			flags |= CMP_CHARS;
293 			continue;
294 		case 'd':
295 			flags |= CMP_VERBOSE;
296 			differences = opt_info.number;
297 			continue;
298 		case 'i':
299 			o1 = strtoll(opt_info.arg, &e, 0);
300 			if (*e == ':')
301 				o2 = strtoll(e + 1, &e, 0);
302 			else
303 				o2 = o1;
304 			if (*e)
305 			{
306 				error(2, "%s: skip1:skip2 expected", opt_info.arg);
307 				break;
308 			}
309 			continue;
310 		case 'l':
311 			flags |= CMP_VERBOSE;
312 			continue;
313 		case 'n':
314 			count = opt_info.number;
315 			continue;
316 		case 's':
317 			flags |= CMP_SILENT;
318 			continue;
319 		case ':':
320 			error(2, "%s", opt_info.arg);
321 			break;
322 		case '?':
323 			error(ERROR_usage(2), "%s", opt_info.arg);
324 			break;
325 		}
326 		break;
327 	}
328 	argv += opt_info.index;
329 	if (error_info.errors || !(file1 = *argv++) || !(file2 = *argv++))
330 		error(ERROR_usage(2), "%s", optusage(NiL));
331 	n = 2;
332 	if (streq(file1, "-"))
333 		f1 = sfstdin;
334 	else if (!(f1 = sfopen(NiL, file1, "r")))
335 	{
336 		if (!(flags & CMP_SILENT))
337 			error(ERROR_system(0), "%s: cannot open", file1);
338 		goto done;
339 	}
340 	if (streq(file2, "-"))
341 		f2 = sfstdin;
342 	else if (!(f2 = sfopen(NiL, file2, "r")))
343 	{
344 		if (!(flags & CMP_SILENT))
345 			error(ERROR_system(0), "%s: cannot open", file2);
346 		goto done;
347 	}
348 	if (s = *argv++)
349 	{
350 		o1 = strtoll(s, &e, 0);
351 		if (*e)
352 		{
353 			error(ERROR_exit(0), "%s: %s: invalid skip", file1, s);
354 			goto done;
355 		}
356 		if (s = *argv++)
357 		{
358 			o2 = strtoll(s, &e, 0);
359 			if (*e)
360 			{
361 				error(ERROR_exit(0), "%s: %s: invalid skip", file2, s);
362 				goto done;
363 			}
364 		}
365 		if (*argv)
366 		{
367 			error(ERROR_usage(0), "%s", optusage(NiL));
368 			goto done;
369 		}
370 	}
371 	if (o1 && sfseek(f1, o1, SEEK_SET) != o1)
372 	{
373 		if (!(flags & CMP_SILENT))
374 			error(ERROR_exit(0), "EOF on %s", file1);
375 		n = 1;
376 		goto done;
377 	}
378 	if (o2 && sfseek(f2, o2, SEEK_SET) != o2)
379 	{
380 		if (!(flags & CMP_SILENT))
381 			error(ERROR_exit(0), "EOF on %s", file2);
382 		n = 1;
383 		goto done;
384 	}
385 	if (fstat(sffileno(f1), &s1))
386 		error(ERROR_system(0), "%s: cannot stat", file1);
387 	else if (fstat(sffileno(f2), &s2))
388 		error(ERROR_system(0), "%s: cannot stat", file1);
389 	else if (s1.st_ino == s2.st_ino && s1.st_dev == s2.st_dev && o1 == o2)
390 		n = 0;
391 	else
392 		n = ((flags & CMP_SILENT) && S_ISREG(s1.st_mode) && S_ISREG(s2.st_mode) && (s1.st_size - o1) != (s2.st_size - o2)) ? 1 : cmp(file1, f1, file2, f2, flags, count, differences);
393  done:
394 	if (f1 && f1 != sfstdin)
395 		sfclose(f1);
396 	if (f2 && f2 != sfstdin)
397 		sfclose(f2);
398 	return n;
399 }
400