xref: /illumos-gate/usr/src/contrib/ast/src/lib/libcmd/cat.c (revision b30d1939)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1992-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23  * David Korn
24  * Glenn Fowler
25  * AT&T Bell Laboratories
26  *
27  * cat
28  */
29 
30 #include <cmd.h>
31 #include <fcntl.h>
32 
33 static const char usage[] =
34 "[-?\n@(#)$Id: cat (AT&T Research) 2012-05-31 $\n]"
35 USAGE_LICENSE
36 "[+NAME?cat - concatenate files]"
37 "[+DESCRIPTION?\bcat\b copies each \afile\a in sequence to the standard"
38 "	output. If no \afile\a is given, or if the \afile\a is \b-\b,"
39 "	\bcat\b copies from standard input starting at the current location.]"
40 
41 "[b:number-nonblank?Number lines as with \b-n\b but omit line numbers from"
42 "	blank lines.]"
43 "[d:dos-input?Input files are opened in \atext\amode which removes carriage"
44 "	returns in front of new-lines on some systems.]"
45 "[e?Equivalent to \b-vE\b.]"
46 "[n:number?Causes a line number to be inserted at the beginning of each line.]"
47 "[s?Equivalent to \b-S\b for \aatt\a universe and \b-B\b otherwise.]"
48 "[t?Equivalent to \b-vT\b.]"
49 "[u:unbuffer?The output is not delayed by buffering.]"
50 "[v:show-nonprinting|print-chars?Print characters as follows: space and "
51     "printable characters as themselves; control characters as \b^\b "
52     "followed by a letter of the alphabet; and characters with the high bit "
53     "set as the lower 7 bit character prefixed by \bM^\b for 7 bit "
54     "non-printable characters and \bM-\b for all other characters. If the 7 "
55     "bit character encoding is not ASCII then the characters are converted "
56     "to ASCII to determine \ahigh bit set\a, and if set it is cleared and "
57     "converted back to the native encoding. Multibyte characters in the "
58     "current locale are treated as printable characters.]"
59 "[A:show-all?Equivalent to \b-vET\b.]"
60 "[B:squeeze-blank?Multiple adjacent new-line characters are replace by one"
61 "	new-line.]"
62 "[D:dos-output?Output files are opened in \atext\amode which inserts carriage"
63 "	returns in front of new-lines on some systems.]"
64 "[E:show-ends?Causes a \b$\b to be inserted before each new-line.]"
65 "[R:regress?Regression test defaults: \b-v\b buffer size 4.]"
66 "[S:silent?\bcat\b is silent about non-existent files.]"
67 "[T:show-blank?Causes tabs to be copied as \b^I\b and formfeeds as \b^L\b.]"
68 
69 "\n"
70 "\n[file ...]\n"
71 "\n"
72 
73 "[+SEE ALSO?\bcp\b(1), \bgetconf\b(1), \bpr\b(1)]"
74 ;
75 
/* octal 0177; b_cat() classifies it (and 0177|0200) as a control character under -v */
76 #define RUBOUT	0177
77 
78 /* control flags */
/*
 * Bits accumulated in b_cat()'s flags word by the option switch:
 *   B_FLAG  -b  number lines, skipping blank ones
 *   E_FLAG  -E  (also -e, -A) put $ before each new-line
 *   F_FLAG  -S  (and -s in the att universe) no diagnostic when an open fails
 *   N_FLAG  -n  number lines
 *   S_FLAG  -B  (and -s outside the att universe) squeeze blank lines
 *   T_FLAG  -T  (also -t, -A) tab is classified T_CONTROL
 *   U_FLAG  -u  sfsetbuf() is called on each input stream
 *   V_FLAG  -v  (also -e, -t, -A) show non-printing characters
 *   D_FLAG      tested by b_cat() before reopening standard input with
 *               the -d open mode
 *   d_FLAG  -D  standard output is reopened with mode "wt"
 * Note the case mismatch: d_FLAG belongs to -D and D_FLAG to the -d side.
 */
79 #define B_FLAG		(1<<0)
80 #define E_FLAG		(1<<1)
81 #define F_FLAG		(1<<2)
82 #define N_FLAG		(1<<3)
83 #define S_FLAG		(1<<4)
84 #define T_FLAG		(1<<5)
85 #define U_FLAG		(1<<6)
86 #define V_FLAG		(1<<7)
87 #define D_FLAG		(1<<8)
88 #define d_FLAG		(1<<9)
89 
90 /* character types */
/*
 * Values stored in the states[] table indexed by byte value; 0 means
 * "copy unchanged". The numeric order matters: vcat() compares against
 * T_CONTROL to separate the buffer/new-line events (below it) from the
 * classes that are rewritten on output (T_CONTROL and above).
 * T_ERROR, T_EOF and T_ENDBUF only ever appear in states[0].
 */
91 #define T_ERROR		1
92 #define T_EOF		2
93 #define T_ENDBUF	3
94 #define T_NEWLINE	4
95 #define T_CONTROL	5
96 #define T_EIGHTBIT	6
97 #define T_CNTL8BIT	7
98 
/* toggle bit 0100: the character written after ^ for a control byte, e.g. 011 -> 0111 */
99 #define printof(c)	((c)^0100)
100 
/* type of the input reserve function given to vcat(); b_cat() passes sfreserve or regress */
101 typedef void* (*Reserve_f)(Sfio_t*, ssize_t, int);
102 
/* fallback for headers that do not supply sfvalue(): use the stream's _val member directly */
103 #ifndef sfvalue
104 #define sfvalue(f)	((f)->_val)
105 #endif
106 
107 static void*
regress(Sfio_t * sp,ssize_t n,int f)108 regress(Sfio_t* sp, ssize_t n, int f)
109 {
110 	void*	r;
111 
112 	if (!(r = sfreserve(sp, 4, f)))
113 		r = sfreserve(sp, n, f);
114 	else if (sfvalue(sp) > 4)
115 		sfvalue(sp) = 4;
116 	return r;
117 }
118 
119 /*
120  * called for any special output processing
121  */
122 
/*
 * vcat: copy ip to op, rewriting bytes according to the states[] table
 * and the B_FLAG, N_FLAG, E_FLAG and S_FLAG bits of flags (the only
 * flag bits tested in this function).
 *
 *	states	table indexed by byte value holding 0 (copy unchanged) or
 *		a T_* class; states[0] is overwritten here: T_ENDBUF on
 *		entry, then T_ERROR or T_EOF once reserve stops yielding
 *		data
 *	ip	input stream, read only through (*reserve)(ip, SF_UNBOUND, 0)
 *		with the length taken from sfvalue(ip)
 *	op	output stream
 *	reserve	function used to obtain each input buffer
 *	flags	control flag bits
 *
 * Returns 0 when the T_EOF state is consumed and -1 when the T_ERROR
 * state is consumed. states[0] becomes T_ERROR when sfvalue(ip) is
 * non-zero after reserve yields no data, T_EOF otherwise.
 *
 * Buffer convention: the final byte of every buffer is saved in `last`
 * and replaced by 0, so the scan loops stop on states[0] without an
 * explicit bounds test; the saved byte is processed separately when the
 * next buffer is fetched.
 */
123 static int
vcat(register char * states,Sfio_t * ip,Sfio_t * op,Reserve_f reserve,int flags)124 vcat(register char* states, Sfio_t* ip, Sfio_t* op, Reserve_f reserve, int flags)
125 {
	/* cp: scan position; pp: start of a candidate multibyte sequence */
126 	register unsigned char*	cp;
127 	register unsigned char*	pp;
	/* cur: start of the pending run of unchanged bytes; end: last (sentinel) byte of the buffer */
128 	unsigned char*		cur;
129 	unsigned char*		end;
130 	unsigned char*		buf;
131 	unsigned char*		nxt;
	/* n: state of the byte that stopped the scan; line: next line number to print */
132 	register int		n;
133 	register int		line;
134 	register int		raw;
	/* last: byte displaced by the sentinel, or -1 when there is none */
135 	int			last;
136 	int			c;
137 	int			m;
	/* any: something other than a bare new-line has been written (used for squeezing) */
138 	int			any;
	/* header: a line number is still owed before the next output on this line */
139 	int			header;
140 
	/* meta: the three output bytes of an M-x / M^x sequence; tmp: scratch and initial empty buffer */
141 	unsigned char		meta[3];
142 	unsigned char		tmp[32];
143 
144 	meta[0] = 'M';
145 	last = -1;
	/* start on an empty buffer: the first scan hits tmp[0] == 0 at cp == end, which fetches real input */
146 	*(cp = buf = end = tmp) = 0;
147 	any = 0;
148 	header = flags & (B_FLAG|N_FLAG);
149 	line = 1;
150 	states[0] = T_ENDBUF;
	/* raw: no multibyte checks are made (presumably a single-byte locale -- see mbwide()) */
151 	raw = !mbwide();
152 	for (;;)
153 	{
154 		cur = cp;
155 		if (raw)
			/* skip bytes whose state is 0; cp ends one past the byte that stopped the scan */
156 			while (!(n = states[*cp++]));
157 		else
158 			for (;;)
159 			{
160 				while (!(n = states[*cp++]));
				/* error, eof, end of buffer and new-line are never part of a multibyte character */
161 				if (n < T_CONTROL)
162 					break;
				/* a complete multibyte character here is copied unchanged: step over it */
163 				if ((m = mbsize(pp = cp - 1)) > 1)
164 					cp += m - 1;
165 				else
166 				{
167 					if (m <= 0)
168 					{
						/* nothing pending before pp: the sequence may have been cut by the sentinel */
169 						if (cur == pp)
170 						{
171 							if (last > 0)
172 							{
								/* put the displaced byte back and measure again */
173 								*end = last;
174 								last = -1;
								/* c: number of bytes from pp through end */
175 								c = end - pp + 1;
176 								if ((m = mbsize(pp)) == c)
177 								{
									/* the character ends exactly at the buffer end: write it and leave an empty buffer */
178 									any = 1;
179 									if (header)
180 									{
181 										header = 0;
182 										sfprintf(op, "%6d\t", line);
183 									}
184 									sfwrite(op, cur, m);
185 									*(cp = cur = end) = 0;
186 								}
187 								else
188 								{
									/* carry the partial bytes into tmp and fetch the next buffer */
189 									memcpy(tmp, pp, c);
190 									if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
191 									{
192 										states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
193 										*(cp = end = tmp + sizeof(tmp) - 1) = 0;
194 										last = -1;
195 									}
196 									else if ((n = sfvalue(ip)) <= 0)
197 									{
198 										states[0] = n ? T_ERROR : T_EOF;
199 										*(cp = end = tmp + sizeof(tmp) - 1) = 0;
200 										last = -1;
201 									}
202 									else
203 									{
204 										cp = buf = nxt;
205 										end = buf + n - 1;
206 										last = *end;
207 										*end = 0;
208 									}
									/*
									 * mb: tmp[0..c-1] holds the start of a possible multibyte
									 * character; append what fits from the current buffer and
									 * measure again. Also entered from the T_ENDBUF case with
									 * the saved last byte in tmp[0] and c == 1.
									 */
209  mb:
210 									if ((n = end - cp + 1) >= (sizeof(tmp) - c))
211 										n = sizeof(tmp) - c - 1;
212 									memcpy(tmp + c, cp, n);
213 									if ((m = mbsize(tmp)) >= c)
214 									{
215 										any = 1;
216 										if (header)
217 										{
218 											header = 0;
219 											sfprintf(op, "%6d\t", line);
220 										}
221 										sfwrite(op, tmp, m);
										/* skip the m - c bytes of the character that came from the new buffer */
222 										cur = cp += m - c;
223 									}
224 								}
225 								continue;
226 							}
227 						}
228 						else
229 						{
							/* bytes are pending before pp: treat pp as unchanged too and let them be flushed (n = 0) */
230 							cp = pp + 1;
231 							n = 0;
232 						}
233 					}
234 					break;
235 				}
236 			}
		/* back up onto the byte that stopped the scan; m is the count of unchanged bytes before it */
237 		c = *--cp;
238 		if ((m = cp - cur) || n >= T_CONTROL)
239 		{
			/* flush: print an owed line number, then the m unchanged bytes at cur */
240  flush:
241 			any = 1;
242 			if (header)
243 			{
244 				header = 0;
245 				sfprintf(op, "%6d\t", line);
246 			}
247 			if (m)
248 				sfwrite(op, cur, m);
249 		}
250  special:
251 		switch (n)
252 		{
		/*
		 * In the three cases below cp < end means the 0 byte was found
		 * before the sentinel position, so it is handled as T_CONTROL.
		 * NOTE(review): m is not reset before these gotos; when the
		 * flush block above already wrote m bytes for this stop it
		 * looks like the same bytes would be written again -- confirm.
		 */
253 		case T_ERROR:
254 			if (cp < end)
255 			{
256 				n = T_CONTROL;
257 				goto flush;
258 			}
259 			return -1;
260 		case T_EOF:
261 			if (cp < end)
262 			{
263 				n = T_CONTROL;
264 				goto flush;
265 			}
266 			return 0;
267 		case T_ENDBUF:
268 			if (cp < end)
269 			{
270 				n = T_CONTROL;
271 				goto flush;
272 			}
			/* reached the sentinel: remember the displaced byte, then fetch the next buffer */
273 			c = last;
274 			if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
275 			{
				/* no more data: point at an empty buffer in tmp and arm the error/eof state */
276 				*(cp = end = tmp + sizeof(tmp) - 1) = 0;
277 				states[0] = (m = sfvalue(ip)) ? T_ERROR : T_EOF;
278 				last = -1;
279 			}
280 			else if ((m = sfvalue(ip)) <= 0)
281 			{
282 				*(cp = end = tmp + sizeof(tmp) - 1) = 0;
283 				states[0] = m ? T_ERROR : T_EOF;
284 				last = -1;
285 			}
286 			else
287 			{
288 				buf = nxt;
289 				end = buf + m - 1;
290 				last = *end;
291 				*end = 0;
292 				cp = buf;
293 			}
			/* now deal with the byte the previous sentinel displaced */
294 			if (c >= 0)
295 			{
296 				if (!(n = states[c]))
297 				{
					/* unchanged byte: write it through tmp as a one byte run */
298 					*(cur = tmp) = c;
299 					m = 1;
300 					goto flush;
301 				}
302 				if (raw || n < T_CONTROL)
303 				{
					/* step back one so the ++cp in the selected case lands on the first byte of the new buffer */
304 					cp--;
305 					goto special;
306 				}
				/* possible start of a multibyte character: join it with the new buffer */
307 				tmp[0] = c;
308 				c = 1;
309 				goto mb;
310 			}
311 			break;
312 		case T_CONTROL:
			/* ^ followed by printof(c), repeated over a run of control bytes */
313 			do
314 			{
315 				sfputc(op, '^');
316 				sfputc(op, printof(c));
317 			} while (states[c = *++cp] == T_CONTROL);
318 			break;
319 		case T_CNTL8BIT:
			/* M^ followed by printof() of the byte with bit 0200 cleared; runs are only merged in raw mode */
320 			meta[1] = '^';
321 			do
322 			{
323 				n = c & ~0200;
324 				meta[2] = printof(n);
325 				sfwrite(op, (char*)meta, 3);
326 			} while (states[c = *++cp] == T_CNTL8BIT && raw);
327 			break;
328 		case T_EIGHTBIT:
			/* M- followed by the byte with bit 0200 cleared */
329 			meta[1] = '-';
330 			do
331 			{
332 				meta[2] = c & ~0200;
333 				sfwrite(op, (char*)meta, 3);
334 			} while (states[c = *++cp] == T_EIGHTBIT && raw);
335 			break;
336 		case T_NEWLINE:
			/* header still set here means nothing was written on this line; it is numbered unless B_FLAG is on */
337 			if (header && !(flags & B_FLAG))
338 				sfprintf(op, "%6d\t", line);
339 			if (flags & E_FLAG)
340 				sfputc(op, '$');
341 			sfputc(op, '\n');
			/* the line number does not advance for an empty line under B_FLAG */
342 			if (!header || !(flags & B_FLAG))
343 				line++;
			/* header is reused inside the loop below as an "emit the next blank line" flag */
344 			header = !(flags & S_FLAG);
			/* consume the run of new-lines that follows, across buffer boundaries */
345 			for (;;)
346 			{
347 				if ((n = states[*++cp]) == T_ENDBUF)
348 				{
					/* stop unless this is the sentinel and the byte it displaced is itself a new-line */
349 					if (cp < end || last != '\n')
350 						break;
351 					if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
352 					{
353 						states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
						/* empty buffer at tmp[0]; cp is left one before it for the ++cp above */
354 						cp = end = tmp;
355 						*cp-- = 0;
356 						last = -1;
357 					}
358 					else if ((n = sfvalue(ip)) <= 0)
359 					{
360 						states[0] = n ? T_ERROR : T_EOF;
361 						cp = end = tmp;
362 						*cp-- = 0;
363 						last = -1;
364 					}
365 					else
366 					{
367 						buf = nxt;
368 						end = buf + n - 1;
369 						last = *end;
370 						*end = 0;
371 						cp = buf - 1;
372 					}
373 				}
374 				else if (n != T_NEWLINE)
375 					break;
				/* one blank line: always written without S_FLAG; with it, only while any or header is set */
376 				if (!(flags & S_FLAG) || any || header)
377 				{
378 					any = 0;
379 					header = 0;
					/* blank lines get a number only for N_FLAG without B_FLAG */
380 					if ((flags & (B_FLAG|N_FLAG)) == N_FLAG)
381 						sfprintf(op, "%6d\t", line);
382 					if (flags & E_FLAG)
383 						sfputc(op, '$');
384 					sfputc(op, '\n');
385 				}
386 				if (!(flags & B_FLAG))
387 					line++;
388 			}
			/* the next line owes a number again when numbering is on */
389 			header = flags & (B_FLAG|N_FLAG);
390 			break;
391 		}
392 	}
393 }
394 
395 int
b_cat(int argc,char ** argv,Shbltin_t * context)396 b_cat(int argc, char** argv, Shbltin_t* context)
397 {
398 	register int		n;
399 	register int		flags = 0;
400 	register char*		cp;
401 	register Sfio_t*	fp;
402 	char*			mode;
403 	Reserve_f		reserve = sfreserve;
404 	int			att;
405 	int			dovcat = 0;
406 	char			states[UCHAR_MAX+1];
407 
408 	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
409 	att = !strcmp(astconf("UNIVERSE", NiL, NiL), "att");
410 	mode = "r";
411 	for (;;)
412 	{
413 		n = 0;
414 		switch (optget(argv, usage))
415 		{
416 		case 'A':
417 			n = T_FLAG|E_FLAG|V_FLAG;
418 			break;
419 		case 'B':
420 			n = S_FLAG;
421 			break;
422 		case 'b':
423 			n = B_FLAG;
424 			break;
425 		case 'd':
426 			mode = opt_info.num ? "rt" : "r";
427 			continue;
428 		case 'D':
429 			n = d_FLAG;
430 			break;
431 		case 'E':
432 			n = E_FLAG;
433 			break;
434 		case 'e':
435 			n = E_FLAG|V_FLAG;
436 			break;
437 		case 'n':
438 			n = N_FLAG;
439 			break;
440 		case 'R':
441 			reserve = opt_info.num ? regress : sfreserve;
442 			continue;
443 		case 's':
444 			n = att ? F_FLAG : S_FLAG;
445 			break;
446 		case 'S':
447 			n = F_FLAG;
448 			break;
449 		case 'T':
450 			n = T_FLAG;
451 			break;
452 		case 't':
453 			n = T_FLAG|V_FLAG;
454 			break;
455 		case 'u':
456 			n = U_FLAG;
457 			break;
458 		case 'v':
459 			n = V_FLAG;
460 			break;
461 		case ':':
462 			error(2, "%s", opt_info.arg);
463 			break;
464 		case '?':
465 			error(ERROR_usage(2), "%s", opt_info.arg);
466 			break;
467 		}
468 		if (!n)
469 			break;
470 		if (opt_info.num)
471 			flags |= n;
472 		else
473 			flags &= ~n;
474 	}
475 	argv += opt_info.index;
476 	if (error_info.errors)
477 		error(ERROR_usage(2), "%s", optusage(NiL));
478 	memset(states, 0, sizeof(states));
479 	if (flags&V_FLAG)
480 	{
481 		memset(states, T_CONTROL, ' ');
482 		states[RUBOUT] = T_CONTROL;
483 		memset(states+0200, T_EIGHTBIT, 0200);
484 		memset(states+0200, T_CNTL8BIT, ' ');
485 		states[RUBOUT|0200] = T_CNTL8BIT;
486 		states['\n'] = 0;
487 	}
488 	if (flags&T_FLAG)
489 		states['\t'] = T_CONTROL;
490 	states[0] = T_ENDBUF;
491 	if (att)
492 	{
493 		if (flags&V_FLAG)
494 		{
495 			states['\n'|0200] = T_EIGHTBIT;
496 			if (!(flags&T_FLAG))
497 			{
498 				states['\t'] = states['\f'] = 0;
499 				states['\t'|0200] = states['\f'|0200] = T_EIGHTBIT;
500 			}
501 		}
502 	}
503 	else if (flags)
504 	{
505 		if (!(flags&T_FLAG))
506 			states['\t'] = 0;
507 	}
508 	if (flags&(V_FLAG|T_FLAG|N_FLAG|E_FLAG|B_FLAG|S_FLAG))
509 	{
510 		states['\n'] = T_NEWLINE;
511 		dovcat = 1;
512 	}
513 	if (flags&d_FLAG)
514 		sfopen(sfstdout, NiL, "wt");
515 	if (cp = *argv)
516 		argv++;
517 	do
518 	{
519 		if (!cp || streq(cp, "-"))
520 		{
521 			fp = sfstdin;
522 			if (flags&D_FLAG)
523 				sfopen(fp, NiL, mode);
524 		}
525 		else if (!(fp = sfopen(NiL, cp, mode)))
526 		{
527 			if (!(flags&F_FLAG))
528 				error(ERROR_system(0), "%s: cannot open", cp);
529 			error_info.errors = 1;
530 			continue;
531 		}
532 		if (flags&U_FLAG)
533 			sfsetbuf(fp, (void*)fp, -1);
534 		if (dovcat)
535 			n = vcat(states, fp, sfstdout, reserve, flags);
536 		else if (sfmove(fp, sfstdout, SF_UNBOUND, -1) >= 0 && sfeof(fp))
537 			n = 0;
538 		else
539 			n = -1;
540 		if (fp != sfstdin)
541 			sfclose(fp);
542 		if (n < 0 && !ERROR_PIPE(errno) && errno != EINTR)
543 		{
544 			if (cp)
545 				error(ERROR_system(0), "%s: read error", cp);
546 			else
547 				error(ERROR_system(0), "read error");
548 		}
549 		if (sferror(sfstdout))
550 			break;
551 	} while (cp = *argv++);
552 	if (sfsync(sfstdout))
553 		error(ERROR_system(0), "write error");
554 	if (flags&d_FLAG)
555 		sfopen(sfstdout, NiL, "w");
556 	return error_info.errors;
557 }
558