1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1992-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                                                                      *
20***********************************************************************/
21#pragma prototyped
22/*
23 * David Korn
24 * Glenn Fowler
25 * AT&T Bell Laboratories
26 *
27 * cat
28 */
29
30#include <cmd.h>
31#include <fcntl.h>
32
33static const char usage[] =
34"[-?\n@(#)$Id: cat (AT&T Research) 2009-03-31 $\n]"
35USAGE_LICENSE
36"[+NAME?cat - concatenate files]"
37"[+DESCRIPTION?\bcat\b copies each \afile\a in sequence to the standard"
38"	output. If no \afile\a is given, or if the \afile\a is \b-\b,"
39"	\bcat\b copies from standard input starting at the current location.]"
40
41"[b:number-nonblank?Number lines as with \b-n\b but omit line numbers from"
42"	blank lines.]"
43"[d:dos-input?Input files are opened in \atext\amode which removes carriage"
44"	returns in front of new-lines on some systems.]"
45"[e?Equivalent to \b-vE\b.]"
46"[n:number?Causes a line number to be inserted at the beginning of each line.]"
47"[s?Equivalent to \b-S\b for \aatt\a universe and \b-B\b otherwise.]"
48"[t?Equivalent to \b-vT\b.]"
49"[u:unbuffer?The output is not delayed by buffering.]"
50"[v:show-nonprinting?Causes non-printing characters (whith the exception of"
51"	tabs, new-lines, and form-feeds) to be output as printable charater"
52"	sequences. ASCII control characters are printed as \b^\b\an\a,"
53"	where \an\a is the corresponding ASCII character in the range"
54"	octal 100-137. The DEL character (octal 0177) is copied"
55"	as \b^?\b. Other non-printable characters are copied as \bM-\b\ax\a"
56"	where \ax\a is the ASCII character specified by the low-order seven"
57"	bits.  Multibyte characters in the current locale are treated as"
58"	printable characters.]"
59"[A:show-all?Equivalent to \b-vET\b.]"
60"[B:squeeze-blank?Multiple adjacent new-line characters are replace by one"
61"	new-line.]"
62"[D:dos-output?Output files are opened in \atext\amode which inserts carriage"
63"	returns in front of new-lines on some systems.]"
64"[E:show-ends?Causes a \b$\b to be inserted before each new-line.]"
65"[R:regress?Regression test defaults: \b-v\b buffer size 4.]"
66"[S:silent?\bcat\b is silent about non-existent files.]"
67"[T:show-blank?Causes tabs to be copied as \b^I\b and formfeeds as \b^L\b.]"
68
69"\n"
70"\n[file ...]\n"
71"\n"
72
73"[+SEE ALSO?\bcp\b(1), \bgetconf\b(1), \bpr\b(1)]"
74;
75
/* the ASCII DEL character (octal 0177) */
#define RUBOUT		0x7f

/*
 * control flags: one bit per behavior, or'ed together into the
 * flags word that b_cat() builds from the command line options
 */
#define B_FLAG		0x001
#define E_FLAG		0x002
#define F_FLAG		0x004
#define N_FLAG		0x008
#define S_FLAG		0x010
#define T_FLAG		0x020
#define U_FLAG		0x040
#define V_FLAG		0x080
#define D_FLAG		0x100
#define d_FLAG		0x200

/*
 * character types: the values stored in the states[] table
 * a states[] value of 0 means the byte needs no special handling
 */
#define T_ERROR		1
#define T_EOF		2
#define T_ENDBUF	3
#define T_NEWLINE	4
#define T_CONTROL	5
#define T_EIGHTBIT	6
#define T_CNTL8BIT	7

/* toggle bit 0100 (octal): maps a control code to its ^X letter and DEL to ? */
#define printof(c)	((c) ^ 0x40)
100
101typedef void* (*Reserve_f)(Sfio_t*, ssize_t, int);
102
103#ifndef sfvalue
104#define sfvalue(f)	((f)->_val)
105#endif
106
107static void*
108regress(Sfio_t* sp, ssize_t n, int f)
109{
110	void*	r;
111
112	if (!(r = sfreserve(sp, 4, f)))
113		r = sfreserve(sp, n, f);
114	else if (sfvalue(sp) > 4)
115		sfvalue(sp) = 4;
116	return r;
117}
118
119/*
120 * called for any special output processing
121 */
122
123static int
124vcat(register char* states, Sfio_t* ip, Sfio_t* op, Reserve_f reserve, int flags)
125{
126	register unsigned char*	cp;
127	register unsigned char*	pp;
128	unsigned char*		cur;
129	unsigned char*		end;
130	unsigned char*		buf;
131	unsigned char*		nxt;
132	register int		n;
133	register int		line;
134	register int		raw;
135	int			last;
136	int			c;
137	int			m;
138	int			any;
139	int			header;
140
141	unsigned char		meta[4];
142	unsigned char		tmp[32];
143
144	meta[0] = 'M';
145	meta[1] = '-';
146	last = -1;
147	*(cp = buf = end = tmp) = 0;
148	any = 0;
149	header = flags & (B_FLAG|N_FLAG);
150	line = 1;
151	states[0] = T_ENDBUF;
152	raw = !mbwide();
153	for (;;)
154	{
155		cur = cp;
156		if (raw)
157			while (!(n = states[*cp++]));
158		else
159			for (;;)
160			{
161				while (!(n = states[*cp++]));
162				if (n < T_CONTROL)
163					break;
164				if ((m = mbsize(pp = cp - 1)) > 1)
165					cp += m - 1;
166				else
167				{
168					if (m <= 0)
169					{
170						if (cur == pp)
171						{
172							if (last > 0)
173							{
174								*end = last;
175								last = -1;
176								c = end - pp + 1;
177								if ((m = mbsize(pp)) == c)
178								{
179									any = 1;
180									if (header)
181									{
182										header = 0;
183										sfprintf(op, "%6d\t", line);
184									}
185									sfwrite(op, cur, m);
186									*(cp = cur = end) = 0;
187								}
188								else
189								{
190									memcpy(tmp, pp, c);
191									if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
192									{
193										states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
194										*(cp = end = tmp + sizeof(tmp) - 1) = 0;
195										last = -1;
196									}
197									else if ((n = sfvalue(ip)) <= 0)
198									{
199										states[0] = n ? T_ERROR : T_EOF;
200										*(cp = end = tmp + sizeof(tmp) - 1) = 0;
201										last = -1;
202									}
203									else
204									{
205										cp = buf = nxt;
206										end = buf + n - 1;
207										last = *end;
208										*end = 0;
209									}
210 mb:
211									if ((n = end - cp + 1) >= (sizeof(tmp) - c))
212										n = sizeof(tmp) - c - 1;
213									memcpy(tmp + c, cp, n);
214									if ((m = mbsize(tmp)) >= c)
215									{
216										any = 1;
217										if (header)
218										{
219											header = 0;
220											sfprintf(op, "%6d\t", line);
221										}
222										sfwrite(op, tmp, m);
223										cur = cp += m - c;
224									}
225								}
226								continue;
227							}
228						}
229						else
230						{
231							cp = pp + 1;
232							n = 0;
233						}
234					}
235					break;
236				}
237			}
238		c = *--cp;
239		if ((m = cp - cur) || n >= T_CONTROL)
240		{
241 flush:
242			any = 1;
243			if (header)
244			{
245				header = 0;
246				sfprintf(op, "%6d\t", line);
247			}
248			if (m)
249				sfwrite(op, cur, m);
250		}
251 special:
252		switch (n)
253		{
254		case T_ERROR:
255			if (cp != end)
256			{
257				n = T_CONTROL;
258				goto flush;
259			}
260			return -1;
261		case T_EOF:
262			if (cp != end)
263			{
264				n = T_CONTROL;
265				goto flush;
266			}
267			return 0;
268		case T_ENDBUF:
269			if (cp != end)
270			{
271				n = T_CONTROL;
272				goto flush;
273			}
274			c = last;
275			if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
276			{
277				*(cp = end = tmp) = 0;
278				states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
279				last = -1;
280			}
281			else if ((m = sfvalue(ip)) <= 0)
282			{
283				*(cp = end = tmp) = 0;
284				states[0] = m ? T_ERROR : T_EOF;
285				last = -1;
286			}
287			else
288			{
289				buf = nxt;
290				end = buf + m - 1;
291				last = *end;
292				*end = 0;
293				cp = buf;
294			}
295			if (c >= 0)
296			{
297				if (!(n = states[c]))
298				{
299					*(cur = tmp) = c;
300					m = 1;
301					goto flush;
302				}
303				if (raw || n < T_CONTROL)
304				{
305					cp--;
306					goto special;
307				}
308				tmp[0] = c;
309				c = 1;
310				goto mb;
311			}
312			break;
313		case T_CONTROL:
314			do
315			{
316				sfputc(op, '^');
317				sfputc(op, printof(c));
318			} while (states[c = *++cp] == T_CONTROL);
319			break;
320		case T_CNTL8BIT:
321			meta[2] = '^';
322			do
323			{
324				n = c & ~0200;
325				meta[3] = printof(n);
326				sfwrite(op, (char*)meta, 4);
327			} while (states[c = *++cp] == T_CNTL8BIT && raw);
328			break;
329		case T_EIGHTBIT:
330			do
331			{
332				meta[2] = c & ~0200;
333				sfwrite(op, (char*)meta, 3);
334			} while (states[c = *++cp] == T_EIGHTBIT && raw);
335			break;
336		case T_NEWLINE:
337			if (header && !(flags & B_FLAG))
338				sfprintf(op, "%6d\t", line);
339			if (flags & E_FLAG)
340				sfputc(op, '$');
341			sfputc(op, '\n');
342			if (!header || !(flags & B_FLAG))
343				line++;
344			header = !(flags & S_FLAG);
345			for (;;)
346			{
347				if ((n = states[*++cp]) == T_ENDBUF)
348				{
349					if (cp != end || last != '\n')
350						break;
351					if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0)))
352					{
353						states[0] = sfvalue(ip) ? T_ERROR : T_EOF;
354						cp = end = tmp;
355						*cp-- = 0;
356						last = -1;
357					}
358					else if ((n = sfvalue(ip)) <= 0)
359					{
360						states[0] = n ? T_ERROR : T_EOF;
361						cp = end = tmp;
362						*cp-- = 0;
363						last = -1;
364					}
365					else
366					{
367						buf = nxt;
368						end = buf + n - 1;
369						last = *end;
370						*end = 0;
371						cp = buf - 1;
372					}
373				}
374				else if (n != T_NEWLINE)
375					break;
376				if (!(flags & S_FLAG) || any || header)
377				{
378					any = 0;
379					header = 0;
380					if ((flags & (B_FLAG|N_FLAG)) == N_FLAG)
381						sfprintf(op, "%6d\t", line);
382					if (flags & E_FLAG)
383						sfputc(op, '$');
384					sfputc(op, '\n');
385				}
386				if (!(flags & B_FLAG))
387					line++;
388			}
389			header = flags & (B_FLAG|N_FLAG);
390			break;
391		}
392	}
393}
394
395int
396b_cat(int argc, char** argv, void* context)
397{
398	register int		n;
399	register int		flags = 0;
400	register char*		cp;
401	register Sfio_t*	fp;
402	char*			mode;
403	Reserve_f		reserve = sfreserve;
404	int			att;
405	int			dovcat = 0;
406	char			states[UCHAR_MAX+1];
407
408	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
409	att = !strcmp(astconf("UNIVERSE", NiL, NiL), "att");
410	mode = "r";
411	for (;;)
412	{
413		n = 0;
414		switch (optget(argv, usage))
415		{
416		case 'A':
417			n = T_FLAG|E_FLAG|V_FLAG;
418			break;
419		case 'B':
420			n = S_FLAG;
421			break;
422		case 'b':
423			n = B_FLAG;
424			break;
425		case 'd':
426			mode = opt_info.num ? "rt" : "r";
427			continue;
428		case 'D':
429			n = d_FLAG;
430			break;
431		case 'E':
432			n = E_FLAG;
433			break;
434		case 'e':
435			n = E_FLAG|V_FLAG;
436			break;
437		case 'n':
438			n = N_FLAG;
439			break;
440		case 'R':
441			reserve = opt_info.num ? regress : sfreserve;
442			continue;
443		case 's':
444			n = att ? F_FLAG : S_FLAG;
445			break;
446		case 'S':
447			n = F_FLAG;
448			break;
449		case 'T':
450			n = T_FLAG;
451			break;
452		case 't':
453			n = T_FLAG|V_FLAG;
454			break;
455		case 'u':
456			n = U_FLAG;
457			break;
458		case 'v':
459			n = V_FLAG;
460			break;
461		case ':':
462			error(2, "%s", opt_info.arg);
463			break;
464		case '?':
465			error(ERROR_usage(2), "%s", opt_info.arg);
466			break;
467		}
468		if (!n)
469			break;
470		if (opt_info.num)
471			flags |= n;
472		else
473			flags &= ~n;
474	}
475	argv += opt_info.index;
476	if (error_info.errors)
477		error(ERROR_usage(2), "%s", optusage(NiL));
478	memset(states, 0, sizeof(states));
479	if (flags&V_FLAG)
480	{
481		memset(states, T_CONTROL, ' ');
482		states[RUBOUT] = T_CONTROL;
483		memset(states+0200, T_EIGHTBIT, 0200);
484		memset(states+0200, T_CNTL8BIT, ' ');
485		states[RUBOUT|0200] = T_CNTL8BIT;
486		states['\n'] = 0;
487	}
488	if (flags&T_FLAG)
489		states['\t'] = T_CONTROL;
490	states[0] = T_ENDBUF;
491	if (att)
492	{
493		if (flags&V_FLAG)
494		{
495			states['\n'|0200] = T_EIGHTBIT;
496			if (!(flags&T_FLAG))
497			{
498				states['\t'] = states['\f'] = 0;
499				states['\t'|0200] = states['\f'|0200] = T_EIGHTBIT;
500			}
501		}
502	}
503	else if (flags)
504	{
505		if (!(flags&T_FLAG))
506			states['\t'] = 0;
507	}
508	if (flags&(V_FLAG|T_FLAG|N_FLAG|E_FLAG|B_FLAG|S_FLAG))
509	{
510		states['\n'] = T_NEWLINE;
511		dovcat = 1;
512	}
513	if (flags&d_FLAG)
514		sfopen(sfstdout, NiL, "wt");
515	if (cp = *argv)
516		argv++;
517	do
518	{
519		if (!cp || streq(cp, "-"))
520		{
521			fp = sfstdin;
522			if (flags&D_FLAG)
523				sfopen(fp, NiL, mode);
524		}
525		else if (!(fp = sfopen(NiL, cp, mode)))
526		{
527			if (!(flags&F_FLAG))
528				error(ERROR_system(0), "%s: cannot open", cp);
529			error_info.errors = 1;
530			continue;
531		}
532		if (flags&U_FLAG)
533			sfsetbuf(fp, (void*)fp, -1);
534		if (dovcat)
535			n = vcat(states, fp, sfstdout, reserve, flags);
536		else if (sfmove(fp, sfstdout, SF_UNBOUND, -1) >= 0 && sfeof(fp))
537			n = 0;
538		else
539			n = -1;
540		if (fp != sfstdin)
541			sfclose(fp);
542		if (n < 0 && errno != EPIPE)
543		{
544			if (cp)
545				error(ERROR_system(0), "%s: read error", cp);
546			else
547				error(ERROR_system(0), "read error");
548		}
549		if (sferror(sfstdout))
550			break;
551	} while (cp = *argv++);
552	if (sfsync(sfstdout))
553		error(ERROR_system(0), "write error");
554	if (flags&d_FLAG)
555		sfopen(sfstdout, NiL, "w");
556	return error_info.errors;
557}
558