1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin * *
3da2e3ebdSchin * This software is part of the ast package *
4*b30d1939SAndy Fiddaman * Copyright (c) 1992-2012 AT&T Intellectual Property *
5da2e3ebdSchin * and is licensed under the *
6*b30d1939SAndy Fiddaman * Eclipse Public License, Version 1.0 *
77c2fbfb3SApril Chin * by AT&T Intellectual Property *
8da2e3ebdSchin * *
9da2e3ebdSchin * A copy of the License is available at *
10*b30d1939SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html *
11*b30d1939SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12da2e3ebdSchin * *
13da2e3ebdSchin * Information and Software Systems Research *
14da2e3ebdSchin * AT&T Research *
15da2e3ebdSchin * Florham Park NJ *
16da2e3ebdSchin * *
17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> *
18da2e3ebdSchin * David Korn <dgk@research.att.com> *
19da2e3ebdSchin * *
20da2e3ebdSchin ***********************************************************************/
21da2e3ebdSchin #pragma prototyped
22da2e3ebdSchin /*
23da2e3ebdSchin * David Korn
24da2e3ebdSchin * AT&T Bell Laboratories
25da2e3ebdSchin *
 * cut fields or columns from each line of a file
27da2e3ebdSchin */
28da2e3ebdSchin
29da2e3ebdSchin static const char usage[] =
30*b30d1939SAndy Fiddaman "[-?\n@(#)$Id: cut (AT&T Research) 2010-08-11 $\n]"
31da2e3ebdSchin USAGE_LICENSE
32da2e3ebdSchin "[+NAME?cut - cut out selected columns or fields of each line of a file]"
33da2e3ebdSchin "[+DESCRIPTION?\bcut\b bytes, characters, or character-delimited fields "
34da2e3ebdSchin "from one or more files, contatenating them on standard output.]"
35da2e3ebdSchin "[+?The option argument \alist\a is a comma-separated or blank-separated "
36da2e3ebdSchin "list of positive numbers and ranges. Ranges can be of three "
37da2e3ebdSchin "forms. The first is two positive integers separated by a hyphen "
38da2e3ebdSchin "(\alow\a\b-\b\ahigh\a), which represents all fields from \alow\a to "
39da2e3ebdSchin "\ahigh\a. The second is a positive number preceded by a hyphen "
40da2e3ebdSchin "(\b-\b\ahigh\a), which represents all fields from field \b1\b to "
41da2e3ebdSchin "\ahigh\a. The last is a positive number followed by a hyphen "
42da2e3ebdSchin "(\alow\a\b-\b), which represents all fields from \alow\a to the "
43da2e3ebdSchin "last field, inclusive. Elements in the \alist\a can be repeated, "
44da2e3ebdSchin "can overlap, and can appear in any order. The order of the "
45da2e3ebdSchin "output is that of the input.]"
46da2e3ebdSchin "[+?One and only one of \b-b\b, \b-c\b, or \b-f\b must be specified.]"
47da2e3ebdSchin "[+?If no \afile\a is given, or if the \afile\a is \b-\b, \bcut\b "
48da2e3ebdSchin "cuts from standard input. The start of the file is defined "
49da2e3ebdSchin "as the current offset.]"
503e14f97fSRoger A. Faulkner "[b:bytes]:[list?\bcut\b based on a list of byte counts.]"
513e14f97fSRoger A. Faulkner "[c:characters]:[list?\bcut\b based on a list of character counts.]"
52da2e3ebdSchin "[d:delimiter]:[delim?The field character for the \b-f\b option is set "
53da2e3ebdSchin "to \adelim\a. The default is the \btab\b character.]"
54da2e3ebdSchin "[f:fields]:[list?\bcut\b based on fields separated by the delimiter "
55da2e3ebdSchin "character specified with the \b-d\b optiion.]"
563e14f97fSRoger A. Faulkner "[n!:split?Split multibyte characters selected by the \b-b\b option.]"
57da2e3ebdSchin "[R|r:reclen]#[reclen?If \areclen\a > 0, the input will be read as fixed length "
58da2e3ebdSchin "records of length \areclen\a when used with the \b-b\b or \b-c\b "
59da2e3ebdSchin "option.]"
60da2e3ebdSchin "[s:suppress|only-delimited?Suppress lines with no delimiter characters, "
61da2e3ebdSchin "when used with the \b-f\b option. By default, lines with no "
62da2e3ebdSchin "delimiters will be passsed in untouched.]"
63da2e3ebdSchin "[D:line-delimeter|output-delimiter]:[ldelim?The line delimiter character for "
64da2e3ebdSchin "the \b-f\b option is set to \aldelim\a. The default is the "
65da2e3ebdSchin "\bnewline\b character.]"
663e14f97fSRoger A. Faulkner "[N!:newline?Output new-lines at end of each record when used "
67da2e3ebdSchin "with the \b-b\b or \b-c\b option.]"
68da2e3ebdSchin "\n"
69da2e3ebdSchin "\n[file ...]\n"
70da2e3ebdSchin "\n"
71da2e3ebdSchin "[+EXIT STATUS?]{"
72da2e3ebdSchin "[+0?All files processed successfully.]"
73da2e3ebdSchin "[+>0?One or more files failed to open or could not be read.]"
74da2e3ebdSchin "}"
75da2e3ebdSchin "[+SEE ALSO?\bpaste\b(1), \bgrep\b(1)]"
76da2e3ebdSchin ;
77da2e3ebdSchin
78da2e3ebdSchin #include <cmd.h>
79da2e3ebdSchin #include <ctype.h>
80da2e3ebdSchin
/*
 * one delimiter (field or line) as selected on the command line
 */
struct Delim_s
{
	char*	str;	/* option argument the delimiter was taken from */
	int	len;	/* delimiter length in bytes; > 1 only for a multibyte delimiter */
	int	chr;	/* delimiter character value */
};

typedef struct Delim_s Delim_t;
87da2e3ebdSchin
88da2e3ebdSchin typedef struct Cut_s
89da2e3ebdSchin {
903e14f97fSRoger A. Faulkner int mb;
913e14f97fSRoger A. Faulkner int eob;
92da2e3ebdSchin int cflag;
933e14f97fSRoger A. Faulkner int nosplit;
94da2e3ebdSchin int sflag;
95da2e3ebdSchin int nlflag;
96da2e3ebdSchin int reclen;
973e14f97fSRoger A. Faulkner Delim_t wdelim;
983e14f97fSRoger A. Faulkner Delim_t ldelim;
993e14f97fSRoger A. Faulkner unsigned char space[UCHAR_MAX+1];
100da2e3ebdSchin int list[2]; /* NOTE: must be last member */
101da2e3ebdSchin } Cut_t;
102da2e3ebdSchin
#define HUGE		INT_MAX		/* terminates Cut_t.list; also "no upper bound" */
#define BLOCK		(8*1024)	/* size handed to sftmp(); parenthesized so it is safe in any expression */

/* mode bits collected by b_cut() and interpreted by cutinit() */
#define C_BYTES		1
#define C_CHARS		2
#define C_FIELDS	4
#define C_SUPRESS	8
#define C_NOSPLIT	16
#define C_NONEWLINE	32

/* byte classes stored in Cut_t.space[] */
#define SP_LINE		1	/* line delimiter byte */
#define SP_WORD		2	/* single-byte field delimiter */
#define SP_WIDE		3	/* byte >= 0x80 in a multibyte locale */
1153e14f97fSRoger A. Faulkner
/*
 * qsort() callback: order two list entries by the int each one
 * starts with; yields -1, 0 or 1
 */

static int
mycomp(register const void* a, register const void* b)
{
	const int	lhs = *(const int*)a;
	const int	rhs = *(const int*)b;

	return (lhs > rhs) - (lhs < rhs);
}
129da2e3ebdSchin
1303e14f97fSRoger A. Faulkner static Cut_t*
cutinit(int mode,char * str,Delim_t * wdelim,Delim_t * ldelim,size_t reclen)1313e14f97fSRoger A. Faulkner cutinit(int mode, char* str, Delim_t* wdelim, Delim_t* ldelim, size_t reclen)
132da2e3ebdSchin {
1333e14f97fSRoger A. Faulkner register int* lp;
1343e14f97fSRoger A. Faulkner register int c;
1353e14f97fSRoger A. Faulkner register int n = 0;
1363e14f97fSRoger A. Faulkner register int range = 0;
1373e14f97fSRoger A. Faulkner register char* cp = str;
1383e14f97fSRoger A. Faulkner Cut_t* cut;
1393e14f97fSRoger A. Faulkner
1403e14f97fSRoger A. Faulkner if (!(cut = (Cut_t*)stakalloc(sizeof(Cut_t) + strlen(cp) * sizeof(int))))
141da2e3ebdSchin error(ERROR_exit(1), "out of space");
1423e14f97fSRoger A. Faulkner if (cut->mb = mbwide())
143da2e3ebdSchin {
1443e14f97fSRoger A. Faulkner memset(cut->space, 0, sizeof(cut->space) / 2);
1453e14f97fSRoger A. Faulkner memset(cut->space + sizeof(cut->space) / 2, SP_WIDE, sizeof(cut->space) / 2);
1463e14f97fSRoger A. Faulkner }
1473e14f97fSRoger A. Faulkner else
1483e14f97fSRoger A. Faulkner memset(cut->space, 0, sizeof(cut->space));
1493e14f97fSRoger A. Faulkner cut->wdelim = *wdelim;
1503e14f97fSRoger A. Faulkner if (wdelim->len == 1)
1513e14f97fSRoger A. Faulkner cut->space[wdelim->chr] = SP_WORD;
1523e14f97fSRoger A. Faulkner cut->ldelim = *ldelim;
1533e14f97fSRoger A. Faulkner cut->eob = (ldelim->len == 1) ? ldelim->chr : 0;
1543e14f97fSRoger A. Faulkner cut->space[cut->eob] = SP_LINE;
1553e14f97fSRoger A. Faulkner cut->cflag = (mode&C_CHARS) && cut->mb;
1563e14f97fSRoger A. Faulkner cut->nosplit = (mode&(C_BYTES|C_NOSPLIT)) == (C_BYTES|C_NOSPLIT) && cut->mb;
1573e14f97fSRoger A. Faulkner cut->sflag = (mode&C_SUPRESS) != 0;
1583e14f97fSRoger A. Faulkner cut->nlflag = (mode&C_NONEWLINE) != 0;
1593e14f97fSRoger A. Faulkner cut->reclen = reclen;
1603e14f97fSRoger A. Faulkner lp = cut->list;
1613e14f97fSRoger A. Faulkner for (;;)
1623e14f97fSRoger A. Faulkner switch(c = *cp++)
1633e14f97fSRoger A. Faulkner {
164da2e3ebdSchin case ' ':
165da2e3ebdSchin case '\t':
166da2e3ebdSchin while(*cp==' ' || *cp=='\t')
167da2e3ebdSchin cp++;
1683e14f97fSRoger A. Faulkner /*FALLTHROUGH*/
169da2e3ebdSchin case 0:
170da2e3ebdSchin case ',':
171da2e3ebdSchin if(range)
172da2e3ebdSchin {
173da2e3ebdSchin --range;
1743e14f97fSRoger A. Faulkner if((n = (n ? (n-range) : (HUGE-1))) < 0)
175da2e3ebdSchin error(ERROR_exit(1),"invalid range for c/f option");
176da2e3ebdSchin *lp++ = range;
177da2e3ebdSchin *lp++ = n;
178da2e3ebdSchin }
179da2e3ebdSchin else
180da2e3ebdSchin {
181da2e3ebdSchin *lp++ = --n;
182da2e3ebdSchin *lp++ = 1;
183da2e3ebdSchin }
184da2e3ebdSchin if(c==0)
185da2e3ebdSchin {
186da2e3ebdSchin register int *dp;
187da2e3ebdSchin *lp = HUGE;
1883e14f97fSRoger A. Faulkner n = 1 + (lp-cut->list)/2;
1893e14f97fSRoger A. Faulkner qsort(lp=cut->list,n,2*sizeof(*lp),mycomp);
190da2e3ebdSchin /* eliminate overlapping regions */
191da2e3ebdSchin for(n=0,range= -2,dp=lp; *lp!=HUGE; lp+=2)
192da2e3ebdSchin {
193da2e3ebdSchin if(lp[0] <= range)
194da2e3ebdSchin {
195da2e3ebdSchin if(lp[1]==HUGE)
196da2e3ebdSchin {
197da2e3ebdSchin dp[-1] = HUGE;
198da2e3ebdSchin break;
199da2e3ebdSchin }
200da2e3ebdSchin if((c = lp[0]+lp[1]-range)>0)
201da2e3ebdSchin {
202da2e3ebdSchin range += c;
203da2e3ebdSchin dp[-1] += c;
204da2e3ebdSchin }
205da2e3ebdSchin }
206da2e3ebdSchin else
207da2e3ebdSchin {
208da2e3ebdSchin range = *dp++ = lp[0];
209da2e3ebdSchin if(lp[1]==HUGE)
210da2e3ebdSchin {
211da2e3ebdSchin *dp++ = HUGE;
212da2e3ebdSchin break;
213da2e3ebdSchin }
214da2e3ebdSchin range += (*dp++ = lp[1]);
215da2e3ebdSchin }
216da2e3ebdSchin }
217da2e3ebdSchin *dp = HUGE;
2183e14f97fSRoger A. Faulkner lp = cut->list;
219da2e3ebdSchin /* convert ranges into gaps */
220da2e3ebdSchin for(n=0; *lp!=HUGE; lp+=2)
221da2e3ebdSchin {
222da2e3ebdSchin c = *lp;
223da2e3ebdSchin *lp -= n;
224da2e3ebdSchin n = c+lp[1];
225da2e3ebdSchin }
2263e14f97fSRoger A. Faulkner return cut;
227da2e3ebdSchin }
228da2e3ebdSchin n = range = 0;
229da2e3ebdSchin break;
230da2e3ebdSchin
231da2e3ebdSchin case '-':
232da2e3ebdSchin if(range)
233da2e3ebdSchin error(ERROR_exit(1),"bad list for c/f option");
234da2e3ebdSchin range = n?n:1;
235da2e3ebdSchin n = 0;
236da2e3ebdSchin break;
237da2e3ebdSchin
238da2e3ebdSchin default:
239da2e3ebdSchin if(!isdigit(c))
240da2e3ebdSchin error(ERROR_exit(1),"bad list for c/f option");
241da2e3ebdSchin n = 10*n + (c-'0');
2423e14f97fSRoger A. Faulkner break;
2433e14f97fSRoger A. Faulkner }
244da2e3ebdSchin /* NOTREACHED */
245da2e3ebdSchin }
246da2e3ebdSchin
247da2e3ebdSchin /*
 * cutcols: byte/character mode (-b / -c).
 * Read <fdin> one record at a time and write the parts selected by
 * cut->list to <fdout>. A record is cut->reclen bytes when reclen is
 * non-zero, otherwise one newline terminated line; a final line
 * without a newline is still picked up (SF_LASTR).
 * cut->list alternates "skip this many" and "copy this many" counts
 * and ends with HUGE. Returns early if a write of selected data fails.
 */
250da2e3ebdSchin
2513e14f97fSRoger A. Faulkner static void
cutcols(Cut_t * cut,Sfio_t * fdin,Sfio_t * fdout)2523e14f97fSRoger A. Faulkner cutcols(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
253da2e3ebdSchin {
2543e14f97fSRoger A. Faulkner register int c;
2553e14f97fSRoger A. Faulkner register int len;
2563e14f97fSRoger A. Faulkner register int ncol = 0;
2573e14f97fSRoger A. Faulkner register const int* lp = cut->list;
2583e14f97fSRoger A. Faulkner register char* bp;
259da2e3ebdSchin register int skip; /* non-zero for don't copy */
/* must: stays 1 until some selected data has been written for the record */
2603e14f97fSRoger A. Faulkner int must;
/* xx: position saved by the no-split branch when mbnsize() fails; 0 if none */
2613e14f97fSRoger A. Faulkner const char* xx;
2623e14f97fSRoger A. Faulkner
2633e14f97fSRoger A. Faulkner for (;;)
264da2e3ebdSchin {
/* fetch the next record: fixed length when reclen is set, else a line */
2653e14f97fSRoger A. Faulkner if (len = cut->reclen)
2663e14f97fSRoger A. Faulkner bp = sfreserve(fdin, len, -1);
267da2e3ebdSchin else
2683e14f97fSRoger A. Faulkner bp = sfgetr(fdin, '\n', 0);
/* nothing complete was read: try for a last partial record, else stop */
2693e14f97fSRoger A. Faulkner if (!bp && !(bp = sfgetr(fdin, 0, SF_LASTR)))
270da2e3ebdSchin break;
/* len is now the byte count sfio reports for the record just read */
271da2e3ebdSchin len = sfvalue(fdin);
2723e14f97fSRoger A. Faulkner xx = 0;
/* a zero first entry means the record starts with a copy run */
2733e14f97fSRoger A. Faulkner if (!(ncol = skip = *(lp = cut->list)))
274da2e3ebdSchin ncol = *++lp;
2753e14f97fSRoger A. Faulkner must = 1;
/* each pass handles one skip or copy run; c ends up as its byte length */
2763e14f97fSRoger A. Faulkner do
277da2e3ebdSchin {
2783e14f97fSRoger A. Faulkner if (cut->nosplit)
2793e14f97fSRoger A. Faulkner {
/* byte counts, but advance only by whole multibyte characters */
2803e14f97fSRoger A. Faulkner register const char* s = bp;
2813e14f97fSRoger A. Faulkner register int w = len < ncol ? len : ncol;
2823e14f97fSRoger A. Faulkner register int z;
2833e14f97fSRoger A. Faulkner
2843e14f97fSRoger A. Faulkner while (w > 0)
2853e14f97fSRoger A. Faulkner {
2863e14f97fSRoger A. Faulkner if (!(*s & 0x80))
2873e14f97fSRoger A. Faulkner z = 1;
288*b30d1939SAndy Fiddaman else if ((z = mbnsize(s, w)) <= 0)
2893e14f97fSRoger A. Faulkner {
/*
 * no character could be sized within the w bytes left in this run
 * NOTE(review): xx looks like the start of a character cut short by the
 * previous run, so this run restarts from it -- confirm
 */
2903e14f97fSRoger A. Faulkner if (s == bp && xx)
2913e14f97fSRoger A. Faulkner {
2923e14f97fSRoger A. Faulkner w += s - xx;
2933e14f97fSRoger A. Faulkner bp = (char*)(s = xx);
2943e14f97fSRoger A. Faulkner xx = 0;
2953e14f97fSRoger A. Faulkner continue;
2963e14f97fSRoger A. Faulkner }
2973e14f97fSRoger A. Faulkner xx = s;
2983e14f97fSRoger A. Faulkner if (skip)
2993e14f97fSRoger A. Faulkner s += w;
3003e14f97fSRoger A. Faulkner w = 0;
3013e14f97fSRoger A. Faulkner break;
3023e14f97fSRoger A. Faulkner }
3033e14f97fSRoger A. Faulkner s += z;
3043e14f97fSRoger A. Faulkner w -= z;
3053e14f97fSRoger A. Faulkner }
3063e14f97fSRoger A. Faulkner c = s - bp;
/* ncol becomes a flag: non-zero when this run reached the end of the record */
3073e14f97fSRoger A. Faulkner ncol = !w && ncol >= len;
3083e14f97fSRoger A. Faulkner }
3093e14f97fSRoger A. Faulkner else if (cut->cflag)
3103e14f97fSRoger A. Faulkner {
/* character counts: step ncol characters, at most len bytes */
3113e14f97fSRoger A. Faulkner register const char* s = bp;
3123e14f97fSRoger A. Faulkner register int w = len;
3133e14f97fSRoger A. Faulkner register int z;
3143e14f97fSRoger A. Faulkner
3153e14f97fSRoger A. Faulkner while (w > 0 && ncol > 0)
3163e14f97fSRoger A. Faulkner {
3173e14f97fSRoger A. Faulkner ncol--;
/* a byte that mbnsize() cannot size counts as a 1 byte character */
318*b30d1939SAndy Fiddaman if (!(*s & 0x80) || (z = mbnsize(s, w)) <= 0)
3193e14f97fSRoger A. Faulkner z = 1;
3203e14f97fSRoger A. Faulkner s += z;
3213e14f97fSRoger A. Faulkner w -= z;
3223e14f97fSRoger A. Faulkner
3233e14f97fSRoger A. Faulkner }
3243e14f97fSRoger A. Faulkner c = s - bp;
/* as above, ncol is reused as the "stop processing this record" flag */
3253e14f97fSRoger A. Faulkner ncol = !w && (ncol || !skip);
3263e14f97fSRoger A. Faulkner }
3273e14f97fSRoger A. Faulkner else
3283e14f97fSRoger A. Faulkner {
/* plain bytes: the run is clipped to what is left of the record */
3293e14f97fSRoger A. Faulkner if ((c = ncol) > len)
3303e14f97fSRoger A. Faulkner c = len;
/* a copy run ending exactly at the record end: leave ncol non-zero to stop */
3313e14f97fSRoger A. Faulkner else if (c == len && !skip)
3323e14f97fSRoger A. Faulkner ncol++;
3333e14f97fSRoger A. Faulkner ncol -= c;
3343e14f97fSRoger A. Faulkner }
3353e14f97fSRoger A. Faulkner if (!skip && c)
3363e14f97fSRoger A. Faulkner {
3373e14f97fSRoger A. Faulkner if (sfwrite(fdout, (char*)bp, c) < 0)
3383e14f97fSRoger A. Faulkner return;
3393e14f97fSRoger A. Faulkner must = 0;
3403e14f97fSRoger A. Faulkner }
3413e14f97fSRoger A. Faulkner bp += c;
/* non-zero here means the record is finished */
3423e14f97fSRoger A. Faulkner if (ncol)
343da2e3ebdSchin break;
/* move on to the next run and flip between skipping and copying */
344da2e3ebdSchin len -= c;
345da2e3ebdSchin ncol = *++lp;
346da2e3ebdSchin skip = !skip;
3473e14f97fSRoger A. Faulkner } while (ncol != HUGE);
/*
 * write the line delimiter unless nlflag is set, and only when the
 * record ended in a skip run, nothing was written, or records are
 * fixed length
 */
3483e14f97fSRoger A. Faulkner if (!cut->nlflag && (skip || must || cut->reclen))
3493e14f97fSRoger A. Faulkner {
3503e14f97fSRoger A. Faulkner if (cut->ldelim.len > 1)
3513e14f97fSRoger A. Faulkner sfwrite(fdout, cut->ldelim.str, cut->ldelim.len);
3523e14f97fSRoger A. Faulkner else
3533e14f97fSRoger A. Faulkner sfputc(fdout, cut->ldelim.chr);
354da2e3ebdSchin }
355da2e3ebdSchin }
356da2e3ebdSchin }
357da2e3ebdSchin
358da2e3ebdSchin /*
 * cutfields: field mode (-f).
 * Read <fdin> a buffer at a time (sfreserve with SF_UNBOUND) and write
 * the fields selected by cut->list to <fdout>.
 * The last byte of every buffer is temporarily replaced by cut->eob so
 * that the delimiter scans always stop inside the buffer; the byte
 * that was there is kept in lastchar.
 * A line may continue into the next buffer (inword). While such a line
 * has shown no delimiter yet, its text is saved in a temporary stream
 * so that it can still be written whole if it turns out to have none.
 * Any failed write of selected data ends processing of this file.
 */
362da2e3ebdSchin
3633e14f97fSRoger A. Faulkner static void
cutfields(Cut_t * cut,Sfio_t * fdin,Sfio_t * fdout)3643e14f97fSRoger A. Faulkner cutfields(Cut_t* cut, Sfio_t* fdin, Sfio_t* fdout)
365da2e3ebdSchin {
/* sp: byte class table (0, SP_LINE, SP_WORD or SP_WIDE) */
3663e14f97fSRoger A. Faulkner register unsigned char *sp = cut->space;
/* cp: scan position, wp: position of the delimiter that ended the last scan */
367da2e3ebdSchin register unsigned char *cp;
3683e14f97fSRoger A. Faulkner register unsigned char *wp;
369da2e3ebdSchin register int c, nfields;
3703e14f97fSRoger A. Faulkner register const int *lp = cut->list;
/* copy: start of the pending output span, 0 while fields are being skipped */
371da2e3ebdSchin register unsigned char *copy;
/* nodelim: no delimiter seen on this line yet; empty: nothing selected for output yet */
372da2e3ebdSchin register int nodelim, empty, inword=0;
/* ep: last byte of the current buffer (where the sentinel is stored) */
3733e14f97fSRoger A. Faulkner register unsigned char *ep;
3743e14f97fSRoger A. Faulkner unsigned char *bp, *first;
375da2e3ebdSchin int lastchar;
3763e14f97fSRoger A. Faulkner wchar_t w;
/* fdtmp/offset: temporary stream and the number of bytes saved in it */
377da2e3ebdSchin Sfio_t *fdtmp = 0;
378da2e3ebdSchin long offset = 0;
/* mb: scratch for a multibyte character that straddles two buffers */
3793e14f97fSRoger A. Faulkner unsigned char mb[8];
380da2e3ebdSchin /* process each buffer */
3813e14f97fSRoger A. Faulkner while ((bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) && (c = sfvalue(fdin)) > 0)
382da2e3ebdSchin {
3833e14f97fSRoger A. Faulkner cp = bp;
3843e14f97fSRoger A. Faulkner ep = cp + --c;
/* plant the end-of-buffer sentinel, remembering the byte it replaces */
3853e14f97fSRoger A. Faulkner if((lastchar = cp[c]) != cut->eob)
3863e14f97fSRoger A. Faulkner *ep = cut->eob;
387da2e3ebdSchin /* process each line in the buffer */
3883e14f97fSRoger A. Faulkner while (cp <= ep)
389da2e3ebdSchin {
390da2e3ebdSchin first = cp;
3913e14f97fSRoger A. Faulkner if (!inword)
392da2e3ebdSchin {
/* new line: reset state; a non-zero first list entry means leading fields are skipped */
393da2e3ebdSchin nodelim = empty = 1;
394da2e3ebdSchin copy = cp;
3953e14f97fSRoger A. Faulkner if (nfields = *(lp = cut->list))
396da2e3ebdSchin copy = 0;
397da2e3ebdSchin else
398da2e3ebdSchin nfields = *++lp;
399da2e3ebdSchin }
/* line continued from the previous buffer while copying: restart the span here */
4003e14f97fSRoger A. Faulkner else if (copy)
401da2e3ebdSchin copy = cp;
402da2e3ebdSchin inword = 0;
4033e14f97fSRoger A. Faulkner do
404da2e3ebdSchin {
405da2e3ebdSchin /* skip over non-delimiter characters */
4063e14f97fSRoger A. Faulkner if (cut->mb)
4073e14f97fSRoger A. Faulkner for (;;)
4083e14f97fSRoger A. Faulkner {
4093e14f97fSRoger A. Faulkner switch (c = sp[*(unsigned char*)cp++])
4103e14f97fSRoger A. Faulkner {
4113e14f97fSRoger A. Faulkner case 0:
4123e14f97fSRoger A. Faulkner continue;
4133e14f97fSRoger A. Faulkner case SP_WIDE:
/* byte with the high bit set: decode one character and compare it with the delimiters */
4143e14f97fSRoger A. Faulkner wp = --cp;
4153e14f97fSRoger A. Faulkner while ((c = mb2wc(w, cp, ep - cp)) <= 0)
4163e14f97fSRoger A. Faulkner {
4173e14f97fSRoger A. Faulkner /* mb char possibly spanning buffer boundary -- fun stuff */
4183e14f97fSRoger A. Faulkner if ((ep - cp) < mbmax())
4193e14f97fSRoger A. Faulkner {
4203e14f97fSRoger A. Faulkner int i;
4213e14f97fSRoger A. Faulkner int j;
4223e14f97fSRoger A. Faulkner int k;
4233e14f97fSRoger A. Faulkner
/* retry with the real last byte in place of the sentinel */
4243e14f97fSRoger A. Faulkner if (lastchar != cut->eob)
4253e14f97fSRoger A. Faulkner {
4263e14f97fSRoger A. Faulkner *ep = lastchar;
4273e14f97fSRoger A. Faulkner if ((c = mb2wc(w, cp, ep - cp)) > 0)
4283e14f97fSRoger A. Faulkner break;
4293e14f97fSRoger A. Faulkner }
/* flush what is pending before this buffer is given up */
4303e14f97fSRoger A. Faulkner if (copy)
4313e14f97fSRoger A. Faulkner {
4323e14f97fSRoger A. Faulkner empty = 0;
4333e14f97fSRoger A. Faulkner if ((c = cp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
4343e14f97fSRoger A. Faulkner goto failed;
4353e14f97fSRoger A. Faulkner }
/* keep the tail bytes, fetch the next buffer and complete the character in mb[] */
4363e14f97fSRoger A. Faulkner for (i = 0; i <= (ep - cp); i++)
4373e14f97fSRoger A. Faulkner mb[i] = cp[i];
4383e14f97fSRoger A. Faulkner if (!(bp = (unsigned char*)sfreserve(fdin, SF_UNBOUND, -1)) || (c = sfvalue(fdin)) <= 0)
4393e14f97fSRoger A. Faulkner goto failed;
4403e14f97fSRoger A. Faulkner cp = bp;
4413e14f97fSRoger A. Faulkner ep = cp + --c;
4423e14f97fSRoger A. Faulkner if ((lastchar = cp[c]) != cut->eob)
4433e14f97fSRoger A. Faulkner *ep = cut->eob;
4443e14f97fSRoger A. Faulkner j = i;
4453e14f97fSRoger A. Faulkner k = 0;
4463e14f97fSRoger A. Faulkner while (j < mbmax())
4473e14f97fSRoger A. Faulkner mb[j++] = cp[k++];
/* still undecodable: treat the saved tail bytes as consumed, with no character value */
4483e14f97fSRoger A. Faulkner if ((c = mb2wc(w, (char*)mb, j)) <= 0)
4493e14f97fSRoger A. Faulkner {
4503e14f97fSRoger A. Faulkner c = i;
4513e14f97fSRoger A. Faulkner w = 0;
4523e14f97fSRoger A. Faulkner }
/* step past the part of the character that lies in the new buffer */
4533e14f97fSRoger A. Faulkner first = bp = cp += c - i;
4543e14f97fSRoger A. Faulkner if (copy)
4553e14f97fSRoger A. Faulkner {
4563e14f97fSRoger A. Faulkner copy = bp;
4573e14f97fSRoger A. Faulkner if (w == cut->ldelim.chr)
4583e14f97fSRoger A. Faulkner lastchar = cut->ldelim.chr;
4593e14f97fSRoger A. Faulkner else if (w != cut->wdelim.chr)
4603e14f97fSRoger A. Faulkner {
4613e14f97fSRoger A. Faulkner empty = 0;
4623e14f97fSRoger A. Faulkner if (sfwrite(fdout, (char*)mb, c) < 0)
4633e14f97fSRoger A. Faulkner goto failed;
4643e14f97fSRoger A. Faulkner }
4653e14f97fSRoger A. Faulkner }
4663e14f97fSRoger A. Faulkner c = 0;
4673e14f97fSRoger A. Faulkner }
4683e14f97fSRoger A. Faulkner else
4693e14f97fSRoger A. Faulkner {
/* enough bytes were available, so it is an invalid sequence: take one byte as is */
4703e14f97fSRoger A. Faulkner w = *cp;
4713e14f97fSRoger A. Faulkner c = 1;
4723e14f97fSRoger A. Faulkner }
4733e14f97fSRoger A. Faulkner break;
4743e14f97fSRoger A. Faulkner }
4753e14f97fSRoger A. Faulkner cp += c;
4763e14f97fSRoger A. Faulkner c = w;
4773e14f97fSRoger A. Faulkner if (c == cut->wdelim.chr)
4783e14f97fSRoger A. Faulkner {
4793e14f97fSRoger A. Faulkner c = SP_WORD;
4803e14f97fSRoger A. Faulkner break;
4813e14f97fSRoger A. Faulkner }
4823e14f97fSRoger A. Faulkner if (c == cut->ldelim.chr)
4833e14f97fSRoger A. Faulkner {
4843e14f97fSRoger A. Faulkner c = SP_LINE;
4853e14f97fSRoger A. Faulkner break;
4863e14f97fSRoger A. Faulkner }
4873e14f97fSRoger A. Faulkner continue;
4883e14f97fSRoger A. Faulkner default:
/* single-byte field or line delimiter */
4893e14f97fSRoger A. Faulkner wp = cp - 1;
4903e14f97fSRoger A. Faulkner break;
4913e14f97fSRoger A. Faulkner }
4923e14f97fSRoger A. Faulkner break;
4933e14f97fSRoger A. Faulkner }
4943e14f97fSRoger A. Faulkner else
4953e14f97fSRoger A. Faulkner {
/* single-byte locale: run to the next byte with a non-zero class */
4963e14f97fSRoger A. Faulkner while (!(c = sp[*cp++]));
4973e14f97fSRoger A. Faulkner wp = cp - 1;
4983e14f97fSRoger A. Faulkner }
499da2e3ebdSchin /* check for end-of-line */
5003e14f97fSRoger A. Faulkner if (c == SP_LINE)
501da2e3ebdSchin {
/* inside the buffer, or the buffer really ended with the line delimiter: line is complete */
5023e14f97fSRoger A. Faulkner if (cp <= ep)
503da2e3ebdSchin break;
5043e14f97fSRoger A. Faulkner if (lastchar == cut->ldelim.chr)
505da2e3ebdSchin break;
5063e14f97fSRoger A. Faulkner /* restore the real last character over the sentinel */
5073e14f97fSRoger A. Faulkner if (lastchar != cut->eob)
5083e14f97fSRoger A. Faulkner *ep = lastchar;
/* the line continues in the next buffer */
509da2e3ebdSchin inword++;
/* last byte was ordinary data, not a delimiter: nothing more to count here */
5103e14f97fSRoger A. Faulkner if (!sp[lastchar])
511da2e3ebdSchin break;
512da2e3ebdSchin }
513da2e3ebdSchin nodelim = 0;
/* count down the current run; when it ends switch between copying and skipping */
5143e14f97fSRoger A. Faulkner if (--nfields > 0)
515da2e3ebdSchin continue;
516da2e3ebdSchin nfields = *++lp;
5173e14f97fSRoger A. Faulkner if (copy)
518da2e3ebdSchin {
519da2e3ebdSchin empty = 0;
5203e14f97fSRoger A. Faulkner if ((c = wp - copy) > 0 && sfwrite(fdout, (char*)copy, c) < 0)
521da2e3ebdSchin goto failed;
522da2e3ebdSchin copy = 0;
523da2e3ebdSchin }
524da2e3ebdSchin else
525da2e3ebdSchin /* set to delimiter unless the first field */
5263e14f97fSRoger A. Faulkner copy = empty ? cp : wp;
5273e14f97fSRoger A. Faulkner } while (!inword);
/* the line ended inside this buffer */
5283e14f97fSRoger A. Faulkner if (!inword)
529da2e3ebdSchin {
5303e14f97fSRoger A. Faulkner if (!copy)
531da2e3ebdSchin {
5323e14f97fSRoger A. Faulkner if (nodelim)
533da2e3ebdSchin {
/* no delimiter on the line: pass it through whole unless -s was given */
5343e14f97fSRoger A. Faulkner if (!cut->sflag)
535da2e3ebdSchin {
/* first replay the part of the line saved from earlier buffers */
5363e14f97fSRoger A. Faulkner if (offset)
537da2e3ebdSchin {
538da2e3ebdSchin sfseek(fdtmp,(Sfoff_t)0,SEEK_SET);
539da2e3ebdSchin sfmove(fdtmp,fdout,offset,-1);
540da2e3ebdSchin }
541da2e3ebdSchin copy = first;
542da2e3ebdSchin }
543da2e3ebdSchin }
544da2e3ebdSchin else
/* NOTE(review): writes a literal newline rather than cut->ldelim -- confirm this is intended with -D */
545da2e3ebdSchin sfputc(fdout,'\n');
546da2e3ebdSchin }
/* discard whatever was saved for this line */
5473e14f97fSRoger A. Faulkner if (offset)
548da2e3ebdSchin sfseek(fdtmp,offset=0,SEEK_SET);
549da2e3ebdSchin }
/* write the span still pending for this line, if any */
5503e14f97fSRoger A. Faulkner if (copy && (c=cp-copy)>0 && (!nodelim || !cut->sflag) && sfwrite(fdout,(char*)copy,c)< 0)
551da2e3ebdSchin goto failed;
552da2e3ebdSchin }
553da2e3ebdSchin /* see whether to save in tmp file */
5543e14f97fSRoger A. Faulkner if(inword && nodelim && !cut->sflag && (c=cp-first)>0)
555da2e3ebdSchin {
556da2e3ebdSchin /* copy line to tmpfile in case no fields */
557da2e3ebdSchin if(!fdtmp)
558da2e3ebdSchin fdtmp = sftmp(BLOCK);
559da2e3ebdSchin sfwrite(fdtmp,(char*)first,c);
560da2e3ebdSchin offset +=c;
561da2e3ebdSchin }
562da2e3ebdSchin }
/* reached on normal end of input as well as on failure */
5633e14f97fSRoger A. Faulkner failed:
564da2e3ebdSchin if(fdtmp)
565da2e3ebdSchin sfclose(fdtmp);
566da2e3ebdSchin }
567da2e3ebdSchin
568da2e3ebdSchin int
b_cut(int argc,char ** argv,Shbltin_t * context)569*b30d1939SAndy Fiddaman b_cut(int argc, char** argv, Shbltin_t* context)
570da2e3ebdSchin {
5713e14f97fSRoger A. Faulkner register char* cp = 0;
5723e14f97fSRoger A. Faulkner register Sfio_t* fp;
5733e14f97fSRoger A. Faulkner char* s;
5743e14f97fSRoger A. Faulkner int n;
5753e14f97fSRoger A. Faulkner Cut_t* cut;
5763e14f97fSRoger A. Faulkner int mode = 0;
5773e14f97fSRoger A. Faulkner Delim_t wdelim;
5783e14f97fSRoger A. Faulkner Delim_t ldelim;
5793e14f97fSRoger A. Faulkner size_t reclen = 0;
580da2e3ebdSchin
581da2e3ebdSchin cmdinit(argc, argv, context, ERROR_CATALOG, 0);
5823e14f97fSRoger A. Faulkner wdelim.chr = '\t';
5833e14f97fSRoger A. Faulkner ldelim.chr = '\n';
5843e14f97fSRoger A. Faulkner wdelim.len = ldelim.len = 1;
5853e14f97fSRoger A. Faulkner for (;;)
586da2e3ebdSchin {
587*b30d1939SAndy Fiddaman switch (optget(argv, usage))
588da2e3ebdSchin {
5893e14f97fSRoger A. Faulkner case 0:
590da2e3ebdSchin break;
5913e14f97fSRoger A. Faulkner case 'b':
5923e14f97fSRoger A. Faulkner case 'c':
5933e14f97fSRoger A. Faulkner if(mode&C_FIELDS)
5943e14f97fSRoger A. Faulkner {
5953e14f97fSRoger A. Faulkner error(2, "f option already specified");
5963e14f97fSRoger A. Faulkner continue;
5973e14f97fSRoger A. Faulkner }
5983e14f97fSRoger A. Faulkner cp = opt_info.arg;
599*b30d1939SAndy Fiddaman if(opt_info.option[1]=='b')
6003e14f97fSRoger A. Faulkner mode |= C_BYTES;
6013e14f97fSRoger A. Faulkner else
6023e14f97fSRoger A. Faulkner mode |= C_CHARS;
6033e14f97fSRoger A. Faulkner continue;
6043e14f97fSRoger A. Faulkner case 'D':
6053e14f97fSRoger A. Faulkner ldelim.str = opt_info.arg;
6063e14f97fSRoger A. Faulkner if (mbwide())
6073e14f97fSRoger A. Faulkner {
6083e14f97fSRoger A. Faulkner s = opt_info.arg;
6093e14f97fSRoger A. Faulkner ldelim.chr = mbchar(s);
6103e14f97fSRoger A. Faulkner if ((n = s - opt_info.arg) > 1)
6113e14f97fSRoger A. Faulkner {
6123e14f97fSRoger A. Faulkner ldelim.len = n;
6133e14f97fSRoger A. Faulkner continue;
6143e14f97fSRoger A. Faulkner }
6153e14f97fSRoger A. Faulkner }
6163e14f97fSRoger A. Faulkner ldelim.chr = *(unsigned char*)opt_info.arg;
6173e14f97fSRoger A. Faulkner ldelim.len = 1;
6183e14f97fSRoger A. Faulkner continue;
6193e14f97fSRoger A. Faulkner case 'd':
6203e14f97fSRoger A. Faulkner wdelim.str = opt_info.arg;
6213e14f97fSRoger A. Faulkner if (mbwide())
6223e14f97fSRoger A. Faulkner {
6233e14f97fSRoger A. Faulkner s = opt_info.arg;
6243e14f97fSRoger A. Faulkner wdelim.chr = mbchar(s);
6253e14f97fSRoger A. Faulkner if ((n = s - opt_info.arg) > 1)
6263e14f97fSRoger A. Faulkner {
6273e14f97fSRoger A. Faulkner wdelim.len = n;
6283e14f97fSRoger A. Faulkner continue;
6293e14f97fSRoger A. Faulkner }
6303e14f97fSRoger A. Faulkner }
6313e14f97fSRoger A. Faulkner wdelim.chr = *(unsigned char*)opt_info.arg;
6323e14f97fSRoger A. Faulkner wdelim.len = 1;
6333e14f97fSRoger A. Faulkner continue;
6343e14f97fSRoger A. Faulkner case 'f':
6353e14f97fSRoger A. Faulkner if(mode&(C_CHARS|C_BYTES))
6363e14f97fSRoger A. Faulkner {
6373e14f97fSRoger A. Faulkner error(2, "c option already specified");
6383e14f97fSRoger A. Faulkner continue;
6393e14f97fSRoger A. Faulkner }
6403e14f97fSRoger A. Faulkner cp = opt_info.arg;
6413e14f97fSRoger A. Faulkner mode |= C_FIELDS;
6423e14f97fSRoger A. Faulkner continue;
6433e14f97fSRoger A. Faulkner case 'n':
6443e14f97fSRoger A. Faulkner mode |= C_NOSPLIT;
6453e14f97fSRoger A. Faulkner continue;
6463e14f97fSRoger A. Faulkner case 'N':
6473e14f97fSRoger A. Faulkner mode |= C_NONEWLINE;
6483e14f97fSRoger A. Faulkner continue;
6493e14f97fSRoger A. Faulkner case 'R':
6503e14f97fSRoger A. Faulkner case 'r':
6513e14f97fSRoger A. Faulkner if(opt_info.num>0)
6523e14f97fSRoger A. Faulkner reclen = opt_info.num;
6533e14f97fSRoger A. Faulkner continue;
6543e14f97fSRoger A. Faulkner case 's':
6553e14f97fSRoger A. Faulkner mode |= C_SUPRESS;
6563e14f97fSRoger A. Faulkner continue;
6573e14f97fSRoger A. Faulkner case ':':
6583e14f97fSRoger A. Faulkner error(2, "%s", opt_info.arg);
6593e14f97fSRoger A. Faulkner break;
6603e14f97fSRoger A. Faulkner case '?':
6613e14f97fSRoger A. Faulkner error(ERROR_usage(2), "%s", opt_info.arg);
662da2e3ebdSchin break;
663da2e3ebdSchin }
664da2e3ebdSchin break;
665da2e3ebdSchin }
666da2e3ebdSchin argv += opt_info.index;
667da2e3ebdSchin if (error_info.errors)
668da2e3ebdSchin error(ERROR_usage(2), "%s",optusage(NiL));
669da2e3ebdSchin if(!cp)
670da2e3ebdSchin {
671da2e3ebdSchin error(2, "b, c or f option must be specified");
672da2e3ebdSchin error(ERROR_usage(2), "%s", optusage(NiL));
673da2e3ebdSchin }
674da2e3ebdSchin if(!*cp)
675da2e3ebdSchin error(3, "non-empty b, c or f option must be specified");
676da2e3ebdSchin if((mode & (C_FIELDS|C_SUPRESS)) == C_SUPRESS)
677da2e3ebdSchin error(3, "s option requires f option");
6783e14f97fSRoger A. Faulkner cut = cutinit(mode, cp, &wdelim, &ldelim, reclen);
679da2e3ebdSchin if(cp = *argv)
680da2e3ebdSchin argv++;
681da2e3ebdSchin do
682da2e3ebdSchin {
683da2e3ebdSchin if(!cp || streq(cp,"-"))
684da2e3ebdSchin fp = sfstdin;
685da2e3ebdSchin else if(!(fp = sfopen(NiL,cp,"r")))
686da2e3ebdSchin {
687da2e3ebdSchin error(ERROR_system(0),"%s: cannot open",cp);
688da2e3ebdSchin continue;
689da2e3ebdSchin }
690da2e3ebdSchin if(mode&C_FIELDS)
6913e14f97fSRoger A. Faulkner cutfields(cut,fp,sfstdout);
692da2e3ebdSchin else
6933e14f97fSRoger A. Faulkner cutcols(cut,fp,sfstdout);
694da2e3ebdSchin if(fp!=sfstdin)
695da2e3ebdSchin sfclose(fp);
6967c2fbfb3SApril Chin } while(cp = *argv++);
6977c2fbfb3SApril Chin if (sfsync(sfstdout))
6987c2fbfb3SApril Chin error(ERROR_system(0), "write error");
6993e14f97fSRoger A. Faulkner return error_info.errors != 0;
700da2e3ebdSchin }
701