1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*b30d1939SAndy Fiddaman *          Copyright (c) 1992-2012 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6*b30d1939SAndy Fiddaman *                 Eclipse Public License, Version 1.0                  *
77c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10*b30d1939SAndy Fiddaman *          http://www.eclipse.org/org/documents/epl-v10.html           *
11*b30d1939SAndy Fiddaman *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                  David Korn <dgk@research.att.com>                   *
19da2e3ebdSchin *                                                                      *
20da2e3ebdSchin ***********************************************************************/
21da2e3ebdSchin #pragma prototyped
22da2e3ebdSchin /*
23da2e3ebdSchin  * David Korn
24da2e3ebdSchin  * Glenn Fowler
25da2e3ebdSchin  * AT&T Research
26da2e3ebdSchin  *
27da2e3ebdSchin  * join
28da2e3ebdSchin  */
29da2e3ebdSchin 
30da2e3ebdSchin static const char usage[] =
313e14f97fSRoger A. Faulkner "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32da2e3ebdSchin USAGE_LICENSE
33da2e3ebdSchin "[+NAME?join - relational database operator]"
34da2e3ebdSchin "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35da2e3ebdSchin 	"and \afile2\a and writes the resulting joined files to standard "
36da2e3ebdSchin 	"output.  By default, a field is delimited by one or more spaces "
37da2e3ebdSchin 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38da2e3ebdSchin 	"can be used to change the field delimiter.]"
39da2e3ebdSchin "[+?The \ajoin field\a is a field in each file on which files are compared. "
40da2e3ebdSchin 	"By default \bjoin\b writes one line in the output for each pair "
41da2e3ebdSchin 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42da2e3ebdSchin 	"fields.  The default output line consists of the join field, "
43da2e3ebdSchin 	"then the remaining fields from \afile1\a, then the remaining "
44da2e3ebdSchin 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45da2e3ebdSchin 	"option.  The \b-a\b option can be used to add unmatched lines "
46da2e3ebdSchin 	"to the output.  The \b-v\b option can be used to output only "
47da2e3ebdSchin 	"unmatched lines.]"
48da2e3ebdSchin "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49da2e3ebdSchin 	"sequence of \bsort -b\b on the fields on which they are to be "
50da2e3ebdSchin 	"joined otherwise the results are unspecified.]"
51da2e3ebdSchin "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52da2e3ebdSchin         "uses standard input starting at the current location.]"
53da2e3ebdSchin 
54da2e3ebdSchin "[e:empty]:[string?Replace empty output fields in the list selected with"
55da2e3ebdSchin "	\b-o\b with \astring\a.]"
56da2e3ebdSchin "[o:output]:[list?Construct the output line to comprise the fields specified "
57da2e3ebdSchin 	"in a blank or comma separated list \alist\a.  Each element in "
58da2e3ebdSchin 	"\alist\a consists of a file number (either 1 or 2), a period, "
59da2e3ebdSchin 	"and a field number or \b0\b representing the join field.  "
60da2e3ebdSchin 	"As an obsolete feature multiple occurrences of \b-o\b can "
61da2e3ebdSchin 	"be specified.]"
62da2e3ebdSchin "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63da2e3ebdSchin "	and output.]"
64da2e3ebdSchin "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65da2e3ebdSchin "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66da2e3ebdSchin "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67da2e3ebdSchin "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68da2e3ebdSchin "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69da2e3ebdSchin "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70da2e3ebdSchin 	"all unpairable lines will be output.]"
71da2e3ebdSchin "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72da2e3ebdSchin "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73da2e3ebdSchin 	"output.  If \b-v\b options appear for both 1 and 2, then "
74da2e3ebdSchin 	"all unpairable lines will be output.] ]"
75da2e3ebdSchin "[i:ignorecase?Ignore case in field comparisons.]"
76da2e3ebdSchin "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77da2e3ebdSchin 
78da2e3ebdSchin "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79da2e3ebdSchin "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80da2e3ebdSchin "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81da2e3ebdSchin "	equivalent to \b-2\b \afield\a.]"
82da2e3ebdSchin 
83da2e3ebdSchin "\n"
84da2e3ebdSchin "\nfile1 file2\n"
85da2e3ebdSchin "\n"
86da2e3ebdSchin "[+EXIT STATUS?]{"
87da2e3ebdSchin 	"[+0?Both files processed successfully.]"
88da2e3ebdSchin 	"[+>0?An error occurred.]"
89da2e3ebdSchin "}"
90da2e3ebdSchin "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91da2e3ebdSchin ;
92da2e3ebdSchin 
93da2e3ebdSchin #include <cmd.h>
94da2e3ebdSchin #include <sfdisc.h>
95da2e3ebdSchin 
963e14f97fSRoger A. Faulkner #if _hdr_wchar && _hdr_wctype && _lib_iswctype
973e14f97fSRoger A. Faulkner 
983e14f97fSRoger A. Faulkner #include <wchar.h>
993e14f97fSRoger A. Faulkner #include <wctype.h>
1003e14f97fSRoger A. Faulkner 
1013e14f97fSRoger A. Faulkner #else
1023e14f97fSRoger A. Faulkner 
1033e14f97fSRoger A. Faulkner #include <ctype.h>
1043e14f97fSRoger A. Faulkner 
1053e14f97fSRoger A. Faulkner #ifndef iswspace
1063e14f97fSRoger A. Faulkner #define iswspace(x)	isspace(x)
1073e14f97fSRoger A. Faulkner #endif
1083e14f97fSRoger A. Faulkner 
1093e14f97fSRoger A. Faulkner #endif
1103e14f97fSRoger A. Faulkner 
/*
 * output mode bits kept in Join_t.outmode
 * bit <index> is cleared by getrec() when file <index> reaches end of input
 */
#define C_FILE1		(1<<0)	/* emit unpaired records of file 1	*/
#define C_FILE2		(1<<1)	/* emit unpaired records of file 2	*/
#define C_COMMON	(1<<2)	/* emit paired records			*/
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

#define NFIELD		10	/* initial field / output list capacity	*/
#define JOINFIELD	2	/* output list code for the join field	*/

/*
 * byte classes stored in Join_t.state[]; 0 is an ordinary field byte
 */
#define S_DELIM		1	/* field delimiter			*/
#define S_SPACE		2	/* blank (space or tab)			*/
#define S_NL		3	/* record terminator			*/
#define S_WIDE		4	/* byte >= 0x80 in a multibyte locale	*/
123da2e3ebdSchin 
/*
 * one field of the current record, as pointers into the record buffer
 */
typedef struct Field_s
{
	char*		beg;		/* first byte of the field		*/
	char*		end;		/* the byte that terminated the field	*/
} Field_t;
1293e14f97fSRoger A. Faulkner 
1303e14f97fSRoger A. Faulkner typedef struct File_s
131da2e3ebdSchin {
132da2e3ebdSchin 	Sfio_t*		iop;
133da2e3ebdSchin 	char*		name;
134da2e3ebdSchin 	char*		recptr;
135da2e3ebdSchin 	int		reclen;
136da2e3ebdSchin 	int		field;
137da2e3ebdSchin 	int		fieldlen;
138da2e3ebdSchin 	int		nfields;
139da2e3ebdSchin 	int		maxfields;
140da2e3ebdSchin 	int		spaces;
141da2e3ebdSchin 	int		hit;
142da2e3ebdSchin 	int		discard;
1433e14f97fSRoger A. Faulkner 	Field_t*	fields;
144da2e3ebdSchin } File_t;
145da2e3ebdSchin 
1463e14f97fSRoger A. Faulkner typedef struct Join_s
147da2e3ebdSchin {
148da2e3ebdSchin 	unsigned char	state[1<<CHAR_BIT];
149da2e3ebdSchin 	Sfio_t*		outfile;
150da2e3ebdSchin 	int*		outlist;
151da2e3ebdSchin 	int		outmode;
152da2e3ebdSchin 	int		ooutmode;
153da2e3ebdSchin 	char*		nullfield;
1543e14f97fSRoger A. Faulkner 	char*		delimstr;
155da2e3ebdSchin 	int		delim;
1563e14f97fSRoger A. Faulkner 	int		delimlen;
157da2e3ebdSchin 	int		buffered;
158da2e3ebdSchin 	int		ignorecase;
1593e14f97fSRoger A. Faulkner 	int		mb;
160da2e3ebdSchin 	char*		same;
161da2e3ebdSchin 	int		samesize;
162*b30d1939SAndy Fiddaman 	Shbltin_t*	context;
163da2e3ebdSchin 	File_t		file[2];
164da2e3ebdSchin } Join_t;
165da2e3ebdSchin 
166da2e3ebdSchin static void
done(register Join_t * jp)167da2e3ebdSchin done(register Join_t* jp)
168da2e3ebdSchin {
169da2e3ebdSchin 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170da2e3ebdSchin 		sfclose(jp->file[0].iop);
171da2e3ebdSchin 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172da2e3ebdSchin 		sfclose(jp->file[1].iop);
173da2e3ebdSchin 	if (jp->outlist)
174da2e3ebdSchin 		free(jp->outlist);
1753e14f97fSRoger A. Faulkner 	if (jp->file[0].fields)
1763e14f97fSRoger A. Faulkner 		free(jp->file[0].fields);
1773e14f97fSRoger A. Faulkner 	if (jp->file[1].fields)
1783e14f97fSRoger A. Faulkner 		free(jp->file[1].fields);
179da2e3ebdSchin 	if (jp->same)
180da2e3ebdSchin 		free(jp->same);
181da2e3ebdSchin 	free(jp);
182da2e3ebdSchin }
183da2e3ebdSchin 
184da2e3ebdSchin static Join_t*
init(void)185da2e3ebdSchin init(void)
186da2e3ebdSchin {
187da2e3ebdSchin 	register Join_t*	jp;
1883e14f97fSRoger A. Faulkner 	register int		i;
189da2e3ebdSchin 
1903e14f97fSRoger A. Faulkner 	setlocale(LC_ALL, "");
191da2e3ebdSchin 	if (jp = newof(0, Join_t, 1, 0))
192da2e3ebdSchin 	{
1933e14f97fSRoger A. Faulkner 		if (jp->mb = mbwide())
1943e14f97fSRoger A. Faulkner 			for (i = 0x80; i <= 0xff; i++)
1953e14f97fSRoger A. Faulkner 				jp->state[i] = S_WIDE;
196da2e3ebdSchin 		jp->state[' '] = jp->state['\t'] = S_SPACE;
1973e14f97fSRoger A. Faulkner 		jp->state['\n'] = S_NL;
198da2e3ebdSchin 		jp->delim = -1;
199da2e3ebdSchin 		jp->nullfield = 0;
2003e14f97fSRoger A. Faulkner 		if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
2013e14f97fSRoger A. Faulkner 		    !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202da2e3ebdSchin 		{
203da2e3ebdSchin 			done(jp);
204da2e3ebdSchin 			return 0;
205da2e3ebdSchin 		}
206da2e3ebdSchin 		jp->file[0].maxfields = NFIELD;
207da2e3ebdSchin 		jp->file[1].maxfields = NFIELD;
208da2e3ebdSchin 		jp->outmode = C_COMMON;
209da2e3ebdSchin 	}
210da2e3ebdSchin 	return jp;
211da2e3ebdSchin }
212da2e3ebdSchin 
213da2e3ebdSchin static int
getolist(Join_t * jp,const char * first,char ** arglist)214da2e3ebdSchin getolist(Join_t* jp, const char* first, char** arglist)
215da2e3ebdSchin {
216da2e3ebdSchin 	register const char*	cp = first;
217da2e3ebdSchin 	char**			argv = arglist;
218da2e3ebdSchin 	register int		c;
219da2e3ebdSchin 	int*			outptr;
220da2e3ebdSchin 	int*			outmax;
221da2e3ebdSchin 	int			nfield = NFIELD;
222da2e3ebdSchin 	char*			str;
223da2e3ebdSchin 
224da2e3ebdSchin 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225da2e3ebdSchin 	outmax = outptr + NFIELD;
226da2e3ebdSchin 	while (c = *cp++)
227da2e3ebdSchin 	{
228da2e3ebdSchin 		if (c==' ' || c=='\t' || c==',')
229da2e3ebdSchin 			continue;
230da2e3ebdSchin 		str = (char*)--cp;
231da2e3ebdSchin 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232da2e3ebdSchin 		{
233da2e3ebdSchin 			str++;
234da2e3ebdSchin 			c = JOINFIELD;
235da2e3ebdSchin 			goto skip;
236da2e3ebdSchin 		}
237da2e3ebdSchin 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238da2e3ebdSchin 		{
239da2e3ebdSchin 			error(2,"%s: invalid field list",first);
240da2e3ebdSchin 			break;
241da2e3ebdSchin 		}
242da2e3ebdSchin 		c--;
243da2e3ebdSchin 		c <<=2;
244da2e3ebdSchin 		if (*cp=='2')
245da2e3ebdSchin 			c |=1;
246da2e3ebdSchin 	skip:
247da2e3ebdSchin 		if (outptr >= outmax)
248da2e3ebdSchin 		{
249da2e3ebdSchin 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250da2e3ebdSchin 			outptr = jp->outlist + nfield;
251da2e3ebdSchin 			nfield *= 2;
252da2e3ebdSchin 			outmax = jp->outlist + nfield;
253da2e3ebdSchin 		}
254da2e3ebdSchin 		*outptr++ = c;
255da2e3ebdSchin 		cp = str;
256da2e3ebdSchin 	}
257da2e3ebdSchin 	/* need to accept obsolescent command syntax */
258*b30d1939SAndy Fiddaman 	while (cp = *argv)
259da2e3ebdSchin 	{
260*b30d1939SAndy Fiddaman 		if (cp[1]!='.' || (*cp!='1' && *cp!='2'))
261da2e3ebdSchin 		{
262da2e3ebdSchin 			if (*cp=='0' && cp[1]==0)
263da2e3ebdSchin 			{
264da2e3ebdSchin 				c = JOINFIELD;
265da2e3ebdSchin 				goto skip2;
266da2e3ebdSchin 			}
267da2e3ebdSchin 			break;
268da2e3ebdSchin 		}
269da2e3ebdSchin 		str = (char*)cp;
270da2e3ebdSchin 		c = strtol(cp+2, &str,10);
271da2e3ebdSchin 		if (*str || --c<0)
272da2e3ebdSchin 			break;
273da2e3ebdSchin 		argv++;
274da2e3ebdSchin 		c <<= 2;
275da2e3ebdSchin 		if (*cp=='2')
276da2e3ebdSchin 			c |=1;
277da2e3ebdSchin 	skip2:
278da2e3ebdSchin 		if (outptr >= outmax)
279da2e3ebdSchin 		{
280da2e3ebdSchin 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281da2e3ebdSchin 			outptr = jp->outlist + nfield;
282da2e3ebdSchin 			nfield *= 2;
283da2e3ebdSchin 			outmax = jp->outlist + nfield;
284da2e3ebdSchin 		}
285da2e3ebdSchin 		*outptr++ = c;
286da2e3ebdSchin 	}
287da2e3ebdSchin 	*outptr = -1;
288da2e3ebdSchin 	return argv-arglist;
289da2e3ebdSchin }
290da2e3ebdSchin 
291da2e3ebdSchin /*
292da2e3ebdSchin  * read in a record from file <index> and split into fields
293da2e3ebdSchin  */
294da2e3ebdSchin static unsigned char*
getrec(Join_t * jp,int index,int discard)295da2e3ebdSchin getrec(Join_t* jp, int index, int discard)
296da2e3ebdSchin {
297da2e3ebdSchin 	register unsigned char*	sp = jp->state;
298da2e3ebdSchin 	register File_t*	fp = &jp->file[index];
2993e14f97fSRoger A. Faulkner 	register Field_t*	field = fp->fields;
3003e14f97fSRoger A. Faulkner 	register Field_t*	fieldmax = field + fp->maxfields;
301da2e3ebdSchin 	register char*		cp;
3023e14f97fSRoger A. Faulkner 	register int		n;
3033e14f97fSRoger A. Faulkner 	char*			tp;
304da2e3ebdSchin 
3057c2fbfb3SApril Chin 	if (sh_checksig(jp->context))
306da2e3ebdSchin 		return 0;
307da2e3ebdSchin 	if (discard && fp->discard)
308da2e3ebdSchin 		sfraise(fp->iop, SFSK_DISCARD, NiL);
309da2e3ebdSchin 	fp->spaces = 0;
310da2e3ebdSchin 	fp->hit = 0;
311da2e3ebdSchin 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
312da2e3ebdSchin 	{
313da2e3ebdSchin 		jp->outmode &= ~(1<<index);
314da2e3ebdSchin 		return 0;
315da2e3ebdSchin 	}
316da2e3ebdSchin 	fp->recptr = cp;
317da2e3ebdSchin 	fp->reclen = sfvalue(fp->iop);
3183e14f97fSRoger A. Faulkner 	if (jp->delim == '\n')	/* handle new-line delimiter specially */
319da2e3ebdSchin 	{
3203e14f97fSRoger A. Faulkner 		field->beg = cp;
321da2e3ebdSchin 		cp += fp->reclen;
3223e14f97fSRoger A. Faulkner 		field->end = cp - 1;
3233e14f97fSRoger A. Faulkner 		field++;
324da2e3ebdSchin 	}
3253e14f97fSRoger A. Faulkner 	else
3263e14f97fSRoger A. Faulkner 		do /* separate into fields */
327da2e3ebdSchin 		{
3283e14f97fSRoger A. Faulkner 			if (field >= fieldmax)
3293e14f97fSRoger A. Faulkner 			{
3303e14f97fSRoger A. Faulkner 				n = 2 * fp->maxfields;
3313e14f97fSRoger A. Faulkner 				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
3323e14f97fSRoger A. Faulkner 				field = fp->fields + fp->maxfields;
3333e14f97fSRoger A. Faulkner 				fp->maxfields = n;
3343e14f97fSRoger A. Faulkner 				fieldmax = fp->fields + n;
3353e14f97fSRoger A. Faulkner 			}
3363e14f97fSRoger A. Faulkner 			field->beg = cp;
3373e14f97fSRoger A. Faulkner 			if (jp->delim == -1)
3383e14f97fSRoger A. Faulkner 			{
3393e14f97fSRoger A. Faulkner 				switch (sp[*(unsigned char*)cp])
3403e14f97fSRoger A. Faulkner 				{
3413e14f97fSRoger A. Faulkner 				case S_SPACE:
3423e14f97fSRoger A. Faulkner 					cp++;
3433e14f97fSRoger A. Faulkner 					break;
3443e14f97fSRoger A. Faulkner 				case S_WIDE:
3453e14f97fSRoger A. Faulkner 					tp = cp;
3463e14f97fSRoger A. Faulkner 					if (iswspace(mbchar(tp)))
3473e14f97fSRoger A. Faulkner 					{
3483e14f97fSRoger A. Faulkner 						cp = tp;
3493e14f97fSRoger A. Faulkner 						break;
3503e14f97fSRoger A. Faulkner 					}
3513e14f97fSRoger A. Faulkner 					/*FALLTHROUGH*/
3523e14f97fSRoger A. Faulkner 				default:
3533e14f97fSRoger A. Faulkner 					goto next;
3543e14f97fSRoger A. Faulkner 				}
3553e14f97fSRoger A. Faulkner 				fp->spaces = 1;
3563e14f97fSRoger A. Faulkner 				if (jp->mb)
3573e14f97fSRoger A. Faulkner 					for (;;)
3583e14f97fSRoger A. Faulkner 					{
3593e14f97fSRoger A. Faulkner 						switch (sp[*(unsigned char*)cp++])
3603e14f97fSRoger A. Faulkner 						{
3613e14f97fSRoger A. Faulkner 						case S_SPACE:
3623e14f97fSRoger A. Faulkner 							continue;
3633e14f97fSRoger A. Faulkner 						case S_WIDE:
3643e14f97fSRoger A. Faulkner 							tp = cp - 1;
3653e14f97fSRoger A. Faulkner 							if (iswspace(mbchar(tp)))
3663e14f97fSRoger A. Faulkner 							{
3673e14f97fSRoger A. Faulkner 								cp = tp;
3683e14f97fSRoger A. Faulkner 								continue;
3693e14f97fSRoger A. Faulkner 							}
3703e14f97fSRoger A. Faulkner 							break;
3713e14f97fSRoger A. Faulkner 						}
3723e14f97fSRoger A. Faulkner 						break;
3733e14f97fSRoger A. Faulkner 					}
3743e14f97fSRoger A. Faulkner 				else
3753e14f97fSRoger A. Faulkner 					while (sp[*(unsigned char*)cp++]==S_SPACE);
3763e14f97fSRoger A. Faulkner 				cp--;
3773e14f97fSRoger A. Faulkner 			}
3783e14f97fSRoger A. Faulkner 		next:
3793e14f97fSRoger A. Faulkner 			if (jp->mb)
3803e14f97fSRoger A. Faulkner 			{
3813e14f97fSRoger A. Faulkner 				for (;;)
3823e14f97fSRoger A. Faulkner 				{
3833e14f97fSRoger A. Faulkner 					tp = cp;
3843e14f97fSRoger A. Faulkner 					switch (n = sp[*(unsigned char*)cp++])
3853e14f97fSRoger A. Faulkner 					{
3863e14f97fSRoger A. Faulkner 					case 0:
3873e14f97fSRoger A. Faulkner 						continue;
3883e14f97fSRoger A. Faulkner 					case S_WIDE:
3893e14f97fSRoger A. Faulkner 						cp--;
3903e14f97fSRoger A. Faulkner 						n = mbchar(cp);
3913e14f97fSRoger A. Faulkner 						if (n == jp->delim)
3923e14f97fSRoger A. Faulkner 						{
3933e14f97fSRoger A. Faulkner 							n = S_DELIM;
3943e14f97fSRoger A. Faulkner 							break;
3953e14f97fSRoger A. Faulkner 						}
3963e14f97fSRoger A. Faulkner 						if (jp->delim == -1 && iswspace(n))
3973e14f97fSRoger A. Faulkner 						{
3983e14f97fSRoger A. Faulkner 							n = S_SPACE;
3993e14f97fSRoger A. Faulkner 							break;
4003e14f97fSRoger A. Faulkner 						}
4013e14f97fSRoger A. Faulkner 						continue;
4023e14f97fSRoger A. Faulkner 					}
4033e14f97fSRoger A. Faulkner 					break;
4043e14f97fSRoger A. Faulkner 				}
4053e14f97fSRoger A. Faulkner 				field->end = tp;
4063e14f97fSRoger A. Faulkner 			}
4073e14f97fSRoger A. Faulkner 			else
4083e14f97fSRoger A. Faulkner 			{
4093e14f97fSRoger A. Faulkner 				while (!(n = sp[*(unsigned char*)cp++]));
4103e14f97fSRoger A. Faulkner 				field->end = cp - 1;
4113e14f97fSRoger A. Faulkner 			}
4123e14f97fSRoger A. Faulkner 			field++;
4133e14f97fSRoger A. Faulkner 		} while (n != S_NL);
4143e14f97fSRoger A. Faulkner 	fp->nfields = field - fp->fields;
4153e14f97fSRoger A. Faulkner 	if ((n = fp->field) < fp->nfields)
416da2e3ebdSchin 	{
4173e14f97fSRoger A. Faulkner 		cp = fp->fields[n].beg;
418da2e3ebdSchin 		/* eliminate leading spaces */
419da2e3ebdSchin 		if (fp->spaces)
420da2e3ebdSchin 		{
4213e14f97fSRoger A. Faulkner 			if (jp->mb)
4223e14f97fSRoger A. Faulkner 				for (;;)
4233e14f97fSRoger A. Faulkner 				{
4243e14f97fSRoger A. Faulkner 					switch (sp[*(unsigned char*)cp++])
4253e14f97fSRoger A. Faulkner 					{
4263e14f97fSRoger A. Faulkner 					case S_SPACE:
4273e14f97fSRoger A. Faulkner 						continue;
4283e14f97fSRoger A. Faulkner 					case S_WIDE:
4293e14f97fSRoger A. Faulkner 						tp = cp - 1;
4303e14f97fSRoger A. Faulkner 						if (iswspace(mbchar(tp)))
4313e14f97fSRoger A. Faulkner 						{
4323e14f97fSRoger A. Faulkner 							cp = tp;
4333e14f97fSRoger A. Faulkner 							continue;
4343e14f97fSRoger A. Faulkner 						}
4353e14f97fSRoger A. Faulkner 						break;
4363e14f97fSRoger A. Faulkner 					}
4373e14f97fSRoger A. Faulkner 					break;
4383e14f97fSRoger A. Faulkner 				}
4393e14f97fSRoger A. Faulkner 			else
4403e14f97fSRoger A. Faulkner 				while (sp[*(unsigned char*)cp++]==S_SPACE);
441da2e3ebdSchin 			cp--;
442da2e3ebdSchin 		}
4433e14f97fSRoger A. Faulkner 		fp->fieldlen = fp->fields[n].end - cp;
444da2e3ebdSchin 		return (unsigned char*)cp;
445da2e3ebdSchin 	}
446da2e3ebdSchin 	fp->fieldlen = 0;
447da2e3ebdSchin 	return (unsigned char*)"";
448da2e3ebdSchin }
449da2e3ebdSchin 
#if DEBUG_TRACE
/*
 * trace wrapper: every later getrec() call also prints the calling
 * line, the file index, the stream offset and the start of the join
 * field to sfstdout; u1 carries the result across the comma expression
 */
static unsigned char* u1;
#define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
#endif
454da2e3ebdSchin 
455da2e3ebdSchin /*
456da2e3ebdSchin  * print field <n> from file <index>
457da2e3ebdSchin  */
458da2e3ebdSchin static int
outfield(Join_t * jp,int index,register int n,int last)459da2e3ebdSchin outfield(Join_t* jp, int index, register int n, int last)
460da2e3ebdSchin {
461da2e3ebdSchin 	register File_t*	fp = &jp->file[index];
462da2e3ebdSchin 	register char*		cp;
463da2e3ebdSchin 	register char*		cpmax;
464da2e3ebdSchin 	register int		size;
465da2e3ebdSchin 	register Sfio_t*	iop = jp->outfile;
4663e14f97fSRoger A. Faulkner 	char*			tp;
467da2e3ebdSchin 
468da2e3ebdSchin 	if (n < fp->nfields)
469da2e3ebdSchin 	{
4703e14f97fSRoger A. Faulkner 		cp = fp->fields[n].beg;
4713e14f97fSRoger A. Faulkner 		cpmax = fp->fields[n].end + 1;
472da2e3ebdSchin 	}
473da2e3ebdSchin 	else
474da2e3ebdSchin 		cp = 0;
4753e14f97fSRoger A. Faulkner 	if ((n = jp->delim) == -1)
476da2e3ebdSchin 	{
47734f9b3eeSRoland Mainz 		if (cp && fp->spaces)
478da2e3ebdSchin 		{
4793e14f97fSRoger A. Faulkner 			register unsigned char*	sp = jp->state;
4803e14f97fSRoger A. Faulkner 
481da2e3ebdSchin 			/*eliminate leading spaces */
4823e14f97fSRoger A. Faulkner 			if (jp->mb)
4833e14f97fSRoger A. Faulkner 				for (;;)
4843e14f97fSRoger A. Faulkner 				{
4853e14f97fSRoger A. Faulkner 					switch (sp[*(unsigned char*)cp++])
4863e14f97fSRoger A. Faulkner 					{
4873e14f97fSRoger A. Faulkner 					case S_SPACE:
4883e14f97fSRoger A. Faulkner 						continue;
4893e14f97fSRoger A. Faulkner 					case S_WIDE:
4903e14f97fSRoger A. Faulkner 						tp = cp - 1;
4913e14f97fSRoger A. Faulkner 						if (iswspace(mbchar(tp)))
4923e14f97fSRoger A. Faulkner 						{
4933e14f97fSRoger A. Faulkner 							cp = tp;
4943e14f97fSRoger A. Faulkner 							continue;
4953e14f97fSRoger A. Faulkner 						}
4963e14f97fSRoger A. Faulkner 						break;
4973e14f97fSRoger A. Faulkner 					}
4983e14f97fSRoger A. Faulkner 					break;
4993e14f97fSRoger A. Faulkner 				}
5003e14f97fSRoger A. Faulkner 			else
5013e14f97fSRoger A. Faulkner 				while (sp[*(unsigned char*)cp++]==S_SPACE);
502da2e3ebdSchin 			cp--;
503da2e3ebdSchin 		}
504da2e3ebdSchin 		n = ' ';
505da2e3ebdSchin 	}
5063e14f97fSRoger A. Faulkner 	else if (jp->delimstr)
5073e14f97fSRoger A. Faulkner 		n = -1;
508da2e3ebdSchin 	if (last)
509da2e3ebdSchin 		n = '\n';
510da2e3ebdSchin 	if (cp)
5113e14f97fSRoger A. Faulkner 		size = cpmax - cp;
512da2e3ebdSchin 	else
513da2e3ebdSchin 		size = 0;
5143e14f97fSRoger A. Faulkner 	if (n == -1)
5153e14f97fSRoger A. Faulkner 	{
5163e14f97fSRoger A. Faulkner 		if (size<=1)
5173e14f97fSRoger A. Faulkner 		{
5183e14f97fSRoger A. Faulkner 			if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
5193e14f97fSRoger A. Faulkner 				return -1;
5203e14f97fSRoger A. Faulkner 		}
5213e14f97fSRoger A. Faulkner 		else if (sfwrite(iop, cp, size) < 0)
5223e14f97fSRoger A. Faulkner 			return -1;
5233e14f97fSRoger A. Faulkner 		if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
5243e14f97fSRoger A. Faulkner 			return -1;
5253e14f97fSRoger A. Faulkner 	}
5263e14f97fSRoger A. Faulkner 	else if (size <= 1)
527da2e3ebdSchin 	{
528da2e3ebdSchin 		if (!jp->nullfield)
5293e14f97fSRoger A. Faulkner 			sfputc(iop, n);
5303e14f97fSRoger A. Faulkner 		else if (sfputr(iop, jp->nullfield, n) < 0)
531da2e3ebdSchin 			return -1;
532da2e3ebdSchin 	}
533da2e3ebdSchin 	else
534da2e3ebdSchin 	{
535da2e3ebdSchin 		last = cp[size-1];
536da2e3ebdSchin 		cp[size-1] = n;
5373e14f97fSRoger A. Faulkner 		if (sfwrite(iop, cp, size) < 0)
538da2e3ebdSchin 			return -1;
539da2e3ebdSchin 		cp[size-1] = last;
540da2e3ebdSchin 	}
541da2e3ebdSchin 	return 0;
542da2e3ebdSchin }
543da2e3ebdSchin 
#if DEBUG_TRACE
/*
 * trace wrapper: every later outfield() call first prints the calling
 * line and its arguments to sfstdout; i1, i2 and i3 hold the
 * arguments so each is evaluated only once
 */
static int i1,i2,i3;
#define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
#endif
548da2e3ebdSchin 
549da2e3ebdSchin static int
outrec(register Join_t * jp,int mode)550da2e3ebdSchin outrec(register Join_t* jp, int mode)
551da2e3ebdSchin {
552da2e3ebdSchin 	register File_t*	fp;
553da2e3ebdSchin 	register int		i;
554da2e3ebdSchin 	register int		j;
555da2e3ebdSchin 	register int		k;
556da2e3ebdSchin 	register int		n;
557da2e3ebdSchin 	int*			out;
558da2e3ebdSchin 
559da2e3ebdSchin 	if (mode < 0 && jp->file[0].hit++)
560da2e3ebdSchin 		return 0;
561da2e3ebdSchin 	if (mode > 0 && jp->file[1].hit++)
562da2e3ebdSchin 		return 0;
563da2e3ebdSchin 	if (out = jp->outlist)
564da2e3ebdSchin 	{
565da2e3ebdSchin 		while ((n = *out++) >= 0)
566da2e3ebdSchin 		{
567da2e3ebdSchin 			if (n == JOINFIELD)
568da2e3ebdSchin 			{
569da2e3ebdSchin 				i = mode >= 0;
570da2e3ebdSchin 				j = jp->file[i].field;
571da2e3ebdSchin 			}
572da2e3ebdSchin 			else
573da2e3ebdSchin 			{
574da2e3ebdSchin 				i = n & 1;
575da2e3ebdSchin 				j = (mode<0 && i || mode>0 && !i) ?
576da2e3ebdSchin 					jp->file[i].nfields :
577da2e3ebdSchin 					n >> 2;
578da2e3ebdSchin 			}
579da2e3ebdSchin 			if (outfield(jp, i, j, *out < 0) < 0)
580da2e3ebdSchin 				return -1;
581da2e3ebdSchin 		}
582da2e3ebdSchin 		return 0;
583da2e3ebdSchin 	}
584da2e3ebdSchin 	k = jp->file[0].nfields;
585da2e3ebdSchin 	if (mode >= 0)
586da2e3ebdSchin 		k += jp->file[1].nfields - 1;
587da2e3ebdSchin 	for (i=0; i<2; i++)
588da2e3ebdSchin 	{
589da2e3ebdSchin 		fp = &jp->file[i];
590da2e3ebdSchin 		if (mode>0 && i==0)
591da2e3ebdSchin 		{
592da2e3ebdSchin 			k -= (fp->nfields - 1);
593da2e3ebdSchin 			continue;
594da2e3ebdSchin 		}
595da2e3ebdSchin 		n = fp->field;
596da2e3ebdSchin 		if (mode||i==0)
597da2e3ebdSchin 		{
598da2e3ebdSchin 			/* output join field first */
599da2e3ebdSchin 			if (outfield(jp,i,n,!--k) < 0)
600da2e3ebdSchin 				return -1;
601da2e3ebdSchin 			if (!k)
602da2e3ebdSchin 				return 0;
603da2e3ebdSchin 			for (j=0; j<n; j++)
604da2e3ebdSchin 			{
605da2e3ebdSchin 				if (outfield(jp,i,j,!--k) < 0)
606da2e3ebdSchin 					return -1;
607da2e3ebdSchin 				if (!k)
608da2e3ebdSchin 					return 0;
609da2e3ebdSchin 			}
610da2e3ebdSchin 			j = n + 1;
611da2e3ebdSchin 		}
612da2e3ebdSchin 		else
613da2e3ebdSchin 			j = 0;
614da2e3ebdSchin 		for (;j<fp->nfields; j++)
615da2e3ebdSchin 		{
616da2e3ebdSchin 			if (j!=n && outfield(jp,i,j,!--k) < 0)
617da2e3ebdSchin 				return -1;
618da2e3ebdSchin 			if (!k)
619da2e3ebdSchin 				return 0;
620da2e3ebdSchin 		}
621da2e3ebdSchin 	}
622da2e3ebdSchin 	return 0;
623da2e3ebdSchin }
624da2e3ebdSchin 
#if DEBUG_TRACE
/*
 * trace wrapper: every later outrec() call first prints the calling
 * line, the mode, and both join fields with their hit counts
 * it expands names from the caller's scope: jp, lo, hi, cp1 and cp2
 */
#define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
#endif
628da2e3ebdSchin 
629da2e3ebdSchin static int
join(Join_t * jp)630da2e3ebdSchin join(Join_t* jp)
631da2e3ebdSchin {
632da2e3ebdSchin 	register unsigned char*	cp1;
633da2e3ebdSchin 	register unsigned char*	cp2;
634da2e3ebdSchin 	register int		n1;
635da2e3ebdSchin 	register int		n2;
636da2e3ebdSchin 	register int		n;
637da2e3ebdSchin 	register int		cmp;
638da2e3ebdSchin 	register int		same;
639da2e3ebdSchin 	int			o2;
640da2e3ebdSchin 	Sfoff_t			lo = -1;
641da2e3ebdSchin 	Sfoff_t			hi = -1;
642da2e3ebdSchin 
643da2e3ebdSchin 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
644da2e3ebdSchin 	{
645da2e3ebdSchin 		n1 = jp->file[0].fieldlen;
646da2e3ebdSchin 		n2 = jp->file[1].fieldlen;
647da2e3ebdSchin 		same = 0;
648da2e3ebdSchin 		for (;;)
649da2e3ebdSchin 		{
650da2e3ebdSchin 			n = n1 < n2 ? n1 : n2;
651da2e3ebdSchin #if DEBUG_TRACE
652da2e3ebdSchin 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
653da2e3ebdSchin 				cmp = n1 - n2;
654da2e3ebdSchin sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
655da2e3ebdSchin 			if (!cmp)
656da2e3ebdSchin #else
657da2e3ebdSchin 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
658da2e3ebdSchin #endif
659da2e3ebdSchin 			{
660da2e3ebdSchin 				if (!(jp->outmode & C_COMMON))
661da2e3ebdSchin 				{
662da2e3ebdSchin 					if (cp1 = getrec(jp, 0, 1))
663da2e3ebdSchin 					{
664da2e3ebdSchin 						n1 = jp->file[0].fieldlen;
665da2e3ebdSchin 						same = 1;
666da2e3ebdSchin 						continue;
667da2e3ebdSchin 					}
668da2e3ebdSchin 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
669da2e3ebdSchin 						break;
670da2e3ebdSchin 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
671da2e3ebdSchin 					{
672da2e3ebdSchin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
673da2e3ebdSchin 						return -1;
674da2e3ebdSchin 					}
675da2e3ebdSchin 				}
676da2e3ebdSchin 				else if (outrec(jp, 0) < 0)
677da2e3ebdSchin 					return -1;
678da2e3ebdSchin 				else if (lo < 0 && (jp->outmode & C_COMMON))
679da2e3ebdSchin 				{
680da2e3ebdSchin 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
681da2e3ebdSchin 					{
682da2e3ebdSchin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
683da2e3ebdSchin 						return -1;
684da2e3ebdSchin 					}
685da2e3ebdSchin 					lo -= jp->file[1].reclen;
686da2e3ebdSchin 				}
687da2e3ebdSchin 				if (cp2 = getrec(jp, 1, lo < 0))
688da2e3ebdSchin 				{
689da2e3ebdSchin 					n2 = jp->file[1].fieldlen;
690da2e3ebdSchin 					continue;
691da2e3ebdSchin 				}
692da2e3ebdSchin #if DEBUG_TRACE
693da2e3ebdSchin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
694da2e3ebdSchin #endif
695da2e3ebdSchin 			}
696da2e3ebdSchin 			else if (cmp > 0)
697da2e3ebdSchin 			{
698da2e3ebdSchin 				if (same)
699da2e3ebdSchin 				{
700da2e3ebdSchin 					same = 0;
701da2e3ebdSchin 				next:
702da2e3ebdSchin 					if (n2 > jp->samesize)
703da2e3ebdSchin 					{
704da2e3ebdSchin 						jp->samesize = roundof(n2, 16);
705da2e3ebdSchin 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
706da2e3ebdSchin 						{
707da2e3ebdSchin 							error(ERROR_SYSTEM|2, "out of space");
708da2e3ebdSchin 							return -1;
709da2e3ebdSchin 						}
710da2e3ebdSchin 					}
711da2e3ebdSchin 					memcpy(jp->same, cp2, o2 = n2);
712da2e3ebdSchin 					if (!(cp2 = getrec(jp, 1, 0)))
713da2e3ebdSchin 						break;
714da2e3ebdSchin 					n2 = jp->file[1].fieldlen;
715da2e3ebdSchin 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
716da2e3ebdSchin 						goto next;
717da2e3ebdSchin 					continue;
718da2e3ebdSchin 				}
719da2e3ebdSchin 				if (hi >= 0)
720da2e3ebdSchin 				{
721da2e3ebdSchin 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
722da2e3ebdSchin 					{
723da2e3ebdSchin 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
724da2e3ebdSchin 						return -1;
725da2e3ebdSchin 					}
726da2e3ebdSchin 					hi = -1;
727da2e3ebdSchin 				}
728da2e3ebdSchin 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
729da2e3ebdSchin 					return -1;
730da2e3ebdSchin 				lo = -1;
731da2e3ebdSchin 				if (cp2 = getrec(jp, 1, 1))
732da2e3ebdSchin 				{
733da2e3ebdSchin 					n2 = jp->file[1].fieldlen;
734da2e3ebdSchin 					continue;
735da2e3ebdSchin 				}
736da2e3ebdSchin #if DEBUG_TRACE
737da2e3ebdSchin sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
738da2e3ebdSchin #endif
739da2e3ebdSchin 			}
740da2e3ebdSchin 			else if (same)
741da2e3ebdSchin 			{
742da2e3ebdSchin 				same = 0;
743da2e3ebdSchin 				if (!(cp1 = getrec(jp, 0, 0)))
744da2e3ebdSchin 					break;
745da2e3ebdSchin 				n1 = jp->file[0].fieldlen;
746da2e3ebdSchin 				continue;
747da2e3ebdSchin 			}
748da2e3ebdSchin 			if (lo >= 0)
749da2e3ebdSchin 			{
750da2e3ebdSchin 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
751da2e3ebdSchin 				    (hi -= jp->file[1].reclen) < 0 ||
752da2e3ebdSchin 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
753da2e3ebdSchin 				    !(cp2 = getrec(jp, 1, 0)))
754da2e3ebdSchin 				{
755da2e3ebdSchin 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
756da2e3ebdSchin 					return -1;
757da2e3ebdSchin 				}
758da2e3ebdSchin 				n2 = jp->file[1].fieldlen;
759da2e3ebdSchin 				lo = -1;
760da2e3ebdSchin 				if (jp->file[1].discard)
761da2e3ebdSchin 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
762da2e3ebdSchin 			}
763da2e3ebdSchin 			else if (!cp2)
764da2e3ebdSchin 				break;
765da2e3ebdSchin 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
766da2e3ebdSchin 				return -1;
767da2e3ebdSchin 			if (!(cp1 = getrec(jp, 0, 1)))
768da2e3ebdSchin 				break;
769da2e3ebdSchin 			n1 = jp->file[0].fieldlen;
770da2e3ebdSchin 		}
771da2e3ebdSchin 	}
772da2e3ebdSchin #if DEBUG_TRACE
773*b30d1939SAndy Fiddaman sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
774da2e3ebdSchin #endif
775da2e3ebdSchin 	if (cp2)
776da2e3ebdSchin 	{
777da2e3ebdSchin 		if (hi >= 0 &&
778da2e3ebdSchin 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
779da2e3ebdSchin 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
780da2e3ebdSchin 		{
781da2e3ebdSchin 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
782da2e3ebdSchin 			return -1;
783da2e3ebdSchin 		}
784da2e3ebdSchin #if DEBUG_TRACE
785da2e3ebdSchin sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
786da2e3ebdSchin #endif
787da2e3ebdSchin 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
788da2e3ebdSchin 		cmp = 1;
789da2e3ebdSchin 		n = 1;
790da2e3ebdSchin 	}
791da2e3ebdSchin 	else
792da2e3ebdSchin 	{
793da2e3ebdSchin 		cmp = -1;
794da2e3ebdSchin 		n = 0;
795da2e3ebdSchin 	}
796da2e3ebdSchin #if DEBUG_TRACE
797da2e3ebdSchin sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
798da2e3ebdSchin #endif
799da2e3ebdSchin 	if (!cp1 || !(jp->outmode & (1<<n)))
800da2e3ebdSchin 	{
801da2e3ebdSchin 		if (cp1 && jp->file[n].iop == sfstdin)
802da2e3ebdSchin 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
803da2e3ebdSchin 		return 0;
804da2e3ebdSchin 	}
805da2e3ebdSchin 	if (outrec(jp, cmp) < 0)
806da2e3ebdSchin 		return -1;
807da2e3ebdSchin 	do
808da2e3ebdSchin 	{
809da2e3ebdSchin 		if (!getrec(jp, n, 1))
810da2e3ebdSchin 			return 0;
811da2e3ebdSchin 	} while (outrec(jp, cmp) >= 0);
812da2e3ebdSchin 	return -1;
813da2e3ebdSchin }
814da2e3ebdSchin 
815da2e3ebdSchin int
b_join(int argc,char ** argv,Shbltin_t * context)816*b30d1939SAndy Fiddaman b_join(int argc, char** argv, Shbltin_t* context)
817da2e3ebdSchin {
818da2e3ebdSchin 	register int		n;
819da2e3ebdSchin 	register char*		cp;
820da2e3ebdSchin 	register Join_t*	jp;
821da2e3ebdSchin 	char*			e;
822da2e3ebdSchin 
823da2e3ebdSchin #if !DEBUG_TRACE
824da2e3ebdSchin 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
825da2e3ebdSchin #endif
826da2e3ebdSchin 	if (!(jp = init()))
827da2e3ebdSchin 		error(ERROR_system(1),"out of space");
8287c2fbfb3SApril Chin 	jp->context = context;
829da2e3ebdSchin 	for (;;)
830da2e3ebdSchin 	{
831da2e3ebdSchin 		switch (n = optget(argv, usage))
832da2e3ebdSchin 		{
833da2e3ebdSchin  		case 'j':
834da2e3ebdSchin 			/*
835da2e3ebdSchin 			 * check for obsolete "-j1 field" and "-j2 field"
836da2e3ebdSchin 			 */
837da2e3ebdSchin 
838da2e3ebdSchin 			if (opt_info.offset == 0)
839da2e3ebdSchin 			{
840da2e3ebdSchin 				cp = argv[opt_info.index - 1];
841da2e3ebdSchin 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
842da2e3ebdSchin 				n = cp[n] == 'j';
843da2e3ebdSchin 			}
844da2e3ebdSchin 			else
845da2e3ebdSchin 				n = 0;
846da2e3ebdSchin 			if (n)
847da2e3ebdSchin 			{
848da2e3ebdSchin 				if (opt_info.num!=1 && opt_info.num!=2)
849da2e3ebdSchin 					error(2,"-jfileno field: fileno must be 1 or 2");
850da2e3ebdSchin 				n = '0' + opt_info.num;
851da2e3ebdSchin 				if (!(cp = argv[opt_info.index]))
852da2e3ebdSchin 				{
853da2e3ebdSchin 					argc = 0;
854da2e3ebdSchin 					break;
855da2e3ebdSchin 				}
856da2e3ebdSchin 				opt_info.num = strtol(cp, &e, 10);
857da2e3ebdSchin 				if (*e)
858da2e3ebdSchin 				{
859da2e3ebdSchin 					argc = 0;
860da2e3ebdSchin 					break;
861da2e3ebdSchin 				}
862da2e3ebdSchin 				opt_info.index++;
863da2e3ebdSchin 			}
864da2e3ebdSchin 			else
865da2e3ebdSchin 			{
866da2e3ebdSchin 				jp->file[0].field = (int)(opt_info.num-1);
867da2e3ebdSchin 				n = '2';
868da2e3ebdSchin 			}
869da2e3ebdSchin 			/*FALLTHROUGH*/
870da2e3ebdSchin  		case '1':
871da2e3ebdSchin 		case '2':
872da2e3ebdSchin 			if (opt_info.num <=0)
873da2e3ebdSchin 				error(2,"field number must positive");
874da2e3ebdSchin 			jp->file[n-'1'].field = (int)(opt_info.num-1);
875da2e3ebdSchin 			continue;
876da2e3ebdSchin 		case 'v':
877da2e3ebdSchin 			jp->outmode &= ~C_COMMON;
878da2e3ebdSchin 			/*FALLTHROUGH*/
879da2e3ebdSchin 		case 'a':
880da2e3ebdSchin 			if (opt_info.num!=1 && opt_info.num!=2)
881da2e3ebdSchin 				error(2,"%s: file number must be 1 or 2", opt_info.name);
882da2e3ebdSchin 			jp->outmode |= 1<<(opt_info.num-1);
883da2e3ebdSchin 			continue;
884da2e3ebdSchin 		case 'e':
885da2e3ebdSchin 			jp->nullfield = opt_info.arg;
886da2e3ebdSchin 			continue;
887da2e3ebdSchin 		case 'o':
888da2e3ebdSchin 			/* need to accept obsolescent command syntax */
889da2e3ebdSchin 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
890da2e3ebdSchin 			opt_info.index += n;
891da2e3ebdSchin 			continue;
892da2e3ebdSchin 		case 't':
893da2e3ebdSchin 			jp->state[' '] = jp->state['\t'] = 0;
8943e14f97fSRoger A. Faulkner 			if (jp->mb)
8953e14f97fSRoger A. Faulkner 			{
8963e14f97fSRoger A. Faulkner 				cp = opt_info.arg;
8973e14f97fSRoger A. Faulkner 				jp->delim = mbchar(cp);
8983e14f97fSRoger A. Faulkner 				if ((n = cp - opt_info.arg) > 1)
8993e14f97fSRoger A. Faulkner 				{
9003e14f97fSRoger A. Faulkner 					jp->delimlen = n;
9013e14f97fSRoger A. Faulkner 					jp->delimstr = opt_info.arg;
9023e14f97fSRoger A. Faulkner 					continue;
9033e14f97fSRoger A. Faulkner 				}
9043e14f97fSRoger A. Faulkner 			}
9053e14f97fSRoger A. Faulkner 			n = *(unsigned char*)opt_info.arg;
906da2e3ebdSchin 			jp->state[n] = S_DELIM;
907da2e3ebdSchin 			jp->delim = n;
908da2e3ebdSchin 			continue;
909da2e3ebdSchin 		case 'i':
910da2e3ebdSchin 			jp->ignorecase = !opt_info.num;
911da2e3ebdSchin 			continue;
912da2e3ebdSchin 		case 'B':
913da2e3ebdSchin 			jp->buffered = !opt_info.num;
914da2e3ebdSchin 			continue;
915da2e3ebdSchin 		case ':':
916da2e3ebdSchin 			error(2, "%s", opt_info.arg);
917da2e3ebdSchin 			break;
918da2e3ebdSchin 		case '?':
919da2e3ebdSchin 			done(jp);
920da2e3ebdSchin 			error(ERROR_usage(2), "%s", opt_info.arg);
921da2e3ebdSchin 			break;
922da2e3ebdSchin 		}
923da2e3ebdSchin 		break;
924da2e3ebdSchin 	}
925da2e3ebdSchin 	argv += opt_info.index;
926da2e3ebdSchin 	argc -= opt_info.index;
927da2e3ebdSchin 	if (error_info.errors || argc!=2)
928da2e3ebdSchin 	{
929da2e3ebdSchin 		done(jp);
930da2e3ebdSchin 		error(ERROR_usage(2),"%s", optusage(NiL));
931da2e3ebdSchin 	}
932da2e3ebdSchin 	jp->ooutmode = jp->outmode;
933da2e3ebdSchin 	jp->file[0].name = cp = *argv++;
934da2e3ebdSchin 	if (streq(cp,"-"))
935da2e3ebdSchin 	{
936da2e3ebdSchin 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
937da2e3ebdSchin 		{
938da2e3ebdSchin 			if (sfdcseekable(sfstdin))
939da2e3ebdSchin 				error(ERROR_warn(0),"%s: seek may fail",cp);
940da2e3ebdSchin 			else
941da2e3ebdSchin 				jp->file[0].discard = 1;
942da2e3ebdSchin 		}
943da2e3ebdSchin 		jp->file[0].iop = sfstdin;
944da2e3ebdSchin 	}
945da2e3ebdSchin 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
946da2e3ebdSchin 	{
947da2e3ebdSchin 		done(jp);
948da2e3ebdSchin 		error(ERROR_system(1),"%s: cannot open",cp);
949da2e3ebdSchin 	}
950da2e3ebdSchin 	jp->file[1].name = cp = *argv;
951da2e3ebdSchin 	if (streq(cp,"-"))
952da2e3ebdSchin 	{
953da2e3ebdSchin 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
954da2e3ebdSchin 		{
955da2e3ebdSchin 			if (sfdcseekable(sfstdin))
956da2e3ebdSchin 				error(ERROR_warn(0),"%s: seek may fail",cp);
957da2e3ebdSchin 			else
958da2e3ebdSchin 				jp->file[1].discard = 1;
959da2e3ebdSchin 		}
960da2e3ebdSchin 		jp->file[1].iop = sfstdin;
961da2e3ebdSchin 	}
962da2e3ebdSchin 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
963da2e3ebdSchin 	{
964da2e3ebdSchin 		done(jp);
965da2e3ebdSchin 		error(ERROR_system(1),"%s: cannot open",cp);
966da2e3ebdSchin 	}
967da2e3ebdSchin 	if (jp->buffered)
968da2e3ebdSchin 	{
969da2e3ebdSchin 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
97034f9b3eeSRoland Mainz 		sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
971da2e3ebdSchin 	}
972da2e3ebdSchin 	jp->outfile = sfstdout;
973da2e3ebdSchin 	if (!jp->outlist)
974da2e3ebdSchin 		jp->nullfield = 0;
975da2e3ebdSchin 	if (join(jp) < 0)
976da2e3ebdSchin 	{
977da2e3ebdSchin 		done(jp);
978da2e3ebdSchin 		error(ERROR_system(1),"write error");
979da2e3ebdSchin 	}
980da2e3ebdSchin 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
981da2e3ebdSchin 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
982da2e3ebdSchin 	done(jp);
983da2e3ebdSchin 	return error_info.errors;
984da2e3ebdSchin }
985