1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1992-2012 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                 Eclipse Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *          http://www.eclipse.org/org/documents/epl-v10.html           *
11 *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23  * David Korn
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * join
28  */
29 
30 static const char usage[] =
31 "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32 USAGE_LICENSE
33 "[+NAME?join - relational database operator]"
34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35 	"and \afile2\a and writes the resulting joined files to standard "
36 	"output.  By default, a field is delimited by one or more spaces "
37 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38 	"can be used to change the field delimiter.]"
39 "[+?The \ajoin field\a is a field in each file on which files are compared. "
40 	"By default \bjoin\b writes one line in the output for each pair "
41 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42 	"fields.  The default output line consists of the join field, "
43 	"then the remaining fields from \afile1\a, then the remaining "
44 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45 	"option.  The \b-a\b option can be used to add unmatched lines "
46 	"to the output.  The \b-v\b option can be used to output only "
47 	"unmatched lines.]"
48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49 	"sequence of \bsort -b\b on the fields on which they are to be "
50 	"joined otherwise the results are unspecified.]"
51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52         "uses standard input starting at the current location.]"
53 
54 "[e:empty]:[string?Replace empty output fields in the list selected with"
55 "	\b-o\b with \astring\a.]"
56 "[o:output]:[list?Construct the output line to comprise the fields specified "
57 	"in a blank or comma separated list \alist\a.  Each element in "
58 	"\alist\a consists of a file number (either 1 or 2), a period, "
59 	"and a field number or \b0\b representing the join field.  "
60 	"As an obsolete feature multiple occurrences of \b-o\b can "
61 	"be specified.]"
62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63 "	and output.]"
64 "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65 "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68 "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69 "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70 	"all unpairable lines will be output.]"
71 "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72 "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73 	"output.  If \b-v\b options appear for both 1 and 2, then "
74 	"all unpairable lines will be output.] ]"
75 "[i:ignorecase?Ignore case in field comparisons.]"
76 "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77 
78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79 "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80 "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81 "	equivalent to \b-2\b \afield\a.]"
82 
83 "\n"
84 "\nfile1 file2\n"
85 "\n"
86 "[+EXIT STATUS?]{"
87 	"[+0?Both files processed successfully.]"
88 	"[+>0?An error occurred.]"
89 "}"
90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91 ;
92 
93 #include <cmd.h>
94 #include <sfdisc.h>
95 
96 #if _hdr_wchar && _hdr_wctype && _lib_iswctype
97 
98 #include <wchar.h>
99 #include <wctype.h>
100 
101 #else
102 
103 #include <ctype.h>
104 
105 #ifndef iswspace
106 #define iswspace(x)	isspace(x)
107 #endif
108 
109 #endif
110 
/*
 * Join_t.outmode bits
 *	C_FILE1, C_FILE2	set by -a/-v as 1<<(fileno-1); tested before
 *				writing unpairable lines of file 1 / file 2
 *	C_COMMON		set by init(), cleared by -v; tested before
 *				writing paired lines
 */
111 #define C_FILE1		001
112 #define C_FILE2		002
113 #define C_COMMON	004
114 #define C_ALL		(C_FILE1|C_FILE2|C_COMMON)
115 
/*
 * NFIELD	initial slot count of the per-file field arrays and of the
 *		-o output list; both are doubled when they fill up
 * JOINFIELD	output list code stored for a "0" (join field) element
 */
116 #define NFIELD		10
117 #define JOINFIELD	2
118 
/*
 * byte classes stored in Join_t.state[]; 0 means ordinary field data
 *	S_DELIM	the -t delimiter byte
 *	S_SPACE	space and tab (reset to 0 when -t is given)
 *	S_NL	new-line
 *	S_WIDE	bytes 0x80..0xff when mbwide() is true
 */
119 #define S_DELIM		1
120 #define S_SPACE		2
121 #define S_NL		3
122 #define S_WIDE		4
123 
/*
 * one field of the current record, as pointers into the record buffer
 */
124 typedef struct Field_s
125 {
126 	char*		beg;	/* first byte of the field (before leading blanks are skipped) */
127 	char*		end;	/* byte that terminated the field (delimiter or new-line) */
128 } Field_t;
129 
/*
 * per input file state; most members describe the record
 * most recently read by getrec()
 */
130 typedef struct File_s
131 {
132 	Sfio_t*		iop;		/* input stream */
133 	char*		name;		/* operand name, used in diagnostics */
134 	char*		recptr;		/* start of the current record */
135 	int		reclen;		/* sfvalue() after reading the current record */
136 	int		field;		/* zero-based join field index */
137 	int		fieldlen;	/* length of the current record's join field */
138 	int		nfields;	/* number of fields in the current record */
139 	int		maxfields;	/* slots available in fields[] (one spare is allocated) */
140 	int		spaces;		/* set when a field of the current record began with blanks */
141 	int		hit;		/* cleared by getrec(); bumped by outrec() so an unpaired record is written once */
142 	int		discard;	/* set for non-seekable standard input; getrec() then raises SFSK_DISCARD on request */
143 	Field_t*	fields;		/* field boundaries of the current record */
144 } File_t;
145 
/*
 * state for one join invocation; allocated by init(), released by done()
 */
146 typedef struct Join_s
147 {
148 	unsigned char	state[1<<CHAR_BIT];	/* S_* class of every byte value */
149 	Sfio_t*		outfile;	/* output stream */
150 	int*		outlist;	/* -o field codes terminated by -1, or 0 for the default layout */
151 	int		outmode;	/* C_* bits; getrec() clears a file's bit at end of file */
152 	int		ooutmode;	/* copy of outmode taken before the files are read */
153 	char*		nullfield;	/* -e replacement for empty fields, or 0 */
154 	char*		delimstr;	/* bytes of a multibyte -t delimiter, or 0 */
155 	int		delim;		/* -t delimiter character, or -1 for blank separated fields */
156 	int		delimlen;	/* length of delimstr in bytes */
157 	int		buffered;	/* set from the -B option */
158 	int		ignorecase;	/* set from the -i option */
159 	int		mb;		/* mbwide() result: multibyte processing is needed */
160 	char*		same;		/* scratch copy of a file 2 join field */
161 	int		samesize;	/* allocated size of same */
162 	Shbltin_t*	context;	/* builtin context passed to sh_checksig() */
163 	File_t		file[2];	/* file1 and file2 */
164 } Join_t;
165 
166 static void
done(register Join_t * jp)167 done(register Join_t* jp)
168 {
169 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170 		sfclose(jp->file[0].iop);
171 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172 		sfclose(jp->file[1].iop);
173 	if (jp->outlist)
174 		free(jp->outlist);
175 	if (jp->file[0].fields)
176 		free(jp->file[0].fields);
177 	if (jp->file[1].fields)
178 		free(jp->file[1].fields);
179 	if (jp->same)
180 		free(jp->same);
181 	free(jp);
182 }
183 
184 static Join_t*
init(void)185 init(void)
186 {
187 	register Join_t*	jp;
188 	register int		i;
189 
190 	setlocale(LC_ALL, "");
191 	if (jp = newof(0, Join_t, 1, 0))
192 	{
193 		if (jp->mb = mbwide())
194 			for (i = 0x80; i <= 0xff; i++)
195 				jp->state[i] = S_WIDE;
196 		jp->state[' '] = jp->state['\t'] = S_SPACE;
197 		jp->state['\n'] = S_NL;
198 		jp->delim = -1;
199 		jp->nullfield = 0;
200 		if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201 		    !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202 		{
203 			done(jp);
204 			return 0;
205 		}
206 		jp->file[0].maxfields = NFIELD;
207 		jp->file[1].maxfields = NFIELD;
208 		jp->outmode = C_COMMON;
209 	}
210 	return jp;
211 }
212 
213 static int
getolist(Join_t * jp,const char * first,char ** arglist)214 getolist(Join_t* jp, const char* first, char** arglist)
215 {
216 	register const char*	cp = first;
217 	char**			argv = arglist;
218 	register int		c;
219 	int*			outptr;
220 	int*			outmax;
221 	int			nfield = NFIELD;
222 	char*			str;
223 
224 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225 	outmax = outptr + NFIELD;
226 	while (c = *cp++)
227 	{
228 		if (c==' ' || c=='\t' || c==',')
229 			continue;
230 		str = (char*)--cp;
231 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232 		{
233 			str++;
234 			c = JOINFIELD;
235 			goto skip;
236 		}
237 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238 		{
239 			error(2,"%s: invalid field list",first);
240 			break;
241 		}
242 		c--;
243 		c <<=2;
244 		if (*cp=='2')
245 			c |=1;
246 	skip:
247 		if (outptr >= outmax)
248 		{
249 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250 			outptr = jp->outlist + nfield;
251 			nfield *= 2;
252 			outmax = jp->outlist + nfield;
253 		}
254 		*outptr++ = c;
255 		cp = str;
256 	}
257 	/* need to accept obsolescent command syntax */
258 	while (cp = *argv)
259 	{
260 		if (cp[1]!='.' || (*cp!='1' && *cp!='2'))
261 		{
262 			if (*cp=='0' && cp[1]==0)
263 			{
264 				c = JOINFIELD;
265 				goto skip2;
266 			}
267 			break;
268 		}
269 		str = (char*)cp;
270 		c = strtol(cp+2, &str,10);
271 		if (*str || --c<0)
272 			break;
273 		argv++;
274 		c <<= 2;
275 		if (*cp=='2')
276 			c |=1;
277 	skip2:
278 		if (outptr >= outmax)
279 		{
280 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281 			outptr = jp->outlist + nfield;
282 			nfield *= 2;
283 			outmax = jp->outlist + nfield;
284 		}
285 		*outptr++ = c;
286 	}
287 	*outptr = -1;
288 	return argv-arglist;
289 }
290 
291 /*
292  * read in a record from file <index> and split into fields
293  */
294 static unsigned char*
getrec(Join_t * jp,int index,int discard)295 getrec(Join_t* jp, int index, int discard)
296 {
297 	register unsigned char*	sp = jp->state;
298 	register File_t*	fp = &jp->file[index];
299 	register Field_t*	field = fp->fields;
300 	register Field_t*	fieldmax = field + fp->maxfields;
301 	register char*		cp;
302 	register int		n;
303 	char*			tp;
304 
305 	if (sh_checksig(jp->context))
306 		return 0;
307 	if (discard && fp->discard)
308 		sfraise(fp->iop, SFSK_DISCARD, NiL);
309 	fp->spaces = 0;
310 	fp->hit = 0;
311 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
312 	{
313 		jp->outmode &= ~(1<<index);
314 		return 0;
315 	}
316 	fp->recptr = cp;
317 	fp->reclen = sfvalue(fp->iop);
318 	if (jp->delim == '\n')	/* handle new-line delimiter specially */
319 	{
320 		field->beg = cp;
321 		cp += fp->reclen;
322 		field->end = cp - 1;
323 		field++;
324 	}
325 	else
326 		do /* separate into fields */
327 		{
328 			if (field >= fieldmax)
329 			{
330 				n = 2 * fp->maxfields;
331 				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
332 				field = fp->fields + fp->maxfields;
333 				fp->maxfields = n;
334 				fieldmax = fp->fields + n;
335 			}
336 			field->beg = cp;
337 			if (jp->delim == -1)
338 			{
339 				switch (sp[*(unsigned char*)cp])
340 				{
341 				case S_SPACE:
342 					cp++;
343 					break;
344 				case S_WIDE:
345 					tp = cp;
346 					if (iswspace(mbchar(tp)))
347 					{
348 						cp = tp;
349 						break;
350 					}
351 					/*FALLTHROUGH*/
352 				default:
353 					goto next;
354 				}
355 				fp->spaces = 1;
356 				if (jp->mb)
357 					for (;;)
358 					{
359 						switch (sp[*(unsigned char*)cp++])
360 						{
361 						case S_SPACE:
362 							continue;
363 						case S_WIDE:
364 							tp = cp - 1;
365 							if (iswspace(mbchar(tp)))
366 							{
367 								cp = tp;
368 								continue;
369 							}
370 							break;
371 						}
372 						break;
373 					}
374 				else
375 					while (sp[*(unsigned char*)cp++]==S_SPACE);
376 				cp--;
377 			}
378 		next:
379 			if (jp->mb)
380 			{
381 				for (;;)
382 				{
383 					tp = cp;
384 					switch (n = sp[*(unsigned char*)cp++])
385 					{
386 					case 0:
387 						continue;
388 					case S_WIDE:
389 						cp--;
390 						n = mbchar(cp);
391 						if (n == jp->delim)
392 						{
393 							n = S_DELIM;
394 							break;
395 						}
396 						if (jp->delim == -1 && iswspace(n))
397 						{
398 							n = S_SPACE;
399 							break;
400 						}
401 						continue;
402 					}
403 					break;
404 				}
405 				field->end = tp;
406 			}
407 			else
408 			{
409 				while (!(n = sp[*(unsigned char*)cp++]));
410 				field->end = cp - 1;
411 			}
412 			field++;
413 		} while (n != S_NL);
414 	fp->nfields = field - fp->fields;
415 	if ((n = fp->field) < fp->nfields)
416 	{
417 		cp = fp->fields[n].beg;
418 		/* eliminate leading spaces */
419 		if (fp->spaces)
420 		{
421 			if (jp->mb)
422 				for (;;)
423 				{
424 					switch (sp[*(unsigned char*)cp++])
425 					{
426 					case S_SPACE:
427 						continue;
428 					case S_WIDE:
429 						tp = cp - 1;
430 						if (iswspace(mbchar(tp)))
431 						{
432 							cp = tp;
433 							continue;
434 						}
435 						break;
436 					}
437 					break;
438 				}
439 			else
440 				while (sp[*(unsigned char*)cp++]==S_SPACE);
441 			cp--;
442 		}
443 		fp->fieldlen = fp->fields[n].end - cp;
444 		return (unsigned char*)cp;
445 	}
446 	fp->fieldlen = 0;
447 	return (unsigned char*)"";
448 }
449 
/*
 * DEBUG_TRACE only: route later getrec() calls through a wrapper that
 * prints a "[G...]" trace item to sfstdout (source line, file index,
 * stream offset and up to 8 bytes of the returned join field) and
 * yields the original return value, saved in u1
 */
450 #if DEBUG_TRACE
451 static unsigned char* u1;
452 #define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
453 #endif
454 
455 /*
456  * print field <n> from file <index>
457  */
458 static int
outfield(Join_t * jp,int index,register int n,int last)459 outfield(Join_t* jp, int index, register int n, int last)
460 {
461 	register File_t*	fp = &jp->file[index];
462 	register char*		cp;
463 	register char*		cpmax;
464 	register int		size;
465 	register Sfio_t*	iop = jp->outfile;
466 	char*			tp;
467 
468 	if (n < fp->nfields)
469 	{
470 		cp = fp->fields[n].beg;
471 		cpmax = fp->fields[n].end + 1;
472 	}
473 	else
474 		cp = 0;
475 	if ((n = jp->delim) == -1)
476 	{
477 		if (cp && fp->spaces)
478 		{
479 			register unsigned char*	sp = jp->state;
480 
481 			/*eliminate leading spaces */
482 			if (jp->mb)
483 				for (;;)
484 				{
485 					switch (sp[*(unsigned char*)cp++])
486 					{
487 					case S_SPACE:
488 						continue;
489 					case S_WIDE:
490 						tp = cp - 1;
491 						if (iswspace(mbchar(tp)))
492 						{
493 							cp = tp;
494 							continue;
495 						}
496 						break;
497 					}
498 					break;
499 				}
500 			else
501 				while (sp[*(unsigned char*)cp++]==S_SPACE);
502 			cp--;
503 		}
504 		n = ' ';
505 	}
506 	else if (jp->delimstr)
507 		n = -1;
508 	if (last)
509 		n = '\n';
510 	if (cp)
511 		size = cpmax - cp;
512 	else
513 		size = 0;
514 	if (n == -1)
515 	{
516 		if (size<=1)
517 		{
518 			if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
519 				return -1;
520 		}
521 		else if (sfwrite(iop, cp, size) < 0)
522 			return -1;
523 		if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
524 			return -1;
525 	}
526 	else if (size <= 1)
527 	{
528 		if (!jp->nullfield)
529 			sfputc(iop, n);
530 		else if (sfputr(iop, jp->nullfield, n) < 0)
531 			return -1;
532 	}
533 	else
534 	{
535 		last = cp[size-1];
536 		cp[size-1] = n;
537 		if (sfwrite(iop, cp, size) < 0)
538 			return -1;
539 		cp[size-1] = last;
540 	}
541 	return 0;
542 }
543 
/*
 * DEBUG_TRACE only: route later outfield() calls through a wrapper that
 * first prints a "[F...]" trace item to sfstdout (source line, file
 * index, field number, last flag); i1, i2 and i3 hold the arguments so
 * that each is evaluated once
 */
544 #if DEBUG_TRACE
545 static int i1,i2,i3;
546 #define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
547 #endif
548 
549 static int
outrec(register Join_t * jp,int mode)550 outrec(register Join_t* jp, int mode)
551 {
552 	register File_t*	fp;
553 	register int		i;
554 	register int		j;
555 	register int		k;
556 	register int		n;
557 	int*			out;
558 
559 	if (mode < 0 && jp->file[0].hit++)
560 		return 0;
561 	if (mode > 0 && jp->file[1].hit++)
562 		return 0;
563 	if (out = jp->outlist)
564 	{
565 		while ((n = *out++) >= 0)
566 		{
567 			if (n == JOINFIELD)
568 			{
569 				i = mode >= 0;
570 				j = jp->file[i].field;
571 			}
572 			else
573 			{
574 				i = n & 1;
575 				j = (mode<0 && i || mode>0 && !i) ?
576 					jp->file[i].nfields :
577 					n >> 2;
578 			}
579 			if (outfield(jp, i, j, *out < 0) < 0)
580 				return -1;
581 		}
582 		return 0;
583 	}
584 	k = jp->file[0].nfields;
585 	if (mode >= 0)
586 		k += jp->file[1].nfields - 1;
587 	for (i=0; i<2; i++)
588 	{
589 		fp = &jp->file[i];
590 		if (mode>0 && i==0)
591 		{
592 			k -= (fp->nfields - 1);
593 			continue;
594 		}
595 		n = fp->field;
596 		if (mode||i==0)
597 		{
598 			/* output join field first */
599 			if (outfield(jp,i,n,!--k) < 0)
600 				return -1;
601 			if (!k)
602 				return 0;
603 			for (j=0; j<n; j++)
604 			{
605 				if (outfield(jp,i,j,!--k) < 0)
606 					return -1;
607 				if (!k)
608 					return 0;
609 			}
610 			j = n + 1;
611 		}
612 		else
613 			j = 0;
614 		for (;j<fp->nfields; j++)
615 		{
616 			if (j!=n && outfield(jp,i,j,!--k) < 0)
617 				return -1;
618 			if (!k)
619 				return 0;
620 		}
621 	}
622 	return 0;
623 }
624 
/*
 * DEBUG_TRACE only: route later outrec() calls through a wrapper that
 * first prints a "[R...]" trace item to sfstdout; the expansion refers
 * to jp, lo, hi, cp1 and cp2, which must be in scope at the call site,
 * and to i1 from the outfield() trace block
 */
625 #if DEBUG_TRACE
626 #define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
627 #endif
628 
629 static int
join(Join_t * jp)630 join(Join_t* jp)
631 {
632 	register unsigned char*	cp1;
633 	register unsigned char*	cp2;
634 	register int		n1;
635 	register int		n2;
636 	register int		n;
637 	register int		cmp;
638 	register int		same;
639 	int			o2;
640 	Sfoff_t			lo = -1;
641 	Sfoff_t			hi = -1;
642 
643 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
644 	{
645 		n1 = jp->file[0].fieldlen;
646 		n2 = jp->file[1].fieldlen;
647 		same = 0;
648 		for (;;)
649 		{
650 			n = n1 < n2 ? n1 : n2;
651 #if DEBUG_TRACE
652 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
653 				cmp = n1 - n2;
654 sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
655 			if (!cmp)
656 #else
657 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
658 #endif
659 			{
660 				if (!(jp->outmode & C_COMMON))
661 				{
662 					if (cp1 = getrec(jp, 0, 1))
663 					{
664 						n1 = jp->file[0].fieldlen;
665 						same = 1;
666 						continue;
667 					}
668 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
669 						break;
670 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
671 					{
672 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
673 						return -1;
674 					}
675 				}
676 				else if (outrec(jp, 0) < 0)
677 					return -1;
678 				else if (lo < 0 && (jp->outmode & C_COMMON))
679 				{
680 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
681 					{
682 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
683 						return -1;
684 					}
685 					lo -= jp->file[1].reclen;
686 				}
687 				if (cp2 = getrec(jp, 1, lo < 0))
688 				{
689 					n2 = jp->file[1].fieldlen;
690 					continue;
691 				}
692 #if DEBUG_TRACE
693 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
694 #endif
695 			}
696 			else if (cmp > 0)
697 			{
698 				if (same)
699 				{
700 					same = 0;
701 				next:
702 					if (n2 > jp->samesize)
703 					{
704 						jp->samesize = roundof(n2, 16);
705 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
706 						{
707 							error(ERROR_SYSTEM|2, "out of space");
708 							return -1;
709 						}
710 					}
711 					memcpy(jp->same, cp2, o2 = n2);
712 					if (!(cp2 = getrec(jp, 1, 0)))
713 						break;
714 					n2 = jp->file[1].fieldlen;
715 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
716 						goto next;
717 					continue;
718 				}
719 				if (hi >= 0)
720 				{
721 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
722 					{
723 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
724 						return -1;
725 					}
726 					hi = -1;
727 				}
728 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
729 					return -1;
730 				lo = -1;
731 				if (cp2 = getrec(jp, 1, 1))
732 				{
733 					n2 = jp->file[1].fieldlen;
734 					continue;
735 				}
736 #if DEBUG_TRACE
737 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
738 #endif
739 			}
740 			else if (same)
741 			{
742 				same = 0;
743 				if (!(cp1 = getrec(jp, 0, 0)))
744 					break;
745 				n1 = jp->file[0].fieldlen;
746 				continue;
747 			}
748 			if (lo >= 0)
749 			{
750 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
751 				    (hi -= jp->file[1].reclen) < 0 ||
752 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
753 				    !(cp2 = getrec(jp, 1, 0)))
754 				{
755 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
756 					return -1;
757 				}
758 				n2 = jp->file[1].fieldlen;
759 				lo = -1;
760 				if (jp->file[1].discard)
761 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
762 			}
763 			else if (!cp2)
764 				break;
765 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
766 				return -1;
767 			if (!(cp1 = getrec(jp, 0, 1)))
768 				break;
769 			n1 = jp->file[0].fieldlen;
770 		}
771 	}
772 #if DEBUG_TRACE
773 sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
774 #endif
775 	if (cp2)
776 	{
777 		if (hi >= 0 &&
778 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
779 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
780 		{
781 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
782 			return -1;
783 		}
784 #if DEBUG_TRACE
785 sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
786 #endif
787 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
788 		cmp = 1;
789 		n = 1;
790 	}
791 	else
792 	{
793 		cmp = -1;
794 		n = 0;
795 	}
796 #if DEBUG_TRACE
797 sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
798 #endif
799 	if (!cp1 || !(jp->outmode & (1<<n)))
800 	{
801 		if (cp1 && jp->file[n].iop == sfstdin)
802 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
803 		return 0;
804 	}
805 	if (outrec(jp, cmp) < 0)
806 		return -1;
807 	do
808 	{
809 		if (!getrec(jp, n, 1))
810 			return 0;
811 	} while (outrec(jp, cmp) >= 0);
812 	return -1;
813 }
814 
815 int
b_join(int argc,char ** argv,Shbltin_t * context)816 b_join(int argc, char** argv, Shbltin_t* context)
817 {
818 	register int		n;
819 	register char*		cp;
820 	register Join_t*	jp;
821 	char*			e;
822 
823 #if !DEBUG_TRACE
824 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
825 #endif
826 	if (!(jp = init()))
827 		error(ERROR_system(1),"out of space");
828 	jp->context = context;
829 	for (;;)
830 	{
831 		switch (n = optget(argv, usage))
832 		{
833  		case 'j':
834 			/*
835 			 * check for obsolete "-j1 field" and "-j2 field"
836 			 */
837 
838 			if (opt_info.offset == 0)
839 			{
840 				cp = argv[opt_info.index - 1];
841 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
842 				n = cp[n] == 'j';
843 			}
844 			else
845 				n = 0;
846 			if (n)
847 			{
848 				if (opt_info.num!=1 && opt_info.num!=2)
849 					error(2,"-jfileno field: fileno must be 1 or 2");
850 				n = '0' + opt_info.num;
851 				if (!(cp = argv[opt_info.index]))
852 				{
853 					argc = 0;
854 					break;
855 				}
856 				opt_info.num = strtol(cp, &e, 10);
857 				if (*e)
858 				{
859 					argc = 0;
860 					break;
861 				}
862 				opt_info.index++;
863 			}
864 			else
865 			{
866 				jp->file[0].field = (int)(opt_info.num-1);
867 				n = '2';
868 			}
869 			/*FALLTHROUGH*/
870  		case '1':
871 		case '2':
872 			if (opt_info.num <=0)
873 				error(2,"field number must positive");
874 			jp->file[n-'1'].field = (int)(opt_info.num-1);
875 			continue;
876 		case 'v':
877 			jp->outmode &= ~C_COMMON;
878 			/*FALLTHROUGH*/
879 		case 'a':
880 			if (opt_info.num!=1 && opt_info.num!=2)
881 				error(2,"%s: file number must be 1 or 2", opt_info.name);
882 			jp->outmode |= 1<<(opt_info.num-1);
883 			continue;
884 		case 'e':
885 			jp->nullfield = opt_info.arg;
886 			continue;
887 		case 'o':
888 			/* need to accept obsolescent command syntax */
889 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
890 			opt_info.index += n;
891 			continue;
892 		case 't':
893 			jp->state[' '] = jp->state['\t'] = 0;
894 			if (jp->mb)
895 			{
896 				cp = opt_info.arg;
897 				jp->delim = mbchar(cp);
898 				if ((n = cp - opt_info.arg) > 1)
899 				{
900 					jp->delimlen = n;
901 					jp->delimstr = opt_info.arg;
902 					continue;
903 				}
904 			}
905 			n = *(unsigned char*)opt_info.arg;
906 			jp->state[n] = S_DELIM;
907 			jp->delim = n;
908 			continue;
909 		case 'i':
910 			jp->ignorecase = !opt_info.num;
911 			continue;
912 		case 'B':
913 			jp->buffered = !opt_info.num;
914 			continue;
915 		case ':':
916 			error(2, "%s", opt_info.arg);
917 			break;
918 		case '?':
919 			done(jp);
920 			error(ERROR_usage(2), "%s", opt_info.arg);
921 			break;
922 		}
923 		break;
924 	}
925 	argv += opt_info.index;
926 	argc -= opt_info.index;
927 	if (error_info.errors || argc!=2)
928 	{
929 		done(jp);
930 		error(ERROR_usage(2),"%s", optusage(NiL));
931 	}
932 	jp->ooutmode = jp->outmode;
933 	jp->file[0].name = cp = *argv++;
934 	if (streq(cp,"-"))
935 	{
936 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
937 		{
938 			if (sfdcseekable(sfstdin))
939 				error(ERROR_warn(0),"%s: seek may fail",cp);
940 			else
941 				jp->file[0].discard = 1;
942 		}
943 		jp->file[0].iop = sfstdin;
944 	}
945 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
946 	{
947 		done(jp);
948 		error(ERROR_system(1),"%s: cannot open",cp);
949 	}
950 	jp->file[1].name = cp = *argv;
951 	if (streq(cp,"-"))
952 	{
953 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
954 		{
955 			if (sfdcseekable(sfstdin))
956 				error(ERROR_warn(0),"%s: seek may fail",cp);
957 			else
958 				jp->file[1].discard = 1;
959 		}
960 		jp->file[1].iop = sfstdin;
961 	}
962 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
963 	{
964 		done(jp);
965 		error(ERROR_system(1),"%s: cannot open",cp);
966 	}
967 	if (jp->buffered)
968 	{
969 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
970 		sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
971 	}
972 	jp->outfile = sfstdout;
973 	if (!jp->outlist)
974 		jp->nullfield = 0;
975 	if (join(jp) < 0)
976 	{
977 		done(jp);
978 		error(ERROR_system(1),"write error");
979 	}
980 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
981 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
982 	done(jp);
983 	return error_info.errors;
984 }
985