1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1992-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23  * David Korn
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * join
28  */
29 
30 static const char usage[] =
31 "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32 USAGE_LICENSE
33 "[+NAME?join - relational database operator]"
34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35 	"and \afile2\a and writes the resulting joined files to standard "
36 	"output.  By default, a field is delimited by one or more spaces "
37 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38 	"can be used to change the field delimiter.]"
39 "[+?The \ajoin field\a is a field in each file on which files are compared. "
40 	"By default \bjoin\b writes one line in the output for each pair "
41 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42 	"fields.  The default output line consists of the join field, "
43 	"then the remaining fields from \afile1\a, then the remaining "
44 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45 	"option.  The \b-a\b option can be used to add unmatched lines "
46 	"to the output.  The \b-v\b option can be used to output only "
47 	"unmatched lines.]"
48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49 	"sequence of \bsort -b\b on the fields on which they are to be "
50 	"joined otherwise the results are unspecified.]"
51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52         "uses standard input starting at the current location.]"
53 
54 "[e:empty]:[string?Replace empty output fields in the list selected with"
55 "	\b-o\b with \astring\a.]"
56 "[o:output]:[list?Construct the output line to comprise the fields specified "
57 	"in a blank or comma separated list \alist\a.  Each element in "
58 	"\alist\a consists of a file number (either 1 or 2), a period, "
59 	"and a field number or \b0\b representing the join field.  "
60 	"As an obsolete feature multiple occurrences of \b-o\b can "
61 	"be specified.]"
62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63 "	and output.]"
64 "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65 "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68 "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69 "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70 	"all unpairable lines will be output.]"
71 "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72 "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73 	"output.  If \b-v\b options appear for both 1 and 2, then "
74 	"all unpairable lines will be output.] ]"
75 "[i:ignorecase?Ignore case in field comparisons.]"
76 "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77 
78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79 "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80 "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81 "	equivalent to \b-2\b \afield\a.]"
82 
83 "\n"
84 "\nfile1 file2\n"
85 "\n"
86 "[+EXIT STATUS?]{"
87 	"[+0?Both files processed successfully.]"
88 	"[+>0?An error occurred.]"
89 "}"
90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91 ;
92 
93 #include <cmd.h>
94 #include <sfdisc.h>
95 
96 #if _hdr_wchar && _hdr_wctype && _lib_iswctype
97 
98 #include <wchar.h>
99 #include <wctype.h>
100 
101 #else
102 
103 #include <ctype.h>
104 
105 #ifndef iswspace
106 #define iswspace(x)	isspace(x)
107 #endif
108 
109 #endif
110 
/*
 * Join_t.outmode bits selecting which records are written:
 * C_FILE1/C_FILE2 are bit <fileno-1> (set by -a and -v) and gate the
 * unpaired records of that file; C_COMMON gates the paired records
 * and is cleared by -v
 */
#define C_FILE1		001
#define C_FILE2		002
#define C_COMMON	004
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

/* initial capacity of the per-file field arrays and of the -o list */
#define NFIELD		10
/* -o list code standing for the join field (list element "0") */
#define JOINFIELD	2

/*
 * byte classes stored in Join_t.state[]; 0 means an ordinary byte
 * S_DELIM marks the single byte -t delimiter, S_SPACE marks space and
 * tab while the default delimiter is in effect, S_NL marks new-line,
 * S_WIDE marks bytes 0x80-0xff when mbwide() is true
 */
#define S_DELIM		1
#define S_SPACE		2
#define S_NL		3
#define S_WIDE		4
123 
/*
 * bounds of one field inside the current record buffer
 */
typedef struct Field_s
{
	char*		beg;		/* first byte of the field (may be leading blanks) */
	char*		end;		/* the byte that terminated the field */
} Field_t;
129 
/*
 * per input file state; most members describe the record most
 * recently read by getrec()
 */
typedef struct File_s
{
	Sfio_t*		iop;		/* input stream (sfstdin for "-") */
	char*		name;		/* operand name used in diagnostics */
	char*		recptr;		/* start of the current record */
	int		reclen;		/* sfvalue() length of the current record */
	int		field;		/* zero based index of the join field */
	int		fieldlen;	/* length of the current join field */
	int		nfields;	/* number of fields in the current record */
	int		maxfields;	/* capacity of fields[] */
	int		spaces;		/* set when a field began with default-delimiter blanks */
	int		hit;		/* reset by getrec(), counted by outrec() for unpaired output */
	int		discard;	/* set in b_join() when stdin cannot seek and sfdcseekable() returned 0 */
	Field_t*	fields;		/* field bounds for the current record */
} File_t;
145 
/*
 * state for one join invocation; allocated by init(), released by done()
 */
typedef struct Join_s
{
	unsigned char	state[1<<CHAR_BIT];	/* S_* class of each byte value */
	Sfio_t*		outfile;	/* output stream */
	int*		outlist;	/* -o field codes terminated by -1, 0 if no -o */
	int		outmode;	/* C_* bits; bit <index> is cleared when that file hits EOF */
	int		ooutmode;	/* outmode as it was after option parsing */
	char*		nullfield;	/* -e replacement for empty fields, or 0 */
	char*		delimstr;	/* bytes of a multibyte -t delimiter, or 0 */
	int		delim;		/* -t delimiter character, -1 for the default blanks */
	int		delimlen;	/* number of bytes in delimstr */
	int		buffered;	/* set from -B; selects the sfsetbuf() calls in b_join() */
	int		ignorecase;	/* set from -i; selects strncasecmp() in join() */
	int		mb;		/* mbwide() result recorded by init() */
	char*		same;		/* copy of the previous file 2 join field, see join() */
	int		samesize;	/* allocated size of same */
	void*		context;	/* b_join() context handed to sh_checksig() */
	File_t		file[2];	/* file1 and file2 */
} Join_t;
165 
166 static void
167 done(register Join_t* jp)
168 {
169 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170 		sfclose(jp->file[0].iop);
171 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172 		sfclose(jp->file[1].iop);
173 	if (jp->outlist)
174 		free(jp->outlist);
175 	if (jp->file[0].fields)
176 		free(jp->file[0].fields);
177 	if (jp->file[1].fields)
178 		free(jp->file[1].fields);
179 	if (jp->same)
180 		free(jp->same);
181 	free(jp);
182 }
183 
184 static Join_t*
185 init(void)
186 {
187 	register Join_t*	jp;
188 	register int		i;
189 
190 	setlocale(LC_ALL, "");
191 	if (jp = newof(0, Join_t, 1, 0))
192 	{
193 		if (jp->mb = mbwide())
194 			for (i = 0x80; i <= 0xff; i++)
195 				jp->state[i] = S_WIDE;
196 		jp->state[' '] = jp->state['\t'] = S_SPACE;
197 		jp->state['\n'] = S_NL;
198 		jp->delim = -1;
199 		jp->nullfield = 0;
200 		if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201 		    !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202 		{
203 			done(jp);
204 			return 0;
205 		}
206 		jp->file[0].maxfields = NFIELD;
207 		jp->file[1].maxfields = NFIELD;
208 		jp->outmode = C_COMMON;
209 	}
210 	return jp;
211 }
212 
213 static int
214 getolist(Join_t* jp, const char* first, char** arglist)
215 {
216 	register const char*	cp = first;
217 	char**			argv = arglist;
218 	register int		c;
219 	int*			outptr;
220 	int*			outmax;
221 	int			nfield = NFIELD;
222 	char*			str;
223 
224 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225 	outmax = outptr + NFIELD;
226 	while (c = *cp++)
227 	{
228 		if (c==' ' || c=='\t' || c==',')
229 			continue;
230 		str = (char*)--cp;
231 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232 		{
233 			str++;
234 			c = JOINFIELD;
235 			goto skip;
236 		}
237 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238 		{
239 			error(2,"%s: invalid field list",first);
240 			break;
241 		}
242 		c--;
243 		c <<=2;
244 		if (*cp=='2')
245 			c |=1;
246 	skip:
247 		if (outptr >= outmax)
248 		{
249 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250 			outptr = jp->outlist + nfield;
251 			nfield *= 2;
252 			outmax = jp->outlist + nfield;
253 		}
254 		*outptr++ = c;
255 		cp = str;
256 	}
257 	/* need to accept obsolescent command syntax */
258 	while (1)
259 	{
260 		if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
261 		{
262 			if (*cp=='0' && cp[1]==0)
263 			{
264 				c = JOINFIELD;
265 				goto skip2;
266 			}
267 			break;
268 		}
269 		str = (char*)cp;
270 		c = strtol(cp+2, &str,10);
271 		if (*str || --c<0)
272 			break;
273 		argv++;
274 		c <<= 2;
275 		if (*cp=='2')
276 			c |=1;
277 	skip2:
278 		if (outptr >= outmax)
279 		{
280 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281 			outptr = jp->outlist + nfield;
282 			nfield *= 2;
283 			outmax = jp->outlist + nfield;
284 		}
285 		*outptr++ = c;
286 	}
287 	*outptr = -1;
288 	return argv-arglist;
289 }
290 
/*
 * read in a record from file <index> and split into fields
 *
 * if <discard> is non-zero and the file is marked discard then
 * SFSK_DISCARD is raised on the stream before the read
 *
 * on success fp->fields[0..nfields-1] bound the fields of the record,
 * fp->fieldlen is the length of the join field and the return value
 * points to the join field with leading blanks skipped; a record with
 * too few fields yields "" with fieldlen 0
 *
 * returns 0 if sh_checksig() reports a signal or if no record could be
 * read, in which case bit <index> is cleared from jp->outmode
 */
static unsigned char*
getrec(Join_t* jp, int index, int discard)
{
	register unsigned char*	sp = jp->state;
	register File_t*	fp = &jp->file[index];
	register Field_t*	field = fp->fields;
	register Field_t*	fieldmax = field + fp->maxfields;
	register char*		cp;
	register int		n;
	char*			tp;

	if (sh_checksig(jp->context))
		return 0;
	if (discard && fp->discard)
		sfraise(fp->iop, SFSK_DISCARD, NiL);
	fp->spaces = 0;
	fp->hit = 0;
	if (!(cp = sfgetr(fp->iop, '\n', 0)))
	{
		/* nothing more from this file: stop selecting its records */
		jp->outmode &= ~(1<<index);
		return 0;
	}
	fp->recptr = cp;
	fp->reclen = sfvalue(fp->iop);
	if (jp->delim == '\n')	/* handle new-line delimiter specially */
	{
		/* the whole record is a single field */
		field->beg = cp;
		cp += fp->reclen;
		field->end = cp - 1;
		field++;
	}
	else
		do /* separate into fields */
		{
			if (field >= fieldmax)
			{
				/* NOTE(review): the newof() result is used unchecked */
				n = 2 * fp->maxfields;
				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
				field = fp->fields + fp->maxfields;
				fp->maxfields = n;
				fieldmax = fp->fields + n;
			}
			field->beg = cp;
			if (jp->delim == -1)
			{
				/* default delimiter: skip the blanks that start this field */
				switch (sp[*(unsigned char*)cp])
				{
				case S_SPACE:
					cp++;
					break;
				case S_WIDE:
					tp = cp;
					if (iswspace(mbchar(tp)))
					{
						cp = tp;
						break;
					}
					/*FALLTHROUGH*/
				default:
					goto next;
				}
				fp->spaces = 1;
				if (jp->mb)
					for (;;)
					{
						switch (sp[*(unsigned char*)cp++])
						{
						case S_SPACE:
							continue;
						case S_WIDE:
							tp = cp - 1;
							if (iswspace(mbchar(tp)))
							{
								cp = tp;
								continue;
							}
							break;
						}
						break;
					}
				else
					while (sp[*(unsigned char*)cp++]==S_SPACE);
				/* both loops step one byte past the first non-blank */
				cp--;
			}
		next:
			/* scan to the byte that ends the field; n gets its class */
			if (jp->mb)
			{
				for (;;)
				{
					tp = cp;
					switch (n = sp[*(unsigned char*)cp++])
					{
					case 0:
						continue;
					case S_WIDE:
						cp--;
						n = mbchar(cp);
						if (n == jp->delim)
						{
							n = S_DELIM;
							break;
						}
						if (jp->delim == -1 && iswspace(n))
						{
							n = S_SPACE;
							break;
						}
						continue;
					}
					break;
				}
				/* tp is the first byte of the terminating character */
				field->end = tp;
			}
			else
			{
				while (!(n = sp[*(unsigned char*)cp++]));
				field->end = cp - 1;
			}
			field++;
		} while (n != S_NL);
	fp->nfields = field - fp->fields;
	if ((n = fp->field) < fp->nfields)
	{
		cp = fp->fields[n].beg;
		/* eliminate leading spaces */
		if (fp->spaces)
		{
			if (jp->mb)
				for (;;)
				{
					switch (sp[*(unsigned char*)cp++])
					{
					case S_SPACE:
						continue;
					case S_WIDE:
						tp = cp - 1;
						if (iswspace(mbchar(tp)))
						{
							cp = tp;
							continue;
						}
						break;
					}
					break;
				}
			else
				while (sp[*(unsigned char*)cp++]==S_SPACE);
			cp--;
		}
		fp->fieldlen = fp->fields[n].end - cp;
		return (unsigned char*)cp;
	}
	/* the record has no join field: compare as the empty string */
	fp->fieldlen = 0;
	return (unsigned char*)"";
}
449 
450 static unsigned char*
451 _trace_getrec(Join_t* jp, int index, int discard)
452 {
453 	unsigned char*	r;
454 
455 	r = getrec(jp, index, discard);
456 	return r;
457 }
458 #define getrec	_trace_getrec
459 
460 #if DEBUG_TRACE
461 static unsigned char* u1,u2,u3;
462 #define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
463 #endif
464 
465 /*
466  * print field <n> from file <index>
467  */
468 static int
469 outfield(Join_t* jp, int index, register int n, int last)
470 {
471 	register File_t*	fp = &jp->file[index];
472 	register char*		cp;
473 	register char*		cpmax;
474 	register int		size;
475 	register Sfio_t*	iop = jp->outfile;
476 	char*			tp;
477 
478 	if (n < fp->nfields)
479 	{
480 		cp = fp->fields[n].beg;
481 		cpmax = fp->fields[n].end + 1;
482 	}
483 	else
484 		cp = 0;
485 	if ((n = jp->delim) == -1)
486 	{
487 		if (cp && fp->spaces)
488 		{
489 			register unsigned char*	sp = jp->state;
490 
491 			/*eliminate leading spaces */
492 			if (jp->mb)
493 				for (;;)
494 				{
495 					switch (sp[*(unsigned char*)cp++])
496 					{
497 					case S_SPACE:
498 						continue;
499 					case S_WIDE:
500 						tp = cp - 1;
501 						if (iswspace(mbchar(tp)))
502 						{
503 							cp = tp;
504 							continue;
505 						}
506 						break;
507 					}
508 					break;
509 				}
510 			else
511 				while (sp[*(unsigned char*)cp++]==S_SPACE);
512 			cp--;
513 		}
514 		n = ' ';
515 	}
516 	else if (jp->delimstr)
517 		n = -1;
518 	if (last)
519 		n = '\n';
520 	if (cp)
521 		size = cpmax - cp;
522 	else
523 		size = 0;
524 	if (n == -1)
525 	{
526 		if (size<=1)
527 		{
528 			if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
529 				return -1;
530 		}
531 		else if (sfwrite(iop, cp, size) < 0)
532 			return -1;
533 		if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
534 			return -1;
535 	}
536 	else if (size <= 1)
537 	{
538 		if (!jp->nullfield)
539 			sfputc(iop, n);
540 		else if (sfputr(iop, jp->nullfield, n) < 0)
541 			return -1;
542 	}
543 	else
544 	{
545 		last = cp[size-1];
546 		cp[size-1] = n;
547 		if (sfwrite(iop, cp, size) < 0)
548 			return -1;
549 		cp[size-1] = last;
550 	}
551 	return 0;
552 }
553 
#if DEBUG_TRACE
/*
 * tracing variant of outfield(): print the line, file index, field
 * number and last flag to sfstdout, then call the real function;
 * i1, i2 and i3 hold the arguments so each is evaluated once
 */
static int i1,i2,i3;
#define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
#endif
558 
559 static int
560 outrec(register Join_t* jp, int mode)
561 {
562 	register File_t*	fp;
563 	register int		i;
564 	register int		j;
565 	register int		k;
566 	register int		n;
567 	int*			out;
568 
569 	if (mode < 0 && jp->file[0].hit++)
570 		return 0;
571 	if (mode > 0 && jp->file[1].hit++)
572 		return 0;
573 	if (out = jp->outlist)
574 	{
575 		while ((n = *out++) >= 0)
576 		{
577 			if (n == JOINFIELD)
578 			{
579 				i = mode >= 0;
580 				j = jp->file[i].field;
581 			}
582 			else
583 			{
584 				i = n & 1;
585 				j = (mode<0 && i || mode>0 && !i) ?
586 					jp->file[i].nfields :
587 					n >> 2;
588 			}
589 			if (outfield(jp, i, j, *out < 0) < 0)
590 				return -1;
591 		}
592 		return 0;
593 	}
594 	k = jp->file[0].nfields;
595 	if (mode >= 0)
596 		k += jp->file[1].nfields - 1;
597 	for (i=0; i<2; i++)
598 	{
599 		fp = &jp->file[i];
600 		if (mode>0 && i==0)
601 		{
602 			k -= (fp->nfields - 1);
603 			continue;
604 		}
605 		n = fp->field;
606 		if (mode||i==0)
607 		{
608 			/* output join field first */
609 			if (outfield(jp,i,n,!--k) < 0)
610 				return -1;
611 			if (!k)
612 				return 0;
613 			for (j=0; j<n; j++)
614 			{
615 				if (outfield(jp,i,j,!--k) < 0)
616 					return -1;
617 				if (!k)
618 					return 0;
619 			}
620 			j = n + 1;
621 		}
622 		else
623 			j = 0;
624 		for (;j<fp->nfields; j++)
625 		{
626 			if (j!=n && outfield(jp,i,j,!--k) < 0)
627 				return -1;
628 			if (!k)
629 				return 0;
630 		}
631 	}
632 	return 0;
633 }
634 
#if DEBUG_TRACE
/*
 * tracing variant of outrec(): print the line, mode, lo/hi offsets and
 * both join fields with their hit counts, then call the real function
 * relies on jp, lo, hi, cp1 and cp2 being in scope at the call site
 */
#define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
#endif
638 
/*
 * compare the join fields of the two inputs record by record and write
 * the records selected by jp->outmode through outrec()
 *
 * cp1/n1 and cp2/n2 are the current join fields and their lengths
 * lo is the file 2 offset of the record where the current run of
 * matches began, hi the file 2 offset reached before rewinding to lo;
 * both are -1 when unset
 *
 * returns 0 when done, -1 after a seek, allocation or output failure
 */
static int
join(Join_t* jp)
{
	register unsigned char*	cp1;
	register unsigned char*	cp2;
	register int		n1;
	register int		n2;
	register int		n;
	register int		cmp;
	register int		same;
	int			o2;
	Sfoff_t			lo = -1;
	Sfoff_t			hi = -1;

	/* cp2 is forced to 0 when either initial read fails */
	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
	{
		n1 = jp->file[0].fieldlen;
		n2 = jp->file[1].fieldlen;
		same = 0;
		for (;;)
		{
			n = n1 < n2 ? n1 : n2;
			/* cmp orders the two join fields; a common prefix is decided by length */
#if DEBUG_TRACE
			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
				cmp = n1 - n2;
sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
			if (!cmp)
#else
			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
#endif
			{
				/* the join fields match */
				if (!(jp->outmode & C_COMMON))
				{
					/* pairs are not written: move on in file 1 */
					if (cp1 = getrec(jp, 0, 1))
					{
						n1 = jp->file[0].fieldlen;
						same = 1;
						continue;
					}
					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
						break;
					/* back up and reread the last file 1 record */
					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
						return -1;
					}
				}
				else if (outrec(jp, 0) < 0)
					return -1;
				else if (lo < 0 && (jp->outmode & C_COMMON))
				{
					/* remember where this run of file 2 matches starts */
					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
						return -1;
					}
					lo -= jp->file[1].reclen;
				}
				if (cp2 = getrec(jp, 1, lo < 0))
				{
					n2 = jp->file[1].fieldlen;
					continue;
				}
#if DEBUG_TRACE
sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
#endif
			}
			else if (cmp > 0)
			{
				/* the file 1 field sorts after the file 2 field */
				if (same)
				{
					same = 0;
				next:
					/* skip file 2 records that repeat the saved join field */
					if (n2 > jp->samesize)
					{
						jp->samesize = roundof(n2, 16);
						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
						{
							error(ERROR_SYSTEM|2, "out of space");
							return -1;
						}
					}
					memcpy(jp->same, cp2, o2 = n2);
					if (!(cp2 = getrec(jp, 1, 0)))
						break;
					n2 = jp->file[1].fieldlen;
					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
						goto next;
					continue;
				}
				if (hi >= 0)
				{
					/* return to the file 2 position saved before the rewind */
					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
						return -1;
					}
					hi = -1;
				}
				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
					return -1;
				lo = -1;
				if (cp2 = getrec(jp, 1, 1))
				{
					n2 = jp->file[1].fieldlen;
					continue;
				}
#if DEBUG_TRACE
sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
#endif
			}
			else if (same)
			{
				same = 0;
				if (!(cp1 = getrec(jp, 0, 0)))
					break;
				n1 = jp->file[0].fieldlen;
				continue;
			}
			if (lo >= 0)
			{
				/* save the file 2 position in hi and rewind to the start of the run */
				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
				    (hi -= jp->file[1].reclen) < 0 ||
				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
				    !(cp2 = getrec(jp, 1, 0)))
				{
					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
					return -1;
				}
				n2 = jp->file[1].fieldlen;
				lo = -1;
				if (jp->file[1].discard)
					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
			}
			else if (!cp2)
				break;
			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
				return -1;
			if (!(cp1 = getrec(jp, 0, 1)))
				break;
			n1 = jp->file[0].fieldlen;
		}
	}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
	/* pick the file (n) whose remaining records may still be unpaired output */
	if (cp2)
	{
		if (hi >= 0 &&
		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
		{
			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
			return -1;
		}
#if DEBUG_TRACE
sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
#endif
		/* either reuse the current file 2 record or read the next one */
		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
		cmp = 1;
		n = 1;
	}
	else
	{
		cmp = -1;
		n = 0;
	}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
	if (!cp1 || !(jp->outmode & (1<<n)))
	{
		if (cp1 && jp->file[n].iop == sfstdin)
			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
		return 0;
	}
	/* write the rest of file n as unpaired records */
	if (outrec(jp, cmp) < 0)
		return -1;
	do
	{
		if (!getrec(jp, n, 1))
			return 0;
	} while (outrec(jp, cmp) >= 0);
	return -1;
}
824 
825 int
826 b_join(int argc, char** argv, void* context)
827 {
828 	register int		n;
829 	register char*		cp;
830 	register Join_t*	jp;
831 	char*			e;
832 
833 #if !DEBUG_TRACE
834 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
835 #endif
836 	if (!(jp = init()))
837 		error(ERROR_system(1),"out of space");
838 	jp->context = context;
839 	for (;;)
840 	{
841 		switch (n = optget(argv, usage))
842 		{
843 		case 0:
844 			break;
845  		case 'j':
846 			/*
847 			 * check for obsolete "-j1 field" and "-j2 field"
848 			 */
849 
850 			if (opt_info.offset == 0)
851 			{
852 				cp = argv[opt_info.index - 1];
853 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
854 				n = cp[n] == 'j';
855 			}
856 			else
857 				n = 0;
858 			if (n)
859 			{
860 				if (opt_info.num!=1 && opt_info.num!=2)
861 					error(2,"-jfileno field: fileno must be 1 or 2");
862 				n = '0' + opt_info.num;
863 				if (!(cp = argv[opt_info.index]))
864 				{
865 					argc = 0;
866 					break;
867 				}
868 				opt_info.num = strtol(cp, &e, 10);
869 				if (*e)
870 				{
871 					argc = 0;
872 					break;
873 				}
874 				opt_info.index++;
875 			}
876 			else
877 			{
878 				jp->file[0].field = (int)(opt_info.num-1);
879 				n = '2';
880 			}
881 			/*FALLTHROUGH*/
882  		case '1':
883 		case '2':
884 			if (opt_info.num <=0)
885 				error(2,"field number must positive");
886 			jp->file[n-'1'].field = (int)(opt_info.num-1);
887 			continue;
888 		case 'v':
889 			jp->outmode &= ~C_COMMON;
890 			/*FALLTHROUGH*/
891 		case 'a':
892 			if (opt_info.num!=1 && opt_info.num!=2)
893 				error(2,"%s: file number must be 1 or 2", opt_info.name);
894 			jp->outmode |= 1<<(opt_info.num-1);
895 			continue;
896 		case 'e':
897 			jp->nullfield = opt_info.arg;
898 			continue;
899 		case 'o':
900 			/* need to accept obsolescent command syntax */
901 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
902 			opt_info.index += n;
903 			continue;
904 		case 't':
905 			jp->state[' '] = jp->state['\t'] = 0;
906 			if (jp->mb)
907 			{
908 				cp = opt_info.arg;
909 				jp->delim = mbchar(cp);
910 				if ((n = cp - opt_info.arg) > 1)
911 				{
912 					jp->delimlen = n;
913 					jp->delimstr = opt_info.arg;
914 					continue;
915 				}
916 			}
917 			n = *(unsigned char*)opt_info.arg;
918 			jp->state[n] = S_DELIM;
919 			jp->delim = n;
920 			continue;
921 		case 'i':
922 			jp->ignorecase = !opt_info.num;
923 			continue;
924 		case 'B':
925 			jp->buffered = !opt_info.num;
926 			continue;
927 		case ':':
928 			error(2, "%s", opt_info.arg);
929 			break;
930 		case '?':
931 			done(jp);
932 			error(ERROR_usage(2), "%s", opt_info.arg);
933 			break;
934 		}
935 		break;
936 	}
937 	argv += opt_info.index;
938 	argc -= opt_info.index;
939 	if (error_info.errors || argc!=2)
940 	{
941 		done(jp);
942 		error(ERROR_usage(2),"%s", optusage(NiL));
943 	}
944 	jp->ooutmode = jp->outmode;
945 	jp->file[0].name = cp = *argv++;
946 	if (streq(cp,"-"))
947 	{
948 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
949 		{
950 			if (sfdcseekable(sfstdin))
951 				error(ERROR_warn(0),"%s: seek may fail",cp);
952 			else
953 				jp->file[0].discard = 1;
954 		}
955 		jp->file[0].iop = sfstdin;
956 	}
957 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
958 	{
959 		done(jp);
960 		error(ERROR_system(1),"%s: cannot open",cp);
961 	}
962 	jp->file[1].name = cp = *argv;
963 	if (streq(cp,"-"))
964 	{
965 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
966 		{
967 			if (sfdcseekable(sfstdin))
968 				error(ERROR_warn(0),"%s: seek may fail",cp);
969 			else
970 				jp->file[1].discard = 1;
971 		}
972 		jp->file[1].iop = sfstdin;
973 	}
974 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
975 	{
976 		done(jp);
977 		error(ERROR_system(1),"%s: cannot open",cp);
978 	}
979 	if (jp->buffered)
980 	{
981 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
982 		sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
983 	}
984 	jp->outfile = sfstdout;
985 	if (!jp->outlist)
986 		jp->nullfield = 0;
987 	if (join(jp) < 0)
988 	{
989 		done(jp);
990 		error(ERROR_system(1),"write error");
991 	}
992 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
993 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
994 	done(jp);
995 	return error_info.errors;
996 }
997