1 #pragma prototyped
2 /*
3  * original code
4  *
5  *  	James A. Woods, Informatics General Corporation,
6  *	NASA Ames Research Center, 6/81.
7  *	Usenix ;login:, February/March, 1983, p. 8.
8  *
9  * discipline/method interface
10  *
11  *	Glenn Fowler
12  *	AT&T Research
13  *	modified from the original BSD source
14  *
15  * 'fastfind' scans a file list for the full pathname of a file
16  * given only a piece of the name.  The list is processed with
17  * with "front-compression" and bigram coding.  Front compression reduces
18  * space by a factor of 4-5, bigram coding by a further 20-25%.
19  *
20  * there are 4 methods:
21  *
22  *	FF_old	original with 7 bit bigram encoding (no magic)
23  *	FF_gnu	8 bit clean front compression (FF_gnu_magic)
24  *	FF_dir	FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
25  *	FF_typ	FF_dir with (mime) types (FF_typ_magic)
26  *
27  * the bigram encoding steals the eighth bit (that's why its FF_old)
28  * maybe one day we'll limit it to readonly:
29  *
30  * 0-2*FF_OFF	 likeliest differential counts + offset to make nonnegative
31  * FF_ESC	 4 byte big-endian out-of-range count+FF_OFF follows
32  * FF_MIN-FF_MAX ascii residue
33  * >=FF_MAX	 bigram codes
34  *
35  * a two-tiered string search technique is employed
36  *
37  * a metacharacter-free subpattern and partial pathname is matched
38  * backwards to avoid full expansion of the pathname list
39  *
40  * then the actual shell glob-style regular expression (if in this form)
41  * is matched against the candidate pathnames using the slower regexec()
42  *
43  * The original BSD code is covered by the BSD license:
44  *
45  * Copyright (c) 1985, 1993, 1999
46  *	The Regents of the University of California.  All rights reserved.
47  *
48  * Redistribution and use in source and binary forms, with or without
49  * modification, are permitted provided that the following conditions
50  * are met:
51  * 1. Redistributions of source code must retain the above copyright
52  *    notice, this list of conditions and the following disclaimer.
53  * 2. Redistributions in binary form must reproduce the above copyright
54  *    notice, this list of conditions and the following disclaimer in the
55  *    documentation and/or other materials provided with the distribution.
56  * 3. Neither the name of the University nor the names of its contributors
57  *    may be used to endorse or promote products derived from this software
58  *    without specific prior written permission.
59  *
60  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
61  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
62  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
63  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
64  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
65  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
66  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70  * SUCH DAMAGE.
71  */
72 
73 static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
74 
75 static const char lib[] = "libast:fastfind";
76 
77 #include "findlib.h"
78 
79 #define FIND_MATCH	"*/(find|locate)/*"
80 
81 /*
82  * this db could be anywhere
83  * findcodes[] directories are checked for findnames[i]
84  */
85 
86 static char*		findcodes[] =
87 {
88 	0,
89 	0,
90 	FIND_CODES,
91 	"/usr/local/share/lib",
92 	"/usr/local/lib",
93 	"/usr/share/lib",
94 	"/usr/lib",
95 	"/var/spool",
96 	"/usr/local/var",
97 	"/var/lib",
98 	"/var/lib/slocate",
99 	"/var/db",
100 };
101 
102 static char*		findnames[] =
103 {
104 	"find/codes",
105 	"find/find.codes",
106 	"locate/locatedb",
107 	"locatedb",
108 	"locate.database",
109 	"slocate.db",
110 };
111 
112 /*
113  * convert t to lower case and drop leading x- and x- after /
114  * converted value copied to b of size n
115  */
116 
117 char*
typefix(char * buf,size_t n,register const char * t)118 typefix(char* buf, size_t n, register const char* t)
119 {
120 	register int	c;
121 	register char*	b = buf;
122 
123 	if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
124 		t += 2;
125 	while (c = *t++)
126 	{
127 		if (isupper(c))
128 			c = tolower(c);
129 		if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
130 			t += 2;
131 	}
132 	*b = 0;
133 	return buf;
134 }
135 
136 /*
137  * return a fastfind stream handle for pattern
138  */
139 
140 Find_t*
findopen(const char * file,const char * pattern,const char * type,Finddisc_t * disc)141 findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc)
142 {
143 	register Find_t*	fp;
144 	register char*		p;
145 	register char*		s;
146 	register char*		b;
147 	register int		i;
148 	register int		j;
149 	char*			path;
150 	int			brace = 0;
151 	int			paren = 0;
152 	int			k;
153 	int			q;
154 	int			fd;
155 	int			uid;
156 	Vmalloc_t*		vm;
157 	Type_t*			tp;
158 	struct stat		st;
159 
160 
161 	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
162 		goto nospace;
163 
164 	/*
165 	 * NOTE: searching for FIND_CODES would be much simpler if we
166 	 *       just stuck with our own, but we also support GNU
167 	 *	 locate codes and have to search for the one of a
168 	 *	 bazillion possible names for that file
169 	 */
170 
171 	if (!findcodes[1])
172 		findcodes[1] = getenv(FIND_CODES_ENV);
173 	if (disc->flags & FIND_GENERATE)
174 	{
175 		if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t))))
176 			goto nospace;
177 		fp->vm = vm;
178 		fp->id = lib;
179 		fp->disc = disc;
180 		fp->generate = 1;
181 		if (file && (!*file || streq(file, "-")))
182 			file = 0;
183 		uid = geteuid();
184 		j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
185 
186 		/*
187 		 * look for the codes file, but since it may not exist yet,
188 		 * also look for the containing directory if i<2 or if
189 		 * it is sufficiently qualified (FIND_MATCH)
190 		 */
191 
192 		for (i = 0; i < j; i++)
193 			if (path = findcodes[i])
194 			{
195 				if (*path == '/')
196 				{
197 					if (!stat(path, &st))
198 					{
199 						if (S_ISDIR(st.st_mode))
200 						{
201 							for (k = 0; k < elementsof(findnames); k++)
202 							{
203 								sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]);
204 								if (!eaccess(fp->encode.file, R_OK|W_OK))
205 								{
206 									path = fp->encode.file;
207 									break;
208 								}
209 								if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/')))
210 								{
211 									*b = 0;
212 									if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
213 									{
214 										*b = '/';
215 										path = fp->encode.file;
216 										break;
217 									}
218 								}
219 							}
220 							if (k < elementsof(findnames))
221 								break;
222 						}
223 						else if (st.st_uid == uid && (st.st_mode & S_IWUSR))
224 						{
225 							sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
226 							path = fp->encode.file;
227 							break;
228 						}
229 					}
230 					else if (i < 2 || strmatch(path, FIND_MATCH))
231 					{
232 						sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
233 						if (b = strrchr(fp->encode.file, '/'))
234 						{
235 							*b = 0;
236 							if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
237 							{
238 								*b = '/';
239 								path = fp->encode.file;
240 								break;
241 							}
242 						}
243 					}
244 				}
245 				else if (pathpath(path, "", PATH_REGULAR|PATH_READ|PATH_WRITE, fp->encode.file, sizeof(fp->encode.file)))
246 				{
247 					path = fp->encode.file;
248 					break;
249 				}
250 				else if (b = strrchr(path, '/'))
251 				{
252 					sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path);
253 					if (pathpath(fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE, fp->encode.temp, sizeof(fp->encode.temp)) &&
254 					    !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
255 					{
256 						sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b);
257 						path = fp->encode.file;
258 						break;
259 					}
260 				}
261 			}
262 		if (i >= j)
263 		{
264 			if (fp->disc->errorf)
265 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
266 			goto drop;
267 		}
268 		if (fp->disc->flags & FIND_OLD)
269 		{
270 			/*
271 			 * FF_old generates temp data that is read
272 			 * in a second pass to generate the real codes
273 			 */
274 
275 			fp->method = FF_old;
276 			if (!(fp->fp = sftmp(32 * PATH_MAX)))
277 			{
278 				if (fp->disc->errorf)
279 					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file");
280 				goto drop;
281 			}
282 		}
283 		else
284 		{
285 			/*
286 			 * the rest generate into a temp file that
287 			 * is simply renamed on completion
288 			 */
289 
290 			if (s = strrchr(path, '/'))
291 			{
292 				*s = 0;
293 				p = path;
294 			}
295 			else
296 				p = ".";
297 			if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd))
298 			{
299 				if (fp->disc->errorf)
300 					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
301 				goto drop;
302 			}
303 			if (s)
304 				*s = '/';
305 			if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE)))
306 			{
307 				if (fp->disc->errorf)
308 					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp);
309 				close(fd);
310 				goto drop;
311 			}
312 			if (fp->disc->flags & FIND_TYPE)
313 			{
314 				fp->method = FF_typ;
315 				fp->encode.namedisc.key = offsetof(Type_t, name);
316 				fp->encode.namedisc.link = offsetof(Type_t, byname);
317 				fp->encode.indexdisc.key = offsetof(Type_t, index);
318 				fp->encode.indexdisc.size = sizeof(unsigned long);
319 				fp->encode.indexdisc.link = offsetof(Type_t, byindex);
320 				s = "system/dir";
321 				if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dtoset)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dtoset)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
322 				{
323 					if (fp->encode.namedict)
324 						dtclose(fp->encode.namedict);
325 					if (fp->disc->errorf)
326 						(*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table");
327 					goto drop;
328 				}
329 
330 				/*
331 				 * type index 1 is always system/dir
332 				 */
333 
334 				tp->index = ++fp->types;
335 				strcpy(tp->name, s);
336 				dtinsert(fp->encode.namedict, tp);
337 				dtinsert(fp->encode.indexdict, tp);
338 			}
339 			else if (fp->disc->flags & FIND_GNU)
340 			{
341 				fp->method = FF_gnu;
342 				sfputc(fp->fp, 0);
343 				sfputr(fp->fp, FF_gnu_magic, 0);
344 			}
345 			else
346 			{
347 				fp->method = FF_dir;
348 				sfputc(fp->fp, 0);
349 				sfputr(fp->fp, FF_dir_magic, 0);
350 			}
351 		}
352 	}
353 	else
354 	{
355 		i = sizeof(Decode_t) + sizeof(Code_t);
356 		if (!pattern || !*pattern)
357 			pattern = "*";
358 		i += (j = 2 * (strlen(pattern) + 1));
359 		if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i)))
360 		{
361 			vmclose(vm);
362 			return 0;
363 		}
364 		fp->vm = vm;
365 		fp->id = lib;
366 		fp->disc = disc;
367 		if (disc->flags & FIND_ICASE)
368 			fp->decode.ignorecase = 1;
369 		j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
370 		for (i = 0; i < j; i++)
371 			if (path = findcodes[i])
372 			{
373 				if (*path == '/')
374 				{
375 					if (!stat(path, &st))
376 					{
377 						if (S_ISDIR(st.st_mode))
378 						{
379 							for (k = 0; k < elementsof(findnames); k++)
380 							{
381 								sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]);
382 								if (fp->fp = sfopen(NiL, fp->decode.path, "r"))
383 								{
384 									path = fp->decode.path;
385 									break;
386 								}
387 							}
388 							if (fp->fp)
389 								break;
390 						}
391 						else if (fp->fp = sfopen(NiL, path, "r"))
392 							break;
393 					}
394 				}
395 				else if ((path = pathpath(path, "", PATH_REGULAR|PATH_READ, fp->decode.path, sizeof(fp->decode.path))) && (fp->fp = sfopen(NiL, path, "r")))
396 					break;
397 			}
398 		if (!fp->fp)
399 		{
400 			if (fp->disc->errorf)
401 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
402 			goto drop;
403 		}
404 		if (fstat(sffileno(fp->fp), &st))
405 		{
406 			if (fp->disc->errorf)
407 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path);
408 			goto drop;
409 		}
410 		if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
411 			setgid(getgid());
412 		fp->stamp = st.st_mtime;
413 		b = (s = fp->decode.temp) + 1;
414 		for (i = 0; i < elementsof(fp->decode.bigram1); i++)
415 		{
416 			if ((j = sfgetc(fp->fp)) == EOF)
417 				goto invalid;
418 			if (!(*s++ = fp->decode.bigram1[i] = j) && i)
419 			{
420 				i = -i;
421 				break;
422 			}
423 			if ((j = sfgetc(fp->fp)) == EOF)
424 				goto invalid;
425 			if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
426 				break;
427 		}
428 		if (streq(b, FF_typ_magic))
429 		{
430 			if (type)
431 			{
432 				type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type);
433 				memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1));
434 			}
435 			fp->method = FF_typ;
436 			for (j = 0, i = 1;; i++)
437 			{
438 				if (!(s = sfgetr(fp->fp, 0, 0)))
439 					goto invalid;
440 				if (!*s)
441 					break;
442 				if (type && strmatch(s, type))
443 				{
444 					FF_SET_TYPE(fp, i);
445 					j++;
446 				}
447 			}
448 			if (type && !j)
449 				goto drop;
450 			fp->types = j;
451 		}
452 		else if (streq(b, FF_dir_magic))
453 			fp->method = FF_dir;
454 		else if (streq(b, FF_gnu_magic))
455 			fp->method = FF_gnu;
456 		else if (!*b && *--b >= '0' && *b <= '1')
457 		{
458 			fp->method = FF_gnu;
459 			while (j = sfgetc(fp->fp))
460 			{
461 				if (j == EOF || fp->decode.count >= sizeof(fp->decode.path))
462 					goto invalid;
463 				fp->decode.path[fp->decode.count++] = j;
464 			}
465 		}
466 		else
467 		{
468 			fp->method = FF_old;
469 			if (i < 0)
470 			{
471 				if ((j = sfgetc(fp->fp)) == EOF)
472 					goto invalid;
473 				fp->decode.bigram2[i = -i] = j;
474 			}
475 			while (++i < elementsof(fp->decode.bigram1))
476 			{
477 				if ((j = sfgetc(fp->fp)) == EOF)
478 					goto invalid;
479 				fp->decode.bigram1[i] = j;
480 				if ((j = sfgetc(fp->fp)) == EOF)
481 					goto invalid;
482 				fp->decode.bigram2[i] = j;
483 			}
484 			if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF)
485 				goto invalid;
486 		}
487 
488 		/*
489 		 * set up the physical dir table
490 		 */
491 
492 		if (disc->version >= 19980301L)
493 		{
494 			fp->verifyf = disc->verifyf;
495 			if (disc->dirs && *disc->dirs)
496 			{
497 				for (k = 0; disc->dirs[k]; k++);
498 				if (k == 1 && streq(disc->dirs[0], "/"))
499 					k = 0;
500 				if (k)
501 				{
502 					if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0)))
503 						goto drop;
504 					if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0)))
505 						goto drop;
506 					p = 0;
507 					b = fp->decode.temp;
508 					j = fp->method == FF_old || fp->method == FF_gnu;
509 
510 					/*
511 					 * fill the dir list with logical and
512 					 * physical names since we don't know
513 					 * which way the db was encoded (it
514 					 * could be *both* ways)
515 					 */
516 
517 					for (i = q = 0; i < k; i++)
518 					{
519 						if (*(s = disc->dirs[i]) == '/')
520 							sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s);
521 						else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path))))
522 							goto nospace;
523 						else
524 							sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s);
525 						s = pathcanon(b, sizeof(fp->decode.temp), 0);
526 						*s = '/';
527 						*(s + 1) = 0;
528 						if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
529 							goto nospace;
530 						if (j)
531 							(fp->dirs[q])[s - b] = 0;
532 						q++;
533 						*s = 0;
534 						s = pathcanon(b, sizeof(fp->decode.temp), PATH_PHYSICAL);
535 						*s = '/';
536 						*(s + 1) = 0;
537 						if (!strneq(b, fp->dirs[q - 1], s - b))
538 						{
539 							if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
540 								goto nospace;
541 							if (j)
542 								(fp->dirs[q])[s - b] = 0;
543 							q++;
544 						}
545 					}
546 					strsort(fp->dirs, q, strcasecmp);
547 					for (i = 0; i < q; i++)
548 						fp->lens[i] = strlen(fp->dirs[i]);
549 				}
550 			}
551 		}
552 		if (fp->verifyf || (disc->flags & FIND_VERIFY))
553 		{
554 			if (fp->method != FF_dir && fp->method != FF_typ)
555 			{
556 				if (fp->disc->errorf)
557 					(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
558 				goto drop;
559 			}
560 			fp->verify = 1;
561 		}
562 
563 		/*
564 		 * extract last glob-free subpattern in name for fast pre-match
565 		 * prepend 0 for backwards match
566 		 */
567 
568 		if (p = s = (char*)pattern)
569 		{
570 			b = fp->decode.pattern;
571 			for (;;)
572 			{
573 				switch (*b++ = *p++)
574 				{
575 				case 0:
576 					break;
577 				case '\\':
578 					s = p;
579 					if (!*p++)
580 						break;
581 					continue;
582 				case '[':
583 					if (!brace)
584 					{
585 						brace++;
586 						if (*p == ']')
587 							p++;
588 					}
589 					continue;
590 				case ']':
591 					if (brace)
592 					{
593 						brace--;
594 						s = p;
595 					}
596 					continue;
597 				case '(':
598 					if (!brace)
599 						paren++;
600 					continue;
601 				case ')':
602 					if (!brace && paren > 0 && !--paren)
603 						s = p;
604 					continue;
605 				case '|':
606 				case '&':
607 					if (!brace && !paren)
608 					{
609 						s = "";
610 						break;
611 					}
612 					continue;
613 				case '*':
614 				case '?':
615 					s = p;
616 					continue;
617 				default:
618 					continue;
619 				}
620 				break;
621 			}
622 			if (s != pattern && !streq(pattern, "*"))
623 			{
624 				fp->decode.match = 1;
625 				if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
626 				{
627 					if (disc->errorf)
628 					{
629 						regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
630 						(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp);
631 					}
632 					goto drop;
633 				}
634 			}
635 			if (*s)
636 			{
637 				*b++ = 0;
638 				while (i = *s++)
639 					*b++ = i;
640 				*b-- = 0;
641 				fp->decode.end = b;
642 				if (fp->decode.ignorecase)
643 					for (s = fp->decode.pattern; s <= b; s++)
644 						if (isupper(*s))
645 							*s = tolower(*s);
646 			}
647 		}
648 	}
649 	return fp;
650  nospace:
651 	if (disc->errorf)
652 		(*fp->disc->errorf)(fp, fp->disc, 2, "out of space");
653 	if (!vm)
654 		return 0;
655 	if (!fp)
656 	{
657 		vmclose(vm);
658 		return 0;
659 	}
660 	goto drop;
661  invalid:
662 	if (fp->disc->errorf)
663 		(*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path);
664  drop:
665 	if (!fp->generate && fp->decode.match)
666 		regfree(&fp->decode.re);
667 	if (fp->fp)
668 		sfclose(fp->fp);
669 	vmclose(fp->vm);
670 	return 0;
671 }
672 
673 /*
674  * return the next fastfind path
675  * 0 returned when list exhausted
676  */
677 
678 char*
findread(register Find_t * fp)679 findread(register Find_t* fp)
680 {
681 	register char*		p;
682 	register char*		q;
683 	register char*		s;
684 	register char*		b;
685 	register char*		e;
686 	register int		c;
687 	register int		n;
688 	register int		m;
689 	int			ignorecase;
690 	int			t;
691 	unsigned char		w[4];
692 	struct stat		st;
693 
694 	if (fp->generate)
695 		return 0;
696 	if (fp->decode.restore)
697 	{
698 		*fp->decode.restore = '/';
699 		fp->decode.restore = 0;
700 	}
701 	ignorecase = fp->decode.ignorecase ? STR_ICASE : 0;
702 	c = fp->decode.peek;
703  next:
704 	for (;;)
705 	{
706 		switch (fp->method)
707 		{
708 		case FF_dir:
709 			t = 0;
710 			n = sfgetl(fp->fp);
711 			goto grab;
712 		case FF_gnu:
713 			if ((c = sfgetc(fp->fp)) == EOF)
714 				return 0;
715 			if (c == 0x80)
716 			{
717 				if ((c = sfgetc(fp->fp)) == EOF)
718 					return 0;
719 				n = c << 8;
720 				if ((c = sfgetc(fp->fp)) == EOF)
721 					return 0;
722 				n |= c;
723 				if (n & 0x8000)
724 					n = (n - 0xffff) - 1;
725 			}
726 			else if ((n = c) & 0x80)
727 				n = (n - 0xff) - 1;
728 			t = 0;
729 			goto grab;
730 		case FF_typ:
731 			t = sfgetu(fp->fp);
732 			n = sfgetl(fp->fp);
733 		grab:
734 			p = fp->decode.path + (fp->decode.count += n);
735 			do
736 			{
737 				if ((c = sfgetc(fp->fp)) == EOF)
738 					return 0;
739 			} while (*p++ = c);
740 			p -= 2;
741 			break;
742 		case FF_old:
743 			if (c == EOF)
744 			{
745 				fp->decode.peek = c;
746 				return 0;
747 			}
748 			if (c == FF_ESC)
749 			{
750 				if (sfread(fp->fp, w, sizeof(w)) != sizeof(w))
751 					return 0;
752 				if (fp->decode.swap >= 0)
753 				{
754 					c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]);
755 					if (!fp->decode.swap)
756 					{
757 						/*
758 						 * the old format uses machine
759 						 * byte order; this test uses
760 						 * the smallest magnitude of
761 						 * both byte orders on the
762 						 * first encoded path motion
763 						 * to determine the original
764 						 * byte order
765 						 */
766 
767 						m = c;
768 						if (m < 0)
769 							m = -m;
770 						n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
771 						if (n < 0)
772 							n = -n;
773 						if (m < n)
774 							fp->decode.swap = 1;
775 						else
776 						{
777 							fp->decode.swap = -1;
778 							c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
779 						}
780 					}
781 				}
782 				else
783 					c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
784 			}
785 			fp->decode.count += c - FF_OFF;
786 			for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;)
787 				if (c & (1<<(CHAR_BIT-1)))
788 				{
789 					*p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)];
790 					*p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)];
791 				}
792 				else
793 					*p++ = c;
794 			*p-- = 0;
795 			t = 0;
796 			break;
797 		}
798 		b = fp->decode.path;
799 		if (fp->decode.found)
800 			fp->decode.found = 0;
801 		else
802 			b += fp->decode.count;
803 		if (fp->dirs)
804 			for (;;)
805 			{
806 				if (!*fp->dirs)
807 					return 0;
808 
809 				/*
810 				 * use the ordering and lengths to prune
811 				 * comparison function calls
812 				 * (*fp->dirs)[*fp->lens]=='/' if its
813 				 * already been matched
814 				 */
815 
816 				if ((n = p - fp->decode.path + 1) > (m = *fp->lens))
817 				{
818 					if (!(*fp->dirs)[m])
819 						goto next;
820 					if (!strncasecmp(*fp->dirs, fp->decode.path, m))
821 						break;
822 				}
823 				else if (n == m)
824 				{
825 					if (!(*fp->dirs)[m])
826 					{
827 						if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
828 						{
829 							if (m > 0)
830 							{
831 								(*fp->dirs)[m] = '/';
832 								if ((*fp->dirs)[m - 1] != '/')
833 									(*fp->dirs)[++(*fp->lens)] = '/';
834 							}
835 							break;
836 						}
837 						if (n >= 0)
838 							goto next;
839 					}
840 				}
841 				else if (!(*fp->dirs)[m])
842 					goto next;
843 				fp->dirs++;
844 				fp->lens++;
845 			}
846 		if (fp->verify && (*p == '/' || t == 1))
847 		{
848 			if ((n = p - fp->decode.path))
849 				*p = 0;
850 			else
851 				n = 1;
852 			if (fp->verifyf)
853 				n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc);
854 			else if (stat(fp->decode.path, &st))
855 				n = -1;
856 			else if ((unsigned long)st.st_mtime > fp->stamp)
857 				n = 1;
858 			else
859 				n = 0;
860 			*p = '/';
861 
862 			/*
863 			 * n<0	skip this subtree
864 			 * n==0 keep as is
865 			 * n>0	read this dir now
866 			 */
867 
868 			/* NOT IMPLEMENTED YET */
869 		}
870 		if (FF_OK_TYPE(fp, t))
871 		{
872 			if (fp->decode.end)
873 			{
874 				if (*(s = p) == '/')
875 					s--;
876 				if (*fp->decode.pattern == '/' && b > fp->decode.path)
877 					b--;
878 				for (; s >= b; s--)
879 					if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end)
880 					{
881 						if (ignorecase)
882 							for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--);
883 						else
884 							for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--);
885 						if (!*e)
886 						{
887 							fp->decode.found = 1;
888 							if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
889 							{
890 								fp->decode.peek = c;
891 								if (*p == '/')
892 									*(fp->decode.restore = p) = 0;
893 								if (!fp->secure || !access(fp->decode.path, F_OK))
894 									return fp->decode.path;
895 							}
896 							break;
897 						}
898 					}
899 			}
900 			else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0)))
901 			{
902 				fp->decode.peek = c;
903 				if (*p == '/' && p > fp->decode.path)
904 					*(fp->decode.restore = p) = 0;
905 				if (!fp->secure || !access(fp->decode.path, F_OK))
906 					return fp->decode.path;
907 			}
908 			else if (n != REG_NOMATCH)
909 			{
910 				if (fp->disc->errorf)
911 				{
912 					regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
913 					(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp);
914 				}
915 				return 0;
916 			}
917 		}
918 	}
919 }
920 
921 /*
922  * add path to the code table
923  * paths are assumed to be in sort order
924  */
925 
926 int
findwrite(register Find_t * fp,const char * path,size_t len,const char * type)927 findwrite(register Find_t* fp, const char* path, size_t len, const char* type)
928 {
929 	register unsigned char*	s;
930 	register unsigned char*	e;
931 	register unsigned char*	p;
932 	register int		n;
933 	register int		d;
934 	register Type_t*	x;
935 	register unsigned long	u;
936 
937 	if (!fp->generate)
938 		return -1;
939 	if (type && fp->method == FF_dir)
940 	{
941 		len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path);
942 		path = fp->encode.mark;
943 	}
944 	s = (unsigned char*)path;
945 	if (len <= 0)
946 		len = strlen(path);
947 	if (len < sizeof(fp->encode.path))
948 		e = s + len++;
949 	else
950 	{
951 		len = sizeof(fp->encode.path) - 1;
952 		e = s + len;
953 	}
954 	p = (unsigned char*)fp->encode.path;
955 	while (s < e)
956 	{
957 		if (*s != *p++)
958 			break;
959 		s++;
960 	}
961 	n = s - (unsigned char*)path;
962 	switch (fp->method)
963 	{
964 	case FF_gnu:
965 		d = n - fp->encode.prefix;
966 		if (d >= -127 && d <= 127)
967 			sfputc(fp->fp, d & 0xff);
968 		else
969 		{
970 			sfputc(fp->fp, 0x80);
971 			sfputc(fp->fp, (d >> 8) & 0xff);
972 			sfputc(fp->fp, d & 0xff);
973 		}
974 		fp->encode.prefix = n;
975 		sfputr(fp->fp, (char*)s, 0);
976 		break;
977 	case FF_old:
978 		sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF);
979 		fp->encode.prefix = n;
980 		sfputc(fp->fp, ' ');
981 		p = s;
982 		while (s < e)
983 		{
984 			n = *s++;
985 			if (s >= e)
986 				break;
987 			fp->encode.code[n][*s++]++;
988 		}
989 		while (p < e)
990 		{
991 			if ((n = *p++) < FF_MIN || n >= FF_MAX)
992 				n = '?';
993 			sfputc(fp->fp, n);
994 		}
995 		sfputc(fp->fp, 0);
996 		break;
997 	case FF_typ:
998 		if (type)
999 		{
1000 			type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type);
1001 			if (x = (Type_t*)dtmatch(fp->encode.namedict, type))
1002 				u = x->index;
1003 			else if (!(x = newof(0, Type_t, 1, strlen(type) + 1)))
1004 				u = 0;
1005 			else
1006 			{
1007 				u = x->index = ++fp->types;
1008 				strcpy(x->name, type);
1009 				dtinsert(fp->encode.namedict, x);
1010 				dtinsert(fp->encode.indexdict, x);
1011 			}
1012 		}
1013 		else
1014 			u = 0;
1015 		sfputu(fp->fp, u);
1016 		/*FALLTHROUGH...*/
1017 	case FF_dir:
1018 		d = n - fp->encode.prefix;
1019 		sfputl(fp->fp, d);
1020 		fp->encode.prefix = n;
1021 		sfputr(fp->fp, (char*)s, 0);
1022 		break;
1023 	}
1024 	memcpy(fp->encode.path, path, len);
1025 	return 0;
1026 }
1027 
1028 /*
1029  * findsync() helper
1030  */
1031 
1032 static int
finddone(register Find_t * fp)1033 finddone(register Find_t* fp)
1034 {
1035 	int	r;
1036 
1037 	if (sfsync(fp->fp))
1038 	{
1039 		if (fp->disc->errorf)
1040 			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file);
1041 		return -1;
1042 	}
1043 	if (sferror(fp->fp))
1044 	{
1045 		if (fp->disc->errorf)
1046 			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file);
1047 		return -1;
1048 	}
1049 	r = sfclose(fp->fp);
1050 	fp->fp = 0;
1051 	if (r)
1052 	{
1053 		if (fp->disc->errorf)
1054 			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file);
1055 		return -1;
1056 	}
1057 	return 0;
1058 }
1059 
1060 /*
1061  * finish the code table
1062  */
1063 
1064 static int
findsync(register Find_t * fp)1065 findsync(register Find_t* fp)
1066 {
1067 	register char*		s;
1068 	register int		n;
1069 	register int		m;
1070 	register int		d;
1071 	register Type_t*	x;
1072 	char*			t;
1073 	int			b;
1074 	long			z;
1075 	Sfio_t*			sp;
1076 
1077 	switch (fp->method)
1078 	{
1079 	case FF_dir:
1080 	case FF_gnu:
1081 		/*
1082 		 * replace the real file with the temp file
1083 		 */
1084 
1085 		if (finddone(fp))
1086 			goto bad;
1087 		remove(fp->encode.file);
1088 		if (rename(fp->encode.temp, fp->encode.file))
1089 		{
1090 			if (fp->disc->errorf)
1091 				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
1092 			remove(fp->encode.temp);
1093 			return -1;
1094 		}
1095 		break;
1096 	case FF_old:
1097 		/*
1098 		 * determine the top FF_MAX bigrams
1099 		 */
1100 
1101 		for (n = 0; n < FF_MAX; n++)
1102 			for (m = 0; m < FF_MAX; m++)
1103 				fp->encode.hits[fp->encode.code[n][m]]++;
1104 		fp->encode.hits[0] = 0;
1105 		m = 1;
1106 		for (n = USHRT_MAX; n >= 0; n--)
1107 			if (d = fp->encode.hits[n])
1108 			{
1109 				fp->encode.hits[n] = m;
1110 				if ((m += d) > FF_MAX)
1111 					break;
1112 			}
1113 		while (--n >= 0)
1114 			fp->encode.hits[n] = 0;
1115 		for (n = FF_MAX - 1; n >= 0; n--)
1116 			for (m = FF_MAX - 1; m >= 0; m--)
1117 				if (fp->encode.hits[fp->encode.code[n][m]])
1118 				{
1119 					d = fp->encode.code[n][m];
1120 					b = fp->encode.hits[d] - 1;
1121 					fp->encode.code[n][m] = b + FF_MAX;
1122 					if (fp->encode.hits[d]++ >= FF_MAX)
1123 						fp->encode.hits[d] = 0;
1124 					fp->encode.bigram[b *= 2] = n;
1125 					fp->encode.bigram[b + 1] = m;
1126 				}
1127 				else
1128 					fp->encode.code[n][m] = 0;
1129 
1130 		/*
1131 		 * commit the real file
1132 		 */
1133 
1134 		if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET))
1135 		{
1136 			if (fp->disc->errorf)
1137 				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file");
1138 			return -1;
1139 		}
1140 		if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1141 			goto badcreate;
1142 
1143 		/*
1144 		 * dump the bigrams
1145 		 */
1146 
1147 		sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram));
1148 
1149 		/*
1150 		 * encode the massaged paths
1151 		 */
1152 
1153 		while (s = sfgetr(fp->fp, 0, 0))
1154 		{
1155 			z = strtol(s, &t, 0);
1156 			s = t;
1157 			if (z < 0 || z > 2 * FF_OFF)
1158 			{
1159 				sfputc(sp, FF_ESC);
1160 				sfputc(sp, (z >> 24));
1161 				sfputc(sp, (z >> 16));
1162 				sfputc(sp, (z >> 8));
1163 				sfputc(sp, z);
1164 			}
1165 			else
1166 				sfputc(sp, z);
1167 			while (n = *s++)
1168 			{
1169 				if (!(m = *s++))
1170 				{
1171 					sfputc(sp, n);
1172 					break;
1173 				}
1174 				if (d = fp->encode.code[n][m])
1175 					sfputc(sp, d);
1176 				else
1177 				{
1178 					sfputc(sp, n);
1179 					sfputc(sp, m);
1180 				}
1181 			}
1182 		}
1183 		sfclose(fp->fp);
1184 		fp->fp = sp;
1185 		if (finddone(fp))
1186 			goto bad;
1187 		break;
1188 	case FF_typ:
1189 		if (finddone(fp))
1190 			goto bad;
1191 		if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r")))
1192 		{
1193 			if (fp->disc->errorf)
1194 				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp);
1195 			remove(fp->encode.temp);
1196 			return -1;
1197 		}
1198 
1199 		/*
1200 		 * commit the output file
1201 		 */
1202 
1203 		if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1204 			goto badcreate;
1205 
1206 		/*
1207 		 * write the header magic
1208 		 */
1209 
1210 		sfputc(sp, 0);
1211 		sfputr(sp, FF_typ_magic, 0);
1212 
1213 		/*
1214 		 * write the type table in index order starting with 1
1215 		 */
1216 
1217 		for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x))
1218 			sfputr(sp, x->name, 0);
1219 		sfputc(sp, 0);
1220 
1221 		/*
1222 		 * append the front compressed strings
1223 		 */
1224 
1225 		if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp))
1226 		{
1227 			sfclose(sp);
1228 			if (fp->disc->errorf)
1229 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file);
1230 			goto bad;
1231 		}
1232 		sfclose(fp->fp);
1233 		fp->fp = sp;
1234 		if (finddone(fp))
1235 			goto bad;
1236 		remove(fp->encode.temp);
1237 		break;
1238 	}
1239 	return 0;
1240  badcreate:
1241 	if (fp->disc->errorf)
1242 		(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file);
1243  bad:
1244 	if (fp->fp)
1245 	{
1246 		sfclose(fp->fp);
1247 		fp->fp = 0;
1248 	}
1249 	remove(fp->encode.temp);
1250 	return -1;
1251 }
1252 
1253 /*
1254  * close an open fastfind stream
1255  */
1256 
1257 int
findclose(register Find_t * fp)1258 findclose(register Find_t* fp)
1259 {
1260 	int	n = 0;
1261 
1262 	if (!fp)
1263 		return -1;
1264 	if (fp->generate)
1265 	{
1266 		n = findsync(fp);
1267 		if (fp->encode.indexdict)
1268 			dtclose(fp->encode.indexdict);
1269 		if (fp->encode.namedict)
1270 			dtclose(fp->encode.namedict);
1271 	}
1272 	else
1273 	{
1274 		if (fp->decode.match)
1275 			regfree(&fp->decode.re);
1276 		n = 0;
1277 	}
1278 	if (fp->fp)
1279 		sfclose(fp->fp);
1280 	vmclose(fp->vm);
1281 	return n;
1282 }
1283