1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23/*
24 * original code
25 *
26 *  	James A. Woods, Informatics General Corporation,
27 *	NASA Ames Research Center, 6/81.
28 *	Usenix ;login:, February/March, 1983, p. 8.
29 *
30 * discipline/method interface
31 *
32 *	Glenn Fowler
33 *	AT&T Research
34 *	modified from the original BSD source
35 *
36 * 'fastfind' scans a file list for the full pathname of a file
37 * given only a piece of the name.  The list is processed with
38 * with "front-compression" and bigram coding.  Front compression reduces
39 * space by a factor of 4-5, bigram coding by a further 20-25%.
40 *
41 * there are 4 methods:
42 *
43 *	FF_old	original with 7 bit bigram encoding (no magic)
44 *	FF_gnu	8 bit clean front compression (FF_gnu_magic)
45 *	FF_dir	FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
46 *	FF_typ	FF_dir with (mime) types (FF_typ_magic)
47 *
48 * the bigram encoding steals the eighth bit (that's why its FF_old)
49 * maybe one day we'll limit it to readonly:
50 *
51 * 0-2*FF_OFF	 likeliest differential counts + offset to make nonnegative
52 * FF_ESC	 4 byte big-endian out-of-range count+FF_OFF follows
53 * FF_MIN-FF_MAX ascii residue
54 * >=FF_MAX	 bigram codes
55 *
56 * a two-tiered string search technique is employed
57 *
58 * a metacharacter-free subpattern and partial pathname is matched
59 * backwards to avoid full expansion of the pathname list
60 *
61 * then the actual shell glob-style regular expression (if in this form)
62 * is matched against the candidate pathnames using the slower regexec()
63 *
64 * The original BSD code is covered by the BSD license:
65 *
66 * Copyright (c) 1985, 1993, 1999
67 *	The Regents of the University of California.  All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 *    notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 *    notice, this list of conditions and the following disclaimer in the
76 *    documentation and/or other materials provided with the distribution.
77 * 3. Neither the name of the University nor the names of its contributors
78 *    may be used to endorse or promote products derived from this software
79 *    without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 */
93
94static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
95
96static const char lib[] = "libast:fastfind";
97
98#include "findlib.h"
99
100#define FIND_MATCH	"*/(find|locate)/*"
101
102/*
103 * this db could be anywhere
104 * findcodes[] directories are checked for findnames[i]
105 */
106
107static char*		findcodes[] =
108{
109	0,
110	0,
111	FIND_CODES,
112	"/usr/local/share/lib",
113	"/usr/local/lib",
114	"/usr/share/lib",
115	"/usr/lib",
116	"/var/spool",
117	"/usr/local/var",
118	"/var/lib",
119	"/var/lib/slocate",
120	"/var/db",
121};
122
123static char*		findnames[] =
124{
125	"find/codes",
126	"find/find.codes",
127	"locate/locatedb",
128	"locatedb",
129	"locate.database",
130	"slocate.db",
131};
132
133/*
134 * convert t to lower case and drop leading x- and x- after /
135 * converted value copied to b of size n
136 */
137
138char*
139typefix(char* buf, size_t n, register const char* t)
140{
141	register int	c;
142	register char*	b = buf;
143
144	if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
145		t += 2;
146	while (c = *t++)
147	{
148		if (isupper(c))
149			c = tolower(c);
150		if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
151			t += 2;
152	}
153	*b = 0;
154	return buf;
155}
156
157/*
158 * return a fastfind stream handle for pattern
159 */
160
161Find_t*
162findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc)
163{
164	register Find_t*	fp;
165	register char*		p;
166	register char*		s;
167	register char*		b;
168	register int		i;
169	register int		j;
170	char*			path;
171	int			brace = 0;
172	int			paren = 0;
173	int			k;
174	int			q;
175	int			fd;
176	int			uid;
177	Vmalloc_t*		vm;
178	Type_t*			tp;
179	struct stat		st;
180
181
182	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
183		goto nospace;
184
185	/*
186	 * NOTE: searching for FIND_CODES would be much simpler if we
187	 *       just stuck with our own, but we also support GNU
188	 *	 locate codes and have to search for the one of a
189	 *	 bazillion possible names for that file
190	 */
191
192	if (!findcodes[1])
193		findcodes[1] = getenv(FIND_CODES_ENV);
194	if (disc->flags & FIND_GENERATE)
195	{
196		if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t))))
197			goto nospace;
198		fp->vm = vm;
199		fp->id = lib;
200		fp->disc = disc;
201		fp->generate = 1;
202		if (file && (!*file || streq(file, "-")))
203			file = 0;
204		uid = geteuid();
205		j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
206
207		/*
208		 * look for the codes file, but since it may not exist yet,
209		 * also look for the containing directory if i<2 or if
210		 * it is sufficiently qualified (FIND_MATCH)
211		 */
212
213		for (i = 0; i < j; i++)
214			if (path = findcodes[i])
215			{
216				if (*path == '/')
217				{
218					if (!stat(path, &st))
219					{
220						if (S_ISDIR(st.st_mode))
221						{
222							for (k = 0; k < elementsof(findnames); k++)
223							{
224								sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]);
225								if (!eaccess(fp->encode.file, R_OK|W_OK))
226								{
227									path = fp->encode.file;
228									break;
229								}
230								if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/')))
231								{
232									*b = 0;
233									if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
234									{
235										*b = '/';
236										path = fp->encode.file;
237										break;
238									}
239								}
240							}
241							if (k < elementsof(findnames))
242								break;
243						}
244						else if (st.st_uid == uid && (st.st_mode & S_IWUSR))
245						{
246							sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
247							path = fp->encode.file;
248							break;
249						}
250					}
251					else if (i < 2 || strmatch(path, FIND_MATCH))
252					{
253						sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
254						if (b = strrchr(fp->encode.file, '/'))
255						{
256							*b = 0;
257							if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
258							{
259								*b = '/';
260								path = fp->encode.file;
261								break;
262							}
263						}
264					}
265				}
266				else if (pathpath(fp->encode.file, path, "", PATH_REGULAR|PATH_READ|PATH_WRITE))
267				{
268					path = fp->encode.file;
269					break;
270				}
271				else if (b = strrchr(path, '/'))
272				{
273					sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path);
274					if (pathpath(fp->encode.temp, fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE) &&
275					    !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
276					{
277						sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b);
278						path = fp->encode.file;
279						break;
280					}
281				}
282			}
283		if (i >= j)
284		{
285			if (fp->disc->errorf)
286				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
287			goto drop;
288		}
289		if (fp->disc->flags & FIND_OLD)
290		{
291			/*
292			 * FF_old generates temp data that is read
293			 * in a second pass to generate the real codes
294			 */
295
296			fp->method = FF_old;
297			if (!(fp->fp = sftmp(32 * PATH_MAX)))
298			{
299				if (fp->disc->errorf)
300					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file");
301				goto drop;
302			}
303		}
304		else
305		{
306			/*
307			 * the rest generate into a temp file that
308			 * is simply renamed on completion
309			 */
310
311			if (s = strrchr(path, '/'))
312			{
313				*s = 0;
314				p = path;
315			}
316			else
317				p = ".";
318			if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd))
319			{
320				if (fp->disc->errorf)
321					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
322				goto drop;
323			}
324			if (s)
325				*s = '/';
326			if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE)))
327			{
328				if (fp->disc->errorf)
329					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp);
330				close(fd);
331				goto drop;
332			}
333			if (fp->disc->flags & FIND_TYPE)
334			{
335				fp->method = FF_typ;
336				fp->encode.namedisc.key = offsetof(Type_t, name);
337				fp->encode.namedisc.link = offsetof(Type_t, byname);
338				fp->encode.indexdisc.key = offsetof(Type_t, index);
339				fp->encode.indexdisc.size = sizeof(unsigned long);
340				fp->encode.indexdisc.link = offsetof(Type_t, byindex);
341				s = "system/dir";
342				if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dttree)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dttree)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
343				{
344					if (fp->encode.namedict)
345						dtclose(fp->encode.namedict);
346					if (fp->disc->errorf)
347						(*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table");
348					goto drop;
349				}
350
351				/*
352				 * type index 1 is always system/dir
353				 */
354
355				tp->index = ++fp->types;
356				strcpy(tp->name, s);
357				dtinsert(fp->encode.namedict, tp);
358				dtinsert(fp->encode.indexdict, tp);
359			}
360			else if (fp->disc->flags & FIND_GNU)
361			{
362				fp->method = FF_gnu;
363				sfputc(fp->fp, 0);
364				sfputr(fp->fp, FF_gnu_magic, 0);
365			}
366			else
367			{
368				fp->method = FF_dir;
369				sfputc(fp->fp, 0);
370				sfputr(fp->fp, FF_dir_magic, 0);
371			}
372		}
373	}
374	else
375	{
376		i = sizeof(Decode_t) + sizeof(Code_t);
377		if (!pattern || !*pattern)
378			pattern = "*";
379		i += (j = 2 * (strlen(pattern) + 1));
380		if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i)))
381		{
382			vmclose(vm);
383			return 0;
384		}
385		fp->vm = vm;
386		fp->id = lib;
387		fp->disc = disc;
388		if (disc->flags & FIND_ICASE)
389			fp->decode.ignorecase = 1;
390		j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
391		for (i = 0; i < j; i++)
392			if (path = findcodes[i])
393			{
394				if (*path == '/')
395				{
396					if (!stat(path, &st))
397					{
398						if (S_ISDIR(st.st_mode))
399						{
400							for (k = 0; k < elementsof(findnames); k++)
401							{
402								sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]);
403								if (fp->fp = sfopen(NiL, fp->decode.path, "r"))
404								{
405									path = fp->decode.path;
406									break;
407								}
408							}
409							if (fp->fp)
410								break;
411						}
412						else if (fp->fp = sfopen(NiL, path, "r"))
413							break;
414					}
415				}
416				else if ((path = pathpath(fp->decode.path, path, "", PATH_REGULAR|PATH_READ)) && (fp->fp = sfopen(NiL, path, "r")))
417					break;
418			}
419		if (!fp->fp)
420		{
421			if (fp->disc->errorf)
422				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
423			goto drop;
424		}
425		if (fstat(sffileno(fp->fp), &st))
426		{
427			if (fp->disc->errorf)
428				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path);
429			goto drop;
430		}
431		if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
432			setgid(getgid());
433		fp->stamp = st.st_mtime;
434		b = (s = fp->decode.temp) + 1;
435		for (i = 0; i < elementsof(fp->decode.bigram1); i++)
436		{
437			if ((j = sfgetc(fp->fp)) == EOF)
438				goto invalid;
439			if (!(*s++ = fp->decode.bigram1[i] = j) && i)
440			{
441				i = -i;
442				break;
443			}
444			if ((j = sfgetc(fp->fp)) == EOF)
445				goto invalid;
446			if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
447				break;
448		}
449		if (streq(b, FF_typ_magic))
450		{
451			if (type)
452			{
453				type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type);
454				memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1));
455			}
456			fp->method = FF_typ;
457			for (j = 0, i = 1;; i++)
458			{
459				if (!(s = sfgetr(fp->fp, 0, 0)))
460					goto invalid;
461				if (!*s)
462					break;
463				if (type && strmatch(s, type))
464				{
465					FF_SET_TYPE(fp, i);
466					j++;
467				}
468			}
469			if (type && !j)
470				goto drop;
471			fp->types = j;
472		}
473		else if (streq(b, FF_dir_magic))
474			fp->method = FF_dir;
475		else if (streq(b, FF_gnu_magic))
476			fp->method = FF_gnu;
477		else if (!*b && *--b >= '0' && *b <= '1')
478		{
479			fp->method = FF_gnu;
480			while (j = sfgetc(fp->fp))
481			{
482				if (j == EOF || fp->decode.count >= sizeof(fp->decode.path))
483					goto invalid;
484				fp->decode.path[fp->decode.count++] = j;
485			}
486		}
487		else
488		{
489			fp->method = FF_old;
490			if (i < 0)
491			{
492				if ((j = sfgetc(fp->fp)) == EOF)
493					goto invalid;
494				fp->decode.bigram2[i = -i] = j;
495			}
496			while (++i < elementsof(fp->decode.bigram1))
497			{
498				if ((j = sfgetc(fp->fp)) == EOF)
499					goto invalid;
500				fp->decode.bigram1[i] = j;
501				if ((j = sfgetc(fp->fp)) == EOF)
502					goto invalid;
503				fp->decode.bigram2[i] = j;
504			}
505			if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF)
506				goto invalid;
507		}
508
509		/*
510		 * set up the physical dir table
511		 */
512
513		if (disc->version >= 19980301L)
514		{
515			fp->verifyf = disc->verifyf;
516			if (disc->dirs && *disc->dirs)
517			{
518				for (k = 0; disc->dirs[k]; k++);
519				if (k == 1 && streq(disc->dirs[0], "/"))
520					k = 0;
521				if (k)
522				{
523					if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0)))
524						goto drop;
525					if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0)))
526						goto drop;
527					p = 0;
528					b = fp->decode.temp;
529					j = fp->method == FF_old || fp->method == FF_gnu;
530
531					/*
532					 * fill the dir list with logical and
533					 * physical names since we don't know
534					 * which way the db was encoded (it
535					 * could be *both* ways)
536					 */
537
538					for (i = q = 0; i < k; i++)
539					{
540						if (*(s = disc->dirs[i]) == '/')
541							sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s);
542						else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path))))
543							goto nospace;
544						else
545							sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s);
546						s = pathcanon(b, 0);
547						*s = '/';
548						*(s + 1) = 0;
549						if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
550							goto nospace;
551						if (j)
552							(fp->dirs[q])[s - b] = 0;
553						q++;
554						*s = 0;
555						s = pathcanon(b, PATH_PHYSICAL);
556						*s = '/';
557						*(s + 1) = 0;
558						if (!strneq(b, fp->dirs[q - 1], s - b))
559						{
560							if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
561								goto nospace;
562							if (j)
563								(fp->dirs[q])[s - b] = 0;
564							q++;
565						}
566					}
567					strsort(fp->dirs, q, strcasecmp);
568					for (i = 0; i < q; i++)
569						fp->lens[i] = strlen(fp->dirs[i]);
570				}
571			}
572		}
573		if (fp->verifyf || (disc->flags & FIND_VERIFY))
574		{
575			if (fp->method != FF_dir && fp->method != FF_typ)
576			{
577				if (fp->disc->errorf)
578					(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
579				goto drop;
580			}
581			fp->verify = 1;
582		}
583
584		/*
585		 * extract last glob-free subpattern in name for fast pre-match
586		 * prepend 0 for backwards match
587		 */
588
589		if (p = s = (char*)pattern)
590		{
591			b = fp->decode.pattern;
592			for (;;)
593			{
594				switch (*b++ = *p++)
595				{
596				case 0:
597					break;
598				case '\\':
599					s = p;
600					if (!*p++)
601						break;
602					continue;
603				case '[':
604					if (!brace)
605					{
606						brace++;
607						if (*p == ']')
608							p++;
609					}
610					continue;
611				case ']':
612					if (brace)
613					{
614						brace--;
615						s = p;
616					}
617					continue;
618				case '(':
619					if (!brace)
620						paren++;
621					continue;
622				case ')':
623					if (!brace && paren > 0 && !--paren)
624						s = p;
625					continue;
626				case '|':
627				case '&':
628					if (!brace && !paren)
629					{
630						s = "";
631						break;
632					}
633					continue;
634				case '*':
635				case '?':
636					s = p;
637					continue;
638				default:
639					continue;
640				}
641				break;
642			}
643			if (s != pattern && !streq(pattern, "*"))
644			{
645				fp->decode.match = 1;
646				if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
647				{
648					if (disc->errorf)
649					{
650						regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
651						(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp);
652					}
653					goto drop;
654				}
655			}
656			if (*s)
657			{
658				*b++ = 0;
659				while (i = *s++)
660					*b++ = i;
661				*b-- = 0;
662				fp->decode.end = b;
663				if (fp->decode.ignorecase)
664					for (s = fp->decode.pattern; s <= b; s++)
665						if (isupper(*s))
666							*s = tolower(*s);
667			}
668		}
669	}
670	return fp;
671 nospace:
672	if (disc->errorf)
673		(*fp->disc->errorf)(fp, fp->disc, 2, "out of space");
674	if (!vm)
675		return 0;
676	if (!fp)
677	{
678		vmclose(vm);
679		return 0;
680	}
681	goto drop;
682 invalid:
683	if (fp->disc->errorf)
684		(*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path);
685 drop:
686	if (!fp->generate && fp->decode.match)
687		regfree(&fp->decode.re);
688	if (fp->fp)
689		sfclose(fp->fp);
690	vmclose(fp->vm);
691	return 0;
692}
693
694/*
695 * return the next fastfind path
696 * 0 returned when list exhausted
697 */
698
699char*
700findread(register Find_t* fp)
701{
702	register char*		p;
703	register char*		q;
704	register char*		s;
705	register char*		b;
706	register char*		e;
707	register int		c;
708	register int		n;
709	register int		m;
710	int			ignorecase;
711	int			t;
712	unsigned char		w[4];
713	struct stat		st;
714
715	if (fp->generate)
716		return 0;
717	if (fp->decode.restore)
718	{
719		*fp->decode.restore = '/';
720		fp->decode.restore = 0;
721	}
722	ignorecase = fp->decode.ignorecase ? STR_ICASE : 0;
723	c = fp->decode.peek;
724 next:
725	for (;;)
726	{
727		switch (fp->method)
728		{
729		case FF_dir:
730			t = 0;
731			n = sfgetl(fp->fp);
732			goto grab;
733		case FF_gnu:
734			if ((c = sfgetc(fp->fp)) == EOF)
735				return 0;
736			if (c == 0x80)
737			{
738				if ((c = sfgetc(fp->fp)) == EOF)
739					return 0;
740				n = c << 8;
741				if ((c = sfgetc(fp->fp)) == EOF)
742					return 0;
743				n |= c;
744				if (n & 0x8000)
745					n = (n - 0xffff) - 1;
746			}
747			else if ((n = c) & 0x80)
748				n = (n - 0xff) - 1;
749			t = 0;
750			goto grab;
751		case FF_typ:
752			t = sfgetu(fp->fp);
753			n = sfgetl(fp->fp);
754		grab:
755			p = fp->decode.path + (fp->decode.count += n);
756			do
757			{
758				if ((c = sfgetc(fp->fp)) == EOF)
759					return 0;
760			} while (*p++ = c);
761			p -= 2;
762			break;
763		case FF_old:
764			if (c == EOF)
765			{
766				fp->decode.peek = c;
767				return 0;
768			}
769			if (c == FF_ESC)
770			{
771				if (sfread(fp->fp, w, sizeof(w)) != sizeof(w))
772					return 0;
773				if (fp->decode.swap >= 0)
774				{
775					c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]);
776					if (!fp->decode.swap)
777					{
778						/*
779						 * the old format uses machine
780						 * byte order; this test uses
781						 * the smallest magnitude of
782						 * both byte orders on the
783						 * first encoded path motion
784						 * to determine the original
785						 * byte order
786						 */
787
788						m = c;
789						if (m < 0)
790							m = -m;
791						n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
792						if (n < 0)
793							n = -n;
794						if (m < n)
795							fp->decode.swap = 1;
796						else
797						{
798							fp->decode.swap = -1;
799							c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
800						}
801					}
802				}
803				else
804					c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
805			}
806			fp->decode.count += c - FF_OFF;
807			for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;)
808				if (c & (1<<(CHAR_BIT-1)))
809				{
810					*p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)];
811					*p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)];
812				}
813				else
814					*p++ = c;
815			*p-- = 0;
816			t = 0;
817			break;
818		}
819		b = fp->decode.path;
820		if (fp->decode.found)
821			fp->decode.found = 0;
822		else
823			b += fp->decode.count;
824		if (fp->dirs)
825			for (;;)
826			{
827				if (!*fp->dirs)
828					return 0;
829
830				/*
831				 * use the ordering and lengths to prune
832				 * comparison function calls
833				 * (*fp->dirs)[*fp->lens]=='/' if its
834				 * already been matched
835				 */
836
837				if ((n = p - fp->decode.path + 1) > (m = *fp->lens))
838				{
839					if (!(*fp->dirs)[m])
840						goto next;
841					if (!strncasecmp(*fp->dirs, fp->decode.path, m))
842						break;
843				}
844				else if (n == m)
845				{
846					if (!(*fp->dirs)[m])
847					{
848						if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
849						{
850							if (m > 0)
851							{
852								(*fp->dirs)[m] = '/';
853								if ((*fp->dirs)[m - 1] != '/')
854									(*fp->dirs)[++(*fp->lens)] = '/';
855							}
856							break;
857						}
858						if (n >= 0)
859							goto next;
860					}
861				}
862				else if (!(*fp->dirs)[m])
863					goto next;
864				fp->dirs++;
865				fp->lens++;
866			}
867		if (fp->verify && (*p == '/' || t == 1))
868		{
869			if ((n = p - fp->decode.path))
870				*p = 0;
871			else
872				n = 1;
873			if (fp->verifyf)
874				n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc);
875			else if (stat(fp->decode.path, &st))
876				n = -1;
877			else if ((unsigned long)st.st_mtime > fp->stamp)
878				n = 1;
879			else
880				n = 0;
881			*p = '/';
882
883			/*
884			 * n<0	skip this subtree
885			 * n==0 keep as is
886			 * n>0	read this dir now
887			 */
888
889			/* NOT IMPLEMENTED YET */
890		}
891		if (FF_OK_TYPE(fp, t))
892		{
893			if (fp->decode.end)
894			{
895				if (*(s = p) == '/')
896					s--;
897				if (*fp->decode.pattern == '/' && b > fp->decode.path)
898					b--;
899				for (; s >= b; s--)
900					if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end)
901					{
902						if (ignorecase)
903							for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--);
904						else
905							for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--);
906						if (!*e)
907						{
908							fp->decode.found = 1;
909							if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
910							{
911								fp->decode.peek = c;
912								if (*p == '/')
913									*(fp->decode.restore = p) = 0;
914								if (!fp->secure || !access(fp->decode.path, F_OK))
915									return fp->decode.path;
916							}
917							break;
918						}
919					}
920			}
921			else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0)))
922			{
923				fp->decode.peek = c;
924				if (*p == '/' && p > fp->decode.path)
925					*(fp->decode.restore = p) = 0;
926				if (!fp->secure || !access(fp->decode.path, F_OK))
927					return fp->decode.path;
928			}
929			else if (n != REG_NOMATCH)
930			{
931				if (fp->disc->errorf)
932				{
933					regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
934					(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp);
935				}
936				return 0;
937			}
938		}
939	}
940}
941
942/*
943 * add path to the code table
944 * paths are assumed to be in sort order
945 */
946
947int
948findwrite(register Find_t* fp, const char* path, size_t len, const char* type)
949{
950	register unsigned char*	s;
951	register unsigned char*	e;
952	register unsigned char*	p;
953	register int		n;
954	register int		d;
955	register Type_t*	x;
956	register unsigned long	u;
957
958	if (!fp->generate)
959		return -1;
960	if (type && fp->method == FF_dir)
961	{
962		len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path);
963		path = fp->encode.mark;
964	}
965	s = (unsigned char*)path;
966	if (len <= 0)
967		len = strlen(path);
968	if (len < sizeof(fp->encode.path))
969		e = s + len++;
970	else
971	{
972		len = sizeof(fp->encode.path) - 1;
973		e = s + len;
974	}
975	p = (unsigned char*)fp->encode.path;
976	while (s < e)
977	{
978		if (*s != *p++)
979			break;
980		s++;
981	}
982	n = s - (unsigned char*)path;
983	switch (fp->method)
984	{
985	case FF_gnu:
986		d = n - fp->encode.prefix;
987		if (d >= -127 && d <= 127)
988			sfputc(fp->fp, d & 0xff);
989		else
990		{
991			sfputc(fp->fp, 0x80);
992			sfputc(fp->fp, (d >> 8) & 0xff);
993			sfputc(fp->fp, d & 0xff);
994		}
995		fp->encode.prefix = n;
996		sfputr(fp->fp, (char*)s, 0);
997		break;
998	case FF_old:
999		sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF);
1000		fp->encode.prefix = n;
1001		sfputc(fp->fp, ' ');
1002		p = s;
1003		while (s < e)
1004		{
1005			n = *s++;
1006			if (s >= e)
1007				break;
1008			fp->encode.code[n][*s++]++;
1009		}
1010		while (p < e)
1011		{
1012			if ((n = *p++) < FF_MIN || n >= FF_MAX)
1013				n = '?';
1014			sfputc(fp->fp, n);
1015		}
1016		sfputc(fp->fp, 0);
1017		break;
1018	case FF_typ:
1019		if (type)
1020		{
1021			type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type);
1022			if (x = (Type_t*)dtmatch(fp->encode.namedict, type))
1023				u = x->index;
1024			else if (!(x = newof(0, Type_t, 1, strlen(type) + 1)))
1025				u = 0;
1026			else
1027			{
1028				u = x->index = ++fp->types;
1029				strcpy(x->name, type);
1030				dtinsert(fp->encode.namedict, x);
1031				dtinsert(fp->encode.indexdict, x);
1032			}
1033		}
1034		else
1035			u = 0;
1036		sfputu(fp->fp, u);
1037		/*FALLTHROUGH...*/
1038	case FF_dir:
1039		d = n - fp->encode.prefix;
1040		sfputl(fp->fp, d);
1041		fp->encode.prefix = n;
1042		sfputr(fp->fp, (char*)s, 0);
1043		break;
1044	}
1045	memcpy(fp->encode.path, path, len);
1046	return 0;
1047}
1048
1049/*
1050 * findsync() helper
1051 */
1052
1053static int
1054finddone(register Find_t* fp)
1055{
1056	int	r;
1057
1058	if (sfsync(fp->fp))
1059	{
1060		if (fp->disc->errorf)
1061			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file);
1062		return -1;
1063	}
1064	if (sferror(fp->fp))
1065	{
1066		if (fp->disc->errorf)
1067			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file);
1068		return -1;
1069	}
1070	r = sfclose(fp->fp);
1071	fp->fp = 0;
1072	if (r)
1073	{
1074		if (fp->disc->errorf)
1075			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file);
1076		return -1;
1077	}
1078	return 0;
1079}
1080
1081/*
1082 * finish the code table
1083 */
1084
1085static int
1086findsync(register Find_t* fp)
1087{
1088	register char*		s;
1089	register int		n;
1090	register int		m;
1091	register int		d;
1092	register Type_t*	x;
1093	char*			t;
1094	int			b;
1095	long			z;
1096	Sfio_t*			sp;
1097
1098	switch (fp->method)
1099	{
1100	case FF_dir:
1101	case FF_gnu:
1102		/*
1103		 * replace the real file with the temp file
1104		 */
1105
1106		if (finddone(fp))
1107			goto bad;
1108		remove(fp->encode.file);
1109		if (rename(fp->encode.temp, fp->encode.file))
1110		{
1111			if (fp->disc->errorf)
1112				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
1113			remove(fp->encode.temp);
1114			return -1;
1115		}
1116		break;
1117	case FF_old:
1118		/*
1119		 * determine the top FF_MAX bigrams
1120		 */
1121
1122		for (n = 0; n < FF_MAX; n++)
1123			for (m = 0; m < FF_MAX; m++)
1124				fp->encode.hits[fp->encode.code[n][m]]++;
1125		fp->encode.hits[0] = 0;
1126		m = 1;
1127		for (n = USHRT_MAX; n >= 0; n--)
1128			if (d = fp->encode.hits[n])
1129			{
1130				fp->encode.hits[n] = m;
1131				if ((m += d) > FF_MAX)
1132					break;
1133			}
1134		while (--n >= 0)
1135			fp->encode.hits[n] = 0;
1136		for (n = FF_MAX - 1; n >= 0; n--)
1137			for (m = FF_MAX - 1; m >= 0; m--)
1138				if (fp->encode.hits[fp->encode.code[n][m]])
1139				{
1140					d = fp->encode.code[n][m];
1141					b = fp->encode.hits[d] - 1;
1142					fp->encode.code[n][m] = b + FF_MAX;
1143					if (fp->encode.hits[d]++ >= FF_MAX)
1144						fp->encode.hits[d] = 0;
1145					fp->encode.bigram[b *= 2] = n;
1146					fp->encode.bigram[b + 1] = m;
1147				}
1148				else
1149					fp->encode.code[n][m] = 0;
1150
1151		/*
1152		 * commit the real file
1153		 */
1154
1155		if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET))
1156		{
1157			if (fp->disc->errorf)
1158				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file");
1159			return -1;
1160		}
1161		if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1162			goto badcreate;
1163
1164		/*
1165		 * dump the bigrams
1166		 */
1167
1168		sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram));
1169
1170		/*
1171		 * encode the massaged paths
1172		 */
1173
1174		while (s = sfgetr(fp->fp, 0, 0))
1175		{
1176			z = strtol(s, &t, 0);
1177			s = t;
1178			if (z < 0 || z > 2 * FF_OFF)
1179			{
1180				sfputc(sp, FF_ESC);
1181				sfputc(sp, (z >> 24));
1182				sfputc(sp, (z >> 16));
1183				sfputc(sp, (z >> 8));
1184				sfputc(sp, z);
1185			}
1186			else
1187				sfputc(sp, z);
1188			while (n = *s++)
1189			{
1190				if (!(m = *s++))
1191				{
1192					sfputc(sp, n);
1193					break;
1194				}
1195				if (d = fp->encode.code[n][m])
1196					sfputc(sp, d);
1197				else
1198				{
1199					sfputc(sp, n);
1200					sfputc(sp, m);
1201				}
1202			}
1203		}
1204		sfclose(fp->fp);
1205		fp->fp = sp;
1206		if (finddone(fp))
1207			goto bad;
1208		break;
1209	case FF_typ:
1210		if (finddone(fp))
1211			goto bad;
1212		if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r")))
1213		{
1214			if (fp->disc->errorf)
1215				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp);
1216			remove(fp->encode.temp);
1217			return -1;
1218		}
1219
1220		/*
1221		 * commit the output file
1222		 */
1223
1224		if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1225			goto badcreate;
1226
1227		/*
1228		 * write the header magic
1229		 */
1230
1231		sfputc(sp, 0);
1232		sfputr(sp, FF_typ_magic, 0);
1233
1234		/*
1235		 * write the type table in index order starting with 1
1236		 */
1237
1238		for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x))
1239			sfputr(sp, x->name, 0);
1240		sfputc(sp, 0);
1241
1242		/*
1243		 * append the front compressed strings
1244		 */
1245
1246		if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp))
1247		{
1248			sfclose(sp);
1249			if (fp->disc->errorf)
1250				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file);
1251			goto bad;
1252		}
1253		sfclose(fp->fp);
1254		fp->fp = sp;
1255		if (finddone(fp))
1256			goto bad;
1257		remove(fp->encode.temp);
1258		break;
1259	}
1260	return 0;
1261 badcreate:
1262	if (fp->disc->errorf)
1263		(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file);
1264 bad:
1265	if (fp->fp)
1266	{
1267		sfclose(fp->fp);
1268		fp->fp = 0;
1269	}
1270	remove(fp->encode.temp);
1271	return -1;
1272}
1273
1274/*
1275 * close an open fastfind stream
1276 */
1277
1278int
1279findclose(register Find_t* fp)
1280{
1281	int	n = 0;
1282
1283	if (!fp)
1284		return -1;
1285	if (fp->generate)
1286	{
1287		n = findsync(fp);
1288		if (fp->encode.indexdict)
1289			dtclose(fp->encode.indexdict);
1290		if (fp->encode.namedict)
1291			dtclose(fp->encode.namedict);
1292	}
1293	else
1294	{
1295		if (fp->decode.match)
1296			regfree(&fp->decode.re);
1297		n = 0;
1298	}
1299	if (fp->fp)
1300		sfclose(fp->fp);
1301	vmclose(fp->vm);
1302	return n;
1303}
1304