xref: /illumos-gate/usr/src/cmd/refer/hunt2.c (revision 2a8bcb4e)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved. The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #include "refer..c"
16 
17 static int *coord = 0;
18 int hh[50];
19 extern int *hfreq, hfrflg;
20 extern int prfreqs;
21 union ptr {
22 	unsigned *a;
23 	long *b;
24 };
25 
26 extern int hash();
27 extern void shell();
28 extern void *zalloc();
29 
30 static long getl(FILE *);
31 static int hcomp(int, int);
32 static void hexch(int, int);
33 
34 int
doquery(long * hpt,int nhash,FILE * fb,int nitem,char * qitem[],unsigned * mptr)35 doquery(long *hpt, int nhash, FILE *fb, int nitem,
36 	    char *qitem[], unsigned *mptr)
37 {
38 	long k;
39 	union ptr prevdrop, master;
40 	int nf = 0, best = 0, nterm = 0, i, g, j;
41 	int *prevcoord;
42 	long lp;
43 	extern int lmaster, colevel, reached;
44 	extern int iflong;
45 
46 	if (iflong) {
47 		master.b = (long *)mptr;
48 	} else {
49 		master.a = mptr;
50 	}
51 
52 #if D1
53 	fprintf(stderr, "entering doquery nitem %d\n", nitem);
54 	fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n",
55 	    hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]);
56 	fprintf(stderr, "and frequencies are  %d %d %d %d %d\n",
57 	    hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]);
58 #endif
59 	assert(lmaster > 0);
60 	if (coord == 0)
61 		coord = (int *)zalloc(lmaster, sizeof (lmaster));
62 	if (colevel > 0) {
63 		if (iflong)
64 			prevdrop.b = (long *)zalloc(lmaster, sizeof (long));
65 		else
66 			prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int));
67 		prevcoord = (int *)zalloc(lmaster, sizeof (lmaster));
68 	} else {
69 		prevdrop.a = master.a;
70 		prevcoord = coord;
71 	}
72 #if D1
73 	fprintf(stderr, "nitem %d\n", nitem);
74 #endif
75 	for (i = 0; i < nitem; i++) {
76 		hh[i] = hash(qitem[i])%nhash;
77 #if D1
78 		fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]);
79 #endif
80 	}
81 #if D1
82 	fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
83 #endif
84 	if (prfreqs)
85 		for (i = 0; i < nitem; i++)
86 			fprintf(stderr, "item %s hash %d hfreq %d\n",
87 			    qitem[i], hh[i], hfreq[hh[i]]);
88 	/* if possible, sort query into decreasing frequency of hashes */
89 	if (hfrflg)
90 		shell(nitem, hcomp, hexch);
91 #if D1
92 	for (i = 0; i < nitem; i++)
93 		fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
94 #endif
95 	lp = hpt [hh[0]];
96 #if D1
97 	fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp);
98 #endif
99 	assert(fb != NULL);
100 	assert(fseek(fb, lp, 0) != -1);
101 	for (i = 0; i < lmaster; i++) {
102 		if (iflong)
103 			master.b[i] = getl(fb);
104 		else
105 			master.a[i] = getw(fb);
106 		coord[i] = 1;
107 #if D2
108 		if (iflong)
109 			fprintf(stderr, "master has %ld\n", (master.b[i]));
110 		else
111 			fprintf(stderr, "master has %d\n", (master.a[i]));
112 #endif
113 		assert(i < lmaster);
114 		if (iflong) {
115 			if (master.b[i] == -1L) break;
116 		} else {
117 			if (master.a[i] == -1) break;
118 		}
119 	}
120 	nf = i;
121 	for (nterm = 1; nterm < nitem; nterm++) {
122 #ifdef D1
123 		fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
124 #endif
125 		if (colevel > 0) {
126 			for (j = 0; j < nf; j++) {
127 				if (iflong)
128 					prevdrop.b[j] = master.b[j];
129 				else
130 					prevdrop.a[j] = master.a[j];
131 				prevcoord[j] = coord[j];
132 			}
133 		}
134 		lp = hpt[hh[nterm]];
135 		assert(fseek(fb, lp, 0) != -1);
136 #if D1
137 		fprintf(stderr, "item %d hash %d seek to %ld\n",
138 		    nterm, hh[nterm], lp);
139 #endif
140 		g = j = 0;
141 		while (1) {
142 			if (iflong)
143 				k = getl(fb);
144 			else
145 				k = getw(fb);
146 			if (k == -1) break;
147 #if D2
148 			fprintf(stderr, "next term finds %ld\n", k);
149 #endif
150 #if D3
151 			if (iflong)
152 				fprintf(stderr,
153 				    "bfwh j %d nf %d master %ld k %ld\n",
154 				    j, nf, prevdrop.b[j], (long)(k));
155 			else
156 				fprintf(stderr,
157 				    "bfwh j %d nf %d master %ld k %ld\n",
158 				    j, nf, prevdrop.a[j], (long)(k));
159 #endif
160 			while (j < nf &&
161 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) {
162 #if D3
163 				if (iflong)
164 					fprintf(stderr, "j %d nf %d prevdrop "
165 					    "%ld prevcoord %d colevel %d nterm "
166 					    "%d k %ld\n", j, nf, prevdrop.b[j],
167 					    prevcoord[j], colevel, nterm,
168 					    (long)(k));
169 				else
170 					fprintf(stderr, "j %d nf %d prevdrop "
171 					    "%ld prevcoord %d colevel %d nterm "
172 					    "%d k %ld\n", j, nf, prevdrop.a[j],
173 					    prevcoord[j], colevel, nterm,
174 					    (long)(k));
175 #endif
176 				if (prevcoord[j] + colevel <= nterm)
177 					j++;
178 				else {
179 					assert(g < lmaster);
180 					if (iflong)
181 						master.b[g] = prevdrop.b[j];
182 					else
183 						master.a[g] = prevdrop.a[j];
184 					coord[g++] = prevcoord[j++];
185 #if D1
186 					if (iflong)
187 						fprintf(stderr, " not skip g "
188 						    "%d doc %d coord %d note "
189 						    "%d\n", g, master.b[g-1],
190 						    coord[g-1], master.b[j-1]);
191 					else
192 						fprintf(stderr, " not skip g "
193 						    "%d doc %ld coord %d nterm "
194 						    "%d\n", g, master.a[g-1],
195 						    coord[g-1], nterm);
196 #endif
197 					continue;
198 				}
199 			}
200 			if (colevel == 0 && j >= nf) break;
201 			if (j < nf &&
202 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) {
203 				if (iflong)
204 					master.b[g] = k;
205 				else
206 					master.a[g] = k;
207 				coord[g++] = prevcoord[j++]+1;
208 #if D1
209 				if (iflong)
210 					fprintf(stderr, " at g %d item %ld "
211 					    "coord %d note %ld\n", g,
212 					    master.b[g-1], coord[g-1],
213 					    master.b[j-1]);
214 				else
215 					fprintf(stderr, " at g %d item %d "
216 					    "coord %d note %d\n", g,
217 					    master.a[g-1], coord[g-1],
218 					    master.a[j-1]);
219 #endif
220 			} else
221 				if (colevel >= nterm) {
222 					if (iflong)
223 						master.b[g] = k;
224 					else
225 						master.a[g] = k;
226 					coord[g++] = 1;
227 				}
228 		}
229 #if D1
230 		fprintf(stderr, "now have %d items\n", g);
231 #endif
232 		if (colevel > 0)
233 			for (; j < nf; j++)
234 				if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) +
235 				    colevel > nterm) {
236 					assert(g < lmaster);
237 					if (iflong)
238 						master.b[g] = prevdrop.b[j];
239 					else
240 						master.a[g] = prevdrop.a[j];
241 					coord[g++] = prevcoord[j];
242 #if D3
243 					if (iflong)
244 						fprintf(stderr, "copied over "
245 						    "%ld coord %d\n",
246 						    master.b[g-1], coord[g-1]);
247 					else
248 						fprintf(stderr, "copied over "
249 						    "%d coord %d\n",
250 						    master.a[g-1], coord[g-1]);
251 #endif
252 				}
253 		nf = g;
254 	}
255 	if (colevel > 0) {
256 		best = 0;
257 		for (j = 0; j < nf; j++)
258 			if (coord[j] > best) best = coord[j];
259 #if D1
260 		fprintf(stderr, "colevel %d best %d\n", colevel, best);
261 #endif
262 		reached = best;
263 		for (g = j = 0; j < nf; j++)
264 			if (coord[j] == best) {
265 				if (iflong)
266 					master.b[g++] = master.b[j];
267 				else
268 					master.a[g++] = master.a[j];
269 			}
270 		nf = g;
271 #if D1
272 		fprintf(stderr, "yet got %d\n", nf);
273 #endif
274 	}
275 #ifdef D1
276 	fprintf(stderr, " returning with %d\n", nf);
277 #endif
278 	if (colevel) {
279 		free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int));
280 		free(prevcoord, lmaster, sizeof (lmaster));
281 	}
282 #if D3
283 	for (g = 0; g < nf; g++)
284 		if (iflong)
285 			fprintf(stderr, ":%ld\n", master.b[g]);
286 		else
287 			fprintf(stderr, ":%d\n", master.a[g]);
288 #endif
289 	return (nf);
290 }
291 
292 static long
getl(FILE * fb)293 getl(FILE *fb)
294 {
295 	return (getw(fb));
296 }
297 
298 static int
hcomp(int n1,int n2)299 hcomp(int n1, int n2)
300 {
301 	return (hfreq[hh[n1]] <= hfreq[hh[n2]]);
302 }
303 
304 static void
hexch(int n1,int n2)305 hexch(int n1, int n2)
306 {
307 	int t;
308 	t = hh[n1];
309 	hh[n1] = hh[n2];
310 	hh[n2] = t;
311 }
312