xref: /illumos-gate/usr/src/cmd/spell/spellin.c (revision 0d8b5334)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <unistd.h>
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <locale.h>
37 #include "hash.h"
38 #include "huff.h"
39 
40 int	encode(long, long *);
41 
42 #define	S (BYTE * sizeof (long))
43 #define	B (BYTE * sizeof (unsigned))
44 unsigned *table;
45 int hindex[NI];
46 unsigned wp;		/* word pointer */
47 int bp = B;		/* bit pointer */
48 static int ignore;
49 static int extra;
50 
51 static int
52 append(register unsigned w1, register int i)
53 {
54 	while (wp < ND - 1) {
55 		table[wp] |= w1>>(B-bp);
56 		i -= bp;
57 		if (i < 0) {
58 			bp = -i;
59 			return (1);
60 		}
61 		w1 <<= bp;
62 		bp = B;
63 		wp++;
64 	}
65 	return (0);
66 }
67 
68 
69 /*
70  *	usage: hashin N
71  *	where N is number of words in dictionary
72  *	and standard input contains sorted, unique
73  *	hashed words in octal
74  */
75 
76 int
77 main(int argc, char **argv)
78 {
79 	long h, k, d;
80 	int  i;
81 	long count;
82 	long w1;
83 	long x;
84 	int t, u;
85 	double z;
86 
87 	/* Set locale environment variables local definitions */
88 	(void) setlocale(LC_ALL, "");
89 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
90 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it wasn't */
91 #endif
92 	(void) textdomain(TEXT_DOMAIN);
93 
94 	k = 0;
95 	u = 0;
96 	if (argc != 2) {
97 		(void) fprintf(stderr, gettext("%s: arg count\n"), argv[0]);
98 		exit(1);
99 	}
100 	table = (unsigned *)malloc(ND * sizeof (*table));
101 	if (table == 0) {
102 		(void) fprintf(stderr, gettext("%s: no space for table\n"),
103 		    argv[0]);
104 		exit(1);
105 	}
106 	if ((atof(argv[1])) == 0.0) {
107 		(void) fprintf(stderr, gettext("%s: illegal count"), argv[0]);
108 		exit(1);
109 	}
110 
111 	z = huff((1L<<HASHWIDTH)/atof(argv[1]));
112 	(void) fprintf(stderr, gettext("%s: expected code widths = %f\n"),
113 	    argv[0], z);
114 	for (count = 0; scanf("%lo", (unsigned long *)&h) == 1; ++count) {
115 		if ((t = h >> (HASHWIDTH - INDEXWIDTH)) != u) {
116 			if (bp != B)
117 				wp++;
118 			bp = B;
119 			while (u < t)
120 				hindex[++u] = wp;
121 			k =  (long)t<<(HASHWIDTH-INDEXWIDTH);
122 		}
123 		d = h-k;
124 		k = h;
125 		for (;;) {
126 			for (x = d; ; x /= 2) {
127 				i = encode(x, &w1);
128 				if (i > 0)
129 					break;
130 			}
131 			if (i > B) {
132 				if (!(append((unsigned)(w1>>(long) (i-B)), B) &&
133 				    append((unsigned)(w1<<(long) (B+B-i)),
134 				    i-B)))
135 					ignore++;
136 			} else
137 				if (!append((unsigned)(w1<<(long)(B-i)), i))
138 					ignore++;
139 			d -= x;
140 			if (d > 0)
141 				extra++;
142 			else
143 				break;
144 		}
145 	}
146 	if (bp != B)
147 		wp++;
148 	while (++u < NI)
149 		hindex[u] = wp;
150 	whuff();
151 	(void) fwrite((char *)hindex, sizeof (*hindex), NI, stdout);
152 	(void) fwrite((char *)table, sizeof (*table), wp, stdout);
153 	(void) fprintf(stderr,
154 	    gettext("%s: %ld items, %d ignored, %d extra, %u words occupied\n"),
155 	    argv[0], count, ignore, extra, wp);
156 	count -= ignore;
157 	(void) fprintf(stderr, "%s: %f table bits/item, %f table+index bits\n",
158 	    argv[0], (((float)BYTE * wp) * sizeof (*table) / count),
159 	    (BYTE * ((float)wp * sizeof (*table) + sizeof (hindex)) / count));
160 	return (0);
161 }
162