1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
29
30 #include <unistd.h>
31 #include <stdlib.h>
32 #include <stdio.h>
33
34 #define BYTE 8
35 #define QW 1 /* width of bas-q digit in bits */
36
37 /*
38 * this stuff should be local and hidden; it was made
39 * accessible outside for dirty reasons: 20% faster spell
40 */
41 #include "huff.h"
42 struct huff huffcode;
43
44 /*
45 * Infinite Huffman code
46 *
47 * Let the messages be exponentially distributed with ratio r:
48 * P {message k} = r^k*(1-r), k = 0, 1, ...
49 * Let the messages be coded in base q, and suppose
50 * r^n = 1/q
51 * If each decade(base q) contains n codes, then
52 * the messages assigned to each decade will be q times
53 * as probable as the next. Moreover the code for the tail of
54 * the distribution after truncating one decade should look
55 * just like the original, but longer by one leading digit q-1.
56 * q(z+n) = z + (q-1)q^w
57 * where z is first code of decade, w is width of code, in shortest
58 * full decade. Examples, base 2:
59 * r^1 = 1/2 r^5 = 1/2
60 * 0 0110
61 * 10 0111
62 * 110 1000
63 * 1110 1001
64 * ... 1010
65 * 10110
66 * w = 1, z = 0 w = 4, z = 0110
67 * Rewriting slightly
68 * (q-1)z + q*n = (q-1)q^w
69 * whence z is a multiple of q and n is a multiple of q-1. Let
70 * z = cq, n = d(q-1)
71 * We pick w to be the least integer such that
72 * d = n/(q-1) <= q^(w-1)
73 * Then solve for c
74 * c = q^(w-1) - d
75 * If c is not zero, the first decade may be preceded by
76 * even shorter (w-1)-digit codes 0, 1, ..., c-1. Thus
77 * the example code with r^5 = 1/2 becomes
78 * 000
79 * 001
80 * 010
81 * 0110
82 * 0111
83 * 1000
84 * 1001
85 * 1010
86 * 10110
87 * ...
88 * 110110
89 * ...
90 * The expected number of base-q digits in a codeword is then
91 * w - 1 + r^c/(1-r^n)
92 * The present routines require q to be a power of 2
93 */
94 /*
95 * There is a lot of hanky-panky with left justification against
96 * sign instead of simple left justification because
97 * unsigned long is not available
98 */
99 #define L (BYTE*(sizeof (long))-1) /* length of signless long */
100 #define MASK (~((unsigned long)1<<L)) /* mask out sign */
101
102 /*
103 * decode the prefix of word y (which is left justified against sign)
104 * place mesage number into place pointed to by kp
105 * return length (in bits) of decoded prefix or 0 if code is out of
106 * range
107 */
108 int
decode(long y,long * pk)109 decode(long y, long *pk)
110 {
111 int l;
112 long v;
113 if (y < cs) {
114 *pk = y >> (long)(L+QW-w);
115 return (w-QW);
116 }
117 for (l = w, v = v0; y >= qcs;
118 y = ((unsigned long)y << QW) & MASK, v += n)
119 if ((l += QW) > L)
120 return (0);
121 *pk = v + (y>>(long)(L-w));
122 return (l);
123 }
124
125 /*
126 * encode message k and put result (right justified) into
127 * place pointed to by py.
128 * return length (in bits) of result,
129 * or 0 if code is too long
130 */
131
132 int
encode(long k,long * py)133 encode(long k, long *py)
134 {
135 int l;
136 long y;
137 if (k < c) {
138 *py = k;
139 return (w-QW);
140 }
141 for (k -= c, y = 1, l = w; k >= n; k -= n, y <<= QW)
142 if ((l += QW) > L)
143 return (0);
144 *py = ((y-1)<<w) + cq + k;
145 return (l);
146 }
147
148
149 /*
150 * Initialization code, given expected value of k
151 * E(k) = r/(1-r) = a
152 * and given base width b
153 * return expected length of coded messages
154 */
155 static struct qlog {
156 long p;
157 double u;
158 } z;
159
160 static struct qlog
qlog(double x,double y,long p,double u)161 qlog(double x, double y, long p, double u) /* find smallest p so x^p<=y */
162 {
163
164 if (u/x <= y) {
165 z.p = 0;
166 z.u = 1;
167 } else {
168 z = qlog(x, y, p+p, u*u);
169 if (u*z.u/x > y) {
170 z.p += p;
171 z.u *= u;
172 }
173 }
174 return (z);
175 }
176
177 double
huff(float a)178 huff(float a)
179 {
180 int i, q;
181 long d, j;
182 double r = a/(1.0 + a);
183 double rc, rq;
184
185 for (i = 0, q = 1, rq = r; i < QW; i++, q *= 2, rq *= rq)
186 continue;
187 rq /= r; /* rq = r^(q-1) */
188 (void) qlog(rq, 1./q, 1L, rq);
189 d = z.p;
190 n = d*(q-1);
191 if (n != d * (q - 1))
192 abort(); /* time to make n long */
193 for (w = QW, j = 1; j < d; w += QW, j *= q)
194 continue;
195 c = j - d;
196 cq = c*q;
197 cs = cq<<(L-w);
198 qcs = (((long)(q-1)<<w) + cq) << (L-QW-w);
199 v0 = c - cq;
200 for (i = 0, rc = 1; i < c; i++, rc *= r) /* rc = r^c */
201 continue;
202 return (w + QW*(rc/(1-z.u) - 1));
203 }
204
205 void
whuff(void)206 whuff(void)
207 {
208 (void) fwrite((char *) & huffcode, sizeof (huffcode), 1, stdout);
209 }
210
211 int
rhuff(FILE * f)212 rhuff(FILE *f)
213 {
214 return (read(fileno(f), (char *)&huffcode, sizeof (huffcode)) ==
215 sizeof (huffcode));
216 }
217