1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28/*	  All Rights Reserved  	*/
29
30#pragma ident	"%Z%%M%	%I%	%E% SMI"
31
32
33#include <unistd.h>
34#include <stdlib.h>
35#include <stdio.h>
36
37#define	BYTE 8
38#define	QW 1		/* width of bas-q digit in bits */
39
40/*
41 * this stuff should be local and hidden; it was made
42 * accessible outside for dirty reasons: 20% faster spell
43 */
44#include "huff.h"
45struct huff huffcode;
46
47/*
48 * Infinite Huffman code
49 *
50 * Let the messages be exponentially distributed with ratio r:
51 * 	P {message k} = r^k*(1-r),	k = 0, 1, ...
52 * Let the messages be coded in base q, and suppose
53 * 	r^n = 1/q
54 * If each decade(base q) contains n codes, then
55 * the messages assigned to each decade will be q times
56 * as probable as the next. Moreover the code for the tail of
57 * the distribution after truncating one decade should look
58 * just like the original, but longer by one leading digit q-1.
59 * 	q(z+n) = z + (q-1)q^w
60 * where z is first code of decade, w is width of code, in shortest
61 * full decade. Examples, base 2:
62 * 	r^1 = 1/2	r^5 = 1/2
63 * 	0		0110
64 * 	10		0111
65 * 	110		1000
66 * 	1110		1001
67 * 	...		1010
68 * 			10110
69 * 	w = 1, z = 0		w = 4, z = 0110
70 * Rewriting slightly
71 * 	(q-1)z + q*n = (q-1)q^w
72 * whence z is a multiple of q and n is a multiple of q-1. Let
73 * 	z = cq, n = d(q-1)
74 * We pick w to be the least integer such that
75 * 	d = n/(q-1) <= q^(w-1)
76 * Then solve for c
77 * 	c = q^(w-1) - d
78 * If c is not zero, the first decade may be preceded by
79 * even shorter (w-1)-digit codes 0, 1, ..., c-1. Thus
80 * the example code with r^5 = 1/2 becomes
81 * 	000
82 * 	001
83 * 	010
84 * 	0110
85 * 	0111
86 * 	1000
87 * 	1001
88 * 	1010
89 * 	10110
90 * 	...
91 * 	110110
92 * 	...
93 * The expected number of base-q digits in a codeword is then
94 *	w - 1 + r^c/(1-r^n)
95 * The present routines require q to be a power of 2
96 */
97/*
98 * There is a lot of hanky-panky with left justification against
99 * sign instead of simple left justification because
100 * unsigned long is not available
101 */
102#define	L (BYTE*(sizeof (long))-1)	/* length of signless long */
103#define	MASK (~((unsigned long)1<<L))	/* mask out sign */
104
105/*
106 * decode the prefix of word y (which is left justified against sign)
107 * place mesage number into place pointed to by kp
108 * return length (in bits) of decoded prefix or 0 if code is out of
109 * range
110 */
111int
112decode(long y, long *pk)
113{
114	int l;
115	long v;
116	if (y < cs) {
117		*pk = y >> (long)(L+QW-w);
118		return (w-QW);
119	}
120	for (l = w, v = v0; y >= qcs;
121	    y = ((unsigned long)y << QW) & MASK, v += n)
122		if ((l += QW) > L)
123			return (0);
124	*pk = v + (y>>(long)(L-w));
125	return (l);
126}
127
128/*
129 * encode message k and put result (right justified) into
130 * place pointed to by py.
131 * return length (in bits) of result,
132 * or 0 if code is too long
133 */
134
135int
136encode(long k, long *py)
137{
138	int l;
139	long y;
140	if (k < c) {
141		*py = k;
142		return (w-QW);
143	}
144	for (k -= c, y = 1, l = w; k >= n; k -= n, y <<= QW)
145		if ((l += QW) > L)
146			return (0);
147	*py = ((y-1)<<w) + cq + k;
148	return (l);
149}
150
151
152/*
153 * Initialization code, given expected value of k
154 * E(k) = r/(1-r) = a
155 * and given base width b
156 * return expected length of coded messages
157 */
158static struct qlog {
159	long p;
160	double u;
161} z;
162
163static struct qlog
164qlog(double x, double y, long p, double u)	/* find smallest p so x^p<=y */
165{
166
167	if (u/x <= y) {
168		z.p = 0;
169		z.u = 1;
170	} else {
171		z = qlog(x, y, p+p, u*u);
172		if (u*z.u/x > y) {
173			z.p += p;
174			z.u *= u;
175		}
176	}
177	return (z);
178}
179
180double
181huff(float a)
182{
183	int i, q;
184	long d, j;
185	double r = a/(1.0 + a);
186	double rc, rq;
187
188	for (i = 0, q = 1, rq = r; i < QW; i++, q *= 2, rq *= rq)
189		continue;
190	rq /= r;	/* rq = r^(q-1) */
191	(void) qlog(rq, 1./q, 1L, rq);
192	d = z.p;
193	n = d*(q-1);
194	if (n != d * (q - 1))
195		abort();	/* time to make n long */
196	for (w = QW, j = 1; j < d; w += QW, j *= q)
197		continue;
198	c = j - d;
199	cq = c*q;
200	cs = cq<<(L-w);
201	qcs = (((long)(q-1)<<w) + cq) << (L-QW-w);
202	v0 = c - cq;
203	for (i = 0, rc = 1; i < c; i++, rc *= r)	/* rc = r^c */
204		continue;
205	return (w + QW*(rc/(1-z.u) - 1));
206}
207
208void
209whuff(void)
210{
211	(void) fwrite((char *) & huffcode, sizeof (huffcode), 1, stdout);
212}
213
214int
215rhuff(FILE *f)
216{
217	return (read(fileno(f), (char *)&huffcode, sizeof (huffcode)) ==
218	    sizeof (huffcode));
219}
220