xref: /illumos-gate/usr/src/common/bzip2/huffman.c (revision 55fea89d)
1ca3e8d88SDave Plauger 
2ca3e8d88SDave Plauger /*-------------------------------------------------------------*/
3ca3e8d88SDave Plauger /*--- Huffman coding low-level stuff                        ---*/
4ca3e8d88SDave Plauger /*---                                             huffman.c ---*/
5ca3e8d88SDave Plauger /*-------------------------------------------------------------*/
6ca3e8d88SDave Plauger 
7ca3e8d88SDave Plauger /* ------------------------------------------------------------------
8ca3e8d88SDave Plauger    This file is part of bzip2/libbzip2, a program and library for
9ca3e8d88SDave Plauger    lossless, block-sorting data compression.
10ca3e8d88SDave Plauger 
11b9071c34SGordon Ross    bzip2/libbzip2 version 1.0.6 of 6 September 2010
12b9071c34SGordon Ross    Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
13ca3e8d88SDave Plauger 
14*55fea89dSDan Cross    Please read the WARNING, DISCLAIMER and PATENTS sections in the
15ca3e8d88SDave Plauger    README file.
16ca3e8d88SDave Plauger 
17ca3e8d88SDave Plauger    This program is released under the terms of the license contained
18ca3e8d88SDave Plauger    in the file LICENSE.
19ca3e8d88SDave Plauger    ------------------------------------------------------------------ */
20ca3e8d88SDave Plauger 
21ca3e8d88SDave Plauger 
22ca3e8d88SDave Plauger #include "bzlib_private.h"
23ca3e8d88SDave Plauger 
24ca3e8d88SDave Plauger /*---------------------------------------------------*/
25ca3e8d88SDave Plauger #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
26ca3e8d88SDave Plauger #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
27ca3e8d88SDave Plauger #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28ca3e8d88SDave Plauger 
29ca3e8d88SDave Plauger #define ADDWEIGHTS(zw1,zw2)                           \
30ca3e8d88SDave Plauger    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
31ca3e8d88SDave Plauger    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32ca3e8d88SDave Plauger 
33ca3e8d88SDave Plauger #define UPHEAP(z)                                     \
34ca3e8d88SDave Plauger {                                                     \
35ca3e8d88SDave Plauger    Int32 zz, tmp;                                     \
36ca3e8d88SDave Plauger    zz = z; tmp = heap[zz];                            \
37ca3e8d88SDave Plauger    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
38ca3e8d88SDave Plauger       heap[zz] = heap[zz >> 1];                       \
39ca3e8d88SDave Plauger       zz >>= 1;                                       \
40ca3e8d88SDave Plauger    }                                                  \
41ca3e8d88SDave Plauger    heap[zz] = tmp;                                    \
42ca3e8d88SDave Plauger }
43ca3e8d88SDave Plauger 
44ca3e8d88SDave Plauger #define DOWNHEAP(z)                                   \
45ca3e8d88SDave Plauger {                                                     \
46ca3e8d88SDave Plauger    Int32 zz, yy, tmp;                                 \
47ca3e8d88SDave Plauger    zz = z; tmp = heap[zz];                            \
48ca3e8d88SDave Plauger    while (True) {                                     \
49ca3e8d88SDave Plauger       yy = zz << 1;                                   \
50ca3e8d88SDave Plauger       if (yy > nHeap) break;                          \
51ca3e8d88SDave Plauger       if (yy < nHeap &&                               \
52ca3e8d88SDave Plauger           weight[heap[yy+1]] < weight[heap[yy]])      \
53ca3e8d88SDave Plauger          yy++;                                        \
54ca3e8d88SDave Plauger       if (weight[tmp] < weight[heap[yy]]) break;      \
55ca3e8d88SDave Plauger       heap[zz] = heap[yy];                            \
56ca3e8d88SDave Plauger       zz = yy;                                        \
57ca3e8d88SDave Plauger    }                                                  \
58ca3e8d88SDave Plauger    heap[zz] = tmp;                                    \
59ca3e8d88SDave Plauger }
60ca3e8d88SDave Plauger 
61ca3e8d88SDave Plauger 
62ca3e8d88SDave Plauger /*---------------------------------------------------*/
BZ2_hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)63*55fea89dSDan Cross void BZ2_hbMakeCodeLengths ( UChar *len,
64ca3e8d88SDave Plauger                              Int32 *freq,
65ca3e8d88SDave Plauger                              Int32 alphaSize,
66ca3e8d88SDave Plauger                              Int32 maxLen )
67ca3e8d88SDave Plauger {
68ca3e8d88SDave Plauger    /*--
69ca3e8d88SDave Plauger       Nodes and heap entries run from 1.  Entry 0
70ca3e8d88SDave Plauger       for both the heap and nodes is a sentinel.
71ca3e8d88SDave Plauger    --*/
72ca3e8d88SDave Plauger    Int32 nNodes, nHeap, n1, n2, i, j, k;
73ca3e8d88SDave Plauger    Bool  tooLong;
74ca3e8d88SDave Plauger 
75ca3e8d88SDave Plauger    Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
76ca3e8d88SDave Plauger    Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77*55fea89dSDan Cross    Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78ca3e8d88SDave Plauger 
79ca3e8d88SDave Plauger    for (i = 0; i < alphaSize; i++)
80ca3e8d88SDave Plauger       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81ca3e8d88SDave Plauger 
82ca3e8d88SDave Plauger    while (True) {
83ca3e8d88SDave Plauger 
84ca3e8d88SDave Plauger       nNodes = alphaSize;
85ca3e8d88SDave Plauger       nHeap = 0;
86ca3e8d88SDave Plauger 
87ca3e8d88SDave Plauger       heap[0] = 0;
88ca3e8d88SDave Plauger       weight[0] = 0;
89ca3e8d88SDave Plauger       parent[0] = -2;
90ca3e8d88SDave Plauger 
91ca3e8d88SDave Plauger       for (i = 1; i <= alphaSize; i++) {
92ca3e8d88SDave Plauger          parent[i] = -1;
93ca3e8d88SDave Plauger          nHeap++;
94ca3e8d88SDave Plauger          heap[nHeap] = i;
95ca3e8d88SDave Plauger          UPHEAP(nHeap);
96ca3e8d88SDave Plauger       }
97ca3e8d88SDave Plauger 
98ca3e8d88SDave Plauger       AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99*55fea89dSDan Cross 
100ca3e8d88SDave Plauger       while (nHeap > 1) {
101ca3e8d88SDave Plauger          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102ca3e8d88SDave Plauger          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103ca3e8d88SDave Plauger          nNodes++;
104ca3e8d88SDave Plauger          parent[n1] = parent[n2] = nNodes;
105ca3e8d88SDave Plauger          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106ca3e8d88SDave Plauger          parent[nNodes] = -1;
107ca3e8d88SDave Plauger          nHeap++;
108ca3e8d88SDave Plauger          heap[nHeap] = nNodes;
109ca3e8d88SDave Plauger          UPHEAP(nHeap);
110ca3e8d88SDave Plauger       }
111ca3e8d88SDave Plauger 
112ca3e8d88SDave Plauger       AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113ca3e8d88SDave Plauger 
114ca3e8d88SDave Plauger       tooLong = False;
115ca3e8d88SDave Plauger       for (i = 1; i <= alphaSize; i++) {
116ca3e8d88SDave Plauger          j = 0;
117ca3e8d88SDave Plauger          k = i;
118ca3e8d88SDave Plauger          while (parent[k] >= 0) { k = parent[k]; j++; }
119ca3e8d88SDave Plauger          len[i-1] = j;
120ca3e8d88SDave Plauger          if (j > maxLen) tooLong = True;
121ca3e8d88SDave Plauger       }
122*55fea89dSDan Cross 
123ca3e8d88SDave Plauger       if (! tooLong) break;
124ca3e8d88SDave Plauger 
125ca3e8d88SDave Plauger       /* 17 Oct 04: keep-going condition for the following loop used
126ca3e8d88SDave Plauger          to be 'i < alphaSize', which missed the last element,
127ca3e8d88SDave Plauger          theoretically leading to the possibility of the compressor
128ca3e8d88SDave Plauger          looping.  However, this count-scaling step is only needed if
129ca3e8d88SDave Plauger          one of the generated Huffman code words is longer than
130ca3e8d88SDave Plauger          maxLen, which up to and including version 1.0.2 was 20 bits,
131ca3e8d88SDave Plauger          which is extremely unlikely.  In version 1.0.3 maxLen was
132ca3e8d88SDave Plauger          changed to 17 bits, which has minimal effect on compression
133ca3e8d88SDave Plauger          ratio, but does mean this scaling step is used from time to
134ca3e8d88SDave Plauger          time, enough to verify that it works.
135ca3e8d88SDave Plauger 
136ca3e8d88SDave Plauger          This means that bzip2-1.0.3 and later will only produce
137ca3e8d88SDave Plauger          Huffman codes with a maximum length of 17 bits.  However, in
138ca3e8d88SDave Plauger          order to preserve backwards compatibility with bitstreams
139ca3e8d88SDave Plauger          produced by versions pre-1.0.3, the decompressor must still
140ca3e8d88SDave Plauger          handle lengths of up to 20. */
141ca3e8d88SDave Plauger 
142ca3e8d88SDave Plauger       for (i = 1; i <= alphaSize; i++) {
143ca3e8d88SDave Plauger          j = weight[i] >> 8;
144ca3e8d88SDave Plauger          j = 1 + (j / 2);
145ca3e8d88SDave Plauger          weight[i] = j << 8;
146ca3e8d88SDave Plauger       }
147ca3e8d88SDave Plauger    }
148ca3e8d88SDave Plauger }
149ca3e8d88SDave Plauger 
150ca3e8d88SDave Plauger 
151ca3e8d88SDave Plauger /*---------------------------------------------------*/
BZ2_hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)152ca3e8d88SDave Plauger void BZ2_hbAssignCodes ( Int32 *code,
153ca3e8d88SDave Plauger                          UChar *length,
154ca3e8d88SDave Plauger                          Int32 minLen,
155ca3e8d88SDave Plauger                          Int32 maxLen,
156ca3e8d88SDave Plauger                          Int32 alphaSize )
157ca3e8d88SDave Plauger {
158ca3e8d88SDave Plauger    Int32 n, vec, i;
159ca3e8d88SDave Plauger 
160ca3e8d88SDave Plauger    vec = 0;
161ca3e8d88SDave Plauger    for (n = minLen; n <= maxLen; n++) {
162ca3e8d88SDave Plauger       for (i = 0; i < alphaSize; i++)
163ca3e8d88SDave Plauger          if (length[i] == n) { code[i] = vec; vec++; };
164ca3e8d88SDave Plauger       vec <<= 1;
165ca3e8d88SDave Plauger    }
166ca3e8d88SDave Plauger }
167ca3e8d88SDave Plauger 
168ca3e8d88SDave Plauger 
169ca3e8d88SDave Plauger /*---------------------------------------------------*/
BZ2_hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)170ca3e8d88SDave Plauger void BZ2_hbCreateDecodeTables ( Int32 *limit,
171ca3e8d88SDave Plauger                                 Int32 *base,
172ca3e8d88SDave Plauger                                 Int32 *perm,
173ca3e8d88SDave Plauger                                 UChar *length,
174ca3e8d88SDave Plauger                                 Int32 minLen,
175ca3e8d88SDave Plauger                                 Int32 maxLen,
176ca3e8d88SDave Plauger                                 Int32 alphaSize )
177ca3e8d88SDave Plauger {
178ca3e8d88SDave Plauger    Int32 pp, i, j, vec;
179ca3e8d88SDave Plauger 
180ca3e8d88SDave Plauger    pp = 0;
181ca3e8d88SDave Plauger    for (i = minLen; i <= maxLen; i++)
182ca3e8d88SDave Plauger       for (j = 0; j < alphaSize; j++)
183ca3e8d88SDave Plauger          if (length[j] == i) { perm[pp] = j; pp++; };
184ca3e8d88SDave Plauger 
185ca3e8d88SDave Plauger    for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186ca3e8d88SDave Plauger    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187ca3e8d88SDave Plauger 
188ca3e8d88SDave Plauger    for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189ca3e8d88SDave Plauger 
190ca3e8d88SDave Plauger    for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191ca3e8d88SDave Plauger    vec = 0;
192ca3e8d88SDave Plauger 
193ca3e8d88SDave Plauger    for (i = minLen; i <= maxLen; i++) {
194ca3e8d88SDave Plauger       vec += (base[i+1] - base[i]);
195ca3e8d88SDave Plauger       limit[i] = vec-1;
196ca3e8d88SDave Plauger       vec <<= 1;
197ca3e8d88SDave Plauger    }
198ca3e8d88SDave Plauger    for (i = minLen + 1; i <= maxLen; i++)
199ca3e8d88SDave Plauger       base[i] = ((limit[i-1] + 1) << 1) - base[i];
200ca3e8d88SDave Plauger }
201ca3e8d88SDave Plauger 
202ca3e8d88SDave Plauger 
203ca3e8d88SDave Plauger /*-------------------------------------------------------------*/
204ca3e8d88SDave Plauger /*--- end                                         huffman.c ---*/
205ca3e8d88SDave Plauger /*-------------------------------------------------------------*/
206