xref: /illumos-gate/usr/src/contrib/zlib/inffast.c (revision b8382935)
1*b8382935SToomas Soome /* inffast.c -- fast decoding
2*b8382935SToomas Soome  * Copyright (C) 1995-2017 Mark Adler
3*b8382935SToomas Soome  * For conditions of distribution and use, see copyright notice in zlib.h
4*b8382935SToomas Soome  */
5*b8382935SToomas Soome 
6*b8382935SToomas Soome #include "zutil.h"
7*b8382935SToomas Soome #include "inftrees.h"
8*b8382935SToomas Soome #include "inflate.h"
9*b8382935SToomas Soome #include "inffast.h"
10*b8382935SToomas Soome 
11*b8382935SToomas Soome #ifdef ASMINF
12*b8382935SToomas Soome #  pragma message("Assembler code may have bugs -- use at your own risk")
13*b8382935SToomas Soome #else
14*b8382935SToomas Soome 
15*b8382935SToomas Soome /*
16*b8382935SToomas Soome    Decode literal, length, and distance codes and write out the resulting
17*b8382935SToomas Soome    literal and match bytes until either not enough input or output is
18*b8382935SToomas Soome    available, an end-of-block is encountered, or a data error is encountered.
19*b8382935SToomas Soome    When large enough input and output buffers are supplied to inflate(), for
20*b8382935SToomas Soome    example, a 16K input buffer and a 64K output buffer, more than 95% of the
21*b8382935SToomas Soome    inflate execution time is spent in this routine.
22*b8382935SToomas Soome 
23*b8382935SToomas Soome    Entry assumptions:
24*b8382935SToomas Soome 
25*b8382935SToomas Soome         state->mode == LEN
26*b8382935SToomas Soome         strm->avail_in >= 6
27*b8382935SToomas Soome         strm->avail_out >= 258
28*b8382935SToomas Soome         start >= strm->avail_out
29*b8382935SToomas Soome         state->bits < 8
30*b8382935SToomas Soome 
31*b8382935SToomas Soome    On return, state->mode is one of:
32*b8382935SToomas Soome 
33*b8382935SToomas Soome         LEN -- ran out of enough output space or enough available input
34*b8382935SToomas Soome         TYPE -- reached end of block code, inflate() to interpret next block
35*b8382935SToomas Soome         BAD -- error in block data
36*b8382935SToomas Soome 
37*b8382935SToomas Soome    Notes:
38*b8382935SToomas Soome 
39*b8382935SToomas Soome     - The maximum input bits used by a length/distance pair is 15 bits for the
40*b8382935SToomas Soome       length code, 5 bits for the length extra, 15 bits for the distance code,
41*b8382935SToomas Soome       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
42*b8382935SToomas Soome       Therefore if strm->avail_in >= 6, then there is enough input to avoid
43*b8382935SToomas Soome       checking for available input while decoding.
44*b8382935SToomas Soome 
45*b8382935SToomas Soome     - The maximum bytes that a single length/distance pair can output is 258
46*b8382935SToomas Soome       bytes, which is the maximum length that can be coded.  inflate_fast()
47*b8382935SToomas Soome       requires strm->avail_out >= 258 for each loop to avoid checking for
48*b8382935SToomas Soome       output space.
49*b8382935SToomas Soome  */
50*b8382935SToomas Soome void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start)
51*b8382935SToomas Soome {
52*b8382935SToomas Soome     struct inflate_state FAR *state;
53*b8382935SToomas Soome     z_const unsigned char FAR *in;      /* local strm->next_in */
54*b8382935SToomas Soome     z_const unsigned char FAR *last;    /* have enough input while in < last */
55*b8382935SToomas Soome     unsigned char FAR *out;     /* local strm->next_out */
56*b8382935SToomas Soome     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
57*b8382935SToomas Soome     unsigned char FAR *end;     /* while out < end, enough space available */
58*b8382935SToomas Soome #ifdef INFLATE_STRICT
59*b8382935SToomas Soome     unsigned dmax;              /* maximum distance from zlib header */
60*b8382935SToomas Soome #endif
61*b8382935SToomas Soome     unsigned wsize;             /* window size or zero if not using window */
62*b8382935SToomas Soome     unsigned whave;             /* valid bytes in the window */
63*b8382935SToomas Soome     unsigned wnext;             /* window write index */
64*b8382935SToomas Soome     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
65*b8382935SToomas Soome     unsigned long hold;         /* local strm->hold */
66*b8382935SToomas Soome     unsigned bits;              /* local strm->bits */
67*b8382935SToomas Soome     code const FAR *lcode;      /* local strm->lencode */
68*b8382935SToomas Soome     code const FAR *dcode;      /* local strm->distcode */
69*b8382935SToomas Soome     unsigned lmask;             /* mask for first level of length codes */
70*b8382935SToomas Soome     unsigned dmask;             /* mask for first level of distance codes */
71*b8382935SToomas Soome     code here;                  /* retrieved table entry */
72*b8382935SToomas Soome     unsigned op;                /* code bits, operation, extra bits, or */
73*b8382935SToomas Soome                                 /*  window position, window bytes to copy */
74*b8382935SToomas Soome     unsigned len;               /* match length, unused bytes */
75*b8382935SToomas Soome     unsigned dist;              /* match distance */
76*b8382935SToomas Soome     unsigned char FAR *from;    /* where to copy match from */
77*b8382935SToomas Soome 
78*b8382935SToomas Soome     /* copy state to local variables */
79*b8382935SToomas Soome     state = (struct inflate_state FAR *)strm->state;
80*b8382935SToomas Soome     in = strm->next_in;
81*b8382935SToomas Soome     last = in + (strm->avail_in - 5);
82*b8382935SToomas Soome     out = strm->next_out;
83*b8382935SToomas Soome     beg = out - (start - strm->avail_out);
84*b8382935SToomas Soome     end = out + (strm->avail_out - 257);
85*b8382935SToomas Soome #ifdef INFLATE_STRICT
86*b8382935SToomas Soome     dmax = state->dmax;
87*b8382935SToomas Soome #endif
88*b8382935SToomas Soome     wsize = state->wsize;
89*b8382935SToomas Soome     whave = state->whave;
90*b8382935SToomas Soome     wnext = state->wnext;
91*b8382935SToomas Soome     window = state->window;
92*b8382935SToomas Soome     hold = state->hold;
93*b8382935SToomas Soome     bits = state->bits;
94*b8382935SToomas Soome     lcode = state->lencode;
95*b8382935SToomas Soome     dcode = state->distcode;
96*b8382935SToomas Soome     lmask = (1U << state->lenbits) - 1;
97*b8382935SToomas Soome     dmask = (1U << state->distbits) - 1;
98*b8382935SToomas Soome 
99*b8382935SToomas Soome     /* decode literals and length/distances until end-of-block or not enough
100*b8382935SToomas Soome        input data or output space */
101*b8382935SToomas Soome     do {
102*b8382935SToomas Soome         if (bits < 15) {
103*b8382935SToomas Soome             hold += (unsigned long)(*in++) << bits;
104*b8382935SToomas Soome             bits += 8;
105*b8382935SToomas Soome             hold += (unsigned long)(*in++) << bits;
106*b8382935SToomas Soome             bits += 8;
107*b8382935SToomas Soome         }
108*b8382935SToomas Soome         here = lcode[hold & lmask];
109*b8382935SToomas Soome       dolen:
110*b8382935SToomas Soome         op = (unsigned)(here.bits);
111*b8382935SToomas Soome         hold >>= op;
112*b8382935SToomas Soome         bits -= op;
113*b8382935SToomas Soome         op = (unsigned)(here.op);
114*b8382935SToomas Soome         if (op == 0) {                          /* literal */
115*b8382935SToomas Soome             Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
116*b8382935SToomas Soome                     "inflate:         literal '%c'\n" :
117*b8382935SToomas Soome                     "inflate:         literal 0x%02x\n", here.val));
118*b8382935SToomas Soome             *out++ = (unsigned char)(here.val);
119*b8382935SToomas Soome         }
120*b8382935SToomas Soome         else if (op & 16) {                     /* length base */
121*b8382935SToomas Soome             len = (unsigned)(here.val);
122*b8382935SToomas Soome             op &= 15;                           /* number of extra bits */
123*b8382935SToomas Soome             if (op) {
124*b8382935SToomas Soome                 if (bits < op) {
125*b8382935SToomas Soome                     hold += (unsigned long)(*in++) << bits;
126*b8382935SToomas Soome                     bits += 8;
127*b8382935SToomas Soome                 }
128*b8382935SToomas Soome                 len += (unsigned)hold & ((1U << op) - 1);
129*b8382935SToomas Soome                 hold >>= op;
130*b8382935SToomas Soome                 bits -= op;
131*b8382935SToomas Soome             }
132*b8382935SToomas Soome             Tracevv((stderr, "inflate:         length %u\n", len));
133*b8382935SToomas Soome             if (bits < 15) {
134*b8382935SToomas Soome                 hold += (unsigned long)(*in++) << bits;
135*b8382935SToomas Soome                 bits += 8;
136*b8382935SToomas Soome                 hold += (unsigned long)(*in++) << bits;
137*b8382935SToomas Soome                 bits += 8;
138*b8382935SToomas Soome             }
139*b8382935SToomas Soome             here = dcode[hold & dmask];
140*b8382935SToomas Soome           dodist:
141*b8382935SToomas Soome             op = (unsigned)(here.bits);
142*b8382935SToomas Soome             hold >>= op;
143*b8382935SToomas Soome             bits -= op;
144*b8382935SToomas Soome             op = (unsigned)(here.op);
145*b8382935SToomas Soome             if (op & 16) {                      /* distance base */
146*b8382935SToomas Soome                 dist = (unsigned)(here.val);
147*b8382935SToomas Soome                 op &= 15;                       /* number of extra bits */
148*b8382935SToomas Soome                 if (bits < op) {
149*b8382935SToomas Soome                     hold += (unsigned long)(*in++) << bits;
150*b8382935SToomas Soome                     bits += 8;
151*b8382935SToomas Soome                     if (bits < op) {
152*b8382935SToomas Soome                         hold += (unsigned long)(*in++) << bits;
153*b8382935SToomas Soome                         bits += 8;
154*b8382935SToomas Soome                     }
155*b8382935SToomas Soome                 }
156*b8382935SToomas Soome                 dist += (unsigned)hold & ((1U << op) - 1);
157*b8382935SToomas Soome #ifdef INFLATE_STRICT
158*b8382935SToomas Soome                 if (dist > dmax) {
159*b8382935SToomas Soome                     strm->msg = (char *)"invalid distance too far back";
160*b8382935SToomas Soome                     state->mode = BAD;
161*b8382935SToomas Soome                     break;
162*b8382935SToomas Soome                 }
163*b8382935SToomas Soome #endif
164*b8382935SToomas Soome                 hold >>= op;
165*b8382935SToomas Soome                 bits -= op;
166*b8382935SToomas Soome                 Tracevv((stderr, "inflate:         distance %u\n", dist));
167*b8382935SToomas Soome                 op = (unsigned)(out - beg);     /* max distance in output */
168*b8382935SToomas Soome                 if (dist > op) {                /* see if copy from window */
169*b8382935SToomas Soome                     op = dist - op;             /* distance back in window */
170*b8382935SToomas Soome                     if (op > whave) {
171*b8382935SToomas Soome                         if (state->sane) {
172*b8382935SToomas Soome                             strm->msg =
173*b8382935SToomas Soome                                 (char *)"invalid distance too far back";
174*b8382935SToomas Soome                             state->mode = BAD;
175*b8382935SToomas Soome                             break;
176*b8382935SToomas Soome                         }
177*b8382935SToomas Soome #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
178*b8382935SToomas Soome                         if (len <= op - whave) {
179*b8382935SToomas Soome                             do {
180*b8382935SToomas Soome                                 *out++ = 0;
181*b8382935SToomas Soome                             } while (--len);
182*b8382935SToomas Soome                             continue;
183*b8382935SToomas Soome                         }
184*b8382935SToomas Soome                         len -= op - whave;
185*b8382935SToomas Soome                         do {
186*b8382935SToomas Soome                             *out++ = 0;
187*b8382935SToomas Soome                         } while (--op > whave);
188*b8382935SToomas Soome                         if (op == 0) {
189*b8382935SToomas Soome                             from = out - dist;
190*b8382935SToomas Soome                             do {
191*b8382935SToomas Soome                                 *out++ = *from++;
192*b8382935SToomas Soome                             } while (--len);
193*b8382935SToomas Soome                             continue;
194*b8382935SToomas Soome                         }
195*b8382935SToomas Soome #endif
196*b8382935SToomas Soome                     }
197*b8382935SToomas Soome                     from = window;
198*b8382935SToomas Soome                     if (wnext == 0) {           /* very common case */
199*b8382935SToomas Soome                         from += wsize - op;
200*b8382935SToomas Soome                         if (op < len) {         /* some from window */
201*b8382935SToomas Soome                             len -= op;
202*b8382935SToomas Soome                             do {
203*b8382935SToomas Soome                                 *out++ = *from++;
204*b8382935SToomas Soome                             } while (--op);
205*b8382935SToomas Soome                             from = out - dist;  /* rest from output */
206*b8382935SToomas Soome                         }
207*b8382935SToomas Soome                     }
208*b8382935SToomas Soome                     else if (wnext < op) {      /* wrap around window */
209*b8382935SToomas Soome                         from += wsize + wnext - op;
210*b8382935SToomas Soome                         op -= wnext;
211*b8382935SToomas Soome                         if (op < len) {         /* some from end of window */
212*b8382935SToomas Soome                             len -= op;
213*b8382935SToomas Soome                             do {
214*b8382935SToomas Soome                                 *out++ = *from++;
215*b8382935SToomas Soome                             } while (--op);
216*b8382935SToomas Soome                             from = window;
217*b8382935SToomas Soome                             if (wnext < len) {  /* some from start of window */
218*b8382935SToomas Soome                                 op = wnext;
219*b8382935SToomas Soome                                 len -= op;
220*b8382935SToomas Soome                                 do {
221*b8382935SToomas Soome                                     *out++ = *from++;
222*b8382935SToomas Soome                                 } while (--op);
223*b8382935SToomas Soome                                 from = out - dist;      /* rest from output */
224*b8382935SToomas Soome                             }
225*b8382935SToomas Soome                         }
226*b8382935SToomas Soome                     }
227*b8382935SToomas Soome                     else {                      /* contiguous in window */
228*b8382935SToomas Soome                         from += wnext - op;
229*b8382935SToomas Soome                         if (op < len) {         /* some from window */
230*b8382935SToomas Soome                             len -= op;
231*b8382935SToomas Soome                             do {
232*b8382935SToomas Soome                                 *out++ = *from++;
233*b8382935SToomas Soome                             } while (--op);
234*b8382935SToomas Soome                             from = out - dist;  /* rest from output */
235*b8382935SToomas Soome                         }
236*b8382935SToomas Soome                     }
237*b8382935SToomas Soome                     while (len > 2) {
238*b8382935SToomas Soome                         *out++ = *from++;
239*b8382935SToomas Soome                         *out++ = *from++;
240*b8382935SToomas Soome                         *out++ = *from++;
241*b8382935SToomas Soome                         len -= 3;
242*b8382935SToomas Soome                     }
243*b8382935SToomas Soome                     if (len) {
244*b8382935SToomas Soome                         *out++ = *from++;
245*b8382935SToomas Soome                         if (len > 1)
246*b8382935SToomas Soome                             *out++ = *from++;
247*b8382935SToomas Soome                     }
248*b8382935SToomas Soome                 }
249*b8382935SToomas Soome                 else {
250*b8382935SToomas Soome                     from = out - dist;          /* copy direct from output */
251*b8382935SToomas Soome                     do {                        /* minimum length is three */
252*b8382935SToomas Soome                         *out++ = *from++;
253*b8382935SToomas Soome                         *out++ = *from++;
254*b8382935SToomas Soome                         *out++ = *from++;
255*b8382935SToomas Soome                         len -= 3;
256*b8382935SToomas Soome                     } while (len > 2);
257*b8382935SToomas Soome                     if (len) {
258*b8382935SToomas Soome                         *out++ = *from++;
259*b8382935SToomas Soome                         if (len > 1)
260*b8382935SToomas Soome                             *out++ = *from++;
261*b8382935SToomas Soome                     }
262*b8382935SToomas Soome                 }
263*b8382935SToomas Soome             }
264*b8382935SToomas Soome             else if ((op & 64) == 0) {          /* 2nd level distance code */
265*b8382935SToomas Soome                 here = dcode[here.val + (hold & ((1U << op) - 1))];
266*b8382935SToomas Soome                 goto dodist;
267*b8382935SToomas Soome             }
268*b8382935SToomas Soome             else {
269*b8382935SToomas Soome                 strm->msg = (char *)"invalid distance code";
270*b8382935SToomas Soome                 state->mode = BAD;
271*b8382935SToomas Soome                 break;
272*b8382935SToomas Soome             }
273*b8382935SToomas Soome         }
274*b8382935SToomas Soome         else if ((op & 64) == 0) {              /* 2nd level length code */
275*b8382935SToomas Soome             here = lcode[here.val + (hold & ((1U << op) - 1))];
276*b8382935SToomas Soome             goto dolen;
277*b8382935SToomas Soome         }
278*b8382935SToomas Soome         else if (op & 32) {                     /* end-of-block */
279*b8382935SToomas Soome             Tracevv((stderr, "inflate:         end of block\n"));
280*b8382935SToomas Soome             state->mode = TYPE;
281*b8382935SToomas Soome             break;
282*b8382935SToomas Soome         }
283*b8382935SToomas Soome         else {
284*b8382935SToomas Soome             strm->msg = (char *)"invalid literal/length code";
285*b8382935SToomas Soome             state->mode = BAD;
286*b8382935SToomas Soome             break;
287*b8382935SToomas Soome         }
288*b8382935SToomas Soome     } while (in < last && out < end);
289*b8382935SToomas Soome 
290*b8382935SToomas Soome     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
291*b8382935SToomas Soome     len = bits >> 3;
292*b8382935SToomas Soome     in -= len;
293*b8382935SToomas Soome     bits -= len << 3;
294*b8382935SToomas Soome     hold &= (1U << bits) - 1;
295*b8382935SToomas Soome 
296*b8382935SToomas Soome     /* update state and return */
297*b8382935SToomas Soome     strm->next_in = in;
298*b8382935SToomas Soome     strm->next_out = out;
299*b8382935SToomas Soome     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
300*b8382935SToomas Soome     strm->avail_out = (unsigned)(out < end ?
301*b8382935SToomas Soome                                  257 + (end - out) : 257 - (out - end));
302*b8382935SToomas Soome     state->hold = hold;
303*b8382935SToomas Soome     state->bits = bits;
304*b8382935SToomas Soome     return;
305*b8382935SToomas Soome }
306*b8382935SToomas Soome 
307*b8382935SToomas Soome /*
308*b8382935SToomas Soome    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
309*b8382935SToomas Soome    - Using bit fields for code structure
310*b8382935SToomas Soome    - Different op definition to avoid & for extra bits (do & for table bits)
311*b8382935SToomas Soome    - Three separate decoding do-loops for direct, window, and wnext == 0
312*b8382935SToomas Soome    - Special case for distance > 1 copies to do overlapped load and store copy
313*b8382935SToomas Soome    - Explicit branch predictions (based on measured branch probabilities)
314*b8382935SToomas Soome    - Deferring match copy and interspersed it with decoding subsequent codes
315*b8382935SToomas Soome    - Swapping literal/length else
316*b8382935SToomas Soome    - Swapping window/direct else
317*b8382935SToomas Soome    - Larger unrolled copy loops (three is about right)
318*b8382935SToomas Soome    - Moving len -= 3 statement into middle of loop
319*b8382935SToomas Soome  */
320*b8382935SToomas Soome 
321*b8382935SToomas Soome #endif /* !ASMINF */
322