1*b8382935SToomas Soome /* inffast.c -- fast decoding 2*b8382935SToomas Soome * Copyright (C) 1995-2017 Mark Adler 3*b8382935SToomas Soome * For conditions of distribution and use, see copyright notice in zlib.h 4*b8382935SToomas Soome */ 5*b8382935SToomas Soome 6*b8382935SToomas Soome #include "zutil.h" 7*b8382935SToomas Soome #include "inftrees.h" 8*b8382935SToomas Soome #include "inflate.h" 9*b8382935SToomas Soome #include "inffast.h" 10*b8382935SToomas Soome 11*b8382935SToomas Soome #ifdef ASMINF 12*b8382935SToomas Soome # pragma message("Assembler code may have bugs -- use at your own risk") 13*b8382935SToomas Soome #else 14*b8382935SToomas Soome 15*b8382935SToomas Soome /* 16*b8382935SToomas Soome Decode literal, length, and distance codes and write out the resulting 17*b8382935SToomas Soome literal and match bytes until either not enough input or output is 18*b8382935SToomas Soome available, an end-of-block is encountered, or a data error is encountered. 19*b8382935SToomas Soome When large enough input and output buffers are supplied to inflate(), for 20*b8382935SToomas Soome example, a 16K input buffer and a 64K output buffer, more than 95% of the 21*b8382935SToomas Soome inflate execution time is spent in this routine. 22*b8382935SToomas Soome 23*b8382935SToomas Soome Entry assumptions: 24*b8382935SToomas Soome 25*b8382935SToomas Soome state->mode == LEN 26*b8382935SToomas Soome strm->avail_in >= 6 27*b8382935SToomas Soome strm->avail_out >= 258 28*b8382935SToomas Soome start >= strm->avail_out 29*b8382935SToomas Soome state->bits < 8 30*b8382935SToomas Soome 31*b8382935SToomas Soome On return, state->mode is one of: 32*b8382935SToomas Soome 33*b8382935SToomas Soome LEN -- ran out of enough output space or enough available input 34*b8382935SToomas Soome TYPE -- reached end of block code, inflate() to interpret next block 35*b8382935SToomas Soome BAD -- error in block data 36*b8382935SToomas Soome 37*b8382935SToomas Soome Notes: 38*b8382935SToomas Soome 39*b8382935SToomas Soome - The maximum input bits used by a length/distance pair is 15 bits for the 40*b8382935SToomas Soome length code, 5 bits for the length extra, 15 bits for the distance code, 41*b8382935SToomas Soome and 13 bits for the distance extra. This totals 48 bits, or six bytes. 42*b8382935SToomas Soome Therefore if strm->avail_in >= 6, then there is enough input to avoid 43*b8382935SToomas Soome checking for available input while decoding. 44*b8382935SToomas Soome 45*b8382935SToomas Soome - The maximum bytes that a single length/distance pair can output is 258 46*b8382935SToomas Soome bytes, which is the maximum length that can be coded. inflate_fast() 47*b8382935SToomas Soome requires strm->avail_out >= 258 for each loop to avoid checking for 48*b8382935SToomas Soome output space. 49*b8382935SToomas Soome */ 50*b8382935SToomas Soome void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) 51*b8382935SToomas Soome { 52*b8382935SToomas Soome struct inflate_state FAR *state; 53*b8382935SToomas Soome z_const unsigned char FAR *in; /* local strm->next_in */ 54*b8382935SToomas Soome z_const unsigned char FAR *last; /* have enough input while in < last */ 55*b8382935SToomas Soome unsigned char FAR *out; /* local strm->next_out */ 56*b8382935SToomas Soome unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ 57*b8382935SToomas Soome unsigned char FAR *end; /* while out < end, enough space available */ 58*b8382935SToomas Soome #ifdef INFLATE_STRICT 59*b8382935SToomas Soome unsigned dmax; /* maximum distance from zlib header */ 60*b8382935SToomas Soome #endif 61*b8382935SToomas Soome unsigned wsize; /* window size or zero if not using window */ 62*b8382935SToomas Soome unsigned whave; /* valid bytes in the window */ 63*b8382935SToomas Soome unsigned wnext; /* window write index */ 64*b8382935SToomas Soome unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ 65*b8382935SToomas Soome unsigned long hold; /* local strm->hold */ 66*b8382935SToomas Soome unsigned bits; /* local strm->bits */ 67*b8382935SToomas Soome code const FAR *lcode; /* local strm->lencode */ 68*b8382935SToomas Soome code const FAR *dcode; /* local strm->distcode */ 69*b8382935SToomas Soome unsigned lmask; /* mask for first level of length codes */ 70*b8382935SToomas Soome unsigned dmask; /* mask for first level of distance codes */ 71*b8382935SToomas Soome code here; /* retrieved table entry */ 72*b8382935SToomas Soome unsigned op; /* code bits, operation, extra bits, or */ 73*b8382935SToomas Soome /* window position, window bytes to copy */ 74*b8382935SToomas Soome unsigned len; /* match length, unused bytes */ 75*b8382935SToomas Soome unsigned dist; /* match distance */ 76*b8382935SToomas Soome unsigned char FAR *from; /* where to copy match from */ 77*b8382935SToomas Soome 78*b8382935SToomas Soome /* copy state to local variables */ 79*b8382935SToomas Soome state = (struct inflate_state FAR *)strm->state; 80*b8382935SToomas Soome in = strm->next_in; 81*b8382935SToomas Soome last = in + (strm->avail_in - 5); 82*b8382935SToomas Soome out = strm->next_out; 83*b8382935SToomas Soome beg = out - (start - strm->avail_out); 84*b8382935SToomas Soome end = out + (strm->avail_out - 257); 85*b8382935SToomas Soome #ifdef INFLATE_STRICT 86*b8382935SToomas Soome dmax = state->dmax; 87*b8382935SToomas Soome #endif 88*b8382935SToomas Soome wsize = state->wsize; 89*b8382935SToomas Soome whave = state->whave; 90*b8382935SToomas Soome wnext = state->wnext; 91*b8382935SToomas Soome window = state->window; 92*b8382935SToomas Soome hold = state->hold; 93*b8382935SToomas Soome bits = state->bits; 94*b8382935SToomas Soome lcode = state->lencode; 95*b8382935SToomas Soome dcode = state->distcode; 96*b8382935SToomas Soome lmask = (1U << state->lenbits) - 1; 97*b8382935SToomas Soome dmask = (1U << state->distbits) - 1; 98*b8382935SToomas Soome 99*b8382935SToomas Soome /* decode literals and length/distances until end-of-block or not enough 100*b8382935SToomas Soome input data or output space */ 101*b8382935SToomas Soome do { 102*b8382935SToomas Soome if (bits < 15) { 103*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 104*b8382935SToomas Soome bits += 8; 105*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 106*b8382935SToomas Soome bits += 8; 107*b8382935SToomas Soome } 108*b8382935SToomas Soome here = lcode[hold & lmask]; 109*b8382935SToomas Soome dolen: 110*b8382935SToomas Soome op = (unsigned)(here.bits); 111*b8382935SToomas Soome hold >>= op; 112*b8382935SToomas Soome bits -= op; 113*b8382935SToomas Soome op = (unsigned)(here.op); 114*b8382935SToomas Soome if (op == 0) { /* literal */ 115*b8382935SToomas Soome Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 116*b8382935SToomas Soome "inflate: literal '%c'\n" : 117*b8382935SToomas Soome "inflate: literal 0x%02x\n", here.val)); 118*b8382935SToomas Soome *out++ = (unsigned char)(here.val); 119*b8382935SToomas Soome } 120*b8382935SToomas Soome else if (op & 16) { /* length base */ 121*b8382935SToomas Soome len = (unsigned)(here.val); 122*b8382935SToomas Soome op &= 15; /* number of extra bits */ 123*b8382935SToomas Soome if (op) { 124*b8382935SToomas Soome if (bits < op) { 125*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 126*b8382935SToomas Soome bits += 8; 127*b8382935SToomas Soome } 128*b8382935SToomas Soome len += (unsigned)hold & ((1U << op) - 1); 129*b8382935SToomas Soome hold >>= op; 130*b8382935SToomas Soome bits -= op; 131*b8382935SToomas Soome } 132*b8382935SToomas Soome Tracevv((stderr, "inflate: length %u\n", len)); 133*b8382935SToomas Soome if (bits < 15) { 134*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 135*b8382935SToomas Soome bits += 8; 136*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 137*b8382935SToomas Soome bits += 8; 138*b8382935SToomas Soome } 139*b8382935SToomas Soome here = dcode[hold & dmask]; 140*b8382935SToomas Soome dodist: 141*b8382935SToomas Soome op = (unsigned)(here.bits); 142*b8382935SToomas Soome hold >>= op; 143*b8382935SToomas Soome bits -= op; 144*b8382935SToomas Soome op = (unsigned)(here.op); 145*b8382935SToomas Soome if (op & 16) { /* distance base */ 146*b8382935SToomas Soome dist = (unsigned)(here.val); 147*b8382935SToomas Soome op &= 15; /* number of extra bits */ 148*b8382935SToomas Soome if (bits < op) { 149*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 150*b8382935SToomas Soome bits += 8; 151*b8382935SToomas Soome if (bits < op) { 152*b8382935SToomas Soome hold += (unsigned long)(*in++) << bits; 153*b8382935SToomas Soome bits += 8; 154*b8382935SToomas Soome } 155*b8382935SToomas Soome } 156*b8382935SToomas Soome dist += (unsigned)hold & ((1U << op) - 1); 157*b8382935SToomas Soome #ifdef INFLATE_STRICT 158*b8382935SToomas Soome if (dist > dmax) { 159*b8382935SToomas Soome strm->msg = (char *)"invalid distance too far back"; 160*b8382935SToomas Soome state->mode = BAD; 161*b8382935SToomas Soome break; 162*b8382935SToomas Soome } 163*b8382935SToomas Soome #endif 164*b8382935SToomas Soome hold >>= op; 165*b8382935SToomas Soome bits -= op; 166*b8382935SToomas Soome Tracevv((stderr, "inflate: distance %u\n", dist)); 167*b8382935SToomas Soome op = (unsigned)(out - beg); /* max distance in output */ 168*b8382935SToomas Soome if (dist > op) { /* see if copy from window */ 169*b8382935SToomas Soome op = dist - op; /* distance back in window */ 170*b8382935SToomas Soome if (op > whave) { 171*b8382935SToomas Soome if (state->sane) { 172*b8382935SToomas Soome strm->msg = 173*b8382935SToomas Soome (char *)"invalid distance too far back"; 174*b8382935SToomas Soome state->mode = BAD; 175*b8382935SToomas Soome break; 176*b8382935SToomas Soome } 177*b8382935SToomas Soome #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR 178*b8382935SToomas Soome if (len <= op - whave) { 179*b8382935SToomas Soome do { 180*b8382935SToomas Soome *out++ = 0; 181*b8382935SToomas Soome } while (--len); 182*b8382935SToomas Soome continue; 183*b8382935SToomas Soome } 184*b8382935SToomas Soome len -= op - whave; 185*b8382935SToomas Soome do { 186*b8382935SToomas Soome *out++ = 0; 187*b8382935SToomas Soome } while (--op > whave); 188*b8382935SToomas Soome if (op == 0) { 189*b8382935SToomas Soome from = out - dist; 190*b8382935SToomas Soome do { 191*b8382935SToomas Soome *out++ = *from++; 192*b8382935SToomas Soome } while (--len); 193*b8382935SToomas Soome continue; 194*b8382935SToomas Soome } 195*b8382935SToomas Soome #endif 196*b8382935SToomas Soome } 197*b8382935SToomas Soome from = window; 198*b8382935SToomas Soome if (wnext == 0) { /* very common case */ 199*b8382935SToomas Soome from += wsize - op; 200*b8382935SToomas Soome if (op < len) { /* some from window */ 201*b8382935SToomas Soome len -= op; 202*b8382935SToomas Soome do { 203*b8382935SToomas Soome *out++ = *from++; 204*b8382935SToomas Soome } while (--op); 205*b8382935SToomas Soome from = out - dist; /* rest from output */ 206*b8382935SToomas Soome } 207*b8382935SToomas Soome } 208*b8382935SToomas Soome else if (wnext < op) { /* wrap around window */ 209*b8382935SToomas Soome from += wsize + wnext - op; 210*b8382935SToomas Soome op -= wnext; 211*b8382935SToomas Soome if (op < len) { /* some from end of window */ 212*b8382935SToomas Soome len -= op; 213*b8382935SToomas Soome do { 214*b8382935SToomas Soome *out++ = *from++; 215*b8382935SToomas Soome } while (--op); 216*b8382935SToomas Soome from = window; 217*b8382935SToomas Soome if (wnext < len) { /* some from start of window */ 218*b8382935SToomas Soome op = wnext; 219*b8382935SToomas Soome len -= op; 220*b8382935SToomas Soome do { 221*b8382935SToomas Soome *out++ = *from++; 222*b8382935SToomas Soome } while (--op); 223*b8382935SToomas Soome from = out - dist; /* rest from output */ 224*b8382935SToomas Soome } 225*b8382935SToomas Soome } 226*b8382935SToomas Soome } 227*b8382935SToomas Soome else { /* contiguous in window */ 228*b8382935SToomas Soome from += wnext - op; 229*b8382935SToomas Soome if (op < len) { /* some from window */ 230*b8382935SToomas Soome len -= op; 231*b8382935SToomas Soome do { 232*b8382935SToomas Soome *out++ = *from++; 233*b8382935SToomas Soome } while (--op); 234*b8382935SToomas Soome from = out - dist; /* rest from output */ 235*b8382935SToomas Soome } 236*b8382935SToomas Soome } 237*b8382935SToomas Soome while (len > 2) { 238*b8382935SToomas Soome *out++ = *from++; 239*b8382935SToomas Soome *out++ = *from++; 240*b8382935SToomas Soome *out++ = *from++; 241*b8382935SToomas Soome len -= 3; 242*b8382935SToomas Soome } 243*b8382935SToomas Soome if (len) { 244*b8382935SToomas Soome *out++ = *from++; 245*b8382935SToomas Soome if (len > 1) 246*b8382935SToomas Soome *out++ = *from++; 247*b8382935SToomas Soome } 248*b8382935SToomas Soome } 249*b8382935SToomas Soome else { 250*b8382935SToomas Soome from = out - dist; /* copy direct from output */ 251*b8382935SToomas Soome do { /* minimum length is three */ 252*b8382935SToomas Soome *out++ = *from++; 253*b8382935SToomas Soome *out++ = *from++; 254*b8382935SToomas Soome *out++ = *from++; 255*b8382935SToomas Soome len -= 3; 256*b8382935SToomas Soome } while (len > 2); 257*b8382935SToomas Soome if (len) { 258*b8382935SToomas Soome *out++ = *from++; 259*b8382935SToomas Soome if (len > 1) 260*b8382935SToomas Soome *out++ = *from++; 261*b8382935SToomas Soome } 262*b8382935SToomas Soome } 263*b8382935SToomas Soome } 264*b8382935SToomas Soome else if ((op & 64) == 0) { /* 2nd level distance code */ 265*b8382935SToomas Soome here = dcode[here.val + (hold & ((1U << op) - 1))]; 266*b8382935SToomas Soome goto dodist; 267*b8382935SToomas Soome } 268*b8382935SToomas Soome else { 269*b8382935SToomas Soome strm->msg = (char *)"invalid distance code"; 270*b8382935SToomas Soome state->mode = BAD; 271*b8382935SToomas Soome break; 272*b8382935SToomas Soome } 273*b8382935SToomas Soome } 274*b8382935SToomas Soome else if ((op & 64) == 0) { /* 2nd level length code */ 275*b8382935SToomas Soome here = lcode[here.val + (hold & ((1U << op) - 1))]; 276*b8382935SToomas Soome goto dolen; 277*b8382935SToomas Soome } 278*b8382935SToomas Soome else if (op & 32) { /* end-of-block */ 279*b8382935SToomas Soome Tracevv((stderr, "inflate: end of block\n")); 280*b8382935SToomas Soome state->mode = TYPE; 281*b8382935SToomas Soome break; 282*b8382935SToomas Soome } 283*b8382935SToomas Soome else { 284*b8382935SToomas Soome strm->msg = (char *)"invalid literal/length code"; 285*b8382935SToomas Soome state->mode = BAD; 286*b8382935SToomas Soome break; 287*b8382935SToomas Soome } 288*b8382935SToomas Soome } while (in < last && out < end); 289*b8382935SToomas Soome 290*b8382935SToomas Soome /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 291*b8382935SToomas Soome len = bits >> 3; 292*b8382935SToomas Soome in -= len; 293*b8382935SToomas Soome bits -= len << 3; 294*b8382935SToomas Soome hold &= (1U << bits) - 1; 295*b8382935SToomas Soome 296*b8382935SToomas Soome /* update state and return */ 297*b8382935SToomas Soome strm->next_in = in; 298*b8382935SToomas Soome strm->next_out = out; 299*b8382935SToomas Soome strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); 300*b8382935SToomas Soome strm->avail_out = (unsigned)(out < end ? 301*b8382935SToomas Soome 257 + (end - out) : 257 - (out - end)); 302*b8382935SToomas Soome state->hold = hold; 303*b8382935SToomas Soome state->bits = bits; 304*b8382935SToomas Soome return; 305*b8382935SToomas Soome } 306*b8382935SToomas Soome 307*b8382935SToomas Soome /* 308*b8382935SToomas Soome inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): 309*b8382935SToomas Soome - Using bit fields for code structure 310*b8382935SToomas Soome - Different op definition to avoid & for extra bits (do & for table bits) 311*b8382935SToomas Soome - Three separate decoding do-loops for direct, window, and wnext == 0 312*b8382935SToomas Soome - Special case for distance > 1 copies to do overlapped load and store copy 313*b8382935SToomas Soome - Explicit branch predictions (based on measured branch probabilities) 314*b8382935SToomas Soome - Deferring match copy and interspersed it with decoding subsequent codes 315*b8382935SToomas Soome - Swapping literal/length else 316*b8382935SToomas Soome - Swapping window/direct else 317*b8382935SToomas Soome - Larger unrolled copy loops (three is about right) 318*b8382935SToomas Soome - Moving len -= 3 statement into middle of loop 319*b8382935SToomas Soome */ 320*b8382935SToomas Soome 321*b8382935SToomas Soome #endif /* !ASMINF */ 322