/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */
/*
 * Copyright (c) 2016 by Delphix. All rights reserved.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 */

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
#include <sys/zfs_context.h>
#elif defined(_STANDALONE)
#include <sys/cdefs.h>
#include <stand.h>
#include <sys/types.h>
#include <sys/endian.h>
#include <assert.h>

#define	ASSERT	assert
#else
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <netinet/in.h>
#include <assert.h>

#define	ASSERT	assert
#endif
#include <lz4.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
	    d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	*(uint32_t *)(void *)dest = BE_32(bufsiz);
#else
	*(uint32_t *)(void *)dest = htonl(bufsiz);
#endif

	return (bufsiz + sizeof (bufsiz));
}

int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
    int n __unused)
{
	const char *src = s_start;
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	uint32_t bufsiz = BE_IN32(s_start);
#else
	uint32_t bufsiz = htonl(*(uint32_t *)s_start);
#endif

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}
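
/*
 * Illustrative sketch (kept out of the build with #if 0): one possible
 * round trip through the two entry points above. The buffer names and
 * sizes are invented for the example; the only facts used are that
 * lz4_compress() prepends a 4-byte length header, returns s_len when the
 * data does not fit in d_len, and that lz4_decompress() returns 0 on
 * success and non-zero on failure.
 */
#if 0
static void
lz4_roundtrip_example(void)
{
	char src[4096] = { 0 };		/* hypothetical input block */
	char packed[4096];		/* compressed image incl. header */
	char out[4096];			/* decompression target */
	size_t psize;

	psize = lz4_compress(src, packed, sizeof (src), sizeof (packed), 0);
	if (psize == sizeof (src)) {
		/* Did not compress; the caller would store src as-is. */
		return;
	}
	if (lz4_decompress(packed, out, psize, sizeof (out), 0) != 0) {
		/* Corrupt or truncated compressed stream. */
		return;
	}
}
#endif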

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *		destination buffer must be sized to handle worst-case
 *		situations (input data not compressible).
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible). Primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note   : Destination buffer must be already allocated.
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
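
/*
 * Sketch of the CTX convention described above (not compiled; the caller
 * names src, dst, isize and osize are invented for illustration). The
 * point is simply that the scratch table is owned by the caller and
 * handed in, so the low-level compressors never allocate memory
 * themselves. struct refTables is defined further down in this file, and
 * real_LZ4_compress() below follows the same pattern.
 */
#if 0
	struct refTables *ctx =
	    kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	if (ctx != NULL) {
		/* A return value of 0 means the output did not fit in osize. */
		int clen = LZ4_compressCtx(ctx, src, dst, isize, osize);
		kmem_free(ctx, sizeof (struct refTables));
	}
#endif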

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *	Lowering this value reduces memory usage. Reduced memory usage
 *	typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *	L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *	(examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *	algorithm skip data segments considered "incompressible" faster.
 *	This may decrease compression ratio dramatically, but will be
 *	faster on incompressible data. Increasing this value will make
 *	the algorithm search more before declaring a segment "incompressible".
 *	This could improve compression a bit, but will be slower on
 *	incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost on big-endian CPUs, but the resulting compressed stream will be
 * incompatible with little-endian CPUs. You can set this option to 1 in
 * situations where data will stay within a closed environment. This
 * option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
#else
#define	LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define	STACKLIMIT 9
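/*
 * With the default COMPRESSIONLEVEL of 12 this limit is exceeded, so the
 * lookup table comes from the heap (see HEAPMODE below); at a level of 9
 * or less, either compressor's table would need no more than
 * 2^9 * 4 = 2KB of stack.
 */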

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if defined(BYTE_ORDER)
#if BYTE_ORDER == BIG_ENDIAN	/* This is sys/endian.h API */
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif
#else /* !defined(BYTE_ORDER) */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
	defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
	defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
	defined(__powerpc) || defined(powerpc) || \
	((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif
#endif /* defined(BYTE_ORDER) */

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

#ifdef __sparc
#define	LZ4_FORCE_SW_BITCOUNT
#endif

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)    (expr)
#endif

#ifndef likely
#define	likely(expr)	expect((expr) != 0, 1)
#endif

#ifndef unlikely
#define	unlikely(expr)	expect((expr) != 0, 0)
#endif

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE	unsigned __int8
#define	U16	unsigned __int16
#define	U32	unsigned __int32
#define	S32	__int32
#define	U64	unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(__DECONST(void *, x)))->v)
#define	A32(x) (((U32_S *)(__DECONST(void *, x)))->v)
#define	A16(x) (((U16_S *)(__DECONST(void *, x)))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
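/* HEAPMODE evaluates to 1 with the defaults above (HASH_LOG 12 > STACKLIMIT 9). */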
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
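
/*
 * Each encoded sequence starts with a one-byte token: the upper RUN_BITS
 * hold the literal run length and the lower ML_BITS hold the match length
 * minus MINMATCH. A field equal to its mask (RUN_MASK or ML_MASK) means
 * the length continues in following bytes: bytes of 255 keep adding, and
 * the first byte below 255 terminates the length.
 */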


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
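/*
 * LZ4_HASH_FUNCTION() is a multiplicative hash: the first four input
 * bytes are multiplied by a fixed odd constant and the top HASH_LOG bits
 * of the 32-bit product select the hash-table slot.
 */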
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }


/* Private functions */
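/*
 * LZ4_NbCommonBytes() takes the XOR of two machine words read from the
 * input and the match candidate and returns how many of their initial
 * (lowest-address) bytes are identical, using clz/ctz style intrinsics
 * when available and table-driven fallbacks otherwise.
 */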
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))
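
/*
 * The 64K variant can store match positions as 16-bit offsets from the
 * start of the input: with isize below LZ4_64KLIMIT, no position the
 * scanner ever hashes lies more than 65535 bytes past 'base', so a U16
 * hash table suffices and no MAX_DISTANCE check is needed.
 */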

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (const BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (const BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
#else
	void *ctx = calloc(1, sizeof (struct refTables));
#endif
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

#if defined(_KERNEL) || defined(_FAKE_KERNEL)
	kmem_free(ctx, sizeof (struct refTables));
#else
	free(ctx);
#endif
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding function LZ4_uncompress_unknownOutputSize() is safe
 *	against "buffer overflow" attacks. It will never write nor
 *	read outside of the provided output buffers.
 *	LZ4_uncompress_unknownOutputSize() also ensures that
 *	it will never read outside of the input buffer. A corrupted input
 *	will produce an error result, a negative int, indicating the position
 *	of the error within the input stream.
 */

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		/* CORNER-CASE: cpy might overflow. */
		if (cpy < op)
			goto _output_error;	/* cpy was overflowed, bail! */
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
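		/*
		 * Offsets smaller than STEPSIZE mean the match overlaps the
		 * bytes being produced, so a straight word copy would read
		 * data that has not been written yet. The first four bytes
		 * are copied one at a time and the dec32/dec64 tables then
		 * re-position 'ref' so that the following word copies
		 * replicate the short repeating pattern correctly.
		 */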
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((const char *)ip) - source));
}