xref: /illumos-gate/usr/src/cmd/pack/pack.c (revision da6c28aa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 /*
32  *	Huffman encoding program
33  *	Usage:	pack [[ -f ] [ - ] [-/] filename ... ] filename ...
34  *		- option: enable/disable listing of statistics
35  */
36 
37 #include <locale.h>
38 #include <stdarg.h>
39 #include <sys/isa_defs.h>
40 #include <sys/param.h>
41 #include <utime.h>
42 #include <sys/acl.h>
43 #include <aclutils.h>
44 #include <libcmdutils.h>
45 
46 #undef lint
47 
48 #define	END	256
49 #define	PACKED 017436 /* <US><RS> - Unlikely value */
50 #define	SUF0	'.'
51 #define	SUF1	'z'
52 
53 struct stat status, ostatus;
54 static struct utimbuf u_times;
55 
56 /* union for overlaying a long int with a set of four characters */
57 union FOUR {
58 	struct { long int lng; } lint;
59 	struct { char c0, c1, c2, c3; } chars;
60 };
61 
62 /* character counters */
63 long	count [END+1];
64 union	FOUR insize;
65 long	outsize;
66 long	dictsize;
67 int	diffbytes;
68 
69 /* i/o stuff */
70 char	vflag = 0;
71 int	force = 0;	/* allow forced packing for consistency in directory */
72 
73 static	char filename [MAXPATHLEN];
74 static int max_name;
75 
76 int	infile;		/* unpacked file */
77 int	outfile;	/* packed file */
78 char	inbuff [BUFSIZ];
79 char	outbuff [BUFSIZ+4];
80 
81 /* variables associated with the tree */
82 int	maxlev;
83 int	levcount [25];
84 int	lastnode;
85 int	parent [2*END+1];
86 
87 /* variables associated with the encoding process */
88 char	length [END+1];
89 long	bits [END+1];
90 union	FOUR mask;
91 long	inc;
92 #if defined(_LITTLE_ENDIAN)
93 char	*maskshuff[4]  = {&(mask.chars.c3),
94 			    &(mask.chars.c2),
95 			    &(mask.chars.c1),
96 			    &(mask.chars.c0)};
97 #elif defined(_BIG_ENDIAN)
98 char	*maskshuff[4]  = {&(mask.chars.c0),
99 			    &(mask.chars.c1),
100 			    &(mask.chars.c2),
101 			    &(mask.chars.c3)};
102 #else
103 #error Unknown byte ordering!
104 #endif
105 
106 /* the heap */
107 int	n;
108 struct	heap {
109 	long int count;
110 	int node;
111 } heap [END+2];
112 #define	hmove(a, b) {(b).count = (a).count; (b).node = (a).node; }
113 
114 static void heapify(int i);
115 
116 /* Extended system attribute support */
117 
118 static int saflg = 0;
119 
120 
121 /* gather character frequency statistics */
122 /* return 1 if successful, 0 otherwise */
123 int
124 input(char *source)
125 {
126 	register int i;
127 	for (i = 0; i < END; i++)
128 		count[i] = 0;
129 	while ((i = read(infile, inbuff, BUFSIZ)) > 0)
130 		while (i > 0)
131 			count[inbuff[--i]&0377] += 2;
132 	if (i == 0)
133 		return (1);
134 	(void) fprintf(stderr, gettext(
135 	    "pack: %s: read error - file unchanged: "), source);
136 	perror("");
137 	return (0);
138 }
139 
140 /* encode the current file */
141 /* return 1 if successful, 0 otherwise */
142 int
143 output(char *source)
144 {
145 	int c, i, inleft;
146 	char *inp;
147 	register char **q, *outp;
148 	register int bitsleft;
149 	long temp;
150 
151 	/* output ``PACKED'' header */
152 	outbuff[0] = 037; 	/* ascii US */
153 	outbuff[1] = 036; 	/* ascii RS */
154 	/* output the length and the dictionary */
155 	temp = insize.lint.lng;
156 	for (i = 5; i >= 2; i--) {
157 		outbuff[i] =  (char)(temp & 0377);
158 		temp >>= 8;
159 	}
160 	outp = &outbuff[6];
161 	*outp++ = maxlev;
162 	for (i = 1; i < maxlev; i++)
163 		*outp++ = levcount[i];
164 	*outp++ = levcount[maxlev]-2;
165 	for (i = 1; i <= maxlev; i++)
166 		for (c = 0; c < END; c++)
167 			if (length[c] == i)
168 				*outp++ = c;
169 	dictsize = outp-&outbuff[0];
170 
171 	/* output the text */
172 	(void) lseek(infile, 0L, 0);
173 	outsize = 0;
174 	bitsleft = 8;
175 	inleft = 0;
176 	do {
177 		if (inleft <= 0) {
178 			inleft = read(infile, inp = &inbuff[0], BUFSIZ);
179 			if (inleft < 0) {
180 				(void) fprintf(stderr, gettext(
181 				    "pack: %s: read error - file unchanged: "),
182 				    source);
183 				perror("");
184 				return (0);
185 			}
186 		}
187 		c = (--inleft < 0) ? END : (*inp++ & 0377);
188 		mask.lint.lng = bits[c]<<bitsleft;
189 		q = &maskshuff[0];
190 		if (bitsleft == 8)
191 			*outp = **q++;
192 		else
193 			*outp |= **q++;
194 		bitsleft -= length[c];
195 		while (bitsleft < 0) {
196 			*++outp = **q++;
197 			bitsleft += 8;
198 		}
199 		if (outp >= &outbuff[BUFSIZ]) {
200 			if (write(outfile, outbuff, BUFSIZ) != BUFSIZ) {
201 wrerr:				(void) fprintf(stderr, gettext(
202 				    "pack: %s.z: write error - "
203 				    "file unchanged: "), source);
204 				perror("");
205 				return (0);
206 			}
207 			((union FOUR *)outbuff)->lint.lng =
208 			    ((union FOUR *)&outbuff[BUFSIZ])->lint.lng;
209 			outp -= BUFSIZ;
210 			outsize += BUFSIZ;
211 		}
212 	} while (c != END);
213 	if (bitsleft < 8)
214 		outp++;
215 	c = outp-outbuff;
216 	if (write(outfile, outbuff, c) != c)
217 		goto wrerr;
218 	outsize += c;
219 	return (1);
220 }
221 
222 /* makes a heap out of heap[i],...,heap[n] */
223 void
224 heapify(int i)
225 {
226 	register int k;
227 	int lastparent;
228 	struct heap heapsubi;
229 	hmove(heap[i], heapsubi);
230 	lastparent = n/2;
231 	while (i <= lastparent) {
232 		k = 2*i;
233 		if (heap[k].count > heap[k+1].count && k < n)
234 			k++;
235 		if (heapsubi.count < heap[k].count)
236 			break;
237 		hmove(heap[k], heap[i]);
238 		i = k;
239 	}
240 	hmove(heapsubi, heap[i]);
241 }
242 
243 /* return 1 after successful packing, 0 otherwise */
244 int
245 packfile(char *source)
246 {
247 	register int c, i, p;
248 	long bitsout;
249 
250 	/* gather frequency statistics */
251 	if (input(source) == 0)
252 		return (0);
253 
254 	/* put occurring chars in heap with their counts */
255 	diffbytes = -1;
256 	count[END] = 1;
257 	insize.lint.lng = n = 0;
258 	for (i = END; i >= 0; i--) {
259 		parent[i] = 0;
260 		if (count[i] > 0) {
261 			diffbytes++;
262 			insize.lint.lng += count[i];
263 			heap[++n].count = count[i];
264 			heap[n].node = i;
265 		}
266 	}
267 	if (diffbytes == 1) {
268 		(void) fprintf(stderr, gettext(
269 		    "pack: %s: trivial file - file unchanged\n"), source);
270 		return (0);
271 	}
272 	insize.lint.lng >>= 1;
273 	for (i = n/2; i >= 1; i--)
274 		heapify(i);
275 
276 	/* build Huffman tree */
277 	lastnode = END;
278 	while (n > 1) {
279 		parent[heap[1].node] = ++lastnode;
280 		inc = heap[1].count;
281 		hmove(heap[n], heap[1]);
282 		n--;
283 		heapify(1);
284 		parent[heap[1].node] = lastnode;
285 		heap[1].node = lastnode;
286 		heap[1].count += inc;
287 		heapify(1);
288 	}
289 	parent[lastnode] = 0;
290 
291 	/* assign lengths to encoding for each character */
292 	bitsout = maxlev = 0;
293 	for (i = 1; i <= 24; i++)
294 		levcount[i] = 0;
295 	for (i = 0; i <= END; i++) {
296 		c = 0;
297 		for (p = parent[i]; p != 0; p = parent[p])
298 			c++;
299 		levcount[c]++;
300 		length[i] = c;
301 		if (c > maxlev)
302 			maxlev = c;
303 		bitsout += c*(count[i]>>1);
304 	}
305 	if (maxlev > 24) {
306 		/* can't occur unless insize.lint.lng >= 2**24 */
307 		(void) fprintf(stderr, gettext(
308 		    "pack: %s: Huffman tree has too many levels - "
309 		    "file unchanged\n"), source);
310 		return (0);
311 	}
312 
313 	/* don't bother if no compression results */
314 	outsize = ((bitsout+7)>>3)+6+maxlev+diffbytes;
315 	if ((insize.lint.lng+BUFSIZ-1)/BUFSIZ <=
316 	    (outsize+BUFSIZ-1)/BUFSIZ && !force) {
317 		(void) printf(gettext(
318 		    "pack: %s: no saving - file unchanged\n"), source);
319 		return (0);
320 	}
321 
322 	/* compute bit patterns for each character */
323 	inc = 1L << 24;
324 	inc >>= maxlev;
325 	mask.lint.lng = 0;
326 	for (i = maxlev; i > 0; i--) {
327 		for (c = 0; c <= END; c++)
328 			if (length[c] == i) {
329 				bits[c] = mask.lint.lng;
330 				mask.lint.lng += inc;
331 			}
332 		mask.lint.lng &= ~inc;
333 		inc <<= 1;
334 	}
335 
336 	return (output(source));
337 }
338 
339 int
340 main(int argc, char *argv[])
341 {
342 	extern  int optind;
343 	register int i;
344 	register char *cp;
345 	int k, sep, errflg = 0;
346 	int c;
347 	int error;
348 	int fcount = 0; /* count failures */
349 	acl_t *aclp = NULL;
350 	char *progname;
351 	int sattr_exist = 0;
352 	int xattr_exist = 0;
353 
354 	(void) setlocale(LC_ALL, "");
355 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
356 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
357 #endif
358 	(void) textdomain(TEXT_DOMAIN);
359 
360 	if (progname = strrchr(argv[0], '/'))
361 		++progname;
362 	else
363 		progname = argv[0];
364 
365 	while ((c = getopt(argc, argv, "f-/")) != EOF) {
366 		if (c == 'f')
367 			force++;
368 		else if (c == '/')
369 			saflg++;
370 		else
371 			++errflg;
372 	}
373 	/*
374 	 * Check for invalid option.  Also check for missing
375 	 * file operand, ie: "pack" or "pack -".
376 	 */
377 	argc -= optind;
378 	argv = &argv[optind];
379 	if (errflg || argc < 1 ||
380 	    (argc == 1 && (argv[0][0] == '-' || argv[0][0] == '/' &&
381 	    argv[0][1] == '\0'))) {
382 		(void) fprintf(stderr, gettext(
383 		    "usage: pack [-f] [-] [-/] file...\n"));
384 		if (argc < 1 ||
385 		    (argc == 1 && (argv[0][0] == '-' || argv[0][0] == '/') &&
386 		    argv[0][1] == '\0')) {
387 			/*
388 			 * return 1 for usage error when no file was specified
389 			 */
390 			return (1);
391 		}
392 	}
393 
394 	/* loop through the file names */
395 	for (k = 0; k < argc; k++) {
396 		if (argv[k][0] == '-' && argv[k][1] == '\0') {
397 			vflag = 1 - vflag;
398 			continue;
399 		}
400 		fcount++; /* increase failure count - expect the worst */
401 		if (errflg) {
402 			/*
403 			 * invalid option; just count the number of files not
404 			 * packed
405 			 */
406 			continue;
407 		}
408 		/* remove any .z suffix the user may have added */
409 		for (cp = argv[k]; *cp != '\0'; ++cp)
410 			;
411 		if (cp[-1] == SUF1 && cp[-2] == SUF0) {
412 			*cp-- = '\0'; *cp-- = '\0'; *cp = '\0';
413 		}
414 		sep = -1;  cp = filename;
415 		max_name = pathconf(argv[k], _PC_NAME_MAX);
416 		if (max_name == -1) {
417 			/* pathname invalid or no limit on length of filename */
418 			max_name = _POSIX_NAME_MAX;
419 		}
420 		/* copy argv[k] to filename and count chars in base name */
421 		for (i = 0; i < (MAXPATHLEN-3) && (*cp = argv[k][i]); i++)
422 			if (*cp++ == '/') sep = i;
423 		if ((infile = open(filename, 0)) < 0) {
424 			(void) fprintf(stderr, gettext(
425 			    "pack: %s: cannot open: "), filename);
426 			perror("");
427 			continue;
428 		}
429 		if (i >= (MAXPATHLEN-3) || (i-sep) > (max_name - 1)) {
430 			(void) fprintf(stderr, gettext(
431 			    "pack: %s: file name too long\n"), argv[k]);
432 			continue;
433 		}
434 		(void) fstat(infile, &status);
435 		if (S_ISDIR(status.st_mode)) {
436 			(void) fprintf(stderr, gettext(
437 			    "pack: %s: cannot pack a directory\n"),
438 			    argv[k]);
439 			goto closein;
440 		}
441 		if (status.st_size == 0) {
442 			(void) fprintf(stderr, gettext(
443 			    "pack: %s: cannot pack a zero length file\n"),
444 			    argv[k]);
445 			goto closein;
446 		}
447 		if (status.st_nlink != 1) {
448 			(void) fprintf(stderr, gettext(
449 			    "pack: %s: has links\n"),
450 			    argv[k]);
451 			goto closein;
452 		}
453 		*cp++ = SUF0;  *cp++ = SUF1;  *cp = '\0';
454 		if (stat(filename, &ostatus) != -1) {
455 			(void) fprintf(stderr, gettext(
456 			    "pack: %s: already exists\n"), filename);
457 			goto closein;
458 		}
459 		if ((outfile = creat(filename, status.st_mode | O_RDONLY))
460 		    < 0) {
461 			(void) fprintf(stderr, gettext(
462 			    "pack: %s: cannot create: "), filename);
463 			perror("");
464 			goto closein;
465 		}
466 
467 		error = facl_get(infile, ACL_NO_TRIVIAL, &aclp);
468 
469 		if (error != 0) {
470 			(void) fprintf(stderr, gettext(
471 			    "pack: %s: cannot retrieve ACL: %s\n"), argv[k],
472 			    acl_strerror(error));
473 		}
474 
475 		if (packfile(argv[k])) {
476 			if (pathconf(argv[k], _PC_XATTR_EXISTS) == 1)
477 				xattr_exist = 1;
478 			if (saflg && sysattr_support(argv[k],
479 			    _PC_SATTR_EXISTS) == 1)
480 				sattr_exist = 1;
481 			if (sattr_exist || xattr_exist) {
482 				if (mv_xattrs(progname, argv[k], filename,
483 				    sattr_exist, 0) != 0) {
484 				/* Move attributes back ... */
485 					xattr_exist = 0;
486 					sattr_exist = 0;
487 					if (pathconf(filename,
488 					    _PC_XATTR_EXISTS) == 1)
489 						xattr_exist = 1;
490 					if (saflg && sysattr_support(filename,
491 					    _PC_SATTR_EXISTS) == 1)
492 						sattr_exist = 1;
493 					if (xattr_exist || sattr_exist) {
494 						(void) mv_xattrs(progname,
495 						    filename, argv[k],
496 						    sattr_exist, 1);
497 						(void) unlink(filename);
498 						goto out;
499 					}
500 				} else {
501 					errno = 0;
502 					if (unlink(argv[k]) != 0) {
503 						(void) fprintf(stderr, gettext(
504 						    "pack: %s :cannot unlink:"),
505 						    argv[k]);
506 						if (errno == EPERM)
507 							perror("No permission");
508 						else
509 							perror("");
510 					}
511 				}
512 			} else {
513 				errno = 0;
514 				if (unlink(argv[k]) != 0) {
515 					(void) fprintf(stderr, gettext(
516 					    "pack: %s :cannot unlink"),
517 					    argv[k]);
518 					if (errno == EPERM)
519 						perror("No permission");
520 					else
521 						perror("");
522 				}
523 			}
524 			(void) printf(gettext(
525 			    "pack: %s: %.1f%% Compression\n"),
526 			    argv[k],
527 			    ((double)(-outsize+(insize.lint.lng))/
528 			    (double)insize.lint.lng)*100);
529 			/* output statistics */
530 			if (vflag) {
531 				(void) printf(gettext(
532 				    "\tfrom %ld to %ld bytes\n"),
533 				    insize.lint.lng, outsize);
534 				(void) printf(gettext(
535 				    "\tHuffman tree has %d levels below "
536 				    "root\n"), maxlev);
537 				(void) printf(gettext(
538 				    "\t%d distinct bytes in input\n"),
539 				    diffbytes);
540 				(void) printf(gettext(
541 				    "\tdictionary overhead = %ld bytes\n"),
542 				    dictsize);
543 				(void) printf(gettext(
544 				    "\teffective  entropy  = %.2f bits/byte\n"),
545 				    ((double)outsize / (double)insize.lint.lng)
546 				    * 8);
547 				(void) printf(gettext(
548 				    "\tasymptotic entropy  = %.2f bits/byte\n"),
549 				    ((double)(outsize-dictsize) /
550 				    (double)insize.lint.lng) * 8);
551 			}
552 
553 			u_times.actime = status.st_atime;
554 			u_times.modtime = status.st_mtime;
555 			if (utime(filename, &u_times) != 0) {
556 				errflg++;
557 				(void) fprintf(stderr,
558 				    gettext(
559 				    "pack: cannot change times on %s: "),
560 				    filename);
561 				perror("");
562 			}
563 			if (chmod(filename, status.st_mode) != 0) {
564 				errflg++;
565 				(void) fprintf(stderr,
566 				    gettext(
567 				    "pack: can't change mode to %o on %s: "),
568 				    status.st_mode, filename);
569 				perror("");
570 			}
571 			(void) chown(filename, status.st_uid, status.st_gid);
572 			if (aclp && (facl_set(outfile, aclp) < 0)) {
573 				(void) fprintf(stderr, gettext(
574 				    "pack: %s: failed to set acl entries\n"),
575 				    filename);
576 				perror("");
577 			}
578 			if (!errflg)
579 				fcount--;  /* success after all */
580 
581 		}
582 out:
583 		if (aclp) {
584 			acl_free(aclp);
585 			aclp = NULL;
586 		}
587 
588 closein:	(void) close(outfile);
589 		(void) close(infile);
590 	}
591 	return (fcount);
592 }
593