xref: /illumos-gate/usr/src/cmd/bart/create.c (revision c40f76e3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <signal.h>
28 #include <unistd.h>
29 #include <sys/acl.h>
30 #include <sys/statvfs.h>
31 #include <sys/wait.h>
32 #include "bart.h"
33 #include <aclutils.h>
34 
35 static int	sanitize_reloc_root(char *root, size_t bufsize);
36 static int	create_manifest_filelist(char **argv, char *reloc_root);
37 static int	create_manifest_rule(char *reloc_root, FILE *rule_fp);
38 static void	output_manifest(void);
39 static int	eval_file(const char *fname, const struct stat64 *statb);
40 static char	*sanitized_fname(const char *, boolean_t);
41 static char	*get_acl_string(const char *fname, const struct stat64 *statb,
42     int *err_code);
43 static int	generate_hash(int fdin, char *hash_str);
44 static int	read_filelist(char *reloc_root, char **argv, char *buf,
45     size_t bufsize);
46 static int	walker(const char *name, const struct stat64 *sp,
47     int type, struct FTW *ftwx);
48 
49 /*
50  * The following globals are necessary due to the "walker" function
51  * provided by nftw().  Since there is no way to pass them through to the
52  * walker function, they must be global.
53  */
54 static int		compute_chksum = 1, eval_err = 0;
55 static struct rule	*subtree_root;
56 static char		reloc_root[PATH_MAX];
57 static struct statvfs	parent_vfs;
58 
59 int
60 bart_create(int argc, char **argv)
61 {
62 	boolean_t	filelist_input;
63 	int		ret, c, output_pipe[2];
64 	FILE 		*rules_fd = NULL;
65 	pid_t		pid;
66 
67 	filelist_input = B_FALSE;
68 	reloc_root[0] = '\0';
69 
70 	while ((c = getopt(argc, argv, "Inr:R:")) != EOF) {
71 		switch (c) {
72 		case 'I':
73 			if (rules_fd != NULL) {
74 				(void) fprintf(stderr, "%s", INPUT_ERR);
75 				usage();
76 			}
77 			filelist_input = B_TRUE;
78 			break;
79 
80 		case 'n':
81 			compute_chksum = 0;
82 			break;
83 
84 		case 'r':
85 			if (strcmp(optarg, "-") == 0)
86 				rules_fd = stdin;
87 			else
88 				rules_fd = fopen(optarg, "r");
89 			if (rules_fd == NULL) {
90 				perror(optarg);
91 				usage();
92 			}
93 			break;
94 
95 		case 'R':
96 			(void) strlcpy(reloc_root, optarg, sizeof (reloc_root));
97 			ret = sanitize_reloc_root(reloc_root,
98 			    sizeof (reloc_root));
99 			if (ret == 0)
100 				usage();
101 			break;
102 
103 		case '?':
104 		default :
105 			usage();
106 		}
107 	}
108 	argv += optind;
109 
110 	if (pipe(output_pipe) < 0) {
111 		perror("");
112 		exit(FATAL_EXIT);
113 	}
114 
115 	pid = fork();
116 	if (pid < 0) {
117 		perror(NULL);
118 		exit(FATAL_EXIT);
119 	}
120 
121 	/*
122 	 * Break the creation of a manifest into two parts: the parent process
123 	 * generated the data whereas the child process sorts the data.
124 	 *
125 	 * The processes communicate through the pipe.
126 	 */
127 	if (pid > 0) {
128 		/*
129 		 * Redirect the stdout of this process so it goes into
130 		 * output_pipe[0].  The output of this process will be read
131 		 * by the child, which will sort the output.
132 		 */
133 		if (dup2(output_pipe[0], STDOUT_FILENO) != STDOUT_FILENO) {
134 			perror(NULL);
135 			exit(FATAL_EXIT);
136 		}
137 		(void) close(output_pipe[0]);
138 		(void) close(output_pipe[1]);
139 
140 		if (filelist_input == B_TRUE) {
141 			ret = create_manifest_filelist(argv, reloc_root);
142 		} else {
143 			ret = create_manifest_rule(reloc_root, rules_fd);
144 		}
145 
146 		/* Close stdout so the sort in the child proc will complete */
147 		(void) fclose(stdout);
148 	} else {
149 		/*
150 		 * Redirect the stdin of this process so its read in from
151 		 * the pipe, which is the parent process in this case.
152 		 */
153 		if (dup2(output_pipe[1], STDIN_FILENO) != STDIN_FILENO) {
154 			perror(NULL);
155 			exit(FATAL_EXIT);
156 		}
157 		(void) close(output_pipe[0]);
158 
159 		output_manifest();
160 	}
161 
162 	/* Wait for the child proc (the sort) to complete */
163 	(void) wait(0);
164 
165 	return (ret);
166 }
167 
/*
 * Handle the -R option: rewrite 'root' in place so that it holds the
 * absolute path of the relocatable root.  This is useful when the user
 * specifies '-R ../../foo'.
 *
 * root		in: path given with -R; out: its absolute form, or the empty
 *		string when the relocatable root is "/"
 * bufsize	size in bytes of the buffer 'root' points to
 *
 * The current working directory is the same on return as on entry unless
 * changing back to it fails.
 *
 * Returns 1 on success.  Returns 0, after printing a diagnostic, when the
 * current directory cannot be determined, when 'root' cannot be entered
 * (e.g. it is not a directory), when its absolute path does not fit in
 * 'bufsize' bytes, or when the original directory cannot be restored.  The
 * contents of 'root' are not meaningful after a failure.
 */
static int
sanitize_reloc_root(char *root, size_t bufsize)
{
	char		pwd[PATH_MAX];

	/*
	 * First, save the current directory.  Without it there is no way
	 * back, so give up before moving anywhere.
	 */
	if (getcwd(pwd, sizeof (pwd)) == NULL) {
		perror("getcwd");
		return (0);
	}

	/* Go to the location specified with the -R option. */
	if (chdir(root) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(root);
		return (0);
	}

	/*
	 * Save the absolute path of the relocatable root directory.  On
	 * failure 'root' is no longer usable, so report the error and make
	 * a best-effort attempt to return to where we started.
	 */
	if (getcwd(root, bufsize) == NULL) {
		perror("getcwd");
		(void) chdir(pwd);
		return (0);
	}

	/*
	 * Now, go back to where we started, necessary for picking up a rules
	 * file.
	 */
	if (chdir(pwd) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(pwd);
		return (0);
	}

	/*
	 * Make sure the path returned does not have a trailing /. This
	 * can only happen when the entire pathname is "/".
	 */
	if (strcmp(root, "/") == 0)
		root[0] = '\0';

	return (1);
}
218 
219 /*
220  * This is the worker bee which creates the manifest based upon the command
221  * line options supplied by the user.
222  *
223  * NOTE: create_manifest() eventually outputs data to a pipe, which is read in
224  * by the child process.  The child process is running output_manifest(), which
225  * is responsible for generating sorted output.
226  */
227 static int
228 create_manifest_rule(char *reloc_root, FILE *rule_fp)
229 {
230 	struct rule	*root;
231 	int		ret_status = EXIT;
232 	uint_t		flags;
233 
234 	if (compute_chksum)
235 		flags = ATTR_CONTENTS;
236 	else
237 		flags = 0;
238 	ret_status = read_rules(rule_fp, reloc_root, flags, 1);
239 
240 	/* Loop through every single subtree */
241 	for (root = get_first_subtree(); root != NULL;
242 	    root = get_next_subtree(root)) {
243 
244 		/*
245 		 * This subtree has already been traversed by a
246 		 * previous stanza, i.e. this rule is a subset of a
247 		 * previous rule.
248 		 *
249 		 * Subtree has already been handled so move on!
250 		 */
251 		if (root->traversed)
252 			continue;
253 
254 		/*
255 		 * Check to see if this subtree should have contents
256 		 * checking turned on or off.
257 		 *
258 		 * NOTE: The 'compute_chksum' and 'parent_vfs'
259 		 * are a necessary hack: the variables are used in
260 		 * walker(), both directly and indirectly.  Since
261 		 * the parameters to walker() are defined by nftw(),
262 		 * the globals are really a backdoor mechanism.
263 		 */
264 		ret_status = statvfs(root->subtree, &parent_vfs);
265 		if (ret_status < 0) {
266 			perror(root->subtree);
267 			continue;
268 		}
269 
270 		/*
271 		 * Walk the subtree and invoke the callback function
272 		 * walker()
273 		 */
274 		subtree_root = root;
275 		(void) nftw64(root->subtree, &walker, 20, FTW_PHYS);
276 		root->traversed = B_TRUE;
277 
278 		/*
279 		 * Ugly but necessary:
280 		 *
281 		 * walker() must return 0, or the tree walk will stop,
282 		 * so warning flags must be set through a global.
283 		 */
284 		if (eval_err == WARNING_EXIT)
285 			ret_status = WARNING_EXIT;
286 
287 	}
288 	return (ret_status);
289 }
290 
291 static int
292 create_manifest_filelist(char **argv, char *reloc_root)
293 {
294 	int	ret_status = EXIT;
295 	char	input_fname[PATH_MAX];
296 
297 	while (read_filelist(reloc_root, argv,
298 	    input_fname, sizeof (input_fname)) != -1) {
299 
300 		struct stat64	stat_buf;
301 		int		ret;
302 
303 		ret = lstat64(input_fname, &stat_buf);
304 		if (ret < 0) {
305 			ret_status = WARNING_EXIT;
306 			perror(input_fname);
307 		} else {
308 			ret = eval_file(input_fname, &stat_buf);
309 
310 			if (ret == WARNING_EXIT)
311 				ret_status = WARNING_EXIT;
312 		}
313 	}
314 
315 	return (ret_status);
316 }
317 
318 /*
319  * output_manifest() the child process.  It reads in the output from
320  * create_manifest() and sorts it.
321  */
322 static void
323 output_manifest(void)
324 {
325 	char	*env[] = {"LC_CTYPE=C", "LC_COLLATE=C", "LC_NUMERIC=C", NULL};
326 	time_t		time_val;
327 	struct tm	*tm;
328 	char		time_buf[1024];
329 
330 	(void) printf("%s", MANIFEST_VER);
331 	time_val = time((time_t)0);
332 	tm = localtime(&time_val);
333 	(void) strftime(time_buf, sizeof (time_buf), "%A, %B %d, %Y (%T)", tm);
334 	(void) printf("! %s\n", time_buf);
335 	(void) printf("%s", FORMAT_STR);
336 	(void) fflush(stdout);
337 	/*
338 	 * Simply run sort and read from the the current stdin, which is really
339 	 * the output of create_manifest().
340 	 * Also, make sure the output is unique, since a given file may be
341 	 * included by several stanzas.
342 	 */
343 	if (execle("/usr/bin/sort", "sort", NULL, env) < 0) {
344 		perror("");
345 		exit(FATAL_EXIT);
346 	}
347 
348 	/*NOTREACHED*/
349 }
350 
351 /*
352  * Callback function for nftw()
353  */
354 static int
355 walker(const char *name, const struct stat64 *sp, int type, struct FTW *ftwx)
356 {
357 	int		ret;
358 	struct statvfs	path_vfs;
359 	boolean_t	dir_flag = B_FALSE;
360 	struct rule	*rule;
361 
362 	switch (type) {
363 	case FTW_F:	/* file 		*/
364 		rule = check_rules(name, 'F');
365 		if (rule != NULL) {
366 			if (rule->attr_list & ATTR_CONTENTS)
367 				compute_chksum = 1;
368 			else
369 				compute_chksum = 0;
370 		}
371 		break;
372 	case FTW_SL:	/* symbolic link	*/
373 	case FTW_DP:	/* end of directory	*/
374 	case FTW_DNR:	/* unreadable directory	*/
375 	case FTW_NS:	/* unstatable file	*/
376 		break;
377 	case FTW_D:	/* enter directory 		*/
378 
379 		/*
380 		 * Check to see if any subsequent rules are a subset
381 		 * of this rule; if they are, then mark them as
382 		 * "traversed".
383 		 */
384 		rule = subtree_root->next;
385 		while (rule != NULL) {
386 			if (strcmp(name, rule->subtree) == 0)
387 				rule->traversed = B_TRUE;
388 
389 			rule = rule->next;
390 		}
391 		dir_flag = B_TRUE;
392 		ret = statvfs(name, &path_vfs);
393 		if (ret < 0)
394 			eval_err = WARNING_EXIT;
395 		break;
396 	default:
397 		(void) fprintf(stderr, INVALID_FILE, name);
398 		eval_err = WARNING_EXIT;
399 		break;
400 	}
401 
402 	/* This is the function which really processes the file */
403 	ret = eval_file(name, sp);
404 
405 	/*
406 	 * Since the parameters to walker() are constrained by nftw(),
407 	 * need to use a global to reflect a WARNING.  Sigh.
408 	 */
409 	if (ret == WARNING_EXIT)
410 		eval_err = WARNING_EXIT;
411 
412 	/*
413 	 * This is a case of a directory which crosses into a mounted
414 	 * filesystem of a different type, e.g., UFS -> NFS.
415 	 * BART should not walk the new filesystem (by specification), so
416 	 * set this consolidation-private flag so the rest of the subtree
417 	 * under this directory is not waled.
418 	 */
419 	if (dir_flag &&
420 	    (strcmp(parent_vfs.f_basetype, path_vfs.f_basetype) != 0))
421 		ftwx->quit = FTW_PRUNE;
422 
423 	return (0);
424 }
425 
426 /*
427  * This file does the per-file evaluation and is run to generate every entry
428  * in the manifest.
429  *
430  * All output is written to a pipe which is read by the child process,
431  * which is running output_manifest().
432  */
433 static int
434 eval_file(const char *fname, const struct stat64 *statb)
435 {
436 	int	fd, ret, err_code, i;
437 	char	last_field[PATH_MAX], ftype, *acl_str,
438 		*quoted_name;
439 
440 	err_code = EXIT;
441 
442 	switch (statb->st_mode & S_IFMT) {
443 	/* Regular file */
444 	case S_IFREG: ftype = 'F'; break;
445 
446 	/* Directory */
447 	case S_IFDIR: ftype = 'D'; break;
448 
449 	/* Block Device */
450 	case S_IFBLK: ftype = 'B'; break;
451 
452 	/* Character Device */
453 	case S_IFCHR: ftype = 'C'; break;
454 
455 	/* Named Pipe */
456 	case S_IFIFO: ftype = 'P'; break;
457 
458 	/* Socket */
459 	case S_IFSOCK: ftype = 'S'; break;
460 
461 	/* Door */
462 	case S_IFDOOR: ftype = 'O'; break;
463 
464 	/* Symbolic link */
465 	case S_IFLNK: ftype = 'L'; break;
466 
467 	default: ftype = '-'; break;
468 	}
469 
470 	/* First, make sure this file should be cataloged */
471 
472 	if ((subtree_root != NULL) &&
473 	    (exclude_fname(fname, ftype, subtree_root)))
474 		return (err_code);
475 
476 	for (i = 0; i < PATH_MAX; i++)
477 		last_field[i] = '\0';
478 
479 	/*
480 	 * Regular files, compute the MD5 checksum and put it into 'last_field'
481 	 * UNLESS instructed to ignore the checksums.
482 	 */
483 	if (ftype == 'F') {
484 		if (compute_chksum) {
485 			fd = open(fname, O_RDONLY|O_LARGEFILE);
486 			if (fd < 0) {
487 				err_code = WARNING_EXIT;
488 				perror(fname);
489 
490 				/* default value since the computution failed */
491 				(void) strcpy(last_field, "-");
492 			} else {
493 				if (generate_hash(fd, last_field) != 0) {
494 					err_code = WARNING_EXIT;
495 					(void) fprintf(stderr, CONTENTS_WARN,
496 					    fname);
497 					(void) strcpy(last_field, "-");
498 				}
499 			}
500 			(void) close(fd);
501 		}
502 		/* Instructed to ignore checksums, just put in a '-' */
503 		else
504 			(void) strcpy(last_field, "-");
505 	}
506 
507 	/*
508 	 * For symbolic links, put the destination of the symbolic link into
509 	 * 'last_field'
510 	 */
511 	if (ftype == 'L') {
512 		ret = readlink(fname, last_field, sizeof (last_field));
513 		if (ret < 0) {
514 			err_code = WARNING_EXIT;
515 			perror(fname);
516 
517 			/* default value since the computation failed */
518 			(void) strcpy(last_field, "-");
519 		}
520 		else
521 			(void) strlcpy(last_field,
522 			    sanitized_fname(last_field, B_FALSE),
523 			    sizeof (last_field));
524 
525 		/*
526 		 * Boundary condition: possible for a symlink to point to
527 		 * nothing [ ln -s '' link_name ].  For this case, set the
528 		 * destination to "\000".
529 		 */
530 		if (strlen(last_field) == 0)
531 			(void) strcpy(last_field, "\\000");
532 	}
533 
534 	acl_str = get_acl_string(fname, statb, &err_code);
535 
536 	/* Sanitize 'fname', so its in the proper format for the manifest */
537 	quoted_name = sanitized_fname(fname, B_TRUE);
538 
539 	/* Start to build the entry.... */
540 	(void) printf("%s %c %d %o %s %x %d %d", quoted_name, ftype,
541 	    (int)statb->st_size, (int)statb->st_mode, acl_str,
542 	    (int)statb->st_mtime, (int)statb->st_uid, (int)statb->st_gid);
543 
544 	/* Finish it off based upon whether or not it's a device node */
545 	if ((ftype == 'B') || (ftype == 'C'))
546 		(void) printf(" %x\n", (int)statb->st_rdev);
547 	else if (strlen(last_field) > 0)
548 		(void) printf(" %s\n", last_field);
549 	else
550 		(void) printf("\n");
551 
552 	/* free the memory consumed */
553 	free(acl_str);
554 	free(quoted_name);
555 
556 	return (err_code);
557 }
558 
559 /*
560  * When creating a manifest, make sure all '?', tabs, space, newline, '/'
561  * and '[' are all properly quoted.  Convert them to a "\ooo" where the 'ooo'
562  * represents their octal value. For filesystem objects, as opposed to symlink
563  * targets, also canonicalize the pathname.
564  */
565 static char *
566 sanitized_fname(const char *fname, boolean_t canon_path)
567 {
568 	const char *ip;
569 	unsigned char ch;
570 	char *op, *quoted_name;
571 
572 	/* Initialize everything */
573 	quoted_name = safe_calloc((4 * PATH_MAX) + 1);
574 	ip = fname;
575 	op = quoted_name;
576 
577 	if (canon_path) {
578 		/*
579 		 * In the case when a relocatable root was used, the relocatable
580 		 * root should *not* be part of the manifest.
581 		 */
582 		ip += strlen(reloc_root);
583 
584 		/*
585 		 * In the case when the '-I' option was used, make sure
586 		 * the quoted_name starts with a '/'.
587 		 */
588 		if (*ip != '/')
589 			*op++ = '/';
590 	}
591 
592 	/* Now walk through 'fname' and build the quoted string */
593 	while ((ch = *ip++) != 0) {
594 		switch (ch) {
595 		/* Quote the following characters */
596 		case ' ':
597 		case '*':
598 		case '\n':
599 		case '?':
600 		case '[':
601 		case '\\':
602 		case '\t':
603 			op += sprintf(op, "\\%.3o", (unsigned char)ch);
604 			break;
605 
606 		/* Otherwise, simply append them */
607 		default:
608 			*op++ = ch;
609 			break;
610 		}
611 	}
612 
613 	*op = 0;
614 
615 	return (quoted_name);
616 }
617 
618 /*
619  * Function responsible for generating the ACL information for a given
620  * file.  Note, the string is put into buffer malloc'd by this function.
621  * Its the responsibility of the caller to free the buffer.
622  */
623 static char *
624 get_acl_string(const char *fname, const struct stat64 *statb, int *err_code)
625 {
626 	acl_t		*aclp;
627 	char		*acltext;
628 	int		error;
629 
630 	if (S_ISLNK(statb->st_mode)) {
631 		return (safe_strdup("-"));
632 	}
633 
634 	/*
635 	 *  Include trivial acl's
636 	 */
637 	error = acl_get(fname, 0, &aclp);
638 
639 	if (error != 0) {
640 		*err_code = WARNING_EXIT;
641 		(void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error));
642 		return (safe_strdup("-"));
643 	} else {
644 		acltext = acl_totext(aclp, 0);
645 		acl_free(aclp);
646 		return (acltext);
647 	}
648 }
649 
650 
651 /*
652  *
653  * description:	This routine reads stdin in BUF_SIZE chunks, uses the bits
654  *		to update the md5 hash buffer, and outputs the chunks
655  *		to stdout.  When stdin is exhausted, the hash is computed,
656  *		converted to a hexadecimal string, and returned.
657  *
658  * returns:	The md5 hash of stdin, or NULL if unsuccessful for any reason.
659  */
660 static int
661 generate_hash(int fdin, char *hash_str)
662 {
663 	unsigned char buf[BUF_SIZE];
664 	unsigned char hash[MD5_DIGEST_LENGTH];
665 	int i, amtread;
666 	MD5_CTX ctx;
667 
668 	MD5Init(&ctx);
669 
670 	for (;;) {
671 		amtread = read(fdin, buf, sizeof (buf));
672 		if (amtread == 0)
673 			break;
674 		if (amtread <  0)
675 			return (1);
676 
677 		/* got some data.  Now update hash */
678 		MD5Update(&ctx, buf, amtread);
679 	}
680 
681 	/* done passing through data, calculate hash */
682 	MD5Final(hash, &ctx);
683 
684 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
685 		(void) sprintf(hash_str + (i*2), "%2.2x", hash[i]);
686 
687 	return (0);
688 }
689 
/*
 * Used by 'bart create' with the '-I' option.  Each call stores the next
 * file name, prefixed by the relocatable root, into 'buf'.
 *
 * reloc_root	relocatable root, "" when none is in effect
 * argv		remaining command line arguments, NULL terminated; when the
 *		first one is NULL on the first call, names are read from
 *		stdin, one per line, for the rest of the run
 * buf		receives the resulting path (truncated to fit if necessary)
 * bufsize	size in bytes of 'buf'
 *
 * Blank lines on stdin are skipped instead of ending the list.
 *
 * Returns 0 when a name was stored and -1 when there are no more names.
 * The function keeps its position in static storage, so it serves a single
 * list per process.
 */
static int
read_filelist(char *reloc_root, char **argv, char *buf, size_t bufsize)
{
	static enum { SRC_UNSET, SRC_STDIN, SRC_ARGV } source = SRC_UNSET;
	static int		argv_index = 0;
	char			temp_buf[PATH_MAX];
	char 			*cp;

	/*
	 * INITIALIZATION:
	 * Decide once whether to read stdin or to step through argv.
	 * No args after '-I' means read stdin.
	 */
	if (source == SRC_UNSET)
		source = (argv[0] == NULL) ? SRC_STDIN : SRC_ARGV;

	buf[0] = '\0';

	if (source == SRC_STDIN) {
		do {
			if (fgets(temp_buf, sizeof (temp_buf), stdin) == NULL)
				return (-1);

			/* Drop the newline kept by fgets(), if any */
			temp_buf[strcspn(temp_buf, "\n")] = '\0';
		} while (temp_buf[0] == '\0');
		cp = temp_buf;
	} else {
		cp = argv[argv_index];
		if (cp == NULL)
			return (-1);

		/* Never step past the terminating NULL of argv */
		argv_index++;
	}

	/*
	 * Unlike similar code elsewhere, avoid adding a leading
	 * slash for relative pathnames.
	 */
	(void) snprintf(buf, bufsize,
	    (reloc_root[0] == '\0' || cp[0] == '/') ? "%s%s" : "%s/%s",
	    reloc_root, cp);

	return (0);
}
738