1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 /*
26  * Copyright (c) 2018, Joyent, Inc.
27  */
28 
29 /*
30  * Finds all unreferenced files in a source tree that do not match a list of
31  * permitted pathnames.
32  */
33 
34 #include <ctype.h>
35 #include <errno.h>
36 #include <fnmatch.h>
37 #include <ftw.h>
38 #include <stdarg.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <time.h>
43 #include <unistd.h>
44 #include <sys/param.h>
45 #include <sys/stat.h>
46 #include <sys/types.h>
47 
/*
 * Pathname set: a growable array of pathname pattern globs.  Patterns are
 * appended with pnset_add() and a pathname is tested against every stored
 * pattern with pnset_check().
 */
typedef struct {
	char		**paths;	/* array of strdup()ed patterns */
	unsigned int	npath;		/* number of entries in use */
	unsigned int	maxpaths;	/* number of entries allocated */
} pnset_t;
58 
59 /*
60  * Data associated with the current SCM manifest.
61  */
62 typedef struct scmdata {
63 	pnset_t		*manifest;
64 	char		metapath[MAXPATHLEN];
65 	char		root[MAXPATHLEN];
66 	unsigned int	rootlen;
67 	boolean_t	rootwarn;
68 } scmdata_t;
69 
/*
 * Per-SCM hooks (currently Git, Mercurial and TeamWare).  The check hook
 * reports whether a given unreferenced file is known to the SCM; the chdir
 * hook is told about each directory the tree walk enters.
 */
typedef int checkscm_func_t(const char *path, const struct FTW *ftwp);
typedef void chdirscm_func_t(const char *path);
76 
77 typedef struct {
78 	const char	*name;
79 	checkscm_func_t	*checkfunc;
80 	chdirscm_func_t	*chdirfunc;
81 } scm_t;
82 
83 static checkscm_func_t check_tw, check_scmdata;
84 static chdirscm_func_t chdir_hg, chdir_git;
85 static int	pnset_add(pnset_t *, const char *);
86 static int	pnset_check(const pnset_t *, const char *);
87 static void	pnset_empty(pnset_t *);
88 static void	pnset_free(pnset_t *);
89 static int	checkpath(const char *, const struct stat *, int, struct FTW *);
90 static pnset_t	*make_exset(const char *);
91 static void	warn(const char *, ...);
92 static void	die(const char *, ...);
93 
94 static const scm_t scms[] = {
95 	{ "tw",		check_tw,	NULL		},
96 	{ "teamware",	check_tw,	NULL		},
97 	{ "hg",		check_scmdata,	chdir_hg 	},
98 	{ "mercurial",	check_scmdata,	chdir_hg	},
99 	{ "git",	check_scmdata,	chdir_git	},
100 	{ NULL,		NULL, 		NULL		}
101 };
102 
103 static const scm_t	*scm;
104 static scmdata_t	scmdata;
105 static time_t		tstamp;		/* timestamp to compare files to */
106 static pnset_t		*exsetp;	/* pathname globs to ignore */
107 static const char	*progname;
108 
109 int
main(int argc,char * argv[])110 main(int argc, char *argv[])
111 {
112 	int c;
113 	char path[MAXPATHLEN];
114 	char subtree[MAXPATHLEN] = "./";
115 	char *tstampfile = ".build.tstamp";
116 	struct stat tsstat;
117 
118 	progname = strrchr(argv[0], '/');
119 	if (progname == NULL)
120 		progname = argv[0];
121 	else
122 		progname++;
123 
124 	while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
125 		switch (c) {
126 		case 'a':
127 			/* for compatibility; now the default */
128 			break;
129 
130 		case 's':
131 			(void) strlcat(subtree, optarg, MAXPATHLEN);
132 			break;
133 
134 		case 't':
135 			tstampfile = optarg;
136 			break;
137 
138 		case 'S':
139 			for (scm = scms; scm->name != NULL; scm++) {
140 				if (strcmp(scm->name, optarg) == 0)
141 					break;
142 			}
143 			if (scm->name == NULL)
144 				die("unsupported SCM `%s'\n", optarg);
145 			break;
146 
147 		default:
148 		case '?':
149 			goto usage;
150 		}
151 	}
152 
153 	argc -= optind;
154 	argv += optind;
155 
156 	if (argc != 2) {
157 usage:		(void) fprintf(stderr, "usage: %s [-s <subtree>] "
158 		    "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
159 		    progname);
160 		return (EXIT_FAILURE);
161 	}
162 
163 	/*
164 	 * Interpret a relative timestamp path as relative to srcroot.
165 	 */
166 	if (tstampfile[0] == '/')
167 		(void) strlcpy(path, tstampfile, MAXPATHLEN);
168 	else
169 		(void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
170 
171 	if (stat(path, &tsstat) == -1)
172 		die("cannot stat timestamp file \"%s\"", path);
173 	tstamp = tsstat.st_mtime;
174 
175 	/*
176 	 * Create the exception pathname set.
177 	 */
178 	exsetp = make_exset(argv[1]);
179 	if (exsetp == NULL)
180 		die("cannot make exception pathname set\n");
181 
182 	/*
183 	 * Walk the specified subtree of the tree rooted at argv[0].
184 	 */
185 	if (chdir(argv[0]) == -1)
186 		die("cannot change directory to \"%s\"", argv[0]);
187 
188 	if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
189 		die("cannot walk tree rooted at \"%s\"\n", argv[0]);
190 
191 	pnset_empty(exsetp);
192 	return (EXIT_SUCCESS);
193 }
194 
195 /*
196  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
197  */
198 static pnset_t *
hg_manifest(const char * hgroot)199 hg_manifest(const char *hgroot)
200 {
201 	FILE	*fp = NULL;
202 	char	*hgcmd = NULL;
203 	char	*newline;
204 	pnset_t	*pnsetp;
205 	char	path[MAXPATHLEN];
206 
207 	pnsetp = calloc(1, sizeof (pnset_t));
208 	if (pnsetp == NULL ||
209 	    asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
210 		goto fail;
211 
212 	fp = popen(hgcmd, "r");
213 	if (fp == NULL)
214 		goto fail;
215 
216 	while (fgets(path, sizeof (path), fp) != NULL) {
217 		newline = strrchr(path, '\n');
218 		if (newline != NULL)
219 			*newline = '\0';
220 
221 		if (pnset_add(pnsetp, path) == 0)
222 			goto fail;
223 	}
224 
225 	(void) pclose(fp);
226 	free(hgcmd);
227 	return (pnsetp);
228 fail:
229 	warn("cannot load hg manifest at %s", hgroot);
230 	if (fp != NULL)
231 		(void) pclose(fp);
232 	free(hgcmd);
233 	pnset_free(pnsetp);
234 	return (NULL);
235 }
236 
237 /*
238  * Load and return a pnset for the manifest for the Git repo at `gitroot'.
239  */
240 static pnset_t *
git_manifest(const char * gitroot)241 git_manifest(const char *gitroot)
242 {
243 	FILE	*fp = NULL;
244 	char	*gitcmd = NULL;
245 	char	*newline;
246 	pnset_t	*pnsetp;
247 	char	path[MAXPATHLEN];
248 
249 	pnsetp = calloc(1, sizeof (pnset_t));
250 	if (pnsetp == NULL ||
251 	    asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
252 		goto fail;
253 
254 	fp = popen(gitcmd, "r");
255 	if (fp == NULL)
256 		goto fail;
257 
258 	while (fgets(path, sizeof (path), fp) != NULL) {
259 		newline = strrchr(path, '\n');
260 		if (newline != NULL)
261 			*newline = '\0';
262 
263 		if (pnset_add(pnsetp, path) == 0)
264 			goto fail;
265 	}
266 
267 	(void) pclose(fp);
268 	free(gitcmd);
269 	return (pnsetp);
270 fail:
271 	warn("cannot load git manifest at %s", gitroot);
272 	if (fp != NULL)
273 		(void) pclose(fp);
274 	free(gitcmd);
275 	pnset_free(pnsetp);
276 	return (NULL);
277 }
278 
279 /*
280  * If necessary, change our active manifest to be appropriate for `path'.
281  */
282 static void
chdir_scmdata(const char * path,const char * meta,pnset_t * (* manifest_func)(const char * path))283 chdir_scmdata(const char *path, const char *meta,
284     pnset_t *(*manifest_func)(const char *path))
285 {
286 	char scmpath[MAXPATHLEN];
287 	char basepath[MAXPATHLEN];
288 	char *slash;
289 
290 	(void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
291 
292 	/*
293 	 * Change our active manifest if any one of the following is true:
294 	 *
295 	 *   1. No manifest is loaded.  Find the nearest SCM root to load from.
296 	 *
297 	 *   2. A manifest is loaded, but we've moved into a directory with
298 	 *	its own metadata directory (e.g., usr/closed).  Load from its
299 	 *	root.
300 	 *
301 	 *   3. A manifest is loaded, but no longer applies (e.g., the manifest
302 	 *	under usr/closed is loaded, but we've moved to usr/src).
303 	 */
304 	if (scmdata.manifest == NULL ||
305 	    (strcmp(scmpath, scmdata.metapath) != 0 &&
306 	    access(scmpath, X_OK) == 0) ||
307 	    strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
308 		pnset_free(scmdata.manifest);
309 		scmdata.manifest = NULL;
310 
311 		(void) strlcpy(basepath, path, MAXPATHLEN);
312 
313 		/*
314 		 * Walk up the directory tree looking for metadata
315 		 * subdirectories.
316 		 */
317 		while (access(scmpath, X_OK) == -1) {
318 			slash = strrchr(basepath, '/');
319 			if (slash == NULL) {
320 				if (!scmdata.rootwarn) {
321 					warn("no metadata directory "
322 					    "for \"%s\"\n", path);
323 					scmdata.rootwarn = B_TRUE;
324 				}
325 				return;
326 			}
327 			*slash = '\0';
328 			(void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
329 			    meta);
330 		}
331 
332 		/*
333 		 * We found a directory with an SCM metadata directory; record
334 		 * it and load its manifest.
335 		 */
336 		(void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
337 		(void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
338 		scmdata.manifest = manifest_func(scmdata.root);
339 
340 		/*
341 		 * The logic in check_scmdata() depends on scmdata.root having
342 		 * a single trailing slash, so only add it if it's missing.
343 		 */
344 		if (scmdata.root[strlen(scmdata.root) - 1] != '/')
345 			(void) strlcat(scmdata.root, "/", MAXPATHLEN);
346 		scmdata.rootlen = strlen(scmdata.root);
347 	}
348 }
349 
350 /*
351  * If necessary, change our active manifest to be appropriate for `path'.
352  */
353 static void
chdir_git(const char * path)354 chdir_git(const char *path)
355 {
356 	chdir_scmdata(path, ".git", git_manifest);
357 }
358 
359 static void
chdir_hg(const char * path)360 chdir_hg(const char *path)
361 {
362 	chdir_scmdata(path, ".hg", hg_manifest);
363 }
364 
365 /* ARGSUSED */
366 static int
check_scmdata(const char * path,const struct FTW * ftwp)367 check_scmdata(const char *path, const struct FTW *ftwp)
368 {
369 	/*
370 	 * The manifest paths are relative to the manifest root; skip past it.
371 	 */
372 	path += scmdata.rootlen;
373 
374 	return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
375 	    path));
376 }
377 
378 /*
379  * Check if a file is under TeamWare control by checking for its corresponding
380  * SCCS "s-dot" file.
381  */
382 static int
check_tw(const char * path,const struct FTW * ftwp)383 check_tw(const char *path, const struct FTW *ftwp)
384 {
385 	char sccspath[MAXPATHLEN];
386 
387 	(void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
388 	    path, path + ftwp->base);
389 
390 	return (access(sccspath, F_OK) == 0);
391 }
392 
393 /*
394  * Using `exceptfile' and a built-in list of exceptions, build and return a
395  * pnset_t consisting of all of the pathnames globs which are allowed to be
396  * unreferenced in the source tree.
397  */
398 static pnset_t *
make_exset(const char * exceptfile)399 make_exset(const char *exceptfile)
400 {
401 	FILE		*fp;
402 	char		line[MAXPATHLEN];
403 	char		*newline;
404 	pnset_t		*pnsetp;
405 	unsigned int	i;
406 
407 	pnsetp = calloc(1, sizeof (pnset_t));
408 	if (pnsetp == NULL)
409 		return (NULL);
410 
411 	/*
412 	 * Add any exceptions from the file.
413 	 */
414 	fp = fopen(exceptfile, "r");
415 	if (fp == NULL) {
416 		warn("cannot open exception file \"%s\"", exceptfile);
417 		goto fail;
418 	}
419 
420 	while (fgets(line, sizeof (line), fp) != NULL) {
421 		newline = strrchr(line, '\n');
422 		if (newline != NULL)
423 			*newline = '\0';
424 
425 		for (i = 0; isspace(line[i]); i++)
426 			;
427 
428 		if (line[i] == '#' || line[i] == '\0')
429 			continue;
430 
431 		if (pnset_add(pnsetp, line) == 0) {
432 			(void) fclose(fp);
433 			goto fail;
434 		}
435 	}
436 
437 	(void) fclose(fp);
438 	return (pnsetp);
439 fail:
440 	pnset_free(pnsetp);
441 	return (NULL);
442 }
443 
444 /*
445  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
446  */
447 static int
checkpath(const char * path,const struct stat * statp,int type,struct FTW * ftwp)448 checkpath(const char *path, const struct stat *statp, int type,
449     struct FTW *ftwp)
450 {
451 	switch (type) {
452 	case FTW_F:
453 		/*
454 		 * Skip if the file is referenced or in the exception list.
455 		 */
456 		if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
457 			return (0);
458 
459 		/*
460 		 * If requested, restrict ourselves to unreferenced files
461 		 * under SCM control.
462 		 */
463 		if (scm == NULL || scm->checkfunc(path, ftwp))
464 			(void) puts(path);
465 		return (0);
466 
467 	case FTW_D:
468 		/*
469 		 * Prune any directories in the exception list.
470 		 */
471 		if (pnset_check(exsetp, path)) {
472 			ftwp->quit = FTW_PRUNE;
473 			return (0);
474 		}
475 
476 		/*
477 		 * If necessary, advise the SCM logic of our new directory.
478 		 */
479 		if (scm != NULL && scm->chdirfunc != NULL)
480 			scm->chdirfunc(path);
481 
482 		return (0);
483 
484 	case FTW_DNR:
485 		warn("cannot read \"%s\"", path);
486 		return (0);
487 
488 	case FTW_NS:
489 		warn("cannot stat \"%s\"", path);
490 		return (0);
491 
492 	default:
493 		break;
494 	}
495 
496 	return (0);
497 }
498 
499 /*
500  * Add `path' to the pnset_t pointed to by `pnsetp'.
501  */
502 static int
pnset_add(pnset_t * pnsetp,const char * path)503 pnset_add(pnset_t *pnsetp, const char *path)
504 {
505 	char **newpaths;
506 	unsigned int maxpaths;
507 
508 	if (pnsetp->npath == pnsetp->maxpaths) {
509 		maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
510 		newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
511 		if (newpaths == NULL)
512 			return (0);
513 		pnsetp->paths = newpaths;
514 		pnsetp->maxpaths = maxpaths;
515 	}
516 
517 	pnsetp->paths[pnsetp->npath] = strdup(path);
518 	if (pnsetp->paths[pnsetp->npath] == NULL)
519 		return (0);
520 
521 	pnsetp->npath++;
522 	return (1);
523 }
524 
525 /*
526  * Check `path' against the pnset_t pointed to by `pnsetp'.
527  */
528 static int
pnset_check(const pnset_t * pnsetp,const char * path)529 pnset_check(const pnset_t *pnsetp, const char *path)
530 {
531 	unsigned int i;
532 
533 	for (i = 0; i < pnsetp->npath; i++) {
534 		if (fnmatch(pnsetp->paths[i], path, 0) == 0)
535 			return (1);
536 	}
537 	return (0);
538 }
539 
540 /*
541  * Empty the pnset_t pointed to by `pnsetp'.
542  */
543 static void
pnset_empty(pnset_t * pnsetp)544 pnset_empty(pnset_t *pnsetp)
545 {
546 	while (pnsetp->npath-- != 0)
547 		free(pnsetp->paths[pnsetp->npath]);
548 
549 	free(pnsetp->paths);
550 	pnsetp->maxpaths = 0;
551 }
552 
553 /*
554  * Free the pnset_t pointed to by `pnsetp'.
555  */
556 static void
pnset_free(pnset_t * pnsetp)557 pnset_free(pnset_t *pnsetp)
558 {
559 	if (pnsetp != NULL) {
560 		pnset_empty(pnsetp);
561 		free(pnsetp);
562 	}
563 }
564 
565 /* PRINTFLIKE1 */
566 static void
warn(const char * format,...)567 warn(const char *format, ...)
568 {
569 	va_list alist;
570 	char *errstr = strerror(errno);
571 
572 	if (errstr == NULL)
573 		errstr = "<unknown error>";
574 
575 	(void) fprintf(stderr, "%s: ", progname);
576 
577 	va_start(alist, format);
578 	(void) vfprintf(stderr, format, alist);
579 	va_end(alist);
580 
581 	if (strrchr(format, '\n') == NULL)
582 		(void) fprintf(stderr, ": %s\n", errstr);
583 }
584 
585 /* PRINTFLIKE1 */
586 static void
die(const char * format,...)587 die(const char *format, ...)
588 {
589 	va_list alist;
590 	char *errstr = strerror(errno);
591 
592 	if (errstr == NULL)
593 		errstr = "<unknown error>";
594 
595 	(void) fprintf(stderr, "%s: fatal: ", progname);
596 
597 	va_start(alist, format);
598 	(void) vfprintf(stderr, format, alist);
599 	va_end(alist);
600 
601 	if (strrchr(format, '\n') == NULL)
602 		(void) fprintf(stderr, ": %s\n", errstr);
603 
604 	exit(EXIT_FAILURE);
605 }
606