xref: /illumos-gate/usr/src/cmd/filesync/anal.c (revision 63360950)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
24  *
25  * module:
26  *	anal.c
27  *
28  * purpose:
29  *	routines to analyze the file trees and figure out what has changed
30  *	and queue files for reconciliation.  It also contains tree enumeration
31  *	routines to for other purposes (pruning and link location).
32  *
33  * contents:
34  *
35  *  change analysis:
36  *	analyze .... (top level) analyze all files in the tree for changes
37  *	summary .... print out change/reconciliation statistics for each base
38  *	check_file . (static) look for changes and queue file for reconciliation
39  *	check_changes (static) figure out if a particular file has changed
40  *	queue_file . (static) add a file to the reconciliation list
41  *
42  *  other tree enumeration functions:
43  *	prune_file . (static) recursive descent and actual pruning
44  *	prune ...... (top level) initiate pruning analysis for nonexistant files
45  *	find_link .. look for other files to which a file may be a link
46  *	link_update. propagate changed stat info to all other links
47  *	same_name .. (static) figure out if two nodes describe same file
48  *
49  *  misc:
50  *	push_name .. maintain a running full pathname as we descend
51  *	pop_name ... maintain a running full pathname as we pop back
52  *	get_name ... return full pathname for the current file
53  *
54  * notes:
55  *	analysis is limited to files that were evaluated in the previous
56  *	pass ... since we don't have complete information about files that
57  *	were not evaluated in the previous pass.
58  */
59 #pragma ident	"%Z%%M%	%I%	%E% SMI"
60 
61 #include <stdio.h>
62 #include <stdlib.h>
63 #include <strings.h>
64 
65 #include "messages.h"
66 #include "filesync.h"
67 #include "database.h"
68 #include "debug.h"
69 
70 /*
71  * routines:
72  */
73 void push_name(const char *);
74 void pop_name();
75 char *get_name(struct file *);
76 static errmask_t check_file(struct file *fp);
77 static diffmask_t check_changes(struct file *fp, int first, int second);
78 static int prune_file(struct file *fp);
79 static void queue_file(struct file *fp);
80 
81 /*
82  * globals
83  */
84 static struct file *changes;	/* list of files to be reconciled	*/
85 
86 static long total_files;	/* total number of files being considered  */
87 static long est_deletes;	/* estimated number of files to be deleted */
88 static long est_rmdirs;		/* est rmdirs of non-empty directories	   */
89 
90 int inum_changes;		/* LISTed directories whose I#s changed	   */
91 
92 /*
93  * routine:
94  *	analyze
95  *
96  * purpose:
97  *	top level routine for the analysis/reconciliation process
98  *
99  * parameters:
100  *	none
101  *
102  * returns:
103  *	error mask
104  *
105  * notes:
106  *	a critical side effect of this routine is the creation of
107  *	the reconciliation list, an ordered list of files that
108  *	needed to be processed in the subsequent reconciliation pass
109  */
110 errmask_t
analyze()111 analyze()
112 {	struct base *bp;
113 	struct file *fp;
114 	int errs = 0;
115 	int err;
116 	int percentage;
117 	bool_t aborted = FALSE;
118 	char msgbuf[MAX_LINE];
119 
120 	/*
121 	 * run through all bases and directories looking for files
122 	 * that have been renamed.  This must be done before the
123 	 * difference analysis because a directory rename can introduce
124 	 * radical restructuring into a name-based tree.
125 	 */
126 	for (bp = bases; bp; bp = bp->b_next) {
127 		for (fp = bp->b_files; fp; fp = fp->f_next)
128 			if (fp->f_flags & F_EVALUATE)
129 				errs |= find_renames(fp);
130 	}
131 
132 	/*
133 	 * run through all bases and files looking for candidates
134 	 * note, however that we only descend into trees that have
135 	 * the evaluate flag turned on.  As a result of new rules or
136 	 * restriction arguments, we may be deliberatly ignoring
137 	 * large amounts of the baseline.   This means we won't do
138 	 * any stats to update the information in those nodes, and
139 	 * they will be written back just as they were.
140 	 *
141 	 * note that there is code to prune out baseline nodes for
142 	 * files that no longer exist, but that code is in reconcile
143 	 * and will never get a chance to run on nodes that aren't
144 	 * analyzed.
145 	 *
146 	 * we also want to run though all nodes with STAT errors
147 	 * so that we can put them on the reconciliation list.
148 	 */
149 	for (bp = bases; bp; bp = bp->b_next) {
150 		for (fp = bp->b_files; fp; fp = fp->f_next)
151 			if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
152 				errs |= check_file(fp);
153 	}
154 
155 	/*
156 	 * my greatest fear is that someday, somehow, by messing with
157 	 * variables or baselines or who-knows-what, that someone will
158 	 * run a reconciliation against a large tree that doesn't correspond
159 	 * to the baseline, and I will infer that a bazillion files have
160 	 * been deleted and will propagate the slaughter before anyone
161 	 * can say somebody stop that maniac.
162 	 *
163 	 * in order to prevent such a possibility, we have a few different
164 	 * sanity checks.  There is, of course, a tradeoff here between
165 	 * danger and irritation.  The current set of heuristics for whether
166 	 * or not to generate a warning are (any of)
167 	 *
168 	 *	at least CONFIRM_MIN files have been deleted AND
169 	 *	CONFIRM_PCT of all files have been deleted
170 	 *
171 	 *	the inode number on a LISTed directory has changed
172 	 *
173 	 *	a non-empty directory has been deleted.
174 	 */
175 	msgbuf[0] = 0;
176 
177 	percentage = (est_deletes * 100) / (total_files ? total_files : 1);
178 	if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
179 		sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
180 	else if (inum_changes > 0)
181 		sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
182 	else if (est_rmdirs)
183 		sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
184 
185 	if (msgbuf[0])
186 		confirm(msgbuf);
187 
188 	/*
189 	 * TRICK:
190 	 *	the change list contains both files that have changed
191 	 *	(and probably warrant reconciliation) and files that
192 	 *	we couldn't get up-to-date stat information on.  The
193 	 *	latter files should just be flagged as being in conflict
194 	 *	so they can be reported in the summary.  The same is
195 	 *	true of all subsequent files if we abort reconciliation.
196 	 */
197 	for (fp = changes; fp; fp = fp->f_rnext)
198 		if (aborted || (fp->f_flags & F_STAT_ERROR)) {
199 			fp->f_flags |= F_CONFLICT;
200 			/* if it isn't in the baseline yet, don't add it */
201 			if ((fp->f_flags & F_IN_BASELINE) == 0)
202 				fp->f_flags |= F_REMOVE;
203 			fp->f_problem = aborted ? PROB_aborted : PROB_restat;
204 			(fp->f_base)->b_unresolved++;
205 			errs |= ERR_UNRESOLVED;
206 			if (opt_verbose)
207 				fprintf(stdout,
208 					gettext(aborted ? V_suppressed
209 							: V_nostat),
210 					fp->f_fullname);
211 		} else {
212 			err = reconcile(fp);
213 			errs |= err;
214 			if (opt_halt && (err & ERR_ABORT)) {
215 				fprintf(stderr, gettext(ERR_abort_h));
216 				aborted = TRUE;
217 			}
218 		}
219 
220 	return (errs);
221 }
222 
223 /*
224  * routine:
225  *	prune_file
226  *
227  * purpose:
228  *	to look for file entries that should be pruned from baseline
229  *	prune the current file if it needs pruning, and recursively
230  *	descend if it is a directory.
231  *
232  * parameters:
233  *	pointer to file node
234  */
235 static int
prune_file(struct file * fp)236 prune_file(struct file *fp)
237 {	struct file *cp;
238 	int prunes = 0;
239 
240 	/* if node hasn't been evaluated, mark it for removal	*/
241 	if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
242 		fp->f_flags |= F_REMOVE;
243 		prunes++;
244 		if (opt_debug & DBG_ANAL)
245 			fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
246 	}
247 
248 	/* now check our children				*/
249 	for (cp = fp->f_files; cp; cp = cp->f_next)
250 		prunes += prune_file(cp);
251 
252 	return (prunes);
253 }
254 
255 /*
256  * routine:
257  *	prune
258  *
259  * purpose:
260  *	to prune the baseline of entries that no longer correspond to
261  *	existing rules.
262  *
263  * notes:
264  *	This routine just calls prune_file on the top of each base tree.
265  */
266 int
prune()267 prune()
268 {	struct base *bp;
269 	struct file *fp;
270 	int prunes = 0;
271 
272 	for (bp = bases; bp; bp = bp->b_next) {
273 		for (fp = bp->b_files; fp; fp = fp->f_next)
274 			prunes += prune_file(fp);
275 
276 		if ((bp->b_flags & F_EVALUATE) == 0)
277 			bp->b_flags |= F_REMOVE;
278 	}
279 
280 	return (prunes);
281 }
282 
283 /*
284  * routine:
285  *	summary
286  *
287  * purpose:
288  *	to print out statics and conflict lists
289  */
290 void
summary()291 summary()
292 {	struct base *bp;
293 	struct file *fp;
294 	extern bool_t need_super;
295 
296 	(void) fflush(stdout);
297 
298 	for (bp = bases; bp; bp = bp->b_next) {
299 
300 		/* see if this base was irrelevant	*/
301 		if ((bp->b_flags & F_EVALUATE) == 0)
302 			continue;
303 
304 		/* print out a summary for this base	*/
305 		fprintf(stderr, gettext(SUM_hd),
306 			bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
307 		fprintf(stderr, gettext(SUM_dst),
308 			bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
309 		fprintf(stderr, gettext(SUM_src),
310 			bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
311 		if (bp->b_unresolved)
312 			fprintf(stderr, gettext(SUM_unresolved),
313 				bp->b_unresolved);
314 
315 
316 		/* print out a list of unreconciled files for this base	*/
317 		for (fp = changes; fp; fp = fp->f_rnext) {
318 			if (fp->f_base != bp)
319 				continue;
320 			if ((fp->f_flags & F_CONFLICT) == 0)
321 				continue;
322 			fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
323 				fp->f_problem ? fp->f_problem : "???");
324 		}
325 
326 		fprintf(stderr, "\n");
327 	}
328 
329 	if (need_super)
330 		fprintf(stderr, gettext(WARN_super));
331 }
332 
333 /*
334  * routine:
335  *	check_file
336  *
337  * purpose:
338  *	figure out if a file requires reconciliation and recursively
339  *	descend into all sub-files and directories
340  *
341  * parameters:
342  *	base pointer
343  *	file pointer
344  *
345  * returns:
346  *	error mask
347  *	built up changes needed list
348  *	updated statistics
349  *
350  * notes:
351  *	this routine builds up a path name as it descends through
352  *	the tree (see push_name, pop_name, get_name).
353  */
354 static errmask_t
check_file(struct file * fp)355 check_file(struct file *fp)
356 {	struct file *cp;
357 	int errs = 0;
358 
359 	if ((fp->f_flags & F_STAT_ERROR) == 0) {
360 		/* see if the source has changed	*/
361 		fp->f_info[OPT_BASE].f_modtime	= fp->f_s_modtime;
362 		fp->f_info[OPT_BASE].f_ino	= fp->f_s_inum;
363 		fp->f_info[OPT_BASE].f_d_maj	= fp->f_s_maj;
364 		fp->f_info[OPT_BASE].f_d_min	= fp->f_s_min;
365 		fp->f_info[OPT_BASE].f_nlink	= fp->f_s_nlink;
366 		fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
367 
368 		/* see if the destination has changed	*/
369 		fp->f_info[OPT_BASE].f_modtime	= fp->f_d_modtime;
370 		fp->f_info[OPT_BASE].f_ino    	= fp->f_d_inum;
371 		fp->f_info[OPT_BASE].f_d_maj    = fp->f_d_maj;
372 		fp->f_info[OPT_BASE].f_d_min    = fp->f_d_min;
373 		fp->f_info[OPT_BASE].f_nlink	= fp->f_d_nlink;
374 		fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
375 
376 		/* if nobody thinks the file exists, baseline needs pruning */
377 		if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
378 			fp->f_srcdiffs |= D_DELETE;
379 			fp->f_dstdiffs |= D_DELETE;
380 		}
381 
382 		/* keep track of possible deletions to look for trouble	*/
383 		if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
384 			est_deletes++;
385 
386 			/* see if file is (or has been) a non-empty directory */
387 			if (fp->f_files)
388 				est_rmdirs++;
389 		}
390 	}
391 
392 	/* if we found differences, queue the file for reconciliation 	*/
393 	if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
394 		queue_file(fp);
395 
396 		if (opt_debug & DBG_ANAL) {
397 			fprintf(stderr, "ANAL: src=%s",
398 				showflags(diffmap, fp->f_srcdiffs));
399 			fprintf(stderr, " dst=%s",
400 				showflags(diffmap, fp->f_dstdiffs));
401 			fprintf(stderr, " flgs=%s",
402 				showflags(fileflags, fp->f_flags));
403 			fprintf(stderr, " name=%s\n", fp->f_fullname);
404 		}
405 	}
406 
407 	/* bump the total file count	*/
408 	fp->f_base->b_totfiles++;
409 	total_files++;
410 
411 	/* if this is not a directory, we're done	*/
412 	if (fp->f_files == 0)
413 		return (errs);
414 
415 	/*
416 	 * If this is a directory, we need to recursively analyze
417 	 * our children, but only children who have been evaluated.
418 	 * If a node has not been evaluated, then we don't have
419 	 * updated stat information and there is nothing to analyze.
420 	 *
421 	 * we also want to run though all nodes with STAT errors
422 	 * so that we can put them on the reconciliation list.
423 	 * If a directory is unreadable on one side, all files
424 	 * under that directory (ON BOTH SIDES) must be marked as
425 	 * blocked by stat errors.
426 	 */
427 	push_name(fp->f_name);
428 
429 	for (cp = fp->f_files; cp; cp = cp->f_next) {
430 		if (fp->f_flags & F_STAT_ERROR)
431 			cp->f_flags |= F_STAT_ERROR;
432 		if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
433 			errs |= check_file(cp);
434 	}
435 
436 	pop_name();
437 
438 	return (errs);
439 }
440 
441 /*
442  * routine:
443  *	check_changes
444  *
445  * purpose:
446  *	to figure out what has changed for a specific file
447  *
448  * parameters:
449  *	file pointer
450  *	the reference info
451  *	the info to be checked for changes
452  *
453  * returns:
454  *	diff mask
455  *
456  * notes:
457  *	this routine doesn't pretend to understand what happened.
458  *	it merely enumerates the ways in which the files differ.
459  */
460 static diffmask_t
check_changes(struct file * fp,int ref,int new)461 check_changes(struct file *fp, int ref, int new)
462 {	struct fileinfo *rp, *np;
463 	int mask = 0;
464 	int type;
465 
466 	rp = &fp->f_info[ref];
467 	np = &fp->f_info[new];
468 
469 	if (np->f_uid != rp->f_uid)
470 		mask |= D_UID;
471 
472 	if (np->f_gid != rp->f_gid)
473 		mask |= D_GID;
474 
475 	if (np->f_mode != rp->f_mode)
476 		mask |= D_PROT;
477 
478 	type = np->f_type;
479 	if (type != rp->f_type) {
480 		if (type == 0)
481 			mask |= D_DELETE;
482 		else if (rp->f_type == 0)
483 			mask |= D_CREATE;
484 		else
485 			mask |= D_TYPE;
486 	} else if (type == S_IFBLK || type == S_IFCHR) {
487 		/*
488 		 * for special files, we only look at the maj/min
489 		 */
490 		if (np->f_rd_maj != rp->f_rd_maj)
491 			mask |= D_SIZE;
492 		if (np->f_rd_min != rp->f_rd_min)
493 			mask |= D_SIZE;
494 	} else if (type != S_IFDIR) {
495 		/*
496 		 * for directories, we don't look directly at
497 		 * the contents, so these fields don't mean
498 		 * anything.  If the directories have changed
499 		 * in any interesting way, we'll find it by
500 		 * walking the tree.
501 		 */
502 		if (np->f_modtime > rp->f_modtime)
503 			mask |= D_MTIME;
504 
505 		if (np->f_size != rp->f_size)
506 			mask |= D_SIZE;
507 
508 		if (np->f_nlink != rp->f_nlink)
509 			mask |= D_LINKS;
510 	}
511 
512 	if (cmp_acls(rp, np) == 0)
513 		mask |= D_FACLS;
514 
515 	return (mask);
516 }
517 
518 /*
519  * routine:
520  *	same_name
521  *
522  * purpose:
523  *	to figure out whether or not two databsae nodes actually refer to
524  *	the same file.
525  *
526  * parameters:
527  *	pointers to two file description nodes
528  *	which side we should check
529  *
530  * returns:
531  *	TRUE/FALSE
532  *
533  * notes:
534  *	if a single directory is specified in multiple base pairs, it
535  *	is possible to have multiple nodes in the database describing
536  *	the same file.  This routine is supposed to detect those cases.
537  *
538  *	what should be a trivial string comparison is complicated by
539  *	the possibility that the two nodes might describe the same file
540  *	from base directories at different depths.  Thus, rather than
541  *	comparing two strings, we really want to compare the concatenation
542  *	of two pairs of strings.  Unfortunately calling full_name would
543  *	be awkward right now, so instead we have our own comparison
544  *	routine that automatically skips from the first string to
545  *	the second.
546  */
547 static bool_t
same_name(struct file * f1,struct file * f2,side_t srcdst)548 same_name(struct file *f1, struct file *f2, side_t srcdst)
549 {
550 	char *s1, *s2, *x1, *x2;
551 
552 	if (srcdst == OPT_SRC) {
553 		s1 = (f1->f_base)->b_src_name;
554 		s2 = (f2->f_base)->b_src_name;
555 	} else {
556 		s1 = (f1->f_base)->b_dst_name;
557 		s2 = (f2->f_base)->b_dst_name;
558 	}
559 	x1 = f1->f_fullname;
560 	x2 = f2->f_fullname;
561 
562 	/*
563 	 * Compare the two names, and if they differ before they end
564 	 * this is a non-match.  If they both end at the same time,
565 	 * this is a match.
566 	 *
567 	 * The trick here is that each string is actually the logical
568 	 * concatenation of two strings, and we need to automatically
569 	 * wrap from the first to the second string in each pair.  There
570 	 * is no requirement that the two (concatenated) strings be
571 	 * broken at the same point, so we have a slightly baroque
572 	 * comparsion loop.
573 	 */
574 	while (*s1 && *s1 == *s2) {
575 
576 		/*
577 		 * strings have been identical so far, so advance the
578 		 * pointers and continue the comparison.  The trick
579 		 * is that when either string ends, we have to wrap
580 		 * over to its extension.
581 		 */
582 		s1++; s2++;
583 		if (*s1 && *s2)
584 			continue;
585 
586 		/*
587 		 * at least one of the strings has ended.
588 		 * there is an implicit slash between the string
589 		 * and its extension, and this has to be matched
590 		 * against the other string.
591 		 */
592 		if (*s1 != *s2) {
593 			if (*s1 == 0 && *s2 == '/')
594 				s2++;
595 			else if (*s2 == 0 && *s1 == '/')
596 				s1++;
597 			else
598 				/* the disagreement doesn't come at a slash */
599 				break;
600 		}
601 
602 		/*
603 		 * if either string has ended, wrap to its extension
604 		 */
605 		if (*s1 == 0 && x1 != 0) {
606 			s1 = x1;
607 			x1 = 0;
608 		}
609 		if (*s2 == 0 && x2 != 0) {
610 			s2 = x2;
611 			x2 = 0;
612 		}
613 	}
614 
615 	return (*s1 == *s2);
616 }
617 
618 /*
619  * routine:
620  *	find_link
621  *
622  * purpose:
623  *	to figure out if there is a file to which we should
624  *	be creating a link (rather than making a copy)
625  *
626  * parameters:
627  *	file node for the file to be created (that we hope is merely a link)
628  *	which side is to be changed (src/dst)
629  *
630  * return:
631  *	0	no link is appropriate
632  *	else	pointer to file node for link referent
633  *
634  * notes:
635  *	there are a few strange heuristics in this routine and I
636  *	wouldn't bet my soul that I got all of them right.  The general
637  *	theory is that when a new file is created, we look to see if it
638  *	is a link to another file on the changed side, and if it is, we
639  *	find the corresponding file on the unchanged side.
640  *
641  *	cases we want to be able to handle:
642  *	    1.	one or more links are created to a prexisting file
643  *	    2.	a preexisting only link is renamed
644  *	    3.  a rename of one of multiple links to a preexisting file
645  *	    4.	a single file is created with multiple links
646  */
647 struct file *
find_link(struct file * fp,side_t srcdst)648 find_link(struct file *fp, side_t srcdst)
649 {	struct file *lp;
650 	side_t chgside, tgtside;
651 	struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
652 
653 	/* chg = side on which the change was noticed		*/
654 	/* tgt = side to which the change is to be propagated	*/
655 	chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
656 	tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
657 	fcp = &fp->f_info[chgside];
658 	ftp = &fp->f_info[tgtside];
659 
660 	/*
661 	 * cases 1 and 3
662 	 *
663 	 * When a new link is created, we should be able to find
664 	 * another file in the changed hierarchy that has the same
665 	 * I-node number.  We expect it to be on the changed list
666 	 * because the link count will have gone up or because all
667 	 * of the copies are new.  If we find one, then the new file
668 	 * on the receiving file should be a link to the corresponding
669 	 * existing file.
670 	 *
671 	 * case 4
672 	 *
673 	 * the first link will be dealt with as a copy, but all
674 	 * subsequent links should find an existing file analogous
675 	 * to one of the links on the changed side, and create
676 	 * corresponding links on the other side.
677 	 *
678 	 * in each of these cases, there should be multiple links
679 	 * on the changed side.  If the linkcount on the changed
680 	 * side is one, we needn't bother searching for other links.
681 	 */
682 	if (fcp->f_nlink > 1)
683 	for (lp = changes; lp; lp = lp->f_rnext) {
684 		/* finding the same node doesn't count	*/
685 		if (fp == lp)
686 			continue;
687 
688 		tgtp = &lp->f_info[tgtside];
689 		chgp = &lp->f_info[chgside];
690 
691 		/*
692 		 * if the file doesn't already exist on the target side
693 		 * we cannot make a link to it
694 		 */
695 		if (tgtp->f_mode == 0)
696 			continue;
697 
698 		/*
699 		 * if this is indeed a link, then the prospective file on
700 		 * the changed side will have the same dev/inum as the file
701 		 * we are looking for
702 		 */
703 		if (fcp->f_d_maj != chgp->f_d_maj)
704 			continue;
705 		if (fcp->f_d_min != chgp->f_d_min)
706 			continue;
707 		if (fcp->f_ino != chgp->f_ino)
708 			continue;
709 
710 		/*
711 		 * if the target side is already a link to this file,
712 		 * then there is no new link to be created
713 		 * FIX: how does this interact with copies over links
714 		 */
715 		if ((ftp->f_d_maj == tgtp->f_d_maj) &&
716 		    (ftp->f_d_min == tgtp->f_d_min) &&
717 		    (ftp->f_ino   == tgtp->f_ino))
718 			continue;
719 
720 		/*
721 		 * there is a pathological situation where a single file
722 		 * might appear under multiple base directories.  This is
723 		 * damned awkward to detect in any other way, so we must
724 		 * check to see if we have just found another database
725 		 * instance for the same file (on the changed side).
726 		 */
727 		if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
728 			continue;
729 
730 		if (opt_debug & DBG_ANAL)
731 			fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
732 				fp->f_fullname, lp->f_fullname);
733 
734 		return (lp);
735 	}
736 
737 	/*
738 	 * case 2: a simple rename of the only link
739 	 *
740 	 * In this case, there may not be any other existing file on
741 	 * the changed side that has the same I-node number.  There
742 	 * might, however, be a record of such a file in the baseline.
743 	 * If we can find an identical file with a different name that
744 	 * has recently disappeared, we have a likely rename.
745 	 */
746 	for (lp = changes; lp; lp = lp->f_rnext) {
747 
748 		/* finding the same node doesn't count			*/
749 		if (fp == lp)
750 			continue;
751 
752 		tgtp = &lp->f_info[tgtside];
753 		chgp = &lp->f_info[chgside];
754 
755 		/*
756 		 * if the file still exists on the changed side this is
757 		 * not a simple rename, and in fact the previous pass
758 		 * would have found it.
759 		 */
760 		if (chgp->f_mode != 0)
761 			continue;
762 
763 		/*
764 		 * the inode number for the new link on the changed
765 		 * side must match the inode number for the old link
766 		 * from the baseline.
767 		 */
768 		if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
769 							: lp->f_s_maj))
770 			continue;
771 		if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
772 							: lp->f_s_min))
773 			continue;
774 		if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
775 							: lp->f_s_inum))
776 			continue;
777 
778 		/* finding a file we are already linked to doesn't help	*/
779 		if ((ftp->f_d_maj == tgtp->f_d_maj) &&
780 		    (ftp->f_d_min == tgtp->f_d_min) &&
781 		    (ftp->f_ino   == tgtp->f_ino))
782 			continue;
783 
784 		/*
785 		 * there is a danger that we will confuse an
786 		 * inode reallocation with a rename.  We should
787 		 * only consider this to be a rename if the
788 		 * new file is identical to the old one
789 		 */
790 		basp = &lp->f_info[OPT_BASE];
791 		if (fcp->f_type != basp->f_type)
792 			continue;
793 		if (fcp->f_size != basp->f_size)
794 			continue;
795 		if (fcp->f_mode != basp->f_mode)
796 			continue;
797 		if (fcp->f_uid != basp->f_uid)
798 			continue;
799 		if (fcp->f_gid != basp->f_gid)
800 			continue;
801 
802 		if (opt_debug & DBG_ANAL)
803 			fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
804 				fp->f_fullname, lp->f_fullname);
805 
806 		return (lp);
807 	}
808 
809 	return (0);
810 }
811 
812 /*
813  * routine:
814  *	has_other_links
815  *
816  * purpose:
817  *	to determine whether or not there is more that one link to a
818  *	particular file.  We are willing to delete a link to a file
819  *	that has changed if we will still have other links to it.
820  *	The trick here is that we only care about links under our
821  *	dominion.
822  *
823  * parameters:
824  *	file pointer to node we are interested in
825  *	which side we are looking to additional links on
826  *
827  * returns:
828  *	TRUE if there are multiple links
829  *	FALSE if this is the only one we know of
830  */
831 bool_t
has_other_links(struct file * fp,side_t srcdst)832 has_other_links(struct file *fp, side_t srcdst)
833 {	struct file *lp;
834 	struct fileinfo *fip, *lip;
835 
836 	fip = &fp->f_info[srcdst];
837 
838 	/* if the link count is one, there couldn't be others	*/
839 	if (fip->f_nlink < 2)
840 		return (FALSE);
841 
842 	/* look for any other files for the same inode		*/
843 	for (lp = changes; lp; lp = lp->f_rnext) {
844 		/* finding the same node doesn't count	*/
845 		if (fp == lp)
846 			continue;
847 
848 		lip = &lp->f_info[srcdst];
849 
850 		/*
851 		 * file must still exist on this side
852 		 */
853 		if (lip->f_mode == 0)
854 			continue;
855 
856 		/*
857 		 * if this is indeed a link, then the prospective file on
858 		 * the changed side will have the same dev/inum as the file
859 		 * we are looking for
860 		 */
861 		if (lip->f_d_maj != fip->f_d_maj)
862 			continue;
863 		if (lip->f_d_min != fip->f_d_min)
864 			continue;
865 		if (lip->f_ino != fip->f_ino)
866 			continue;
867 
868 		/*
869 		 * we have found at least one other link
870 		 */
871 		return (TRUE);
872 	}
873 
874 	return (FALSE);
875 }
876 
877 /*
878  * routine:
879  *	link_update
880  *
881  * purpose:
882  *	to propoagate a stat change to all other file nodes that
883  *	correspond to the same I-node on the changed side
884  *
885  * parameters:
886  *	file pointer for the updated file
887  *	which side was changed
888  *
889  * returns:
890  *	void
891  *
892  * notes:
893  *	if we have copied onto a file, we have copied onto all
894  *	of its links, but since we do all stats before we do any
895  *	copies, the stat information recently collected for links
896  *	is no longer up-to-date, and this would result in incorrect
897  *	reconciliation (redundant copies).
898  *
899  *	There is an assumption here that all links to a changed
900  *	file will be in the change list.  This is true for almost
901  *	all cases not involving restriction.  If we do fail to
902  *	update the baseline for a file that was off the change list,
903  *	the worst that is likely to happen is that we will think
904  *	it changed later (but will almost surely find that both
905  *	copies agree).
906  */
907 void
link_update(struct file * fp,side_t which)908 link_update(struct file *fp, side_t which)
909 {	struct file *lp;
910 
911 	for (lp = changes; lp; lp = lp->f_rnext) {
912 		/* finding the current entry doesn't count	*/
913 		if (lp == fp)
914 			continue;
915 
916 		/* look for same i#, maj, min on changed side	*/
917 		if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
918 			continue;
919 		if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
920 			continue;
921 		if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
922 			continue;
923 
924 		/*
925 		 * this appears to be another link to the same file
926 		 * so the updated stat information for one must be
927 		 * correct for the other.
928 		 */
929 		lp->f_info[which].f_type	= fp->f_info[which].f_type;
930 		lp->f_info[which].f_size	= fp->f_info[which].f_size;
931 		lp->f_info[which].f_mode	= fp->f_info[which].f_mode;
932 		lp->f_info[which].f_uid		= fp->f_info[which].f_uid;
933 		lp->f_info[which].f_gid		= fp->f_info[which].f_gid;
934 		lp->f_info[which].f_modtime	= fp->f_info[which].f_modtime;
935 		lp->f_info[which].f_modns	= fp->f_info[which].f_modns;
936 		lp->f_info[which].f_nlink	= fp->f_info[which].f_nlink;
937 		lp->f_info[which].f_rd_maj	= fp->f_info[which].f_rd_maj;
938 		lp->f_info[which].f_rd_min	= fp->f_info[which].f_rd_min;
939 
940 		if (opt_debug & DBG_STAT)
941 			fprintf(stderr,
942 				"STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
943 				lp->f_name, lp->f_info[which].f_modtime,
944 				lp->f_info[which].f_modns);
945 	}
946 }
947 
948 /*
949  * routine:
950  *	queue_file
951  *
952  * purpose:
953  *	append a file to the list of needed reconciliations
954  *
955  * parameters:
956  *	pointer to file
957  *
958  * notes:
959  *	when a request is appended to the reconciliation list,
960  *	we fill in the full name.  We delayed this in hopes that
961  *	it wouldn't be necessary (saving cycles and memory)
962  *
963  *	There is some funny business with modification times.
964  *	In general, we queue files in order of the latest modification
965  *	time so that propagations preserve relative ordering.  There
966  *	are, however, a few important exceptions:
967  *	    1.	all directory creations happen at time zero,
968  *		so that they are created before any files can
969  *		be added to them.
970  *	    2.	all directory deletions happen at time infinity-depth,
971  *		so that everything else can be removed before the
972  *		directories themselves are removed.
973  *	    3.	all file deletions happen at time infinity-depth
974  *		so that (in renames) the links will preceed the unlinks.
975  */
976 static void
queue_file(struct file * fp)977 queue_file(struct file *fp)
978 {	struct file **pp, *np;
979 
980 #define	TIME_ZERO	0L		/* the earliest possible time	*/
981 #define	TIME_LONG	0x7FFFFFFF	/* the latest possible time	*/
982 
983 	/*
984 	 * figure out the modification time for sequencing purposes
985 	 */
986 	if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
987 		/*
988 		 * deletions are performed last, and depth first
989 		 */
990 		fp->f_modtime = TIME_LONG - fp->f_depth;
991 	} else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
992 	    fp->f_info[OPT_DST].f_type != S_IFDIR) {
993 		/*
994 		 * for most files we use the latest mod time
995 		 */
996 		fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
997 		fp->f_modns   = fp->f_info[OPT_SRC].f_modns;
998 		if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
999 			fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
1000 			fp->f_modns   = fp->f_info[OPT_DST].f_modns;
1001 		}
1002 	} else {
1003 		/*
1004 		 * new directory creations need to happen before anything
1005 		 * else and are automatically sequenced in traversal order
1006 		 */
1007 		fp->f_modtime = TIME_ZERO;
1008 	}
1009 
1010 	/*
1011 	 * insertion is time ordered, and for equal times,
1012 	 * insertions is in (pre-order) traversal order
1013 	 */
1014 	for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
1015 		if (fp->f_modtime > np->f_modtime)
1016 			continue;
1017 		if (fp->f_modtime < np->f_modtime)
1018 			break;
1019 		if (fp->f_modns < np->f_modns)
1020 			break;
1021 	}
1022 
1023 	fp->f_fullname = strdup(get_name(fp));
1024 	fp->f_rnext = np;
1025 	*pp = fp;
1026 }
1027 
1028 
1029 /*
1030  * routines:
1031  *	push_name/pop_name/get_name
1032  *
1033  * purpose:
1034  *	maintain a name stack so we can form name of a particular file
1035  *	as the concatenation of all of the names between it and the
1036  *	(know to be fully qualified) base directory.
1037  *
1038  * notes:
1039  *	we go to this trouble because most files never change and
1040  *	so we don't need to associate full names with every one.
1041  *	This stack is maintained during analysis, and if we decide
1042  *	to add a file to the reconciliation list, we can use the
1043  *	stack to generate a fully qualified name at that time.
1044  *
1045  *	we compress out '/./' when we return a name.  Given that the
1046  *	stack was built by a tree walk, the only place a /./ should
1047  *	appear is at the first level after the base ... but there
1048  *	are legitimate ways for them to appear there.
1049  *
1050  *	these names can get deep, so we dynamically size our name buffer
1051  */
1052 static const char *namestack[ MAX_DEPTH + 1 ];
1053 static int namedepth = 0;
1054 static int namelen = 0;
1055 
1056 void
push_name(const char * name)1057 push_name(const char *name)
1058 {
1059 	namestack[ namedepth++ ] = name;
1060 	namelen += 2 + strlen(name);
1061 
1062 	/* make sure we don't overflow our name stack	*/
1063 	if (namedepth >= MAX_DEPTH) {
1064 		fprintf(stderr, gettext(ERR_deep), name);
1065 		exit(ERR_OTHER);
1066 	}
1067 }
1068 
1069 void
pop_name(void)1070 pop_name(void)
1071 {
1072 	namelen -= 2 + strlen(namestack[--namedepth]);
1073 	namestack[ namedepth ] = 0;
1074 
1075 #ifdef	DBG_ERRORS
1076 	/* just a little sanity check here	*/
1077 	if (namedepth <= 0) {
1078 		if (namedepth < 0) {
1079 			fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
1080 			exit(ERR_OTHER);
1081 		} else if (namelen != 0) {
1082 			fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
1083 			exit(ERR_OTHER);
1084 		}
1085 	}
1086 #endif
1087 }
1088 
1089 char
get_name(struct file * fp)1090 *get_name(struct file *fp)
1091 {	int i;
1092 	static char *namebuf = 0;
1093 	static int buflen = 0;
1094 
1095 	/* make sure we have an adequate buffer	*/
1096 	i = namelen + 1 + strlen(fp->f_name);
1097 	if (buflen < i) {
1098 		for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
1099 		namebuf = (char *) realloc(namebuf, buflen);
1100 	}
1101 
1102 	/* assemble the name	*/
1103 	namebuf[0] = 0;
1104 	for (i = 0; i < namedepth; i++) {
1105 		if (strcmp(namestack[i], ".")) {
1106 			strcat(namebuf, namestack[i]);
1107 			strcat(namebuf, "/");
1108 		}
1109 	}
1110 
1111 	strcat(namebuf, fp->f_name);
1112 
1113 	return (namebuf);
1114 }
1115