OpenSolaris Launch

author: stevel@tonic-gate <none@none> 2005-06-14 00:00:00 -0700
committer: stevel@tonic-gate <none@none> 2005-06-14 00:00:00 -0700
commit: 7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
tree: c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/cmd/filesync/anal.c
download: illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz
1 files changed, 1114 insertions, 0 deletions
diff --git a/usr/src/cmd/filesync/anal.c b/usr/src/cmd/filesync/anal.c
new file mode 100644
index 0000000000..fe10e49620
--- /dev/null
+++ b/usr/src/cmd/filesync/anal.c
@@ -0,0 +1,1114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License").  You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1995 Sun Microsystems, Inc.  All Rights Reserved
+ *
+ * module:
+ *	anal.c
+ *
+ * purpose:
+ *	routines to analyze the file trees and figure out what has changed
+ *	and queue files for reconciliation.  It also contains tree enumeration
+ *	routines to for other purposes (pruning and link location).
+ *
+ * contents:
+ *
+ *  change analysis:
+ *	analyze .... (top level) analyze all files in the tree for changes
+ *	summary .... print out change/reconciliation statistics for each base
+ *	check_file . (static) look for changes and queue file for reconciliation
+ *	check_changes (static) figure out if a particular file has changed
+ *	queue_file . (static) add a file to the reconciliation list
+ *
+ *  other tree enumeration functions:
+ *	prune_file . (static) recursive descent and actual pruning
+ *	prune ...... (top level) initiate pruning analysis for nonexistant files
+ *	find_link .. look for other files to which a file may be a link
+ *	link_update. propagate changed stat info to all other links
+ *	same_name .. (static) figure out if two nodes describe same file
+ *
+ *  misc:
+ *	push_name .. maintain a running full pathname as we descend
+ *	pop_name ... maintain a running full pathname as we pop back
+ *	get_name ... return full pathname for the current file
+ *
+ * notes:
+ *	analysis is limited to files that were evaluated in the previous
+ *	pass ... since we don't have complete information about files that
+ *	were not evaluated in the previous pass.
+ */
+#ident	"%W%	%E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "messages.h"
+#include "filesync.h"
+#include "database.h"
+#include "debug.h"
+
+/*
+ * routines:
+ */
+void push_name(const char *);
+void pop_name();
+char *get_name(struct file *);
+static errmask_t check_file(struct file *fp);
+static diffmask_t check_changes(struct file *fp, int first, int second);
+static int prune_file(struct file *fp);
+static void queue_file(struct file *fp);
+
+/*
+ * globals
+ */
+static struct file *changes;	/* list of files to be reconciled	*/
+
+static long total_files;	/* total number of files being considered  */
+static long est_deletes;	/* estimated number of files to be deleted */
+static long est_rmdirs;		/* est rmdirs of non-empty directories	   */
+
+int inum_changes;		/* LISTed directories whose I#s changed	   */
+
+/*
+ * routine:
+ *	analyze
+ *
+ * purpose:
+ *	top level routine for the analysis/reconciliation process
+ *
+ * parameters:
+ *	none
+ *
+ * returns:
+ *	error mask
+ *
+ * notes:
+ *	a critical side effect of this routine is the creation of
+ *	the reconciliation list, an ordered list of files that
+ *	needed to be processed in the subsequent reconciliation pass
+ */
+errmask_t
+analyze()
+{	struct base *bp;
+	struct file *fp;
+	int errs = 0;
+	int err;
+	int percentage;
+	bool_t aborted = FALSE;
+	char msgbuf[MAX_LINE];
+
+	/*
+	 * run through all bases and directories looking for files
+	 * that have been renamed.  This must be done before the
+	 * difference analysis because a directory rename can introduce
+	 * radical restructuring into a name-based tree.
+	 */
+	for (bp = bases; bp; bp = bp->b_next) {
+		for (fp = bp->b_files; fp; fp = fp->f_next)
+			if (fp->f_flags & F_EVALUATE)
+				errs |= find_renames(fp);
+	}
+
+	/*
+	 * run through all bases and files looking for candidates
+	 * note, however that we only descend into trees that have
+	 * the evaluate flag turned on.  As a result of new rules or
+	 * restriction arguments, we may be deliberatly ignoring
+	 * large amounts of the baseline.   This means we won't do
+	 * any stats to update the information in those nodes, and
+	 * they will be written back just as they were.
+	 *
+	 * note that there is code to prune out baseline nodes for
+	 * files that no longer exist, but that code is in reconcile
+	 * and will never get a chance to run on nodes that aren't
+	 * analyzed.
+	 *
+	 * we also want to run though all nodes with STAT errors
+	 * so that we can put them on the reconciliation list.
+	 */
+	for (bp = bases; bp; bp = bp->b_next) {
+		for (fp = bp->b_files; fp; fp = fp->f_next)
+			if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
+				errs |= check_file(fp);
+	}
+
+	/*
+	 * my greatest fear is that someday, somehow, by messing with
+	 * variables or baselines or who-knows-what, that someone will
+	 * run a reconciliation against a large tree that doesn't correspond
+	 * to the baseline, and I will infer that a bazillion files have
+	 * been deleted and will propagate the slaughter before anyone
+	 * can say somebody stop that maniac.
+	 *
+	 * in order to prevent such a possibility, we have a few different
+	 * sanity checks.  There is, of course, a tradeoff here between
+	 * danger and irritation.  The current set of heuristics for whether
+	 * or not to generate a warning are (any of)
+	 *
+	 *	at least CONFIRM_MIN files have been deleted AND
+	 *	CONFIRM_PCT of all files have been deleted
+	 *
+	 *	the inode number on a LISTed directory has changed
+	 *
+	 *	a non-empty directory has been deleted.
+	 */
+	msgbuf[0] = 0;
+
+	percentage = (est_deletes * 100) / (total_files ? total_files : 1);
+	if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
+		sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
+	else if (inum_changes > 0)
+		sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
+	else if (est_rmdirs)
+		sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
+
+	if (msgbuf[0])
+		confirm(msgbuf);
+
+	/*
+	 * TRICK:
+	 *	the change list contains both files that have changed
+	 *	(and probably warrant reconciliation) and files that
+	 *	we couldn't get up-to-date stat information on.  The
+	 *	latter files should just be flagged as being in conflict
+	 *	so they can be reported in the summary.  The same is
+	 *	true of all subsequent files if we abort reconciliation.
+	 */
+	for (fp = changes; fp; fp = fp->f_rnext)
+		if (aborted || (fp->f_flags & F_STAT_ERROR)) {
+			fp->f_flags |= F_CONFLICT;
+			/* if it isn't in the baseline yet, don't add it */
+			if ((fp->f_flags & F_IN_BASELINE) == 0)
+				fp->f_flags |= F_REMOVE;
+			fp->f_problem = aborted ? PROB_aborted : PROB_restat;
+			(fp->f_base)->b_unresolved++;
+			errs |= ERR_UNRESOLVED;
+			if (opt_verbose)
+				fprintf(stdout,
+					gettext(aborted ? V_suppressed
+							: V_nostat),
+					fp->f_fullname);
+		} else {
+			err = reconcile(fp);
+			errs |= err;
+			if (opt_halt && (err & ERR_ABORT)) {
+				fprintf(stderr, gettext(ERR_abort_h));
+				aborted = TRUE;
+			}
+		}
+
+	return (errs);
+}
+
+/*
+ * routine:
+ *	prune_file
+ *
+ * purpose:
+ *	to look for file entries that should be pruned from baseline
+ *	prune the current file if it needs pruning, and recursively
+ *	descend if it is a directory.
+ *
+ * parameters:
+ *	pointer to file node
+ */
+static int
+prune_file(struct file *fp)
+{	struct file *cp;
+	int prunes = 0;
+
+	/* if node hasn't been evaluated, mark it for removal	*/
+	if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
+		fp->f_flags |= F_REMOVE;
+		prunes++;
+		if (opt_debug & DBG_ANAL)
+			fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
+	}
+
+	/* now check our children				*/
+	for (cp = fp->f_files; cp; cp = cp->f_next)
+		prunes += prune_file(cp);
+
+	return (prunes);
+}
+
+/*
+ * routine:
+ *	prune
+ *
+ * purpose:
+ *	to prune the baseline of entries that no longer correspond to
+ *	existing rules.
+ *
+ * notes:
+ *	This routine just calls prune_file on the top of each base tree.
+ */
+int
+prune()
+{	struct base *bp;
+	struct file *fp;
+	int prunes = 0;
+
+	for (bp = bases; bp; bp = bp->b_next) {
+		for (fp = bp->b_files; fp; fp = fp->f_next)
+			prunes += prune_file(fp);
+
+		if ((bp->b_flags & F_EVALUATE) == 0)
+			bp->b_flags |= F_REMOVE;
+	}
+
+	return (prunes);
+}
+
+/*
+ * routine:
+ *	summary
+ *
+ * purpose:
+ *	to print out statics and conflict lists
+ */
+void
+summary()
+{	struct base *bp;
+	struct file *fp;
+	extern bool_t need_super;
+
+	(void) fflush(stdout);
+
+	for (bp = bases; bp; bp = bp->b_next) {
+
+		/* see if this base was irrelevent	*/
+		if ((bp->b_flags & F_EVALUATE) == 0)
+			continue;
+
+		/* print out a summary for this base	*/
+		fprintf(stderr, gettext(SUM_hd),
+			bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
+		fprintf(stderr, gettext(SUM_dst),
+			bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
+		fprintf(stderr, gettext(SUM_src),
+			bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
+		if (bp->b_unresolved)
+			fprintf(stderr, gettext(SUM_unresolved),
+				bp->b_unresolved);
+
+
+		/* print out a list of unreconciled files for this base	*/
+		for (fp = changes; fp; fp = fp->f_rnext) {
+			if (fp->f_base != bp)
+				continue;
+			if ((fp->f_flags & F_CONFLICT) == 0)
+				continue;
+			fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
+				fp->f_problem ? fp->f_problem : "???");
+		}
+
+		fprintf(stderr, "\n");
+	}
+
+	if (need_super)
+		fprintf(stderr, gettext(WARN_super));
+}
+
+/*
+ * routine:
+ *	check_file
+ *
+ * purpose:
+ *	figure out if a file requires reconciliation and recursively
+ *	descend into all sub-files and directories
+ *
+ * parameters:
+ *	base pointer
+ *	file pointer
+ *
+ * returns:
+ *	error mask
+ *	built up changes needed list
+ *	updated statistics
+ *
+ * notes:
+ *	this routine builds up a path name as it descends through
+ *	the tree (see push_name, pop_name, get_name).
+ */
+static errmask_t
+check_file(struct file *fp)
+{	struct file *cp;
+	int errs = 0;
+
+	if ((fp->f_flags & F_STAT_ERROR) == 0) {
+		/* see if the source has changed	*/
+		fp->f_info[OPT_BASE].f_modtime	= fp->f_s_modtime;
+		fp->f_info[OPT_BASE].f_ino	= fp->f_s_inum;
+		fp->f_info[OPT_BASE].f_d_maj	= fp->f_s_maj;
+		fp->f_info[OPT_BASE].f_d_min	= fp->f_s_min;
+		fp->f_info[OPT_BASE].f_nlink	= fp->f_s_nlink;
+		fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
+
+		/* see if the destination has changed	*/
+		fp->f_info[OPT_BASE].f_modtime	= fp->f_d_modtime;
+		fp->f_info[OPT_BASE].f_ino    	= fp->f_d_inum;
+		fp->f_info[OPT_BASE].f_d_maj    = fp->f_d_maj;
+		fp->f_info[OPT_BASE].f_d_min    = fp->f_d_min;
+		fp->f_info[OPT_BASE].f_nlink	= fp->f_d_nlink;
+		fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
+
+		/* if nobody thinks the file exists, baseline needs pruning */
+		if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
+			fp->f_srcdiffs |= D_DELETE;
+			fp->f_dstdiffs |= D_DELETE;
+		}
+
+		/* keep track of possible deletions to look for trouble	*/
+		if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
+			est_deletes++;
+
+			/* see if file is (or has been) a non-empty directory */
+			if (fp->f_files)
+				est_rmdirs++;
+		}
+	}
+
+	/* if we found differences, queue the file for reconciliation 	*/
+	if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
+		queue_file(fp);
+
+		if (opt_debug & DBG_ANAL) {
+			fprintf(stderr, "ANAL: src=%s",
+				showflags(diffmap, fp->f_srcdiffs));
+			fprintf(stderr, " dst=%s",
+				showflags(diffmap, fp->f_dstdiffs));
+			fprintf(stderr, " flgs=%s",
+				showflags(fileflags, fp->f_flags));
+			fprintf(stderr, " name=%s\n", fp->f_fullname);
+		}
+	}
+
+	/* bump the total file count	*/
+	fp->f_base->b_totfiles++;
+	total_files++;
+
+	/* if this is not a directory, we're done	*/
+	if (fp->f_files == 0)
+		return (errs);
+
+	/*
+	 * If this is a directory, we need to recursively analyze
+	 * our children, but only children who have been evaluated.
+	 * If a node has not been evaluated, then we don't have
+	 * updated stat information and there is nothing to analyze.
+	 *
+	 * we also want to run though all nodes with STAT errors
+	 * so that we can put them on the reconciliation list.
+	 * If a directory is unreadable on one side, all files
+	 * under that directory (ON BOTH SIDES) must be marked as
+	 * blocked by stat errors.
+	 */
+	push_name(fp->f_name);
+
+	for (cp = fp->f_files; cp; cp = cp->f_next) {
+		if (fp->f_flags & F_STAT_ERROR)
+			cp->f_flags |= F_STAT_ERROR;
+		if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
+			errs |= check_file(cp);
+	}
+
+	pop_name();
+
+	return (errs);
+}
+
+/*
+ * routine:
+ *	check_changes
+ *
+ * purpose:
+ *	to figure out what has changed for a specific file
+ *
+ * parameters:
+ *	file pointer
+ *	the reference info
+ *	the info to be checked for changes
+ *
+ * returns:
+ *	diff mask
+ *
+ * notes:
+ *	this routine doesn't pretend to understand what happened.
+ *	it merely enumerates the ways in which the files differ.
+ */
+static diffmask_t
+check_changes(struct file *fp, int ref, int new)
+{	struct fileinfo *rp, *np;
+	int mask = 0;
+	int type;
+
+	rp = &fp->f_info[ref];
+	np = &fp->f_info[new];
+
+	if (np->f_uid != rp->f_uid)
+		mask |= D_UID;
+
+	if (np->f_gid != rp->f_gid)
+		mask |= D_GID;
+
+	if (np->f_mode != rp->f_mode)
+		mask |= D_PROT;
+
+	type = np->f_type;
+	if (type != rp->f_type) {
+		if (type == 0)
+			mask |= D_DELETE;
+		else if (rp->f_type == 0)
+			mask |= D_CREATE;
+		else
+			mask |= D_TYPE;
+	} else if (type == S_IFBLK || type == S_IFCHR) {
+		/*
+		 * for special files, we only look at the maj/min
+		 */
+		if (np->f_rd_maj != rp->f_rd_maj)
+			mask |= D_SIZE;
+		if (np->f_rd_min != rp->f_rd_min)
+			mask |= D_SIZE;
+	} else if (type != S_IFDIR) {
+		/*
+		 * for directories, we don't look directly at
+		 * the contents, so these fields don't mean
+		 * anything.  If the directories have changed
+		 * in any interesting way, we'll find it by
+		 * walking the tree.
+		 */
+		if (np->f_modtime > rp->f_modtime)
+			mask |= D_MTIME;
+
+		if (np->f_size != rp->f_size)
+			mask |= D_SIZE;
+
+		if (np->f_nlink != rp->f_nlink)
+			mask |= D_LINKS;
+	}
+
+	if (cmp_acls(rp, np) == 0)
+		mask |= D_FACLS;
+
+	return (mask);
+}
+
+/*
+ * routine:
+ *	same_name
+ *
+ * purpose:
+ *	to figure out whether or not two databsae nodes actually refer to
+ *	the same file.
+ *
+ * parameters:
+ *	pointers to two file description nodes
+ *	which side we should check
+ *
+ * returns:
+ *	TRUE/FALSE
+ *
+ * notes:
+ *	if a single directory is specified in multiple base pairs, it
+ *	is possible to have multiple nodes in the database describing
+ *	the same file.  This routine is supposed to detect those cases.
+ *
+ *	what should be a trivial string comparison is complicated by
+ *	the possibility that the two nodes might describe the same file
+ *	from base directories at different depths.  Thus, rather than
+ *	comparing two strings, we really want to compare the concatenation
+ *	of two pairs of strings.  Unfortunately calling full_name would
+ *	be awkward right now, so instead we have our own comparison
+ *	routine that automatically skips from the first string to
+ *	the second.
+ */
+static bool_t
+same_name(struct file *f1, struct file *f2, side_t srcdst)
+{
+	char *s1, *s2, *x1, *x2;
+
+	if (srcdst == OPT_SRC) {
+		s1 = (f1->f_base)->b_src_name;
+		s2 = (f2->f_base)->b_src_name;
+	} else {
+		s1 = (f1->f_base)->b_dst_name;
+		s2 = (f2->f_base)->b_dst_name;
+	}
+	x1 = f1->f_fullname;
+	x2 = f2->f_fullname;
+
+	/*
+	 * Compare the two names, and if they differ before they end
+	 * this is a non-match.  If they both end at the same time,
+	 * this is a match.
+	 *
+	 * The trick here is that each string is actually the logical
+	 * concatenation of two strings, and we need to automatically
+	 * wrap from the first to the second string in each pair.  There
+	 * is no requirement that the two (concatenated) strings be
+	 * broken at the same point, so we have a slightly baroque
+	 * comparsion loop.
+	 */
+	while (*s1 && *s1 == *s2) {
+
+		/*
+		 * strings have been identical so far, so advance the
+		 * pointers and continue the comparison.  The trick
+		 * is that when either string ends, we have to wrap
+		 * over to its extension.
+		 */
+		s1++; s2++;
+		if (*s1 && *s2)
+			continue;
+
+		/*
+		 * at least one of the strings has ended.
+		 * there is an implicit slash between the string
+		 * and its extension, and this has to be matched
+		 * against the other string.
+		 */
+		if (*s1 != *s2) {
+			if (*s1 == 0 && *s2 == '/')
+				s2++;
+			else if (*s2 == 0 && *s1 == '/')
+				s1++;
+			else
+				/* the disagreement doesn't come at a slash */
+				break;
+		}
+
+		/*
+		 * if either string has ended, wrap to its extension
+		 */
+		if (*s1 == 0 && x1 != 0) {
+			s1 = x1;
+			x1 = 0;
+		}
+		if (*s2 == 0 && x2 != 0) {
+			s2 = x2;
+			x2 = 0;
+		}
+	}
+
+	return (*s1 == *s2);
+}
+
+/*
+ * routine:
+ *	find_link
+ *
+ * purpose:
+ *	to figure out if there is a file to which we should
+ *	be creating a link (rather than making a copy)
+ *
+ * parameters:
+ *	file node for the file to be created (that we hope is merely a link)
+ *	which side is to be changed (src/dst)
+ *
+ * return:
+ *	0	no link is appropriate
+ *	else	pointer to file node for link referent
+ *
+ * notes:
+ *	there are a few strange heuristics in this routine and I
+ *	wouldn't bet my soul that I got all of them right.  The general
+ *	theory is that when a new file is created, we look to see if it
+ *	is a link to another file on the changed side, and if it is, we
+ *	find the corresponding file on the unchanged side.
+ *
+ *	cases we want to be able to handle:
+ *	    1.	one or more links are created to a prexisting file
+ *	    2.	a preexisting only link is renamed
+ *	    3.  a rename of one of multiple links to a preexisting file
+ *	    4.	a single file is created with multiple links
+ */
+struct file *
+find_link(struct file *fp, side_t srcdst)
+{	struct file *lp;
+	side_t chgside, tgtside;
+	struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
+
+	/* chg = side on which the change was noticed		*/
+	/* tgt = side to which the change is to be propagated	*/
+	chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
+	tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
+	fcp = &fp->f_info[chgside];
+	ftp = &fp->f_info[tgtside];
+
+	/*
+	 * cases 1 and 3
+	 *
+	 * When a new link is created, we should be able to find
+	 * another file in the changed hierarchy that has the same
+	 * I-node number.  We expect it to be on the changed list
+	 * because the link count will have gone up or because all
+	 * of the copies are new.  If we find one, then the new file
+	 * on the receiving file should be a link to the corresponding
+	 * existing file.
+	 *
+	 * case 4
+	 *
+	 * the first link will be dealt with as a copy, but all
+	 * subsequent links should find an existing file analogous
+	 * to one of the links on the changed side, and create
+	 * corresponding links on the other side.
+	 *
+	 * in each of these cases, there should be multiple links
+	 * on the changed side.  If the linkcount on the changed
+	 * side is one, we needn't bother searching for other links.
+	 */
+	if (fcp->f_nlink > 1)
+	for (lp = changes; lp; lp = lp->f_rnext) {
+		/* finding the same node doesn't count	*/
+		if (fp == lp)
+			continue;
+
+		tgtp = &lp->f_info[tgtside];
+		chgp = &lp->f_info[chgside];
+
+		/*
+		 * if the file doesn't already exist on the target side
+		 * we cannot make a link to it
+		 */
+		if (tgtp->f_mode == 0)
+			continue;
+
+		/*
+		 * if this is indeed a link, then the prospective file on
+		 * the changed side will have the same dev/inum as the file
+		 * we are looking for
+		 */
+		if (fcp->f_d_maj != chgp->f_d_maj)
+			continue;
+		if (fcp->f_d_min != chgp->f_d_min)
+			continue;
+		if (fcp->f_ino != chgp->f_ino)
+			continue;
+
+		/*
+		 * if the target side is already a link to this file,
+		 * then there is no new link to be created
+		 * FIX: how does this interact with copies over links
+		 */
+		if ((ftp->f_d_maj == tgtp->f_d_maj) &&
+		    (ftp->f_d_min == tgtp->f_d_min) &&
+		    (ftp->f_ino   == tgtp->f_ino))
+			continue;
+
+		/*
+		 * there is a pathological situation where a single file
+		 * might appear under multiple base directories.  This is
+		 * damned awkward to detect in any other way, so we must
+		 * check to see if we have just found another database
+		 * instance for the same file (on the changed side).
+		 */
+		if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
+			continue;
+
+		if (opt_debug & DBG_ANAL)
+			fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
+				fp->f_fullname, lp->f_fullname);
+
+		return (lp);
+	}
+
+	/*
+	 * case 2: a simple rename of the only link
+	 *
+	 * In this case, there may not be any other existing file on
+	 * the changed side that has the same I-node number.  There
+	 * might, however, be a record of such a file in the baseline.
+	 * If we can find an identical file with a different name that
+	 * has recently disappeared, we have a likely rename.
+	 */
+	for (lp = changes; lp; lp = lp->f_rnext) {
+
+		/* finding the same node doesn't count			*/
+		if (fp == lp)
+			continue;
+
+		tgtp = &lp->f_info[tgtside];
+		chgp = &lp->f_info[chgside];
+
+		/*
+		 * if the file still exists on the changed side this is
+		 * not a simple rename, and in fact the previous pass
+		 * would have found it.
+		 */
+		if (chgp->f_mode != 0)
+			continue;
+
+		/*
+		 * the inode number for the new link on the changed
+		 * side must match the inode number for the old link
+		 * from the baseline.
+		 */
+		if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
+							: lp->f_s_maj))
+			continue;
+		if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
+							: lp->f_s_min))
+			continue;
+		if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
+							: lp->f_s_inum))
+			continue;
+
+		/* finding a file we are already linked to doesn't help	*/
+		if ((ftp->f_d_maj == tgtp->f_d_maj) &&
+		    (ftp->f_d_min == tgtp->f_d_min) &&
+		    (ftp->f_ino   == tgtp->f_ino))
+			continue;
+
+		/*
+		 * there is a danger that we will confuse an
+		 * inode reallocation with a rename.  We should
+		 * only consider this to be a rename if the
+		 * new file is identical to the old one
+		 */
+		basp = &lp->f_info[OPT_BASE];
+		if (fcp->f_type != basp->f_type)
+			continue;
+		if (fcp->f_size != basp->f_size)
+			continue;
+		if (fcp->f_mode != basp->f_mode)
+			continue;
+		if (fcp->f_uid != basp->f_uid)
+			continue;
+		if (fcp->f_gid != basp->f_gid)
+			continue;
+
+		if (opt_debug & DBG_ANAL)
+			fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
+				fp->f_fullname, lp->f_fullname);
+
+		return (lp);
+	}
+
+	return (0);
+}
+
+/*
+ * routine:
+ *	has_other_links
+ *
+ * purpose:
+ *	to determine whether or not there is more that one link to a
+ *	particular file.  We are willing to delete a link to a file
+ *	that has changed if we will still have other links to it.
+ *	The trick here is that we only care about links under our
+ *	dominion.
+ *
+ * parameters:
+ *	file pointer to node we are interested in
+ *	which side we are looking to additional links on
+ *
+ * returns:
+ *	TRUE if there are multiple links
+ *	FALSE if this is the only one we know of
+ */
+bool_t
+has_other_links(struct file *fp, side_t srcdst)
+{	struct file *lp;
+	struct fileinfo *fip, *lip;
+
+	fip = &fp->f_info[srcdst];
+
+	/* if the link count is one, there couldn't be others	*/
+	if (fip->f_nlink < 2)
+		return (FALSE);
+
+	/* look for any other files for the same inode		*/
+	for (lp = changes; lp; lp = lp->f_rnext) {
+		/* finding the same node doesn't count	*/
+		if (fp == lp)
+			continue;
+
+		lip = &lp->f_info[srcdst];
+
+		/*
+		 * file must still exist on this side
+		 */
+		if (lip->f_mode == 0)
+			continue;
+
+		/*
+		 * if this is indeed a link, then the prospective file on
+		 * the changed side will have the same dev/inum as the file
+		 * we are looking for
+		 */
+		if (lip->f_d_maj != fip->f_d_maj)
+			continue;
+		if (lip->f_d_min != fip->f_d_min)
+			continue;
+		if (lip->f_ino != fip->f_ino)
+			continue;
+
+		/*
+		 * we have found at least one other link
+		 */
+		return (TRUE);
+	}
+
+	return (FALSE);
+}
+
+/*
+ * routine:
+ *	link_update
+ *
+ * purpose:
+ *	to propoagate a stat change to all other file nodes that
+ *	correspond to the same I-node on the changed side
+ *
+ * parameters:
+ *	file pointer for the updated file
+ *	which side was changed
+ *
+ * returns:
+ *	void
+ *
+ * notes:
+ *	if we have copied onto a file, we have copied onto all
+ *	of its links, but since we do all stats before we do any
+ *	copies, the stat information recently collected for links
+ *	is no longer up-to-date, and this would result in incorrect
+ *	reconciliation (redundant copies).
+ *
+ *	There is an assumption here that all links to a changed
+ *	file will be in the change list.  This is true for almost
+ *	all cases not involving restriction.  If we do fail to
+ *	update the baseline for a file that was off the change list,
+ *	the worst that is likely to happen is that we will think
+ *	it changed later (but will almost surely find that both
+ *	copies agree).
+ */
+void
+link_update(struct file *fp, side_t which)
+{	struct file *lp;
+
+	for (lp = changes; lp; lp = lp->f_rnext) {
+		/* finding the current entry doesn't count	*/
+		if (lp == fp)
+			continue;
+
+		/* look for same i#, maj, min on changed side	*/
+		if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
+			continue;
+		if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
+			continue;
+		if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
+			continue;
+
+		/*
+		 * this appears to be another link to the same file
+		 * so the updated stat information for one must be
+		 * correct for the other.
+		 */
+		lp->f_info[which].f_type	= fp->f_info[which].f_type;
+		lp->f_info[which].f_size	= fp->f_info[which].f_size;
+		lp->f_info[which].f_mode	= fp->f_info[which].f_mode;
+		lp->f_info[which].f_uid		= fp->f_info[which].f_uid;
+		lp->f_info[which].f_gid		= fp->f_info[which].f_gid;
+		lp->f_info[which].f_modtime	= fp->f_info[which].f_modtime;
+		lp->f_info[which].f_modns	= fp->f_info[which].f_modns;
+		lp->f_info[which].f_nlink	= fp->f_info[which].f_nlink;
+		lp->f_info[which].f_rd_maj	= fp->f_info[which].f_rd_maj;
+		lp->f_info[which].f_rd_min	= fp->f_info[which].f_rd_min;
+
+		if (opt_debug & DBG_STAT)
+			fprintf(stderr,
+				"STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
+				lp->f_name, lp->f_info[which].f_modtime,
+				lp->f_info[which].f_modns);
+	}
+}
+
+/*
+ * routine:
+ *	queue_file
+ *
+ * purpose:
+ *	append a file to the list of needed reconciliations
+ *
+ * parameters:
+ *	pointer to file
+ *
+ * notes:
+ *	when a request is appended to the reconciliation list,
+ *	we fill in the full name.  We delayed this in hopes that
+ *	it wouldn't be necessary (saving cycles and memory)
+ *
+ *	There is some funny business with modification times.
+ *	In general, we queue files in order of the latest modification
+ *	time so that propagations preserve relative ordering.  There
+ *	are, however, a few important exceptions:
+ *	    1.	all directory creations happen at time zero,
+ *		so that they are created before any files can
+ *		be added to them.
+ *	    2.	all directory deletions happen at time infinity-depth,
+ *		so that everything else can be removed before the
+ *		directories themselves are removed.
+ *	    3.	all file deletions happen at time infinity-depth
+ *		so that (in renames) the links will preceed the unlinks.
+ */
+static void
+queue_file(struct file *fp)
+{	struct file **pp, *np;
+
+#define	TIME_ZERO	0L		/* the earliest possible time	*/
+#define	TIME_LONG	0x7FFFFFFF	/* the latest possible time	*/
+
+	/*
+	 * figure out the modification time for sequencing purposes
+	 */
+	if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
+		/*
+		 * deletions are performed last, and depth first
+		 */
+		fp->f_modtime = TIME_LONG - fp->f_depth;
+	} else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
+	    fp->f_info[OPT_DST].f_type != S_IFDIR) {
+		/*
+		 * for most files we use the latest mod time
+		 */
+		fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
+		fp->f_modns   = fp->f_info[OPT_SRC].f_modns;
+		if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
+			fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
+			fp->f_modns   = fp->f_info[OPT_DST].f_modns;
+		}
+	} else {
+		/*
+		 * new directory creations need to happen before anything
+		 * else and are automatically sequenced in traversal order
+		 */
+		fp->f_modtime = TIME_ZERO;
+	}
+
+	/*
+	 * insertion is time ordered, and for equal times,
+	 * insertions is in (pre-order) traversal order
+	 */
+	for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
+		if (fp->f_modtime > np->f_modtime)
+			continue;
+		if (fp->f_modtime < np->f_modtime)
+			break;
+		if (fp->f_modns < np->f_modns)
+			break;
+	}
+
+	fp->f_fullname = strdup(get_name(fp));
+	fp->f_rnext = np;
+	*pp = fp;
+}
+
+
+/*
+ * routines:
+ *	push_name/pop_name/get_name
+ *
+ * purpose:
+ *	maintain a name stack so we can form name of a particular file
+ *	as the concatenation of all of the names between it and the
+ *	(know to be fully qualified) base directory.
+ *
+ * notes:
+ *	we go to this trouble because most files never change and
+ *	so we don't need to associate full names with every one.
+ *	This stack is maintained during analysis, and if we decide
+ *	to add a file to the reconciliation list, we can use the
+ *	stack to generate a fully qualified name at that time.
+ *
+ *	we compress out '/./' when we return a name.  Given that the
+ *	stack was built by a tree walk, the only place a /./ should
+ *	appear is at the first level after the base ... but there
+ *	are legitimate ways for them to appear there.
+ *
+ *	these names can get deep, so we dynamically size our name buffer
+ */
+static const char *namestack[ MAX_DEPTH + 1 ];
+static int namedepth = 0;
+static int namelen = 0;
+
+void
+push_name(const char *name)
+{
+	namestack[ namedepth++ ] = name;
+	namelen += 2 + strlen(name);
+
+	/* make sure we don't overflow our name stack	*/
+	if (namedepth >= MAX_DEPTH) {
+		fprintf(stderr, gettext(ERR_deep), name);
+		exit(ERR_OTHER);
+	}
+}
+
+void
+pop_name(void)
+{
+	namelen -= 2 + strlen(namestack[--namedepth]);
+	namestack[ namedepth ] = 0;
+
+#ifdef	DBG_ERRORS
+	/* just a little sanity check here	*/
+	if (namedepth <= 0) {
+		if (namedepth < 0) {
+			fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
+			exit(ERR_OTHER);
+		} else if (namelen != 0) {
+			fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
+			exit(ERR_OTHER);
+		}
+	}
+#endif
+}
+
+char
+*get_name(struct file *fp)
+{	int i;
+	static char *namebuf = 0;
+	static int buflen = 0;
+
+	/* make sure we have an adequate buffer	*/
+	i = namelen + 1 + strlen(fp->f_name);
+	if (buflen < i) {
+		for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
+		namebuf = (char *) realloc(namebuf, buflen);
+	}
+
+	/* assemble the name	*/
+	namebuf[0] = 0;
+	for (i = 0; i < namedepth; i++) {
+		if (strcmp(namestack[i], ".")) {
+			strcat(namebuf, namestack[i]);
+			strcat(namebuf, "/");
+		}
+	}
+
+	strcat(namebuf, fp->f_name);
+
+	return (namebuf);
+}
author	stevel@tonic-gate <none@none>	2005-06-14 00:00:00 -0700
committer	stevel@tonic-gate <none@none>	2005-06-14 00:00:00 -0700
commit	7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
tree	c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/cmd/filesync/anal.c
download	illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz