summaryrefslogtreecommitdiff
path: root/usr/src/cmd/filesync/anal.c
diff options
context:
space:
mode:
authorstevel@tonic-gate <none@none>2005-06-14 00:00:00 -0700
committerstevel@tonic-gate <none@none>2005-06-14 00:00:00 -0700
commit7c478bd95313f5f23a4c958a745db2134aa03244 (patch)
treec871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/cmd/filesync/anal.c
downloadillumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz
OpenSolaris Launch
Diffstat (limited to 'usr/src/cmd/filesync/anal.c')
-rw-r--r--usr/src/cmd/filesync/anal.c1114
1 files changed, 1114 insertions, 0 deletions
diff --git a/usr/src/cmd/filesync/anal.c b/usr/src/cmd/filesync/anal.c
new file mode 100644
index 0000000000..fe10e49620
--- /dev/null
+++ b/usr/src/cmd/filesync/anal.c
@@ -0,0 +1,1114 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved
+ *
+ * module:
+ * anal.c
+ *
+ * purpose:
+ * routines to analyze the file trees and figure out what has changed
+ * and queue files for reconciliation. It also contains tree enumeration
+ * routines to for other purposes (pruning and link location).
+ *
+ * contents:
+ *
+ * change analysis:
+ * analyze .... (top level) analyze all files in the tree for changes
+ * summary .... print out change/reconciliation statistics for each base
+ * check_file . (static) look for changes and queue file for reconciliation
+ * check_changes (static) figure out if a particular file has changed
+ * queue_file . (static) add a file to the reconciliation list
+ *
+ * other tree enumeration functions:
+ * prune_file . (static) recursive descent and actual pruning
+ * prune ...... (top level) initiate pruning analysis for nonexistant files
+ * find_link .. look for other files to which a file may be a link
+ * link_update. propagate changed stat info to all other links
+ * same_name .. (static) figure out if two nodes describe same file
+ *
+ * misc:
+ * push_name .. maintain a running full pathname as we descend
+ * pop_name ... maintain a running full pathname as we pop back
+ * get_name ... return full pathname for the current file
+ *
+ * notes:
+ * analysis is limited to files that were evaluated in the previous
+ * pass ... since we don't have complete information about files that
+ * were not evaluated in the previous pass.
+ */
+#ident "%W% %E% SMI"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "messages.h"
+#include "filesync.h"
+#include "database.h"
+#include "debug.h"
+
+/*
+ * routines:
+ */
+void push_name(const char *);
+void pop_name();
+char *get_name(struct file *);
+static errmask_t check_file(struct file *fp);
+static diffmask_t check_changes(struct file *fp, int first, int second);
+static int prune_file(struct file *fp);
+static void queue_file(struct file *fp);
+
+/*
+ * globals
+ */
+static struct file *changes; /* list of files to be reconciled */
+
+static long total_files; /* total number of files being considered */
+static long est_deletes; /* estimated number of files to be deleted */
+static long est_rmdirs; /* est rmdirs of non-empty directories */
+
+int inum_changes; /* LISTed directories whose I#s changed */
+
+/*
+ * routine:
+ * analyze
+ *
+ * purpose:
+ * top level routine for the analysis/reconciliation process
+ *
+ * parameters:
+ * none
+ *
+ * returns:
+ * error mask
+ *
+ * notes:
+ * a critical side effect of this routine is the creation of
+ * the reconciliation list, an ordered list of files that
+ * needed to be processed in the subsequent reconciliation pass
+ */
+errmask_t
+analyze()
+{ struct base *bp;
+ struct file *fp;
+ int errs = 0;
+ int err;
+ int percentage;
+ bool_t aborted = FALSE;
+ char msgbuf[MAX_LINE];
+
+ /*
+ * run through all bases and directories looking for files
+ * that have been renamed. This must be done before the
+ * difference analysis because a directory rename can introduce
+ * radical restructuring into a name-based tree.
+ */
+ for (bp = bases; bp; bp = bp->b_next) {
+ for (fp = bp->b_files; fp; fp = fp->f_next)
+ if (fp->f_flags & F_EVALUATE)
+ errs |= find_renames(fp);
+ }
+
+ /*
+ * run through all bases and files looking for candidates
+ * note, however that we only descend into trees that have
+ * the evaluate flag turned on. As a result of new rules or
+ * restriction arguments, we may be deliberatly ignoring
+ * large amounts of the baseline. This means we won't do
+ * any stats to update the information in those nodes, and
+ * they will be written back just as they were.
+ *
+ * note that there is code to prune out baseline nodes for
+ * files that no longer exist, but that code is in reconcile
+ * and will never get a chance to run on nodes that aren't
+ * analyzed.
+ *
+ * we also want to run though all nodes with STAT errors
+ * so that we can put them on the reconciliation list.
+ */
+ for (bp = bases; bp; bp = bp->b_next) {
+ for (fp = bp->b_files; fp; fp = fp->f_next)
+ if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR))
+ errs |= check_file(fp);
+ }
+
+ /*
+ * my greatest fear is that someday, somehow, by messing with
+ * variables or baselines or who-knows-what, that someone will
+ * run a reconciliation against a large tree that doesn't correspond
+ * to the baseline, and I will infer that a bazillion files have
+ * been deleted and will propagate the slaughter before anyone
+ * can say somebody stop that maniac.
+ *
+ * in order to prevent such a possibility, we have a few different
+ * sanity checks. There is, of course, a tradeoff here between
+ * danger and irritation. The current set of heuristics for whether
+ * or not to generate a warning are (any of)
+ *
+ * at least CONFIRM_MIN files have been deleted AND
+ * CONFIRM_PCT of all files have been deleted
+ *
+ * the inode number on a LISTed directory has changed
+ *
+ * a non-empty directory has been deleted.
+ */
+ msgbuf[0] = 0;
+
+ percentage = (est_deletes * 100) / (total_files ? total_files : 1);
+ if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT)
+ sprintf(msgbuf, gettext(WARN_deletes), est_deletes);
+ else if (inum_changes > 0)
+ sprintf(msgbuf, gettext(WARN_ichange), inum_changes);
+ else if (est_rmdirs)
+ sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs);
+
+ if (msgbuf[0])
+ confirm(msgbuf);
+
+ /*
+ * TRICK:
+ * the change list contains both files that have changed
+ * (and probably warrant reconciliation) and files that
+ * we couldn't get up-to-date stat information on. The
+ * latter files should just be flagged as being in conflict
+ * so they can be reported in the summary. The same is
+ * true of all subsequent files if we abort reconciliation.
+ */
+ for (fp = changes; fp; fp = fp->f_rnext)
+ if (aborted || (fp->f_flags & F_STAT_ERROR)) {
+ fp->f_flags |= F_CONFLICT;
+ /* if it isn't in the baseline yet, don't add it */
+ if ((fp->f_flags & F_IN_BASELINE) == 0)
+ fp->f_flags |= F_REMOVE;
+ fp->f_problem = aborted ? PROB_aborted : PROB_restat;
+ (fp->f_base)->b_unresolved++;
+ errs |= ERR_UNRESOLVED;
+ if (opt_verbose)
+ fprintf(stdout,
+ gettext(aborted ? V_suppressed
+ : V_nostat),
+ fp->f_fullname);
+ } else {
+ err = reconcile(fp);
+ errs |= err;
+ if (opt_halt && (err & ERR_ABORT)) {
+ fprintf(stderr, gettext(ERR_abort_h));
+ aborted = TRUE;
+ }
+ }
+
+ return (errs);
+}
+
+/*
+ * routine:
+ * prune_file
+ *
+ * purpose:
+ * to look for file entries that should be pruned from baseline
+ * prune the current file if it needs pruning, and recursively
+ * descend if it is a directory.
+ *
+ * parameters:
+ * pointer to file node
+ */
+static int
+prune_file(struct file *fp)
+{ struct file *cp;
+ int prunes = 0;
+
+ /* if node hasn't been evaluated, mark it for removal */
+ if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) {
+ fp->f_flags |= F_REMOVE;
+ prunes++;
+ if (opt_debug & DBG_ANAL)
+ fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name);
+ }
+
+ /* now check our children */
+ for (cp = fp->f_files; cp; cp = cp->f_next)
+ prunes += prune_file(cp);
+
+ return (prunes);
+}
+
+/*
+ * routine:
+ * prune
+ *
+ * purpose:
+ * to prune the baseline of entries that no longer correspond to
+ * existing rules.
+ *
+ * notes:
+ * This routine just calls prune_file on the top of each base tree.
+ */
+int
+prune()
+{ struct base *bp;
+ struct file *fp;
+ int prunes = 0;
+
+ for (bp = bases; bp; bp = bp->b_next) {
+ for (fp = bp->b_files; fp; fp = fp->f_next)
+ prunes += prune_file(fp);
+
+ if ((bp->b_flags & F_EVALUATE) == 0)
+ bp->b_flags |= F_REMOVE;
+ }
+
+ return (prunes);
+}
+
+/*
+ * routine:
+ * summary
+ *
+ * purpose:
+ * to print out statics and conflict lists
+ */
+void
+summary()
+{ struct base *bp;
+ struct file *fp;
+ extern bool_t need_super;
+
+ (void) fflush(stdout);
+
+ for (bp = bases; bp; bp = bp->b_next) {
+
+ /* see if this base was irrelevent */
+ if ((bp->b_flags & F_EVALUATE) == 0)
+ continue;
+
+ /* print out a summary for this base */
+ fprintf(stderr, gettext(SUM_hd),
+ bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles);
+ fprintf(stderr, gettext(SUM_dst),
+ bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc);
+ fprintf(stderr, gettext(SUM_src),
+ bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc);
+ if (bp->b_unresolved)
+ fprintf(stderr, gettext(SUM_unresolved),
+ bp->b_unresolved);
+
+
+ /* print out a list of unreconciled files for this base */
+ for (fp = changes; fp; fp = fp->f_rnext) {
+ if (fp->f_base != bp)
+ continue;
+ if ((fp->f_flags & F_CONFLICT) == 0)
+ continue;
+ fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname,
+ fp->f_problem ? fp->f_problem : "???");
+ }
+
+ fprintf(stderr, "\n");
+ }
+
+ if (need_super)
+ fprintf(stderr, gettext(WARN_super));
+}
+
+/*
+ * routine:
+ * check_file
+ *
+ * purpose:
+ * figure out if a file requires reconciliation and recursively
+ * descend into all sub-files and directories
+ *
+ * parameters:
+ * base pointer
+ * file pointer
+ *
+ * returns:
+ * error mask
+ * built up changes needed list
+ * updated statistics
+ *
+ * notes:
+ * this routine builds up a path name as it descends through
+ * the tree (see push_name, pop_name, get_name).
+ */
+static errmask_t
+check_file(struct file *fp)
+{ struct file *cp;
+ int errs = 0;
+
+ if ((fp->f_flags & F_STAT_ERROR) == 0) {
+ /* see if the source has changed */
+ fp->f_info[OPT_BASE].f_modtime = fp->f_s_modtime;
+ fp->f_info[OPT_BASE].f_ino = fp->f_s_inum;
+ fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj;
+ fp->f_info[OPT_BASE].f_d_min = fp->f_s_min;
+ fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink;
+ fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC);
+
+ /* see if the destination has changed */
+ fp->f_info[OPT_BASE].f_modtime = fp->f_d_modtime;
+ fp->f_info[OPT_BASE].f_ino = fp->f_d_inum;
+ fp->f_info[OPT_BASE].f_d_maj = fp->f_d_maj;
+ fp->f_info[OPT_BASE].f_d_min = fp->f_d_min;
+ fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink;
+ fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST);
+
+ /* if nobody thinks the file exists, baseline needs pruning */
+ if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) {
+ fp->f_srcdiffs |= D_DELETE;
+ fp->f_dstdiffs |= D_DELETE;
+ }
+
+ /* keep track of possible deletions to look for trouble */
+ if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) {
+ est_deletes++;
+
+ /* see if file is (or has been) a non-empty directory */
+ if (fp->f_files)
+ est_rmdirs++;
+ }
+ }
+
+ /* if we found differences, queue the file for reconciliation */
+ if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) {
+ queue_file(fp);
+
+ if (opt_debug & DBG_ANAL) {
+ fprintf(stderr, "ANAL: src=%s",
+ showflags(diffmap, fp->f_srcdiffs));
+ fprintf(stderr, " dst=%s",
+ showflags(diffmap, fp->f_dstdiffs));
+ fprintf(stderr, " flgs=%s",
+ showflags(fileflags, fp->f_flags));
+ fprintf(stderr, " name=%s\n", fp->f_fullname);
+ }
+ }
+
+ /* bump the total file count */
+ fp->f_base->b_totfiles++;
+ total_files++;
+
+ /* if this is not a directory, we're done */
+ if (fp->f_files == 0)
+ return (errs);
+
+ /*
+ * If this is a directory, we need to recursively analyze
+ * our children, but only children who have been evaluated.
+ * If a node has not been evaluated, then we don't have
+ * updated stat information and there is nothing to analyze.
+ *
+ * we also want to run though all nodes with STAT errors
+ * so that we can put them on the reconciliation list.
+ * If a directory is unreadable on one side, all files
+ * under that directory (ON BOTH SIDES) must be marked as
+ * blocked by stat errors.
+ */
+ push_name(fp->f_name);
+
+ for (cp = fp->f_files; cp; cp = cp->f_next) {
+ if (fp->f_flags & F_STAT_ERROR)
+ cp->f_flags |= F_STAT_ERROR;
+ if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR))
+ errs |= check_file(cp);
+ }
+
+ pop_name();
+
+ return (errs);
+}
+
+/*
+ * routine:
+ * check_changes
+ *
+ * purpose:
+ * to figure out what has changed for a specific file
+ *
+ * parameters:
+ * file pointer
+ * the reference info
+ * the info to be checked for changes
+ *
+ * returns:
+ * diff mask
+ *
+ * notes:
+ * this routine doesn't pretend to understand what happened.
+ * it merely enumerates the ways in which the files differ.
+ */
+static diffmask_t
+check_changes(struct file *fp, int ref, int new)
+{ struct fileinfo *rp, *np;
+ int mask = 0;
+ int type;
+
+ rp = &fp->f_info[ref];
+ np = &fp->f_info[new];
+
+ if (np->f_uid != rp->f_uid)
+ mask |= D_UID;
+
+ if (np->f_gid != rp->f_gid)
+ mask |= D_GID;
+
+ if (np->f_mode != rp->f_mode)
+ mask |= D_PROT;
+
+ type = np->f_type;
+ if (type != rp->f_type) {
+ if (type == 0)
+ mask |= D_DELETE;
+ else if (rp->f_type == 0)
+ mask |= D_CREATE;
+ else
+ mask |= D_TYPE;
+ } else if (type == S_IFBLK || type == S_IFCHR) {
+ /*
+ * for special files, we only look at the maj/min
+ */
+ if (np->f_rd_maj != rp->f_rd_maj)
+ mask |= D_SIZE;
+ if (np->f_rd_min != rp->f_rd_min)
+ mask |= D_SIZE;
+ } else if (type != S_IFDIR) {
+ /*
+ * for directories, we don't look directly at
+ * the contents, so these fields don't mean
+ * anything. If the directories have changed
+ * in any interesting way, we'll find it by
+ * walking the tree.
+ */
+ if (np->f_modtime > rp->f_modtime)
+ mask |= D_MTIME;
+
+ if (np->f_size != rp->f_size)
+ mask |= D_SIZE;
+
+ if (np->f_nlink != rp->f_nlink)
+ mask |= D_LINKS;
+ }
+
+ if (cmp_acls(rp, np) == 0)
+ mask |= D_FACLS;
+
+ return (mask);
+}
+
+/*
+ * routine:
+ * same_name
+ *
+ * purpose:
+ * to figure out whether or not two databsae nodes actually refer to
+ * the same file.
+ *
+ * parameters:
+ * pointers to two file description nodes
+ * which side we should check
+ *
+ * returns:
+ * TRUE/FALSE
+ *
+ * notes:
+ * if a single directory is specified in multiple base pairs, it
+ * is possible to have multiple nodes in the database describing
+ * the same file. This routine is supposed to detect those cases.
+ *
+ * what should be a trivial string comparison is complicated by
+ * the possibility that the two nodes might describe the same file
+ * from base directories at different depths. Thus, rather than
+ * comparing two strings, we really want to compare the concatenation
+ * of two pairs of strings. Unfortunately calling full_name would
+ * be awkward right now, so instead we have our own comparison
+ * routine that automatically skips from the first string to
+ * the second.
+ */
+static bool_t
+same_name(struct file *f1, struct file *f2, side_t srcdst)
+{
+ char *s1, *s2, *x1, *x2;
+
+ if (srcdst == OPT_SRC) {
+ s1 = (f1->f_base)->b_src_name;
+ s2 = (f2->f_base)->b_src_name;
+ } else {
+ s1 = (f1->f_base)->b_dst_name;
+ s2 = (f2->f_base)->b_dst_name;
+ }
+ x1 = f1->f_fullname;
+ x2 = f2->f_fullname;
+
+ /*
+ * Compare the two names, and if they differ before they end
+ * this is a non-match. If they both end at the same time,
+ * this is a match.
+ *
+ * The trick here is that each string is actually the logical
+ * concatenation of two strings, and we need to automatically
+ * wrap from the first to the second string in each pair. There
+ * is no requirement that the two (concatenated) strings be
+ * broken at the same point, so we have a slightly baroque
+ * comparsion loop.
+ */
+ while (*s1 && *s1 == *s2) {
+
+ /*
+ * strings have been identical so far, so advance the
+ * pointers and continue the comparison. The trick
+ * is that when either string ends, we have to wrap
+ * over to its extension.
+ */
+ s1++; s2++;
+ if (*s1 && *s2)
+ continue;
+
+ /*
+ * at least one of the strings has ended.
+ * there is an implicit slash between the string
+ * and its extension, and this has to be matched
+ * against the other string.
+ */
+ if (*s1 != *s2) {
+ if (*s1 == 0 && *s2 == '/')
+ s2++;
+ else if (*s2 == 0 && *s1 == '/')
+ s1++;
+ else
+ /* the disagreement doesn't come at a slash */
+ break;
+ }
+
+ /*
+ * if either string has ended, wrap to its extension
+ */
+ if (*s1 == 0 && x1 != 0) {
+ s1 = x1;
+ x1 = 0;
+ }
+ if (*s2 == 0 && x2 != 0) {
+ s2 = x2;
+ x2 = 0;
+ }
+ }
+
+ return (*s1 == *s2);
+}
+
+/*
+ * routine:
+ * find_link
+ *
+ * purpose:
+ * to figure out if there is a file to which we should
+ * be creating a link (rather than making a copy)
+ *
+ * parameters:
+ * file node for the file to be created (that we hope is merely a link)
+ * which side is to be changed (src/dst)
+ *
+ * return:
+ * 0 no link is appropriate
+ * else pointer to file node for link referent
+ *
+ * notes:
+ * there are a few strange heuristics in this routine and I
+ * wouldn't bet my soul that I got all of them right. The general
+ * theory is that when a new file is created, we look to see if it
+ * is a link to another file on the changed side, and if it is, we
+ * find the corresponding file on the unchanged side.
+ *
+ * cases we want to be able to handle:
+ * 1. one or more links are created to a prexisting file
+ * 2. a preexisting only link is renamed
+ * 3. a rename of one of multiple links to a preexisting file
+ * 4. a single file is created with multiple links
+ */
+struct file *
+find_link(struct file *fp, side_t srcdst)
+{ struct file *lp;
+ side_t chgside, tgtside;
+ struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp;
+
+ /* chg = side on which the change was noticed */
+ /* tgt = side to which the change is to be propagated */
+ chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC;
+ tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST;
+ fcp = &fp->f_info[chgside];
+ ftp = &fp->f_info[tgtside];
+
+ /*
+ * cases 1 and 3
+ *
+ * When a new link is created, we should be able to find
+ * another file in the changed hierarchy that has the same
+ * I-node number. We expect it to be on the changed list
+ * because the link count will have gone up or because all
+ * of the copies are new. If we find one, then the new file
+ * on the receiving file should be a link to the corresponding
+ * existing file.
+ *
+ * case 4
+ *
+ * the first link will be dealt with as a copy, but all
+ * subsequent links should find an existing file analogous
+ * to one of the links on the changed side, and create
+ * corresponding links on the other side.
+ *
+ * in each of these cases, there should be multiple links
+ * on the changed side. If the linkcount on the changed
+ * side is one, we needn't bother searching for other links.
+ */
+ if (fcp->f_nlink > 1)
+ for (lp = changes; lp; lp = lp->f_rnext) {
+ /* finding the same node doesn't count */
+ if (fp == lp)
+ continue;
+
+ tgtp = &lp->f_info[tgtside];
+ chgp = &lp->f_info[chgside];
+
+ /*
+ * if the file doesn't already exist on the target side
+ * we cannot make a link to it
+ */
+ if (tgtp->f_mode == 0)
+ continue;
+
+ /*
+ * if this is indeed a link, then the prospective file on
+ * the changed side will have the same dev/inum as the file
+ * we are looking for
+ */
+ if (fcp->f_d_maj != chgp->f_d_maj)
+ continue;
+ if (fcp->f_d_min != chgp->f_d_min)
+ continue;
+ if (fcp->f_ino != chgp->f_ino)
+ continue;
+
+ /*
+ * if the target side is already a link to this file,
+ * then there is no new link to be created
+ * FIX: how does this interact with copies over links
+ */
+ if ((ftp->f_d_maj == tgtp->f_d_maj) &&
+ (ftp->f_d_min == tgtp->f_d_min) &&
+ (ftp->f_ino == tgtp->f_ino))
+ continue;
+
+ /*
+ * there is a pathological situation where a single file
+ * might appear under multiple base directories. This is
+ * damned awkward to detect in any other way, so we must
+ * check to see if we have just found another database
+ * instance for the same file (on the changed side).
+ */
+ if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside))
+ continue;
+
+ if (opt_debug & DBG_ANAL)
+ fprintf(stderr, "ANAL: FIND LINK %s and %s\n",
+ fp->f_fullname, lp->f_fullname);
+
+ return (lp);
+ }
+
+ /*
+ * case 2: a simple rename of the only link
+ *
+ * In this case, there may not be any other existing file on
+ * the changed side that has the same I-node number. There
+ * might, however, be a record of such a file in the baseline.
+ * If we can find an identical file with a different name that
+ * has recently disappeared, we have a likely rename.
+ */
+ for (lp = changes; lp; lp = lp->f_rnext) {
+
+ /* finding the same node doesn't count */
+ if (fp == lp)
+ continue;
+
+ tgtp = &lp->f_info[tgtside];
+ chgp = &lp->f_info[chgside];
+
+ /*
+ * if the file still exists on the changed side this is
+ * not a simple rename, and in fact the previous pass
+ * would have found it.
+ */
+ if (chgp->f_mode != 0)
+ continue;
+
+ /*
+ * the inode number for the new link on the changed
+ * side must match the inode number for the old link
+ * from the baseline.
+ */
+ if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj
+ : lp->f_s_maj))
+ continue;
+ if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min
+ : lp->f_s_min))
+ continue;
+ if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum
+ : lp->f_s_inum))
+ continue;
+
+ /* finding a file we are already linked to doesn't help */
+ if ((ftp->f_d_maj == tgtp->f_d_maj) &&
+ (ftp->f_d_min == tgtp->f_d_min) &&
+ (ftp->f_ino == tgtp->f_ino))
+ continue;
+
+ /*
+ * there is a danger that we will confuse an
+ * inode reallocation with a rename. We should
+ * only consider this to be a rename if the
+ * new file is identical to the old one
+ */
+ basp = &lp->f_info[OPT_BASE];
+ if (fcp->f_type != basp->f_type)
+ continue;
+ if (fcp->f_size != basp->f_size)
+ continue;
+ if (fcp->f_mode != basp->f_mode)
+ continue;
+ if (fcp->f_uid != basp->f_uid)
+ continue;
+ if (fcp->f_gid != basp->f_gid)
+ continue;
+
+ if (opt_debug & DBG_ANAL)
+ fprintf(stderr, "ANAL: FIND RENAME %s and %s\n",
+ fp->f_fullname, lp->f_fullname);
+
+ return (lp);
+ }
+
+ return (0);
+}
+
+/*
+ * routine:
+ * has_other_links
+ *
+ * purpose:
+ * to determine whether or not there is more that one link to a
+ * particular file. We are willing to delete a link to a file
+ * that has changed if we will still have other links to it.
+ * The trick here is that we only care about links under our
+ * dominion.
+ *
+ * parameters:
+ * file pointer to node we are interested in
+ * which side we are looking to additional links on
+ *
+ * returns:
+ * TRUE if there are multiple links
+ * FALSE if this is the only one we know of
+ */
+bool_t
+has_other_links(struct file *fp, side_t srcdst)
+{ struct file *lp;
+ struct fileinfo *fip, *lip;
+
+ fip = &fp->f_info[srcdst];
+
+ /* if the link count is one, there couldn't be others */
+ if (fip->f_nlink < 2)
+ return (FALSE);
+
+ /* look for any other files for the same inode */
+ for (lp = changes; lp; lp = lp->f_rnext) {
+ /* finding the same node doesn't count */
+ if (fp == lp)
+ continue;
+
+ lip = &lp->f_info[srcdst];
+
+ /*
+ * file must still exist on this side
+ */
+ if (lip->f_mode == 0)
+ continue;
+
+ /*
+ * if this is indeed a link, then the prospective file on
+ * the changed side will have the same dev/inum as the file
+ * we are looking for
+ */
+ if (lip->f_d_maj != fip->f_d_maj)
+ continue;
+ if (lip->f_d_min != fip->f_d_min)
+ continue;
+ if (lip->f_ino != fip->f_ino)
+ continue;
+
+ /*
+ * we have found at least one other link
+ */
+ return (TRUE);
+ }
+
+ return (FALSE);
+}
+
+/*
+ * routine:
+ * link_update
+ *
+ * purpose:
+ * to propoagate a stat change to all other file nodes that
+ * correspond to the same I-node on the changed side
+ *
+ * parameters:
+ * file pointer for the updated file
+ * which side was changed
+ *
+ * returns:
+ * void
+ *
+ * notes:
+ * if we have copied onto a file, we have copied onto all
+ * of its links, but since we do all stats before we do any
+ * copies, the stat information recently collected for links
+ * is no longer up-to-date, and this would result in incorrect
+ * reconciliation (redundant copies).
+ *
+ * There is an assumption here that all links to a changed
+ * file will be in the change list. This is true for almost
+ * all cases not involving restriction. If we do fail to
+ * update the baseline for a file that was off the change list,
+ * the worst that is likely to happen is that we will think
+ * it changed later (but will almost surely find that both
+ * copies agree).
+ */
+void
+link_update(struct file *fp, side_t which)
+{ struct file *lp;
+
+ for (lp = changes; lp; lp = lp->f_rnext) {
+ /* finding the current entry doesn't count */
+ if (lp == fp)
+ continue;
+
+ /* look for same i#, maj, min on changed side */
+ if (lp->f_info[which].f_ino != fp->f_info[which].f_ino)
+ continue;
+ if (lp->f_info[which].f_d_maj != fp->f_info[which].f_d_maj)
+ continue;
+ if (lp->f_info[which].f_d_min != fp->f_info[which].f_d_min)
+ continue;
+
+ /*
+ * this appears to be another link to the same file
+ * so the updated stat information for one must be
+ * correct for the other.
+ */
+ lp->f_info[which].f_type = fp->f_info[which].f_type;
+ lp->f_info[which].f_size = fp->f_info[which].f_size;
+ lp->f_info[which].f_mode = fp->f_info[which].f_mode;
+ lp->f_info[which].f_uid = fp->f_info[which].f_uid;
+ lp->f_info[which].f_gid = fp->f_info[which].f_gid;
+ lp->f_info[which].f_modtime = fp->f_info[which].f_modtime;
+ lp->f_info[which].f_modns = fp->f_info[which].f_modns;
+ lp->f_info[which].f_nlink = fp->f_info[which].f_nlink;
+ lp->f_info[which].f_rd_maj = fp->f_info[which].f_rd_maj;
+ lp->f_info[which].f_rd_min = fp->f_info[which].f_rd_min;
+
+ if (opt_debug & DBG_STAT)
+ fprintf(stderr,
+ "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
+ lp->f_name, lp->f_info[which].f_modtime,
+ lp->f_info[which].f_modns);
+ }
+}
+
+/*
+ * routine:
+ * queue_file
+ *
+ * purpose:
+ * append a file to the list of needed reconciliations
+ *
+ * parameters:
+ * pointer to file
+ *
+ * notes:
+ * when a request is appended to the reconciliation list,
+ * we fill in the full name. We delayed this in hopes that
+ * it wouldn't be necessary (saving cycles and memory)
+ *
+ * There is some funny business with modification times.
+ * In general, we queue files in order of the latest modification
+ * time so that propagations preserve relative ordering. There
+ * are, however, a few important exceptions:
+ * 1. all directory creations happen at time zero,
+ * so that they are created before any files can
+ * be added to them.
+ * 2. all directory deletions happen at time infinity-depth,
+ * so that everything else can be removed before the
+ * directories themselves are removed.
+ * 3. all file deletions happen at time infinity-depth
+ * so that (in renames) the links will preceed the unlinks.
+ */
+static void
+queue_file(struct file *fp)
+{ struct file **pp, *np;
+
+#define TIME_ZERO 0L /* the earliest possible time */
+#define TIME_LONG 0x7FFFFFFF /* the latest possible time */
+
+ /*
+ * figure out the modification time for sequencing purposes
+ */
+ if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) {
+ /*
+ * deletions are performed last, and depth first
+ */
+ fp->f_modtime = TIME_LONG - fp->f_depth;
+ } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR &&
+ fp->f_info[OPT_DST].f_type != S_IFDIR) {
+ /*
+ * for most files we use the latest mod time
+ */
+ fp->f_modtime = fp->f_info[OPT_SRC].f_modtime;
+ fp->f_modns = fp->f_info[OPT_SRC].f_modns;
+ if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) {
+ fp->f_modtime = fp->f_info[OPT_DST].f_modtime;
+ fp->f_modns = fp->f_info[OPT_DST].f_modns;
+ }
+ } else {
+ /*
+ * new directory creations need to happen before anything
+ * else and are automatically sequenced in traversal order
+ */
+ fp->f_modtime = TIME_ZERO;
+ }
+
+ /*
+ * insertion is time ordered, and for equal times,
+ * insertions is in (pre-order) traversal order
+ */
+ for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) {
+ if (fp->f_modtime > np->f_modtime)
+ continue;
+ if (fp->f_modtime < np->f_modtime)
+ break;
+ if (fp->f_modns < np->f_modns)
+ break;
+ }
+
+ fp->f_fullname = strdup(get_name(fp));
+ fp->f_rnext = np;
+ *pp = fp;
+}
+
+
+/*
+ * routines:
+ * push_name/pop_name/get_name
+ *
+ * purpose:
+ * maintain a name stack so we can form name of a particular file
+ * as the concatenation of all of the names between it and the
+ * (know to be fully qualified) base directory.
+ *
+ * notes:
+ * we go to this trouble because most files never change and
+ * so we don't need to associate full names with every one.
+ * This stack is maintained during analysis, and if we decide
+ * to add a file to the reconciliation list, we can use the
+ * stack to generate a fully qualified name at that time.
+ *
+ * we compress out '/./' when we return a name. Given that the
+ * stack was built by a tree walk, the only place a /./ should
+ * appear is at the first level after the base ... but there
+ * are legitimate ways for them to appear there.
+ *
+ * these names can get deep, so we dynamically size our name buffer
+ */
+static const char *namestack[ MAX_DEPTH + 1 ];
+static int namedepth = 0;
+static int namelen = 0;
+
+void
+push_name(const char *name)
+{
+ namestack[ namedepth++ ] = name;
+ namelen += 2 + strlen(name);
+
+ /* make sure we don't overflow our name stack */
+ if (namedepth >= MAX_DEPTH) {
+ fprintf(stderr, gettext(ERR_deep), name);
+ exit(ERR_OTHER);
+ }
+}
+
+void
+pop_name(void)
+{
+ namelen -= 2 + strlen(namestack[--namedepth]);
+ namestack[ namedepth ] = 0;
+
+#ifdef DBG_ERRORS
+ /* just a little sanity check here */
+ if (namedepth <= 0) {
+ if (namedepth < 0) {
+ fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n");
+ exit(ERR_OTHER);
+ } else if (namelen != 0) {
+ fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n");
+ exit(ERR_OTHER);
+ }
+ }
+#endif
+}
+
+char
+*get_name(struct file *fp)
+{ int i;
+ static char *namebuf = 0;
+ static int buflen = 0;
+
+ /* make sure we have an adequate buffer */
+ i = namelen + 1 + strlen(fp->f_name);
+ if (buflen < i) {
+ for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME);
+ namebuf = (char *) realloc(namebuf, buflen);
+ }
+
+ /* assemble the name */
+ namebuf[0] = 0;
+ for (i = 0; i < namedepth; i++) {
+ if (strcmp(namestack[i], ".")) {
+ strcat(namebuf, namestack[i]);
+ strcat(namebuf, "/");
+ }
+ }
+
+ strcat(namebuf, fp->f_name);
+
+ return (namebuf);
+}