diff options
Diffstat (limited to 'usr/src/cmd/filesync/recon.c')
-rw-r--r-- | usr/src/cmd/filesync/recon.c | 833 |
1 files changed, 833 insertions, 0 deletions
diff --git a/usr/src/cmd/filesync/recon.c b/usr/src/cmd/filesync/recon.c new file mode 100644 index 0000000000..3ba61b0f5c --- /dev/null +++ b/usr/src/cmd/filesync/recon.c @@ -0,0 +1,833 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * recon.c + * + * purpose: + * process the reconciliation list, figure out exactly what the + * changes were, and what we should do about them. + * + * contents: + * reconcile ... (top level) process the reconciliation list + * samedata .... (static) do two files have the same contents + * samestuff ... (static) do two files have the same ownership/protection + * samecompare . (static) actually read and compare the contents + * samelink .... (static) do two symlinks have the same contents + * truncated ... (static) was one of the two copies truncted + * older ....... (static) which copy is older + * newer ....... (static) which copy is newer + * full_name ... generate a full path name for a file + * + * notes: + * If you only study one routine in this whole program, reconcile + * is that routine. Everything else is just book keeping. + * + * things were put onto the reconciliation list because analyze + * thought that they might have changed ... but up until now + * nobody has figured out what the changes really were, or even + * if there really were any changes. + * + * queue_file has ordered the reconciliation list with directory + * creations first (depth ordered) and deletions last (inversely + * depth ordered). all other changes have been ordered by mod time. + */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> + +#include "filesync.h" +#include "database.h" +#include "messages.h" +#include "debug.h" + +/* + * local routines to figure out how the files really differ + */ +static bool_t samedata(struct file *); +static bool_t samestuff(struct file *); +static bool_t samecompare(struct file *); +static bool_t truncated(struct file *); +static bool_t samelink(); +static side_t newer(struct file *); +static side_t older(struct file *); + +/* + * globals + */ +char *srcname; /* file we are emulating */ +char *dstname; /* file we are updating */ + +/* + * routine: + * reconcile + * + * purpose: + * to perform the reconciliation action associated with a file + * + * parameters: + * file pointer + * + * returns: + * built up error mask + * updated statistics + * + * notes: + * The switch statement handles the obvious stuff. + * The TRUE side of the samedata test handles minor differences. + * The interesting stuff is in the FALSE side of the samedata test. + * + * The desparation heuristics (in the diffmask&CONTENTS test) are + * not rigorously correct ... but they always try do the right thing + * with data, and only lose mode/ownership changes in relatively + * pathological cases. But I claim that the benefits outweigh the + * risks, and most users will be pleased with the resulting decisions. + * + * Another trick is in the deletion cases of the switch. We + * normally won't allow an unlink that conflicts with data + * changes. If there are multiple links to the file, however, + * we can make the changes and do the deletion. + * + * The action routines do_{remove,rename,like,copy} handle all + * of their own statistics and status updating. This routine + * only has to handle its own reconciliation failures (when we + * can't decide what to do). + */ +errmask_t +reconcile(struct file *fp) +{ errmask_t errs = 0; + diffmask_t diffmask; + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: %s flgs=%s, mtime=%08lx.%08lx\n", + fp->f_fullname, + showflags(fileflags, fp->f_flags), + fp->f_modtime, fp->f_modns); + + /* + * form the fully qualified names for both files + */ + srcname = full_name(fp, OPT_SRC, OPT_SRC); + dstname = full_name(fp, OPT_DST, OPT_DST); + + /* + * because they are so expensive to read and so troublesome + * to set, we try to put off reading ACLs as long as possible. + * If we haven't read them yet, we must read them now (so that + * samestuff can compare them). + */ + if (opt_acls == 0 && fp->f_info[ OPT_BASE ].f_numacls == 0) { + if (get_acls(srcname, &fp->f_info[ OPT_SRC ])) + fp->f_srcdiffs |= D_FACLS; + if (get_acls(dstname, &fp->f_info[ OPT_DST ])) + fp->f_dstdiffs |= D_FACLS; + } + + /* + * If a rename has been detected, we don't have to figure + * it out, since both the rename-to and rename-from files + * have already been designated. When we encounter a rename-to + * we should carry it out. When we encounter a rename-from + * we can ignore it, since it should be dealt with as a side + * effect of processing the rename-to. + */ + if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_FROM) + return (0); + + if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_TO) { + + if (opt_verbose) + fprintf(stdout, gettext(V_renamed), + fp->f_previous->f_fullname, fp->f_name); + + if (fp->f_srcdiffs & D_RENAME_TO) { + errs = do_rename(fp, OPT_DST); + fp->f_srcdiffs &= D_MTIME | D_SIZE; + } else if (fp->f_dstdiffs & D_RENAME_TO) { + errs = do_rename(fp, OPT_SRC); + fp->f_dstdiffs &= D_MTIME | D_SIZE; + } + + if (errs != ERR_RESOLVABLE) + goto done; + + /* + * if any differences remain, then we may be dealing + * with contents changes in addition to a rename + */ + if ((fp->f_srcdiffs | fp->f_dstdiffs) == 0) + goto done; + + /* + * fall through to reconcile the data changes + */ + } + + /* + * pull of the easy cases (non-conflict creations & deletions) + */ + switch (fp->f_flags & (F_WHEREFOUND)) { + case F_IN_BASELINE: /* only exists in baseline */ + case 0: /* only exists in rules */ + if (opt_verbose) + fprintf(stdout, gettext(V_nomore), + fp->f_fullname); + fp->f_flags |= F_REMOVE; /* fix baseline */ + return (0); + + case F_IN_BASELINE|F_IN_SOURCE: /* deleted from dest */ + /* + * the basic principle here is that we are willing + * to do the deletion if: + * no changes were made on the other side + * OR + * we have been told to force in this direction + * + * we do, however, make an exception for files that + * will still have other links. In this case, the + * (changed) data will still be accessable through + * another link and so we are willing to do the unlink + * inspite of conflicting changes (which may well + * have been introduced through another link. + * + * The jury is still out on this one + */ + if (((fp->f_srcdiffs&D_IMPORTANT) == 0) || + (opt_force == OPT_DST) || + has_other_links(fp, OPT_SRC)) { + if (opt_verbose) + fprintf(stdout, gettext(V_deleted), + fp->f_fullname, "dst"); + errs = do_remove(fp, OPT_SRC); + goto done; + } + + /* a deletion combined with changes */ + if (opt_verbose) + fprintf(stdout, gettext(V_delconf), + fp->f_fullname); + + /* if we are to resolve in favor of source */ + if (opt_force == OPT_SRC) { + errs = do_copy(fp, OPT_DST); + goto done; + } + + fp->f_problem = gettext(PROB_del_change); + goto cant; + + case F_IN_BASELINE|F_IN_DEST: /* deleted from src */ + /* just like previous case, w/sides reversed */ + if (((fp->f_dstdiffs&D_IMPORTANT) == 0) || + (opt_force == OPT_SRC) || + has_other_links(fp, OPT_DST)) { + if (opt_verbose) + fprintf(stdout, gettext(V_deleted), + fp->f_fullname, "src"); + errs = do_remove(fp, OPT_DST); + goto done; + } + + /* a deletion combined with changes */ + if (opt_verbose) + fprintf(stdout, gettext(V_delconf), + fp->f_fullname); + + /* if we are to resolve in favor of destination */ + if (opt_force == OPT_DST) { + errs = do_copy(fp, OPT_SRC); + goto done; + } + + fp->f_problem = gettext(PROB_del_change); + goto cant; + + /* + * if something new shows up, and for some reason we cannot + * propagate it to the other side, we should suppress the + * file from the baseline, so it will show up as a new + * creation next time too. + */ + case F_IN_SOURCE: /* created in src */ + if (opt_verbose) + fprintf(stdout, gettext(V_created), + fp->f_fullname, "src"); + errs = do_copy(fp, OPT_DST); + goto done; + + case F_IN_DEST: /* created in dest */ + if (opt_verbose) + fprintf(stdout, gettext(V_created), + fp->f_fullname, "dst"); + errs = do_copy(fp, OPT_SRC); + goto done; + + case F_IN_SOURCE|F_IN_DEST: /* not in baseline */ + /* + * since we don't have a baseline, we cannot + * know which of the two copies should prevail + */ + break; + + case F_IN_BASELINE|F_IN_SOURCE|F_IN_DEST: + /* + * we have a baseline where the two copies agreed, + * so maybe we can determine that only one of the + * two copies have changed ... but before we decide + * who should be the winner we should determine + * that the two copies are actually different. + */ + break; + } + + /* + * if we have fallen out of the case statement, it is because + * we have discovered a non-obvious situation where potentially + * changed versions of the file exist on both sides. + * + * if the two copies turn out to be identical, this is simple + */ + if (samedata(fp)) { + if (samestuff(fp)) { + /* files are identical, just update baseline */ + if (opt_verbose) + fprintf(stdout, gettext(V_unchanged), + fp->f_fullname); + update_info(fp, OPT_SRC); + goto done; + } else { + /* + * contents agree but ownership/protection does + * not agree, so we have to bring these into + * agreement. We can pick a winner if one + * side hasn't changed, or if the user has + * specified a force flag. + */ + if (opt_verbose) + fprintf(stdout, gettext(V_modes), + fp->f_fullname); + + if (((fp->f_srcdiffs & D_ADMIN) == 0) || + (opt_force == OPT_DST)) { + errs = do_like(fp, OPT_SRC, TRUE); + goto done; + } + + if (((fp->f_dstdiffs & D_ADMIN) == 0) || + (opt_force == OPT_SRC)) { + errs = do_like(fp, OPT_DST, TRUE); + goto done; + } + } + /* falls down to cant */ + } else { + /* + * The two files have different contents, so we have + * a potential conflict here. If we know that only one + * side has changed, we go with that side. + */ + if (fp->f_dstdiffs == 0 || fp->f_srcdiffs == 0) { + if (opt_verbose) + fprintf(stdout, gettext(V_changed), + fp->f_fullname); + errs = do_copy(fp, fp->f_srcdiffs ? OPT_DST : OPT_SRC); + goto done; + } + + /* + * Both sides have changed, so we have a real conflict. + */ + if (opt_verbose) + fprintf(stdout, + gettext(truncated(fp) ? + V_trunconf : V_different), + fp->f_fullname); + + /* + * See if the user has given us explicit instructions + * on how to resolve conflicts. We may have been told + * to favor the older, the newer, the source, or the + * destination ... but the default is to leave the + * conflict unresolved. + */ + if (opt_force == OPT_OLD) { + errs = do_copy(fp, newer(fp)); + goto done; + } + + if (opt_force == OPT_NEW) { + errs = do_copy(fp, older(fp)); + goto done; + } + + if (opt_force != 0) { + errs = do_copy(fp, (opt_force == OPT_SRC) ? + OPT_DST : OPT_SRC); + goto done; + } + + + /* + * This is our last chance before giving up. + * + * We know that the files have different contents and + * that there were changes on both sides. The only way + * we can safely handle this is if there were pure contents + * changes on one side and pure ownership changes on the + * other side. In this case we can propagate the ownership + * one way and the contents the other way. + * + * We decide whether or not this is possible by ANDing + * together the changes on the two sides, and seeing + * if the changes were all orthogonal (none of the same + * things changed on both sides). + */ + diffmask = fp->f_srcdiffs & fp->f_dstdiffs; + if ((diffmask & D_CONTENTS) == 0) { + /* + * if ownership changes were only made on one side + * (presumably the side that didn't have data changes) + * we can handle them separately. In this case, + * ownership changes must be fixed first, because + * the subsequent do_copy will overwrite them. + */ + if ((diffmask & D_ADMIN) == 0) + errs |= do_like(fp, (fp->f_srcdiffs&D_ADMIN) ? + OPT_DST : OPT_SRC, + TRUE); + + /* + * Now we can deal with the propagation of the data + * changes. Note that any ownership/protection + * changes (from the other side) that have not been + * propagated yet are about to be lost. The cases + * in which this might happen are all pathological + * and the consequences of losing the protection + * changes are (IMHO) minor when compared to the + * obviously correct data propagation. + */ + errs |= do_copy(fp, (fp->f_srcdiffs&D_CONTENTS) ? + OPT_DST : OPT_SRC); + goto done; + } + + /* + * there are conflicting changes, nobody has told us how to + * resolve conflicts, and we cannot figure out how to merge + * the differences. + */ + fp->f_problem = gettext(PROB_different); + } + +cant: + /* + * I'm not smart enough to resolve this conflict automatically, + * so I have no choice but to bounce it back to the user. + */ + fp->f_flags |= F_CONFLICT; + fp->f_base->b_unresolved++; + errs |= ERR_UNRESOLVED; + +done: + /* + * if we have a conflict and the file is not in the baseline, + * then there was never any point at which the two copies were + * in agreement, and we want to preserve the conflict for future + * resolution. + */ + if ((errs&ERR_UNRESOLVED) && (fp->f_flags & F_IN_BASELINE) == 0) + if (fp->f_files == 0) + /* + * in most cases, this is most easily done by just + * excluding the file in question from the baseline + */ + fp->f_flags |= F_REMOVE; + else + /* + * but ... if the file in question is a directory + * with children, excluding it from the baseline + * would keep all of its children (even those with + * no conflicts) out of the baseline as well. In + * This case, it is better to tell a lie and to + * manufacture a point of imaginary agreement + * in the baseline ... but one that is absurd enough + * that we will still see conflicts each time we run. + * + * recording a type of directory, and everything + * else as zero should be absurd enough. + */ + fp->f_info[ OPT_BASE ].f_type = S_IFDIR; + + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: %s ERRS=%s\n", fp->f_fullname, + showflags(errmap, errs)); + + return (errs); +} + +/* + * routine: + * newer + * + * purpose: + * determine which of two files is newer + * + * parameters: + * struct file + * + * returns: + * side_t (src/dest) + */ +static side_t +newer(struct file *fp) +{ + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + if (sp->f_modtime > dp->f_modtime) + return (OPT_SRC); + + if (sp->f_modtime < dp->f_modtime) + return (OPT_DST); + + if (sp->f_modns >= dp->f_modns) + return (OPT_SRC); + + return (OPT_DST); +} + +/* + * routine: + * older + * + * purpose: + * determine which of two files is older + * + * parameters: + * struct file + * + * returns: + * side_t (src/dest) + */ +static side_t +older(struct file *fp) +{ + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + if (sp->f_modtime < dp->f_modtime) + return (OPT_SRC); + + if (sp->f_modtime > dp->f_modtime) + return (OPT_DST); + + if (sp->f_modns <= dp->f_modns) + return (OPT_SRC); + + return (OPT_DST); +} + +/* + * routine: + * samedata + * + * purpose: + * determine whether or not two files contain the same data + * + * parameters: + * struct file + * + * returns: + * bool_t (true/false) + */ +static bool_t +samedata(struct file *fp) +{ + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + /* cheap test: types are different */ + if (sp->f_type != dp->f_type) + return (FALSE); + + /* cheap test: directories have same contents */ + if (sp->f_type == S_IFDIR) + return (TRUE); + + /* special files are compared via their maj/min */ + if ((sp->f_type == S_IFBLK) || (sp->f_type == S_IFCHR)) { + if (sp->f_rd_maj != dp->f_rd_maj) + return (FALSE); + if (sp->f_rd_min != dp->f_rd_min) + return (FALSE); + return (TRUE); + } + + /* symlinks are the same if their contents are the same */ + if (sp->f_type == S_IFLNK) + return (samelink()); + + /* cheap test: sizes are different */ + if (fp->f_info[OPT_SRC].f_size != fp->f_info[OPT_DST].f_size) + return (FALSE); + + /* expensive test: byte for byte comparison */ + if (samecompare(fp) == 0) + return (FALSE); + + return (TRUE); +} + +/* + * routine: + * samestuff + * + * purpose: + * determine whether or not two files have same owner/protection + * + * parameters: + * struct file + * + * returns: + * bool_t (true/false) + */ +static bool_t +samestuff(struct file *fp) +{ int same_mode, same_uid, same_gid, same_acl; + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + same_mode = (sp->f_mode == dp->f_mode); + same_uid = (sp->f_uid == dp->f_uid); + same_gid = (sp->f_gid == dp->f_gid); + same_acl = cmp_acls(sp, dp); + + /* if the are all the same, it is easy to tell the truth */ + if (same_uid && same_gid && same_mode && same_acl) + return (TRUE); + + /* note the nature of the conflict */ + if (!same_uid || !same_gid || !same_acl) + fp->f_problem = gettext(PROB_ownership); + else + fp->f_problem = gettext(PROB_protection); + + return (FALSE); +} + +/* + * routine: + * samecompare + * + * purpose: + * do a byte-for-byte comparison of two files + * + * parameters: + * struct file + * + * returns: + * bool_t (true/false) + */ +static bool_t +samecompare(struct file *fp) +{ int sfd, dfd; + int i, count; + char srcbuf[ COPY_BSIZE ], dstbuf[ COPY_BSIZE ]; + bool_t same = TRUE; + + + sfd = open(srcname, 0); + if (sfd < 0) + return (FALSE); + + dfd = open(dstname, 0); + if (dfd < 0) { + close(sfd); + return (FALSE); + } + + for ( + count = read(sfd, srcbuf, COPY_BSIZE); + count > 0; + count = read(sfd, srcbuf, COPY_BSIZE)) { + + /* do a matching read */ + if (read(dfd, dstbuf, COPY_BSIZE) != count) { + same = FALSE; + goto done; + } + + /* do the comparison for this block */ + for (i = 0; i < count; i++) { + if (srcbuf[i] != dstbuf[i]) { + same = FALSE; + goto done; + } + } + } + +done: + if (opt_debug & DBG_ANAL) + fprintf(stderr, "ANAL: SAME=%d %s\n", same, fp->f_fullname); + + close(sfd); + close(dfd); + return (same); +} + +/* + * routine: + * truncated + * + * purpose: + * to determine whether or not a file has been truncated + * + * parameters: + * pointer to file structure + * + * returns: + * true/false + */ +static bool_t +truncated(struct file *fp) +{ + /* either source or destination must now be zero length */ + if (fp->f_info[OPT_SRC].f_size && fp->f_info[OPT_DST].f_size) + return (FALSE); + + /* file must have originally had a non-zero length */ + if (fp->f_info[OPT_BASE].f_size == 0) + return (FALSE); + + /* file type must "normal" all around */ + if (fp->f_info[OPT_BASE].f_type != S_IFREG) + return (FALSE); + if (fp->f_info[OPT_SRC].f_type != S_IFREG) + return (FALSE); + if (fp->f_info[OPT_DST].f_type != S_IFREG) + return (FALSE); + + + return (TRUE); +} + +/* + * routine: + * samelink + * + * purpose: + * to determine whether or not two symbolic links agree + * + * parameters: + * pointer to file structure + * + * returns: + * true/false + */ +static bool_t +samelink() +{ int i, srclen, dstlen; + char srcbuf[ MAX_PATH ], dstbuf[ MAX_PATH ]; + + + /* read both copies of the link */ + srclen = readlink(srcname, srcbuf, sizeof (srcbuf)); + dstlen = readlink(dstname, dstbuf, sizeof (dstbuf)); + + /* if they aren't the same length, they disagree */ + if (srclen < 0 || dstlen < 0 || srclen != dstlen) + return (FALSE); + + /* look for differences in contents */ + for (i = 0; i < srclen; i++) + if (srcbuf[i] != dstbuf[i]) + return (FALSE); + + return (TRUE); +} + +/* + * routine: + * full_name + * + * purpose: + * to figure out the fully qualified path name to a file on the + * reconciliation list. + * + * parameters: + * pointer to the file structure + * side indication for which base to use + * side indication for which buffer to use + * + * returns: + * pointer to a clobberable buffer + * + * notes: + * the zero'th buffer is used for renames and links, where + * we need the name of another file on the same side. + */ +char * +full_name(struct file *fp, side_t srcdst, side_t whichbuf) +{ static char *buffers[3]; + static int buflen = 0; + char *p, *b; + int l; + + /* see if the existing buffer is long enough */ + b = (srcdst == OPT_SRC) ? fp->f_base->b_src_name + : fp->f_base->b_dst_name; + + /* see if the allocated buffer is long enough */ + l = strlen(b) + strlen(fp->f_fullname) + 2; + if (l > buflen) { + /* figure out the next "nice" size to use */ + for (buflen = MAX_PATH; buflen < l; buflen += MAX_NAME); + + /* reallocate all buffers to this size */ + for (l = 0; l < 3; l++) { + buffers[l] = (char *) realloc(buffers[l], buflen); + if (buffers[l] == 0) + nomem("full name"); + } + } + + /* assemble the name in the buffer and reurn it */ + p = buffers[whichbuf]; + strcpy(p, b); + strcat(p, "/"); + strcat(p, fp->f_fullname); + return (p); +} |