diff options
author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
---|---|---|
committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/cmd/filesync | |
download | illumos-gate-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz |
OpenSolaris Launch
Diffstat (limited to 'usr/src/cmd/filesync')
-rw-r--r-- | usr/src/cmd/filesync/Makefile | 75 | ||||
-rw-r--r-- | usr/src/cmd/filesync/README | 453 | ||||
-rw-r--r-- | usr/src/cmd/filesync/acls.c | 293 | ||||
-rw-r--r-- | usr/src/cmd/filesync/action.c | 1258 | ||||
-rw-r--r-- | usr/src/cmd/filesync/anal.c | 1114 | ||||
-rw-r--r-- | usr/src/cmd/filesync/base.c | 912 | ||||
-rw-r--r-- | usr/src/cmd/filesync/database.h | 307 | ||||
-rw-r--r-- | usr/src/cmd/filesync/debug.c | 359 | ||||
-rw-r--r-- | usr/src/cmd/filesync/debug.h | 66 | ||||
-rw-r--r-- | usr/src/cmd/filesync/eval.c | 997 | ||||
-rw-r--r-- | usr/src/cmd/filesync/files.c | 591 | ||||
-rw-r--r-- | usr/src/cmd/filesync/filesync.h | 163 | ||||
-rw-r--r-- | usr/src/cmd/filesync/ignore.c | 364 | ||||
-rw-r--r-- | usr/src/cmd/filesync/main.c | 688 | ||||
-rw-r--r-- | usr/src/cmd/filesync/messages.h | 225 | ||||
-rw-r--r-- | usr/src/cmd/filesync/recon.c | 833 | ||||
-rw-r--r-- | usr/src/cmd/filesync/rename.c | 261 | ||||
-rw-r--r-- | usr/src/cmd/filesync/rules.c | 638 |
18 files changed, 9597 insertions, 0 deletions
diff --git a/usr/src/cmd/filesync/Makefile b/usr/src/cmd/filesync/Makefile new file mode 100644 index 0000000000..d2e128b11c --- /dev/null +++ b/usr/src/cmd/filesync/Makefile @@ -0,0 +1,75 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +#ident "%Z%%M% %I% %E% SMI" +# +# Copyright 2005 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +PROG= filesync + +OBJS= action.o anal.o base.o debug.o files.o ignore.o main.o recon.o rename.o rules.o acls.o eval.o +SRCS= $(OBJS:%.o=%.c) +HDRS= filesync.h messages.h database.h + +include ../Makefile.cmd + +LDLIBS += -lgen + +FILEMODE= 0555 +GROUP= bin + +.KEEP_STATE: + +.PARALLEL: $(OBJS) + +all: $(PROG) + +$(OBJS): $(HDRS) + +$(PROG): $(OBJS) + $(LINK.c) $(OBJS) -o $@ $(LDLIBS) + $(POST_PROCESS) + +install: all $(ROOTPROG) + +clean: + $(RM) $(OBJS) + +lint: lint_SRCS + +# +# we need our own rule for building a message catalog because our strings +# are already extracted, and xgettext expects to find them in situ. +# +# The sed command turns each string define in messages.h into +# a call to gettext, so that xgettext can extract them. 
Also +# we capture all comments, since the only comments in messages.h +# are for the benefit of the translators. +# +$(POFILE): messages.h + $(SED) -e "s/#define/gettext(/" -e "/gettext/s/$$/)/" messages.h | \ + $(XGETTEXT) -c "" -d $(TEXT_DOMAIN) - + $(MV) $(TEXT_DOMAIN).po $@ + +include ../Makefile.targ diff --git a/usr/src/cmd/filesync/README b/usr/src/cmd/filesync/README new file mode 100644 index 0000000000..eb5d03ade6 --- /dev/null +++ b/usr/src/cmd/filesync/README @@ -0,0 +1,453 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved +# +#ident "%W% %E% SMI" +# +# design notes that are likely to be of general (rather than +# merely historical) interest. 
+ +Table of Contents + + Overview what filesync does + + Primary Data Structures + general principles why they exist + key concepts what they represent + data structures major structures and their contents + + Overview of Passes main phases of program execution + + Modules list and descriptions of files + + Studying the Code + active ingredients a reading list of high points + the whole thing a suggested order for everything + + Gross calling structure who calls whom + + Helpful hints good things to know + +Overview + + The purpose of this program is to compare pairs of directory + trees with a baseline snapshot, to determine which files have + changed, and to propagate the changes in order to bring the + trees back into congruency. The baseline snapshot describes + size, ownership, ... for all files that filesync is managing + WHEN THEY WERE LAST IN SYNC. + + The files and directory trees to be compared are determined + by a relatively flexible (user editable) rules file, whose + format (packingrules.4) permits files and or trees to be + specified, explicitly, implicitly, or with wild cards. + There are also provisions for filtering out unwanted files + and for running programs to generate lists of files and + directories to be included or excluded. + + The comparisons begin by comparing the structured name + spaces. For names that appear in both trees, the files + are then compared on the basis of type, size, contents, + ownership and protections. For files that are already + in the baseline snapshot, if the sizes and modification + times have not changed, we do not bother to recheck the + contents. + + The reconciliation process (resolving the differences) + will only propagate a change if it is obvious what should + be done (one side has changed relative to the snapshot, + while the other has not). If there are conflicting changes, + the file is flagged and the user is asked to reconcile the + differences manually. 
There are, however, a few switches + that can be used to constrain the analysis or reconciliation, + or to force one particular side to win in case of a conflict. + + +Primary Data Structures + + general principles: + we will build up an in-memory tree that represents + the union of the name spaces found in the baseline + and on the source and destination sides. + + keep in mind that the baseline recalls the state of + files THE LAST TIME THEY WERE IN AGREEMENT. If files + have disagreed for a long time, the baseline still + remembers what they were like when they agreed. If + files have never agreed, the baseline has no notions + of how they "used to be". + + key concepts: + a "base pair" is a pair of directories whose + contents (or a subset of whose contents) are to + be synchronized. The "base pairs" to be managed + are specified in the packing rules file. + + associated with each "base pair" is a set of rules + that describe which files (under those directories) + are to be kept in sync. Each rule is a list of: + files and or directories to be included + wild cards for files or directories to be included + programs to generate lists of names for inclusion + file names to be ignored + wild cards for file names to be ignored + programs to generate lists of names for ignoring + + as a result of the "evaluation" process we build up + (under each base pair) a tree that represents all of + the files that we are supposed to keep in sync, and + contains everything we need to know about each one + of those files. The structure of the tree mirrors + the directory hierarchy ... actually the union of the + three hierarchies (baseline, source and destination). 
+ + for each file, we record interesting information (type, + size, owner, protection, mod time) and keep separate + note of what these values were: + in the baseline last time two sides agreed + on the source side, as we just examined it + on the destination side, as we just examined it + + data structures: + + there is an ordered list of "base" structures + for each base, we maintain + three lists of associated "rule" descriptions: + inclusion rules + exclusion rules + restriction rules (from the command line) + a "file" tree, representing all files below the bases + a list of statistics to be printed as a summary + + for each "rule", we maintain + some flags describing the type of rule + the character string that is the rule + + for each "file", we maintain + sibling and child pointers to give them tree structure + flags to describe what we have done/should do + "fileinfo" information from the src, dest, and baseline + + in addition there are some fields that are used + to add the file to a list of files requiring + reconciliation and record what happened to it. + + a "fileinfo" structure contains a subset of the information + that we obtain from a stat call: + major/minor/inum + type + link count + ownership, protection, and acls + size + modification time + + there is also, built up during analysis, a reconciliation + list. This is an ordered list of "file" structures which + are believed to describe files that have changed and require + reconciliation. The ordering is important both for correctness + and to preserve relative modification times. + +Overview of passes: + + pass I (evaluate) + + stat every file that we might be interested in + (on both src/dest sides). This includes walking + the trees under all directories in order to + find out what files exist and stating all of + them. + + the main trick in this pass is that there may be + files we don't want to evaluate (because we are + limiting our attention to specific files and trees). 
+ There is a LISTED flag kept in the database that + tells me whether or not I need to stat/descend any + given node. + + all restrictions and ignores take effect during this pass. + + pass II (analyze) + + given the baseline and all of the current stat information + gained during pass I, figure out what might conceivably + have changed and queue it for pass III. This pass doesn't + try to figure out what happened or who should win ... it + merely identifies candidates for pass III. This pass + ignores any nodes that were not evaluated during pass I. + + the queueing process, however, determines the order in + which the files will be processed in pass III, and the + order is very important. + + pass III (reconcile) + + process the list of candidates, figuring out what has + actually changed and which versions deserve to win. If + it is clear what needs doing, we actually do it in this + pass. + +Modules + + filesync.h + defines for limits, sizes and return codes + declarations for global variables (mostly cmd-line parms) + defines for default file names + declarations for routines of general interest + + database.h + data-structures for recording rules + data-structures for recording information about files + declarations for routines that operate on/with those structures + + messages.h + the text of all localizable messages + + debug.h + definitions and declarations for routines for error + simulation and bit-map display. + + acls.c + routines to get, set, compare, and display Access Control Lists + action.c + routines to do the real work of copying, deleting, or + changing ownership in order to make one side agree + with the other. + anal.c + routines to examine the in-core list of files and + determine what has changed (and therefore what + files are candidates for reconciliation). This + analysis includes figuring out which files should + be links rather than copies. 
+ base.c + routines to read and write the baseline file + routines to search and manipulate the in-core base list + debug.c + data structures and routines, used to simulate errors + and produce debug output, that map between bits (as found + in various flag words) and character string names for their + meanings. + + eval.c + routines to build up the internal tree that describes + the status of all of the files that are described + by the current rules. + files.c + routines to manipulate file name arguments, including + wild cards and embedded environment variables. + ignore.c + routines to maintain a list of names or patterns for + files to be ignored, and to check file names against + that list. + main.c + global variables, cmd-line parameter processing, + parameter validation, error reporting, and the + main loop. + recon.c + routines to examine a list of files that appear to + have changed, and figure out what the appropriate + reconciliation course of action is. + rename.c + routines to search the tree to determine whether + or not any creates/deletes are actually renames. + rules.c + routines to read and write the rules file + routines to add rules and enumerate in-core rules + + filecheck.c + not really a part of filesync, but rather a utility + program that is used in the test suite. It extracts + information about files that is not readily available + from other unix commands. + +Comments on studying the code + + if you are only interested in the "active ingredients": + + read the above notes on data structures and then + + read the structure declarations in database.h + + read the above notes overviewing the passes + + in recon.c: read reconcile + + this routine almost makes sense on its own, + and it is unquestionably the most important + routine in the entire program. Everything + else just gathers data for reconcile to use, + or updates the books to reflect the changes. 
+ + in eval.c: read evaluate, eval_file, walker, and note_info + + this is the main guts of pass I + + in anal.c: read analyze, check_file, check_changes & queue_file + + this is the main guts of pass II + + if you want to read the whole thing: + + the following routines do fundamentally simple things + in simple ways, and can (for the most part) be understood + in vacuo. The things they do are probably sufficiently + obvious that you can probably understand the more interesting + code without having read them at all. + + base.c + rules.c + files.c + debug.c + ignore.c + acls.c + + the following routines constitute the real meat of the + program, and while they are broken into specialized + modules, they probably need to be understood as an + organic whole: + + main.c setup and control + eval.c pass I + anal.c pass II + recon.c pass III + action.c execution and book-keeping + rename.c a special case for a common situation + + +Gross calling structure / flow of control + + main.c:main + findfiles + read_baseline + read_rules + if new rules + add_base + add_include + evaluate + analyze + write_baseline + write_summary + + eval.c:evaluate + add_file_to_base + add_glob + add_run + ignore_pgm + ignore_file + ignore_expr + eval_file + + eval.c:eval_file + note_info + nftw + walker + note_info + + anal.c:analyze + check_file + reconcile + + anal.c:check_file + check_changes + queue_file + + + recon.c:reconcile + samedata + samestuff + do_copy + copy + do_like + update_info + do_like + do_remove + +Helpful Hints + + the "file" structure contains a bunch of flags. Many of them + just summarize what we know about the file (e.g. where it was + found). Others are more subtle and control the evaluation + process or the writing out of the baseline file. You can't + really understand the processing unless you understand what + these flags mean. 
+ + F_NEW added by a new rule + + F_LISTED this name was generated by a rule + + F_SPARSE this directory is an intermediate on + the way to a name generated by a rule + and should not be recursively walked. + + F_EVALUATE this node was found in evaluation and + has up-to-date stat information + + F_CONFLICT there is a conflict on this node so + baseline should remain unchanged + + F_REMOVE this node should be purged from the baseline + + F_STAT_ERROR it was impossible to stat this file + (and anything below it) + + the implications of these flags on processing are + + F_NEW, F_LISTED, F_SPARSE + + affect whether or not a particular node should + be included in the evaluation pass. + + in some situations, only new rules are interpreted. + + listed files and directories should be evaluated + and analyzed. sparse directories should not be + recursively enumerated. + + F_EVALUATE + + determines whether or not a node is included + in the analysis pass. Only nodes that have + been evaluated will be analyzed. + + F_CONFLICT, F_REMOVE, F_EVALUATE + + affect how a node should be written back into the baseline file. + + if there is a conflict or we haven't evaluated + a node, we won't update the baseline. + + if a node is marked for removal, it will be + excluded from the baseline when it is written out. + + F_STAT_ERROR + + if we could not get proper status information + about a file (or the tree under it) we cannot, + with any confidence, determine what its state + is or do anything about it. Such files are + flagged as "in conflict". + + it is somewhat kinky that we put error flagged + files on the reconciliation list. We do this + because this is the easiest way to pull them + out for reporting as conflicts. 
+ + diff --git a/usr/src/cmd/filesync/acls.c b/usr/src/cmd/filesync/acls.c new file mode 100644 index 0000000000..aef5dac80e --- /dev/null +++ b/usr/src/cmd/filesync/acls.c @@ -0,0 +1,293 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * acls.c + * + * purpose: + * routines to manipulate access control lists, mapping between + * the data structures required by the filesystem ACL system calls + * and the representation used in our fileinfo structure. + * + */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> + +#include "filesync.h" +#include "database.h" + +#ifdef NO_ACLS +/* + * Solaris 2.4 libc.so does not contain this entry point, so if we + * want to build a 2.4 version of filesync, we need to provide a + * dummy entry point that will fail when-ever it is called. 
+ */ +#define acl bogus_acl + +static int acl(const char *name, int opcode, int count, aclent_t *acls) +{ + return (-1); +} +#endif + +/* + * routine: + * get_acls + * + * purpose: + * to read the ACL (if any) from a file into a fileinfo structure + * + * parameters: + * name of file + * pointer to fileinfo structure + * + * returns: + * number of ACL entries + */ +int +get_acls(const char *name, struct fileinfo *ip) +{ int count; + int i; + static aclent_t acls[MAX_ACL_ENTRIES]; + aclent_t *list; + + count = acl(name, GETACL, MAX_ACL_ENTRIES, acls); + if (count <= 0) + return (0); + + /* with a count of 3 or 4 there may not be any real ones */ + if (count > 4) + goto gotsome; + + /* look for anything beyond the normal unix protection */ + for (i = 0; i < count; i++) + switch (acls[i].a_type) { + default: /* weird types are real */ + goto gotsome; + + case USER_OBJ: + case GROUP_OBJ: + case OTHER_OBJ: + case CLASS_OBJ: + continue; /* all file have these */ + } + + return (0); /* nothing interesting */ + +gotsome: + /* allocate an array to hold the acls */ + list = (aclent_t *) malloc(count * sizeof (*list)); + if (list == 0) + nomem("Access Control List"); + + /* copy the acls into the new list */ + for (i = 0; i < count; i++) { + list[i].a_type = acls[i].a_type; + list[i].a_id = acls[i].a_id; + list[i].a_perm = acls[i].a_perm; + } + + ip->f_acls = list; + ip->f_numacls = count; + return (ip->f_numacls); +} + +/* + * routine: + * cmp_acls + * + * purpose: + * determine whether or not two ACLs are the same + * + * parameters: + * pointer to first fileinfo + * pointer to second fileinfo + * + * returns: + * true equal + * false different + */ +int +cmp_acls(struct fileinfo *f1, struct fileinfo *f2) +{ int i; + + if (f1->f_numacls != f2->f_numacls) + return (0); + + if (f1->f_numacls == 0) + return (1); + + for (i = 0; i < f1->f_numacls; i++) { + if (f1->f_acls[i].a_type != f2->f_acls[i].a_type) + return (0); + if (f1->f_acls[i].a_id != f2->f_acls[i].a_id) + return 
(0); + if (f1->f_acls[i].a_perm != f2->f_acls[i].a_perm) + return (0); + } + + return (1); +} + +/* + * routine: + * set_acls + * + * purpose: + * to write the ACL of a file + * + * parameters: + * name of file + * fileinfo pointer (which contains an acl pointer) + * + * returns: + * retcode and errno + */ +int +set_acls(const char *name, struct fileinfo *fp) +{ int rc; + int nacl; + aclent_t acls[4], *list; + + if (fp->f_numacls == 0) { + /* fabricate a standard set of bogus ACLs */ + acls[0].a_type = USER_OBJ; + acls[0].a_id = fp->f_uid; + acls[0].a_perm = (fp->f_mode >> 6) & 7; + + acls[1].a_type = GROUP_OBJ; + acls[1].a_id = fp->f_gid; + acls[1].a_perm = (fp->f_mode >> 3) & 7; + + acls[2].a_type = CLASS_OBJ; + acls[2].a_id = 0; + acls[2].a_perm = (fp->f_mode >> 6) & 7; + + acls[3].a_type = OTHER_OBJ; + acls[3].a_id = 0; + acls[3].a_perm = fp->f_mode & 7; + + nacl = 4; + list = acls; + } else { + nacl = fp->f_numacls; + list = fp->f_acls; + } + + rc = acl(name, SETACL, nacl, list); + + /* non-negative number mean success */ + if (rc < 0) + return (rc); + else + return (0); +} + +/* + * routine: + * show_acls + * + * purpose: + * to map an acl into arguments for a setfacl command + * + * paramters: + * number of elements in list + * pointer to list + * + * returns: + * pointer to character buffer containing arguments + */ +char +*show_acls(int numacl, aclent_t *list) +{ int i, j; + int type, perm, id; + char *s; + static char buf[ MAX_LINE ]; + + s = buf; + + if (numacl > 0) { + *s++ = '-'; + *s++ = 's'; + *s++ = ' '; + } else { + *s++ = '-'; + *s++ = 'd'; + } + + for (i = 0; i < numacl; i++) { + type = list[i].a_type; + id = list[i].a_id; + perm = list[i].a_perm; + + if (i > 0) + *s++ = ','; + + /* note whether this is per-file or default */ + if (type & ACL_DEFAULT) { + *s++ = 'd'; + *s++ = ':'; + } + + /* print out the entry type */ + if (type & (USER_OBJ|USER)) { + *s++ = 'u'; + *s++ = ':'; + } else if (type & (GROUP_OBJ|GROUP)) { + *s++ = 'g'; + *s++ = ':'; 
+ } else if (type & OTHER_OBJ) { + *s++ = 'o'; + *s++ = ':'; + } else if (type & CLASS_OBJ) { + *s++ = 'm'; + *s++ = ':'; + } + + /* print out the ID for this ACL */ + if (type & (USER_OBJ|GROUP_OBJ)) + *s++ = ':'; + else if (type & (USER|GROUP)) { + for (j = 1; id/j > 10; j *= 10); + + while (j > 0) { + *s++ = '0' + (id/j); + id %= j*10; + j /= 10; + } + + *s++ = ':'; + } + + /* print out the permissions for this ACL */ + *s++ = (perm & 04) ? 'r' : '-'; + *s++ = (perm & 02) ? 'w' : '-'; + *s++ = (perm & 01) ? 'x' : '-'; + } + + *s = 0; + return (buf); +} diff --git a/usr/src/cmd/filesync/action.c b/usr/src/cmd/filesync/action.c new file mode 100644 index 0000000000..6f53ffd331 --- /dev/null +++ b/usr/src/cmd/filesync/action.c @@ -0,0 +1,1258 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * action.c + * + * purpose: + * routines to carryout reconciliation actions and make the + * appropriate updates to the database file structure. + * + * contents: + * do_like ... change ownership and protection + * do_copy ... copy a file from one side to the other + * do_remove . 
remove a file from one side + * do_rename . rename a file on one side + * copy ...... (static) do the actual copy + * checksparse (static) figure out if a file is sparse + * + * ASSERTIONS: + * any of these action routines is responsible for all baseline + * and statistics updates associated with the reconciliation + * actions. If notouch is specified, they should fake the + * updates well enough so that link tests will still work. + * + * success: + * bump bp->b_{src,dst}_{copies,deletes,misc} + * update fp->f_info[srcdst] + * update fp->f_info[OPT_BASE] from fp->f_info[srcdst] + * if there might be multiple links, call link_update + * return ERR_RESOLVABLE + * + * failure: + * set fp->f_flags |= F_CONFLICT + * set fp->f_problem + * bump bp->b_unresolved + * return ERR_UNRESOLVED + * + * pretend this never happened: + * return 0, and baseline will be unchanged + * + * notes: + * Action routines can be called in virtually any order + * or combination, and it is certainly possible for an + * earlier action to succeed while a later action fails. + * If each successful action results in a completed baseline + * update, a subsequent failure will force the baseline to + * roll back to the last success ... which is appropriate. 
+ */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <utime.h> +#include <errno.h> +#include <sys/mkdev.h> +#include <sys/statvfs.h> + +#include "filesync.h" +#include "database.h" +#include "messages.h" +#include "debug.h" + +/* + * globals and importeds + */ +bool_t need_super; /* warn user that we can't fix ownership */ +extern char *srcname; /* file we are emulating */ +extern char *dstname; /* file we are updating */ + +/* + * locals + */ +static errmask_t copy(char *, char *, int); +static int checksparse(int); +static char *copy_err_str; /* what went wrong w/copy */ + +/* + * routine: + * do_like + * + * purpose: + * to propagate ownership and protection changes between + * one existing file and another. + * + * parameters: + * file pointer + * src/dst indication for who needs to change + * whether or not to update statistics (there may be a copy and a like) + * + * returns: + * error mask + * + * notes: + * if we are called from reconcile, we should update + * the statistics, but if we were called from do_copy + * that routine will do the honors. 
+ */ +errmask_t +do_like(struct file *fp, side_t srcdst, bool_t do_stats) +{ char *dst; + int rc = 0; + int do_chown, do_chmod, do_chgrp, do_acls; + errmask_t errs = 0; + char *errstr = 0; + struct base *bp; + struct fileinfo *sp; + struct fileinfo *dp; + struct fileinfo *ip; + extern int errno; + + bp = fp->f_base; + + /* see if this is a forbidden propagation */ + if (srcdst == opt_oneway) { + fp->f_flags |= F_CONFLICT; + fp->f_problem = gettext(PROB_prohibited); + bp->b_unresolved++; + return (ERR_UNRESOLVED); + } + + + /* get info about source and target files */ + if (srcdst == OPT_SRC) { + sp = &fp->f_info[ OPT_DST ]; + dp = &fp->f_info[ OPT_SRC ]; + dst = srcname; + } else { + sp = &fp->f_info[ OPT_SRC ]; + dp = &fp->f_info[ OPT_DST ]; + dst = dstname; + } + ip = &fp->f_info[ OPT_BASE ]; + + /* figure out what needs fixing */ + do_chmod = (sp->f_mode != dp->f_mode); + do_chown = (sp->f_uid != dp->f_uid); + do_chgrp = (sp->f_gid != dp->f_gid); + do_acls = ((fp->f_srcdiffs|fp->f_dstdiffs) & D_FACLS); + + /* + * try to anticipate things that we might not be able to + * do, and return appropriate errorst if the calling user + * cannot safely perform the requiested updates. 
+ */ + if (my_uid != 0) { + if (do_chown) + errstr = gettext(PROB_chown); + else if (my_uid != dp->f_uid) { + if (do_chmod) + errstr = gettext(PROB_chmod); + else if (do_acls) + errstr = gettext(PROB_chacl); + else if (do_chgrp) + errstr = gettext(PROB_chgrp); + } +#ifdef ACL_UID_BUG + else if (do_acls && my_gid != dp->f_gid) + errstr = gettext(PROB_botch); +#endif + + if (errstr) { + need_super = TRUE; + + /* if the user doesn't care, shine it on */ + if (opt_everything == 0) + return (0); + + /* if the user does care, return the error */ + rc = -1; + goto nogood; + } + } + + if (opt_debug & DBG_RECON) { + fprintf(stderr, "RECO: do_like %s (", dst); + if (do_chmod) + fprintf(stderr, "chmod "); + if (do_acls) + fprintf(stderr, "acls "); + if (do_chown) + fprintf(stderr, "chown "); + if (do_chgrp) + fprintf(stderr, "chgrp "); + fprintf(stderr, ")\n"); + } + + if (do_chmod) { + if (!opt_quiet) + fprintf(stdout, "chmod %o %s\n", sp->f_mode, + noblanks(dst)); + +#ifdef DBG_ERRORS + /* should we simulate a chmod failure */ + if (errno = dbg_chk_error(dst, 'p')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : chmod(dst, sp->f_mode); + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: do_chmod %o -> %d(%d)\n", + sp->f_mode, rc, errno); + + /* update dest and baseline to reflect the change */ + if (rc == 0) { + dp->f_mode = sp->f_mode; + ip->f_mode = sp->f_mode; + } else + errstr = gettext(PROB_chmod); + } + + /* + * see if we need to fix the acls + */ + if (rc == 0 && do_acls) { + if (!opt_quiet) + fprintf(stdout, "setfacl %s %s\n", + show_acls(sp->f_numacls, sp->f_acls), + noblanks(dst)); + +#ifdef DBG_ERRORS + /* should we simulate a set acl failure */ + if (errno = dbg_chk_error(dst, 'a')) + rc = -1; + else +#endif + rc = opt_notouch ? 
0 : set_acls(dst, sp); + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: do_acls %d -> %d(%d)\n", + sp->f_numacls, rc, errno); + + /* update dest and baseline to reflect the change */ + if (rc == 0) { + dp->f_numacls = sp->f_numacls; + dp->f_acls = sp->f_acls; + ip->f_numacls = sp->f_numacls; + ip->f_acls = sp->f_acls; +#ifdef ACL_UID_BUG + /* SETFACL changes a file's UID/GID */ + if (my_uid != dp->f_uid) { + do_chown = 1; + dp->f_uid = my_uid; + } + if (my_gid != dp->f_gid) { + do_chgrp = 1; + dp->f_gid = my_gid; + } +#endif + } else if (errno == ENOSYS) { + /* + * if the file system doesn't support ACLs + * we should just pretend we never saw them + */ + fprintf(stderr, gettext(WARN_noacls), dst); + ip->f_numacls = 0; + sp->f_numacls = 0; + dp->f_numacls = 0; + rc = 0; + } else + errstr = gettext(PROB_chacl); + } + + /* + * see if we need to fix the ownership + */ + if (rc == 0 && (do_chown || do_chgrp)) { + if (do_chown) + fprintf(stdout, "chown %ld %s; ", + sp->f_uid, noblanks(dst)); + if (do_chgrp) + fprintf(stdout, "chgrp %ld %s", + sp->f_gid, noblanks(dst)); + + fprintf(stdout, "\n"); + +#ifdef DBG_ERRORS + /* should we simulate a chown failure */ + if (errno = dbg_chk_error(dst, 'O')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : lchown(dst, sp->f_uid, sp->f_gid); + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: do_chown %ld %ld -> %d(%d)\n", + sp->f_uid, sp->f_gid, rc, errno); + + /* update the destination to reflect changes */ + if (rc == 0) { + dp->f_uid = sp->f_uid; + dp->f_gid = sp->f_gid; + ip->f_uid = sp->f_uid; + ip->f_gid = sp->f_gid; + } else { + if (errno == EPERM) { + need_super = TRUE; + if (opt_everything == 0) + return (0); + } + + if (rc != 0) + errstr = gettext(do_chown ? + PROB_chown : PROB_chgrp); + } + } + + /* + * if we were successful, we should make sure the other links + * see the changes. If we were called from do_copy, we don't + * want to do the link_updates either because do_copy will + * handle them too. 
+ */ + if (rc == 0 && do_stats) + link_update(fp, srcdst); + +nogood: + if (!do_stats) + return (errs); + + if (rc != 0) { + fprintf(stderr, gettext(ERR_cannot), errstr, dst); + fp->f_problem = errstr; + fp->f_flags |= F_CONFLICT; + bp->b_unresolved++; + errs |= ERR_PERM | ERR_UNRESOLVED; + } else { + /* + * it worked, so update the baseline and statistics + */ + if (srcdst == OPT_SRC) + bp->b_src_misc++; + else + bp->b_dst_misc++; + + fp->f_problem = 0; + errs |= ERR_RESOLVABLE; + } + + return (errs); +} + +/* + * routine: + * do_copy + * + * purpose: + * to propagate a creation or change + * + * parameters: + * file pointer + * src/dst indication for who gets the copy + * + * returns: + * error mask + * + * note: + * after any successful operation we update the stat/info + * structure for the updated file. This is somewhat redundant + * because we will restat at the end of the routine, but these + * anticipatory updates help to ensure that the link finding + * code will still behave properly in notouch mode (when restats + * cannot be done). 
+ */ +errmask_t +do_copy(struct file *fp, side_t srcdst) +{ char *src, *dst; + char cmdbuf[ MAX_PATH + MAX_NAME ]; + int mode, maj, min, type; + uid_t uid; + gid_t gid; + int rc; + long mtime; + int do_chmod = 0; + int do_chown = 0; + int do_chgrp = 0; + int do_unlink = 0; + int do_acls = 0; + int do_create = 0; + char *errstr = "???"; + errmask_t errs = 0; + struct base *bp; + struct file *lp; + struct fileinfo *sp, *dp; + struct utimbuf newtimes; + struct stat statb; + + bp = fp->f_base; + + /* see if this is a forbidden propagation */ + if (srcdst == opt_oneway) { + fp->f_problem = gettext(PROB_prohibited); + fp->f_flags |= F_CONFLICT; + bp->b_unresolved++; + return (ERR_UNRESOLVED); + } + + /* figure out who is the source and who is the destination */ + if (srcdst == OPT_SRC) { + sp = &fp->f_info[ OPT_DST ]; + dp = &fp->f_info[ OPT_SRC ]; + src = dstname; + dst = srcname; + } else { + sp = &fp->f_info[ OPT_SRC ]; + dp = &fp->f_info[ OPT_DST ]; + src = srcname; + dst = dstname; + } + + /* note information about the file to be created */ + type = sp->f_type; /* type of the new file */ + uid = sp->f_uid; /* owner of the new file */ + gid = sp->f_gid; /* group of the new file */ + mode = sp->f_mode; /* modes for the new file */ + mtime = sp->f_modtime; /* modtime (if preserving) */ + maj = sp->f_rd_maj; /* major (if it is a device) */ + min = sp->f_rd_min; /* minor (if it is a device) */ + + /* + * creating a file does not guarantee it will get the desired + * modes, uid and gid. If the file already exists, it will + * retain its old ownership and protection. If my UID/GID + * are not the desired ones, the new file will also require + * manual correction. If the file has the wrong type, we will + * need to delete it and recreate it. 
If the file is not writable, + * it is easier to delete it than to chmod it to permit overwrite + */ + if ((dp->f_type == S_IFREG && sp->f_type == S_IFREG) && + (dp->f_mode & 0200)) { + /* if the file already exists */ + if (dp->f_uid != uid) + do_chown = 1; + + if (dp->f_gid != gid) + do_chgrp = 1; + + if (dp->f_mode != mode) + do_chmod = 1; + } else { + /* if we will be creating a new file */ + do_create = 1; + if (dp->f_type) + do_unlink = 1; + if (uid != my_uid) + do_chown = 1; + if (gid != my_gid) + do_chgrp = 1; + } + + /* + * if the source has acls, we will surely have to set them for dest + */ + if (sp->f_numacls) + do_acls = 1; + + /* + * for any case other than replacing a normal file with a normal + * file, we need to delete the existing file before creating + * the new one. + */ + if (do_unlink) { + if (dp->f_type == S_IFDIR) { + if (!opt_quiet) + fprintf(stdout, "rmdir %s\n", noblanks(dst)); + + errstr = gettext(PROB_rmdir); +#ifdef DBG_ERRORS + /* should we simulate a rmdir failure */ + if (errno = dbg_chk_error(dst, 'D')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : rmdir(dst); + } else { + if (!opt_quiet) + fprintf(stdout, "rm %s\n", noblanks(dst)); + + errstr = gettext(PROB_unlink); +#ifdef DBG_ERRORS + /* should we simulate a unlink failure */ + if (errno = dbg_chk_error(dst, 'u')) + rc = -1; + else +#endif + rc = opt_notouch ? 
0 : unlink(dst); + } + + if (rc != 0) + goto cant; + + /* note that this file no longer exists */ + dp->f_type = 0; + dp->f_mode = 0; + } + + if (opt_debug & DBG_RECON) { + fprintf(stderr, "RECO: do_copy %s %s (", src, dst); + if (do_unlink) + fprintf(stderr, "unlink "); + if (do_chmod) + fprintf(stderr, "chmod "); + if (do_acls) + fprintf(stderr, "acls "); + if (do_chown) + fprintf(stderr, "chown "); + if (do_chgrp) + fprintf(stderr, "chgrp "); + fprintf(stderr, ")\n"); + } + + /* + * how we go about copying a file depends on what type of file + * it is that we are supposed to copy + */ + switch (type) { + case S_IFDIR: + if (!opt_quiet) { + fprintf(stdout, "mkdir %s;", noblanks(dst)); + fprintf(stdout, " chmod %o %s;\n", mode, noblanks(dst)); + } + + errstr = gettext(PROB_mkdir); + +#ifdef DBG_ERRORS + /* should we simulate a mkdir failure */ + if (errno = dbg_chk_error(dst, 'd')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : mkdir(dst, mode); + + /* update stat with what we have just created */ + if (rc == 0) { + dp->f_type = S_IFDIR; + dp->f_uid = my_uid; + dp->f_gid = my_gid; + dp->f_mode = mode; + } + + break; + + case S_IFLNK: + errstr = gettext(PROB_readlink); + /* + * readlink does not null terminate what it returns and can + * fill the entire buffer it is given, so we hold back one + * byte of cmdbuf for the terminator that we add below. + */ +#ifdef DBG_ERRORS + /* should we simulate a symlink read failure */ + if (errno = dbg_chk_error(dst, 'r')) + rc = -1; + else +#endif + rc = readlink(src, cmdbuf, sizeof (cmdbuf) - 1); + if (rc > 0) { + cmdbuf[rc] = 0; + if (!opt_quiet) { + fprintf(stdout, "ln -s %s", noblanks(cmdbuf)); + fprintf(stdout, " %s;\n", noblanks(dst)); + } + errstr = gettext(PROB_symlink); +#ifdef DBG_ERRORS + /* should we simulate a symlink failure */ + if (errno = dbg_chk_error(dst, 'l')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : symlink(cmdbuf, dst); + + if (rc == 0) + dp->f_type = S_IFLNK; + } + break; + + case S_IFBLK: + case S_IFCHR: + if (!opt_quiet) + fprintf(stdout, "mknod %s %s %d %d\n", noblanks(dst), + (type == S_IFBLK) ? 
"b" : "c", maj, min); + + errstr = gettext(PROB_mknod); +#ifdef DBG_ERRORS + /* should we simulate a mknod failure */ + if (errno = dbg_chk_error(dst, 'd')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 + : mknod(dst, mode|type, makedev(maj, min)); + + /* update stat with what we have just created */ + if (rc == 0) { + dp->f_type = type; + dp->f_uid = my_uid; + dp->f_gid = my_gid; + dp->f_mode = 0666; + + if (dp->f_mode != mode) + do_chmod = 1; + } + break; + + case S_IFREG: + /* + * The first thing to do is ascertain whether or not + * the alleged new copy might in fact be a new link. + * We trust find_link to weigh all the various factors, + * so if he says make a link, we'll do it. + */ + lp = find_link(fp, srcdst); + if (lp) { + /* figure out name of existing file */ + src = full_name(lp, srcdst, OPT_BASE); + + /* + * if file already exists, it must be deleted + */ + if (dp->f_type) { + if (!opt_quiet) + fprintf(stdout, "rm %s\n", + noblanks(dst)); + + errstr = gettext(PROB_unlink); +#ifdef DBG_ERRORS + /* should we simulate a unlink failure */ + if (errno = dbg_chk_error(dst, 'u')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : unlink(dst); + + /* + * if we couldn't do the unlink, we must + * mark the linkee in conflict as well + * so his reference count remains the same + * in the baseline and he continues to show + * up on the change list. + */ + if (rc != 0) { + lp->f_flags |= F_CONFLICT; + lp->f_problem = gettext(PROB_link); + goto cant; + } + } + + if (!opt_quiet) { + fprintf(stdout, "ln %s", noblanks(src)); + fprintf(stdout, " %s\n", noblanks(dst)); + } + errstr = gettext(PROB_link); + +#ifdef DBG_ERRORS + /* should we simulate a link failure */ + if (errno = dbg_chk_error(dst, 'l')) + rc = -1; + else +#endif + rc = opt_notouch ? 
0 : link(src, dst); + + /* + * if this is a link, there is no reason to worry + * about ownership and modes, they are automatic + */ + do_chown = 0; do_chgrp = 0; do_chmod = 0; do_acls = 0; + if (rc == 0) { + dp->f_type = type; + dp->f_uid = uid; + dp->f_gid = gid; + dp->f_mode = mode; + break; + } else { + /* + * if we failed to make a link, we want to + * mark the linkee in conflict too, so that + * his reference count remains the same in + * the baseline, and he shows up on the change + * list again next time. + */ + lp->f_flags |= F_CONFLICT; + lp->f_problem = errstr; + break; + } + + /* + * in some situation we haven't figured out yet + * we might want to fall through and try a copy + * if the link failed. + */ + } + + /* we are going to resolve this by making a copy */ + if (!opt_quiet) { + fprintf(stdout, "cp %s", noblanks(src)); + fprintf(stdout, " %s\n", noblanks(dst)); + } + rc = opt_notouch ? 0 : copy(src, dst, mode); + if (rc != 0) { + errs |= rc; + if (copy_err_str) + errstr = copy_err_str; + else + errstr = gettext(PROB_copy); + + /* + * The new copy (if it exists at all) is a botch. + * If this was a new create or a remove and copy + * we should get rid of the botched copy so that + * it doesn't show up as two versions next time. + */ + if (do_create) + unlink(dst); + } else if (dp->f_mode == 0) { + dp->f_type = S_IFREG; + dp->f_uid = my_uid; + dp->f_gid = my_gid; + dp->f_mode = mode; + + /* FIX: inode number is still wrong */ + } + + /* for normal files we have an option to preserve mod time */ + if (rc == 0 && opt_notouch == 0 && opt_mtime) { + newtimes.actime = mtime; + newtimes.modtime = mtime; + + /* ignore the error return on this one */ + (void) utime(dst, &newtimes); + } + break; + + default: + errstr = gettext(PROB_deal); + rc = -1; + } + + /* + * if any of the file's attributes need attention, I should let + * do_like take care of them, since it knows all rules for who + * can and cannot make what types of changes. 
+ */ + if (rc == 0 && (do_chmod || do_chown || do_chgrp || do_acls)) { + rc = do_like(fp, srcdst, FALSE); + errstr = fp->f_problem; + errs |= rc; + } + + /* + * finish off by re-stating the destination and using that to + * update the baseline. If we were completely successful in + * our chowns/chmods, stating the destination will confirm it. + * If we were unable to make all the necessary changes, stating + * the destination will make the source appear to have changed, + * so that the differences will continue to reappear as new + * changes (inconsistancies). + */ + if (rc == 0) + if (!opt_notouch) { + errstr = gettext(PROB_restat); + +#ifdef DBG_ERRORS + /* should we simulate a restat failure */ + if (errno = dbg_chk_error(dst, 'R')) + rc = -1; + else +#endif + rc = lstat(dst, &statb); + + if (rc == 0) { + note_info(fp, &statb, srcdst); + link_update(fp, srcdst); + if (do_acls) + (void) get_acls(dst, dp); + update_info(fp, srcdst); + } + } else { + /* + * BOGOSITY ALERT + * we are in notouch mode and haven't really + * done anything, but if we want link detection + * to work and be properly reflected in the + * what-I-would-do output for a case where + * multiple links are created to a new file, + * we have to make the new file appear to + * have been created. Since we didn't create + * the new file we can't stat it, but if + * no file exists, we can't make a link to + * it, so we will pretend we created a file. 
+ */ + if (dp->f_ino == 0 || dp->f_nlink == 0) { + dp->f_ino = sp->f_ino; + dp->f_nlink = 1; + } + } + +cant: if (rc != 0) { + fprintf(stderr, gettext(ERR_cannot), errstr, dst); + bp->b_unresolved++; + fp->f_flags |= F_CONFLICT; + fp->f_problem = errstr; + if (errs == 0) + errs = ERR_PERM; + errs |= ERR_UNRESOLVED; + } else { + /* update the statistics */ + if (srcdst == OPT_SRC) + bp->b_src_copies++; + else + bp->b_dst_copies++; + errs |= ERR_RESOLVABLE; + } + + return (errs); +} + +/* + * routine: + * do_remove + * + * purpose: + * to propagate a deletion + * + * parameters: + * file pointer + * src/dst indication for which side gets changed + * + * returns: + * error mask + */ +errmask_t +do_remove(struct file *fp, side_t srcdst) +{ char *name; + int rc; + struct base *bp = fp->f_base; + errmask_t errs = 0; + char *errstr = "???"; + + /* see if this is a forbidden propagation */ + if (srcdst == opt_oneway) { + fp->f_problem = gettext(PROB_prohibited); + fp->f_flags |= F_CONFLICT; + bp->b_unresolved++; + return (ERR_UNRESOLVED); + } + + name = (srcdst == OPT_SRC) ? srcname : dstname; + + if (fp->f_info[0].f_type == S_IFDIR) { + if (!opt_quiet) + fprintf(stdout, "rmdir %s\n", noblanks(name)); + + errstr = gettext(PROB_rmdir); + +#ifdef DBG_ERRORS + /* should we simulate a rmdir failure */ + if (errno = dbg_chk_error(name, 'D')) + rc = -1; + else +#endif + rc = opt_notouch ? 0 : rmdir(name); + } else { + if (!opt_quiet) + fprintf(stdout, "rm %s\n", noblanks(name)); + + errstr = gettext(PROB_unlink); + +#ifdef DBG_ERRORS + /* should we simulate an unlink failure */ + if (errno = dbg_chk_error(name, 'u')) + rc = -1; + else +#endif + rc = opt_notouch ? 
0 : unlink(name); + } + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: do_remove %s -> %d(%d)\n", + name, rc, errno); + + if (rc == 0) { + /* tell any other hard links that one has gone away */ + fp->f_info[srcdst].f_nlink--; + link_update(fp, srcdst); + + fp->f_flags |= F_REMOVE; + if (srcdst == OPT_SRC) + fp->f_base->b_src_deletes++; + else + fp->f_base->b_dst_deletes++; + errs |= ERR_RESOLVABLE; + } else { + fprintf(stderr, gettext(ERR_cannot), errstr, name); + fp->f_problem = errstr; + fp->f_flags |= F_CONFLICT; + bp->b_unresolved++; + errs |= ERR_PERM | ERR_UNRESOLVED; + } + + return (errs); +} + +/* + * routine: + * do_rename + * + * purpose: + * to propagate a rename + * + * parameters: + * file pointer for the new name + * src/dst indication for which side gets changed + * + * returns: + * error mask + */ +errmask_t +do_rename(struct file *fp, side_t srcdst) +{ int rc; + struct file *pp = fp->f_previous; + struct base *bp = fp->f_base; + errmask_t errs = 0; + char *errstr = "???"; + char *newname; + char *oldname; + struct stat statb; + + /* see if this is a forbidden propagation */ + if (srcdst == opt_oneway) { + fp->f_problem = gettext(PROB_prohibited); + + /* if we can't resolve the TO, the FROM is also unresolved */ + pp->f_problem = gettext(PROB_prohibited); + pp->f_flags |= F_CONFLICT; + bp->b_unresolved++; + return (ERR_UNRESOLVED); + } + + newname = (srcdst == OPT_SRC) ? srcname : dstname; + oldname = full_name(pp, srcdst, OPT_BASE); + + if (!opt_quiet) + fprintf(stdout, "%s %s %s\n", + (fp->f_info[0].f_type == S_IFDIR) ? "mvdir" : "mv", + noblanks(oldname), noblanks(newname)); + +#ifdef DBG_ERRORS + /* should we simulate a rename failure */ + if (errno = dbg_chk_error(oldname, 'm')) + rc = -1; + else +#endif + rc = opt_notouch ? 
0 : rename(oldname, newname); + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: do_rename %s %s -> %d(%d)\n", + oldname, newname, rc, errno); + + /* if we succeed, update the baseline */ + if (rc == 0) + if (!opt_notouch) { + errstr = gettext(PROB_restat); + +#ifdef DBG_ERRORS + /* should we simulate a restat failure */ + if (errno = dbg_chk_error(newname, 'S')) + rc = -1; + else +#endif + rc = lstat(newname, &statb); + + if (rc == 0) { + note_info(fp, &statb, srcdst); + link_update(fp, srcdst); + update_info(fp, srcdst); + } + } else { + /* + * BOGOSITY ALERT + * in order for link tests to work in notouch + * mode we have to dummy up some updated status + */ + fp->f_info[srcdst].f_ino = pp->f_info[srcdst].f_ino; + fp->f_info[srcdst].f_nlink = pp->f_info[srcdst].f_nlink; + fp->f_info[srcdst].f_type = pp->f_info[srcdst].f_type; + fp->f_info[srcdst].f_size = pp->f_info[srcdst].f_size; + fp->f_info[srcdst].f_mode = pp->f_info[srcdst].f_mode; + fp->f_info[srcdst].f_uid = pp->f_info[srcdst].f_uid; + fp->f_info[srcdst].f_gid = pp->f_info[srcdst].f_gid; + update_info(fp, srcdst); + } + else + errstr = gettext(PROB_rename2); + + if (rc == 0) { + pp->f_flags |= F_REMOVE; + + if (srcdst == OPT_SRC) { + bp->b_src_copies++; + bp->b_src_deletes++; + } else { + bp->b_dst_copies++; + bp->b_dst_deletes++; + } + errs |= ERR_RESOLVABLE; + } else { + fprintf(stderr, gettext(ERR_cannot), errstr, oldname); + + bp->b_unresolved++; + fp->f_flags |= F_CONFLICT; + pp->f_flags |= F_CONFLICT; + + fp->f_problem = errstr; + pp->f_problem = gettext(PROB_rename); + + errs |= ERR_PERM | ERR_UNRESOLVED; + } + + return (errs); +} + +/* + * routine: + * copy + * + * purpose: + * to copy one file to another + * + * parameters: + * source file name + * destination file name + * desired modes + * + * returns: + * 0 OK + * else error mask, and a setting of copy_err_str + * + * notes: + * We try to preserve the holes in sparse files, by skipping over + * any holes that are at least MIN_HOLE bytes 
long. There are + * pathological cases where the hole detection test could become + * expensive, but for most blocks of most files we will fall out + * of the zero confirming loop in the first couple of bytes. + */ +static errmask_t +copy(char *src, char *dst, int mode) +{ int ifd, ofd, count, ret; + long *p, *e; + long long length; /* total size of file */ + errmask_t errs = 0; + int bsize; /* block-size for file */ + bool_t sparse; /* file may be sparse */ + bool_t was_hole = FALSE; /* file ends with hole */ + long inbuf[ COPY_BSIZE/4 ]; /* long to speed checks */ + struct stat statbuf; /* info on source file */ + struct statvfs statvsbuf; /* info on target fs */ + + copy_err_str = 0; + + /* open the input file */ +#ifdef DBG_ERRORS + if (opt_errors && dbg_chk_error(src, 'o')) + ifd = -1; + else +#endif + ifd = open(src, O_RDONLY); + + if (ifd < 0) { + copy_err_str = gettext(PROB_copyin); + return (ERR_PERM); + } + + /* + * if we suspect a file may be sparse, we must process it + * a little more carefully, looking for holes and skipping + * over them in the output. If a file is not sparse, we + * can move through it at greater speed. + */ + bsize = checksparse(ifd); + if (bsize > 0 && bsize <= COPY_BSIZE) + sparse = TRUE; + else { + sparse = FALSE; + bsize = COPY_BSIZE; + } + + /* + * if the target file already exists and we overwrite it without + * first ascertaining that there is enough room, we could wind + * up actually losing data. Try to determine how much space is + * available on the target file system, and if that is not enough + * for the source file, fail without even trying. If, however, + * the target file does not already exist, we have nothing to + * lose by just doing the copy without checking the space. 
+ */ + ret = statvfs(dst, &statvsbuf); + if (ret == 0 && statvsbuf.f_frsize != 0) { +#ifdef DBG_ERRORS + /* should we simulate an out-of-space situation */ + if ((length = dbg_chk_error(dst, 'Z')) == 0) +#endif + length = statvsbuf.f_bavail * statvsbuf.f_frsize; + + ret = fstat(ifd, &statbuf); + if (ret == 0) { + length /= 512; /* st_blocks in 512s */ + if (length < statbuf.st_blocks) { + copy_err_str = gettext(PROB_space); + close(ifd); + return (ERR_FILES); + } + } else { + copy_err_str = gettext(PROB_restat); + close(ifd); + return (ERR_FILES); + } + } + + /* create the output file */ +#ifdef DBG_ERRORS + if (opt_errors && dbg_chk_error(dst, 'c')) + ofd = -1; + else +#endif + ofd = creat(dst, mode); + + if (ofd < 0) { + close(ifd); + copy_err_str = gettext(PROB_copyout); + return (ERR_PERM); + } + + /* copy the data from the input file to the output file */ + for (;;) { +#ifdef DBG_ERRORS + if (opt_errors && dbg_chk_error(dst, 'r')) + count = -1; + else +#endif + count = read(ifd, (char *) inbuf, bsize); + if (count <= 0) + break; + + /* + * if the file might be sparse and we got an entire block, + * we should see if the block is all zeros + */ + if (sparse && count == bsize) { + /* + * inbuf is an array of longs, so the count bytes we + * just read occupy count/sizeof (long) elements. A + * hard-coded count/4 scans past the data that was + * read wherever a long is wider than four bytes. + */ + p = inbuf; + e = &inbuf[count / sizeof (long)]; + while (p < e && *p == 0) + p++; + if (p == e) { + (void) lseek(ofd, (off_t) count, SEEK_CUR); + was_hole = TRUE; + continue; + } + } + was_hole = FALSE; + +#ifdef DBG_ERRORS + if (opt_errors && dbg_chk_error(dst, 'w')) + ret = -1; + else +#endif + ret = write(ofd, (char *) inbuf, count); + + if (ret != count) { + errs = ERR_FILES; + copy_err_str = gettext(PROB_write); + break; + } + } + + if (count < 0) { + copy_err_str = gettext(PROB_read); + errs = ERR_FILES; + } else if (was_hole) { + /* + * if we skipped the last write because of a hole, we + * need to make sure that we write a single byte of null + * at the end of the file to update the file length. 
+ */ + (void) lseek(ofd, (off_t)-1, SEEK_CUR); + (void) write(ofd, "", 1); + } + + /* + * if the output file was botched, free up its space + */ + if (errs) + ftruncate(ofd, (off_t) 0); + + close(ifd); + close(ofd); + return (errs); +} + +/* + * routine: + * checksparse + * + * purpose: + * to determine whether or not a file might be sparse, and if + * it is sparse, what the granularity of the holes is likely + * to be. + * + * parameters: + * file descriptor for file in question + * + * returns: + * 0 file does not appear to be sparse + * else block size for this file + * (MIN_HOLE if the file cannot be examined) + */ +static int +checksparse(int fd) +{ + struct stat statb; + + /* + * unable to stat the file is very strange (since we got it + * open) but it probably isn't worth causing a fuss over. + * Return the conservative answer + */ + if (fstat(fd, &statb) < 0) + return (MIN_HOLE); + + /* + * if the file doesn't have enough blocks to account for + * all of its bytes, there is a reasonable chance that it + * is sparse. This test is not perfect, in that it will + * fail to find holes in cases where the holes aren't + * numerous enough to compensate for the indirect blocks + * ... but losing those few holes is not going to be a + * big deal. + */ + if (statb.st_size > 512 * statb.st_blocks) + return (statb.st_blksize); + else + return (0); +} diff --git a/usr/src/cmd/filesync/anal.c b/usr/src/cmd/filesync/anal.c new file mode 100644 index 0000000000..fe10e49620 --- /dev/null +++ b/usr/src/cmd/filesync/anal.c @@ -0,0 +1,1114 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * anal.c + * + * purpose: + * routines to analyze the file trees and figure out what has changed + * and queue files for reconciliation. It also contains tree enumeration + * routines for other purposes (pruning and link location). + * + * contents: + * + * change analysis: + * analyze .... (top level) analyze all files in the tree for changes + * summary .... print out change/reconciliation statistics for each base + * check_file . (static) look for changes and queue file for reconciliation + * check_changes (static) figure out if a particular file has changed + * queue_file . (static) add a file to the reconciliation list + * + * other tree enumeration functions: + * prune_file . (static) recursive descent and actual pruning + * prune ...... (top level) initiate pruning analysis for nonexistent files + * find_link .. look for other files to which a file may be a link + * link_update. propagate changed stat info to all other links + * same_name .. (static) figure out if two nodes describe same file + * + * misc: + * push_name .. maintain a running full pathname as we descend + * pop_name ... maintain a running full pathname as we pop back + * get_name ... return full pathname for the current file + * + * notes: + * analysis is limited to files that were evaluated in the previous + * pass ... since we don't have complete information about files that + * were not evaluated in the previous pass. 
+ */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> + +#include "messages.h" +#include "filesync.h" +#include "database.h" +#include "debug.h" + +/* + * routines: + */ +void push_name(const char *); +void pop_name(); +char *get_name(struct file *); +static errmask_t check_file(struct file *fp); +static diffmask_t check_changes(struct file *fp, int first, int second); +static int prune_file(struct file *fp); +static void queue_file(struct file *fp); + +/* + * globals + */ +static struct file *changes; /* list of files to be reconciled */ + +static long total_files; /* total number of files being considered */ +static long est_deletes; /* estimated number of files to be deleted */ +static long est_rmdirs; /* est rmdirs of non-empty directories */ + +int inum_changes; /* LISTed directories whose I#s changed */ + +/* + * routine: + * analyze + * + * purpose: + * top level routine for the analysis/reconciliation process + * + * parameters: + * none + * + * returns: + * error mask + * + * notes: + * a critical side effect of this routine is the creation of + * the reconciliation list, an ordered list of files that + * needed to be processed in the subsequent reconciliation pass + */ +errmask_t +analyze() +{ struct base *bp; + struct file *fp; + int errs = 0; + int err; + int percentage; + bool_t aborted = FALSE; + char msgbuf[MAX_LINE]; + + /* + * run through all bases and directories looking for files + * that have been renamed. This must be done before the + * difference analysis because a directory rename can introduce + * radical restructuring into a name-based tree. + */ + for (bp = bases; bp; bp = bp->b_next) { + for (fp = bp->b_files; fp; fp = fp->f_next) + if (fp->f_flags & F_EVALUATE) + errs |= find_renames(fp); + } + + /* + * run through all bases and files looking for candidates + * note, however that we only descend into trees that have + * the evaluate flag turned on. 
As a result of new rules or + * restriction arguments, we may be deliberatly ignoring + * large amounts of the baseline. This means we won't do + * any stats to update the information in those nodes, and + * they will be written back just as they were. + * + * note that there is code to prune out baseline nodes for + * files that no longer exist, but that code is in reconcile + * and will never get a chance to run on nodes that aren't + * analyzed. + * + * we also want to run though all nodes with STAT errors + * so that we can put them on the reconciliation list. + */ + for (bp = bases; bp; bp = bp->b_next) { + for (fp = bp->b_files; fp; fp = fp->f_next) + if (fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) + errs |= check_file(fp); + } + + /* + * my greatest fear is that someday, somehow, by messing with + * variables or baselines or who-knows-what, that someone will + * run a reconciliation against a large tree that doesn't correspond + * to the baseline, and I will infer that a bazillion files have + * been deleted and will propagate the slaughter before anyone + * can say somebody stop that maniac. + * + * in order to prevent such a possibility, we have a few different + * sanity checks. There is, of course, a tradeoff here between + * danger and irritation. The current set of heuristics for whether + * or not to generate a warning are (any of) + * + * at least CONFIRM_MIN files have been deleted AND + * CONFIRM_PCT of all files have been deleted + * + * the inode number on a LISTed directory has changed + * + * a non-empty directory has been deleted. + */ + msgbuf[0] = 0; + + percentage = (est_deletes * 100) / (total_files ? 
total_files : 1); + if (est_deletes >= CONFIRM_MIN && percentage >= CONFIRM_PCT) + sprintf(msgbuf, gettext(WARN_deletes), est_deletes); + else if (inum_changes > 0) + sprintf(msgbuf, gettext(WARN_ichange), inum_changes); + else if (est_rmdirs) + sprintf(msgbuf, gettext(WARN_rmdirs), est_rmdirs); + + if (msgbuf[0]) + confirm(msgbuf); + + /* + * TRICK: + * the change list contains both files that have changed + * (and probably warrant reconciliation) and files that + * we couldn't get up-to-date stat information on. The + * latter files should just be flagged as being in conflict + * so they can be reported in the summary. The same is + * true of all subsequent files if we abort reconciliation. + */ + for (fp = changes; fp; fp = fp->f_rnext) + if (aborted || (fp->f_flags & F_STAT_ERROR)) { + fp->f_flags |= F_CONFLICT; + /* if it isn't in the baseline yet, don't add it */ + if ((fp->f_flags & F_IN_BASELINE) == 0) + fp->f_flags |= F_REMOVE; + fp->f_problem = aborted ? PROB_aborted : PROB_restat; + (fp->f_base)->b_unresolved++; + errs |= ERR_UNRESOLVED; + if (opt_verbose) + fprintf(stdout, + gettext(aborted ? V_suppressed + : V_nostat), + fp->f_fullname); + } else { + err = reconcile(fp); + errs |= err; + if (opt_halt && (err & ERR_ABORT)) { + fprintf(stderr, gettext(ERR_abort_h)); + aborted = TRUE; + } + } + + return (errs); +} + +/* + * routine: + * prune_file + * + * purpose: + * to look for file entries that should be pruned from baseline + * prune the current file if it needs pruning, and recursively + * descend if it is a directory. 
+ * + * parameters: + * pointer to file node + */ +static int +prune_file(struct file *fp) +{ struct file *cp; + int prunes = 0; + + /* if node hasn't been evaluated, mark it for removal */ + if ((fp->f_flags & (F_EVALUATE|F_STAT_ERROR)) == 0) { + fp->f_flags |= F_REMOVE; + prunes++; + if (opt_debug & DBG_ANAL) + fprintf(stderr, "ANAL: PRUNE %s\n", fp->f_name); + } + + /* now check our children */ + for (cp = fp->f_files; cp; cp = cp->f_next) + prunes += prune_file(cp); + + return (prunes); +} + +/* + * routine: + * prune + * + * purpose: + * to prune the baseline of entries that no longer correspond to + * existing rules. + * + * returns: + * number of file nodes that were marked for removal + * + * notes: + * This routine just calls prune_file on the top of each base tree. + * A base that was not evaluated is itself marked for removal. + */ +int +prune() +{ struct base *bp; + struct file *fp; + int prunes = 0; + + for (bp = bases; bp; bp = bp->b_next) { + for (fp = bp->b_files; fp; fp = fp->f_next) + prunes += prune_file(fp); + + if ((bp->b_flags & F_EVALUATE) == 0) + bp->b_flags |= F_REMOVE; + } + + return (prunes); +} + +/* + * routine: + * summary + * + * purpose: + * to print out statistics and conflict lists + */ +void +summary() +{ struct base *bp; + struct file *fp; + extern bool_t need_super; + + (void) fflush(stdout); + + for (bp = bases; bp; bp = bp->b_next) { + + /* see if this base was irrelevant */ + if ((bp->b_flags & F_EVALUATE) == 0) + continue; + + /* print out a summary for this base */ + fprintf(stderr, gettext(SUM_hd), + bp->b_src_spec, bp->b_dst_spec, bp->b_totfiles); + fprintf(stderr, gettext(SUM_dst), + bp->b_dst_copies, bp->b_dst_deletes, bp->b_dst_misc); + fprintf(stderr, gettext(SUM_src), + bp->b_src_copies, bp->b_src_deletes, bp->b_src_misc); + if (bp->b_unresolved) + fprintf(stderr, gettext(SUM_unresolved), + bp->b_unresolved); + + + /* print out a list of unreconciled files for this base */ + for (fp = changes; fp; fp = fp->f_rnext) { + if (fp->f_base != bp) + continue; + if ((fp->f_flags & F_CONFLICT) == 0) + continue; + fprintf(stderr, "\t\t%s (%s)\n", fp->f_fullname, 
+ fp->f_problem ? fp->f_problem : "???"); + } + + fprintf(stderr, "\n"); + } + + if (need_super) + fprintf(stderr, gettext(WARN_super)); +} + +/* + * routine: + * check_file + * + * purpose: + * figure out if a file requires reconciliation and recursively + * descend into all sub-files and directories + * + * parameters: + * file pointer + * + * returns: + * error mask + * built up changes needed list + * updated statistics + * + * notes: + * this routine builds up a path name as it descends through + * the tree (see push_name, pop_name, get_name). + */ +static errmask_t +check_file(struct file *fp) +{ struct file *cp; + int errs = 0; + + if ((fp->f_flags & F_STAT_ERROR) == 0) { + /* see if the source has changed */ + fp->f_info[OPT_BASE].f_modtime = fp->f_s_modtime; + fp->f_info[OPT_BASE].f_ino = fp->f_s_inum; + fp->f_info[OPT_BASE].f_d_maj = fp->f_s_maj; + fp->f_info[OPT_BASE].f_d_min = fp->f_s_min; + fp->f_info[OPT_BASE].f_nlink = fp->f_s_nlink; + fp->f_srcdiffs |= check_changes(fp, OPT_BASE, OPT_SRC); + + /* see if the destination has changed */ + fp->f_info[OPT_BASE].f_modtime = fp->f_d_modtime; + fp->f_info[OPT_BASE].f_ino = fp->f_d_inum; + fp->f_info[OPT_BASE].f_d_maj = fp->f_d_maj; + fp->f_info[OPT_BASE].f_d_min = fp->f_d_min; + fp->f_info[OPT_BASE].f_nlink = fp->f_d_nlink; + fp->f_dstdiffs |= check_changes(fp, OPT_BASE, OPT_DST); + + /* if nobody thinks the file exists, baseline needs pruning */ + if ((fp->f_flags & (F_IN_SOURCE|F_IN_DEST)) == 0) { + fp->f_srcdiffs |= D_DELETE; + fp->f_dstdiffs |= D_DELETE; + } + + /* keep track of possible deletions to look for trouble */ + if ((fp->f_dstdiffs | fp->f_srcdiffs) & D_DELETE) { + est_deletes++; + + /* see if file is (or has been) a non-empty directory */ + if (fp->f_files) + est_rmdirs++; + } + } + + /* if we found differences, queue the file for reconciliation */ + if (fp->f_srcdiffs || fp->f_dstdiffs || fp->f_flags & F_STAT_ERROR) { + queue_file(fp); + + if (opt_debug & DBG_ANAL) { + 
fprintf(stderr, "ANAL: src=%s", + showflags(diffmap, fp->f_srcdiffs)); + fprintf(stderr, " dst=%s", + showflags(diffmap, fp->f_dstdiffs)); + fprintf(stderr, " flgs=%s", + showflags(fileflags, fp->f_flags)); + fprintf(stderr, " name=%s\n", fp->f_fullname); + } + } + + /* bump the total file count */ + fp->f_base->b_totfiles++; + total_files++; + + /* if this is not a directory, we're done */ + if (fp->f_files == 0) + return (errs); + + /* + * If this is a directory, we need to recursively analyze + * our children, but only children who have been evaluated. + * If a node has not been evaluated, then we don't have + * updated stat information and there is nothing to analyze. + * + * we also want to run though all nodes with STAT errors + * so that we can put them on the reconciliation list. + * If a directory is unreadable on one side, all files + * under that directory (ON BOTH SIDES) must be marked as + * blocked by stat errors. + */ + push_name(fp->f_name); + + for (cp = fp->f_files; cp; cp = cp->f_next) { + if (fp->f_flags & F_STAT_ERROR) + cp->f_flags |= F_STAT_ERROR; + if (cp->f_flags & (F_EVALUATE|F_STAT_ERROR)) + errs |= check_file(cp); + } + + pop_name(); + + return (errs); +} + +/* + * routine: + * check_changes + * + * purpose: + * to figure out what has changed for a specific file + * + * parameters: + * file pointer + * the reference info + * the info to be checked for changes + * + * returns: + * diff mask + * + * notes: + * this routine doesn't pretend to understand what happened. + * it merely enumerates the ways in which the files differ. 
+ */
/*
 * check_changes compares f_info[new] against f_info[ref] and returns a
 * mask with one D_xxx bit per difference:
 *	owner, group and mode are always compared
 *	a type change is reported as D_DELETE (new type is zero),
 *	    D_CREATE (reference type is zero) or D_TYPE
 *	same-type special files compare only their rdev maj/min
 *	    (a difference is reported as D_SIZE)
 *	same-type directories compare nothing more
 *	everything else compares mod time (only "newer" counts),
 *	    size and link count
 *	cmp_acls() returning zero is reported as D_FACLS
 */
static diffmask_t
check_changes(struct file *fp, int ref, int new)
{
    struct fileinfo *refp = &fp->f_info[ref];
    struct fileinfo *newp = &fp->f_info[new];
    int newtype = newp->f_type;
    int diffs = 0;

    if (newp->f_uid != refp->f_uid)
        diffs |= D_UID;

    if (newp->f_gid != refp->f_gid)
        diffs |= D_GID;

    if (newp->f_mode != refp->f_mode)
        diffs |= D_PROT;

    if (newtype != refp->f_type) {
        /* the file appeared, disappeared or turned into something else */
        if (newtype == 0)
            diffs |= D_DELETE;
        else if (refp->f_type == 0)
            diffs |= D_CREATE;
        else
            diffs |= D_TYPE;
    } else {
        switch (newtype) {
        case S_IFBLK:
        case S_IFCHR:
            /* for special files, only the maj/min matter */
            if (newp->f_rd_maj != refp->f_rd_maj)
                diffs |= D_SIZE;
            if (newp->f_rd_min != refp->f_rd_min)
                diffs |= D_SIZE;
            break;

        case S_IFDIR:
            /*
             * the contents of a directory are not examined
             * here; interesting changes are found by walking
             * the tree.
             */
            break;

        default:
            if (newp->f_modtime > refp->f_modtime)
                diffs |= D_MTIME;

            if (newp->f_size != refp->f_size)
                diffs |= D_SIZE;

            if (newp->f_nlink != refp->f_nlink)
                diffs |= D_LINKS;
            break;
        }
    }

    if (cmp_acls(refp, newp) == 0)
        diffs |= D_FACLS;

    return (diffs);
}

+/* + * routine: + * same_name + * + * purpose: + * to figure out whether or not two databsae nodes actually refer to + * the same file. + * + * parameters: + * pointers to two file description nodes + * which side we should check + * + * returns: + * TRUE/FALSE + * + * notes: + * if a single directory is specified in multiple base pairs, it + * is possible to have multiple nodes in the database describing + * the same file. This routine is supposed to detect those cases. + * + * what should be a trivial string comparison is complicated by + * the possibility that the two nodes might describe the same file + * from base directories at different depths. Thus, rather than + * comparing two strings, we really want to compare the concatenation + * of two pairs of strings. 
Unfortunately calling full_name would + * be awkward right now, so instead we have our own comparison + * routine that automatically skips from the first string to + * the second. + */ +static bool_t +same_name(struct file *f1, struct file *f2, side_t srcdst) +{ + char *s1, *s2, *x1, *x2; + + if (srcdst == OPT_SRC) { + s1 = (f1->f_base)->b_src_name; + s2 = (f2->f_base)->b_src_name; + } else { + s1 = (f1->f_base)->b_dst_name; + s2 = (f2->f_base)->b_dst_name; + } + x1 = f1->f_fullname; + x2 = f2->f_fullname; + + /* + * Compare the two names, and if they differ before they end + * this is a non-match. If they both end at the same time, + * this is a match. + * + * The trick here is that each string is actually the logical + * concatenation of two strings, and we need to automatically + * wrap from the first to the second string in each pair. There + * is no requirement that the two (concatenated) strings be + * broken at the same point, so we have a slightly baroque + * comparsion loop. + */ + while (*s1 && *s1 == *s2) { + + /* + * strings have been identical so far, so advance the + * pointers and continue the comparison. The trick + * is that when either string ends, we have to wrap + * over to its extension. + */ + s1++; s2++; + if (*s1 && *s2) + continue; + + /* + * at least one of the strings has ended. + * there is an implicit slash between the string + * and its extension, and this has to be matched + * against the other string. 
+ */ + if (*s1 != *s2) { + if (*s1 == 0 && *s2 == '/') + s2++; + else if (*s2 == 0 && *s1 == '/') + s1++; + else + /* the disagreement doesn't come at a slash */ + break; + } + + /* + * if either string has ended, wrap to its extension + */ + if (*s1 == 0 && x1 != 0) { + s1 = x1; + x1 = 0; + } + if (*s2 == 0 && x2 != 0) { + s2 = x2; + x2 = 0; + } + } + + return (*s1 == *s2); +} + +/* + * routine: + * find_link + * + * purpose: + * to figure out if there is a file to which we should + * be creating a link (rather than making a copy) + * + * parameters: + * file node for the file to be created (that we hope is merely a link) + * which side is to be changed (src/dst) + * + * return: + * 0 no link is appropriate + * else pointer to file node for link referent + * + * notes: + * there are a few strange heuristics in this routine and I + * wouldn't bet my soul that I got all of them right. The general + * theory is that when a new file is created, we look to see if it + * is a link to another file on the changed side, and if it is, we + * find the corresponding file on the unchanged side. + * + * cases we want to be able to handle: + * 1. one or more links are created to a prexisting file + * 2. a preexisting only link is renamed + * 3. a rename of one of multiple links to a preexisting file + * 4. a single file is created with multiple links + */ +struct file * +find_link(struct file *fp, side_t srcdst) +{ struct file *lp; + side_t chgside, tgtside; + struct fileinfo *chgp, *tgtp, *basp, *fcp, *ftp; + + /* chg = side on which the change was noticed */ + /* tgt = side to which the change is to be propagated */ + chgside = (srcdst == OPT_SRC) ? OPT_DST : OPT_SRC; + tgtside = (srcdst == OPT_SRC) ? OPT_SRC : OPT_DST; + fcp = &fp->f_info[chgside]; + ftp = &fp->f_info[tgtside]; + + /* + * cases 1 and 3 + * + * When a new link is created, we should be able to find + * another file in the changed hierarchy that has the same + * I-node number. 
We expect it to be on the changed list + * because the link count will have gone up or because all + * of the copies are new. If we find one, then the new file + * on the receiving file should be a link to the corresponding + * existing file. + * + * case 4 + * + * the first link will be dealt with as a copy, but all + * subsequent links should find an existing file analogous + * to one of the links on the changed side, and create + * corresponding links on the other side. + * + * in each of these cases, there should be multiple links + * on the changed side. If the linkcount on the changed + * side is one, we needn't bother searching for other links. + */ + if (fcp->f_nlink > 1) + for (lp = changes; lp; lp = lp->f_rnext) { + /* finding the same node doesn't count */ + if (fp == lp) + continue; + + tgtp = &lp->f_info[tgtside]; + chgp = &lp->f_info[chgside]; + + /* + * if the file doesn't already exist on the target side + * we cannot make a link to it + */ + if (tgtp->f_mode == 0) + continue; + + /* + * if this is indeed a link, then the prospective file on + * the changed side will have the same dev/inum as the file + * we are looking for + */ + if (fcp->f_d_maj != chgp->f_d_maj) + continue; + if (fcp->f_d_min != chgp->f_d_min) + continue; + if (fcp->f_ino != chgp->f_ino) + continue; + + /* + * if the target side is already a link to this file, + * then there is no new link to be created + * FIX: how does this interact with copies over links + */ + if ((ftp->f_d_maj == tgtp->f_d_maj) && + (ftp->f_d_min == tgtp->f_d_min) && + (ftp->f_ino == tgtp->f_ino)) + continue; + + /* + * there is a pathological situation where a single file + * might appear under multiple base directories. This is + * damned awkward to detect in any other way, so we must + * check to see if we have just found another database + * instance for the same file (on the changed side). 
+ */ + if ((fp->f_base != lp->f_base) && same_name(fp, lp, chgside)) + continue; + + if (opt_debug & DBG_ANAL) + fprintf(stderr, "ANAL: FIND LINK %s and %s\n", + fp->f_fullname, lp->f_fullname); + + return (lp); + } + + /* + * case 2: a simple rename of the only link + * + * In this case, there may not be any other existing file on + * the changed side that has the same I-node number. There + * might, however, be a record of such a file in the baseline. + * If we can find an identical file with a different name that + * has recently disappeared, we have a likely rename. + */ + for (lp = changes; lp; lp = lp->f_rnext) { + + /* finding the same node doesn't count */ + if (fp == lp) + continue; + + tgtp = &lp->f_info[tgtside]; + chgp = &lp->f_info[chgside]; + + /* + * if the file still exists on the changed side this is + * not a simple rename, and in fact the previous pass + * would have found it. + */ + if (chgp->f_mode != 0) + continue; + + /* + * the inode number for the new link on the changed + * side must match the inode number for the old link + * from the baseline. + */ + if (fcp->f_d_maj != ((srcdst == OPT_SRC) ? lp->f_d_maj + : lp->f_s_maj)) + continue; + if (fcp->f_d_min != ((srcdst == OPT_SRC) ? lp->f_d_min + : lp->f_s_min)) + continue; + if (fcp->f_ino != ((srcdst == OPT_SRC) ? lp->f_d_inum + : lp->f_s_inum)) + continue; + + /* finding a file we are already linked to doesn't help */ + if ((ftp->f_d_maj == tgtp->f_d_maj) && + (ftp->f_d_min == tgtp->f_d_min) && + (ftp->f_ino == tgtp->f_ino)) + continue; + + /* + * there is a danger that we will confuse an + * inode reallocation with a rename. 
We should + * only consider this to be a rename if the + * new file is identical to the old one + */ + basp = &lp->f_info[OPT_BASE]; + if (fcp->f_type != basp->f_type) + continue; + if (fcp->f_size != basp->f_size) + continue; + if (fcp->f_mode != basp->f_mode) + continue; + if (fcp->f_uid != basp->f_uid) + continue; + if (fcp->f_gid != basp->f_gid) + continue; + + if (opt_debug & DBG_ANAL) + fprintf(stderr, "ANAL: FIND RENAME %s and %s\n", + fp->f_fullname, lp->f_fullname); + + return (lp); + } + + return (0); +} + +/* + * routine: + * has_other_links + * + * purpose: + * to determine whether or not there is more that one link to a + * particular file. We are willing to delete a link to a file + * that has changed if we will still have other links to it. + * The trick here is that we only care about links under our + * dominion. + * + * parameters: + * file pointer to node we are interested in + * which side we are looking to additional links on + * + * returns: + * TRUE if there are multiple links + * FALSE if this is the only one we know of + */ +bool_t +has_other_links(struct file *fp, side_t srcdst) +{ struct file *lp; + struct fileinfo *fip, *lip; + + fip = &fp->f_info[srcdst]; + + /* if the link count is one, there couldn't be others */ + if (fip->f_nlink < 2) + return (FALSE); + + /* look for any other files for the same inode */ + for (lp = changes; lp; lp = lp->f_rnext) { + /* finding the same node doesn't count */ + if (fp == lp) + continue; + + lip = &lp->f_info[srcdst]; + + /* + * file must still exist on this side + */ + if (lip->f_mode == 0) + continue; + + /* + * if this is indeed a link, then the prospective file on + * the changed side will have the same dev/inum as the file + * we are looking for + */ + if (lip->f_d_maj != fip->f_d_maj) + continue; + if (lip->f_d_min != fip->f_d_min) + continue; + if (lip->f_ino != fip->f_ino) + continue; + + /* + * we have found at least one other link + */ + return (TRUE); + } + + return (FALSE); +} + +/* + * 
routine: + * link_update + * + * purpose: + * to propagate a stat change to all other file nodes that + * correspond to the same I-node on the changed side + * + * parameters: + * file pointer for the updated file + * which side was changed + * + * returns: + * void + * + * notes: + * if we have copied onto a file, we have copied onto all + * of its links, but since we do all stats before we do any + * copies, the stat information recently collected for links + * is no longer up-to-date, and this would result in incorrect + * reconciliation (redundant copies). + * + * There is an assumption here that all links to a changed + * file will be in the change list. This is true for almost + * all cases not involving restriction. If we do fail to + * update the baseline for a file that was off the change list, + * the worst that is likely to happen is that we will think + * it changed later (but will almost surely find that both + * copies agree). + */
void
link_update(struct file *fp, side_t which)
{
    struct fileinfo *fresh = &fp->f_info[which];
    struct fileinfo *stale;
    struct file *lp;

    for (lp = changes; lp != 0; lp = lp->f_rnext) {
        /* the updated entry itself needs nothing */
        if (lp == fp)
            continue;

        /* only entries with the same i#, maj, min on that side */
        stale = &lp->f_info[which];
        if (stale->f_ino != fresh->f_ino ||
            stale->f_d_maj != fresh->f_d_maj ||
            stale->f_d_min != fresh->f_d_min)
            continue;

        /*
         * another name for the same file: what was learned about
         * the one is true of the other (the identifying dev/inum
         * fields already agree and are not copied)
         */
        stale->f_type = fresh->f_type;
        stale->f_size = fresh->f_size;
        stale->f_mode = fresh->f_mode;
        stale->f_uid = fresh->f_uid;
        stale->f_gid = fresh->f_gid;
        stale->f_modtime = fresh->f_modtime;
        stale->f_modns = fresh->f_modns;
        stale->f_nlink = fresh->f_nlink;
        stale->f_rd_maj = fresh->f_rd_maj;
        stale->f_rd_min = fresh->f_rd_min;

        if (opt_debug & DBG_STAT)
            fprintf(stderr,
                "STAT: UPDATE LINK, file=%s, mod=%08lx.%08lx\n",
                lp->f_name, stale->f_modtime, stale->f_modns);
    }
}

+/* + * routine: + * queue_file + * + * purpose: + * append a file to the list of needed reconciliations + * + * parameters: + * pointer to file + * + * notes: + * when a request is appended to the reconciliation list, + * we fill in the full name. We delayed this in hopes that + * it wouldn't be necessary (saving cycles and memory) + * + * There is some funny business with modification times. + * In general, we queue files in order of the latest modification + * time so that propagations preserve relative ordering. There + * are, however, a few important exceptions: + * 1. all directory creations happen at time zero, + * so that they are created before any files can + * be added to them. + * 2. all directory deletions happen at time infinity-depth, + * so that everything else can be removed before the + * directories themselves are removed. + * 3. all file deletions happen at time infinity-depth + * so that (in renames) the links will preceed the unlinks. 
+ */ +static void +queue_file(struct file *fp) +{ struct file **pp, *np; + +#define TIME_ZERO 0L /* the earliest possible time */ +#define TIME_LONG 0x7FFFFFFF /* the latest possible time */ + + /* + * figure out the modification time for sequencing purposes + */ + if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_DELETE) { + /* + * deletions are performed last, and depth first + */ + fp->f_modtime = TIME_LONG - fp->f_depth; + } else if (fp->f_info[OPT_SRC].f_type != S_IFDIR && + fp->f_info[OPT_DST].f_type != S_IFDIR) { + /* + * for most files we use the latest mod time + */ + fp->f_modtime = fp->f_info[OPT_SRC].f_modtime; + fp->f_modns = fp->f_info[OPT_SRC].f_modns; + if (fp->f_modtime < fp->f_info[OPT_DST].f_modtime) { + fp->f_modtime = fp->f_info[OPT_DST].f_modtime; + fp->f_modns = fp->f_info[OPT_DST].f_modns; + } + } else { + /* + * new directory creations need to happen before anything + * else and are automatically sequenced in traversal order + */ + fp->f_modtime = TIME_ZERO; + } + + /* + * insertion is time ordered, and for equal times, + * insertions is in (pre-order) traversal order + */ + for (pp = &changes; (np = *pp) != 0; pp = &np->f_rnext) { + if (fp->f_modtime > np->f_modtime) + continue; + if (fp->f_modtime < np->f_modtime) + break; + if (fp->f_modns < np->f_modns) + break; + } + + fp->f_fullname = strdup(get_name(fp)); + fp->f_rnext = np; + *pp = fp; +} + + +/* + * routines: + * push_name/pop_name/get_name + * + * purpose: + * maintain a name stack so we can form name of a particular file + * as the concatenation of all of the names between it and the + * (know to be fully qualified) base directory. + * + * notes: + * we go to this trouble because most files never change and + * so we don't need to associate full names with every one. + * This stack is maintained during analysis, and if we decide + * to add a file to the reconciliation list, we can use the + * stack to generate a fully qualified name at that time. 
+ * + * we compress out '/./' when we return a name. Given that the + * stack was built by a tree walk, the only place a /./ should + * appear is at the first level after the base ... but there + * are legitimate ways for them to appear there. + * + * these names can get deep, so we dynamically size our name buffer + */ +static const char *namestack[ MAX_DEPTH + 1 ]; +static int namedepth = 0; +static int namelen = 0; + +void +push_name(const char *name) +{ + namestack[ namedepth++ ] = name; + namelen += 2 + strlen(name); + + /* make sure we don't overflow our name stack */ + if (namedepth >= MAX_DEPTH) { + fprintf(stderr, gettext(ERR_deep), name); + exit(ERR_OTHER); + } +} + +void +pop_name(void) +{ + namelen -= 2 + strlen(namestack[--namedepth]); + namestack[ namedepth ] = 0; + +#ifdef DBG_ERRORS + /* just a little sanity check here */ + if (namedepth <= 0) { + if (namedepth < 0) { + fprintf(stderr, "ASSERTION FAILURE: namedepth < 0\n"); + exit(ERR_OTHER); + } else if (namelen != 0) { + fprintf(stderr, "ASSERTION FAILURE: namelen != 0\n"); + exit(ERR_OTHER); + } + } +#endif +} + +char +*get_name(struct file *fp) +{ int i; + static char *namebuf = 0; + static int buflen = 0; + + /* make sure we have an adequate buffer */ + i = namelen + 1 + strlen(fp->f_name); + if (buflen < i) { + for (buflen = MAX_PATH; buflen < i; buflen += MAX_NAME); + namebuf = (char *) realloc(namebuf, buflen); + } + + /* assemble the name */ + namebuf[0] = 0; + for (i = 0; i < namedepth; i++) { + if (strcmp(namestack[i], ".")) { + strcat(namebuf, namestack[i]); + strcat(namebuf, "/"); + } + } + + strcat(namebuf, fp->f_name); + + return (namebuf); +} diff --git a/usr/src/cmd/filesync/base.c b/usr/src/cmd/filesync/base.c new file mode 100644 index 0000000000..efcfeb6046 --- /dev/null +++ b/usr/src/cmd/filesync/base.c @@ -0,0 +1,912 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + 
* (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * base.c + * + * purpose: + * routines to create, traverse, read and write the baseline database + * + * contents: + * manipulation: + * add_base, add_file_to_base, add_file_to_dir + * (static) add_file_to_list + * reading baseline: + * read_baseline + * (static) gettype + * writing baseline: + * write_baseline + * (static) bw_header, bw_base, bw_file, showtype + */ +#ident "%W% %E% SMI" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> + +#include "filesync.h" +#include "database.h" +#include "messages.h" + +#define BASE_MAJOR 1 /* base file format major rev */ +#define BASE_MINOR 2 /* base file format minor rev */ +#define BASE_TAG "filesync-BaseLine" + +/* + * globals + */ +struct base omnibase; /* dummy to hold global rules */ +struct base *bases; /* pointer to the base list */ + +/* + * locals + */ +static int num_bases; /* used to generate sequence #s */ +static errmask_t bw_header(FILE *); /* write out baseline header */ +static errmask_t bw_base(FILE *, struct base *); /* write out one base */ +static errmask_t bw_file(FILE *, struct file *, int); +static struct file *add_file_to_list(struct file **, const char *); +static char showtype(int); +static 
long gettype(int); + +/* + * routine: + * add_base + * + * purpose: + * to find a base pair in the chain, adding it if necessary + * + * parameters: + * spec for source directory + * spec for dest directory + * + * returns: + * pointer to the base pair + * + */ +struct base * +add_base(const char *src, const char *dst) +{ struct base *bp, **bpp; + + /* first see if we already have it */ + for (bpp = &bases; (bp = *bpp) != 0; bpp = &bp->b_next) { + /* base must match on both src and dst */ + if (strcmp(src, bp->b_src_spec)) + continue; + if (strcmp(dst, bp->b_dst_spec)) + continue; + + if (opt_debug & DBG_BASE) + fprintf(stderr, "BASE: FOUND base=%d, src=%s, dst=%s\n", + bp->b_ident, src, dst); + return (bp); + } + + /* no joy, so we have to allocate one */ + bp = malloc(sizeof (struct base)); + if (bp == 0) + nomem("base structure"); + + /* initialize the new base */ + memset((void *) bp, 0, sizeof (struct base)); + bp->b_ident = ++num_bases; + bp->b_src_spec = strdup(src); + bp->b_dst_spec = strdup(dst); + + /* names are expanded at run-time, and this is run-time */ + if ((bp->b_src_name = expand(bp->b_src_spec)) == 0) { + fprintf(stderr, gettext(ERR_badbase), bp->b_src_spec); + exit(ERR_FILES); + } + + if ((bp->b_dst_name = expand(bp->b_dst_spec)) == 0) { + fprintf(stderr, gettext(ERR_badbase), bp->b_dst_spec); + exit(ERR_FILES); + } + + /* chain it in */ + *bpp = bp; + + if (opt_debug & DBG_BASE) + fprintf(stderr, "BASE: ADDED base=%d, src=%s, dst=%s\n", + bp->b_ident, src, dst); + + return (bp); +} + +/* + * routine: + * add_file_to_list + * + * purpose: + * to find a file on a list, or if necessary add it to the list + * + * this is an internal routine, used only by add_file_to_base + * and add_file_to_dir. + * + * parameters: + * pointer to the list head + * + * returns: + * pointer to a file structure + * + * notes: + * + * list is sorted to provide some search optimization + * + * most files are in the baseline, and so come in in alphabetical + * order. 
If we keep a guess pointer to the last file we added/found, + * there is a better than even chance that this one should be + * added immediately onto the end of it ... and in so doing we + * can save ourselves the trouble of searching the lists most + * of the time. + * + * this win would be even better if the FTW traversal was sorted, + * but building the baseline is enough of a win to justify the + * feature ... but even without this we run a 60%-70% hit rate. + */ +static struct file * +add_file_to_list(struct file **pp, const char *name) +{ struct file *fp, *new; + int rslt; + + static struct file **last_list; + static struct file *last_file; + + /* + * start with the guess pointer, we hope to find that + * this request will be satisfied by the next file in + * the list. The two cases we are trying to optimize + * are: + * appending to the list, with appends in alphabetical order + * searches of the list, with searches in alphabetical order + */ + if (last_list == pp && (new = last_file) != 0) { + /* we like to think we belong farther down-list */ + if (strcmp(name, new->f_name) > 0) { + fp = new->f_next; + /* if we're at the end, we just won */ + if (fp == 0) { + pp = &new->f_next; + goto makeit; + } + + /* or if the next one is what we want */ + if (strcmp(name, fp->f_name) == 0) { + fp->f_flags &= ~F_NEW; + new = fp; + goto gotit; + } + } + } + + /* + * our guess pointer failed, so it is exhaustive search time + */ + last_list = pp; + + for (fp = *pp; fp; pp = &fp->f_next, fp = *pp) { + rslt = strcmp(name, fp->f_name); + + /* see if we got a match */ + if (rslt == 0) { + fp->f_flags &= ~F_NEW; + new = fp; + goto gotit; + } + + /* see if we should go no farther */ + if (rslt < 0) + break; + } + +makeit: + /* + * we didn't find it: + * pp points at where our pointer should go + * fp points at the node after ours + */ + new = (struct file *) malloc(sizeof (*new)); + if (new == 0) + nomem("file structure"); + + /* initialize the new node */ + memset((void *) 
new, 0, sizeof (struct file)); + new->f_name = strdup(name); + new->f_flags = F_NEW; + + /* chain it into the list */ + new->f_next = fp; + *pp = new; + +gotit: /* remember this as our next guess pointer */ + last_file = new; + return (new); +} + +/* + * routine: + * add_file_to_base + * + * purpose: + * to add a file-node to a baseline + * + * parameters: + * pointer to base + * name of file to be added + * + * returns: + * pointer to file structure + */ +struct file * +add_file_to_base(struct base *bp, const char *name) +{ struct file *fp; + + fp = add_file_to_list(&bp->b_files, name); + fp->f_base = bp; + fp->f_depth = 0; + + if (opt_debug & DBG_LIST) + fprintf(stderr, "LIST: base=%d, %s file=%s\n", + bp->b_ident, (fp->f_flags&F_NEW) ? "NEW" : "FOUND", + name); + + return (fp); +} + +/* + * routine: + * add_file_to_dir + * + * purpose: + * to add a file-node to a directory + * + * parameters: + * pointer to file entry for directory + * name of file to be added + * + * returns: + * pointer to file structure + */ +struct file * +add_file_to_dir(struct file *dp, const char *name) +{ struct file *fp; + + fp = add_file_to_list(&dp->f_files, name); + fp->f_base = dp->f_base; + fp->f_depth = dp->f_depth + 1; + + if (opt_debug & DBG_LIST) + fprintf(stderr, "LIST: dir=%s, %s file=%s\n", + dp->f_name, (fp->f_flags&F_NEW) ? 
"NEW" : "FOUND", + name); + + return (fp); +} + +/* + * routine: + * read_baseline + * + * purpose: + * to read in the baseline file + * + * parameters: + * name of baseline file + * + * returns: + * error mask + */ +errmask_t +read_baseline(char *name) +{ FILE *file; + errmask_t errs = 0; + + char *s; + char *s1 = 0; + char type; + char *field = "???"; + + unsigned long l; + unsigned long long ll; /* intermediate for 64 bit file support */ + int level; + int major, minor; + + struct base *bp = 0; + struct file *fp; + struct fileinfo *ip; + aclent_t *ap; + + struct file *dirstack[ MAX_DEPTH ]; + + file = fopen(name, "r"); + if (file == NULL) { + fprintf(stderr, gettext(ERR_open), gettext(TXT_base), + name); + return (ERR_FILES); + } + lex_linenum = 0; + + if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: READ BASELINE %s\n", name); + + while (!feof(file)) { + /* find the first token on the line */ + s = lex(file); + + /* skip blank lines and comments */ + if (s == 0 || *s == 0 || *s == '#' || *s == '*') + continue; + + field = "keyword"; + + /* see if the first token is a known keyword */ + if (strcmp(s, "VERSION") == 0 || strcmp(s, BASE_TAG) == 0) { + s = lex(0); + field = gettext(TXT_noargs); + if (s == 0) + goto bad; + + major = strtol(s, &s1, 10); + field = gettext(TXT_badver); + if (*s1 != '.') + goto bad; + minor = strtol(&s1[1], 0, 10); + + if (major != BASE_MAJOR || minor > BASE_MINOR) { + fprintf(stderr, gettext(ERR_badver), + major, minor, gettext(TXT_base), name); + errs |= ERR_FILES; + } + s1 = 0; + continue; + } + + if (strcmp(s, "BASE_SRC") == 0) { + s = lex(0); + field = "source directory"; + if (s == 0) + goto bad; + s1 = strdup(s); + bp = 0; + continue; + } + + if (strcmp(s, "BASE_DST") == 0) { + s = lex(0); + field = "destination directory"; + if (s == 0) + goto bad; + + /* make sure we have a source too */ + if (s1 == 0) { + field = "no source directory"; + goto bad; + } + + bp = add_base(s1, s); + free(s1); + s1 = 0; + continue; + } + + if 
(strcmp(s, "FILE") == 0) { + /* make sure we have a base to add to */ + if (bp == 0) { + field = "missing base"; + goto bad; + } + + s = lex(0); /* level */ + field = "level"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + level = l; + + s = lex(0); /* type */ + field = "file type"; + if (s == 0 || *s == 0) + goto bad; + type = *s; + if (gettype(type) < 0) + goto bad; + + s = lex(0); /* name */ + field = "file name"; + if (s == 0 || *s == 0) + goto bad; + + /* allocate a file structure for this entry */ + if (level == 0) + fp = add_file_to_base(bp, s); + else + fp = add_file_to_dir(dirstack[level-1], s); + + fp->f_flags |= F_IN_BASELINE; + + /* maintain the directory stack */ + if (level >= MAX_DEPTH) { + fprintf(stderr, gettext(ERR_deep), s); + exit(ERR_OTHER); + } + + dirstack[ level ] = fp; + + /* get a pointer to the baseline file info structure */ + ip = &fp->f_info[ OPT_BASE ]; + + ip->f_type = gettype(type); /* note file type */ + + s = lex(0); /* modes */ + field = "file modes"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + ip->f_mode = l; + + s = lex(0); /* uid */ + field = "file UID"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + ip->f_uid = l; + + s = lex(0); /* gid */ + field = "file GID"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + ip->f_gid = l; + + s = lex(0); /* source inode */ + field = "source i#"; + if (s == 0 || *s == 0) + goto bad; + ll = strtoull(s, 0, 0); + fp->f_s_inum = (ino_t) ll; + + s = lex(0); /* source major */ + field = "source major"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_s_maj = l; + + s = lex(0); /* source minor */ + field = "source minor"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_s_min = l; + + s = lex(0); /* source nlink */ + field = "source nlink"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_s_nlink = l; + + s = lex(0); /* source mod */ + field = "source modtime"; + if (s == 0 
|| *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_s_modtime = l; + + s = lex(0); /* dest inode */ + field = "destination i#"; + if (s == 0 || *s == 0) + goto bad; + ll = strtoull(s, 0, 0); + fp->f_d_inum = (ino_t) ll; + + s = lex(0); /* dest major */ + field = "destination major"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_d_maj = l; + + s = lex(0); /* dest minor */ + field = "destination minor"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_d_min = l; + + s = lex(0); /* dest nlink */ + field = "dest nlink"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_d_nlink = l; + + s = lex(0); /* dest mod */ + field = "dest modtime"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + fp->f_d_modtime = l; + + s = lex(0); /* major or size */ + + if (type == 'C' || type == 'B') { + field = "rdev major"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + ip->f_rd_maj = l; + + s = lex(0); /* minor */ + field = "rdev minor"; + if (s == 0 || *s == 0) + goto bad; + l = strtoul(s, 0, 0); + ip->f_rd_min = l; + } else { + field = "file size"; + if (s == 0 || *s == 0) + goto bad; + ll = strtoul(s, 0, 0); + ip->f_size = (off_t) ll; /* size */ + } + + /* + * all fields after this point were added to the + * 1.0 format and so should be considered optional + */ + s = lex(0); /* acl length ? 
*/ + field = "acl count"; + if (s && *s) { + l = strtoul(s, 0, 0); + ip->f_numacls = l; + ip->f_acls = (aclent_t *) malloc(ip->f_numacls * + sizeof (aclent_t)); + if (ip->f_acls == 0) + nomem("Access Control List"); + } + + continue; + } + + if (strcmp(s, "ACL") == 0) { + /* make sure there is a place to put the ACL */ + if (ip == 0 || ip->f_acls == 0) { + field = "ACL w/o FILE/LIST"; + goto bad; + } + + /* acl entry number */ + s = lex(0); + field = "acl index"; + if (s == 0) + goto bad; + l = strtoul(s, 0, 0); + if (l >= ip->f_numacls) + goto bad; + else + ap = &ip->f_acls[l]; + + /* acl entry type */ + s = lex(0); + field = "acl type"; + if (s == 0) + goto bad; + l = strtoul(s, 0, 0); + ap->a_type = l; + + /* acl entry ID */ + s = lex(0); + field = "acl id"; + if (s == 0) + goto bad; + l = strtoul(s, 0, 0); + ap->a_id = l; + + /* acl entry perms */ + s = lex(0); + field = "acl perm"; + if (s == 0) + goto bad; + l = strtoul(s, 0, 0); + ap->a_perm = l; + + continue; + } + + bad: /* log the error and continue processing to find others */ + fprintf(stderr, gettext(ERR_badinput), lex_linenum, + field, name); + errs |= ERR_FILES; + } + + (void) fclose(file); + return (errs); +} + +/* + * routine: + * write_baseline + * + * purpose: + * to rewrite the baseline file + * + * parameters: + * name of the new baseline file + * + * returns: + * error mask + */ +errmask_t +write_baseline(char *name) +{ FILE *newfile; + errmask_t errs = 0; + struct base *bp; + char tmpname[ MAX_PATH ]; + + if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: WRITE BASELINE %s\n", name); + + /* if no-touch is specified, we don't update files */ + if (opt_notouch) + return (0); + + /* create a temporary output file */ + sprintf(tmpname, "%s-TMP", name); + + /* create our output file */ + newfile = fopen(tmpname, "w+"); + if (newfile == NULL) { + fprintf(stderr, gettext(ERR_creat), gettext(TXT_base), + tmpname); + return (ERR_FILES); + } + + errs |= bw_header(newfile); + for (bp = bases; bp; bp = 
bp->b_next) + errs |= bw_base(newfile, bp); + + if (ferror(newfile)) { + fprintf(stderr, gettext(ERR_write), gettext(TXT_base), + tmpname); + errs |= ERR_FILES; + } + + if (fclose(newfile)) { + fprintf(stderr, gettext(ERR_fclose), gettext(TXT_base), + tmpname); + errs |= ERR_FILES; + } + + /* now switch the new file for the old one */ + if (errs == 0) + if (rename(tmpname, name) != 0) { + fprintf(stderr, gettext(ERR_rename), + gettext(TXT_base), tmpname, name); + errs |= ERR_FILES; + } + + return (errs); +} + +/* + * routine: + * bw_header + * + * purpose: + * to write out a baseline header + * + * parameters: + * FILE* for the output file + * + * returns: + * error mask + * + * notes: + */ +static errmask_t +bw_header(FILE *file) +{ time_t now; + struct tm *local; + + /* figure out what time it is */ + (void) time(&now); + local = localtime(&now); + + fprintf(file, "%s %d.%d\n", BASE_TAG, BASE_MAJOR, BASE_MINOR); + fprintf(file, "#\n"); + fprintf(file, "# filesync baseline, last written by %s, %s", + cuserid((char *) 0), asctime(local)); + fprintf(file, "#\n"); + + return (0); +} + +/* + * routine: + * bw_base + * + * purpose: + * to write out the summary for one base-pair + * + * parameters: + * FILE * for the output file + * + * returns: + * error mask + * + * notes: + */ +static errmask_t +bw_base(FILE *file, struct base *bp) +{ struct file *fp; + errmask_t errs = 0; + + /* see if this base is to be dropped from baseline */ + if (bp->b_flags & F_REMOVE) + return (0); + + fprintf(file, "\n"); + fprintf(file, "BASE_SRC %s\n", noblanks(bp->b_src_spec)); + fprintf(file, "BASE_DST %s\n", noblanks(bp->b_dst_spec)); + + for (fp = bp->b_files; fp; fp = fp->f_next) + errs |= bw_file(file, fp, 0); + + return (errs); +} + +/* + * routine: + * bw_file + * + * purpose: + * to write a file description out to the baseline + * + * parameters: + * output FILE + * pointer to file description + * recursion depth + * + * returns: + * error mask + * + * notes: + * some of the 
information we write out is kept separately + * for source and destination files because the values should + * be expected to be different for different systems/copies. + * + * if a file has an unresolved conflict, we want to leave + * the old values in place so that we continue to compare + * files against the last time they agreed. + */ +static errmask_t +bw_file(FILE *file, struct file *fp, int depth) +{ struct file *cp; + int i; + errmask_t errs = 0; + long long ll; /* intermediate for 64 bit file support */ + struct fileinfo *ip = &fp->f_info[OPT_BASE]; + + /* if this file is to be removed from baseline, skip it */ + if (fp->f_flags & F_REMOVE) + return (0); + + /* + * if this node is in conflict, or if it has not been + * evaluated this time around, we should just leave the + * baseline file the way it was before. If there is a + * conflict, let the baseline reflect the last agreement. + * If the node wasn't evaluated, let the baseline reflect + * our last knowledge. + */ + if (fp->f_flags & F_CONFLICT || (fp->f_flags&F_EVALUATE) == 0) { + fp->f_info[OPT_SRC].f_ino = fp->f_s_inum; + fp->f_info[OPT_SRC].f_nlink = fp->f_s_nlink; + fp->f_info[OPT_SRC].f_d_maj = fp->f_s_maj; + fp->f_info[OPT_SRC].f_d_min = fp->f_s_min; + fp->f_info[OPT_SRC].f_modtime = fp->f_s_modtime; + fp->f_info[OPT_DST].f_ino = fp->f_d_inum; + fp->f_info[OPT_DST].f_nlink = fp->f_d_nlink; + fp->f_info[OPT_DST].f_d_maj = fp->f_d_maj; + fp->f_info[OPT_DST].f_d_min = fp->f_d_min; + fp->f_info[OPT_DST].f_modtime = fp->f_d_modtime; + } + + /* write out the entry for this file */ + fprintf(file, "FILE %d %c %-20s 0%04o", depth, showtype(ip->f_type), + noblanks(fp->f_name), ip->f_mode); + fprintf(file, " %6ld %6ld", ip->f_uid, ip->f_gid); + + ll = fp->f_info[OPT_SRC].f_ino; + fprintf(file, "\t%6lld %4ld %4ld %4d 0x%08lx", + ll, + fp->f_info[OPT_SRC].f_d_maj, + fp->f_info[OPT_SRC].f_d_min, + fp->f_info[OPT_SRC].f_nlink, + fp->f_info[OPT_SRC].f_modtime); + + ll = fp->f_info[OPT_DST].f_ino; + 
fprintf(file, "\t%6lld %4ld %4ld %4d 0x%08lx", + ll, + fp->f_info[OPT_DST].f_d_maj, + fp->f_info[OPT_DST].f_d_min, + fp->f_info[OPT_DST].f_nlink, + fp->f_info[OPT_DST].f_modtime); + + /* last fields are file type specific */ + if (S_ISBLK(ip->f_type) || S_ISCHR(ip->f_type)) + fprintf(file, "\t%4ld %4ld", ip->f_rd_maj, ip->f_rd_min); + else { + ll = ip->f_size; + fprintf(file, "\t%lld", ll); + } + + /* ACL count goes at the end because it was added */ + fprintf(file, "\t%d", ip->f_numacls); + + fprintf(file, "\n"); + + /* if this file has ACLs, we have to write them out too */ + for (i = 0; i < ip->f_numacls; i++) + fprintf(file, "ACL %d %d %ld %o\n", i, ip->f_acls[i].a_type, + ip->f_acls[i].a_id, ip->f_acls[i].a_perm); + + /* then enumerate all of the children (if any) */ + for (cp = fp->f_files; cp; cp = cp->f_next) + errs |= bw_file(file, cp, depth + 1); + + return (errs); +} + +/* + * routines: + * gettype/showtype + * + * purpose: + * to convert between a file type (as found in a mode word) + * and a single character representation + * + * parameters/return + * mode word -> character + * character -> mode word + */ +static char types[16] = "-PC?DNB?F?S?s???"; + +static char showtype(int mode) +{ + return (types[ (mode & S_IFMT) >> 12 ]); +} + +static long gettype(int code) +{ int i; + + for (i = 0; i < 16; i++) + if (types[i] == code) + return (i << 12); + + return (-1); +} diff --git a/usr/src/cmd/filesync/database.h b/usr/src/cmd/filesync/database.h new file mode 100644 index 0000000000..f9e0c44180 --- /dev/null +++ b/usr/src/cmd/filesync/database.h @@ -0,0 +1,307 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * database.h + * + * purpose: + * definition of the baseline and rules data structures + */ + +#ifndef _DATABASE_H +#define _DATABASE_H + +#pragma ident "%W% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/stat.h> +#include <sys/acl.h> + +#define ACL_UID_BUG 1 /* acl:SETACL sets owner to be caller */ + +/* + * flag bits describing what we know about an individual file, or in + * some cases an entire base pair. These flags are found in the + * base and file stuctures. 
typedef int fflags_t;			/* type for file flags		*/

#define	F_NEW		0x01	/* newly allocated		*/
#define	F_IN_BASELINE	0x02	/* file found in baseline	*/
#define	F_IN_SOURCE	0x04	/* file found in source tree	*/
#define	F_IN_DEST	0x08	/* file found in dest tree	*/
#define	F_EVALUATE	0x10	/* include in analysis		*/
#define	F_SPARSE	0x20	/* don't walk this directory	*/
#define	F_REMOVE	0x40	/* remove from baseline		*/
#define	F_CONFLICT	0x80	/* unresolvable conflict	*/
#define	F_LISTED	0x100	/* file came from LIST		*/
#define	F_STAT_ERROR	0x200	/* unable to stat file		*/

/* the three bits that record where a file was found	*/
#define	F_WHEREFOUND	(F_IN_BASELINE|F_IN_SOURCE|F_IN_DEST)

/*
 * a base is a pair of directories to be kept in sync
 * all rules and baseline data is stored beneath some base
 */
struct base {
	struct base *b_next;		/* pointer to next base		*/
	fflags_t   b_flags;		/* what I know about this base	*/
	int	   b_ident;		/* base sequence # (DBG)	*/
	char	  *b_src_spec;		/* spec name of source dir	*/
	char	  *b_dst_spec;		/* spec name of dest dir	*/
	char	  *b_src_name;		/* expanded name of source dir	*/
	char	  *b_dst_name;		/* expanded name of dest dir	*/

	struct rule *b_includes;	/* chain of include rules	*/
	struct rule *b_excludes;	/* chain of exclude rules	*/
	struct rule *b_restrictions;	/* chain of restrictions	*/

	struct file *b_files;		/* chain of files		*/

	/* statistics for wrap-up summary				*/
	int	b_totfiles;		/* total files found in tree	*/
	int	b_src_copies;		/* files copied to source	*/
	int	b_src_deletes;		/* files deleted from source	*/
	int	b_src_misc;		/* ownership changes on source	*/
	int	b_dst_copies;		/* files copied to dest		*/
	int	b_dst_deletes;		/* files deleted from dest	*/
	int	b_dst_misc;		/* ownership changes on dest	*/
	int	b_unresolved;		/* unresolved conflicts		*/
};
typedef int rflags_t;			/* type for rule flags		*/

#define	R_NEW		0x01	/* newly added rule (=OPT_NEW)	*/
#define	R_PROGRAM	0x02	/* program (vs literal names)	*/
#define	R_IGNORE	0x04	/* IGNORE (vs INCLUDE)		*/
#define	R_RESTRICT	0x08	/* restriction (-r argument)	*/
#define	R_WILD		0x10	/* name involves wild cards	*/
#define	R_BOGUS		0x20	/* fabricated rule		*/

/*
 * a rule describes files to be included or excluded
 * they are stored under bases
 */
struct rule {
	struct rule *r_next;		/* pointer to next rule in base	*/
	rflags_t   r_flags;		/* flags associated with rule	*/
	char	  *r_file;		/* file for this rule		*/
};


/*
 * this is the information we keep track of for a file
 */
struct fileinfo {
	ino_t	f_ino;			/* inode number of this file	*/
	long	f_d_maj;		/* maj dev on which it lives	*/
	long	f_d_min;		/* min dev on which it lives	*/

	int	f_type;			/* file/dir/special ...		*/
	int	f_mode;			/* protection			*/
	int	f_nlink;		/* number of links to file	*/

	uid_t	f_uid;			/* owning UID			*/
	gid_t	f_gid;			/* owning GID			*/

	off_t	f_size;			/* length in bytes		*/
	long	f_modtime;		/* last modification time	*/
	long	f_modns;		/* low order bits of modtime	*/

	long	f_rd_maj;		/* major dev for specials	*/
	long	f_rd_min;		/* minor dev for specials	*/

	int	f_numacls;		/* number of entries in acls	*/
	aclent_t *f_acls;		/* acl list (if any)		*/
};
+ * These flags are used in the srcdiffs and dstdiffs fields of the + * file structure + */ +typedef int diffmask_t; /* type for difference masks */ + +#define D_CREATE 0x01 /* file has been created */ +#define D_DELETE 0x02 /* file has been deleted */ +#define D_MTIME 0x04 /* file has been modified */ +#define D_SIZE 0x08 /* file has changed size */ +#define D_UID 0x10 /* file has changed user id */ +#define D_GID 0x20 /* file has changed group id */ +#define D_PROT 0x40 /* file has changed protection */ +#define D_LINKS 0x80 /* file has changed link count */ +#define D_TYPE 0x100 /* file has changed type */ +#define D_FACLS 0x200 /* file has changed facls */ +#define D_RENAME_TO 0x400 /* file came from a rename */ +#define D_RENAME_FROM 0x800 /* file has been renamed */ + +/* + * these masks are used to determine how important potential changes are. + * + * D_CONTENTS there may be changes to the file's contents + * D_ADMIN there may be changes to the ownership and protection + * D_IMPORTANT there may be changes that should block a deletion + * + * Note: + * I am torn on whether or not to include modtime in D_IMPORTANT. + * Experience suggests that deleting one of many links affects the + * file modification time. + */ +#define D_ADMIN (D_UID|D_GID|D_PROT|D_FACLS) +#define D_CONTENTS (D_SIZE|D_TYPE|D_CREATE|D_MTIME) +#define D_IMPORTANT (D_SIZE|D_TYPE|D_CREATE|D_MTIME|D_ADMIN) + +/* + * a file is an instance that follows (under a base) from a rule + * (for that base). A file structure may exist because of any + * combination of a file under the source, destination, in a + * baseline for historical reasons, or merely because a rule + * calls it out (whether it exists or not). 
+ */ +struct file { + struct file *f_next; /* pointer to next file in base */ + struct file *f_files; /* pointer to files in subdir */ + struct base *f_base; /* pointer to owning base */ + fflags_t f_flags; /* flags associated with file */ + int f_depth; /* directory depth for file */ + char *f_name; /* name of this file */ + + /* + * these fields capture information, gleaned from the baseline + * that is side-specific, and should not be expected to be in + * agreement between the two sides. As a result, this info can + * not be properly captured in f_info[OPT_BASE] and needs to + * be kept somewhere else. + */ + long f_s_modtime; /* baseline source mod time */ + ino_t f_s_inum; /* baseline source inode # */ + long f_s_nlink; /* baseline source link count */ + long f_s_maj; /* baseline source dev maj */ + long f_s_min; /* baseline source dev min */ + long f_d_modtime; /* baseline target mod time */ + ino_t f_d_inum; /* baseline target inode # */ + long f_d_nlink; /* baseline target link count */ + long f_d_maj; /* baseline target dev maj */ + long f_d_min; /* baseline target dev min */ + + /* stat information from baseline file and evaluation */ + struct fileinfo f_info[3]; /* baseline, source, dest */ + + /* summary of changes discovered in analysis */ + diffmask_t f_srcdiffs; /* changes on source side */ + diffmask_t f_dstdiffs; /* changes on dest side */ + + /* this field is only valid for a renamed file */ + struct file * f_previous; /* node for previous filename */ + + /* + * these fields are only valid for a file that has been added + * to the reconciliation list + */ + struct file *f_rnext; /* reconciliation chain ptr */ + char *f_fullname; /* full name for reconciling */ + long f_modtime; /* modtime for ordering purpose */ + long f_modns; /* low order modtime */ + + /* this field is only valid for a file with a hard conflict */ + char *f_problem; /* description of conflict */ +}; + +/* + * globals + */ +extern struct base omnibase; /* base for global rules 
*/ +extern struct base *bases; /* base for the main list */ +extern int inum_changes; /* LISTed dirs with i# changes */ + +/* routines to manage base nodes, file nodes, and file infor */ +errmask_t read_baseline(char *); +errmask_t write_baseline(char *); +struct file *add_file_to_base(struct base *, const char *); +struct file *add_file_to_dir(struct file *, const char *); +struct base *add_base(const char *src, const char *dst); +void note_info(struct file *, const struct stat *, side_t); +void update_info(struct file *, side_t); + +/* routines to manage rules */ +errmask_t read_rules(char *); +errmask_t write_rules(char *); +errmask_t add_include(struct base *, char *); +errmask_t add_ignore(struct base *, char *); + +/* routines to manage and querry restriction lists */ +errmask_t add_restr(char *); +bool_t check_restr(struct base *, const char *); + +/* routines for dealing with ignore lists */ +void ignore_reset(); +void ignore_pgm(const char *); +void ignore_expr(const char *); +void ignore_file(const char *); +bool_t ignore_check(const char *); + +/* database processing routines for the primary passes */ +errmask_t evaluate(struct base *, side_t, bool_t); +errmask_t analyze(void); +errmask_t find_renames(struct file *); +errmask_t reconcile(struct file *); +int prune(void); +void summary(void); +char *full_name(struct file *, side_t, side_t); + +/* routines in action.c to carry out reconciliation */ +errmask_t do_copy(struct file *, side_t); +errmask_t do_remove(struct file *, side_t); +errmask_t do_rename(struct file *, side_t); +errmask_t do_like(struct file *, side_t, bool_t); + +/* routines to deal with links in the reconciliation list */ +struct file *find_link(struct file *, side_t); +void link_update(struct file *, side_t); +bool_t has_other_links(struct file *, side_t); + +/* maintain a name stack during directory tree traversal */ +void push_name(const char *); +void pop_name(); +char *get_name(struct file *); + +/* acl manipulation functions */ 
+int get_acls(const char *, struct fileinfo *); +int set_acls(const char *, struct fileinfo *); +int cmp_acls(struct fileinfo *, struct fileinfo *); +char *show_acls(int, aclent_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _DATABASE_H */ diff --git a/usr/src/cmd/filesync/debug.c b/usr/src/cmd/filesync/debug.c new file mode 100644 index 0000000000..997aa46cfb --- /dev/null +++ b/usr/src/cmd/filesync/debug.c @@ -0,0 +1,359 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * module: + * debug.c + * + * purpose: + * utility routines for debugging filesync (tracing, diagnostics, + * and error simulation) + * + * contents: + * showflags display a word of flags symbolicly + * dbg_usage printout usage info for -D switch + * err_usage printout usage info for -E switch + * dbg_set_error enable an error simulation + * dbg_check_error check for error simulation + * + * + * note: + * there are numerous flag words and bit fields in this + * program, and it would be horrendous to just print them + * out in hex (in debugging output). 
These routines use + * a "flaglist" data structure to map between bits and + * character string names or descriptions. + * + * a flaglist is merely a list of paired bits and name strings. + */ +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> + +#include "filesync.h" +#include "database.h" +#include "debug.h" + + +/* bits in opt_debug for usage message */ +static struct flaglist dbgflags[] = +{ DBG_BASE, "BASE: base include building", + DBG_RULE, "RULE: rule tree building", + DBG_STAT, "STAT: file stats", + DBG_ANAL, "ANAL: difference analysis", + DBG_RECON, "RECO: reconciliation list processing", + DBG_VARS, "VARS: qualification and expansion", + DBG_FILES, "FILE: rule and baseline files", + DBG_LIST, "LIST: tree building", + DBG_EVAL, "EVAL: tree walking", + DBG_IGNORE, "IGNO: ignore list", + DBG_MISC, "MISC: everything else", + 0, 0 +}; + +/* bits in opt_debug for dsiplay */ +struct flaglist dbgmap[] = +{ DBG_BASE, "BASE", + DBG_RULE, "RULE", + DBG_STAT, "STAT", + DBG_ANAL, "ANAL", + DBG_RECON, "RECO", + DBG_VARS, "VARS", + DBG_FILES, "FILE", + DBG_LIST, "LIST", + DBG_EVAL, "EVAL", + DBG_IGNORE, "IGNO", + DBG_MISC, "MISC", + 0, 0 +}; + +/* bits in the rules flag field */ +struct flaglist rflags[] = +{ R_IGNORE, "IGNORE", + R_PROGRAM, "PROGRAM", + R_WILD, "WILD", + R_NEW, "NEW", + R_BOGUS, "BOGUS", + R_RESTRICT, "RESTRICT", + 0, 0 +}; + +/* bits in the files flag field */ +struct flaglist fileflags[] = +{ F_NEW, "new", + F_IN_BASELINE, "base", + F_IN_SOURCE, "srce", + F_IN_DEST, "dest", + F_EVALUATE, "eval", + F_SPARSE, "sparse", + F_REMOVE, "remove", + F_CONFLICT, "conflict", + F_LISTED, "listed", + F_STAT_ERROR, "statfail", + 0, 0 +}; + +/* bits in the file src/dst difference mask */ +struct flaglist diffmap[] = { + D_CREATE, "create", + D_DELETE, "delete", + D_MTIME, "modtime", + D_SIZE, "size", + D_UID, "uid", + D_GID, "gid", + D_PROT, "modes", + D_LINKS, "links", + 
D_TYPE, "type", + D_FACLS, "facls", + D_RENAME_TO, "rename2", + D_RENAME_FROM, "renamed", + 0, 0 +}; + +/* bits in the exit error code mask */ +struct flaglist errmap[] = { + ERR_RESOLVABLE, "resolvable", + ERR_UNRESOLVED, "unresolvable", + ERR_MISSING, "missing files", + ERR_PERM, "permissions", + ERR_FILES, "rule/base errors", + ERR_INVAL, "invalid arguments", + ERR_NOBASE, "bad base dir", + ERR_OTHER, "other", + 0, 0 +}; + +/* + * routine: + * showflags + * + * purpose: + * format flags for printing + * + * parameters: + * pointer to map + * mask to be interpreted \ + * + * returns: + * pointer to a static buffer + */ +char * +showflags(struct flaglist *map, long mask) +{ int i; + static char outbuf[MAX_NAME]; + + outbuf[0] = 0; + for (i = 0; map[i].fl_mask; i++) + if (mask & map[i].fl_mask) { + if (outbuf[0]) + strcat(outbuf, "|"); + strcat(outbuf, map[i].fl_name); + } + + return (outbuf); +} + +/* + * routines: + * dbg_usage, err_usage + * + * purpose: + * to print out usage messages for the secret debugging flags + * + * returns: + * void + */ +void +dbg_usage(void) +{ int i; + + fprintf(stderr, "Usage:\tfilesync -Dmask ...\n"); + for (i = 0; dbgflags[i].fl_mask; i++) + fprintf(stderr, "\t0x%04lx .... %s\n", + dbgflags[i].fl_mask, dbgflags[i].fl_name); + fprintf(stderr, "\n"); +} + +#ifdef DBG_ERRORS +/* + * The -E flag is a debugging feature that enables the user to request + * the simulation of difficult to trigger error conditions in order + * to test out the error handling code in filesync. We maintain a + * registry that specifies a file name and an operation, and an errno + * to be returned if the specified operation is attempted on the + * specified file. + */ +void +err_usage(void) +{ + fprintf(stderr, "Usage:\tfilesync -E<errno>,<code>,<filename>\n"); + fprintf(stderr, "\ts ... eval stat source\n"); + fprintf(stderr, "\tS ... eval stat destination\n"); + fprintf(stderr, "\tn ... eval nftw source\n"); + fprintf(stderr, "\tN ... 
eval nftw destination\n"); + fprintf(stderr, "\tc ... reconcile copy create\n"); + fprintf(stderr, "\to ... reconcile copy open\n"); + fprintf(stderr, "\tr ... reconcile copy read/readlink\n"); + fprintf(stderr, "\tw ... reconcile copy write\n"); + fprintf(stderr, "\tl ... reconcile link/symlink\n"); + fprintf(stderr, "\tu ... reconcile unlink\n"); + fprintf(stderr, "\td ... reconcile mkdir/mknod\n"); + fprintf(stderr, "\tD ... reconcile rmdir\n"); + fprintf(stderr, "\tm ... reconcile rename\n"); + fprintf(stderr, "\tR ... reconcile restat\n"); + fprintf(stderr, "\tp ... reconcile protection (chmod)"); + fprintf(stderr, "\ta ... reconcile access control (setfacl)"); + fprintf(stderr, "\tO ... reconcile ownership (chown)"); + fprintf(stderr, "\tZ ... out of space on target\n"); + fprintf(stderr, "\n"); +} + +/* + * this data structure us used to keep track of the error simulations + * that have been requested. + */ +static struct errsim { + int Errno; /* error number to return */ + char code; /* event triggering the error */ + char *file; /* file name triggering error */ +} errsim[ DBG_MAX_ERR ]; + +static int num_errs; /* number of simulated errors */ + + +/* + * routine: + * dbg_set_error + * + * purpose: + * note that we have been requested to simulate file access errors + * + * parameters: + * argument string <errno>,<errcode>,<filename> + * + * returns: + * error mask + */ +int +dbg_set_error(char *arg) +{ char *s; + char error_type; + int error_no; + + if (num_errs >= DBG_MAX_ERR) { + fprintf(stderr, "ERROR: only %d -E specifications allowed\n", + DBG_MAX_ERR); + return (ERR_INVAL); + } + + /* get the error number */ + if (!isdigit(arg[0])) + return (ERR_INVAL); + error_no = strtol(arg, &s, 0); + + /* get the error condition */ + if (*s++ != ',' || !isalpha(*s)) + return (ERR_INVAL); + error_type = *s; + + /* get the file name */ + while (*s && *s != ',') s++; + if (*s++ != ',' || *s == 0) + return (ERR_INVAL); + + /* register the error simulation */ + 
errsim[num_errs].Errno = error_no; + errsim[num_errs].code = error_type; + errsim[num_errs].file = s; + + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: errsim[%d] %c(%s) -> %d\n", + num_errs, error_type, s, error_no); + + num_errs++; + + return (0); +} + +/* + * routine: + * dbg_chk_error + * + * purpose: + * determine whether or not we have been asked to simulate an + * error for a specified file. + * + * parameters: + * file name + * + * returns: + * errno (or zero if no error) + */ +int +dbg_chk_error(const char *name, char code) +{ int i; + + for (i = 0; i < num_errs; i++) { + /* see if this code matches any registered condition */ + if (code != errsim[i].code) + continue; + + /* see if this also matches the file name */ + if (!suffix(name, errsim[i].file)) + continue; + + /* we have a winner */ + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: trigger %d for file %c(%s)\n", + errsim[i].Errno, code, name); + return (errsim[i].Errno); + } + return (0); +} + +#else /* ! DBG_ERRORS */ +void +err_usage(void) +{ + fprintf(stderr, "ERROR: this filesync does not support -E\n"); +} + +int +dbg_set_error(char *arg) +{ + return (ERR_INVAL); +} + +int +dbg_chk_error(const char *name, char code) +{ + return (0); +} +#endif diff --git a/usr/src/cmd/filesync/debug.h b/usr/src/cmd/filesync/debug.h new file mode 100644 index 0000000000..52627f222a --- /dev/null +++ b/usr/src/cmd/filesync/debug.h @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * debug.h + * + * purpose: + * definitions and declarations for special debugging features + */ + +#ifndef _DEBUG_H +#define _DEBUG_H + +#pragma ident "%W% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DBG_ERRORS 1 /* enable error simulation code */ +#define DBG_MAX_ERR 20 /* maximum # simulated errs */ + +/* + * the flaglists are used by the showflags routine in order to + * print bitmasks in a symbolic form + */ +struct flaglist { + long fl_mask; /* the bit in question */ + char *fl_name; /* the name of that bit */ +}; + +extern struct flaglist dbgmap[], rflags[], fileflags[], diffmap[], errmap[]; + +char *showflags(struct flaglist *, long); /* turn bit to a name */ +int dbg_set_error(char *arg); /* simulate error */ +int dbg_chk_error(const char *name, char code); /* check for simul err */ + +void dbg_usage(); /* debug flag usage */ +void err_usage(); /* error simul usage */ + +#ifdef __cplusplus +} +#endif + +#endif /* _DEBUG_H */ diff --git a/usr/src/cmd/filesync/eval.c b/usr/src/cmd/filesync/eval.c new file mode 100644 index 0000000000..8c63f11949 --- /dev/null +++ b/usr/src/cmd/filesync/eval.c @@ -0,0 +1,997 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* + * module: + * eval.c + * + * purpose: + * routines to ascertain the current status of all of the files + * described by a set of rules. Some of the routines that update + * file status information are also called later (during reconciliation) + * to reflect the changes that have been made to files. + * + * contents: + * evaluate top level - evaluate one side of one base + * add_file_arg (static) add a file to the list of files to evaluate + * eval_file (static) stat a specific file, recurse on directories + * walker (static) node visitor for recursive descent + * note_info update a file_info structure from a stat structure + * do_update (static) update one file_info structure from another + * update_info update the baseline file_info from the prevailing side + * fakedata (static) make it look like one side hasn't changed + * check_inum (static) sanity check to detect wrong-dir errors + * add_glob (static) expand a wildcard in an include rule + * add_run (static) run a program to generate an include list + * + * notes: + * pay careful attention to the use of the LISTED and EVALUATE + * flags in each file description structure.
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <libgen.h> +#include <unistd.h> +#include <string.h> +#include <glob.h> +#include <ftw.h> +#include <sys/mkdev.h> +#include <errno.h> + +#include "filesync.h" +#include "database.h" +#include "messages.h" +#include "debug.h" + +/* + * routines: + */ +static errmask_t eval_file(struct base *, struct file *); +static errmask_t add_file_arg(struct base *, char *); +static int walker(const char *, const struct stat *, int, struct FTW *); +static errmask_t add_glob(struct base *, char *); +static errmask_t add_run(struct base *, char *); +static void check_inum(struct file *, int); +static void fakedata(struct file *, int); + +/* + * globals + */ +static bool_t usingsrc; /* this pass is on the source side */ +static int walk_errs; /* errors found in tree walk */ +static struct file *cur_dir; /* base directory for this pass */ +static struct base *cur_base; /* base pointer for this pass */ + +/* + * routine: + * evaluate + * + * purpose: + * to build up a baseline description for all of the files + * under one side of one base pair (as specified by the rules + * for that base pair). + * + * parameters: + * pointer to the base to be evaluated + * source/destination indication + * are we restricted to new rules + * + * returns: + * error mask + * + * notes: + * we evaluate source and destination separately, and + * reinterpret the include rules on each side (since there + * may be wild cards and programs that must be evaluated + * in a specific directory context). Similarly the ignore + * rules must be interpreted anew for each base. 
+ */ +errmask_t +evaluate(struct base *bp, side_t srcdst, bool_t newrules) +{ errmask_t errs = 0; + char *dir; + struct rule *rp; + struct file *fp; + + /* see if this base is still relevant */ + if ((bp->b_flags & F_LISTED) == 0) + return (0); + + /* figure out what this pass is all about */ + usingsrc = (srcdst == OPT_SRC); + + /* + * the ignore engine maintains considerable per-base-directory + * state, and so must be reset at the start of a new tree. + */ + ignore_reset(); + + /* all evaluation must happen from the appropriate directory */ + dir = usingsrc ? bp->b_src_name : bp->b_dst_name; + if (chdir(dir) < 0) { + fprintf(stderr, gettext(ERR_chdir), dir); + + /* + * if we have -n -o we are actually willing to + * pretend that nothing has changed on the missing + * side. This is actually useful on a disconnected + * notebook to ask what has been changed so far. + */ + if (opt_onesided == (usingsrc ? OPT_DST : OPT_SRC)) { + for (fp = bp->b_files; fp; fp = fp->f_next) + fakedata(fp, srcdst); + + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: FAKE DATA %s dir=%s\n", + usingsrc ? "SRC" : "DST", dir); + return (0); + } else + return (ERR_NOBASE); + } + + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: base=%d, %s dir=%s\n", + bp->b_ident, usingsrc ? 
"SRC" : "DST", dir); + + /* assemble the include list */ + for (rp = bp->b_includes; rp; rp = rp->r_next) { + + /* see if we are skipping old rules */ + if (newrules && ((rp->r_flags & R_NEW) == 0)) + continue; + + if (rp->r_flags & R_PROGRAM) + errs |= add_run(bp, rp->r_file); + else if (rp->r_flags & R_WILD) + errs |= add_glob(bp, rp->r_file); + else + errs |= add_file_arg(bp, rp->r_file); + } + + /* assemble the base-specific exclude list */ + for (rp = bp->b_excludes; rp; rp = rp->r_next) + if (rp->r_flags & R_PROGRAM) + ignore_pgm(rp->r_file); + else if (rp->r_flags & R_WILD) + ignore_expr(rp->r_file); + else + ignore_file(rp->r_file); + + /* add in the global excludes */ + for (rp = omnibase.b_excludes; rp; rp = rp->r_next) + if (rp->r_flags & R_WILD) + ignore_expr(rp->r_file); + else + ignore_file(rp->r_file); + + /* + * because of restriction lists and new-rules, the baseline + * may contain many more files than we are actually supposed + * to look at during the impending evaluation/analysis phases + * + * when LIST arguments are encountered within a rule, we turn + * on the LISTED flag for the associated files. We only evaluate + * files that have the LISTED flag. We turn the LISTED flag off + * after evaluating them because just because a file was enumerated + * in the source doesn't mean that will necessarily be enumerated + * in the destination. + */ + for (fp = bp->b_files; fp; fp = fp->f_next) + if (fp->f_flags & F_LISTED) { + errs |= eval_file(bp, fp); + fp->f_flags &= ~F_LISTED; + } + + /* note that this base has been evaluated */ + bp->b_flags |= F_EVALUATE; + + return (errs); +} + +/* + * routine: + * add_file_arg + * + * purpose: + * to create file node(s) under a specified base for an explictly + * included file. 
+ * + * parameters: + * pointer to associated base + * name of the file + * + * returns: + * error mask + * + * notes: + * the trick is that an include LIST argument need not be a file + * in the base directory, but may be a path passing through + * several intermediate directories. If this is the case we + * need to ensure that all of those directories are added to + * the tree SPARSELY since it is not intended that they be + * expanded during the course of evaluation. + * + * we ignore arguments that end in .. because they have the + * potential to walk out of the base tree, because it can + * result in different names for a single file, and because + * should never be necessary to specify files that way. + */ +static errmask_t +add_file_arg(struct base *bp, char *path) +{ int i; + errmask_t errs = 0; + struct file *dp = 0; + struct file *fp; + char *s, *p; + char name[ MAX_NAME ]; + + /* + * see if someone is trying to feed us a .. + */ + if (strcmp(path, "..") == 0 || prefix(path, "../") || + suffix(path, "/..") || contains(path, "/../")) { + fprintf(stderr, gettext(WARN_ignore), path); + return (ERR_MISSING); + } + + /* + * strip off any trailing "/." or "/" + * since noone will miss these, it is safe to actually + * take them off the name. When we fall out of this + * loop, s will point where the null belongs. We don't + * actually null the end of string yet because we want + * to leave it pristine for error messages. + */ + for (s = path; *s; s++); + while (s > path) { + if (s[-1] == '/') { + s--; + continue; + } + if (s[-1] == '.' && s > &path[1] && s[-2] == '/') { + s -= 2; + continue; + } + break; + } + + /* + * skip over leading "/" and "./" (but not over a lone ".") + */ + for (p = path; p < s; ) { + if (*p == '/') { + p++; + continue; + } + if (*p == '.' 
&& s > &p[1] && p[1] == '/') { + p += 2; + continue; + } + break; + } + + /* + * if there is nothing left, we're miffed, but done + */ + if (p >= s) { + fprintf(stderr, gettext(WARN_ignore), path); + return (ERR_MISSING); + } else { + /* + * this is actually storing a null into the argument, + * but it is OK to do this because the stuff we are + * truncating really is garbage that noone will ever + * want to see. + */ + *s = 0; + path = p; + } + + /* + * see if there are any restrictions that would force + * us to ignore this argument + */ + if (check_restr(bp, path) == 0) + return (0); + + while (*path) { + /* lex off the next name component */ + for (i = 0; path[i] && path[i] != '/'; i++) + name[i] = path[i]; + name[i] = 0; + + /* add it into the database */ + fp = (dp == 0) ? add_file_to_base(bp, name) + : add_file_to_dir(dp, name); + + /* see if this was an intermediate directory */ + if (path[i] == '/') { + fp->f_flags |= F_LISTED | F_SPARSE; + path += i+1; + } else { + fp->f_flags |= F_LISTED; + path += i; + } + + dp = fp; + } + + return (errs); +} + +/* + * routine: + * eval_file + * + * purpose: + * to evaluate one named file under a particular directory + * + * parameters: + * pointer to base structure + * pointer to file structure + * + * returns: + * error mask + * filled in evaluations in the baseline + * + * note: + * due to new rules and other restrictions we may not be expected + * to evaluate the entire tree. We should only be called on files + * that are LISTed, and we should only invoke ourselves recursively + * on such files. 
+ */ +static errmask_t +eval_file(struct base *bp, struct file *fp) +{ errmask_t errs = 0; + int rc; + char *name; + struct file *cp; + struct stat statb; + + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: FILE, flags=%s, name=%s\n", + showflags(fileflags, fp->f_flags), fp->f_name); + + /* stat the file and fill in the file structure information */ + name = get_name(fp); + +#ifdef DBG_ERRORS + /* see if we should simulated a stat error on this file */ + if (opt_errors && (errno = dbg_chk_error(name, usingsrc ? 's' : 'S'))) + rc = -1; + else +#endif + rc = lstat(name, &statb); + + if (rc < 0) { + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: FAIL lstat, errno=%d\n", errno); + switch (errno) { + case EACCES: + fp->f_flags |= F_STAT_ERROR; + return (ERR_PERM); + case EOVERFLOW: + fp->f_flags |= F_STAT_ERROR; + return (ERR_UNRESOLVED); + default: + return (ERR_MISSING); + } + } + + /* record the information we've just gained */ + note_info(fp, &statb, usingsrc ? OPT_SRC : OPT_DST); + + /* + * checking for ACLs is expensive, so we only do it if we + * have been asked to, or if we have reason to believe that + * the file has an ACL + */ + if (opt_acls || fp->f_info[OPT_BASE].f_numacls) + (void) get_acls(name, + &fp->f_info[usingsrc ? OPT_SRC : OPT_DST]); + + + /* note that this file has been evaluated */ + fp->f_flags |= F_EVALUATE; + + /* if it is not a directory, a simple stat will suffice */ + if ((statb.st_mode & S_IFMT) != S_IFDIR) + return (0); + + /* + * as a sanity check, we look for changes in the I-node + * numbers associated with LISTed directories ... on the + * assumption that these are high-enough up on the tree + * that they aren't likely to change, and so a change + * might indicate trouble. + */ + if (fp->f_flags & F_LISTED) + check_inum(fp, usingsrc); + + /* + * sparse directories are on the path between a base and + * a listed directory. As such, we don't walk these + * directories. Rather, we just enumerate the LISTed + * files. 
+ */ + if (fp->f_flags & F_SPARSE) { + push_name(fp->f_name); + + /* this directory isn't supposed to be fully walked */ + for (cp = fp->f_files; cp; cp = cp->f_next) + if (cp->f_flags & F_LISTED) { + errs |= eval_file(bp, cp); + cp->f_flags &= ~F_LISTED; + } + pop_name(); + } else { + /* fully walk the tree beneath this directory */ + walk_errs = 0; + cur_base = bp; + cur_dir = fp; + nftw(get_name(fp), &walker, MAX_DEPTH, FTW_PHYS|FTW_MOUNT); + errs |= walk_errs; + } + + return (errs); +} + +/* + * routine: + * walker + * + * purpose: + * node visitor for recursive directory enumeration + * + * parameters: + * name of file + * pointer to stat buffer for file + * file type + * FTW structure (base name offset, walk-depth) + * + * returns: + * 0 continue + * -1 stop + * + * notes: + * Ignoring files is easy, but ignoring directories is harder. + * Ideally we would just decline to walk the trees beneath + * ignored directories, but ftw doesn't allow the walker to + * tell it to "don't enter this directory, but continue". + * + * Instead, we have to set a global to tell us to ignore + * everything under that tree. The variable ignore_level + * is set to a level, below which, everything should be + * ignored. Once the enumeration rises above that level + * again, we clear it. + */ +static int +walker(const char *name, const struct stat *sp, int type, + struct FTW *ftwx) +{ const char *path; + struct file *fp; + int level; + int which; + bool_t restr; + static struct file *dirstack[ MAX_DEPTH + 1 ]; + static int ignore_level = 0; + + path = &name[ftwx->base]; + level = ftwx->level; + which = usingsrc ? 
OPT_SRC : OPT_DST; + + /* + * see if we are ignoring all files in this sub-tree + */ + if (ignore_level > 0 && level >= ignore_level) { + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: SKIP file=%s\n", name); + return (0); + } else + ignore_level = 0; /* we're through ignoring */ + +#ifdef DBG_ERRORS + /* see if we should simulated a stat error on this file */ + if (opt_errors && dbg_chk_error(name, usingsrc ? 'n' : 'N')) + type = FTW_NS; +#endif + + switch (type) { + case FTW_F: /* file */ + case FTW_SL: /* symbolic link */ + /* + * filter out files of inappropriate types + */ + switch (sp->st_mode & S_IFMT) { + default: /* anything else we ignore */ + return (0); + + case S_IFCHR: + case S_IFBLK: + case S_IFREG: + case S_IFLNK: + if (opt_debug & DBG_EVAL) + fprintf(stderr, + "EVAL: WALK lvl=%d, file=%s\n", + level, path); + + /* see if we were told to ignore this one */ + if (ignore_check(path)) + return (0); + + fp = add_file_to_dir(dirstack[level-1], path); + note_info(fp, sp, which); + + /* note that this file has been evaluated */ + fp->f_flags |= F_EVALUATE; + + /* see if we should check ACLs */ + if ((sp->st_mode & S_IFMT) == S_IFLNK) + return (0); + + if (fp->f_info[OPT_BASE].f_numacls || opt_acls) + (void) get_acls(name, + &fp->f_info[which]); + + return (0); + } + + case FTW_D: /* enter directory */ + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: WALK lvl=%d, dir=%s\n", + level, name); + + /* + * if we have been told to ignore this directory, we should + * ignore all files under it. Similarly, if we are outside + * of our restrictions, we should ignore the entire subtree + */ + restr = check_restr(cur_base, name); + if (restr == FALSE || ignore_check(path)) { + ignore_level = level + 1; + return (0); + } + + fp = (level == 0) ? 
cur_dir : + add_file_to_dir(dirstack[level-1], path); + + note_info(fp, sp, which); + + /* see if we should be checking ACLs */ + if (opt_acls || fp->f_info[OPT_BASE].f_numacls) + (void) get_acls(name, &fp->f_info[which]); + + /* note that this file has been evaluated */ + fp->f_flags |= F_EVALUATE; + + /* note the parent of the children to come */ + dirstack[ level ] = fp; + + /* + * PROBLEM: given the information that nftw provides us with, + * how do we know that we have confirmed the fact + * that a file no longer exists. Or to rephrase + * this in filesync terms, how do we know when to + * set the EVALUATE flag for a file we didn't find. + * + * if we are going to fully scan this directory (we + * are completely within our restrictions) then we + * will be confirming the non-existance of files that + * used to be here. Thus any file that was in the + * base line under this directory should be considered + * to have been evaluated (whether we found it or not). + * + * if, however, we are only willing to scan selected + * files (due to restrictions), or the file was not + * in the baseline, then we should not assume that this + * pass will evaluate it. + */ + if (restr == TRUE) + for (fp = fp->f_files; fp; fp = fp->f_next) { + if ((fp->f_flags & F_IN_BASELINE) == 0) + continue; + fp->f_flags |= F_EVALUATE; + } + + return (0); + + case FTW_DP: /* end of directory */ + dirstack[ level ] = 0; + break; + + case FTW_DNR: /* unreadable directory */ + walk_errs |= ERR_PERM; + /* FALLTHROUGH */ + case FTW_NS: /* unstatable file */ + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: walker can't stat/read %s\n", + name); + fp = (level == 0) ? 
cur_dir : + add_file_to_dir(dirstack[level-1], path); + fp->f_flags |= F_STAT_ERROR; + walk_errs |= ERR_UNRESOLVED; + break; + } + + return (0); +} + +/* + * routine: + * note_info + * + * purpose: + * to record information about a file in its file node + * + * parameters + * file node pointer + * stat buffer + * which file info structure to fill in (0-2) + * + * returns + * void + */ +void +note_info(struct file *fp, const struct stat *sp, side_t which) +{ struct fileinfo *ip; + static int flags[3] = { F_IN_BASELINE, F_IN_SOURCE, F_IN_DEST }; + + ip = &fp->f_info[ which ]; + + ip->f_ino = sp->st_ino; + ip->f_d_maj = major(sp->st_dev); + ip->f_d_min = minor(sp->st_dev); + ip->f_type = sp->st_mode & S_IFMT; + ip->f_size = sp->st_size; + ip->f_mode = sp->st_mode & S_IAMB; + ip->f_uid = sp->st_uid; + ip->f_gid = sp->st_gid; + ip->f_modtime = sp->st_mtim.tv_sec; + ip->f_modns = sp->st_mtim.tv_nsec; + ip->f_nlink = sp->st_nlink; + ip->f_rd_maj = major(sp->st_rdev); + ip->f_rd_min = minor(sp->st_rdev); + + /* indicate where this file has been found */ + fp->f_flags |= flags[which]; + + if (opt_debug & DBG_STAT) + fprintf(stderr, + "STAT: list=%d, file=%s, mod=%08lx.%08lx, nacl=%d\n", + which, fp->f_name, ip->f_modtime, ip->f_modns, + ip->f_numacls); +} + +/* + * routine: + * do_update + * + * purpose: + * to copy information from one side into the baseline in order + * to reflect the effects of recent reconciliation actions + * + * parameters + * fileinfo structure to be updated + * fileinfo structure to be updated from + * + * returns + * void + * + * note: + * we play fast and loose with the copying of acl chains + * here, but noone is going to free or reuse any of this + * memory anyway. None the less, I do feel embarassed. 
+ */ +static void +do_update(struct fileinfo *np, struct fileinfo *ip) +{ + /* get most of the fields from the designated "right" copy */ + np->f_type = ip->f_type; + np->f_size = ip->f_size; + np->f_mode = ip->f_mode; + np->f_uid = ip->f_uid; + np->f_gid = ip->f_gid; + np->f_rd_maj = ip->f_rd_maj; + np->f_rd_min = ip->f_rd_min; + + /* see if facls have to be propagated */ + np->f_numacls = ip->f_numacls; + np->f_acls = ip->f_acls; +} + +/* + * routine: + * update_info + * + * purpose: + * to update the baseline to reflect recent reconcliations + * + * parameters + * file node pointer + * which file info structure to trust (1/2) + * + * returns + * void + * + * note: + * after we update this I-node we run down the entire + * change list looking for links and update them too. + * This is to ensure that when subsequent links get + * reconciled, they are already found to be up-to-date. + */ +void +update_info(struct file *fp, side_t which) +{ + /* first update the specified fileinfo structure */ + do_update(&fp->f_info[ OPT_BASE ], &fp->f_info[ which ]); + + if (opt_debug & DBG_STAT) + fprintf(stderr, + "STAT: UPDATE from=%d, file=%s, mod=%08lx.%08lx\n", + which, fp->f_name, fp->f_info[ which ].f_modtime, + fp->f_info[ which ].f_modns); +} + +/* + * routine: + * fakedata + * + * purpose: + * to populate a tree we cannot analyze with information from the baseline + * + * parameters: + * file to be faked + * which side to fake + * + * notes: + * We would never use this for real reconciliation, but it is useful + * if a disconnected notebook user wants to find out what has been + * changed so far. We only do this if we are notouch and oneway. + */ +static void +fakedata(struct file *fp, int which) +{ struct file *lp; + + /* pretend we actually found the file */ + fp->f_flags |= (which == OPT_SRC) ? 
F_IN_SOURCE : F_IN_DEST; + + /* update the specified side from the baseline */ + do_update(&fp->f_info[ which ], &fp->f_info[ OPT_BASE ]); + fp->f_info[which].f_nlink = (which == OPT_SRC) ? fp->f_s_nlink : + fp->f_d_nlink; + fp->f_info[which].f_modtime = (which == OPT_SRC) ? fp->f_s_modtime : + fp->f_d_modtime; + + for (lp = fp->f_files; lp; lp = lp->f_next) + fakedata(lp, which); +} + +/* + * routine: + * check_inum + * + * purpose: + * sanity check inode #s on directories that are unlikely to change + * + * parameters: + * pointer to file node + * are we using the source + * + * note: + * the purpose of this sanity check is to catch a case where we + * have somehow been pointed at a directory that is not the one + * we expected to be reconciling against. It could happen if a + * variable wasn't properly set, or if we were in a new domain + * where an old path no longer worked. This could result in + * bazillions of inappropriate propagations and deletions. + */ +void +check_inum(struct file *fp, int src) +{ struct fileinfo *ip; + + /* + * we validate the inode number and the major device numbers ... 
minor + * device numbers for NFS devices are arbitrary + */ + if (src) { + ip = &fp->f_info[ OPT_SRC ]; + if (ip->f_ino == fp->f_s_inum && ip->f_d_maj == fp->f_s_maj) + return; + + /* if file was newly created/deleted, this isn't warnable */ + if (fp->f_s_inum == 0 || ip->f_ino == 0) + return; + + if (opt_verbose) + fprintf(stdout, V_change, fp->f_name, TXT_src, + fp->f_s_maj, fp->f_s_min, fp->f_s_inum, + ip->f_d_maj, ip->f_d_min, ip->f_ino); + } else { + ip = &fp->f_info[ OPT_DST ]; + if (ip->f_ino == fp->f_d_inum && ip->f_d_maj == fp->f_d_maj) + return; + + /* if file was newly created/deleted, this isn't warnable */ + if (fp->f_d_inum == 0 || ip->f_ino == 0) + return; + + if (opt_verbose) + fprintf(stdout, V_change, fp->f_name, TXT_dst, + fp->f_d_maj, fp->f_d_min, fp->f_d_inum, + ip->f_d_maj, ip->f_d_min, ip->f_ino); + } + + /* note that something has changed */ + inum_changes++; +} + +/* + * routine: + * add_glob + * + * purpose: + * to evaluate a wild-carded expression into names, and add them + * to the evaluation list. + * + * parameters: + * base + * expression + * + * returns: + * error mask + * + * notes: + * we don't want to allow any patterns to expand to a . because + * that could result in re-evaluation of a tree under a different + * name. The real thing we are worried about here is ".*" which + * is meant to pick up . files, but shouldn't pick up . and .. + */ +static errmask_t +add_glob(struct base *bp, char *expr) +{ int i; + errmask_t errs = 0; +#ifndef BROKEN_GLOB + glob_t gt; + char *s; + + /* expand the regular expression */ + i = glob(expr, GLOB_NOSORT, 0, >); + if (i == GLOB_NOMATCH) + return (ERR_MISSING); + if (i) { + /* this shouldn't happen, so it's cryptic message time */ + fprintf(stderr, "EVAL: add_glob globfail expr=%s, ret=%d\n", + expr, i); + return (ERR_OTHER); + } + + for (i = 0; i < gt.gl_pathc; i++) { + /* make sure we don't let anything expand to a . 
*/ + s = basename(gt.gl_pathv[i]); + if (strcmp(s, ".") == 0) { + fprintf(stderr, gettext(WARN_ignore), gt.gl_pathv[i]); + errs |= ERR_MISSING; + continue; + } + + errs |= add_file_arg(bp, gt.gl_pathv[i]); + } + + globfree(>); +#else + /* + * in 2.4 the glob function was completely broken. The + * easiest way to get around this problem is to just ask + * the shell to do the work for us. This is much slower + * but produces virtually identical results. Given that + * the 2.4 version is internal use only, I probably won't + * worry about the performance difference (less than 2 + * seconds for a typical filesync command, and no hit + * at all if they don't use regular expressions in + * their LIST rules). + */ + char cmdbuf[MAX_LINE]; + + sprintf(cmdbuf, "ls -d %s 2> /dev/null", expr); + errs |= add_run(bp, cmdbuf); +#endif + + return (errs); +} + + +/* + * routine: + * add_run + * + * purpose: + * to run a command and capture the names it outputs in the + * evaluation list. + * + * parameters + * base + * command + * + * returns: + * error mask + */ +static errmask_t +add_run(struct base *bp, char *cmd) +{ char *s, *p; + FILE *fp; + char inbuf[ MAX_LINE ]; + errmask_t errs = 0; + int added = 0; + + if (opt_debug & DBG_EVAL) + fprintf(stderr, "EVAL: RUN %s\n", cmd); + + /* run the command and collect its ouput */ + fp = popen(cmd, "r"); + if (fp == NULL) { + fprintf(stderr, gettext(ERR_badrun), cmd); + return (ERR_OTHER); + } + + while (fgets(inbuf, sizeof (inbuf), fp) != 0) { + /* strip off any trailing newline */ + for (s = inbuf; *s && *s != '\n'; s++); + *s = 0; + + /* skip any leading white space */ + for (s = inbuf; *s == ' ' || *s == '\t'; s++); + + /* make sure we don't let anything expand to a . 
*/ + p = basename(s); + if (strcmp(p, ".") == 0) { + fprintf(stderr, gettext(WARN_ignore), s); + errs |= ERR_MISSING; + continue; + } + + /* add this file to the list */ + if (*s) { + errs |= add_file_arg(bp, s); + added++; + } + } + + pclose(fp); + +#ifdef BROKEN_GLOB + /* + * if we are being used to simulate libc glob, and we didn't + * return anything, we should probably assume that the regex + * was unable to match anything + */ + if (added == 0) + errs |= ERR_MISSING; +#endif + return (errs); +} diff --git a/usr/src/cmd/filesync/files.c b/usr/src/cmd/filesync/files.c new file mode 100644 index 0000000000..4cb4a01800 --- /dev/null +++ b/usr/src/cmd/filesync/files.c @@ -0,0 +1,591 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * files.c + * + * purpose: + * routines to examine and manipulate file names + * + * contents: + * qualify ... ensure that a name is fully qualified + * expand ... expand env variables within a string or file name + * noblanks .. ensure that a name contains no embdded unescaped blanks + * lex ....... 
a lexer that can handle escaped/embedded blanks + * wildcards . see whether or not a name contains wild cards + * prefix .... does one string begin with another + * suffix .... does one string end with another + * contains .. does one string contain another + * + * cannonize (static) ... compress redundant "." and ".." out of name + * + * notes: + * we are interested in embedded blanks because international + * character sets and non-unix file systems can both contain + * the byte 0x20. Thus, whenever we record a filename in a + * file, we must be careful to escape any embedded blanks that + * would cause trouble when we re-lex that file later. + */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <unistd.h> + +#include "filesync.h" +#include "messages.h" + +static void cannonize(char *name); + +/* + * routine: + * qualify + * + * purpose: + * to fully qualify a name + * + * parameters: + * name to be qualified + * + * returns: + * either original pointer or copy to a new (malloced) buffer + * + * notes: + * someday I may conclude that I should always make a copy + * so that the caller can know that it is safe to free the parm + * + * I thought about this and concluded that there is never a need + * to fully qualify a string containing variables. If the string + * came from the command line, the variables were already expanded + * and if it came from the rules data base it is required to already + * be fully qualified. 
+ */ +char * +qualify(char *name) +{ + char namebuf[ MAX_PATH ]; + + /* in the simple case, the parameter is already there */ + if (*name == '/') { + cannonize(name); + return (name); + } + + /* things that begin with variables get the benefit of the doubt */ + if (*name == '$') { + cannonize(name); + return (name); + } + + /* start with the current working directory */ + if (getcwd(namebuf, sizeof (namebuf)) == 0) { + fprintf(stderr, gettext(ERR_nocwd), name); + exit(ERR_OTHER); + } + + /* make sure we have room for our file name */ + if ((strlen(namebuf) + strlen(name) + 2) >= sizeof (namebuf)) { + fprintf(stderr, gettext(ERR_longname), name); + exit(ERR_OTHER); + } + + /* append the specified file name to it */ + strcat(namebuf, "/"); + strcat(namebuf, name); + + /* filter out redundant dots */ + cannonize(namebuf); + + if (opt_debug & DBG_VARS) + fprintf(stderr, "VARS: QUALIFY %s to %s\n", name, namebuf); + + /* and return a newly malloc'd copy */ + return (strdup(namebuf)); +} + +/* + * routine: + * expand + * + * purpose: + * to expand variable names within a string + * + * parameters: + * string to be expanded. Variable references always begin + * with a $ and are delimited by parens or curleys. + * + * returns: + * either original pointer or a copy to a new (malloced) buffer + * + * notes: + * someday I may conclude that I should always make a copy + * so that the caller can know that it is safe to free the parm + * + * someday I may decide to support escape conventions for embedding + * $(){} in file names, but I suspec that day will never come. + * + * I thought about this and concluded there was no reason to + * fully qualify these names, because the only names that should + * need qualification are src/dst lines from the command line, + * and the shell should have handled those for me. Once something + * makes it into the database, it is expected to be fully qualified + * already. 
+ * + * We are limited to producing strings of length MAX_PATH or less + * and variable names of length MAX_NAME or less. In practice, + * these limitations should not be a problem. + */ +char * +expand(char *name) +{ const char *s; + char *p, *v; + char delim; + char namebuf[ MAX_PATH ]; + char varbuf[ MAX_NAME ]; + + /* first see if there are no variables to be bound */ + for (s = name; *s && *s != '$'; s++); + if (*s == 0) + return (name); + + /* move through the string, copying and expanding */ + for (s = name, p = namebuf; *s; s++) { + + /* check for overflow */ + if (p >= &namebuf[ MAX_PATH ]) { + fprintf(stderr, gettext(ERR_longname), name); + exit(ERR_OTHER); + } + + /* normal characters, we just copy */ + if (*s != '$') { + *p++ = *s; + continue; + } + + /* figure out how the variable name is delimited */ + delim = *++s; + if (delim == '(') { + delim = ')'; + s++; + } else if (delim == '{') { + delim = '}'; + s++; + } else + delim = 0; + + /* copy the variable name up to the closing delimiter */ + for (v = varbuf; *s; s++) { + if (isalnum(*s) || (*s == '_') || + (delim && *s != delim)) + *v++ = *s; + else + break; + + /* make sure we don't overflow var name buffer */ + if (v >= &varbuf[MAX_NAME - 1]) { + *v = 0; + fprintf(stderr, gettext(ERR_longname), varbuf); + exit(ERR_OTHER); + } + } + + *v = 0; + + /* FIX THIS ... 
there must be a more elegant way */ + /* we may have to back up because s will be bumped */ + if (delim == 0 || *s != delim) + s--; + + /* look up the variable */ + v = getenv(varbuf); + if (v == 0 || *v == 0) { + fprintf(stderr, gettext(ERR_undef), varbuf); + return (0); + } + + /* copy the variable into the buffer */ + while (*v) + *p++ = *v++; + } + + /* null terminate the copy */ + *p = 0; + + /* compress out any redundant dots and dot-dots */ + cannonize(namebuf); + + if (opt_debug & DBG_VARS) + fprintf(stderr, "VARS: EXPAND %s to %s\n", name, namebuf); + + /* and return a newly malloc'd copy */ + return (strdup(namebuf)); +} + +/* + * routine: + * noblanks + * + * purpose: + * to ensure that a name contains no unescaped embedded blanks + * + * parameters: + * pointer to name + * + * returns: + * pointer to name or pointer to buffer containing escaped version of name + * + * notes: + * this routine can be called on full file names, and so can + * conceivably require an arbitrarily large buffer. 
+ */ +const char * +noblanks(const char *name) +{ + const char *s; + char *p; + static char *namebuf = 0; + static int buflen = 0; + int l; + + /* first see if there are no embedded blanks */ + for (s = name; *s && *s != ' '; s++); + if (*s == 0) + return (name); + + /* make sure we have a buffer large enough for the worst case */ + l = 4 + (2*strlen(name)); + for (buflen = MAX_PATH; buflen < l; buflen += MAX_NAME); + namebuf = (char *) realloc(namebuf, buflen); + + /* quote the name, and copy it, escaping quotes */ + p = namebuf; + *p++ = '"'; + + for (s = name; *s; s++) { + if (*s == '"' || *s == '\\') + *p++ = '\\'; + *p++ = *s; + } + + *p++ = '"'; + *p = 0; + + return (namebuf); +} + +/* + * routine: + * lex + * + * purpose: + * my own version of strtok that handles quoting and escaping + * + * parameters: + * FILE structure for file to read (0 for same string, same file) + * + * returns: + * pointer to next token + * + * notes: + * this routine makes no changes to the string it is passed, + * copying tokens into a static buffer. + * + * this routine handles continuation lines after reading and + * before the lexing even starts. This limits continued lines + * to a length of MAX_LINE, but keeps everything else very simple. + * We also, therefore, limit tokens to a maximum length of MAX_LINE. 
+ */ +int lex_linenum; /* line number in current input file */ + +char * +lex(FILE *file) +{ char c, delim; + char *p; + char *s; + static char *savep; + static char namebuf[ MAX_LINE ]; + static char inbuf[ MAX_LINE ]; + + if (file) { /* read a new line */ + p = inbuf + sizeof (inbuf); + + /* read the next input line, with all continuations */ + for (s = inbuf; savep = fgets(s, p - s, file); ) { + lex_linenum++; + + /* go find the last character of the input line */ + while (*s && s[1]) + s++; + if (*s == '\n') + s--; + + /* see whether or not we need a continuation */ + if (s < inbuf || *s != '\\') + break; + + continue; + } + + if (savep == 0) + return (0); + + s = inbuf; + } else { /* continue with old line */ + if (savep == 0) + return (0); + s = savep; + } + savep = 0; + + /* skip over leading white space */ + while (isspace(*s)) + s++; + if (*s == 0) + return (0); + + /* see if this is a quoted string */ + c = *s; + if (c == '\'' || c == '"') { + delim = c; + s++; + } else + delim = 0; + + /* copy the token into the buffer */ + for (p = namebuf; (c = *s) != 0; s++) { + /* literal escape */ + if (c == '\\') { + s++; + *p++ = *s; + continue; + } + + /* closing delimiter */ + if (c == delim) { + s++; + break; + } + + /* delimiting white space */ + if (delim == 0 && isspace(c)) + break; + + /* ordinary characters */ + *p++ = *s; + } + + + /* remember where we left off */ + savep = *s ? 
s : 0; + + /* null terminate and return the buffer */ + *p = 0; + return (namebuf); +} + +/* + * routine: + * wildcards + * + * purpose: + * determine whether or not there are any wild cards in a name + * + * parameters: + * name to be checked + * + * returns: + * true/false + * + * notes: + * we use this to take shortcuts + */ +bool_t +wildcards(const char *name) +{ const char *s; + int literal = 0; + + for (s = name; *s; s++) + if (literal) + switch (*s) { + case '\'': /* end of literal string */ + literal = 0; + continue; + case '\\': /* escape next character */ + s++; + continue; + } + else + switch (*s) { + case '\'': /* literal string */ + literal = 1; + continue; + case '\\': /* escape next character */ + s++; + continue; + case '*': + case '[': + case '{': + case '?': + /* any of these is a wild card */ + return (TRUE); + } + + return (FALSE); +} + +/* + * routine: + * cannonize + * + * purpose: + * to compress redundant dots out of a path + * + * parameters: + * file name in an editable buffer + * + * returns: + * void + * + * notes: + * because we compress the string in place, there is no danger + * of our overflowing any fixed sized buffer. + */ +static void +cannonize(char *name) +{ char *s, *p; + + /* leading dot-slashes */ + for (s = name; *s == '.' && s[1] == '/'; strcpy(s, &s[2])); + + for (s = name; *s; s++) { + /* interesting things happen after slashes */ + if (*s != '/') + continue; + + /* embedded dot-slashes */ + while (s[1] == '.' 
&& s[2] == '/') + strcpy(&s[1], &s[3]); + + /* embedded slash-dot-dot-slash */ + if (strncmp(s, "/../", 4) == 0) { + /* scan backwards to eliminate last directory */ + for (p = s-1; p > name && *p != '/'; p--); + + if (p < name) + p = name; + strcpy(p, &s[3]); + } + + continue; + } +} + +/* + * routine: + * prefix + * + * purpose: + * determine whether or not one string begins with another + * + * parameters: + * string to be tested + * suspected prefix + * + * returns: + * no 0 + * yes pointer character after prefix + */ +const char * +prefix(const char *s, const char *p) +{ + while (*p) + if (*p++ != *s++) + return (0); + + return (s); +} + +/* + * routine: + * suffix + * + * purpose: + * determine whether or not one string ends with another + * + * parameters: + * string to be tested + * suspected suffix + * + * returns: + * true/false + */ +bool_t +suffix(const char *str, const char *suf) +{ const char *s; + + /* go to where the alleged suffix would start */ + for (s = str; *s; s++); + s -= strlen(suf); + if (s < str) + return (FALSE); + + /* see if the string ends with the suffix */ + while (*suf) + if (*suf++ != *s++) + return (FALSE); + + return (TRUE); +} + +/* + * routine: + * contains + * + * purpose: + * determine whether or not one string contains another + * + * parameters: + * string to be checked + * pattern we are seeking + * + * returns: + * true/false + */ +bool_t +contains(const char *str, const char *pat) +{ const char *s, *p; + + while (*str) { + if (*str++ == *pat) { + for (s = str, p = &pat[1]; *s == *p; s++, p++) + if (p[1] == 0) + return (TRUE); + } + } + + return (FALSE); +} diff --git a/usr/src/cmd/filesync/filesync.h b/usr/src/cmd/filesync/filesync.h new file mode 100644 index 0000000000..00d5957da1 --- /dev/null +++ b/usr/src/cmd/filesync/filesync.h @@ -0,0 +1,163 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the 
"License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * filesync.h + * + * purpose: + * general defines for use throughout the program + */ + +#ifndef _FILESYNC_H +#define _FILESYNC_H + +#pragma ident "%W% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/types.h> + +/* + * arbitrary limits + */ +#define MAX_NAME 256 /* longest path component */ +#define MAX_PATH 1024 /* longest total path length */ +#define MAX_RLIST 32 /* max number of -r arguments */ +#define MAX_LINE 1024 /* longest input line */ +#define MAX_DEPTH 20 /* how deep to recurse */ +#define COPY_BSIZE 8192 /* block size for file copies */ +#define MIN_HOLE 1024 /* minimum hole in sparse file */ +#define HASH_SIZE 99 /* ignore list hash table */ + +/* + * sanity check limits + */ +#define CONFIRM_MIN 4 /* min # deletetes to confirm */ +#define CONFIRM_PCT 25 /* min pctg of files to confirm */ + +/* + * special types used in the program + */ +typedef enum { + FALSE = 0, + TRUE = 1, + MAYBE = 2 /* only partially true */ +} bool_t; + +typedef enum { + OPT_BASE = 0, /* use the baseline data */ + OPT_SRC = 1, /* use the source side */ + OPT_DST = 2, /* use the destination side */ + OPT_OLD = 3, /* use the old one */ + OPT_NEW = 4 /* use the new one */ +} side_t; + +/* + 
* values for debug mask + */ +typedef long dbgmask_t; /* type for debug masks */ +#define DBG_BASE 0x0001 /* baseline changes */ +#define DBG_RULE 0x0002 /* rule base changes */ +#define DBG_STAT 0x0004 /* file stats */ +#define DBG_ANAL 0x0008 /* analysis tracing */ +#define DBG_RECON 0x0010 /* reconciliation tracing */ +#define DBG_VARS 0x0020 /* variable tracing */ +#define DBG_FILES 0x0040 /* file reading/writing */ +#define DBG_LIST 0x0080 /* include list building */ +#define DBG_EVAL 0x0100 /* evaluation tracing */ +#define DBG_IGNORE 0x0200 /* ignore tracing */ +#define DBG_MISC 0x0400 /* catch-all everything else */ + +/* + * values for error codes + */ +typedef int errmask_t; /* type for error masks */ +#define ERR_OK 0 /* everything is fine */ +#define ERR_RESOLVABLE 1 /* resolvable conflicts */ +#define ERR_UNRESOLVED 2 /* unresolvable conflicts */ +#define ERR_MISSING 4 /* some files missing */ +#define ERR_PERM 8 /* insufficient access */ +#define ERR_FILES 16 /* file format or I/O errors */ +#define ERR_INVAL 32 /* invalid arguments */ +#define ERR_NOBASE 64 /* inaccessable base directory */ +#define ERR_OTHER 128 /* anything else */ + +/* errors that will prevent reconciliation from taking place */ +#define ERR_FATAL (ERR_FILES|ERR_INVAL|ERR_NOBASE|ERR_OTHER) + +/* errors that will cause reconciliation to stop with -h specified */ +#define ERR_ABORT (ERR_FILES|ERR_PERM) + +/* + * program defaults + */ +#define DFLT_PRFX "$HOME/" /* default location/pfx */ +#define SUFX_RULES ".packingrules" /* rules v1.1 location */ +#define SUFX_BASE ".filesync-base" /* baseline location */ +#define SUFX_OLD ".filesync-rules" /* rules v1.0 location */ + +/* + * global variables for command line options + */ +extern bool_t opt_acls; /* enable acl checking/preservation */ +extern bool_t opt_mtime; /* preserve modification times */ +extern bool_t opt_notouch; /* don't actually make any changes */ +extern side_t opt_force; /* designated winner for conflicts */ +extern 
side_t opt_oneway; /* one way only propagation */ +extern side_t opt_onesided; /* permit one sided analysis */ +extern bool_t opt_everything; /* everything must agree (modes/uid/gid) */ +extern bool_t opt_quiet; /* stiffle reconciliaton descriptions */ +extern bool_t opt_verbose; /* generate analysis commentary */ +extern bool_t opt_errors; /* simulate errors on specified files */ +extern bool_t opt_halt; /* halt on any propagation error */ +extern dbgmask_t opt_debug; /* debugging options */ + +/* + * information gained during startup that other people may need + */ +extern uid_t my_uid; /* User ID for files I create */ +extern gid_t my_gid; /* Group ID for files I create */ + +/* error and warning routines */ +void confirm(char *); /* ask user if he's sure */ +void nomem(char *); /* die from malloc failure */ + +/* routines for dealing with strings and file names */ +const char *prefix(const char *, const char *); /* does s1 begin with s2 */ +char *qualify(char *); /* validate and fully qualify */ +char *expand(char *); /* expand variables in name */ +char *lex(FILE *); /* lex off one token */ +extern int lex_linenum; /* current input file line number */ +const char *noblanks(const char *); /* escape strings for embedded blanks */ +bool_t wildcards(const char *); /* does name contain wildcards */ +bool_t suffix(const char *, const char *); /* does s1 end with s2 */ +bool_t contains(const char *, const char *); /* does s1 contain s2 */ + +#ifdef __cplusplus +} +#endif + +#endif /* _FILESYNC_H */ diff --git a/usr/src/cmd/filesync/ignore.c b/usr/src/cmd/filesync/ignore.c new file mode 100644 index 0000000000..0a08037306 --- /dev/null +++ b/usr/src/cmd/filesync/ignore.c @@ -0,0 +1,364 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved
 *
 * module:
 *	ignore.c
 *
 * purpose:
 *	routines to manage the ignore lists and test names against them.
 *
 * contents:
 *	ignore_check ... is a particular file covered by an ignore rule
 *	ignore_file .... add a specific file name to be ignored
 *	ignore_expr .... add a regular expression for files to be ignored
 *	ignore_pgm ..... add a rule to run a program to generate a list
 *	ignore_reset ... flush the internal optimization data structures
 *
 *	static
 *	    ign_hash ..... look up (and optionally add) a literal name
 *			   in the hash table of ignored names
 *	    cheap_check .. record the last two characters of a rule
 *			   in the cheap-test tables
 *
 * notes:
 *	a much simpler implementation could have been provided, but
 *	this test (every file tested against every rule) has the
 *	potential to be EXTREMELY expensive. This module implements
 *	an engine that attempts to optimize the process of determining
 *	that a file has not been ignored.
 *
 *	the usage scenario is
 *	    per base
 *		call ignore_{file,expr,pgm} for each ignore rule
 *		call ignore_check for every file under the base
 *		call ignore_reset when you are done
 */
#ident "%W% %E% SMI"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libgen.h>

#include "filesync.h"
#include "messages.h"

/*
 * routines:
 */
static struct list *ign_hash(const char *, int);
static void cheap_check(const char *);

/*
 * globals
 */
struct list {
	char *l_value;			/* the actual string		*/
	struct list *l_next;		/* pointer to next element	*/
};

static struct list *expr_list;	/* list of regular expressions	*/
static struct list *file_list[ HASH_SIZE ];	/* hash table of literal names */

/*
 * cheap-test tables, indexed by character value: an entry is non-zero
 * if some ignore rule could match a name having that character in
 * that position.
 */
static char cheap_last[256];	/* cheap test: last char	*/
static char cheap_penu[256];	/* cheap test: penultimate char	*/

/*
 * routine:
 *	ignore_check
 *
 * purpose:
 *	determine whether or not a particular name matches an ignore pattern.
 *
 * parameters:
 *	file name
 *
 * returns:
 *	true/false
 *
 * note:
 *	because this routine is called on every single file in
 *	every single sub-directory, it is critical that we make
 *	it fail quickly for most files. The purpose of the cheap_last
 *	and cheap_penu arrays is to quickly determine there is no chance
 *	that a name will match any expression. Most expressions have
 *	wildcards near the front and constant suffixes, so our cheap
 *	test is to look at the last two bytes.
+ */ +bool_t +ignore_check(const char *name) +{ struct list *lp; + const char *s; + + /* + * start with the cheap test + */ + for (s = name; *s; s++); + if (cheap_last[ (unsigned char) s[-1] ] == 0 || + cheap_penu[ (unsigned char) s[-2] ] == 0) + return (FALSE); + + /* check the literal names in the hash table */ + if (ign_hash(name, 0)) { + if (opt_debug & DBG_IGNORE) + fprintf(stderr, "IGNO: match %s\n", name); + return (TRUE); + } + + /* check all the regular expressions */ + for (lp = expr_list; lp; lp = lp->l_next) { + if (gmatch(name, lp->l_value) == 0) + continue; + + if (opt_debug & DBG_IGNORE) + fprintf(stderr, "IGNO: regex %s : %s\n", + lp->l_value, name); + return (TRUE); + } + + return (FALSE); +} + +/* + * routine: + * ignore_file + * + * purpose: + * to add a specific file to an ignore list + * + * parameters: + * command to run + */ +void +ignore_file(const char *name) +{ + cheap_check(name); + + (void) ign_hash(name, 1); + + if (opt_debug & DBG_IGNORE) + fprintf(stderr, "IGNO: add file %s\n", name); +} + +/* + * routine: + * ignore_expr + * + * purpose: + * to add a regular expression to an ignore list + * + * parameters: + * command to run + */ +void +ignore_expr(const char *expr) +{ struct list *lp; + + cheap_check(expr); + + /* allocate a new node and stick it on the front of the list */ + lp = malloc(sizeof (*lp)); + if (lp == 0) + nomem("ignore list"); + lp->l_value = strdup(expr); + lp->l_next = expr_list; + expr_list = lp; + + if (opt_debug & DBG_IGNORE) + fprintf(stderr, "IGNO: add expr %s\n", expr); +} + +/* + * routine: + * ignore_pgm + * + * purpose: + * to run a program and gather up the ignore list it produces + * + * parameters: + * command to run + */ +void +ignore_pgm(const char *cmd) +{ char *s; + FILE *fp; + char inbuf[ MAX_LINE ]; + + if (opt_debug & DBG_IGNORE) + fprintf(stderr, "IGNO: add pgm %s\n", cmd); + + /* run the command and collect its ouput */ + fp = popen(cmd, "r"); + if (fp == NULL) { + fprintf(stderr, 
gettext(ERR_badrun), cmd); + return; + } + + /* + * read each line, strip off the newline and add it to the list + */ + while (fgets(inbuf, sizeof (inbuf), fp) != 0) { + /* strip off any trailing newline */ + for (s = inbuf; *s && *s != '\n'; s++); + *s = 0; + + /* skip any leading white space */ + for (s = inbuf; *s == ' ' || *s == '\t'; s++); + + /* add this file to the list */ + if (*s) { + cheap_check(s); + (void) ign_hash(s, 1); + + if (opt_debug & DBG_IGNORE) + fprintf(stderr, "IGNO: ... %s\n", s); + } + } + + pclose(fp); +} + +/* + * routine: + * ign_hash + * + * purpose: + * to find an entry in the hash list + * + * parameters: + * name + * allocate flag + * + * returns: + * pointer to new list entry or 0 + */ +static struct list * +ign_hash(const char *name, int alloc) +{ const unsigned char *s; + int i; + struct list *lp; + struct list **pp; + + /* perform the hash and find the chain */ + for (s = (const unsigned char *) name, i = 0; *s; s++) + i += *s; + pp = &file_list[i % HASH_SIZE ]; + + /* search for the specified entry */ + for (lp = *pp; lp; lp = *pp) { + if (strcmp(name, lp->l_value) == 0) + return (lp); + pp = &(lp->l_next); + } + + /* if caller said alloc, buy a new node and chain it in */ + if (alloc) { + lp = malloc(sizeof (*lp)); + if (lp == 0) + nomem("ignore list"); + lp->l_value = strdup(name); + lp->l_next = 0; + *pp = lp; + } + + return (lp); +} + +/* + * routine: + * cheap_check + * + * purpose: + * to update the cheap-check arrays for an ignore expression + * + * parameters: + * name/expression + */ +static void +cheap_check(const char *name) +{ const char *s; + unsigned char c; + int i; + + for (s = name; *s; s++); + s--; + + /* if expr ends in a wild card, we are undone */ + c = *s; + if (c == '*' || c == '?' 
|| c == ']' || c == '}') { + for (i = 0; i < 256; i++) { + cheap_last[i] = 1; + cheap_penu[i] = 1; + } + return; + } else + cheap_last[c] = 1; + + if (s <= name) + return; + + /* check the next to last character too */ + c = s[-1]; + if (c == '*' || c == '?' || c == ']' || c == '}') { + for (i = 0; i < 256; i++) + cheap_penu[i] = 1; + } else + cheap_penu[c] = 1; +} + +/* + * routine: + * ignore_reset + * + * purpose: + * to free up all the ignore entries so we can start anew + */ +void +ignore_reset(void) +{ int i; + struct list *np = 0; /* for LINT */ + struct list *lp; + + /* clear the cheap check arrays */ + for (i = 0; i < 255; i++) { + cheap_last[i] = 0; + cheap_penu[i] = 0; + } + + /* free all of the literal hash chains */ + for (i = 0; i < HASH_SIZE; i++) { + for (lp = file_list[i]; lp; lp = np) { + np = lp->l_next; + free(lp->l_value); + free(lp); + } + file_list[i] = 0; + } + + /* free all of the expressions on the chain */ + for (lp = expr_list; lp; lp = np) { + np = lp->l_next; + free(lp->l_value); + free(lp); + } + expr_list = 0; +} diff --git a/usr/src/cmd/filesync/main.c b/usr/src/cmd/filesync/main.c new file mode 100644 index 0000000000..b12e7e47fc --- /dev/null +++ b/usr/src/cmd/filesync/main.c @@ -0,0 +1,688 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1995-2003 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * module:
 *	main.c
 *
 * purpose:
 *	argument handling and top level dispatch
 *
 * contents:
 *	main			argument handling and main loop
 *	usage (static)		print out usage message
 *	confirm			prompt the user for a confirmation and get it
 *	nomem			fatal error handler for malloc failures
 *	findfiles (static)	locate our baseline and rules files
 *	cleanup (static)	unlock baseline and delete temp file
 *	check_access (static)	do we have adequate access to a file/directory
 *	whoami (static)		get uid/gid/umask
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <sys/stat.h>

#include "filesync.h"
#include "database.h"
#include "messages.h"
#include "debug.h"

/*
 * local routines in this module:
 */
static errmask_t findfiles();		/* find rule and baseline files	*/
static void cleanup(int);		/* cleanup locks and temps	*/
static errmask_t check_access(char *, int *);	/* check access to file	*/
static void whoami();			/* gather information about me	*/
static void usage(void);		/* general usage		*/


/*
 * globals exported to the rest of the program
 */
bool_t opt_mtime;	/* preserve modification times on propagations	*/
bool_t opt_notouch;	/* don't actually make any changes		*/
bool_t opt_quiet;	/* disable reconciliation command output	*/
bool_t opt_verbose;	/* enable analysis descriptions			*/
side_t opt_force;	/* designated winner for conflicts		*/
side_t opt_oneway;	/* one way only propagation			*/
side_t opt_onesided;	/* permit one-sided evaluation			*/
bool_t opt_everything;	/* everything must agree (modes/uid/gid)	*/
bool_t opt_yes;		/* pre-confirm massive deletions are OK		*/
bool_t opt_acls;	/* always scan for acls on all files		*/
bool_t opt_errors;	/* simulate errors on specified files		*/
bool_t opt_halt;	/* halt on propagation errors			*/
dbgmask_t opt_debug;	/* debug mask					*/

uid_t my_uid;		/* default UID for files I create	*/
gid_t my_gid;		/* default GID for files I create	*/

static char *file_rules; /* name of rules file			*/
static char *file_base;	/* name of baseline file		*/

static int new_baseline; /* are we creating a new baseline	*/
static int new_rules;	/* are we creating a new rules file	*/
static int my_umask;	/* default UMASK for files I create	*/
static int lockfd;	/* file descriptor for locking baseline	*/

static char *rlist[MAX_RLIST];	/* arguments of the -r options seen	*/
static int num_restrs = 0;	/* number of entries used in rlist	*/

/*
 * routine:
 *	main
 *
 * purpose:
 *	argument processing and primary dispatch
 *
 * returns:
 *	error codes per filesync.1 (ERR_* in filesync.h)
 *
 * notes:
 *	read filesync.1 in order to understand the argument processing
 *
 *	most of the command line options just set some opt_ global
 *	variable that is later looked at by the code that actually
 *	implements the features. Only file names are really processed
 *	in this routine.
+ */ +void +main(int argc, char **argv) +{ int i; + int c; + errmask_t errs = ERR_OK; + int do_prune = 0; + char *srcname = 0; + char *dstname = 0; + struct base *bp; + + /* keep the error messages simple */ + argv[0] = "filesync"; + + /* gather together all of the options */ + while ((c = getopt(argc, argv, "AaehmnqvyD:E:r:s:d:f:o:")) != EOF) + switch (c) { + case 'a': /* always scan for acls */ + opt_acls = TRUE; + break; + case 'e': /* everything agrees */ + opt_everything = TRUE; + break; + case 'h': /* halt on error */ + opt_halt = TRUE; + break; + case 'm': /* preserve modtimes */ + opt_mtime = TRUE; + break; + case 'n': /* notouch */ + opt_notouch = TRUE; + break; + case 'q': /* quiet */ + opt_quiet = TRUE; + break; + case 'v': /* verbose */ + opt_verbose = TRUE; + break; + case 'y': /* yes */ + opt_yes = TRUE; + break; + case 'D': /* debug options */ + if (!isdigit(optarg[0])) { + dbg_usage(); + exit(ERR_INVAL); + } + opt_debug |= strtol(optarg, (char **) NULL, 0); + break; + + case 'E': /* error simulation */ + if (dbg_set_error(optarg)) { + err_usage(); + exit(ERR_INVAL); + } + opt_errors = TRUE; + break; + + case 'f': /* force conflict resolution */ + switch (optarg[0]) { + case 's': + opt_force = OPT_SRC; + break; + case 'd': + opt_force = OPT_DST; + break; + case 'o': + opt_force = OPT_OLD; + break; + case 'n': + opt_force = OPT_NEW; + break; + default: + fprintf(stderr, + gettext(ERR_badopt), + c, optarg); + errs |= ERR_INVAL; + break; + } + break; + + case 'o': /* one way propagation */ + switch (optarg[0]) { + case 's': + opt_oneway = OPT_SRC; + break; + case 'd': + opt_oneway = OPT_DST; + break; + default: + fprintf(stderr, + gettext(ERR_badopt), + c, optarg); + errs |= ERR_INVAL; + break; + } + break; + + case 'r': /* restricted reconciliation */ + if (num_restrs < MAX_RLIST) + rlist[ num_restrs++ ] = optarg; + else { + fprintf(stderr, gettext(ERR_tomany), + MAX_RLIST); + errs |= ERR_INVAL; + } + break; + + case 's': + if ((srcname = 
qualify(optarg)) == 0) + errs |= ERR_MISSING; + break; + + case 'd': + if ((dstname = qualify(optarg)) == 0) + errs |= ERR_MISSING; + break; + + default: + case '?': + errs |= ERR_INVAL; + break; + } + + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: DBG=%s\n", showflags(dbgmap, opt_debug)); + + /* if we have file names, we need a source and destination */ + if (optind < argc) { + if (srcname == 0) { + fprintf(stderr, gettext(ERR_nosrc)); + errs |= ERR_INVAL; + } + if (dstname == 0) { + fprintf(stderr, gettext(ERR_nodst)); + errs |= ERR_INVAL; + } + } + + /* check for simple usage errors */ + if (errs & ERR_INVAL) { + usage(); + exit(errs); + } + + /* locate our baseline and rules files */ + if (c = findfiles()) + exit(c); + + /* figure out file creation defaults */ + whoami(); + + /* read in our initial baseline */ + if (!new_baseline && (c = read_baseline(file_base))) + errs |= c; + + /* read in the rules file if we need or have rules */ + if (optind >= argc && new_rules) { + fprintf(stderr, ERR_nonames); + errs |= ERR_INVAL; + } else if (!new_rules) + errs |= read_rules(file_rules); + + /* if anything has failed with our setup, go no further */ + if (errs) { + cleanup(errs); + exit(errs); + } + + /* + * figure out whether or not we are willing to do a one-sided + * analysis (where we don't even look at the other side. This + * is an "I'm just curious what has changed" query, and we are + * only willing to do it if: + * we aren't actually going to do anything + * we have a baseline we can compare against + * otherwise, we are going to insist on being able to access + * both the source and destination. 
+ */ + if (opt_notouch && !new_baseline) + opt_onesided = opt_oneway; + + /* + * there are two interested usage scenarios: + * file names specified + * create new rules for the specified files + * evaulate and reconcile only the specified files + * no file names specified + * use already existing rules + * consider restricting them to specified subdirs/files + */ + if (optind < argc) { + /* figure out what base pair we're working on */ + bp = add_base(srcname, dstname); + + /* perverse default rules to avoid trouble */ + if (new_rules) { + errs |= add_ignore(0, SUFX_RULES); + errs |= add_ignore(0, SUFX_BASE); + } + + /* create include rules for each file/dir arg */ + while (optind < argc) + errs |= add_include(bp, argv[ optind++ ]); + + /* + * evaluate the specified base on each side, + * being careful to limit evaulation to new rules + */ + errs |= evaluate(bp, OPT_SRC, TRUE); + errs |= evaluate(bp, OPT_DST, TRUE); + } else { + /* note any possible evaluation restrictions */ + for (i = 0; i < num_restrs; i++) + errs |= add_restr(rlist[i]); + + /* + * we can only prune the baseline file if we have done + * a complete (unrestricted) analysis. 
+ */ + if (i == 0) + do_prune = 1; + + /* evaulate each base on each side */ + for (bp = bases; bp; bp = bp->b_next) { + errs |= evaluate(bp, OPT_SRC, FALSE); + errs |= evaluate(bp, OPT_DST, FALSE); + } + } + + /* if anything serious happened, skip reconciliation */ + if (errs & ERR_FATAL) { + cleanup(errs); + exit(errs); + } + + /* analyze and deal with the differenecs */ + errs |= analyze(); + + /* see if there is any dead-wood in the baseline */ + if (do_prune) { + c = prune(); + + if (c > 0 && opt_verbose) + fprintf(stdout, V_prunes, c); + } + + /* print out a final summary */ + summary(); + + /* update the rules and baseline files (if needed) */ + (void) umask(my_umask); + errs |= write_baseline(file_base); + errs |= write_rules(file_rules); + + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: EXIT=%s\n", showflags(errmap, errs)); + + /* just returning ERR_RESOLVABLE upsets some people */ + if (errs == ERR_RESOLVABLE && !opt_notouch) + errs = 0; + + /* all done */ + cleanup(0); + exit(errs); +} + + +/* + * routine: + * usage + * + * purpose: + * print out a usage message + * + * parameters: + * none + * + * returns: + * none + * + * note: + * the -D and -E switches are for development/test/support + * use only and do not show up in the general usage message. + */ +static void +usage(void) +{ + fprintf(stderr, "%s\t%s %s\n", gettext(ERR_usage), "filesync", + gettext(USE_simple)); + fprintf(stderr, "\t%s %s\n", "filesync", gettext(USE_all)); + fprintf(stderr, "\t-a .......... %s\n", gettext(USE_a)); + fprintf(stderr, "\t-e .......... %s\n", gettext(USE_e)); + fprintf(stderr, "\t-h .......... %s\n", gettext(USE_h)); + fprintf(stderr, "\t-m .......... %s\n", gettext(USE_m)); + fprintf(stderr, "\t-n .......... %s\n", gettext(USE_n)); + fprintf(stderr, "\t-q .......... %s\n", gettext(USE_q)); + fprintf(stderr, "\t-v .......... %s\n", gettext(USE_v)); + fprintf(stderr, "\t-y .......... %s\n", gettext(USE_y)); + fprintf(stderr, "\t-s dir ...... 
%s\n", gettext(USE_s)); + fprintf(stderr, "\t-d dir ...... %s\n", gettext(USE_d)); + fprintf(stderr, "\t-r dir ...... %s\n", gettext(USE_r)); + fprintf(stderr, "\t-f [sdon].... %s\n", gettext(USE_f)); + fprintf(stderr, "\t-o src/dst... %s\n", gettext(USE_o)); +} + +/* + * routine: + * confirm + * + * purpose: + * to confirm that the user is willing to do something dangerous + * + * parameters: + * warning message to be printed + * + * returns: + * void + * + * notes: + * if this is a "notouch" or if the user has pre-confirmed, + * we should not obtain the confirmation and just return that + * the user has confirmed. + */ +void +confirm(char *message) +{ FILE *ttyi, *ttyo; + char ansbuf[ MAX_LINE ]; + + /* if user pre-confirmed, we don't have to ask */ + if (opt_yes || opt_notouch) + return; + + ttyo = fopen("/dev/tty", "w"); + ttyi = fopen("/dev/tty", "r"); + if (ttyi == NULL || ttyo == NULL) + exit(ERR_OTHER); + + /* explain the problem and prompt for confirmation */ + fprintf(ttyo, message); + fprintf(ttyo, gettext(WARN_proceed)); + + /* if the user doesn't kill us, we can continue */ + (void) fgets(ansbuf, sizeof (ansbuf), ttyi); + + /* close the files and return */ + (void) fclose(ttyi); + (void) fclose(ttyo); +} + +void +nomem(char *reason) +{ + fprintf(stderr, gettext(ERR_nomem), reason); + exit(ERR_OTHER); +} + +/* + * routine: + * findfiles + * + * purpose: + * to locate our baseline and rules files + * + * parameters: + * none + * + * returns: + * error mask + * settings of file_base and file_rules + * + * side-effects: + * in order to keep multiple filesyncs from running in parallel + * we put an advisory lock on the baseline file. If the baseline + * file does not exist we create one. The unlocking (and deletion + * of extraneous baselines) is handled in cleanup. 
+ */ +static errmask_t +findfiles(void) /* find rule and baseline files */ +{ char *s, *where; + char namebuf[MAX_PATH]; + int ret; + errmask_t errs = 0; + + /* figure out where the files should be located */ + s = getenv("FILESYNC"); + where = (s && *s) ? expand(s) : expand(DFLT_PRFX); + + /* see if we got a viable name */ + if (where == 0) { + fprintf(stderr, gettext(ERR_nofsync)); + return (ERR_FILES); + } + + /* try to form the name of the rules file */ + strcpy(namebuf, where); + strcat(namebuf, SUFX_RULES); + s = strdup(namebuf); + errs = check_access(namebuf, &new_rules); + + /* if we cannot find a proper rules file, look in the old place */ + if (new_rules && errs == 0) { + strcpy(namebuf, where); + strcat(namebuf, SUFX_OLD); + file_rules = strdup(namebuf); + errs = check_access(namebuf, &new_rules); + + /* if we couldn't find that either, go with new name */ + if (new_rules && errs == 0) + file_rules = s; + } else + file_rules = s; + + /* try to form the name of the baseline file */ + strcpy(namebuf, where); + strcat(namebuf, SUFX_BASE); + file_base = strdup(namebuf); + errs |= check_access(namebuf, &new_baseline); + + if (opt_debug & DBG_FILES) { + fprintf(stderr, "FILE: %s rules file: %s\n", + new_rules ? "new" : "existing", file_rules); + + fprintf(stderr, "FILE: %s base file: %s\n", + new_baseline ? "new" : "existing", file_base); + } + + /* + * in order to lock out other filesync programs we need some + * file we can lock. We do an advisory lock on the baseline + * file. If no baseline file exists, we create an empty one. + */ + if (new_baseline) + lockfd = creat(file_base, 0666); + else + lockfd = open(file_base, O_RDWR); + + if (lockfd < 0) { + fprintf(stderr, new_baseline ? 
ERR_creat : ERR_open, + TXT_base, file_base); + errs |= ERR_FILES; + } else { + ret = lockf(lockfd, F_TLOCK, 0L); + if (ret < 0) { + fprintf(stderr, ERR_lock, TXT_base, file_base); + errs |= ERR_FILES; + } else if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: locking baseline file %s\n", + file_base); + } + + return (errs); +} + +/* + * routine: + * cleanup + * + * purpose: + * to clean up temporary files and locking prior to exit + * + * paremeters: + * error mask + * + * returns: + * void + * + * notes: + * if there are no errors, the baseline file is assumed to be good. + * Otherwise, if we created a temporary baseline file (just for + * locking) we will delete it. + */ +static void +cleanup(errmask_t errmask) +{ + /* unlock the baseline file */ + if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: unlock baseline file %s\n", file_base); + (void) lockf(lockfd, F_ULOCK, 0); + + /* see if we need to delete a temporary copy */ + if (errmask && new_baseline) { + if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: unlink temp baseline file %s\n", + file_base); + (void) unlink(file_base); + } +} + +/* + * routine: + * check_access + * + * purpose: + * to determine whether or not we can access an existing file + * or create a new one + * + * parameters: + * name of file (in a clobberable buffer) + * pointer to new file flag + * + * returns: + * error mask + * setting of the new file flag + * + * note: + * it is kind of a kluge that this routine clobbers the name, + * but it is only called from one place, it needs a modified + * copy of the name, and the one caller doesn't mind. 
+ */ +static errmask_t +check_access(char *name, int *newflag) +{ char *s; + + /* start out by asking for what we want */ + if (access(name, R_OK|W_OK) == 0) { + *newflag = 0; + return (0); + } + + /* if the problem is isn't non-existance, lose */ + if (errno != ENOENT) { + *newflag = 0; + fprintf(stderr, gettext(ERR_rdwri), name); + return (ERR_FILES); + } + + /* + * the file doesn't exist, so there is still hope if we can + * write in the directory that should contain the file + */ + *newflag = 1; + + /* truncate the file name to its containing directory */ + for (s = name; s[1]; s++); + while (s > name && *s != '/') + s--; + if (s > name) + *s = 0; + else if (*s == '/') + s[1] = 0; + else + name = "."; + + /* then see if we have write access to the directory */ + if (access(name, W_OK) == 0) + return (0); + + fprintf(stderr, gettext(ERR_dirwac), name); + return (ERR_FILES); +} + +/* + * routine: + * whoami + * + * purpose: + * to figure out who I am and what the default modes/ownership + * is on files that I create. + */ +static void +whoami() +{ + my_uid = geteuid(); + my_gid = getegid(); + my_umask = umask(0); + + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: my_uid=%ld, my_gid=%ld, my_umask=%03o\n", + my_uid, my_gid, my_umask); +} diff --git a/usr/src/cmd/filesync/messages.h b/usr/src/cmd/filesync/messages.h new file mode 100644 index 0000000000..0f13e30237 --- /dev/null +++ b/usr/src/cmd/filesync/messages.h @@ -0,0 +1,225 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * messages.h + * + * purpose: + * contins defines for all localizable messages + * + * notes: + * unless otherwise specified, all %s arguments can be assumed + * to be file names. Non-obvious arguments are explained in + * comments. + */ + +#ifndef _MESSAGES_H +#define _MESSAGES_H + +#pragma ident "%W% %E% SMI" + +#ifdef __cplusplus +extern "C" { +#endif + +#include <libintl.h> + +/* + * summary output messages + */ +#define SUM_hd "RECONCILE %s and %s (%d files)\n" +#define SUM_dst "\t-> %4d copies, %4d deletes, %4d ownership\n" +#define SUM_src "\t<- %4d copies, %4d deletes, %4d ownership\n" +#define SUM_unresolved "\tUNRESOLVED CONFLICTS: %d\n" + +/* + * verbose mode analysis commentary + */ +#define V_nomore "# file %s no longer exists\n" +#define V_deleted "# file %s deleted from %s\n" /* src/dst */ +#define V_created "# file %s created on %s\n" /* src/dst */ +#define V_delconf "# file %s has been deleted and changed\n" +#define V_trunconf "# file %s has been truncated and changed\n" +#define V_unchanged "# file %s has two identical versions\n" +#define V_different "# file %s has two different versions\n" +#define V_modes "# file %s has changed modes/ownership\n" +#define V_changed "# file %s has been modified\n" +#define V_renamed "# file %s has been renamed to %s\n" +#define V_prunes "# %d stale entries pruned from baseline\n" +#define V_nostat "# WARNING: unable to stat file %s\n" +#define V_change "# WARNING: file %s on %s, was <%ld,%ld>#%ld, now <%ld,%ld>#%ld\n" +#define V_suppressed "# file 
%s not reconciled due to halt-on-error\n" + +/* + * usage messages + */ +#define ERR_usage "Usage:" +#define USE_a "always check for Access Control Lists" +#define USE_e "everything must agree (modes, owner, group)" +#define USE_h "halt immediately after a file propagation error" +#define USE_m "modification times should be preserved" +#define USE_n "no touch (do not change any files)" +#define USE_q "quiet (do not list reconciliation commands)" +#define USE_v "verbose (commentary on each changed file)" +#define USE_y "yes (do not prompt for confirmations)" +#define USE_s "source directory for new rules" +#define USE_d "destination directory for new rules" +#define USE_r "restrict reconciliation to specified directories" +#define USE_f "force conflicts to resolve in favor of src/dst/old/new" +#define USE_o "one-way: only propagate changes from src/dst" + +/* + * These are the basic usage scenario line, and in most cases should not + * be translated. + */ +#define USE_simple "[-mnqv] -s dir -d dir file ..." +#define USE_all "[-aehmnqvy] [-r dir] [-f src/dst/old/new] [-o src/dst]" + +/* + * error messages + */ +#define ERR_open "ERROR: cannot open %s file %s\n" +#define ERR_creat "ERROR: unable to create %s file %s\n" +#define ERR_write "ERROR: write error in %s file %s\n" +#define ERR_fclose "ERROR: error in flushing and closing %s file %s\n" +#define ERR_chdir "ERROR: unable to chdir to %s\n" +#define ERR_rename "ERROR: unable to rename %s file %s to %s\n" +#define ERR_lock "ERROR: unable to lock %s file %s\n" + /* + * first %s argument is "rules" or "baseline" + */ +#define ERR_badinput "ERROR: invalid input at line %d, %s in %s\n" + /* + * first %s argument is a the name of the offending + * field (e.g. "mode" or "major dev"). The last + * %s argument is the name of the file being + * processed. 
+ */ +#define ERR_badver "ERROR: bad version (%d.%d) found in %s file %s\n" + /* + * second %s is "rules" or "baseline" + * last %s is file name + */ + + +#define ERR_nocwd "ERROR: unable to get working directory for %s\n" +#define ERR_longname "ERROR: excessively long name %s\n" +#define ERR_undef "ERROR: undefined variable %s\n" +#define ERR_deep "ERROR: directory tree is too deep at directory %s\n" + +#define ERR_badopt "ERROR: unrecognized option -%c %s\n" + /* + * the %c argument is the offending flag + * (e.g. -f or -o) and the %s is the argument + * that followed it. + */ + +#define ERR_nofsync "ERROR: unable to find rule and baseline files\n" +#define ERR_badbase "ERROR: invalid BASE directory %s\n" +#define ERR_nosrc "ERROR: no source directory specified\n" +#define ERR_nodst "ERROR: no destination directory specified\n" +#define ERR_nonames "ERROR: no file/directory names specified and no rules file found\n" +#define ERR_tomany "ERROR: only %d -r arguments allowed\n" +#define ERR_rdwri "ERROR: cannot read/write file %s\n" +#define ERR_dirwac "ERROR: cannot create files in directory %s\n" +#define ERR_nomem "ERROR: unable to allocate memory for %s\n" + /* + * the %s argument is the name of a data structure + * that could not be allocated. It is only useful + * for telling the support person over the phone. + */ + +#define ERR_badrun "ERROR: bad exit code from %s\n" + /* + * argument is a command from the rules file + */ + +#define ERR_cannot "ERROR: %s %s\n" + /* + * The first %s argument will be a PROB_ string. + * The second %s argument is the file we were + * trying to do it to. 
+ */ + +#define ERR_abort_h "ERROR: aborting because of propagation failure\n" + +#define WARN_ignore "WARNING: ignoring LIST rule for %s (illegal '.', '..', or '/')\n" +#define WARN_noacls "WARNING: ACLs are not supported for file %s\n" +#define WARN_deletes "WARNING: this operation might delete %d files\n" +#define WARN_rmdirs "WARNING: operation might delete %d non-empty directories\n" +#define WARN_ichange "WARNING: %d listed directories have changed Inode #s\n" +#define WARN_proceed "Press Enter to confirm, or interrupt to abort\n" +#define WARN_super "NOTE: there are ownership and protection conflicts that can only be\n resolved by the super user\n" + +/* + * descriptions of problems in unreconcilable files + */ +#define PROB_del_change "deleted and changed" +#define PROB_different "two different versions" +#define PROB_ownership "different owners" +#define PROB_protection "different protections" +#define PROB_prohibited "blocked by -o switch" +#define PROB_aborted "aborted by -h switch" + +#define PROB_chown "unable to chown" +#define PROB_chgrp "unable to chgrp" +#define PROB_chmod "unable to chmod" +#define PROB_chacl "unable to setfacl" +#define PROB_link "unable to link" +#define PROB_unlink "unable to unlink" +#define PROB_rmdir "unable to rmdir" +#define PROB_copy "unable to copy" +#define PROB_mknod "unable to mknod" +#define PROB_mkdir "unable to mkdir" +#define PROB_readlink "unable to read symlink" +#define PROB_symlink "unable to create symlink" +#define PROB_restat "unable to stat/restat" +#define PROB_deal "unable to deal with" +#define PROB_copyin "unable to open changed file" +#define PROB_copyout "unable to create new file" +#define PROB_botch "unable to safely setfacl" +#define PROB_rename "unable to rename" +#define PROB_rename2 "unable to rename/create" +#define PROB_read "read error" +#define PROB_write "write error" +#define PROB_space "insufficient space to copy" + + +/* + * text snippets + */ +#define TXT_src "source" /* for 
WARN_change */ +#define TXT_dst "destination" /* for WARN_change */ +#define TXT_srcdst "missing source/destination" /* for ERR_bad_input */ +#define TXT_noargs "missing arguments" /* for ERR_bad_input */ +#define TXT_badver "invalid version number" /* for ERR_bad_input */ +#define TXT_nobase "LIST without a BASE" /* for ERR_bad_input */ +#define TXT_rules "rules" /* for ERR_bad_ver */ +#define TXT_base "baseline" /* for ERR_bad_ver */ + +#ifdef __cplusplus +} +#endif + +#endif /* _MESSAGES_H */ diff --git a/usr/src/cmd/filesync/recon.c b/usr/src/cmd/filesync/recon.c new file mode 100644 index 0000000000..3ba61b0f5c --- /dev/null +++ b/usr/src/cmd/filesync/recon.c @@ -0,0 +1,833 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * recon.c + * + * purpose: + * process the reconciliation list, figure out exactly what the + * changes were, and what we should do about them. + * + * contents: + * reconcile ... (top level) process the reconciliation list + * samedata .... (static) do two files have the same contents + * samestuff ... 
(static) do two files have the same ownership/protection + * samecompare . (static) actually read and compare the contents + * samelink .... (static) do two symlinks have the same contents + * truncated ... (static) was one of the two copies truncted + * older ....... (static) which copy is older + * newer ....... (static) which copy is newer + * full_name ... generate a full path name for a file + * + * notes: + * If you only study one routine in this whole program, reconcile + * is that routine. Everything else is just book keeping. + * + * things were put onto the reconciliation list because analyze + * thought that they might have changed ... but up until now + * nobody has figured out what the changes really were, or even + * if there really were any changes. + * + * queue_file has ordered the reconciliation list with directory + * creations first (depth ordered) and deletions last (inversely + * depth ordered). all other changes have been ordered by mod time. + */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> + +#include "filesync.h" +#include "database.h" +#include "messages.h" +#include "debug.h" + +/* + * local routines to figure out how the files really differ + */ +static bool_t samedata(struct file *); +static bool_t samestuff(struct file *); +static bool_t samecompare(struct file *); +static bool_t truncated(struct file *); +static bool_t samelink(); +static side_t newer(struct file *); +static side_t older(struct file *); + +/* + * globals + */ +char *srcname; /* file we are emulating */ +char *dstname; /* file we are updating */ + +/* + * routine: + * reconcile + * + * purpose: + * to perform the reconciliation action associated with a file + * + * parameters: + * file pointer + * + * returns: + * built up error mask + * updated statistics + * + * notes: + * The switch statement handles the obvious stuff. + * The TRUE side of the samedata test handles minor differences. 
+ * The interesting stuff is in the FALSE side of the samedata test. + * + * The desparation heuristics (in the diffmask&CONTENTS test) are + * not rigorously correct ... but they always try do the right thing + * with data, and only lose mode/ownership changes in relatively + * pathological cases. But I claim that the benefits outweigh the + * risks, and most users will be pleased with the resulting decisions. + * + * Another trick is in the deletion cases of the switch. We + * normally won't allow an unlink that conflicts with data + * changes. If there are multiple links to the file, however, + * we can make the changes and do the deletion. + * + * The action routines do_{remove,rename,like,copy} handle all + * of their own statistics and status updating. This routine + * only has to handle its own reconciliation failures (when we + * can't decide what to do). + */ +errmask_t +reconcile(struct file *fp) +{ errmask_t errs = 0; + diffmask_t diffmask; + + if (opt_debug & DBG_RECON) + fprintf(stderr, "RECO: %s flgs=%s, mtime=%08lx.%08lx\n", + fp->f_fullname, + showflags(fileflags, fp->f_flags), + fp->f_modtime, fp->f_modns); + + /* + * form the fully qualified names for both files + */ + srcname = full_name(fp, OPT_SRC, OPT_SRC); + dstname = full_name(fp, OPT_DST, OPT_DST); + + /* + * because they are so expensive to read and so troublesome + * to set, we try to put off reading ACLs as long as possible. + * If we haven't read them yet, we must read them now (so that + * samestuff can compare them). + */ + if (opt_acls == 0 && fp->f_info[ OPT_BASE ].f_numacls == 0) { + if (get_acls(srcname, &fp->f_info[ OPT_SRC ])) + fp->f_srcdiffs |= D_FACLS; + if (get_acls(dstname, &fp->f_info[ OPT_DST ])) + fp->f_dstdiffs |= D_FACLS; + } + + /* + * If a rename has been detected, we don't have to figure + * it out, since both the rename-to and rename-from files + * have already been designated. When we encounter a rename-to + * we should carry it out. 
When we encounter a rename-from + * we can ignore it, since it should be dealt with as a side + * effect of processing the rename-to. + */ + if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_FROM) + return (0); + + if ((fp->f_srcdiffs|fp->f_dstdiffs) & D_RENAME_TO) { + + if (opt_verbose) + fprintf(stdout, gettext(V_renamed), + fp->f_previous->f_fullname, fp->f_name); + + if (fp->f_srcdiffs & D_RENAME_TO) { + errs = do_rename(fp, OPT_DST); + fp->f_srcdiffs &= D_MTIME | D_SIZE; + } else if (fp->f_dstdiffs & D_RENAME_TO) { + errs = do_rename(fp, OPT_SRC); + fp->f_dstdiffs &= D_MTIME | D_SIZE; + } + + if (errs != ERR_RESOLVABLE) + goto done; + + /* + * if any differences remain, then we may be dealing + * with contents changes in addition to a rename + */ + if ((fp->f_srcdiffs | fp->f_dstdiffs) == 0) + goto done; + + /* + * fall through to reconcile the data changes + */ + } + + /* + * pull of the easy cases (non-conflict creations & deletions) + */ + switch (fp->f_flags & (F_WHEREFOUND)) { + case F_IN_BASELINE: /* only exists in baseline */ + case 0: /* only exists in rules */ + if (opt_verbose) + fprintf(stdout, gettext(V_nomore), + fp->f_fullname); + fp->f_flags |= F_REMOVE; /* fix baseline */ + return (0); + + case F_IN_BASELINE|F_IN_SOURCE: /* deleted from dest */ + /* + * the basic principle here is that we are willing + * to do the deletion if: + * no changes were made on the other side + * OR + * we have been told to force in this direction + * + * we do, however, make an exception for files that + * will still have other links. In this case, the + * (changed) data will still be accessable through + * another link and so we are willing to do the unlink + * inspite of conflicting changes (which may well + * have been introduced through another link. 
+ * + * The jury is still out on this one + */ + if (((fp->f_srcdiffs&D_IMPORTANT) == 0) || + (opt_force == OPT_DST) || + has_other_links(fp, OPT_SRC)) { + if (opt_verbose) + fprintf(stdout, gettext(V_deleted), + fp->f_fullname, "dst"); + errs = do_remove(fp, OPT_SRC); + goto done; + } + + /* a deletion combined with changes */ + if (opt_verbose) + fprintf(stdout, gettext(V_delconf), + fp->f_fullname); + + /* if we are to resolve in favor of source */ + if (opt_force == OPT_SRC) { + errs = do_copy(fp, OPT_DST); + goto done; + } + + fp->f_problem = gettext(PROB_del_change); + goto cant; + + case F_IN_BASELINE|F_IN_DEST: /* deleted from src */ + /* just like previous case, w/sides reversed */ + if (((fp->f_dstdiffs&D_IMPORTANT) == 0) || + (opt_force == OPT_SRC) || + has_other_links(fp, OPT_DST)) { + if (opt_verbose) + fprintf(stdout, gettext(V_deleted), + fp->f_fullname, "src"); + errs = do_remove(fp, OPT_DST); + goto done; + } + + /* a deletion combined with changes */ + if (opt_verbose) + fprintf(stdout, gettext(V_delconf), + fp->f_fullname); + + /* if we are to resolve in favor of destination */ + if (opt_force == OPT_DST) { + errs = do_copy(fp, OPT_SRC); + goto done; + } + + fp->f_problem = gettext(PROB_del_change); + goto cant; + + /* + * if something new shows up, and for some reason we cannot + * propagate it to the other side, we should suppress the + * file from the baseline, so it will show up as a new + * creation next time too. 
+ */ + case F_IN_SOURCE: /* created in src */ + if (opt_verbose) + fprintf(stdout, gettext(V_created), + fp->f_fullname, "src"); + errs = do_copy(fp, OPT_DST); + goto done; + + case F_IN_DEST: /* created in dest */ + if (opt_verbose) + fprintf(stdout, gettext(V_created), + fp->f_fullname, "dst"); + errs = do_copy(fp, OPT_SRC); + goto done; + + case F_IN_SOURCE|F_IN_DEST: /* not in baseline */ + /* + * since we don't have a baseline, we cannot + * know which of the two copies should prevail + */ + break; + + case F_IN_BASELINE|F_IN_SOURCE|F_IN_DEST: + /* + * we have a baseline where the two copies agreed, + * so maybe we can determine that only one of the + * two copies have changed ... but before we decide + * who should be the winner we should determine + * that the two copies are actually different. + */ + break; + } + + /* + * if we have fallen out of the case statement, it is because + * we have discovered a non-obvious situation where potentially + * changed versions of the file exist on both sides. + * + * if the two copies turn out to be identical, this is simple + */ + if (samedata(fp)) { + if (samestuff(fp)) { + /* files are identical, just update baseline */ + if (opt_verbose) + fprintf(stdout, gettext(V_unchanged), + fp->f_fullname); + update_info(fp, OPT_SRC); + goto done; + } else { + /* + * contents agree but ownership/protection does + * not agree, so we have to bring these into + * agreement. We can pick a winner if one + * side hasn't changed, or if the user has + * specified a force flag. + */ + if (opt_verbose) + fprintf(stdout, gettext(V_modes), + fp->f_fullname); + + if (((fp->f_srcdiffs & D_ADMIN) == 0) || + (opt_force == OPT_DST)) { + errs = do_like(fp, OPT_SRC, TRUE); + goto done; + } + + if (((fp->f_dstdiffs & D_ADMIN) == 0) || + (opt_force == OPT_SRC)) { + errs = do_like(fp, OPT_DST, TRUE); + goto done; + } + } + /* falls down to cant */ + } else { + /* + * The two files have different contents, so we have + * a potential conflict here. 
If we know that only one + * side has changed, we go with that side. + */ + if (fp->f_dstdiffs == 0 || fp->f_srcdiffs == 0) { + if (opt_verbose) + fprintf(stdout, gettext(V_changed), + fp->f_fullname); + errs = do_copy(fp, fp->f_srcdiffs ? OPT_DST : OPT_SRC); + goto done; + } + + /* + * Both sides have changed, so we have a real conflict. + */ + if (opt_verbose) + fprintf(stdout, + gettext(truncated(fp) ? + V_trunconf : V_different), + fp->f_fullname); + + /* + * See if the user has given us explicit instructions + * on how to resolve conflicts. We may have been told + * to favor the older, the newer, the source, or the + * destination ... but the default is to leave the + * conflict unresolved. + */ + if (opt_force == OPT_OLD) { + errs = do_copy(fp, newer(fp)); + goto done; + } + + if (opt_force == OPT_NEW) { + errs = do_copy(fp, older(fp)); + goto done; + } + + if (opt_force != 0) { + errs = do_copy(fp, (opt_force == OPT_SRC) ? + OPT_DST : OPT_SRC); + goto done; + } + + + /* + * This is our last chance before giving up. + * + * We know that the files have different contents and + * that there were changes on both sides. The only way + * we can safely handle this is if there were pure contents + * changes on one side and pure ownership changes on the + * other side. In this case we can propagate the ownership + * one way and the contents the other way. + * + * We decide whether or not this is possible by ANDing + * together the changes on the two sides, and seeing + * if the changes were all orthogonal (none of the same + * things changed on both sides). + */ + diffmask = fp->f_srcdiffs & fp->f_dstdiffs; + if ((diffmask & D_CONTENTS) == 0) { + /* + * if ownership changes were only made on one side + * (presumably the side that didn't have data changes) + * we can handle them separately. In this case, + * ownership changes must be fixed first, because + * the subsequent do_copy will overwrite them. 
+ */ + if ((diffmask & D_ADMIN) == 0) + errs |= do_like(fp, (fp->f_srcdiffs&D_ADMIN) ? + OPT_DST : OPT_SRC, + TRUE); + + /* + * Now we can deal with the propagation of the data + * changes. Note that any ownership/protection + * changes (from the other side) that have not been + * propagated yet are about to be lost. The cases + * in which this might happen are all pathological + * and the consequences of losing the protection + * changes are (IMHO) minor when compared to the + * obviously correct data propagation. + */ + errs |= do_copy(fp, (fp->f_srcdiffs&D_CONTENTS) ? + OPT_DST : OPT_SRC); + goto done; + } + + /* + * there are conflicting changes, nobody has told us how to + * resolve conflicts, and we cannot figure out how to merge + * the differences. + */ + fp->f_problem = gettext(PROB_different); + } + +cant: + /* + * I'm not smart enough to resolve this conflict automatically, + * so I have no choice but to bounce it back to the user. + */ + fp->f_flags |= F_CONFLICT; + fp->f_base->b_unresolved++; + errs |= ERR_UNRESOLVED; + +done: + /* + * if we have a conflict and the file is not in the baseline, + * then there was never any point at which the two copies were + * in agreement, and we want to preserve the conflict for future + * resolution. + */ + if ((errs&ERR_UNRESOLVED) && (fp->f_flags & F_IN_BASELINE) == 0) + if (fp->f_files == 0) + /* + * in most cases, this is most easily done by just + * excluding the file in question from the baseline + */ + fp->f_flags |= F_REMOVE; + else + /* + * but ... if the file in question is a directory + * with children, excluding it from the baseline + * would keep all of its children (even those with + * no conflicts) out of the baseline as well. In + * This case, it is better to tell a lie and to + * manufacture a point of imaginary agreement + * in the baseline ... but one that is absurd enough + * that we will still see conflicts each time we run. 
+ * + * recording a type of directory, and everything + * else as zero should be absurd enough. + */ + fp->f_info[ OPT_BASE ].f_type = S_IFDIR; + + if (opt_debug & DBG_MISC) + fprintf(stderr, "MISC: %s ERRS=%s\n", fp->f_fullname, + showflags(errmap, errs)); + + return (errs); +} + +/* + * routine: + * newer + * + * purpose: + * determine which of two files is newer + * + * parameters: + * struct file + * + * returns: + * side_t (src/dest) + */ +static side_t +newer(struct file *fp) +{ + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + if (sp->f_modtime > dp->f_modtime) + return (OPT_SRC); + + if (sp->f_modtime < dp->f_modtime) + return (OPT_DST); + + if (sp->f_modns >= dp->f_modns) + return (OPT_SRC); + + return (OPT_DST); +} + +/* + * routine: + * older + * + * purpose: + * determine which of two files is older + * + * parameters: + * struct file + * + * returns: + * side_t (src/dest) + */ +static side_t +older(struct file *fp) +{ + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + if (sp->f_modtime < dp->f_modtime) + return (OPT_SRC); + + if (sp->f_modtime > dp->f_modtime) + return (OPT_DST); + + if (sp->f_modns <= dp->f_modns) + return (OPT_SRC); + + return (OPT_DST); +} + +/* + * routine: + * samedata + * + * purpose: + * determine whether or not two files contain the same data + * + * parameters: + * struct file + * + * returns: + * bool_t (true/false) + */ +static bool_t +samedata(struct file *fp) +{ + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + /* cheap test: types are different */ + if (sp->f_type != dp->f_type) + return (FALSE); + + /* cheap test: directories have same contents */ + if (sp->f_type == S_IFDIR) + return (TRUE); + + /* special files are compared via their maj/min */ + if ((sp->f_type == S_IFBLK) || (sp->f_type == S_IFCHR)) { + if (sp->f_rd_maj != dp->f_rd_maj) + return (FALSE); + if (sp->f_rd_min != dp->f_rd_min) + 
return (FALSE); + return (TRUE); + } + + /* symlinks are the same if their contents are the same */ + if (sp->f_type == S_IFLNK) + return (samelink()); + + /* cheap test: sizes are different */ + if (fp->f_info[OPT_SRC].f_size != fp->f_info[OPT_DST].f_size) + return (FALSE); + + /* expensive test: byte for byte comparison */ + if (samecompare(fp) == 0) + return (FALSE); + + return (TRUE); +} + +/* + * routine: + * samestuff + * + * purpose: + * determine whether or not two files have same owner/protection + * + * parameters: + * struct file + * + * returns: + * bool_t (true/false) + */ +static bool_t +samestuff(struct file *fp) +{ int same_mode, same_uid, same_gid, same_acl; + struct fileinfo *sp, *dp; + + sp = &fp->f_info[OPT_SRC]; + dp = &fp->f_info[OPT_DST]; + + same_mode = (sp->f_mode == dp->f_mode); + same_uid = (sp->f_uid == dp->f_uid); + same_gid = (sp->f_gid == dp->f_gid); + same_acl = cmp_acls(sp, dp); + + /* if the are all the same, it is easy to tell the truth */ + if (same_uid && same_gid && same_mode && same_acl) + return (TRUE); + + /* note the nature of the conflict */ + if (!same_uid || !same_gid || !same_acl) + fp->f_problem = gettext(PROB_ownership); + else + fp->f_problem = gettext(PROB_protection); + + return (FALSE); +} + +/* + * routine: + * samecompare + * + * purpose: + * do a byte-for-byte comparison of two files + * + * parameters: + * struct file + * + * returns: + * bool_t (true/false) + */ +static bool_t +samecompare(struct file *fp) +{ int sfd, dfd; + int i, count; + char srcbuf[ COPY_BSIZE ], dstbuf[ COPY_BSIZE ]; + bool_t same = TRUE; + + + sfd = open(srcname, 0); + if (sfd < 0) + return (FALSE); + + dfd = open(dstname, 0); + if (dfd < 0) { + close(sfd); + return (FALSE); + } + + for ( + count = read(sfd, srcbuf, COPY_BSIZE); + count > 0; + count = read(sfd, srcbuf, COPY_BSIZE)) { + + /* do a matching read */ + if (read(dfd, dstbuf, COPY_BSIZE) != count) { + same = FALSE; + goto done; + } + + /* do the comparison for this block */ + 
for (i = 0; i < count; i++) { + if (srcbuf[i] != dstbuf[i]) { + same = FALSE; + goto done; + } + } + } + +done: + if (opt_debug & DBG_ANAL) + fprintf(stderr, "ANAL: SAME=%d %s\n", same, fp->f_fullname); + + close(sfd); + close(dfd); + return (same); +} + +/* + * routine: + * truncated + * + * purpose: + * to determine whether or not a file has been truncated + * + * parameters: + * pointer to file structure + * + * returns: + * true/false + */ +static bool_t +truncated(struct file *fp) +{ + /* either source or destination must now be zero length */ + if (fp->f_info[OPT_SRC].f_size && fp->f_info[OPT_DST].f_size) + return (FALSE); + + /* file must have originally had a non-zero length */ + if (fp->f_info[OPT_BASE].f_size == 0) + return (FALSE); + + /* file type must "normal" all around */ + if (fp->f_info[OPT_BASE].f_type != S_IFREG) + return (FALSE); + if (fp->f_info[OPT_SRC].f_type != S_IFREG) + return (FALSE); + if (fp->f_info[OPT_DST].f_type != S_IFREG) + return (FALSE); + + + return (TRUE); +} + +/* + * routine: + * samelink + * + * purpose: + * to determine whether or not two symbolic links agree + * + * parameters: + * pointer to file structure + * + * returns: + * true/false + */ +static bool_t +samelink() +{ int i, srclen, dstlen; + char srcbuf[ MAX_PATH ], dstbuf[ MAX_PATH ]; + + + /* read both copies of the link */ + srclen = readlink(srcname, srcbuf, sizeof (srcbuf)); + dstlen = readlink(dstname, dstbuf, sizeof (dstbuf)); + + /* if they aren't the same length, they disagree */ + if (srclen < 0 || dstlen < 0 || srclen != dstlen) + return (FALSE); + + /* look for differences in contents */ + for (i = 0; i < srclen; i++) + if (srcbuf[i] != dstbuf[i]) + return (FALSE); + + return (TRUE); +} + +/* + * routine: + * full_name + * + * purpose: + * to figure out the fully qualified path name to a file on the + * reconciliation list. 
+ * + * parameters: + * pointer to the file structure + * side indication for which base to use + * side indication for which buffer to use + * + * returns: + * pointer to a clobberable buffer + * + * notes: + * the zero'th buffer is used for renames and links, where + * we need the name of another file on the same side. + */ +char * +full_name(struct file *fp, side_t srcdst, side_t whichbuf) +{ static char *buffers[3]; + static int buflen = 0; + char *p, *b; + int l; + + /* see if the existing buffer is long enough */ + b = (srcdst == OPT_SRC) ? fp->f_base->b_src_name + : fp->f_base->b_dst_name; + + /* see if the allocated buffer is long enough */ + l = strlen(b) + strlen(fp->f_fullname) + 2; + if (l > buflen) { + /* figure out the next "nice" size to use */ + for (buflen = MAX_PATH; buflen < l; buflen += MAX_NAME); + + /* reallocate all buffers to this size */ + for (l = 0; l < 3; l++) { + buffers[l] = (char *) realloc(buffers[l], buflen); + if (buffers[l] == 0) + nomem("full name"); + } + } + + /* assemble the name in the buffer and reurn it */ + p = buffers[whichbuf]; + strcpy(p, b); + strcat(p, "/"); + strcat(p, fp->f_fullname); + return (p); +} diff --git a/usr/src/cmd/filesync/rename.c b/usr/src/cmd/filesync/rename.c new file mode 100644 index 0000000000..ed2860706f --- /dev/null +++ b/usr/src/cmd/filesync/rename.c @@ -0,0 +1,261 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. All Rights Reserved + * + * module: + * rename.c + * + * purpose: + * routines to determine whether or not any renames have taken place + * and note them (for reconciliation) if we find any + * + * contents: + * find_renames . look for files that have been renamed + * find_oldname . (static) find the file we were renamed from + * note_rename .. (static) note the rename for subsequent reconciliation + * + * notes: + * the reason renames warrant special attention is because the tree + * we have constructed is name based, and a directory rename can + * appear as zillions of changes. We attempt to find and deal with + * renames prior to doing the difference analysis. + * + * The only case we deal with here is simple renames. If new links + * have been created beneath other directories (i.e. a file has been + * moved from one directory to another), the generalized link finding + * stuff will deal with it. + * + * This is still under construction, and to completely deal with + * directory renames may require some non-trivial tree restructuring. + * There is a whole design note on this subject. In the mean time, + * we still detect file renames, so that the user will see them + * reported as "mv"s rather than as "ln"s and "rm"s. Until directory + * renames are fully implemented, they will instead be handled as + * mkdirs, massive links and unlinks, and rmdirs. 
+ */ +#ident "%W% %E% SMI" + +#include <stdio.h> + +#include "filesync.h" +#include "database.h" + + +/* local routines */ +static struct file *find_oldname(struct file *, struct file *, side_t); +static errmask_t + note_rename(struct file *, struct file *, struct file *, side_t); + +/* + * routine: + * find_renames + * + * purpose: + * recursively perform rename analysis on a directory + * + * parameters: + * file node for the suspected directory + * + * returns: + * error mask + * + * note: + * the basic algorithm here is to search every directory + * for files that have been newly created on one side, + * and then look to see if they correspond to an identical + * file that has been newly deleted on the same side. + */ +errmask_t +find_renames(struct file *fp) +{ struct file *np, *rp; + errmask_t errs = 0; + int stype, dtype, btype, side; + + /* if this isn't a directory, there is nothing to analyze */ + if (fp->f_files == 0) + return (0); + + /* look for any files under this directory that may have been renamed */ + for (np = fp->f_files; np; np = np->f_next) { + btype = np->f_info[OPT_BASE].f_type; + stype = np->f_info[OPT_SRC].f_type; + dtype = np->f_info[OPT_DST].f_type; + + /* a rename must be a file that is new on only one side */ + if (btype == 0 && stype != dtype && (!stype || !dtype)) { + side = stype ? 
OPT_SRC : OPT_DST; + rp = find_oldname(fp, np, side); + if (rp) + errs |= note_rename(fp, np, rp, side); + } + } + + /* recursively examine all my children */ + for (np = fp->f_files; np; np = np->f_next) { + errs |= find_renames(np); + } + + return (errs); +} + +/* + * routine: + * find_oldname + * + * purpose: + * to search for an old name for a newly discovered file + * + * parameters: + * file node for the containing directory + * file node for the new file + * which side the rename is believed to have happened on + * + * returns: + * pointer to likely previous file + * 0 no candidate found + * + * note: + * this routine only deals with simple renames within a single + * directory. + */ +static struct file *find_oldname(struct file *dirp, struct file *new, + side_t side) +{ struct file *fp; + long maj, min; + ino_t inum; + off_t size; + side_t otherside = (side == OPT_SRC) ? OPT_DST : OPT_SRC; + + /* figure out what we're looking for */ + inum = new->f_info[side].f_ino; + maj = new->f_info[side].f_d_maj; + min = new->f_info[side].f_d_min; + size = new->f_info[side].f_size; + + /* + * search the same directory for any entry that might describe + * the previous name of the new file. + */ + for (fp = dirp->f_files; fp; fp = fp->f_next) { + /* previous name on changed side must no longer exist */ + if (fp->f_info[side].f_type != 0) + continue; + + /* previous name on the other side must still exist */ + if (fp->f_info[otherside].f_type == 0) + continue; + + /* it must describe the same inode as the new file */ + if (fp->f_info[OPT_BASE].f_type != new->f_info[side].f_type) + continue; /* must be same type */ + if (((side == OPT_SRC) ? fp->f_s_inum : fp->f_d_inum) != inum) + continue; /* must be same inode # */ + if (((side == OPT_SRC) ? fp->f_s_maj : fp->f_d_maj) != maj) + continue; /* must be same major # */ + if (((side == OPT_SRC) ? 
fp->f_s_min : fp->f_d_min) != min) + continue; /* must be same minor # */ + + /* + * occasionally a prompt delete and create can reuse the + * same i-node in the same directory. What we really + * want is generation, but that isn't available just + * yet, so our poor-man's approximation is the size. + * There is little point in checking ownership and + * modes, since the fact that it is in the same + * directory strongly suggests that it is the same + * user who is doing the deleting and creating. + */ + if (fp->f_info[OPT_BASE].f_size != size) + continue; + + /* looks like we found a match */ + return (fp); + } + + /* no joy */ + return (0); +} + +/* + * routine: + * note_rename + * + * purpose: + * to record a discovered rename, so that the reconciliation + * phase will deal with it as a rename rather than as link + * followed by an unlink. + * + * parameters: + * file node for the containing directory + * file node for the new file + * file node for the old file + * which side the rename is believed to have happened on + * + * returns: + * error mask + */ +static errmask_t +note_rename(struct file *dirp, struct file *new, + struct file *old, side_t side) +{ + int dir; + errmask_t errs = 0; + static char *sidenames[] = {"base", "source", "dest"}; + + dir = new->f_info[side].f_type == S_IFDIR; + + if (opt_debug & DBG_ANAL) + fprintf(stderr, "ANAL: NOTE RENAME %s %s/%s -> %s/%s on %s\n", + dir ? 
"directory" : "file", + dirp->f_name, old->f_name, dirp->f_name, new->f_name, + sidenames[side]); + + /* FIX: we don't deal with directory renames yet */ + if (dir) + return (0); + + /* note that a rename has taken place */ + if (side == OPT_SRC) { + new->f_srcdiffs |= D_RENAME_TO; + old->f_srcdiffs |= D_RENAME_FROM; + } else { + new->f_dstdiffs |= D_RENAME_TO; + old->f_dstdiffs |= D_RENAME_FROM; + } + + /* put a link to the old name in the new name */ + new->f_previous = old; + + /* for most files, there is nothing else we have to do */ + if (!dir) + return (errs); + + /* + * FIX ... someday we are going to have to merge the old and + * new children into a single tree, but there are + * horrendous backout problems if we are unable to + * do the mvdir, so I have postponed this feature. + */ + + return (errs); +} diff --git a/usr/src/cmd/filesync/rules.c b/usr/src/cmd/filesync/rules.c new file mode 100644 index 0000000000..1d4b87dd16 --- /dev/null +++ b/usr/src/cmd/filesync/rules.c @@ -0,0 +1,638 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 1995 Sun Microsystems, Inc. 
All Rights Reserved + * + * module: + * rules.c + * + * purpose: + * to read and write the rules file and manage rules lists + * + * contents: + * reading rules file + * read_rules + * (static) read_command + * writing rules file + * write_rules + * (static) rw_header, rw_base + * adding rules + * add_ignore, add_include + * (static) add_rule + * adding/checking restrictions + * add_restr, check_restr + */ +#ident "%W% %E% SMI" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <ctype.h> + +#include "filesync.h" +#include "database.h" +#include "messages.h" +#include "debug.h" + +/* + * routines: + */ +static errmask_t rw_base(FILE *file, struct base *bp); +static errmask_t rw_header(FILE *file); +static errmask_t add_rule(struct base *, int, const char *); +static char *read_cmd(char *); + +/* + * globals + */ +static int rules_added; +static int restr_added; + +/* + * locals + */ +#define RULE_MAJOR 1 /* rules file format major rev */ +#define RULE_MINOR 1 /* rules file format minor rev */ +#define RULE_TAG "PACKINGRULES" /* magic string for rules files */ + +/* + * routine: + * read_rules + * + * purpose: + * to read in the rules file + * + * parameters: + * name of rules file + * + * returns: + * error mask + * + * notes: + * later when I implement a proper (comment preserving) update + * function I'm going to wish I had figured out how to build the + * input functions for this function in a way that would make + * the more usable for that too. 
+ */ +errmask_t +read_rules(char *name) +{ FILE *file; + errmask_t errs = 0; + int flags; + int major, minor; + char *s, *s1, *s2; + struct base *bp; + char *errstr = "???"; + + file = fopen(name, "r"); + if (file == NULL) { + fprintf(stderr, gettext(ERR_open), gettext(TXT_rules), + name); + return (ERR_FILES); + } + + lex_linenum = 0; + + if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: READ RULES %s\n", name); + + bp = &omnibase; /* default base before any others */ + + while (!feof(file)) { + /* find the first token on the line */ + s = lex(file); + + /* skip blank lines and comments */ + if (s == 0 || *s == 0 || *s == '#' || *s == '*') + continue; + + /* see if the first token is a known keyword */ + if (strcmp(s, "BASE") == 0) { + + /* get the source & destination tokens */ + errstr = gettext(TXT_srcdst); + s1 = lex(0); + if (s1 == 0) + goto bad; + s1 = strdup(s1); + + s2 = lex(0); + if (s2 == 0) + goto bad; + s2 = strdup(s2); + + /* creat the new base pair */ + bp = add_base(s1, s2); + bp->b_flags |= F_LISTED; + + free(s1); + free(s2); + continue; + } + + if (strcmp(s, "LIST") == 0) { + + /* make sure we are associated with a real base */ + if (bp == &omnibase) { + errstr = gettext(TXT_nobase); + goto bad; + } + + /* skip to the next token */ + s = lex(0); + errstr = gettext(TXT_noargs); + if (s == 0) + goto bad; + + /* see if it is a program or a name */ + if (*s == '!') { + errs |= add_rule(bp, R_PROGRAM, + read_cmd(&s[1])); + } else { + do { + flags = wildcards(s) ? 
R_WILD : 0; + errs |= add_rule(bp, flags, s); + s = lex(0); + } while (s != 0); + } + continue; + } + + if (strcmp(s, "IGNORE") == 0) { + + /* skip to the next token */ + s = lex(0); + errstr = gettext(TXT_noargs); + if (s == 0) + goto bad; + + flags = R_IGNORE; + + /* see if it is a program or a name */ + if (*s == '!') { + errs |= add_rule(bp, R_PROGRAM|flags, + read_cmd(&s[1])); + } else { + do { + if (wildcards(s)) + flags |= R_WILD; + errs |= add_rule(bp, flags, s); + s = lex(0); + } while (s != 0); + } + continue; + } + + if (strcmp(s, "VERSION") == 0 || strcmp(s, RULE_TAG) == 0) { + s = lex(0); + errstr = gettext(TXT_noargs); + if (s == 0) + goto bad; + + major = strtol(s, &s1, 10); + errstr = gettext(TXT_badver); + if (*s1 != '.') + goto bad; + minor = strtol(&s1[1], 0, 10); + + if (major != RULE_MAJOR || minor > RULE_MINOR) { + fprintf(stderr, gettext(ERR_badver), + major, minor, gettext(TXT_rules), name); + errs |= ERR_FILES; + } + continue; + } + + bad: /* log the error and continue processing to find others */ + fprintf(stderr, gettext(ERR_badinput), + lex_linenum, errstr, name); + errs |= ERR_FILES; + } + + + (void) fclose(file); + return (errs); +} + +/* + * routine: + * read_cmd + * + * purpose: + * to lex a runnable command (! 
lines) into a buffer + * + * parameters: + * first token + * + * returns: + * pointer to a command line in a static buffer + * (it is assumed the caller will copy it promptly) + * + * notes: + * this is necessary because lex has already choped off + * the first token for us + */ +static char *read_cmd(char * s) +{ + static char cmdbuf[ MAX_LINE ]; + + cmdbuf[0] = 0; + + do { + if (*s) { + strcat(cmdbuf, s); + strcat(cmdbuf, " "); + } + } while ((s = lex(0)) != 0); + + return (cmdbuf); +} + +/* + * routine: + * write_rules + * + * purpose: + * to rewrite the rules file, appending the new rules + * + * parameters: + * name of output file + * + * returns: + * error mask + * + */ +errmask_t +write_rules(char *name) +{ FILE *newfile; + errmask_t errs = 0; + struct base *bp; + char tmpname[ MAX_PATH ]; + + /* if no-touch is specified, we don't update files */ + if (opt_notouch || rules_added == 0) + return (0); + + /* create a temporary output file */ + sprintf(tmpname, "%s-TMP", name); + + /* create our output file */ + newfile = fopen(tmpname, "w+"); + if (newfile == NULL) { + fprintf(stderr, gettext(ERR_creat), gettext(TXT_rules), + name); + return (ERR_FILES); + } + + if (opt_debug & DBG_FILES) + fprintf(stderr, "FILE: UPDATE RULES %s\n", name); + + errs |= rw_header(newfile); + errs |= rw_base(newfile, &omnibase); + for (bp = bases; bp; bp = bp->b_next) + errs |= rw_base(newfile, bp); + + if (ferror(newfile)) { + fprintf(stderr, gettext(ERR_write), gettext(TXT_rules), + tmpname); + errs |= ERR_FILES; + } + + if (fclose(newfile)) { + fprintf(stderr, gettext(ERR_fclose), gettext(TXT_rules), + tmpname); + errs |= ERR_FILES; + } + + /* now switch the new file for the old one */ + if (errs == 0) + if (rename(tmpname, name) != 0) { + fprintf(stderr, gettext(ERR_rename), + gettext(TXT_rules), tmpname, name); + errs |= ERR_FILES; + } + + return (errs); +} + +/* + * routine: + * rw_header + * + * purpose: + * to write out a rules header + * + * parameters: + * FILE* for the 
output file + * + * returns: + * error mask + * + * notes: + */ +static errmask_t rw_header(FILE *file) +{ + time_t now; + struct tm *local; + + /* figure out what time it is */ + (void) time(&now); + local = localtime(&now); + + fprintf(file, "%s %d.%d\n", RULE_TAG, RULE_MAJOR, RULE_MINOR); + fprintf(file, "#\n"); + fprintf(file, "# filesync rules, last written by %s, %s", + cuserid((char *) 0), asctime(local)); + fprintf(file, "#\n"); + + return (0); +} + +/* + * routine: + * rw_base + * + * purpose: + * to write out the summary for one base-pair + * + * parameters: + * FILE * for the output file + * + * returns: + * error mask + * + * notes: + */ +static errmask_t rw_base(FILE *file, struct base *bp) +{ struct rule *rp; + + fprintf(file, "\n"); + + /* global rules don't appear within a base */ + if (bp->b_ident) + fprintf(file, "BASE %s %s\n", noblanks(bp->b_src_spec), + noblanks(bp->b_dst_spec)); + + for (rp = bp->b_includes; rp; rp = rp->r_next) + if (rp->r_flags & R_PROGRAM) + fprintf(file, "LIST !%s\n", rp->r_file); + else + fprintf(file, "LIST %s\n", noblanks(rp->r_file)); + + for (rp = bp->b_excludes; rp; rp = rp->r_next) + if (rp->r_flags & R_PROGRAM) + fprintf(file, "IGNORE !%s\n", rp->r_file); + else + fprintf(file, "IGNORE %s\n", noblanks(rp->r_file)); + + return (0); +} + +/* + * routine: + * add_rule + * + * purpose: + * to add a new rule + * + * parameters: + * pointer to list base + * rule flags + * associated name/arguments + * + * returns: + * error flags + * + * notes: + * we always copy the argument string because most of them + * were read from a file and are just in a transient buffer + */ +static errmask_t add_rule(struct base *bp, int flags, const char *args) +{ struct rule *rp; + struct rule **list; + + rp = malloc(sizeof (struct rule)); + if (rp == 0) + nomem("rule struture"); + + /* initialize the new base */ + memset((void *) rp, 0, sizeof (struct rule)); + rp->r_flags = flags; + rp->r_file = strdup(args); + + /* figure out which list 
to put it on */ + if (flags&R_IGNORE) + list = &bp->b_excludes; + else if (flags&R_RESTRICT) + list = &bp->b_restrictions; + else + list = &bp->b_includes; + + while (*list) + list = &((*list)->r_next); + *list = rp; + + if (flags & R_NEW) + rules_added++; + + if (opt_debug & DBG_RULE) { + fprintf(stderr, "RULE: base=%d, ", bp->b_ident); + fprintf(stderr, "flags=%s, ", + showflags(rflags, rp->r_flags)); + fprintf(stderr, "arg=%s\n", rp->r_file); + } + + return (0); +} + +/* + * routine: + * add_ignore, add_include + * + * purpose: + * wrappers for add_rule that permit outsiders (like main.c) + * not to know what is inside of a base, file, or list entry + * + * parameters: + * base under which rules should be added + * argument associated with rule + * + * returns: + * error flags + * + * notes: + * basically these routines figure out what the right + * flags are for a rule, and what list to put it on, + * and then call a common handler. + */ +errmask_t +add_ignore(struct base *bp, char *name) +{ int flags = R_IGNORE | R_NEW; + + if (bp == 0) + bp = &omnibase; + + if (wildcards(name)) + flags |= R_WILD; + + return (add_rule(bp, flags, name)); +} + +errmask_t +add_include(struct base *bp, char *name) +{ int flags = R_NEW; + + if (bp == 0) + bp = &omnibase; + + if (wildcards(name)) + flags |= R_WILD; + + bp->b_flags |= F_LISTED; + + return (add_rule(bp, flags, name)); +} + +/* + * routine: + * add_restr + * + * purpose: + * to add a restriction to a base + * + * parameters: + * address of base + * restriction string + * + * returns: + * error mask + * + * notes: + * a restriction is specified on the command line and + * tells us to limit our analysis/reconcilation to + * specified files and/or directories. We deal with + * these by adding a restriction rule to any base that + * looks like it might fit the restriction. We need to + * treat this as a rule because the restriction string + * may extend beyond the base directory and part-way into + * its tree ... 
meaning that individual file names under + * the base will have to be checked against the restriction. + */ +errmask_t +add_restr(char *restr) +{ const char *s; + errmask_t errs = 0; + struct base *bp; + + for (bp = bases; bp; bp = bp->b_next) { + /* + * see if this restriction could apply to this base. + * It could match either the source or destination + * directory name for this base. If it matches neither + * then the restriction does not apply to this base. + */ + s = prefix(restr, bp->b_src_name); + if (s == 0) + s = prefix(restr, bp->b_dst_name); + if (s == 0) + continue; + + /* + * if there is more restriction string after the + * base, we will need to note the remainder of the + * string so that we can match individual files + * against it. + */ + if (*s == '/') + s++; + + errs |= add_rule(bp, R_RESTRICT, s); + restr_added++; + } + + return (errs); +} + +/* + * routine: + * check_restr + * + * purpose: + * to see if an argument falls within restrictions + * + * parameters: + * pointer to relevent base + * file name + * + * returns: + * TRUE name is within restrictions + * FALSE name is outside of restrictions + * MAYBE name is on the path to a restriction + * + * notes: + * if no restrictions have been specified, we evaluate + * everything. If any restrictions have been specified, + * we process only files that match one of the restrictions. + * + * add_restr has ensured that if the restriction includes + * a portion that must be matched by individual files under + * the base, that the restriction rule will contain that + * portion of the restriction which must be matched against + * individual file names. 
+ */ +bool_t +check_restr(struct base *bp, const char *name) +{ struct rule *rp; + + /* if there are no restrictions, everything is OK */ + if (restr_added == 0) + return (TRUE); + + /* now we have to run through the list */ + for (rp = bp->b_restrictions; rp; rp = rp->r_next) { + /* see if current path is under the restriction */ + if (prefix(name, rp->r_file)) + return (TRUE); + + /* see if current path is on the way to restr */ + if (prefix(rp->r_file, name)) + /* + * this is kinky, but walker really needs + * to know the difference between a directory + * that we are unreservedly scanning, and one + * that we are scanning only to find something + * beneath it. + */ + return (MAYBE); + } + + /* + * there are restrictions in effect and this file doesn't seem + * to meet any of them + */ + if (opt_debug & DBG_RULE) + fprintf(stderr, "RULE: FAIL RESTRICTION base=%d, file=%s\n", + bp->b_ident, name); + + return (FALSE); +} |