diff options
-rw-r--r-- | usr/src/cmd/logadm/main.c | 81 |
1 files changed, 78 insertions, 3 deletions
diff --git a/usr/src/cmd/logadm/main.c b/usr/src/cmd/logadm/main.c index d1f105f7be..1b7cceb5c7 100644 --- a/usr/src/cmd/logadm/main.c +++ b/usr/src/cmd/logadm/main.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, Joyent, Inc. All rights reserved. * * logadm/main.c -- main routines for logadm * @@ -37,6 +38,7 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/filio.h> +#include <sys/sysmacros.h> #include <time.h> #include <utime.h> #include "err.h" @@ -1100,10 +1102,12 @@ docmd(struct opts *opts, const char *msg, const char *cmd, static void docopytruncate(struct opts *opts, const char *file, const char *file_copy) { - int fi, fo, len; - char buf[4096]; + int fi, fo; + char buf[128 * 1024]; struct stat s; struct utimbuf times; + off_t written = 0, rem, last = 0, thresh = 1024 * 1024; + ssize_t len; /* print info if necessary */ if (opts_count(opts, "vn") != NULL) { @@ -1129,7 +1133,7 @@ docopytruncate(struct opts *opts, const char *file, const char *file_copy) } /* create new file for copy destination with correct attributes */ - if ((fo = open(file_copy, O_CREAT|O_APPEND|O_WRONLY, s.st_mode)) < 0) { + if ((fo = open(file_copy, O_CREAT|O_TRUNC|O_WRONLY, s.st_mode)) < 0) { err(EF_SYS, "cannot create file: %s", file_copy); (void) close(fi); return; @@ -1137,6 +1141,77 @@ docopytruncate(struct opts *opts, const char *file, const char *file_copy) (void) fchown(fo, s.st_uid, s.st_gid); + /* + * Now we'll loop, reading the log file and writing it to our copy + * until the bytes remaining are beneath our atomicity threshold -- at + * which point we'll lock the file and copy the remainder atomically. + * The body of this loop is non-atomic with respect to writers, the + * rationale being that total atomicity (that is, locking the file for + * the entire duration of the copy) comes at too great a cost for a + * large log file, as the writer (i.e., the daemon whose log is being + * rolled) can be blocked for an unacceptable duration. (For one + * particularly loquacious daemon, this period was observed to be + * several minutes in length -- a time so long that it induced + * additional failures in dependent components.) Note that this means + * that if the log file is not always appended to -- if it is opened + * without O_APPEND or otherwise truncated outside of logadm -- this + * will result in our log snapshot being incorrect. But of course, in + * either of these cases, the use of logadm at all is itself + * suspect... + */ + do { + if (fstat(fi, &s) < 0) { + err(EF_SYS, "cannot stat: %s", file); + (void) close(fi); + (void) close(fo); + (void) remove(file_copy); + return; + } + + if ((rem = s.st_size - written) < thresh) { + if (rem >= 0) + break; + + /* + * If the file became smaller, something fishy is going + * on; we'll truncate our copy, reset our seek offset + * and break into the atomic copy. + */ + (void) ftruncate(fo, 0); + (void) lseek(fo, 0, SEEK_SET); + (void) lseek(fi, 0, SEEK_SET); + break; + } + + if (written != 0 && rem > last) { + /* + * We're falling behind -- this file is getting bigger + * faster than we're able to write it; break out and + * lock the file to block the writer. + */ + break; + } + + last = rem; + + while (rem > 0) { + if ((len = read(fi, buf, MIN(sizeof (buf), rem))) <= 0) + break; + + if (write(fo, buf, len) == len) { + rem -= len; + written += len; + continue; + } + + err(EF_SYS, "cannot write into file %s", file_copy); + (void) close(fi); + (void) close(fo); + (void) remove(file_copy); + return; + } + } while (len >= 0); + /* lock log file so that nobody can write into it before we are done */ if (fchmod(fi, s.st_mode|S_ISGID) < 0) err(EF_SYS, "cannot set mandatory lock bit for: %s", file); |