diff options
Diffstat (limited to 'usr/src/cmd/zoneadmd')
-rw-r--r-- | usr/src/cmd/zoneadmd/Makefile | 38 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/Makefile.com | 72 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/amd64/Makefile | 31 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/i386/Makefile | 30 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/log.c | 1027 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/vplat.c | 684 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zcons.c | 189 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zfd.c | 1238 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.c | 984 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.h | 44 |
10 files changed, 3745 insertions, 592 deletions
diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile index e75453fe56..cfff59f12a 100644 --- a/usr/src/cmd/zoneadmd/Makefile +++ b/usr/src/cmd/zoneadmd/Makefile @@ -18,12 +18,10 @@ # # CDDL HEADER END - -# - # # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. # Copyright 2014 Nexenta Systems, Inc. All rights reserved. +# Copyright (c) 2011, Joyent, Inc. All rights reserved. # PROG= zoneadmd @@ -31,40 +29,46 @@ PROG= zoneadmd include ../Makefile.cmd include ../Makefile.ctf -ROOTCMDDIR= $(ROOTLIB)/zones +$(64ONLY)SUBDIRS= $(MACH) +$(BUILD64)SUBDIRS += $(MACH64) -OBJS= zoneadmd.o zcons.o vplat.o -SRCS = $(OBJS:.o=.c) -POFILE=zoneadmd_all.po -POFILES= $(OBJS:%.o=%.po) +all := TARGET = all +install := TARGET = install +clean := TARGET = clean +clobber := TARGET = clobber +lint := TARGET = lint CFLAGS += $(CCVERBOSE) -LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \ - -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \ - -linetutil -lscf XGETFLAGS += -a -x zoneadmd.xcl +ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones + .KEEP_STATE: .PARALLEL: -all: $(PROG) +all: $(SUBDIRS) $(PROG): $(OBJS) $(LINK.c) -o $@ $(OBJS) $(LDLIBS) $(POST_PROCESS) -install: all $(ROOTCMD) +install: $(SUBDIRS) + -$(RM) $(ROOTUSRLIBZONES)/$(PROG) + -$(LN) $(ISAEXEC) $(ROOTUSRLIBZONES)/$(PROG) -$(POFILE): $(POFILES) - $(RM) $@ - $(CAT) $(POFILES) > $@ +$(POFILE): clean: $(RM) $(OBJS) check: - $(CSTYLE) -p -P $(SRCS:%=%) + $(CSTYLE) -p -P *.c + +$(SUBDIRS): FRC + @cd $@; pwd; $(MAKE) $(TARGET) + +FRC: include ../Makefile.targ diff --git a/usr/src/cmd/zoneadmd/Makefile.com b/usr/src/cmd/zoneadmd/Makefile.com new file mode 100644 index 0000000000..aaf21c7f5b --- /dev/null +++ b/usr/src/cmd/zoneadmd/Makefile.com @@ -0,0 +1,72 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). 
+# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright 2020 Joyent, Inc. +# + +PROG= zoneadmd + +include ../../Makefile.cmd +include ../../Makefile.ctf + +ROOTCMDDIR= $(ROOTLIB)/zones + +OBJS= zoneadmd.o zcons.o zfd.o vplat.o log.o + +CFLAGS += $(CCVERBOSE) +LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \ + -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \ + -linetutil -lscf -lppt -lcustr + +CSTD= $(CSTD_GNU99) + +.KEEP_STATE: + +%.o: ../%.c + $(COMPILE.c) $< + $(POST_PROCESS_O) + +ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones +ROOTUSRLIBZONES32 = $(ROOTUSRLIBZONES)/$(MACH32) +ROOTUSRLIBZONES64 = $(ROOTUSRLIBZONES)/$(MACH64) +ROOTUSRLIBZONESPROG32 = $(ROOTUSRLIBZONES32)/$(PROG) +ROOTUSRLIBZONESPROG64 = $(ROOTUSRLIBZONES64)/$(PROG) +$(ROOTUSRLIBZONES32)/%: $(ROOTUSRLIBZONES32) % + $(INS.file) +$(ROOTUSRLIBZONES64)/%: $(ROOTUSRLIBZONES64) % + $(INS.file) +$(ROOTUSRLIBZONES32): + $(INS.dir) + +all: $(PROG) + +$(PROG): $(OBJS) + $(LINK.c) -o $@ $(OBJS) $(LDLIBS) + $(POST_PROCESS) + +clean: + $(RM) $(OBJS) + +lint: + $(LINT.c) ../*.c $(LDLIBS) + +include ../../Makefile.targ diff --git a/usr/src/cmd/zoneadmd/amd64/Makefile b/usr/src/cmd/zoneadmd/amd64/Makefile new file mode 100644 index 0000000000..75ac51db32 --- /dev/null +++ 
b/usr/src/cmd/zoneadmd/amd64/Makefile @@ -0,0 +1,31 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2011, Joyent, Inc. All rights reserved. +# + +.KEEP_STATE: + +include ../Makefile.com +include ../../Makefile.cmd.64 + +install: all $(ROOTUSRLIBZONES64) $(ROOTUSRLIBZONESPROG64) diff --git a/usr/src/cmd/zoneadmd/i386/Makefile b/usr/src/cmd/zoneadmd/i386/Makefile new file mode 100644 index 0000000000..a8764e0638 --- /dev/null +++ b/usr/src/cmd/zoneadmd/i386/Makefile @@ -0,0 +1,30 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 2011, Joyent, Inc. All rights reserved. +# + +.KEEP_STATE: + +include ../Makefile.com + +install: all $(ROOTUSRLIBZONES32) $(ROOTUSRLIBZONESPROG32) diff --git a/usr/src/cmd/zoneadmd/log.c b/usr/src/cmd/zoneadmd/log.c new file mode 100644 index 0000000000..a4ecc3e1e8 --- /dev/null +++ b/usr/src/cmd/zoneadmd/log.c @@ -0,0 +1,1027 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2020 Joyent, Inc. + */ + +/* + * zoneadmd logging + * + * zoneadmd logs to log files under <zonepath>/logs. Each log entry is a json + * structure of the form: + * + * { + * "log": "some message\n", + * "stream": "stderr", + * "time": "2018-03-28T13:25:02.670423000Z" + * } + * + * Unlike the example above, the entries in the log file are not pretty-printed. + * Messages are processed so that they have the proper json escapes for + * problematic characters. Excessively long messages may be truncated. + * + * To use these interfaces: + * + * int logid; + * + * logstream_init(zlogp); + * + * logid = logstream_open("stdio.log", "stdout", flags); + * ... + * logstream_write(logid, buf, len); + * ... + * logstream_close(logid); + * + * logstream_init() needs to be called only once. + * + * logstream_open() opens a log file (if not already open) and associates the + * specified stream with it. 
+ * + * The following flag is supported: + * + * LS_LINE_BUFFERED Buffer writes until a newline is encountered or the + * buffer fills. This should only be used with streams + * that are written to by a single thread. The timestamp + * on log messages are the time that the log entry was + * written to the log file. This means the timestamp is + * the time when the console user hits enter, not the time + * that the prompt was printed. + * + * Line buffering is particularly useful for bhyve console logging because + * bhyve's UART emulation causes read() calls in zcons.c to return far fewer + * than 10 characters at a time. Without line buffering, a small number of + * logged characters are accompanied by about 64 characters of timestamp and + * other overhead. Line buffering saves quite a lot of space and makes the log + * much easier to read. + * + * + * Log rotation + * + * Two attributes, zlog-max-size and zlog-keep-rotated are used for automatic + * log rotation. zlog-max-size is the approximate maximum size of a log before + * it is automatically rotated. Rotated logs are renamed as + * <log>.<iso-8601-stamp>. If zlog-keep-rotated is specified and is an integer + * greater than zero, only that number of rotated logs will be retained. + * + * If zlog-max-size is not specified, log rotation will not happen + * automatically. An external log rotation program may rename the log file(s), + * then send SIGHUP to zoneadmd. + * + * Log rotation can be forced with SIGUSR1. In this case, the log will be + * rotated as though it hit the maximum size and will be subject to retention + * rules described above. + * + * + * Locking strategy + * + * Callers need not worry about locking. In the interest of simplicity, a + * single global lock is used to protect the state of the log files and the + * associated streams. Locking is necessary because reboots and log rotations + * can cause various state changes. 
Without locking, races could cause log + * entries to be directed to the wrong file descriptors. + * + * The simplistic global lock complicates error reporting within logging + * routines. zerror() must not be called while holding logging_lock. Rather, + * logstream_err() should be used to log via syslog. + */ + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <glob.h> +#include <libcustr.h> +#include <netdb.h> +#include <signal.h> +#include <stdarg.h> +#include <stdio.h> +#include <strings.h> +#include <synch.h> +#include <syslog.h> +#include <time.h> +#include <thread.h> +#include <unistd.h> + +#include <sys/debug.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/varargs.h> + +#include "zoneadmd.h" + +/* + * Currently we only expect stdout, stderr, zoneadmd, and console. Increase + * MAX_ZLOG_STREAMS if more streams are added. If the count increases + * significantly, logfile_t and logstream_t elements should be dynamically + * allocated and the algorithms associated with opening and closing them should + * become more efficient. + */ +#define MAX_LOG_STREAMS 4 + +#define ZLOG_MAXSZ "zlog-max-size" /* zonecfg attr */ +#define ZLOG_MAXSZ_MIN (1024 * 1024) /* min size for autorotate */ +#define ZLOG_KEEP "zlog-keep-rotated" /* zonecfg attr */ +#define ZLOG_KEEP_MAX 1000 /* number of log files */ + +/* + * While we could get the numeric value of BUNYAN_LOG_INFO from bunyan.h, + * the log version is internal to the library, so we just define the values + * we need here. 
+ */ +#define BUNYAN_VERSION 0 +#define BUNYAN_LOG_LEVEL 30 /* info */ + +typedef struct logfile { + char lf_path[MAXPATHLEN]; /* log file name (absolute path) */ + char lf_name[MAXNAMELEN]; /* tail of log file name */ + char lf_buf[BUFSIZ]; /* Buffer for event messages */ + custr_t *lf_cus; /* custr_t wrapper for lf_buf */ + int lf_fd; /* file descriptor */ + size_t lf_size; /* Current size */ + boolean_t lf_write_err; /* Avoid spamming console via logsys */ + boolean_t lf_closing; /* Avoid rotation recursion */ +} logfile_t; + +/* Large enough to hold BUFSIZ bytes with some escaping */ +#define LS_BUFSZ (BUFSIZ * 2) + +/* Large enough to hold LS_BUF contents + bunyan mandatory properties */ +#define LS_OBUFSZ (LS_BUFSZ + MAXNAMELEN + 128) + +typedef struct logstream { + char ls_stream[MAXNAMELEN]; /* stdout, stderr, etc. */ + char ls_buf[LS_BUFSZ]; /* Not-yet written data, json */ + char ls_obuf[LS_OBUFSZ]; /* Buffer to form output json */ + custr_t *ls_cusbuf; /* custr_t wrapper to ls_buf */ + custr_t *ls_cusobuf; /* custr_t wrapper to ls_ofbuf */ + logstream_flags_t ls_flags; + logfile_t *ls_logfile; /* N streams per log file */ +} logstream_t; + +typedef struct jsonpair { + const char *jp_key; + const char *jp_val; +} jsonpair_t; + +boolean_t logging_poisoned = B_FALSE; + +/* + * MAX_LOG_STREAMS is a small number so we allocate in the simplest way. 
+ */ +static logstream_t streams[MAX_LOG_STREAMS]; +static logfile_t logfiles[MAX_LOG_STREAMS]; + +static char host[MAXHOSTNAMELEN]; +static char pidstr[10]; + +static boolean_t logging_initialized = B_FALSE; +static uint64_t logging_rot_size; /* See ZLOG_MAXSZ */ +static uint64_t logging_rot_keep; /* See ZLOG_KEEP */ +static int logging_pending_sig = 0; /* Signal recvd while logging */ +static mutex_t logging_lock = ERRORCHECKMUTEX; /* The global logging lock */ + +static void logstream_flush_all(logfile_t *); +static void logstream_sighandler(int); +static void rotate_log(logfile_t *); +static size_t make_json(jsonpair_t *, size_t, custr_t *); +static void logfile_write(logfile_t *, custr_t *); + +/* + * If errors are encountered while logging_lock is held, we can't use zerror(). + */ +static void +logstream_err(boolean_t use_strerror, const char *fmt, ...) +{ + va_list alist; + char buf[MAXPATHLEN * 2]; + char *bp; + int saved_errno = errno; + + (void) snprintf(buf, sizeof (buf), "[zone %s] ", zone_name); + + bp = &buf[strlen(buf)]; + + va_start(alist, fmt); + (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist); + va_end(alist); + + if (use_strerror) { + bp = &buf[strlen(buf)]; + (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", + strerror(saved_errno)); + } + syslog(LOG_ERR, "%s", buf); + + errno = saved_errno; +} + +static void +logstream_lock(void) +{ + VERIFY(logging_initialized); + VERIFY(!logging_poisoned); + + mutex_enter(&logging_lock); +} + +static void +logstream_unlock(void) +{ + int sig = logging_pending_sig; + + logging_pending_sig = 0; + mutex_exit(&logging_lock); + + /* + * If a signal arrived while this thread was holding the lock, call the + * handler. 
+ */ + if (sig != 0) { + logstream_sighandler(sig); + } +} + +static void +logfile_write_event(logfile_t *lfp, const char *stream, const char *event) +{ + size_t len; + jsonpair_t pairs[] = { + { "stream", stream }, + { "msg", event } + }; + + len = make_json(pairs, ARRAY_SIZE(pairs), lfp->lf_cus); + if (len >= sizeof (lfp->lf_buf)) { + logstream_err(B_FALSE, "%s: buffer too small. Need %zu bytes, " + "have %zu bytes", __func__, len + 1, sizeof (lfp->lf_buf)); + return; + } + + logfile_write(lfp, lfp->lf_cus); +} + +static void +close_log(logfile_t *lfp, const char *why, boolean_t ign_err) +{ + int err; + + VERIFY(MUTEX_HELD(&logging_lock)); + + /* + * Something may have gone wrong during log rotation, leading to a + * zombie log. + */ + if (lfp->lf_fd == -1) { + return; + } + + lfp->lf_closing = B_TRUE; + + logstream_flush_all(lfp); + + logfile_write_event(lfp, "logfile", why); + + err = close(lfp->lf_fd); + if (!ign_err) + VERIFY0(err); + + lfp->lf_size = 0; + lfp->lf_fd = -1; +} + +static void +open_log(logfile_t *lfp, const char *why) +{ + struct stat64 sb; + + VERIFY(MUTEX_HELD(&logging_lock)); + VERIFY3S(lfp->lf_fd, ==, -1); + + lfp->lf_fd = open(lfp->lf_path, + O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, 0600); + if (lfp->lf_fd == -1) { + logstream_err(B_TRUE, "Cannot open log file %s", + lfp->lf_path); + lfp->lf_write_err = B_TRUE; + return; + } + + VERIFY0(fstat64(lfp->lf_fd, &sb)); + lfp->lf_size = sb.st_size; + lfp->lf_write_err = B_FALSE; + lfp->lf_closing = B_FALSE; + + logfile_write_event(lfp, "logfile", why); +} + +static void +logstream_sighandler(int sig) +{ + int i; + + /* + * Protect against recursive mutex enters when a signal comes during + * logging. This will cause this function to be called again just after + * this thread drops the lock. 
+ */ + if (MUTEX_HELD(&logging_lock)) { + logging_pending_sig = sig; + return; + } + + logstream_lock(); + if (logging_poisoned) { + logstream_unlock(); + return; + } + + for (i = 0; i < ARRAY_SIZE(logfiles); i++) { + /* Inactive logfile slot */ + if (logfiles[i].lf_name[0] == '\0') { + continue; + } + + switch (sig) { + case SIGHUP: + close_log(&logfiles[i], "close-rotate", B_FALSE); + open_log(&logfiles[i], "open-rotate"); + break; + case SIGUSR1: + rotate_log(&logfiles[i]); + break; + default: + logstream_err(B_FALSE, "unhandled signal %d", sig); + } + } + + logstream_unlock(); +} + +static void +get_attr_uint64(zlog_t *zlogp, zone_dochandle_t handle, const char *name, + uint64_t max, uint64_t *valp) +{ + struct zone_attrtab tab = { 0 }; + char *p; + uint64_t val; + + ASSERT(!MUTEX_HELD(&logging_lock)); + + (void) strlcpy(tab.zone_attr_name, name, sizeof (tab.zone_attr_name)); + if (zonecfg_lookup_attr(handle, &tab) != Z_OK) { + return; + } + + errno = 0; + val = strtol(tab.zone_attr_value, &p, 10); + if (errno != 0 && *p == '\0') { + zerror(zlogp, errno != 0, "Bad value '%s' for 'attr name=%s'", + tab.zone_attr_value, tab.zone_attr_name); + return; + } + if (val > max) { + zerror(zlogp, B_FALSE, "Value of attr '%s' is too large. " + "Reducing to %llu", name, max); + val = max; + } + + *valp = val; +} + +static void +logstream_atfork_prepare(void) +{ + logstream_lock(); +} + +static void +logstream_atfork_parent(void) +{ + logstream_unlock(); +} + +/* + * logstream_*() should never be called in a child process, so we make sure this + * code is never called there. + * + * zerror() in a child process is still safe: it knows to check for poisoning, + * and in such a case will redirect its output to stderr on the presumption it + * is a pipe to the parent. 
+ */ +static void +logstream_atfork_child(void) +{ + logging_poisoned = B_TRUE; + logging_pending_sig = 0; + (void) snprintf(pidstr, sizeof (pidstr), "%d", getpid()); + logstream_unlock(); +} + +void +logstream_init(zlog_t *zlogp) +{ + zone_dochandle_t handle; + int i; + + VERIFY(!logging_initialized); + + VERIFY0(gethostname(host, sizeof (host))); + (void) snprintf(pidstr, sizeof (pidstr), "%d", getpid()); + + for (i = 0; i < ARRAY_SIZE(logfiles); i++) { + logfile_t *lfp = &logfiles[i]; + + lfp->lf_fd = -1; + if (custr_alloc_buf(&lfp->lf_cus, lfp->lf_buf, + sizeof (lfp->lf_buf)) != 0) { + (void) fprintf(stderr, "failed to allocate custr_t for " + "log file\n"); + abort(); + } + } + + for (i = 0; i < ARRAY_SIZE(streams); i++) { + logstream_t *lsp = &streams[i]; + + if (custr_alloc_buf(&lsp->ls_cusbuf, lsp->ls_buf, + sizeof (lsp->ls_buf)) != 0 || + custr_alloc_buf(&lsp->ls_cusobuf, lsp->ls_obuf, + sizeof (lsp->ls_obuf)) != 0) { + (void) fprintf(stderr, "failed to allocate custr_t for " + "log stream\n"); + abort(); + } + } + + VERIFY0(pthread_atfork(logstream_atfork_prepare, + logstream_atfork_parent, logstream_atfork_child)); + + logging_initialized = B_TRUE; + + /* Now it is safe to use zlogp */ + + if ((handle = zonecfg_init_handle()) == NULL || + zonecfg_get_handle(zone_name, handle) != Z_OK) { + zerror(zlogp, B_FALSE, "failed to open zone configuration " + "while initializing logging"); + } else { + get_attr_uint64(zlogp, handle, ZLOG_MAXSZ, UINT64_MAX, + &logging_rot_size); + if (logging_rot_size != 0 && + logging_rot_size < ZLOG_MAXSZ_MIN) { + zerror(zlogp, B_FALSE, "%s value %llu is too small. " + "Setting to %d", ZLOG_MAXSZ, logging_rot_size, + ZLOG_MAXSZ_MIN); + logging_rot_size = ZLOG_MAXSZ_MIN; + } + get_attr_uint64(zlogp, handle, ZLOG_KEEP, ZLOG_KEEP_MAX, + &logging_rot_keep); + } + + zonecfg_fini_handle(handle); + + /* + * This thread should receive SIGHUP so that it can close the log + * file and reopen it during log rotation. 
SIGUSR1 can be used to force + * a log rotation. + */ + (void) sigset(SIGHUP, logstream_sighandler); + (void) sigset(SIGUSR1, logstream_sighandler); +} + +/* + * Rotate a single log file. The global lock must be held while this is called. + */ +static void +rotate_log(logfile_t *lfp) +{ + time_t t; + struct tm gtm; + char path[MAXPATHLEN]; + int64_t i; + size_t len; + glob_t glb = { 0 }; + int err; + + VERIFY(MUTEX_HELD(&logging_lock)); + + if (lfp->lf_closing) { + return; + } + + if ((t = time(NULL)) == (time_t)-1 || gmtime_r(&t, &gtm) == NULL) { + logstream_err(B_TRUE, "failed to format time"); + return; + } + + (void) snprintf(path, sizeof (path), "%s.%04d%02d%02dT%02d%02d%02dZ", + lfp->lf_path, gtm.tm_year + 1900, gtm.tm_mon + 1, gtm.tm_mday, + gtm.tm_hour, gtm.tm_min, gtm.tm_sec); + + if (rename(lfp->lf_path, path) != 0) { + logstream_err(B_TRUE, "failed to rotate log file " + "'%s' to '%s'", lfp->lf_path, path); + } + + close_log(lfp, "close-rotate", B_FALSE); + open_log(lfp, "open-rotate"); + + if (logging_rot_keep == 0) { + return; + } + + /* + * Remove old logs. + */ + len = snprintf(path, sizeof (path), + /* <lf_path>.YYYYmmdd */ + "%s.[12][0-9][0-9][0-9][01][0-9][0-3][0-9]" + /* THHMMSSZ */ + "T[012][0-9][0-5][0-9][0-6][0-9]Z", lfp->lf_path); + if (len >= sizeof (path)) { + logstream_err(B_FALSE, "log rotation glob too long"); + return; + } + + if ((err = glob(path, GLOB_LIMIT, NULL, &glb)) != 0) { + if (err != GLOB_NOMATCH) { + logstream_err(B_TRUE, "glob terminated with error %d", + err); + } + globfree(&glb); + return; + } + + if (glb.gl_pathc <= logging_rot_keep) { + globfree(&glb); + return; + } + + for (i = glb.gl_pathc - logging_rot_keep - 1; i >= 0; i--) { + if (unlink(glb.gl_pathv[i]) != 0) { + logstream_err(B_TRUE, "log rotation could not remove " + "%s", glb.gl_pathv[i]); + } + } + globfree(&glb); +} + +/* + * Modify the input string with json escapes. 
Since the destination can thus + * be larger than the source, multiple calls may be required to fully convert + * sbuf to json. + * + * sbuf, slen Source buffer and the number of bytes in it to process + * dest Destination custr_t containing escaped JSON. + * scntp On return, *scntp stores number of scnt bytes consumed + * flushp If non-NULL, line-buffered mode is enabled. Processing + * will stop at the first newline or when dest is full and + * *flushp will be set to B_TRUE. + * + * This function makes no attempt to handle wide characters properly because + * the messages that come in may be using any character encoding. Since + * characters other than 7-bit ASCII are not directly readable in the log + * anyway, it is better to log the raw data and leave it to specialized log + * readers to interpret non-ASCII data. + */ +static void +escape_json(const char *sbuf, size_t slen, custr_t *dest, size_t *scntp, + boolean_t *flushp) +{ + char c; + const char *save_sbuf = sbuf; + const char *sbuf_end = sbuf + slen - 1; + char append_buf[7]; /* "\\u0000\0" */ + const char *append; + int len; + + if (slen == 0) { + *scntp = 0; + return; + } + + if (flushp != NULL) { + *flushp = B_FALSE; + } + + while (sbuf <= sbuf_end) { + c = sbuf[0]; + + switch (c) { + case '\\': + append = "\\\\"; + break; + + case '"': + append = "\\\""; + break; + + case '\b': + append = "\\b"; + break; + + case '\f': + append = "\\f"; + break; + + case '\n': + append = "\\n"; + if (flushp != NULL) { + *flushp = B_TRUE; + } + break; + + case '\r': + append = "\\r"; + break; + + case '\t': + append = "\\t"; + break; + + default: + if (c >= 0x20 && c < 0x7f) { + append_buf[0] = c; + append_buf[1] = '\0'; + } else { + len = snprintf(append_buf, sizeof (append_buf), + "\\u%04x", (int)(0xff & c)); + VERIFY3S(len, <, sizeof (append_buf)); + } + append = append_buf; + break; + } + + if (custr_append(dest, append) != 0) { + VERIFY3S(errno, ==, EOVERFLOW); + if (flushp != NULL) { + *flushp = B_TRUE; + } + 
break; + } + + sbuf++; + + if (flushp != NULL && *flushp) { + break; + } + } + + *scntp = sbuf - save_sbuf; + + VERIFY3U(*scntp, <=, slen); +} + +/* + * Like write(2), but to a logfile_t and with retries on short writes. + */ +static void +logfile_write(logfile_t *lfp, custr_t *cus) +{ + const char *buf = custr_cstr(cus); + size_t buflen = custr_len(cus); + ssize_t wlen; + size_t wanted = buflen; + + while (buflen > 0) { + wlen = write(lfp->lf_fd, buf, buflen); + if (wlen == -1) { + if (lfp->lf_write_err) { + lfp->lf_write_err = B_TRUE; + logstream_err(B_TRUE, "log file fd %d '%s': " + "failed to write %llu of %llu bytes", + lfp->lf_fd, lfp->lf_path, buflen, wanted); + } + return; + } + buf += wlen; + buflen -= wlen; + lfp->lf_size += wlen; + + lfp->lf_write_err = B_FALSE; + } + + if (logging_rot_size != 0 && lfp->lf_size > logging_rot_size) { + rotate_log(lfp); + } +} + +static void +add_bunyan_preamble(custr_t *cus) +{ + struct tm gtm; + struct timeval tv; + /* Large enough for YYYY-MM-DDTHH:MM:SS.000000000Z + NUL */ + char timestr[32] = { 0 }; + size_t len; + + if (gettimeofday(&tv, NULL) != 0 || + gmtime_r(&tv.tv_sec, &gtm) == NULL) { + logstream_err(B_TRUE, "failed to get time of day"); + abort(); + } + + len = strftime(timestr, sizeof (timestr) - 1, "%FT%T", &gtm); + VERIFY3U(len, >, 0); + VERIFY3U(len, <, sizeof (timestr) - 1); + + VERIFY0(custr_append_printf(cus, "\"time\": \"%s.%09ldZ\", ", + timestr, tv.tv_usec * 1000)); + VERIFY0(custr_append_printf(cus, "\"v\": %d, ", BUNYAN_VERSION)); + VERIFY0(custr_append_printf(cus, "\"hostname\": \"%s\", ", host)); + VERIFY0(custr_append(cus, "\"name\": \"zoneadmd\",")); + VERIFY0(custr_append_printf(cus, "\"pid\": %s, ", pidstr)); + VERIFY0(custr_append_printf(cus, "\"level\": %d", BUNYAN_LOG_LEVEL)); +} + +/* + * Convert the json pairs into a json object. The properties required for + * bunyan-formatted json objects are added to every object. 
+ * Returns the number of bytes that would have been written to + * buf if bufsz had buf been sufficiently large (excluding the terminating null + * byte). Like snprintf(). + */ +static size_t +make_json(jsonpair_t *pairs, size_t npairs, custr_t *cus) +{ + int i; + const char *key, *val; + const char *start = ", "; + + VERIFY3S(npairs, >, 0); + + custr_reset(cus); + + VERIFY0(custr_append(cus, "{ ")); + + add_bunyan_preamble(cus); + + for (i = 0; i < npairs; i++) { + size_t len; + + key = pairs[i].jp_key; + val = pairs[i].jp_val; + + /* The total number of bytes we're adding to cus */ + len = 3 + strlen(key) + 3 + strlen(val) + 1; + if (custr_append_printf(cus, "%s\"%s\":\"%s\"", + start, key, val) != 0) { + VERIFY3S(errno, ==, EOVERFLOW); + return (custr_len(cus) + len); + } + } + + if (custr_append(cus, " }\n") != 0) { + return (custr_len(cus) + 3); + } + + return (custr_len(cus)); +} + +static void +logstream_write_json(logstream_t *lsp) +{ + size_t len; + jsonpair_t pairs[] = { + { "msg", lsp->ls_buf }, + { "stream", lsp->ls_stream }, + }; + + if (custr_len(lsp->ls_cusbuf) == 0) { + return; + } + + len = make_json(pairs, ARRAY_SIZE(pairs), lsp->ls_cusobuf); + + custr_reset(lsp->ls_cusbuf); + if (len >= sizeof (lsp->ls_obuf)) { + logstream_err(B_FALSE, "%s: buffer too small. Need %llu bytes, " + "have %llu bytes", __func__, len + 1, + sizeof (lsp->ls_obuf)); + return; + } + + logfile_write(lsp->ls_logfile, lsp->ls_cusobuf); +} + +/* + * We output to the log file as json. + * ex. for string 'msg\n' on the zone's stdout: + * {"log":"msg\n","stream":"stdout","time":"2014-10-24T20:12:11.101973117Z"} + * + * We use ns in the last field of the timestamp for compatibility. + * + * We keep track of the size of the log file and rotate it when we exceed + * the log size limit (if one is set). 
+ */ +void +logstream_write(int ls, char *buf, int len) +{ + logstream_t *lsp; + size_t scnt; + boolean_t newline; + boolean_t buffered; + + if (ls == -1 || len == 0) { + return; + } + VERIFY3S(ls, >=, 0); + VERIFY3S(ls, <, ARRAY_SIZE(streams)); + + logstream_lock(); + + lsp = &streams[ls]; + if (lsp->ls_stream[0] == '\0' || lsp->ls_logfile == NULL) { + logstream_unlock(); + return; + } + + buffered = !!(lsp->ls_flags & LS_LINE_BUFFERED); + + do { + escape_json(buf, len, lsp->ls_cusbuf, &scnt, + buffered ? &newline : NULL); + + buf += scnt; + len -= scnt; + + if (!buffered || newline) { + logstream_write_json(lsp); + } + } while (len > 0 && (!buffered || newline)); + + logstream_unlock(); +} + +static void +logstream_flush(int ls) +{ + logstream_t *lsp; + + VERIFY(MUTEX_HELD(&logging_lock)); + + lsp = &streams[ls]; + if (lsp->ls_stream[0] == '\0' || lsp->ls_logfile == NULL) { + return; + } + logstream_write_json(lsp); +} + +static void +logstream_flush_all(logfile_t *lfp) +{ + int i; + + VERIFY(MUTEX_HELD(&logging_lock)); + + for (i = 0; i < ARRAY_SIZE(streams); i++) { + if (streams[i].ls_logfile == lfp) { + logstream_flush(i); + } + } +} + +int +logstream_open(const char *logname, const char *stream, logstream_flags_t flags) +{ + int ls = -1; + int i; + logstream_t *lsp; + logfile_t *lfp = NULL; + + VERIFY3U(strlen(logname), <, sizeof (lfp->lf_name)); + VERIFY3U(strlen(stream), <, sizeof (lsp->ls_stream)); + + logstream_lock(); + + /* + * Find an empty logstream_t and verify that the stream is not already + * open. 
+ */ + for (i = 0; i < ARRAY_SIZE(streams); i++) { + if (ls == -1 && streams[i].ls_stream[0] == '\0') { + VERIFY3P(streams[i].ls_logfile, ==, NULL); + ls = i; + continue; + } + if (strcmp(stream, streams[i].ls_stream) == 0) { + logstream_unlock(); + logstream_err(B_FALSE, "log stream %s already open", + stream); + return (-1); + } + } + VERIFY3S(ls, !=, -1); + + /* Find an existing or available logfile_t */ + for (i = 0; i < ARRAY_SIZE(logfiles); i++) { + if (lfp == NULL && logfiles[i].lf_name[0] == '\0') { + lfp = &logfiles[i]; + } + if (strcmp(logname, logfiles[i].lf_name) == 0) { + lfp = &logfiles[i]; + break; + } + } + if (lfp->lf_name[0] == '\0') { + (void) strlcpy(lfp->lf_name, logname, sizeof (lfp->lf_name)); + (void) snprintf(lfp->lf_path, sizeof (lfp->lf_path), "%s/logs", + zonepath); + (void) mkdir(lfp->lf_path, 0700); + + (void) snprintf(lfp->lf_path, sizeof (lfp->lf_path), + "%s/logs/%s", zonepath, logname); + + open_log(lfp, "open"); + if (lfp->lf_fd == -1) { + logstream_unlock(); + return (-1); + } + } + + lsp = &streams[ls]; + (void) strlcpy(lsp->ls_stream, stream, sizeof (lsp->ls_stream)); + + lsp->ls_flags = flags; + lsp->ls_logfile = lfp; + + logstream_unlock(); + + return (ls); +} + +static void +logstream_reset(logstream_t *lsp) +{ + custr_t *buf = lsp->ls_cusbuf; + custr_t *obuf = lsp->ls_cusobuf; + + (void) memset(lsp, 0, sizeof (*lsp)); + lsp->ls_cusbuf = buf; + lsp->ls_cusobuf = obuf; + + custr_reset(buf); + custr_reset(obuf); +} + +static void +logfile_reset(logfile_t *lfp) +{ + custr_t *buf = lfp->lf_cus; + + (void) memset(lfp, 0, sizeof (*lfp)); + lfp->lf_cus = buf; + lfp->lf_fd = -1; + + custr_reset(buf); +} + +void +logstream_close(int ls, boolean_t abrupt) +{ + logstream_t *lsp; + logfile_t *lfp; + int i; + + if (ls == -1) { + return; + } + VERIFY3S(ls, >=, 0); + VERIFY3S(ls, <, ARRAY_SIZE(streams)); + + logstream_lock(); + logstream_flush(ls); + + lsp = &streams[ls]; + lfp = lsp->ls_logfile; + + VERIFY(lsp->ls_stream[0] != '\0'); + 
VERIFY3P(lfp, !=, NULL); + + logstream_reset(lsp); + + for (i = 0; i < ARRAY_SIZE(streams); i++) { + if (streams[i].ls_logfile == lfp) { + logstream_unlock(); + return; + } + } + + /* No more streams using this log file so return to initial state */ + + close_log(lfp, "close", abrupt); + + logfile_reset(lfp); + + logstream_unlock(); +} diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index cbba769f94..0ace033738 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright 2016, Joyent Inc. + * Copyright 2018, Joyent Inc. * Copyright (c) 2015, 2016 by Delphix. All rights reserved. * Copyright 2019 OmniOS Community Edition (OmniOSce) Association. * Copyright 2020 RackTop Systems Inc. @@ -80,10 +80,12 @@ #include <sys/conf.h> #include <sys/systeminfo.h> #include <sys/secflags.h> +#include <sys/vnic.h> #include <libdlpi.h> #include <libdllink.h> #include <libdlvlan.h> +#include <libdlvnic.h> #include <inet/tcp.h> #include <arpa/inet.h> @@ -139,6 +141,9 @@ #define DFSTYPES "/etc/dfs/fstypes" #define MAXTNZLEN 2048 +/* Number of times to retry unmounting if it fails */ +#define UMOUNT_RETRIES 30 + #define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT) /* a reasonable estimate for the number of lwps per process */ @@ -164,11 +169,25 @@ static priv_set_t *zprivs = NULL; static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn"; +typedef struct zone_proj_rctl_map { + char *zpr_zone_rctl; + char *zpr_project_rctl; +} zone_proj_rctl_map_t; + +static zone_proj_rctl_map_t zone_proj_rctl_map[] = { + {"zone.max-msg-ids", "project.max-msg-ids"}, + {"zone.max-sem-ids", "project.max-sem-ids"}, + {"zone.max-shm-ids", "project.max-shm-ids"}, + {"zone.max-shm-memory", "project.max-shm-memory"}, + {NULL, NULL} +}; + /* from libsocket, not in any header file */ extern int getnetmaskbyaddr(struct in_addr, struct in_addr 
*); /* from zoneadmd */ extern char query_hook[]; +extern char post_statechg_hook[]; /* * For each "net" resource configured in zonecfg, we track a zone_addr_list_t @@ -209,7 +228,7 @@ autofs_cleanup(zoneid_t zoneid) * Handle ENOSYS in the case that the autofs kernel module is not * installed. */ - r = _autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid); + r = _autofssys(AUTOFS_UNMOUNTALL, (void *)((uintptr_t)zoneid)); if (r != 0 && errno == ENOSYS) { return (0); } @@ -604,6 +623,24 @@ root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved) } /* + * Perform brand-specific cleanup if we are unable to unmount a FS. + */ +static void +brand_umount_cleanup(zlog_t *zlogp, char *path) +{ + char cmdbuf[2 * MAXPATHLEN]; + + if (post_statechg_hook[0] == '\0') + return; + + if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook, + ZONE_STATE_DOWN, Z_UNMOUNT, path) > sizeof (cmdbuf)) + return; + + (void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE); +} + +/* * The general strategy for unmounting filesystems is as follows: * * - Remote filesystems may be dead, and attempting to contact them as @@ -636,6 +673,7 @@ static int unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) { int error = 0; + int fail = 0; FILE *mnttab; struct mnttab *mnts; uint_t nmnt; @@ -723,18 +761,39 @@ unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd) if (umount2(path, MS_FORCE) == 0) { unmounted = B_TRUE; stuck = B_FALSE; + fail = 0; } else { /* - * The first failure indicates a - * mount we won't be able to get - * rid of automatically, so we - * bail. + * We may hit a failure here if there + * is an app in the GZ with an open + * pipe into the zone (commonly into + * the zone's /var/run). This type + * of app will notice the closed + * connection and cleanup, but it may + * take a while and we have no easy + * way to notice that. To deal with + * this case, we will wait and retry + * a few times before we give up. 
*/ - error++; - zerror(zlogp, B_FALSE, - "unable to unmount '%s'", path); - free_mnttable(mnts, nmnt); - goto out; + fail++; + if (fail < (UMOUNT_RETRIES - 1)) { + zerror(zlogp, B_FALSE, + "unable to unmount '%s', " + "retrying in 2 seconds", + path); + (void) sleep(2); + } else if (fail > UMOUNT_RETRIES) { + error++; + zerror(zlogp, B_FALSE, + "unmount of '%s' failed", + path); + free_mnttable(mnts, nmnt); + goto out; + } else { + /* Try the hook 2 times */ + brand_umount_cleanup(zlogp, + path); + } } } /* @@ -1072,23 +1131,10 @@ mount_one_dev_symlink_cb(void *arg, const char *source, const char *target) int vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep) { - zone_dochandle_t handle; - - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (-1); - } - if (zonecfg_get_iptype(handle, iptypep) != Z_OK) { + if (zonecfg_get_iptype(snap_hndl, iptypep) != Z_OK) { zerror(zlogp, B_FALSE, "invalid ip-type configuration"); - zonecfg_fini_handle(handle); return (-1); } - zonecfg_fini_handle(handle); return (0); } @@ -1101,14 +1147,13 @@ static int mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd) { char brand[MAXNAMELEN]; - zone_dochandle_t handle = NULL; brand_handle_t bh = NULL; struct zone_devtab ztab; di_prof_t prof = NULL; int err; int retval = -1; zone_iptype_t iptype; - const char *curr_iptype; + const char *curr_iptype = NULL; if (di_prof_init(devpath, &prof)) { zerror(zlogp, B_TRUE, "failed to initialize profile"); @@ -1146,6 +1191,8 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd) zerror(zlogp, B_FALSE, "bad ip-type"); goto cleanup; } + if (curr_iptype == NULL) + abort(); if (brand_platform_iter_devices(bh, zone_name, mount_one_dev_device_cb, prof, curr_iptype) != 0) { @@ -1160,28 +1207,25 @@ 
mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd) } /* Add user-specified devices and directories */ - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_FALSE, "can't initialize zone handle"); - goto cleanup; - } - if ((err = zonecfg_get_handle(zone_name, handle)) != 0) { - zerror(zlogp, B_FALSE, "can't get handle for zone " - "%s: %s", zone_name, zonecfg_strerror(err)); - goto cleanup; - } - if ((err = zonecfg_setdevent(handle)) != 0) { + if ((err = zonecfg_setdevent(snap_hndl)) != 0) { zerror(zlogp, B_FALSE, "%s: %s", zone_name, zonecfg_strerror(err)); goto cleanup; } - while (zonecfg_getdevent(handle, &ztab) == Z_OK) { - if (di_prof_add_dev(prof, ztab.zone_dev_match)) { + while (zonecfg_getdevent(snap_hndl, &ztab) == Z_OK) { + char path[MAXPATHLEN]; + + if ((err = resolve_device_match(zlogp, &ztab, + path, sizeof (path))) != Z_OK) + goto cleanup; + + if (di_prof_add_dev(prof, path)) { zerror(zlogp, B_TRUE, "failed to add " - "user-specified device"); + "user-specified device '%s'", path); goto cleanup; } } - (void) zonecfg_enddevent(handle); + (void) zonecfg_enddevent(snap_hndl); /* Send profile to kernel */ if (di_prof_commit(prof)) { @@ -1194,8 +1238,6 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd) cleanup: if (bh != NULL) brand_close(bh); - if (handle != NULL) - zonecfg_fini_handle(handle); if (prof) di_prof_fini(prof); return (retval); @@ -1686,12 +1728,10 @@ static int mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) { char rootpath[MAXPATHLEN]; - char zonepath[MAXPATHLEN]; char brand[MAXNAMELEN]; char luroot[MAXPATHLEN]; int i, num_fs = 0; struct zone_fstab *fs_ptr = NULL; - zone_dochandle_t handle = NULL; zone_state_t zstate; brand_handle_t bh; plat_gmount_cb_data_t cb; @@ -1705,22 +1745,12 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) goto bad; } - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone path"); - goto 
bad; - } - if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { zerror(zlogp, B_TRUE, "unable to determine zone root"); goto bad; } - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - goto bad; - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK || - zonecfg_setfsent(handle) != Z_OK) { + if (zonecfg_setfsent(snap_hndl) != Z_OK) { zerror(zlogp, B_FALSE, "invalid configuration"); goto bad; } @@ -1738,7 +1768,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) /* Get a handle to the brand info for this zone */ if ((bh = brand_open(brand)) == NULL) { zerror(zlogp, B_FALSE, "unable to determine zone brand"); - zonecfg_fini_handle(handle); return (-1); } @@ -1753,7 +1782,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) plat_gmount_cb, &cb) != 0) { zerror(zlogp, B_FALSE, "unable to mount filesystems"); brand_close(bh); - zonecfg_fini_handle(handle); return (-1); } brand_close(bh); @@ -1764,13 +1792,10 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) * higher level directories (e.g., /usr) get mounted before * any beneath them (e.g., /usr/local). 
*/ - if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs, + if (mount_filesystems_fsent(snap_hndl, zlogp, &fs_ptr, &num_fs, mount_cmd) != 0) goto bad; - zonecfg_fini_handle(handle); - handle = NULL; - /* * Normally when we mount a zone all the zone filesystems * get mounted relative to rootpath, which is usually @@ -1810,23 +1835,40 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare); for (i = 0; i < num_fs; i++) { - if (ALT_MOUNT(mount_cmd) && - strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { - size_t slen = strlen(rootpath) - 2; + if (ALT_MOUNT(mount_cmd)) { + if (strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) { + size_t slen = strlen(rootpath) - 2; - /* - * By default we'll try to mount /dev as /a/dev - * but /dev is special and always goes at the top - * so strip the trailing '/a' from the rootpath. - */ - assert(strcmp(&rootpath[slen], "/a") == 0); - rootpath[slen] = '\0'; - if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) - != 0) - goto bad; - rootpath[slen] = '/'; - continue; + /* + * By default we'll try to mount /dev + * as /a/dev but /dev is special and + * always goes at the top so strip the + * trailing '/a' from the rootpath. + */ + assert(strcmp(&rootpath[slen], "/a") == 0); + rootpath[slen] = '\0'; + if (mount_one(zlogp, &fs_ptr[i], rootpath, + mount_cmd) != 0) + goto bad; + rootpath[slen] = '/'; + continue; + } else if (strcmp(brand_name, default_brand) != 0) { + /* + * If mounting non-native brand, skip + * mounting global mounts and + * filesystem entries since they are + * only needed for native pkg upgrade + * tools. + * + * The only exception right now is + * /dev (handled above), which is + * needed in the luroot in order to + * zlogin -S into the zone. 
+ */ + continue; + } } + if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0) goto bad; } @@ -1849,8 +1891,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) return (0); bad: - if (handle != NULL) - zonecfg_fini_handle(handle); free_fs_data(fs_ptr, num_fs); return (-1); } @@ -2210,9 +2250,6 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id, * the console by zoneadm(8) so instead we log the * message to syslog and continue. */ - zerror(&logsys, B_TRUE, "WARNING: skipping network interface " - "'%s' which may not be present/plumbed in the " - "global zone.", lifr.lifr_name); (void) close(s); return (Z_OK); } @@ -2425,7 +2462,6 @@ bad: static int configure_shared_network_interfaces(zlog_t *zlogp) { - zone_dochandle_t handle; struct zone_nwiftab nwiftab, loopback_iftab; zoneid_t zoneid; @@ -2434,29 +2470,19 @@ configure_shared_network_interfaces(zlog_t *zlogp) return (-1); } - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (-1); - } - if (zonecfg_setnwifent(handle) == Z_OK) { + if (zonecfg_setnwifent(snap_hndl) == Z_OK) { for (;;) { - if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) + if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK) break; + nwifent_free_attrs(&nwiftab); if (configure_one_interface(zlogp, zoneid, &nwiftab) != Z_OK) { - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); + (void) zonecfg_endnwifent(snap_hndl); return (-1); } } - (void) zonecfg_endnwifent(handle); + (void) zonecfg_endnwifent(snap_hndl); } - zonecfg_fini_handle(handle); if (is_system_labeled()) { /* * Labeled zones share the loopback interface @@ -2910,7 +2936,6 @@ free_ip_interface(zone_addr_list_t *zalist) static int configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) { - zone_dochandle_t 
handle; struct zone_nwiftab nwiftab; char rootpath[MAXPATHLEN]; char path[MAXPATHLEN]; @@ -2919,30 +2944,18 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) boolean_t added = B_FALSE; zone_addr_list_t *zalist = NULL, *new; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (-1); - } - - if (zonecfg_setnwifent(handle) != Z_OK) { - zonecfg_fini_handle(handle); + if (zonecfg_setnwifent(snap_hndl) != Z_OK) return (0); - } for (;;) { - if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK) + if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK) break; + nwifent_free_attrs(&nwiftab); if (prof == NULL) { if (zone_get_devroot(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); + (void) zonecfg_endnwifent(snap_hndl); zerror(zlogp, B_TRUE, "unable to determine dev root"); return (-1); @@ -2950,8 +2963,7 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) (void) snprintf(path, sizeof (path), "%s%s", rootpath, "/dev"); if (di_prof_init(path, &prof) != 0) { - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); + (void) zonecfg_endnwifent(snap_hndl); zerror(zlogp, B_TRUE, "failed to initialize profile"); return (-1); @@ -2975,17 +2987,17 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) nwiftab.zone_nwif_physical) == 0) { added = B_TRUE; } else { - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); - zerror(zlogp, B_TRUE, "failed to add network device"); - return (-1); + /* + * Failed to add network device, but the brand hook + * might be doing this for us, so keep silent. 
+ */ + continue; } /* set up the new IP interface, and add them all later */ new = malloc(sizeof (*new)); if (new == NULL) { zerror(zlogp, B_TRUE, "no memory for %s", nwiftab.zone_nwif_physical); - zonecfg_fini_handle(handle); free_ip_interface(zalist); } bzero(new, sizeof (*new)); @@ -2995,16 +3007,14 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) } if (zalist != NULL) { if ((errno = add_net(zlogp, zoneid, zalist)) != 0) { - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); + (void) zonecfg_endnwifent(snap_hndl); zerror(zlogp, B_TRUE, "failed to add address"); free_ip_interface(zalist); return (-1); } free_ip_interface(zalist); } - (void) zonecfg_endnwifent(handle); - zonecfg_fini_handle(handle); + (void) zonecfg_endnwifent(snap_hndl); if (prof != NULL && added) { if (di_prof_commit(prof) != 0) { @@ -3140,48 +3150,23 @@ remove_datalink_protect(zlog_t *zlogp, zoneid_t zoneid) /* datalink does not belong to the GZ */ continue; } - if (dlstatus != DLADM_STATUS_OK) { + if (dlstatus != DLADM_STATUS_OK) zerror(zlogp, B_FALSE, + "clear 'protection' link property: %s", dladm_status2str(dlstatus, dlerr)); - free(dllinks); - return (-1); - } + dlstatus = dladm_set_linkprop(dld_handle, *dllink, "allowed-ips", NULL, 0, DLADM_OPT_ACTIVE); - if (dlstatus != DLADM_STATUS_OK) { + if (dlstatus != DLADM_STATUS_OK) zerror(zlogp, B_FALSE, + "clear 'allowed-ips' link property: %s", dladm_status2str(dlstatus, dlerr)); - free(dllinks); - return (-1); - } } free(dllinks); return (0); } static int -unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid) -{ - int dlnum = 0; - - /* - * The kernel shutdown callback for the dls module should have removed - * all datalinks from this zone. If any remain, then there's a - * problem. 
- */ - if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) { - zerror(zlogp, B_TRUE, "unable to list network interfaces"); - return (-1); - } - if (dlnum != 0) { - zerror(zlogp, B_FALSE, - "datalinks remain in zone after shutdown"); - return (-1); - } - return (0); -} - -static int tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid, const struct sockaddr_storage *local, const struct sockaddr_storage *remote) { @@ -3263,26 +3248,14 @@ static int get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd) { int error = -1; - zone_dochandle_t handle; char *privname = NULL; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (-1); - } - if (ALT_MOUNT(mount_cmd)) { zone_iptype_t iptype; - const char *curr_iptype; + const char *curr_iptype = NULL; - if (zonecfg_get_iptype(handle, &iptype) != Z_OK) { + if (zonecfg_get_iptype(snap_hndl, &iptype) != Z_OK) { zerror(zlogp, B_TRUE, "unable to determine ip-type"); - zonecfg_fini_handle(handle); return (-1); } @@ -3295,21 +3268,18 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd) break; default: zerror(zlogp, B_FALSE, "bad ip-type"); - zonecfg_fini_handle(handle); return (-1); } - if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) { - zonecfg_fini_handle(handle); + if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) return (0); - } + zerror(zlogp, B_FALSE, "failed to determine the zone's default privilege set"); - zonecfg_fini_handle(handle); return (-1); } - switch (zonecfg_get_privset(handle, privs, &privname)) { + switch (zonecfg_get_privset(snap_hndl, privs, &privname)) { case Z_OK: error = 0; break; @@ -3332,10 +3302,22 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd) } free(privname); - zonecfg_fini_handle(handle); return (error); } 
+static char * +zone_proj_rctl(const char *name) +{ + int i; + + for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) { + if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) { + return (zone_proj_rctl_map[i].zpr_project_rctl); + } + } + return (NULL); +} + static int get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) { @@ -3345,7 +3327,6 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) nvlist_t **nvlv = NULL; int rctlcount = 0; int error = -1; - zone_dochandle_t handle; struct zone_rctltab rctltab; rctlblk_t *rctlblk = NULL; uint64_t maxlwps; @@ -3355,16 +3336,6 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) *bufp = NULL; *bufsizep = 0; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (-1); - } - rctltab.zone_rctl_valptr = NULL; if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) { zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc"); @@ -3379,17 +3350,17 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) * from max-processes. If only the max-lwps property is set, we add a * max-processes property with the same limit as max-lwps. 
*/ - rproc = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs); - rlwp = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps); + rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs); + rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps); if (rproc == Z_OK && rlwp == Z_NO_ENTRY) { - if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS, + if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, maxprocs * LWPS_PER_PROCESS) != Z_OK) { zerror(zlogp, B_FALSE, "unable to set max-lwps alias"); goto out; } } else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) { /* no scaling for max-proc value */ - if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXPROCS, + if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, maxlwps) != Z_OK) { zerror(zlogp, B_FALSE, "unable to set max-processes alias"); @@ -3397,7 +3368,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) } } - if (zonecfg_setrctlent(handle) != Z_OK) { + if (zonecfg_setrctlent(snap_hndl) != Z_OK) { zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent"); goto out; } @@ -3406,10 +3377,11 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) zerror(zlogp, B_TRUE, "memory allocation failed"); goto out; } - while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) { + while (zonecfg_getrctlent(snap_hndl, &rctltab) == Z_OK) { struct zone_rctlvaltab *rctlval; uint_t i, count; const char *name = rctltab.zone_rctl_name; + char *proj_nm; /* zoneadm should have already warned about unknown rctls. */ if (!zonecfg_is_rctl(name)) { @@ -3476,6 +3448,26 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) } zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr); rctltab.zone_rctl_valptr = NULL; + + /* + * With no action on our part we will start zsched with the + * project rctl values for our (zoneadmd) current project. 
For + * brands running a variant of Illumos, that's not a problem + * since they will setup their own projects, but for a + * non-native brand like lx, where there are no projects, we + * want to start things up with the same project rctls as the + * corresponding zone rctls, since nothing within the zone will + * ever change the project rctls. + */ + if ((proj_nm = zone_proj_rctl(name)) != NULL) { + if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count) + != 0) { + zerror(zlogp, B_FALSE, + "nvlist_add_nvlist_arrays failed"); + goto out; + } + } + if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count) != 0) { zerror(zlogp, B_FALSE, "%s failed", @@ -3488,7 +3480,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep) nvlv = NULL; rctlcount++; } - (void) zonecfg_endrctlent(handle); + (void) zonecfg_endrctlent(snap_hndl); if (rctlcount == 0) { error = 0; @@ -3512,8 +3504,6 @@ out: nvlist_free(nvl); if (nvlv != NULL) free(nvlv); - if (handle != NULL) - zonecfg_fini_handle(handle); return (error); } @@ -3529,7 +3519,7 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr) > sizeof (cmdbuf)) return (-1); - if (do_subproc(zlogp, cmdbuf, retstr) != 0) + if (do_subproc(zlogp, cmdbuf, retstr, B_FALSE) != 0) return (-1); return (0); @@ -3538,7 +3528,6 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr) static int get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) { - zone_dochandle_t handle; struct zone_dstab dstab; size_t total, offset, len; int error = -1; @@ -3549,30 +3538,20 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) *bufp = NULL; *bufsizep = 0; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (-1); - } - if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) { zerror(zlogp, B_FALSE, "getting 
implicit datasets failed"); goto out; } - if (zonecfg_setdsent(handle) != Z_OK) { + if (zonecfg_setdsent(snap_hndl) != Z_OK) { zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); goto out; } total = 0; - while (zonecfg_getdsent(handle, &dstab) == Z_OK) + while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) total += strlen(dstab.zone_dataset_name) + 1; - (void) zonecfg_enddsent(handle); + (void) zonecfg_enddsent(snap_hndl); if (implicit_datasets != NULL) implicit_len = strlen(implicit_datasets); @@ -3589,12 +3568,12 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) goto out; } - if (zonecfg_setdsent(handle) != Z_OK) { + if (zonecfg_setdsent(snap_hndl) != Z_OK) { zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent"); goto out; } offset = 0; - while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) { len = strlen(dstab.zone_dataset_name); (void) strlcpy(str + offset, dstab.zone_dataset_name, total - offset); @@ -3602,7 +3581,7 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) if (offset < total - 1) str[offset++] = ','; } - (void) zonecfg_enddsent(handle); + (void) zonecfg_enddsent(snap_hndl); if (implicit_len > 0) (void) strlcpy(str + offset, implicit_datasets, total - offset); @@ -3614,8 +3593,6 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep) out: if (error != 0 && str != NULL) free(str); - if (handle != NULL) - zonecfg_fini_handle(handle); if (implicit_datasets != NULL) free(implicit_datasets); @@ -3625,40 +3602,26 @@ out: static int validate_datasets(zlog_t *zlogp) { - zone_dochandle_t handle; struct zone_dstab dstab; zfs_handle_t *zhp; libzfs_handle_t *hdl; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (-1); - } - if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) { + if (zonecfg_setdsent(snap_hndl) != Z_OK) { zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); 
- return (-1); - } - - if (zonecfg_setdsent(handle) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); return (-1); } if ((hdl = libzfs_init()) == NULL) { zerror(zlogp, B_FALSE, "opening ZFS library"); - zonecfg_fini_handle(handle); return (-1); } - while (zonecfg_getdsent(handle, &dstab) == Z_OK) { + while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) { if ((zhp = zfs_open(hdl, dstab.zone_dataset_name, ZFS_TYPE_FILESYSTEM)) == NULL) { zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'", dstab.zone_dataset_name); - zonecfg_fini_handle(handle); libzfs_fini(hdl); return (-1); } @@ -3673,7 +3636,6 @@ validate_datasets(zlog_t *zlogp) zerror(zlogp, B_FALSE, "cannot set 'zoned' " "property for ZFS dataset '%s'\n", dstab.zone_dataset_name); - zonecfg_fini_handle(handle); zfs_close(zhp); libzfs_fini(hdl); return (-1); @@ -3681,9 +3643,8 @@ validate_datasets(zlog_t *zlogp) zfs_close(zhp); } - (void) zonecfg_enddsent(handle); + (void) zonecfg_enddsent(snap_hndl); - zonecfg_fini_handle(handle); libzfs_fini(hdl); return (0); @@ -3737,17 +3698,11 @@ validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl) zfs_handle_t *zhp; libzfs_handle_t *hdl; m_label_t ds_sl; - char zonepath[MAXPATHLEN]; char ds_hexsl[MAXNAMELEN]; if (!is_system_labeled()) return (0); - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone path"); - return (-1); - } - if (!is_zonepath_zfs(zonepath)) return (0); @@ -4418,62 +4373,52 @@ duplicate_reachable_path(zlog_t *zlogp, const char *rootpath) } /* - * Set memory cap and pool info for the zone's resource management - * configuration. + * Set pool info for the zone's resource management configuration. 
*/ static int setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) { int res; uint64_t tmp; - struct zone_mcaptab mcap; char sched[MAXNAMELEN]; - zone_dochandle_t handle = NULL; char pool_err[128]; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (Z_BAD_HANDLE); - } - - if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - zonecfg_fini_handle(handle); - return (res); - } - - /* - * If a memory cap is configured, set the cap in the kernel using - * zone_setattr() and make sure the rcapd SMF service is enabled. - */ - if (zonecfg_getmcapent(handle, &mcap) == Z_OK) { - uint64_t num; - char smf_err[128]; - - num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10); - if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) { - zerror(zlogp, B_TRUE, "could not set zone memory cap"); - zonecfg_fini_handle(handle); - return (Z_INVAL); - } - - if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) { - zerror(zlogp, B_FALSE, "enabling system/rcap service " - "failed: %s", smf_err); - zonecfg_fini_handle(handle); - return (Z_INVAL); - } - } - /* Get the scheduling class set in the zone configuration. */ - if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK && + if (zonecfg_get_sched_class(snap_hndl, sched, sizeof (sched)) == Z_OK && strlen(sched) > 0) { if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched, strlen(sched)) == -1) zerror(zlogp, B_TRUE, "WARNING: unable to set the " "default scheduling class"); - } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp) + if (strcmp(sched, "FX") == 0) { + /* + * When FX is specified then by default all processes + * will start at the lowest priority level (0) and + * stay there. We support an optional attr which + * indicates that all the processes should be "high + * priority". 
We set this on the zone so that starting + * init will set the priority high. + */ + struct zone_attrtab a; + + bzero(&a, sizeof (a)); + (void) strlcpy(a.zone_attr_name, "fixed-hi-prio", + sizeof (a.zone_attr_name)); + + if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK && + strcmp(a.zone_attr_value, "true") == 0) { + boolean_t hi = B_TRUE; + + if (zone_setattr(zoneid, + ZONE_ATTR_SCHED_FIXEDHI, (void *)hi, + sizeof (hi)) == -1) + zerror(zlogp, B_TRUE, "WARNING: unable " + "to set high priority"); + } + } + + } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp) == Z_OK) { /* * If the zone has the zone.cpu-shares rctl set then we want to @@ -4484,7 +4429,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) */ char class_name[PC_CLNMSZ]; - if (zonecfg_get_dflt_sched_class(handle, class_name, + if (zonecfg_get_dflt_sched_class(snap_hndl, class_name, sizeof (class_name)) != Z_OK) { zerror(zlogp, B_FALSE, "WARNING: unable to determine " "the zone's scheduling class"); @@ -4517,7 +4462,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) * right thing in all cases (reuse or create) based on the current * zonecfg. */ - if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err, + if ((res = zonecfg_bind_tmp_pool(snap_hndl, zoneid, pool_err, sizeof (pool_err))) != Z_OK) { if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND) zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting " @@ -4526,14 +4471,13 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) else zerror(zlogp, B_FALSE, "could not bind zone to " "temporary pool: %s", zonecfg_strerror(res)); - zonecfg_fini_handle(handle); return (Z_POOL_BIND); } /* * Check if we need to warn about poold not being enabled. 
*/ - if (zonecfg_warn_poold(handle)) { + if (zonecfg_warn_poold(snap_hndl)) { zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has " "been specified\nbut the dynamic pool service is not " "enabled.\nThe system will not dynamically adjust the\n" @@ -4543,7 +4487,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) } /* The following is a warning, not an error. */ - if ((res = zonecfg_bind_pool(handle, zoneid, pool_err, + if ((res = zonecfg_bind_pool(snap_hndl, zoneid, pool_err, sizeof (pool_err))) != Z_OK) { if (res == Z_POOL_BIND) zerror(zlogp, B_FALSE, "WARNING: unable to bind to " @@ -4557,10 +4501,9 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid) } /* Update saved pool name in case it has changed */ - (void) zonecfg_get_poolname(handle, zone_name, pool_name, + (void) zonecfg_get_poolname(snap_hndl, zone_name, pool_name, sizeof (pool_name)); - zonecfg_fini_handle(handle); return (Z_OK); } @@ -4787,36 +4730,31 @@ setup_zone_fs_allowed(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid) } static int -setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid) +setup_zone_attrs(zlog_t *zlogp, zoneid_t zoneid) { - zone_dochandle_t handle; int res = Z_OK; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, "getting zone configuration handle"); - return (Z_BAD_HANDLE); - } - if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) { - zerror(zlogp, B_FALSE, "invalid configuration"); - goto out; - } - - if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK) + if ((res = setup_zone_hostid(snap_hndl, zlogp, zoneid)) != Z_OK) goto out; - if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK) + if ((res = setup_zone_fs_allowed(snap_hndl, zlogp, zoneid)) != Z_OK) goto out; - if ((res = setup_zone_secflags(handle, zlogp, zoneid)) != Z_OK) + if ((res = setup_zone_secflags(snap_hndl, zlogp, zoneid)) != Z_OK) goto out; out: - zonecfg_fini_handle(handle); return (res); } +/* 
+ * The zone_did is a persistent debug ID. Each zone should have a unique ID + * in the kernel. This is used for things like DTrace which want to monitor + * zones across reboots. They can't use the zoneid since that changes on + * each boot. + */ zoneid_t -vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) +vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zone_did) { zoneid_t rval = -1; priv_set_t *privs; @@ -4832,7 +4770,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) tsol_zcent_t *zcent = NULL; int match = 0; int doi = 0; - int flags; + int flags = -1; zone_iptype_t iptype; if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { @@ -4851,6 +4789,8 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) } else { flags = 0; } + if (flags == -1) + abort(); if ((privs = priv_allocset()) == NULL) { zerror(zlogp, B_TRUE, "%s failed", "priv_allocset"); @@ -4954,7 +4894,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) xerr = 0; if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf, rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel, - flags)) == -1) { + flags, zone_did)) == -1) { if (xerr == ZE_AREMOUNTS) { if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) { zerror(zlogp, B_FALSE, @@ -5000,7 +4940,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd) struct brand_attr attr; char modname[MAXPATHLEN]; - if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK) + if (setup_zone_attrs(zlogp, zoneid) != Z_OK) goto error; if ((bh = brand_open(brand_name)) == NULL) { @@ -5058,6 +4998,8 @@ error: } if (rctlbuf != NULL) free(rctlbuf); + if (zfsbuf != NULL) + free(zfsbuf); priv_freeset(privs); if (fp != NULL) zonecfg_close_scratch(fp); @@ -5146,7 +5088,7 @@ write_index_file(zoneid_t zoneid) int vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) { - char zonepath[MAXPATHLEN]; + char zpath[MAXPATHLEN]; if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) { lofs_discard_mnttab(); @@ -5157,15 +5099,11 @@ 
vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) * Before we try to mount filesystems we need to create the * attribute backing store for /dev */ - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - lofs_discard_mnttab(); - return (-1); - } - resolve_lofs(zlogp, zonepath, sizeof (zonepath)); + (void) strlcpy(zpath, zonepath, sizeof (zpath)); + resolve_lofs(zlogp, zpath, sizeof (zpath)); /* Make /dev directory owned by root, grouped sys */ - if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE, - 0, 3) != 0) { + if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) { lofs_discard_mnttab(); return (-1); } @@ -5200,6 +5138,8 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) return (-1); } break; + default: + abort(); } } @@ -5274,14 +5214,88 @@ unmounted: } } +/* + * Delete all transient VNICs belonging to this zone. A transient VNIC + * is one that is created and destroyed along with the lifetime of the + * zone. Non-transient VNICs, ones that are assigned from the GZ to a + * NGZ, are reassigned to the GZ in zone_shutdown() via the + * zone-specific data (zsd) callbacks. 
+ */ +static int +delete_transient_vnics(zlog_t *zlogp, zoneid_t zoneid) +{ + dladm_status_t status; + int num_links = 0; + datalink_id_t *links, link; + uint32_t link_flags; + datalink_class_t link_class; + char link_name[MAXLINKNAMELEN]; + + if (zone_list_datalink(zoneid, &num_links, NULL) != 0) { + zerror(zlogp, B_TRUE, "unable to determine " + "number of network interfaces"); + return (-1); + } + + if (num_links == 0) + return (0); + + links = malloc(num_links * sizeof (datalink_id_t)); + + if (links == NULL) { + zerror(zlogp, B_TRUE, "failed to delete " + "network interfaces because of alloc fail"); + return (-1); + } + + if (zone_list_datalink(zoneid, &num_links, links) != 0) { + zerror(zlogp, B_TRUE, "failed to delete " + "network interfaces because of failure " + "to list them"); + return (-1); + } + + for (int i = 0; i < num_links; i++) { + char dlerr[DLADM_STRSIZE]; + link = links[i]; + + status = dladm_datalink_id2info(dld_handle, link, &link_flags, + &link_class, NULL, link_name, sizeof (link_name)); + + if (status != DLADM_STATUS_OK) { + zerror(zlogp, B_FALSE, "failed to " + "delete network interface (%u)" + "due to failure to get link info: %s", + link, + dladm_status2str(status, dlerr)); + return (-1); + } + + if (link_flags & DLADM_OPT_TRANSIENT) { + assert(link_class & DATALINK_CLASS_VNIC); + status = dladm_vnic_delete(dld_handle, link, + DLADM_OPT_ACTIVE); + + if (status != DLADM_STATUS_OK) { + zerror(zlogp, B_TRUE, "failed to delete link " + "with id %d: %s", link, + dladm_status2str(status, dlerr)); + return (-1); + } + } + } + + return (0); +} + int -vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) +vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, + boolean_t debug) { char *kzone; zoneid_t zoneid; int res; char pool_err[128]; - char zpath[MAXPATHLEN]; char cmdbuf[MAXPATHLEN]; brand_handle_t bh = NULL; dladm_status_t status; @@ -5314,16 +5328,12 @@ vplat_teardown(zlog_t *zlogp, boolean_t 
unmount_cmd, boolean_t rebooting) goto error; } - if (remove_datalink_pool(zlogp, zoneid) != 0) { + if (remove_datalink_pool(zlogp, zoneid) != 0) zerror(zlogp, B_FALSE, "unable clear datalink pool property"); - goto error; - } - if (remove_datalink_protect(zlogp, zoneid) != 0) { + if (remove_datalink_protect(zlogp, zoneid) != 0) zerror(zlogp, B_FALSE, "unable clear datalink protect property"); - goto error; - } /* * The datalinks assigned to the zone will be removed from the NGZ as @@ -5337,12 +5347,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) goto error; } - /* Get the zonepath of this zone */ - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - goto error; - } - /* Get a handle to the brand info for this zone */ if ((bh = brand_open(brand_name)) == NULL) { zerror(zlogp, B_FALSE, "unable to determine zone brand"); @@ -5353,7 +5357,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) * brand a chance to cleanup any custom configuration. 
*/ (void) strcpy(cmdbuf, EXEC_PREFIX); - if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN, + if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN) < 0) { brand_close(bh); zerror(zlogp, B_FALSE, "unable to determine branded zone's " @@ -5363,7 +5367,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) brand_close(bh); if ((strlen(cmdbuf) > EXEC_LEN) && - (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { + (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) { zerror(zlogp, B_FALSE, "%s failed", cmdbuf); goto error; } @@ -5395,17 +5399,18 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) } break; case ZS_EXCLUSIVE: - if (unconfigure_exclusive_network_interfaces(zlogp, - zoneid) != 0) { - zerror(zlogp, B_FALSE, "unable to unconfigure " - "network interfaces in zone"); + if (delete_transient_vnics(zlogp, zoneid) != 0) { + zerror(zlogp, B_FALSE, "unable to delete " + "transient vnics in zone"); goto error; } + status = dladm_zone_halt(dld_handle, zoneid); if (status != DLADM_STATUS_OK) { zerror(zlogp, B_FALSE, "unable to notify " "dlmgmtd of zone halt: %s", dladm_status2str(status, errmsg)); + goto error; } break; } @@ -5437,14 +5442,9 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting) if (rebooting) { struct zone_psettab pset_tab; - zone_dochandle_t handle; - if ((handle = zonecfg_init_handle()) != NULL && - zonecfg_get_handle(zone_name, handle) == Z_OK && - zonecfg_lookup_pset(handle, &pset_tab) == Z_OK) + if (zonecfg_lookup_pset(snap_hndl, &pset_tab) == Z_OK) destroy_tmp_pool = B_FALSE; - - zonecfg_fini_handle(handle); } if (destroy_tmp_pool) { diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c index 130b97d984..09a9f9ba8e 100644 --- a/usr/src/cmd/zoneadmd/zcons.c +++ b/usr/src/cmd/zoneadmd/zcons.c @@ -22,7 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
- * Copyright 2012 Joyent, Inc. All rights reserved. + * Copyright 2019 Joyent, Inc. * Copyright 2015 Nexenta Systems, Inc. All rights reserved. */ @@ -40,10 +40,10 @@ * * Global Zone | Non-Global Zone * .--------------. | - * .-----------. | zoneadmd -z | | .--------. .---------. - * | zlogin -C | | myzone | | | ttymon | | syslogd | - * `-----------' `--------------' | `--------' `---------' - * | | | | | | | + * .-----------. | zoneadmd -z |--. | .--------. .---------. + * | zlogin -C | | myzone | | | | ttymon | | syslogd | + * `-----------' `--------------' V | `--------' `---------' + * | | | | console.log | | | * User | | | | | V V * - - - - - - - - -|- - - -|- - - -|-|- - - - - - -|- - /dev/zconsole - - - * Kernel V V | | | @@ -59,7 +59,7 @@ * V +-----------+ * +---manager--+-subsidiary+ * | | - * | zcons driver | + * | Zcons driver | * | zonename="myzone" | * +------------------------+ * @@ -81,6 +81,8 @@ * functions as a two-way proxy for console I/O, relaying user input * to the manager side of the console, and relaying output from the * zone to the user. + * + * - Logging output to <zonepath>/logs/console.log. */ #include <sys/types.h> @@ -118,9 +120,10 @@ #define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock" +#define ZCONS_RETRY 10 + static int serverfd = -1; /* console server unix domain socket fd */ char boot_args[BOOTARGS_MAX]; -char bad_boot_arg[BOOTARGS_MAX]; /* * The eventstream is a simple one-directional flow of messages from the @@ -130,7 +133,10 @@ char bad_boot_arg[BOOTARGS_MAX]; */ static int eventstream[2]; - +/* flag used to cope with race creating manager zcons devlink */ +static boolean_t manager_zcons_failed = B_FALSE; +/* flag to track if we've seen a state change when there is no manager zcons */ +static boolean_t state_changed = B_FALSE; int eventstream_init() @@ -323,7 +329,7 @@ destroy_console_devs(zlog_t *zlogp) * interfaces to instantiate a new zone console node. 
We do a lot of * sanity checking, and are careful to reuse a console if one exists. * - * Once the device is in the device tree, we kick devfsadm via di_init_devs() + * Once the device is in the device tree, we kick devfsadm via di_devlink_init() * to ensure that the appropriate symlinks (to the manager and subsidiary * console devices) are placed in /dev in the global zone. */ @@ -410,45 +416,66 @@ devlinks: * ioctl, which will cause the manager to retain a reference to the * subsidiary. This prevents ttymon from blowing through the * subsidiary's STREAMS anchor. + * + * In very rare cases the open returns ENOENT if devfs doesn't have + * everything setup yet due to heavy zone startup load. Wait for + * 1 sec. and retry a few times. Even if we can't setup the zone's + * console, we still go ahead and boot the zone. */ (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MANAGER_NAME); - if ((managerfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) { + for (i = 0; i < ZCONS_RETRY; i++) { + managerfd = open(conspath, O_RDWR | O_NOCTTY); + if (managerfd >= 0 || errno != ENOENT) + break; + (void) sleep(1); + } + if (managerfd == -1) { zerror(zlogp, B_TRUE, "ERROR: could not open manager side of " "zone console for %s to acquire subsidiary handle", zone_name); - goto error; + manager_zcons_failed = B_TRUE; } + (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_SUBSIDIARY_NAME); - if ((subfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) { + for (i = 0; i < ZCONS_RETRY; i++) { + subfd = open(conspath, O_RDWR | O_NOCTTY); + if (subfd >= 0 || errno != ENOENT) + break; + (void) sleep(1); + } + if (subfd == -1) zerror(zlogp, B_TRUE, "ERROR: could not open subsidiary side " "of zone console for %s to acquire subsidiary handle", zone_name); - (void) close(managerfd); - goto error; - } + /* * This ioctl can occasionally return ENXIO if devfs doesn't have * everything plumbed up yet due to heavy zone startup load. 
Wait for * 1 sec. and retry a few times before we fail to boot the zone. */ - for (i = 0; i < 5; i++) { - if (ioctl(managerfd, ZC_HOLDSUBSID, (caddr_t)(intptr_t)subfd) - == 0) { - rv = 0; - break; - } else if (errno != ENXIO) { - break; + if (managerfd != -1 && subfd != -1) { + for (i = 0; i < ZCONS_RETRY; i++) { + if (ioctl(managerfd, ZC_HOLDSUBSID, + (caddr_t)(intptr_t)subfd) == 0) { + rv = 0; + break; + } else if (errno != ENXIO) { + break; + } + (void) sleep(1); } - (void) sleep(1); + if (rv != 0) + zerror(zlogp, B_TRUE, "ERROR: error while acquiring " + "subsidiary handle of zone console for %s", + zone_name); } - if (rv != 0) - zerror(zlogp, B_TRUE, "ERROR: error while acquiring " - "subsidiary handle of zone console for %s", zone_name); - (void) close(subfd); - (void) close(managerfd); + if (subfd != -1) + (void) close(subfd); + if (managerfd != -1) + (void) close(managerfd); error: if (ddef_hdl) @@ -521,6 +548,7 @@ get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len, size_t buflen = sizeof (buf); char c = '\0'; int i = 0, r; + ucred_t *cred = NULL; /* "eat up the ident string" case, for simplicity */ if (pid == NULL) { @@ -554,18 +582,22 @@ get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len, break; } + if (getpeerucred(clifd, &cred) == 0) { + *pid = ucred_getpid((const ucred_t *)cred); + ucred_free(cred); + } else { + return (-1); + } + /* * Parse buffer for message of the form: - * IDENT <pid> <locale> <disconnect flag> + * IDENT <locale> <disconnect flag> */ bufp = buf; if (strncmp(bufp, "IDENT ", 6) != 0) return (-1); bufp += 6; errno = 0; - *pid = strtoll(bufp, &bufp, 10); - if (errno != 0) - return (-1); while (*bufp != '\0' && isspace(*bufp)) bufp++; @@ -671,14 +703,6 @@ event_message(int clifd, char *clilocale, zone_evt_t evt, int dflag) else str = "NOTICE: Zone boot failed"; break; - case Z_EVT_ZONE_BADARGS: - /*LINTED*/ - (void) snprintf(lmsg, sizeof (lmsg), - localize_msg(clilocale, - "WARNING: Ignoring 
invalid boot arguments: %s"), - bad_boot_arg); - lstr = lmsg; - break; default: return; } @@ -717,7 +741,7 @@ test_client(int clifd) * messages) can be output in the user's locale. */ static void -do_console_io(zlog_t *zlogp, int consfd, int servfd) +do_console_io(zlog_t *zlogp, int consfd, int servfd, int conslog) { struct pollfd pollfds[4]; char ibuf[BUFSIZ]; @@ -763,14 +787,21 @@ do_console_io(zlog_t *zlogp, int consfd, int servfd) (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { errno = 0; cc = read(consfd, ibuf, BUFSIZ); - if (cc <= 0 && (errno != EINTR) && - (errno != EAGAIN)) - break; - /* - * Lose I/O if no one is listening - */ - if (clifd != -1 && cc > 0) - (void) write(clifd, ibuf, cc); + if (cc <= 0) { + if (errno != EINTR && + errno != EAGAIN) { + break; + } + } else { + logstream_write(conslog, ibuf, cc); + + /* + * Lose I/O if no one is listening + */ + if (clifd != -1) { + (void) write(clifd, ibuf, cc); + } + } } else { pollerr = pollfds[0].revents; zerror(zlogp, B_FALSE, @@ -882,7 +913,6 @@ init_console(zlog_t *zlogp) if (init_console_dev(zlogp) == -1) { zerror(zlogp, B_FALSE, "console setup: device initialization failed"); - return (-1); } if ((serverfd = init_console_sock(zlogp)) == -1) { @@ -894,6 +924,17 @@ init_console(zlog_t *zlogp) } /* + * Maintain a simple flag that tracks if we have seen at least one state + * change. This is currently only used to handle the special case where we are + * running without a console device, which is what normally drives shutdown. + */ +void +zcons_statechanged() +{ + state_changed = B_TRUE; +} + +/* * serve_console() is the master loop for driving console I/O. It is also the * routine which is ultimately responsible for "pulling the plug" on zoneadmd * when it realizes that the daemon should shut down. 
@@ -911,6 +952,10 @@ serve_console(zlog_t *zlogp) int managerfd; zone_state_t zstate; char conspath[MAXPATHLEN]; + static boolean_t cons_warned = B_FALSE; + int conslog; + + conslog = logstream_open("console.log", "console", LS_LINE_BUFFERED); (void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s", zone_name, ZCONS_MANAGER_NAME); @@ -918,6 +963,46 @@ serve_console(zlog_t *zlogp) for (;;) { managerfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY); if (managerfd == -1) { + if (manager_zcons_failed) { + /* + * If we don't have a console and the zone is + * not shutting down, there may have been a + * race/failure with devfs while creating the + * console. In this case we want to leave the + * zone up, even without a console, so + * periodically recheck. + */ + int i; + + /* + * In the normal flow of this loop, we use + * do_console_io to give things a chance to get + * going first. However, in this case we can't + * use that, so we have to wait for at least + * one state change before checking the state. 
+ */ + for (i = 0; i < 60; i++) { + if (state_changed) + break; + (void) sleep(1); + } + + if (i < 60 && zone_get_state(zone_name, + &zstate) == Z_OK && + (zstate == ZONE_STATE_READY || + zstate == ZONE_STATE_RUNNING)) { + if (!cons_warned) { + zerror(zlogp, B_FALSE, + "WARNING: missing zone " + "console for %s", + zone_name); + cons_warned = B_TRUE; + } + (void) sleep(ZCONS_RETRY); + continue; + } + } + zerror(zlogp, B_TRUE, "failed to open console manager"); (void) mutex_lock(&lock); goto death; @@ -937,7 +1022,7 @@ serve_console(zlog_t *zlogp) goto death; } - do_console_io(zlogp, managerfd, serverfd); + do_console_io(zlogp, managerfd, serverfd, conslog); /* * We would prefer not to do this, but hostile zone processes @@ -978,4 +1063,6 @@ death: destroy_console_sock(serverfd); (void) destroy_console_devs(zlogp); + + logstream_close(conslog, B_FALSE); } diff --git a/usr/src/cmd/zoneadmd/zfd.c b/usr/src/cmd/zoneadmd/zfd.c new file mode 100644 index 0000000000..6647ef0c5f --- /dev/null +++ b/usr/src/cmd/zoneadmd/zfd.c @@ -0,0 +1,1238 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ * Copyright 2019 Joyent, Inc. + */ + +/* + * Zone file descriptor support is used as a mechanism for a process inside the + * zone to log messages to the GZ zoneadmd and also as a way to interact + * directly with the process (via zlogin -I). The zfd thread is modeled on + * the zcons thread, so see the comment header in zcons.c for a general overview. + * Unlike with zcons, which has a single endpoint within the zone and a single + * endpoint used by zoneadmd, we set up multiple endpoints within the zone. + * + * The mode, which is controlled by the zone attribute "zlog-mode", is somewhat + * of a misnomer since its purpose has evolved. The attribute currently + * can have six values which are used to control: + * - how the zfd devices are used inside the zone + * - if the output on the device(s) is also teed into another stream within + * the zone + * - if we do logging in the GZ + * See the comment on get_mode_logmax() in this file, and the comment in + * uts/common/io/zfd.c for more details. + * + * Internally the zfd_mode_t struct holds the number of stdio devs (1 or 3), + * the number of additional devs corresponding to the zone attr value and the + * GZ logging flag. + * + * Note that although the mode indicates the number of devices needed, we always + * create all possible zfd devices for simplicity. 
+ */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/termios.h> +#include <sys/zfd.h> +#include <sys/mkdev.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <stropts.h> +#include <thread.h> +#include <ucred.h> +#include <unistd.h> +#include <zone.h> +#include <signal.h> +#include <wchar.h> + +#include <libdevinfo.h> +#include <libdevice.h> +#include <libzonecfg.h> + +#include <syslog.h> +#include <sys/modctl.h> + +#include "zoneadmd.h" + +static int shutting_down = 0; +static thread_t logger_tid; +static char log_name[MAXNAMELEN] = "stdio.log"; + +/* + * The eventstream is a simple one-directional flow of messages implemented + * with a pipe. It is used to wake up the poller when it needs to shutdown. + */ +static int eventstream[2] = {-1, -1}; + +#define ZLOG_MODE "zlog-mode" +#define ZLOG_NAME "zlog-name" +#define ZFDNEX_DEVTREEPATH "/pseudo/zfdnex@2" +#define ZFDNEX_FILEPATH "/devices/pseudo/zfdnex@2" +#define SERVER_SOCKPATH ZONES_TMPDIR "/%s.server_%s" +#define ZTTY_RETRY 5 + +#define NUM_ZFD_DEVS 5 + +typedef struct zfd_mode { + uint_t zmode_n_stddevs; + uint_t zmode_n_addl_devs; + boolean_t zmode_gzlogging; +} zfd_mode_t; +static zfd_mode_t mode; + +/* + * cb_data is only used by destroy_cb. + */ +struct cb_data { + zlog_t *zlogp; + int killed; +}; + +/* + * destroy_zfd_devs() and its helper destroy_cb() tears down any zfd instances + * associated with this zone. If things went very wrong, we might have an + * incorrect number of instances hanging around. This routine hunts down and + * tries to remove all of them. Of course, if the fd is open, the instance will + * not detach, which is a potential issue. 
+ */ +static int +destroy_cb(di_node_t node, void *arg) +{ + struct cb_data *cb = (struct cb_data *)arg; + char *prop_data; + char *tmp; + char devpath[MAXPATHLEN]; + devctl_hdl_t hdl; + + if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname", + &prop_data) == -1) + return (DI_WALK_CONTINUE); + + assert(prop_data != NULL); + if (strcmp(prop_data, zone_name) != 0) { + /* this is a zfd for a different zone */ + return (DI_WALK_CONTINUE); + } + + tmp = di_devfs_path(node); + (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp); + di_devfs_path_free(tmp); + + if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) { + zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, " + "but it could not be controlled.", devpath); + return (DI_WALK_CONTINUE); + } + if (devctl_device_remove(hdl) == 0) { + cb->killed++; + } else { + zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, " + "but it could not be removed.", devpath); + } + devctl_release(hdl); + return (DI_WALK_CONTINUE); +} + +static int +destroy_zfd_devs(zlog_t *zlogp) +{ + di_node_t root; + struct cb_data cb; + + bzero(&cb, sizeof (cb)); + cb.zlogp = zlogp; + + if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) { + zerror(zlogp, B_TRUE, "di_init failed"); + return (-1); + } + + (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb); + + di_fini(root); + return (0); +} + +static void +make_tty(zlog_t *zlogp, int id) +{ + int i; + int fd = -1; + char stdpath[MAXPATHLEN]; + + /* + * Open the master side of the dev and issue the ZFD_MAKETTY ioctl, + * which will cause the the various tty-related streams modules to be + * pushed when the slave opens the device. + * + * In very rare cases the open returns ENOENT if devfs doesn't have + * everything setup yet due to heavy zone startup load. Wait for + * 1 sec. and retry a few times. Even if we can't setup tty mode + * we still move on. 
+ */ + (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d", + zone_name, id); + + for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) { + fd = open(stdpath, O_RDWR | O_NOCTTY); + if (fd >= 0 || errno != ENOENT) + break; + (void) sleep(1); + } + if (fd == -1) { + zerror(zlogp, B_TRUE, "ERROR: could not open zfd %d for " + "zone %s to set tty mode", id, zone_name); + } else { + /* + * This ioctl can occasionally return ENXIO if devfs doesn't + * have everything plumbed up yet due to heavy zone startup + * load. Wait for 1 sec. and retry a few times before we give + * up. + */ + for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) { + if (ioctl(fd, ZFD_MAKETTY) == 0) { + break; + } else if (errno != ENXIO) { + break; + } + (void) sleep(1); + } + } + + if (fd != -1) + (void) close(fd); +} + +/* + * init_zfd_devs() drives the device-tree configuration of the zone fd devices. + * The general strategy is to use the libdevice (devctl) interfaces to + * instantiate all of new zone fd nodes. We do a lot of sanity checking, and + * are careful to reuse a dev if one exists. + * + * Once the devices are in the device tree, we kick devfsadm via + * di_devlink_init() to ensure that the appropriate symlinks (to the master and + * slave fd devices) are placed in /dev in the global zone. + */ +static int +init_zfd_dev(zlog_t *zlogp, devctl_hdl_t bus_hdl, int id) +{ + int rv = -1; + devctl_ddef_t ddef_hdl = NULL; + devctl_hdl_t dev_hdl = NULL; + + if ((ddef_hdl = devctl_ddef_alloc("zfd", 0)) == NULL) { + zerror(zlogp, B_TRUE, "failed to allocate ddef handle"); + goto error; + } + + /* + * Set four properties on this node; the name of the zone, the dev name + * seen inside the zone, a flag which lets pseudo know that it is OK to + * automatically allocate an instance # for this device, and the last + * one tells the device framework not to auto-detach this node - we + * need the node to still be there when we ask devfsadmd to make links, + * and when we need to open it. 
+ */ + if (devctl_ddef_string(ddef_hdl, "zfd_zname", zone_name) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd_zname property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "zfd_id", id) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd_id property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) { + zerror(zlogp, B_TRUE, "failed to create auto-assign-instance " + "property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) { + zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach " + "property"); + goto error; + } + if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd node"); + goto error; + } + rv = 0; + +error: + if (ddef_hdl) + devctl_ddef_free(ddef_hdl); + if (dev_hdl) + devctl_release(dev_hdl); + return (rv); +} + +static int +init_zfd_devs(zlog_t *zlogp, zfd_mode_t *mode) +{ + devctl_hdl_t bus_hdl = NULL; + di_devlink_handle_t dl = NULL; + int rv = -1; + int i; + + /* + * Time to make the devices. + */ + if ((bus_hdl = devctl_bus_acquire(ZFDNEX_FILEPATH, 0)) == NULL) { + zerror(zlogp, B_TRUE, "devctl_bus_acquire failed"); + goto error; + } + + for (i = 0; i < NUM_ZFD_DEVS; i++) { + if (init_zfd_dev(zlogp, bus_hdl, i) != 0) + goto error; + } + + if ((dl = di_devlink_init("zfd", DI_MAKE_LINK)) == NULL) { + zerror(zlogp, B_TRUE, "failed to create devlinks"); + goto error; + } + + (void) di_devlink_fini(&dl); + rv = 0; + + if (mode->zmode_n_stddevs == 1) { + /* We want the primary stream to look like a tty. 
*/ + make_tty(zlogp, 0); + } + +error: + if (bus_hdl) + devctl_release(bus_hdl); + return (rv); +} + +static int +init_server_sock(int *servfd, char *nm) +{ + int resfd = -1; + struct sockaddr_un servaddr; + + bzero(&servaddr, sizeof (servaddr)); + servaddr.sun_family = AF_UNIX; + (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path), + SERVER_SOCKPATH, zone_name, nm); + + if ((resfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + zerror(&logplat, B_TRUE, + "server setup: could not create socket"); + goto err; + } + (void) unlink(servaddr.sun_path); + + if (bind(resfd, (struct sockaddr *)&servaddr, sizeof (servaddr)) + == -1) { + zerror(&logplat, B_TRUE, + "server setup: could not bind to socket"); + goto err; + } + + if (listen(resfd, 4) == -1) { + zerror(&logplat, B_TRUE, + "server setup: could not listen on socket"); + goto err; + } + + *servfd = resfd; + return (0); + +err: + (void) unlink(servaddr.sun_path); + if (resfd != -1) + (void) close(resfd); + return (-1); +} + +static void +destroy_server_sock(int servfd, char *nm) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), SERVER_SOCKPATH, zone_name, nm); + (void) unlink(path); + (void) shutdown(servfd, SHUT_RDWR); + (void) close(servfd); +} + +/* + * Read the "ident" string from the client's descriptor; this routine also + * tolerates being called with pid=NULL, for times when you want to "eat" + * the ident string from a client without saving it. 
+ */ +static int +get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len, + uint_t *flagsp) +{ + char buf[BUFSIZ], *bufp; + size_t buflen = sizeof (buf); + char c = '\0'; + int i = 0, r; + ucred_t *cred = NULL; + + /* "eat up the ident string" case, for simplicity */ + if (pid == NULL) { + assert(locale == NULL && locale_len == 0); + while (read(clifd, &c, 1) == 1) { + if (c == '\n') + return (0); + } + } + + bzero(buf, sizeof (buf)); + while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) { + buflen--; + if (c == '\n') + break; + + buf[i] = c; + i++; + } + if (r == -1) + return (-1); + + /* + * We've filled the buffer, but still haven't seen \n. Keep eating + * until we find it; we don't expect this to happen, but this is + * defensive. + */ + if (c != '\n') { + while ((r = read(clifd, &c, sizeof (c))) > 0) + if (c == '\n') + break; + } + + /* + * Parse buffer for message of the form: + * IDENT <locale> <flags> + */ + bufp = buf; + if (strncmp(bufp, "IDENT ", 6) != 0) + return (-1); + bufp += 6; + + if (getpeerucred(clifd, &cred) == 0) { + *pid = ucred_getpid((const ucred_t *)cred); + ucred_free(cred); + } else { + return (-1); + } + + while (*bufp != '\0' && isspace(*bufp)) + bufp++; + buflen = strlen(bufp) - 1; + bufp[buflen - 1] = '\0'; + (void) strlcpy(locale, bufp, locale_len); + + *flagsp = atoi(&bufp[buflen]); + + return (0); +} + +static int +accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len, + uint_t *flagsp) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen; + int flags; + + clilen = sizeof (cliaddr); + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + if (connfd == -1) + return (-1); + if (pid != NULL) { + if (get_client_ident(connfd, pid, locale, locale_len, flagsp) + == -1) { + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); + return (-1); + } + (void) write(connfd, "OK\n", 3); + } + + flags = fcntl(connfd, F_GETFL, 0); + if (flags != -1) + (void) fcntl(connfd, F_SETFL, 
flags | O_NONBLOCK | FD_CLOEXEC); + + return (connfd); +} + +static void +reject_client(int servfd, pid_t clientpid) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen; + char nak[MAXPATHLEN]; + + clilen = sizeof (cliaddr); + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + + /* + * After getting its ident string, tell client to get lost. + */ + if (get_client_ident(connfd, NULL, NULL, 0, NULL) == 0) { + (void) snprintf(nak, sizeof (nak), "%lu\n", + clientpid); + (void) write(connfd, nak, strlen(nak)); + } + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); +} + +static int +accept_socket(int servfd, pid_t verpid) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen = sizeof (cliaddr); + ucred_t *cred = NULL; + pid_t rpid = -1; + int flags; + + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + if (connfd == -1) + return (-1); + + /* Confirm connecting process is who we expect */ + if (getpeerucred(connfd, &cred) == 0) { + rpid = ucred_getpid((const ucred_t *)cred); + ucred_free(cred); + } + if (rpid == -1 || rpid != verpid) { + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); + return (-1); + } + + flags = fcntl(connfd, F_GETFL, 0); + if (flags != -1) + (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC); + + return (connfd); +} + +static void +ctlcmd_process(int sockfd, int stdoutfd, unsigned int *flags) +{ + char buf[BUFSIZ]; + int i; + for (i = 0; i < BUFSIZ-1; i++) { + char c; + if (read(sockfd, &c, 1) != 1 || + c == '\n' || c == '\0') { + break; + } + buf[i] = c; + } + if (i == 0) { + goto fail; + } + buf[i] = '\0'; + + if (strncmp(buf, "TIOCSWINSZ ", 11) == 0) { + char *next = buf + 11; + struct winsize ws; + errno = 0; + ws.ws_row = strtol(next, &next, 10); + if (errno == EINVAL) { + goto fail; + } + ws.ws_col = strtol(next + 1, &next, 10); + if (errno == EINVAL) { + goto fail; + } + if (ioctl(stdoutfd, TIOCSWINSZ, &ws) == 0) { + (void) write(sockfd, "OK\n", 3); + 
return; + } + } + if (strncmp(buf, "SETFLAGS ", 9) == 0) { + char *next = buf + 9; + unsigned int result; + errno = 0; + result = strtoul(next, &next, 10); + if (errno == EINVAL) { + goto fail; + } + *flags = result; + (void) write(sockfd, "OK\n", 3); + return; + } +fail: + (void) write(sockfd, "FAIL\n", 5); +} + +/* + * Check to see if the client at the other end of the socket is still alive; we + * know it is not if it throws EPIPE at us when we try to write an otherwise + * harmless 0-length message to it. + */ +static int +test_client(int clifd) +{ + if ((write(clifd, "", 0) == -1) && errno == EPIPE) + return (-1); + return (0); +} + +/* + * We want to sleep for a little while but need to be responsive if the zone is + * halting. We poll/sleep on the event stream so we can notice if we're halting. + * Return true if halting, otherwise false. + */ +static boolean_t +halt_sleep(int slptime) +{ + struct pollfd evfd[1]; + + evfd[0].fd = eventstream[1]; + evfd[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI; + + if (poll(evfd, 1, slptime) > 0) { + /* zone halting */ + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * This routine drives the logging and interactive I/O loop. It polls for + * input from the zone side of the fd (output to stdout/stderr), and from the + * client (input to the zone's stdin). Additionally, it polls on the server + * fd, and disconnects any clients that might try to hook up with the zone + * while the fd's are in use. + * + * Data from the zone's stdout and stderr is formatted in json and written to + * the log file whether an interactive client is connected or not. + * + * When the client first calls us up, it is expected to send a line giving its + * "identity"; this consists of the string 'IDENT <pid> <locale>'. 
This is so + * that we can report that the fd's are busy, along with some diagnostics + * about who has them busy; the locale is ignored here but kept for compatibility + * with the zlogin code when running on the zone's console. + * + * We need to handle the case where there is no server within the zone (or + * the server gets stuck) and data that we're writing to the zone server's + * stdin fills the pipe. Because of the way the zfd device works writes can + * flow into the stream and simply be dropped, if there is no server, or writes + * could return -1 with EAGAIN if the server is stuck. Since we ignore errors + * on the write to stdin, we won't get blocked in that case but we'd like to + * avoid dropping initial input if the server within the zone hasn't started + * yet. To handle this we wait to read initial input until we detect that there + * is a server inside the zone. We have to poll for this so that we can + * re-run the ioctl to notice when a server shows up. This poll/wait is handled + * by halt_sleep() so that we can be responsive if the zone wants to halt. + * We only do this check to avoid dropping initial input so it is possible for + * the server within the zone to go away later. At that point zfd will just + * drop any new input flowing into the stream.
+ */ +static void +do_zfd_io(int gzctlfd, int gzservfd, int gzerrfd, int stdinfd, int stdoutfd, + int stderrfd, int logout, int logerr) +{ + struct pollfd pollfds[8]; + char ibuf[BUFSIZ + 1]; + int cc, ret; + int ctlfd = -1; + int clifd = -1; + int clierrfd = -1; + int pollerr = 0; + char clilocale[MAXPATHLEN]; + pid_t clipid = 0; + uint_t flags = 0; + boolean_t stdin_ready = B_FALSE; + int slptime = 250; /* initial poll sleep time in ms */ + + /* client control socket, watch for read events */ + pollfds[0].fd = ctlfd; + pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | + POLLPRI | POLLERR | POLLHUP | POLLNVAL; + + /* client socket, watch for read events */ + pollfds[1].fd = clifd; + pollfds[1].events = pollfds[0].events; + + /* stdout, watch for read events */ + pollfds[2].fd = stdoutfd; + pollfds[2].events = pollfds[0].events; + + /* stderr, watch for read events */ + pollfds[3].fd = stderrfd; + pollfds[3].events = pollfds[0].events; + + /* the server control socket; watch for new connections */ + pollfds[4].fd = gzctlfd; + pollfds[4].events = POLLIN | POLLRDNORM; + + /* the server stdin/out socket; watch for new connections */ + pollfds[5].fd = gzservfd; + pollfds[5].events = POLLIN | POLLRDNORM; + + /* the server stderr socket; watch for new connections */ + pollfds[6].fd = gzerrfd; + pollfds[6].events = POLLIN | POLLRDNORM; + + /* the eventstream; any input means the zone is halting */ + pollfds[7].fd = eventstream[1]; + pollfds[7].events = pollfds[0].events; + + while (!shutting_down) { + pollfds[0].revents = pollfds[1].revents = 0; + pollfds[2].revents = pollfds[3].revents = 0; + pollfds[4].revents = pollfds[5].revents = 0; + pollfds[6].revents = pollfds[7].revents = 0; + + ret = poll(pollfds, 8, -1); + if (ret == -1 && errno != EINTR) { + zerror(&logplat, B_TRUE, "poll failed"); + /* we are hosed, close connection */ + break; + } + + /* control events from client */ + if (pollfds[0].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + /* 
process control message */ + ctlcmd_process(ctlfd, stdoutfd, &flags); + } else if (pollfds[0].revents) { + /* bail if any error occurs */ + pollerr = pollfds[0].revents; + zerror(&logplat, B_FALSE, "closing connection " + "with control channel, pollerr %d\n", pollerr); + break; + } + + /* event from client side */ + if (pollfds[1].revents) { + if (stdin_ready) { + if (pollfds[1].revents & (POLLIN | + POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(clifd, ibuf, BUFSIZ); + if (cc > 0) { + /* + * See comment for this + * function on what happens if + * there is no reader in the + * zone. EOF is handled below. + */ + (void) write(stdinfd, ibuf, cc); + } + } else if (pollfds[1].revents & (POLLERR | + POLLNVAL)) { + pollerr = pollfds[1].revents; + zerror(&logplat, B_FALSE, + "closing connection " + "with client, pollerr %d\n", + pollerr); + break; + } + + if (pollfds[1].revents & POLLHUP) { + if (flags & ZLOGIN_ZFD_EOF) { + /* + * Let the client know. We've + * already serviced any pending + * regular input. Let the + * stream clear since the EOF + * ioctl jumps to the head. + */ + (void) ioctl(stdinfd, I_FLUSH); + if (halt_sleep(250)) + break; + (void) ioctl(stdinfd, ZFD_EOF); + } + break; + } + } else { + if (ioctl(stdinfd, ZFD_HAS_SLAVE) == 0) { + stdin_ready = B_TRUE; + } else { + /* + * There is nothing in the zone to read + * our input. Presumably the user + * providing input expects something to + * show up, but that is no guarantee. + * Since we haven't serviced the pending + * input poll yet, we don't want to + * immediately loop around but we also + * need to be responsive if the zone is + * halting. 
+ */ + if (halt_sleep(slptime)) + break; + + if (slptime < 5000) + slptime += 250; + } + } + } + + /* event from the zone's stdout */ + if (pollfds[2].revents) { + if (pollfds[2].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stdoutfd, ibuf, BUFSIZ); + /* zfd is a stream, so ignore 0 length read */ + if (cc < 0 && (errno != EINTR) && + (errno != EAGAIN)) + break; + if (cc > 0) { + logstream_write(logout, ibuf, cc); + + /* + * Lose output if no one is listening, + * otherwise pass it on. + */ + if (clifd != -1) + (void) write(clifd, ibuf, cc); + } + } else { + pollerr = pollfds[2].revents; + zerror(&logplat, B_FALSE, + "closing connection with stdout zfd, " + "pollerr %d\n", pollerr); + break; + } + } + + /* event from the zone's stderr */ + if (pollfds[3].revents) { + if (pollfds[3].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stderrfd, ibuf, BUFSIZ); + /* zfd is a stream, so ignore 0 length read */ + if (cc < 0 && (errno != EINTR) && + (errno != EAGAIN)) + break; + if (cc > 0) { + logstream_write(logerr, ibuf, cc); + + /* + * Lose output if no one is listening, + * otherwise pass it on. + */ + if (clierrfd != -1) + (void) write(clierrfd, ibuf, + cc); + } + } else { + pollerr = pollfds[3].revents; + zerror(&logplat, B_FALSE, + "closing connection with stderr zfd, " + "pollerr %d\n", pollerr); + break; + } + } + + /* connect event from server control socket */ + if (pollfds[4].revents) { + if (ctlfd != -1) { + /* + * Test the client to see if it is really + * still alive. If it has died but we + * haven't yet detected that, we might + * deny a legitimate connect attempt. If it + * is dead, we break out; once we tear down + * the old connection, the new connection + * will happen. 
+ */ + if (test_client(ctlfd) == -1) { + break; + } + /* we're already handling a client */ + reject_client(gzctlfd, clipid); + } else { + ctlfd = accept_client(gzctlfd, &clipid, + clilocale, sizeof (clilocale), &flags); + if (ctlfd != -1) { + pollfds[0].fd = ctlfd; + } else { + break; + } + } + } + + /* connect event from server stdin/out socket */ + if (pollfds[5].revents) { + if (ctlfd == -1) { + /* + * This shouldn't happen since the client is + * expected to connect on the control socket + * first. If we see this, tear everything down + * and start over. + */ + zerror(&logplat, B_FALSE, "GZ zfd stdin/stdout " + "connection attempt with no GZ control\n"); + break; + } + assert(clifd == -1); + if ((clifd = accept_socket(gzservfd, clipid)) != -1) { + /* No need to watch for other new connections */ + pollfds[5].fd = -1; + /* Client input is of interest, though */ + pollfds[1].fd = clifd; + } else { + break; + } + } + + /* connection event from server stderr socket */ + if (pollfds[6].revents) { + if (ctlfd == -1) { + /* + * Same conditions apply to stderr as stdin/out. + */ + zerror(&logplat, B_FALSE, "GZ zfd stderr " + "connection attempt with no GZ control\n"); + break; + } + assert(clierrfd == -1); + if ((clierrfd = accept_socket(gzerrfd, clipid)) != -1) { + /* No need to watch for other new connections */ + pollfds[6].fd = -1; + } else { + break; + } + } + + /* + * Watch for events on the eventstream. This is how we get + * notified of the zone halting, etc. It provides us a + * "wakeup" from poll when important things happen, which + * is good. 
+ */ + if (pollfds[7].revents) { + break; + } + } + + if (clifd != -1) { + (void) shutdown(clifd, SHUT_RDWR); + (void) close(clifd); + } + + if (clierrfd != -1) { + (void) shutdown(clierrfd, SHUT_RDWR); + (void) close(clierrfd); + } +} + +static int +open_fd(int id, int rw) +{ + int fd; + int flag = O_NONBLOCK | O_NOCTTY | O_CLOEXEC; + int retried = 0; + char stdpath[MAXPATHLEN]; + + (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d", + zone_name, id); + flag |= rw; + + while (!shutting_down) { + if ((fd = open(stdpath, flag)) != -1) { + /* + * Setting RPROTDIS on the stream means that the + * control portion of messages received (which we don't + * care about) will be discarded by the stream head. If + * we allowed such messages, we wouldn't be able to use + * read(2), as it fails (EBADMSG) when a message with a + * control element is received. + */ + if (ioctl(fd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(&logplat, B_TRUE, + "failed to set options on zfd"); + return (-1); + } + return (fd); + } + + if (retried++ > 60) + break; + + (void) sleep(1); + } + + zerror(&logplat, B_TRUE, "failed to open zfd"); + return (-1); +} + +/* + * Body of the worker thread to log the zfd's stdout and stderr to a log file + * and to perform interactive IO to the stdin, stdout and stderr zfd's. + * + * The stdin, stdout and stderr are from the perspective of the process inside + * the zone, so the zoneadmd view is opposite (i.e. we write to the stdin fd + * and read from the stdout/stderr fds). 
+ */ +static void * +srvr(void *modearg) +{ + zfd_mode_t *mode = (zfd_mode_t *)modearg; + int gzctlfd = -1; + int gzoutfd = -1; + int stdinfd = -1; + int stdoutfd = -1; + int gzerrfd = -1; + int stderrfd = -1; + int flags; + int len; + char ibuf[BUFSIZ + 1]; + int logout = -1; + int logerr = -1; + + if (!shutting_down && mode->zmode_gzlogging) { + logout = logstream_open(log_name, "stdout", 0); + logerr = logstream_open(log_name, "stderr", 0); + } + + if (!shutting_down) { + if (pipe(eventstream) != 0) { + zerror(&logplat, B_TRUE, "failed to open logger " + "control pipe"); + return (NULL); + } + } + + while (!shutting_down) { + if (init_server_sock(&gzctlfd, "ctl") == -1) { + zerror(&logplat, B_FALSE, + "server setup: control socket init failed"); + goto death; + } + if (init_server_sock(&gzoutfd, "out") == -1) { + zerror(&logplat, B_FALSE, + "server setup: stdout socket init failed"); + goto death; + } + if (init_server_sock(&gzerrfd, "err") == -1) { + zerror(&logplat, B_FALSE, + "server setup: stderr socket init failed"); + goto death; + } + + if (mode->zmode_n_stddevs == 1) { + if ((stdinfd = open_fd(0, O_RDWR)) == -1) { + goto death; + } + stdoutfd = stdinfd; + } else { + if ((stdinfd = open_fd(0, O_WRONLY)) == -1 || + (stdoutfd = open_fd(1, O_RDONLY)) == -1 || + (stderrfd = open_fd(2, O_RDONLY)) == -1) { + goto death; + } + } + + do_zfd_io(gzctlfd, gzoutfd, gzerrfd, stdinfd, stdoutfd, + stderrfd, logout, logerr); +death: + destroy_server_sock(gzctlfd, "ctl"); + destroy_server_sock(gzoutfd, "out"); + destroy_server_sock(gzerrfd, "err"); + + /* when shutting down, leave open until drained */ + if (!shutting_down) { + (void) close(stdinfd); + if (mode->zmode_n_stddevs == 3) { + (void) close(stdoutfd); + (void) close(stderrfd); + } + } + } + + /* + * Attempt to drain remaining log output from the zone prior to closing + * the file descriptors. This helps ensure that complete logs are + * captured during shutdown. 
+ */ + flags = fcntl(stdoutfd, F_GETFL, 0); + if (fcntl(stdoutfd, F_SETFL, flags | O_NONBLOCK) != -1) { + while ((len = read(stdoutfd, ibuf, BUFSIZ)) > 0) { + logstream_write(logout, ibuf, len); + } + } + (void) close(stdoutfd); + + if (mode->zmode_n_stddevs > 1) { + (void) close(stdinfd); + flags = fcntl(stderrfd, F_GETFL, 0); + if (fcntl(stderrfd, F_SETFL, flags | O_NONBLOCK) != -1) { + while ((len = read(stderrfd, ibuf, BUFSIZ)) > 0) { + logstream_write(logerr, ibuf, len); + } + } + (void) close(stderrfd); + } + + + (void) close(eventstream[0]); + eventstream[0] = -1; + (void) close(eventstream[1]); + eventstream[1] = -1; + logstream_close(logout, B_FALSE); + logstream_close(logerr, B_FALSE); + return (NULL); +} + +/* + * The meaning of the original legacy values for the zlog-mode evolved over + * time, to the point where the old names no longer made sense. The current + * values are simply positional letters used to indicate various capabilities. + * The following table shows the meaning of the mode values, along with the + * legacy name which we continue to support for compatability. Any future + * capability can add a letter to the left and '-' is implied for existing + * strings. + * + * zlog-mode gz log - tty - ngz log + * --------- ------ --- ------- + * gt- (int) y y n + * g-- (log) y n n + * gtn (nlint) y y y + * g-n (nolog) y n y + * -t- n y n + * --- n n n + * + * This function also obtains any custom name for stdio.log while it is reading + * the zone configuration. 
+ */ +static void +get_mode_logmax(zfd_mode_t *mode) +{ + zone_dochandle_t handle; + struct zone_attrtab attr; + + bzero(mode, sizeof (zfd_mode_t)); + + if ((handle = zonecfg_init_handle()) == NULL) + return; + + if (zonecfg_get_handle(zone_name, handle) != Z_OK) + goto done; + + if (zonecfg_setattrent(handle) != Z_OK) + goto done; + while (zonecfg_getattrent(handle, &attr) == Z_OK) { + if (strcmp(ZLOG_MODE, attr.zone_attr_name) == 0) { + if (strcmp("g--", attr.zone_attr_value) == 0 || + strncmp("log", attr.zone_attr_value, 3) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 3; + mode->zmode_n_addl_devs = 0; + } else if (strcmp("g-n", attr.zone_attr_value) == 0 || + strncmp("nolog", attr.zone_attr_value, 5) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 3; + mode->zmode_n_addl_devs = 2; + } else if (strcmp("gt-", attr.zone_attr_value) == 0 || + strncmp("int", attr.zone_attr_value, 3) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 1; + mode->zmode_n_addl_devs = 0; + } else if (strcmp("gtn", attr.zone_attr_value) == 0 || + strncmp("nlint", attr.zone_attr_value, 5) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 1; + mode->zmode_n_addl_devs = 1; + } else if (strcmp("-t-", attr.zone_attr_value) == 0) { + mode->zmode_gzlogging = B_FALSE; + mode->zmode_n_stddevs = 1; + mode->zmode_n_addl_devs = 0; + } else if (strcmp("---", attr.zone_attr_value) == 0) { + mode->zmode_gzlogging = B_FALSE; + mode->zmode_n_stddevs = 3; + mode->zmode_n_addl_devs = 0; + } + continue; + } + + if (strcmp(ZLOG_NAME, attr.zone_attr_name) == 0) { + (void) strlcpy(log_name, attr.zone_attr_value, + sizeof (log_name)); + continue; + } + } + (void) zonecfg_endattrent(handle); + +done: + zonecfg_fini_handle(handle); +} + +void +create_log_thread(zlog_t *zlogp) +{ + int res; + + shutting_down = 0; + + get_mode_logmax(&mode); + if (mode.zmode_n_stddevs == 0) + return; + + if (init_zfd_devs(zlogp, &mode) == -1) { + 
zerror(zlogp, B_FALSE, + "zfd setup: device initialization failed"); + return; + } + + res = thr_create(NULL, 0, srvr, (void *)&mode, 0, + &logger_tid); + if (res != 0) { + zerror(zlogp, B_FALSE, "error %d creating logger thread", res); + logger_tid = 0; + } +} + +void +destroy_log_thread(zlog_t *zlogp) +{ + if (logger_tid != 0) { + int stop = 1; + + shutting_down = 1; + /* break out of poll to shutdown */ + if (eventstream[0] != -1) + (void) write(eventstream[0], &stop, sizeof (stop)); + (void) thr_join(logger_tid, NULL, NULL); + logger_tid = 0; + } + + (void) destroy_zfd_devs(zlogp); +} diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index b1c2d2bbf5..342b1bf958 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -22,6 +22,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. All rights reserved. + * Copyright 2021 Joyent, Inc. * Copyright (c) 2016 by Delphix. All rights reserved. 
*/ @@ -69,6 +70,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/sysmacros.h> +#include <sys/time.h> #include <bsm/adt.h> #include <bsm/adt_event.h> @@ -102,6 +104,8 @@ #include <libdladm.h> #include <sys/dls_mgmt.h> #include <libscf.h> +#include <uuid/uuid.h> +#include <libppt.h> #include <libzonecfg.h> #include <zonestat_impl.h> @@ -109,6 +113,8 @@ static char *progname; char *zone_name; /* zone which we are managing */ +zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */ +char zonepath[MAXNAMELEN]; char pool_name[MAXNAMELEN]; char default_brand[MAXNAMELEN]; char brand_name[MAXNAMELEN]; @@ -117,13 +123,15 @@ boolean_t zone_iscluster; boolean_t zone_islabeled; boolean_t shutdown_in_progress; static zoneid_t zone_id; +static zoneid_t zone_did = 0; dladm_handle_t dld_handle = NULL; -static char pre_statechg_hook[2 * MAXPATHLEN]; -static char post_statechg_hook[2 * MAXPATHLEN]; +char pre_statechg_hook[2 * MAXPATHLEN]; +char post_statechg_hook[2 * MAXPATHLEN]; char query_hook[2 * MAXPATHLEN]; -zlog_t logsys; +zlog_t logsys; /* log to syslog */ +zlog_t logplat; /* log to platform.log */ mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */ mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */ @@ -136,12 +144,17 @@ static int zone_door = -1; boolean_t in_death_throes = B_FALSE; /* daemon is dying */ boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */ +static int platloghdl = -1; /* Handle for <zonepath>/logs/platform.log */ + #if !defined(TEXT_DOMAIN) /* should be defined by cc -D */ #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */ #endif #define DEFAULT_LOCALE "C" +#define RSRC_NET "net" +#define RSRC_DEV "device" + static const char * z_cmd_name(zone_cmd_t zcmd) { @@ -215,17 +228,14 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) 
{ va_list alist; char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */ - char *bp; + char *bp, *bp_nozone; int saved_errno = errno; - if (zlogp == NULL) - return; if (zlogp == &logsys) - (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", - zone_name); + (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", zone_name); else buf[0] = '\0'; - bp = &(buf[strlen(buf)]); + bp = bp_nozone = &(buf[strlen(buf)]); /* * In theory, the locale pointer should be set to either "C" or a @@ -242,15 +252,38 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) if (use_strerror) (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s", strerror(saved_errno)); + + (void) strlcat(buf, "\n", sizeof (buf)); + + /* + * If we don't have the platform log, we are in a child process, and + * should log to stderr (which is a pipe) instead of the file. + */ + if (logging_poisoned) { + (void) fprintf(stderr, "%s", buf); + + if (zlogp != &logsys && zlogp->logfile == stderr) + return; + } else { + logstream_write(platloghdl, bp_nozone, strlen(bp_nozone)); + + if (zlogp == &logplat) + return; + } + if (zlogp == &logsys) { + bp = strrchr(buf, '\n'); + if (bp != NULL && bp[1] == '\0') { + *bp = '\0'; + } (void) syslog(LOG_ERR, "%s", buf); } else if (zlogp->logfile != NULL) { - (void) fprintf(zlogp->logfile, "%s\n", buf); + (void) fprintf(zlogp->logfile, "%s", buf); } else { size_t buflen; size_t copylen; - buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf); + buflen = snprintf(zlogp->log, zlogp->loglen, "%s", buf); copylen = MIN(buflen, zlogp->loglen); zlogp->log += copylen; zlogp->loglen -= copylen; @@ -258,34 +291,58 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...) } /* + * Append src to dest, modifying dest in the process. Prefix src with + * a space character if dest is a non-empty string. Assumes dest is already + * properly \0-terminated OR overruns destsize. 
+ */ +static void +strnappend(char *dest, size_t destsize, const char *src) +{ + size_t startpoint = strnlen(dest, destsize); + + if (startpoint >= destsize - 1) { + /* We've run out of room. Record something?! */ + return; + } + + if (startpoint > 0) { + /* Add the space per the function's intro comment. */ + dest[startpoint] = ' '; + startpoint++; + } + + /* Arguably we should check here too... */ + (void) strlcpy(dest + startpoint, src, destsize - startpoint); +} + +/* * Emit a warning for any boot arguments which are unrecognized. Since * Solaris boot arguments are getopt(3c) compatible (see kernel(8)), we * put the arguments into an argv style array, use getopt to process them, - * and put the resultant argument string back into outargs. + * and put the resultant argument string back into outargs. Non-native brands + * may support alternate forms of boot arguments so we must handle that as well. * * During the filtering, we pull out any arguments which are truly "boot" * arguments, leaving only those which are to be passed intact to the * progenitor process. The one we support at the moment is -i, which * indicates to the kernel which program should be launched as 'init'. * - * A return of Z_INVAL indicates specifically that the arguments are - * not valid; this is a non-fatal error. Except for Z_OK, all other return - * values are treated as fatal. + * Except for Z_OK, all other return values are treated as fatal. 
*/ static int filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, - char *init_file, char *badarg) + char *init_file) { int argc = 0, argc_save; int i; - int err; + int err = Z_OK; char *arg, *lasts, **argv = NULL, **argv_save; char zonecfg_args[BOOTARGS_MAX]; char scratchargs[BOOTARGS_MAX], *sargs; + char scratchopt[3]; char c; bzero(outargs, BOOTARGS_MAX); - bzero(badarg, BOOTARGS_MAX); /* * If the user didn't specify transient boot arguments, check @@ -293,25 +350,10 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, * and use them if applicable. */ if (inargs == NULL || inargs[0] == '\0') { - zone_dochandle_t handle; - if ((handle = zonecfg_init_handle()) == NULL) { - zerror(zlogp, B_TRUE, - "getting zone configuration handle"); - return (Z_BAD_HANDLE); - } - err = zonecfg_get_snapshot_handle(zone_name, handle); - if (err != Z_OK) { - zerror(zlogp, B_FALSE, - "invalid configuration snapshot"); - zonecfg_fini_handle(handle); - return (Z_BAD_HANDLE); - } - bzero(zonecfg_args, sizeof (zonecfg_args)); - (void) zonecfg_get_bootargs(handle, zonecfg_args, + (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args, sizeof (zonecfg_args)); inargs = zonecfg_args; - zonecfg_fini_handle(handle); } if (strlen(inargs) >= BOOTARGS_MAX) { @@ -348,14 +390,22 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, } /* - * We preserve compatibility with the Solaris system boot behavior, + * We preserve compatibility with the illumos system boot behavior, * which allows: * * # reboot kernel/unix -s -m verbose * - * In this example, kernel/unix tells the booter what file to - * boot. We don't want reboot in a zone to be gratuitously different, - * so we silently ignore the boot file, if necessary. + * In this example, kernel/unix tells the booter what file to boot. The + * original intent of this was that we didn't want reboot in a zone to + * be gratuitously different, so we would silently ignore the boot + * file, if necessary. 
However, this usage is archaic and has never + * been common, since it is impossible to boot a zone onto a different + * kernel. Ignoring the first argument breaks for non-native brands + * which pass boot arguments in a different style. e.g. + * systemd.log_level=debug + * Thus, for backward compatibility we only ignore the first argument + * if it appears to be in the illumos form and attempting to specify a + * kernel. */ if (argv[0] == NULL) goto done; @@ -363,7 +413,7 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, assert(argv[0][0] != ' '); assert(argv[0][0] != '\t'); - if (argv[0][0] != '-' && argv[0][0] != '\0') { + if (strncmp(argv[0], "kernel/", 7) == 0) { argv = &argv[1]; argc--; } @@ -386,41 +436,35 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs, case 'm': case 's': /* These pass through unmolested */ - (void) snprintf(outargs, BOOTARGS_MAX, - "%s -%c %s ", outargs, c, optarg ? optarg : ""); + (void) snprintf(scratchopt, sizeof (scratchopt), + "-%c", c); + strnappend(outargs, BOOTARGS_MAX, scratchopt); + if (optarg != NULL) + strnappend(outargs, BOOTARGS_MAX, optarg); break; case '?': /* - * We warn about unknown arguments but pass them - * along anyway-- if someone wants to develop their - * own init replacement, they can pass it whatever - * args they want. + * If a brand has its own init, we need to pass along + * whatever the user provides. We use the entire + * unknown string here so that we correctly handle + * unknown long options (e.g. --debug). */ - err = Z_INVAL; - (void) snprintf(outargs, BOOTARGS_MAX, - "%s -%c", outargs, optopt); - (void) snprintf(badarg, BOOTARGS_MAX, - "%s -%c", badarg, optopt); + strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]); break; } } /* - * For Solaris Zones we warn about and discard non-option arguments. - * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar - * to the kernel, we concat up all the other remaining boot args. - * and warn on them as a group. 
+ * We need to pass along everything else since we don't know what + * the brand's init is expecting. For example, an argument list like: + * --confdir /foo --debug + * will cause the getopt parsing to stop at '/foo' but we need to pass + * that on, along with the '--debug'. This does mean that we require + * any of our known options (-ifms) to preceed the brand-specific ones. */ - if (optind < argc) { - err = Z_INVAL; - while (optind < argc) { - (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s", - badarg, strlen(badarg) > 0 ? " " : "", - argv[optind]); - optind++; - } - zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot " - "arguments `%s'.", badarg); + while (optind < argc) { + strnappend(outargs, BOOTARGS_MAX, argv[optind]); + optind++; } done: @@ -459,7 +503,7 @@ mkzonedir(zlog_t *zlogp) * Run the brand's pre-state change callback, if it exists. */ static int -brand_prestatechg(zlog_t *zlogp, int state, int cmd) +brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug) { char cmdbuf[2 * MAXPATHLEN]; const char *altroot; @@ -472,7 +516,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd) state, cmd, altroot) > sizeof (cmdbuf)) return (-1); - if (do_subproc(zlogp, cmdbuf, NULL) != 0) + if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0) return (-1); return (0); @@ -482,7 +526,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd) * Run the brand's post-state change callback, if it exists. */ static int -brand_poststatechg(zlog_t *zlogp, int state, int cmd) +brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug) { char cmdbuf[2 * MAXPATHLEN]; const char *altroot; @@ -495,7 +539,7 @@ brand_poststatechg(zlog_t *zlogp, int state, int cmd) state, cmd, altroot) > sizeof (cmdbuf)) return (-1); - if (do_subproc(zlogp, cmdbuf, NULL) != 0) + if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0) return (-1); return (0); @@ -532,37 +576,51 @@ notify_zonestatd(zoneid_t zoneid) * Bring a zone up to the pre-boot "ready" stage. 
The mount_cmd argument is * 'true' if this is being invoked as part of the processing for the "mount" * subcommand. + * + * If a scratch zone mount (ALT_MOUNT) is being performed then do not + * call the state change hooks. */ static int -zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate) +zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug) { int err; + boolean_t snapped = B_FALSE; - if (brand_prestatechg(zlogp, zstate, Z_READY) != 0) - return (-1); - + if ((snap_hndl = zonecfg_init_handle()) == NULL) { + zerror(zlogp, B_TRUE, "getting zone configuration handle"); + goto bad; + } if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to create snapshot: %s", zonecfg_strerror(err)); goto bad; } + snapped = B_TRUE; - if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) { - if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) - zerror(zlogp, B_FALSE, "destroying snapshot: %s", - zonecfg_strerror(err)); + if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) { + zerror(zlogp, B_FALSE, "invalid configuration snapshot"); goto bad; } + + if (zone_did == 0) + zone_did = zone_get_did(zone_name); + + if (!ALT_MOUNT(mount_cmd) && + brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0) + goto bad; + + if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1) + goto bad; + if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) { bringup_failure_recovery = B_TRUE; - (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE); - if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) - zerror(zlogp, B_FALSE, "destroying snapshot: %s", - zonecfg_strerror(err)); + (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE, + debug); goto bad; } - if (brand_poststatechg(zlogp, zstate, Z_READY) != 0) + if (!ALT_MOUNT(mount_cmd) && + brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0) goto bad; return (0); @@ -572,7 +630,16 @@ bad: * If something goes wrong, we up the zones's state to the 
target * state, READY, and then invoke the hook as if we're halting. */ - (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT); + if (!ALT_MOUNT(mount_cmd)) + (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT, + debug); + + if (snapped) + if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) + zerror(zlogp, B_FALSE, "destroying snapshot: %s", + zonecfg_strerror(err)); + zonecfg_fini_handle(snap_hndl); + snap_hndl = NULL; return (-1); } @@ -624,15 +691,8 @@ mount_early_fs(void *data, const char *spec, const char *dir, /* determine the zone rootpath */ if (mount_cmd) { - char zonepath[MAXPATHLEN]; char luroot[MAXPATHLEN]; - if (zone_get_zonepath(zone_name, - zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - return (-1); - } - (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); resolve_lofs(zlogp, luroot, sizeof (luroot)); (void) strlcpy(rootpath, luroot, sizeof (rootpath)); @@ -687,6 +747,8 @@ mount_early_fs(void *data, const char *spec, const char *dir, char opt_buf[MAX_MNTOPT_STR]; int optlen = 0; int mflag = MS_DATA; + int i; + int ret; (void) ct_tmpl_clear(tmpl_fd); /* @@ -714,9 +776,26 @@ mount_early_fs(void *data, const char *spec, const char *dir, optlen = MAX_MNTOPT_STR; mflag = MS_OPTIONSTR; } - if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0) - _exit(errno); - _exit(0); + + /* + * There is an obscure race condition which can cause mount + * to return EBUSY. This happens for example on the mount + * of the zone's /etc/svc/volatile file system if there is + * a GZ process running svcs -Z, which will touch the + * mountpoint, just as we're trying to do the mount. To cope + * with this, we retry up to 3 times to let this transient + * process get out of the way. 
+ */ + for (i = 0; i < 3; i++) { + ret = 0; + if (mount(spec, dir, mflag, fstype, NULL, 0, opt, + optlen) != 0) + ret = errno; + if (ret != EBUSY) + break; + (void) sleep(1); + } + _exit(ret); } /* parent */ @@ -740,18 +819,275 @@ mount_early_fs(void *data, const char *spec, const char *dir, } /* + * Replace characters other than [A-Za-z0-9_] with '_' so that the string is a + * valid environment variable name. + */ +static void +sanitize_env_var_name(char *var) +{ + for (char *p = var; *p != '\0'; p++) { + if (!isalnum(*p)) { + *p = '_'; + } + } +} + +/* + * env variable name format + * _ZONECFG_{resource name}_{identifying attr. name}_{property name} + * Any dashes (-) in the property names are replaced with underscore (_). + */ +static void +set_zonecfg_env(char *rsrc, char *attr, char *name, char *val) +{ + /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */ + char nm[2 * MAXNAMELEN + 32]; + + if (attr == NULL) + (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc, + name); + else + (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc, + attr, name); + + sanitize_env_var_name(nm); + + (void) setenv(nm, val, 1); +} + +/* + * Resolve a device:match value to a path. This is only different for PPT + * devices, where we expect the match property to be a /devices/... path, and + * configured for PPT already. 
+ */ +int +resolve_device_match(zlog_t *zlogp, struct zone_devtab *dtab, + char *path, size_t len) +{ + struct zone_res_attrtab *rap; + + for (rap = dtab->zone_dev_attrp; rap != NULL; + rap = rap->zone_res_attr_next) { + if (strcmp(rap->zone_res_attr_name, "model") == 0 && + strcmp(rap->zone_res_attr_value, "passthru") == 0) + break; + } + + if (rap == NULL) { + if (strlcpy(path, dtab->zone_dev_match, len) >= len) + return (Z_INVAL); + return (Z_OK); + } + + if (strncmp(dtab->zone_dev_match, "/devices", + strlen("/devices")) != 0) { + zerror(zlogp, B_FALSE, "invalid passthru match value '%s'", + dtab->zone_dev_match); + return (Z_INVAL); + } + + if (ppt_devpath_to_dev(dtab->zone_dev_match, path, len) != 0) { + zerror(zlogp, B_TRUE, "failed to resolve passthru device %s", + dtab->zone_dev_match); + return (Z_INVAL); + } + + return (Z_OK); +} + +/* + * Export various zonecfg properties into environment for the boot and state + * change hooks. + * + * If debug is true, _ZONEADMD_brand_debug is set to 1, else it is set to an + * empty string. Brand hooks consider any non-empty string as an indication + * that debug output is requested. + * + * We could export more of the config in the future, as necessary. A better + * solution would be to make it so brand-specific behavior is handled by + * brand-specific callbacks written in C. Then the normal libzonecfg interfaces + * can be used for accessing any parts of the configuration that are needed. + * + * All of the environment variables set by this function are specific to + * SmartOS. 
+ */ +static int +setup_subproc_env(zlog_t *zlogp, boolean_t debug) +{ + int res; + struct zone_nwiftab ntab; + struct zone_devtab dtab; + struct zone_attrtab atab; + char net_resources[MAXNAMELEN * 2]; + char dev_resources[MAXNAMELEN * 2]; + char didstr[16]; + char uuidstr[UUID_PRINTABLE_STRING_LENGTH]; + uuid_t uuid; + + /* snap_hndl is null when called through the set_brand_env code path */ + if (snap_hndl == NULL) + return (Z_OK); + + if ((res = zonecfg_get_uuid(zone_name, uuid)) != Z_OK) + return (res); + + uuid_unparse(uuid, uuidstr); + (void) setenv("_ZONECFG_uuid", uuidstr, 1); + + (void) snprintf(didstr, sizeof (didstr), "%d", zone_did); + (void) setenv("_ZONECFG_did", didstr, 1); + + /* + * "net" resources are exported because zoneadmd does not handle + * automatic configuration of vnics and so that the bhyve boot hook + * can generate the argument list for the brand's init program. At such + * a time as vnic creation is handled in zoneadmd and brand callbacks + * can be executed as part of the zoneadmd process this should be + * removed. 
+ */ + net_resources[0] = '\0'; + if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK) + goto done; + + while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) { + struct zone_res_attrtab *rap; + char *phys; + + phys = ntab.zone_nwif_physical; + + (void) strlcat(net_resources, phys, sizeof (net_resources)); + (void) strlcat(net_resources, " ", sizeof (net_resources)); + + set_zonecfg_env(RSRC_NET, phys, "physical", phys); + + set_zonecfg_env(RSRC_NET, phys, "address", + ntab.zone_nwif_address); + set_zonecfg_env(RSRC_NET, phys, "allowed-address", + ntab.zone_nwif_allowed_address); + set_zonecfg_env(RSRC_NET, phys, "defrouter", + ntab.zone_nwif_defrouter); + set_zonecfg_env(RSRC_NET, phys, "global-nic", + ntab.zone_nwif_gnic); + set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac); + set_zonecfg_env(RSRC_NET, phys, "vlan-id", + ntab.zone_nwif_vlan_id); + + for (rap = ntab.zone_nwif_attrp; rap != NULL; + rap = rap->zone_res_attr_next) + set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name, + rap->zone_res_attr_value); + nwifent_free_attrs(&ntab); + } + + (void) setenv("_ZONECFG_net_resources", net_resources, 1); + + (void) zonecfg_endnwifent(snap_hndl); + + /* + * "device" resources are exported because the bhyve boot brand callback + * needs them to generate the argument list for the brand's init + * program. At such a time as brand callbacks can be executed as part + * of the zoneadmd process, this should be removed. + * + * The bhyve brand only supports disk-like and ppt devices and does not + * support regular expressions. 
+ */ + if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK) + goto done; + + dev_resources[0] = '\0'; + while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) { + char *match = dtab.zone_dev_match; + struct zone_res_attrtab *rap; + char path[MAXPATHLEN]; + + res = resolve_device_match(zlogp, &dtab, path, sizeof (path)); + if (res != Z_OK) + goto done; + + /* + * Even if not modified, the match path will be mangled in the + * environment variable name, so we always store the value here. + */ + set_zonecfg_env(RSRC_DEV, match, "path", path); + + for (rap = dtab.zone_dev_attrp; rap != NULL; + rap = rap->zone_res_attr_next) { + set_zonecfg_env(RSRC_DEV, match, + rap->zone_res_attr_name, rap->zone_res_attr_value); + } + + /* + * _ZONECFG_device_resources will contain a space separated list + * of devices that have _ZONECFG_device_<device>* environment + * variables. So that each element of the list matches up with + * <device>, each list item needs to be sanitized in the same + * way that environment variable names are sanitized. + */ + sanitize_env_var_name(match); + (void) strlcat(dev_resources, match, sizeof (dev_resources)); + (void) strlcat(dev_resources, " ", sizeof (dev_resources)); + } + (void) zonecfg_enddevent(snap_hndl); + + (void) setenv("_ZONECFG_device_resources", dev_resources, 1); + + /* + * "attr" resources are exported because the bhyve brand's boot hook + * needs access to the "ram", "cpu", "bootrom", etc. to form the + * argument list for the brand's init program. Once the bhyve brand is + * configured via proper resources and properties, this should be + * removed. 
+ */ + if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK) + goto done; + + while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) { + set_zonecfg_env("attr", NULL, atab.zone_attr_name, + atab.zone_attr_value); + } + + (void) zonecfg_endattrent(snap_hndl); + + if (debug) + (void) setenv("_ZONEADMD_brand_debug", "1", 1); + else + (void) setenv("_ZONEADMD_brand_debug", "", 1); + + res = Z_OK; + +done: + return (res); +} + +void +nwifent_free_attrs(struct zone_nwiftab *np) +{ + struct zone_res_attrtab *rap; + + for (rap = np->zone_nwif_attrp; rap != NULL; ) { + struct zone_res_attrtab *tp = rap; + + rap = rap->zone_res_attr_next; + free(tp); + } +} + +/* * If retstr is not NULL, the output of the subproc is returned in the str, * otherwise it is output using zerror(). Any memory allocated for retstr * should be freed by the caller. */ int -do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) +do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug) { char buf[1024]; /* arbitrary large amount */ char *inbuf; FILE *file; int status; int rd_cnt; + int fds[2]; + pid_t child; if (retstr != NULL) { if ((*retstr = malloc(1024)) == NULL) { @@ -764,31 +1100,104 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) inbuf = buf; } - file = popen(cmdbuf, "r"); - if (file == NULL) { - zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf); + if (pipe(fds) != 0) { + zerror(zlogp, B_TRUE, "failed to create pipe for subprocess"); return (-1); } + if ((child = fork()) == 0) { + int in; + + /* + * SIGINT is currently ignored. It probably shouldn't be so + * hard to kill errant children, so we revert to SIG_DFL. + * SIGHUP and SIGUSR1 are used to perform log rotation. We + * leave those as-is because we don't want a 'pkill -HUP + * zoneadmd' to kill this child process before exec(). On + * exec(), SIGHUP and SIGUSR1 will become SIG_DFL. + */ + (void) sigset(SIGINT, SIG_DFL); + + /* + * Set up a pipe for the child to log to. 
+ */ + if (dup2(fds[1], STDERR_FILENO) == -1) { + (void) snprintf(buf, sizeof (buf), + "subprocess failed to dup2(STDERR_FILENO): %s\n", + strerror(errno)); + (void) write(fds[1], buf, strlen(buf)); + _exit(127); + } + if (dup2(fds[1], STDOUT_FILENO) == -1) { + perror("subprocess failed to dup2(STDOUT_FILENO)"); + _exit(127); + } + /* + * Some naughty children may try to read from stdin. Be sure + * that the first file that a child opens doesn't get stdin's + * file descriptor. + */ + if ((in = open("/dev/null", O_RDONLY)) == -1 || + dup2(in, STDIN_FILENO) == -1) { + zerror(zlogp, B_TRUE, + "subprocess failed to set up STDIN_FILENO"); + _exit(127); + } + closefrom(STDERR_FILENO + 1); + + if (setup_subproc_env(zlogp, debug) != Z_OK) { + zerror(zlogp, B_FALSE, "failed to setup environment"); + _exit(127); + } + + (void) execl("/bin/sh", "sh", "-c", cmdbuf, NULL); + + zerror(zlogp, B_TRUE, "subprocess execl failed"); + _exit(127); + } else if (child == -1) { + zerror(zlogp, B_TRUE, "failed to create subprocess for '%s'", + cmdbuf); + (void) close(fds[0]); + (void) close(fds[1]); + return (-1); + } + + (void) close(fds[1]); + + file = fdopen(fds[0], "r"); while (fgets(inbuf, 1024, file) != NULL) { if (retstr == NULL) { - if (zlogp != &logsys) + if (zlogp != &logsys) { + int last = strlen(inbuf) - 1; + + if (inbuf[last] == '\n') + inbuf[last] = '\0'; zerror(zlogp, B_FALSE, "%s", inbuf); + } } else { char *p; rd_cnt += 1024 - 1; if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) { zerror(zlogp, B_FALSE, "out of memory"); - (void) pclose(file); - return (-1); + break; } *retstr = p; inbuf = *retstr + rd_cnt; } } - status = pclose(file); + + while (fclose(file) != 0) { + assert(errno == EINTR); + } + while (waitpid(child, &status, 0) == -1) { + if (errno != EINTR) { + zerror(zlogp, B_TRUE, + "failed to get exit status of '%s'", cmdbuf); + return (-1); + } + } if (WIFSIGNALED(status)) { zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to " @@ -803,24 +1212,91 @@ 
do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr) return (WEXITSTATUS(status)); } +/* + * Get the path for this zone's init(1M) (or equivalent) process. First look + * for a zone-specific init-name attr, then get it from the brand. + */ +static int +get_initname(brand_handle_t bh, char *initname, int len) +{ + struct zone_attrtab a; + + bzero(&a, sizeof (a)); + (void) strlcpy(a.zone_attr_name, "init-name", + sizeof (a.zone_attr_name)); + + if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) { + (void) strlcpy(initname, a.zone_attr_value, len); + return (0); + } + + return (brand_get_initname(bh, initname, len)); +} + +/* + * Get the restart-init flag for this zone's init(1M) (or equivalent) process. + * First look for a zone-specific restart-init attr, then get it from the brand. + */ +static boolean_t +restartinit(brand_handle_t bh) +{ + struct zone_attrtab a; + + bzero(&a, sizeof (a)); + (void) strlcpy(a.zone_attr_name, "restart-init", + sizeof (a.zone_attr_name)); + + if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) { + if (strcmp(a.zone_attr_value, "false") == 0) + return (B_FALSE); + return (B_TRUE); + } + + return (brand_restartinit(bh)); +} + +/* + * Get the app-svc-dependent flag for this zone's init process. This is a + * zone-specific attr which controls the type of contract we create for the + * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal + * set, so that when any service which is in the same contract exits, the init + * application will be terminated. 
+ */ +static boolean_t +is_app_svc_dep(void) +{ + struct zone_attrtab a; + + bzero(&a, sizeof (a)); + (void) strlcpy(a.zone_attr_name, "app-svc-dependent", + sizeof (a.zone_attr_name)); + + if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK && + strcmp(a.zone_attr_value, "true") == 0) { + return (B_TRUE); + } + + return (B_FALSE); +} + static int -zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) +zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug) { zoneid_t zoneid; struct stat st; - char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; + char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; char nbootargs[BOOTARGS_MAX]; char cmdbuf[MAXPATHLEN]; fs_callback_t cb; brand_handle_t bh; zone_iptype_t iptype; - boolean_t links_loaded = B_FALSE; dladm_status_t status; char errmsg[DLADM_STRSIZE]; int err; + boolean_t app_svc_dep; boolean_t restart_init, restart_init0, restart_initreboot; - if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0) + if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0) return (-1); if ((zoneid = getzoneidbyname(zone_name)) == -1) { @@ -853,13 +1329,8 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) /* * Get the brand's boot callback if it exists. */ - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - brand_close(bh); - goto bad; - } (void) strcpy(cmdbuf, EXEC_PREFIX); - if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, + if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN) != 0) { zerror(zlogp, B_FALSE, "unable to determine branded zone's boot callback"); @@ -868,7 +1339,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) } /* Get the path for this zone's init(8) (or equivalent) process. 
*/ - if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) { + if (get_initname(bh, init_file, MAXPATHLEN) != 0) { zerror(zlogp, B_FALSE, "unable to determine zone's init(8) location"); brand_close(bh); @@ -876,35 +1347,44 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) } /* See if this zone's brand should restart init if it dies. */ - restart_init = brand_restartinit(bh); + restart_init = restartinit(bh); restart_init0 = brand_restartinit0(bh); restart_initreboot = brand_restartinitreboot(bh); + /* + * See if we need to setup contract dependencies between the zone's + * primary application and any of its services. + */ + app_svc_dep = is_app_svc_dep(); + brand_close(bh); - err = filter_bootargs(zlogp, bootargs, nbootargs, init_file, - bad_boot_arg); - if (err == Z_INVAL) - eventstream_write(Z_EVT_ZONE_BADARGS); - else if (err != Z_OK) + err = filter_bootargs(zlogp, bootargs, nbootargs, init_file); + if (err != Z_OK) goto bad; assert(init_file[0] != '\0'); - /* Try to anticipate possible problems: Make sure init is executable. */ - if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { + /* + * Try to anticipate possible problems: If possible, make sure init is + * executable. 
+ */ + if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to determine zone root"); goto bad; } - (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); + (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file); - if (stat(initpath, &st) == -1) { + if (lstat(initpath, &st) == -1) { zerror(zlogp, B_TRUE, "could not stat %s", initpath); goto bad; } - if ((st.st_mode & S_IXUSR) == 0) { + /* LINTED: E_NOP_IF_STMT */ + if ((st.st_mode & S_IFMT) == S_IFLNK) { + /* symlink, we'll have to wait and resolve when we boot */ + } else if ((st.st_mode & S_IXUSR) == 0) { zerror(zlogp, B_FALSE, "%s is not executable", initpath); goto bad; } @@ -922,7 +1402,6 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) " %s", dladm_status2str(status, errmsg)); goto bad; } - links_loaded = B_TRUE; } /* @@ -931,7 +1410,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) * is booted. */ if ((strlen(cmdbuf) > EXEC_LEN) && - (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) { + (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) { zerror(zlogp, B_FALSE, "%s failed", cmdbuf); goto bad; } @@ -963,19 +1442,31 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate) goto bad; } + if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT, + (void *)B_TRUE, sizeof (boolean_t)) == -1) { + zerror(zlogp, B_TRUE, "could not set zone app-die"); + goto bad; + } + /* * Inform zonestatd of a new zone so that it can install a door for * the zone to contact it. */ notify_zonestatd(zone_id); + /* Startup a thread to perform zfd logging/tty svc for the zone. 
*/ + create_log_thread(zlogp); + if (zone_boot(zoneid) == -1) { zerror(zlogp, B_TRUE, "unable to boot zone"); + destroy_log_thread(zlogp); goto bad; } - if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0) + if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) { + destroy_log_thread(zlogp); goto bad; + } return (0); @@ -984,32 +1475,45 @@ bad: * If something goes wrong, we up the zones's state to the target * state, RUNNING, and then invoke the hook as if we're halting. */ - (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT); - if (links_loaded) - (void) dladm_zone_halt(dld_handle, zoneid); + (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug); + return (-1); } static int -zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate) +zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate, + boolean_t debug) { int err; - if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0) + /* + * If performing a scratch zone unmount then do not call the + * state change hooks. 
+ */ + if (unmount_cmd == B_FALSE && + brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0) return (-1); - if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) { + if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) { if (!bringup_failure_recovery) zerror(zlogp, B_FALSE, "unable to destroy zone"); + destroy_log_thread(zlogp); return (-1); } + /* Shut down is done, stop the log thread */ + destroy_log_thread(zlogp); + + if (unmount_cmd == B_FALSE && + brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0) + return (-1); + if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK) zerror(zlogp, B_FALSE, "destroying snapshot: %s", zonecfg_strerror(err)); - if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0) - return (-1); + zonecfg_fini_handle(snap_hndl); + snap_hndl = NULL; return (0); } @@ -1021,7 +1525,6 @@ zone_graceful_shutdown(zlog_t *zlogp) pid_t child; char cmdbuf[MAXPATHLEN]; brand_handle_t bh = NULL; - char zpath[MAXPATHLEN]; ctid_t ct; int tmpl_fd; int child_status; @@ -1042,18 +1545,12 @@ zone_graceful_shutdown(zlog_t *zlogp) return (-1); } - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - brand_close(bh); - return (-1); - } - /* * If there is a brand 'shutdown' callback, execute it now to give the * brand a chance to cleanup any custom configuration. */ (void) strcpy(cmdbuf, EXEC_PREFIX); - if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN, + if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) { (void) strcat(cmdbuf, SHUTDOWN_DEFAULT); } @@ -1191,6 +1688,36 @@ audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, } /* + * Log the exit time and status of the zone's init process into + * {zonepath}/lastexited. If the zone shutdown normally, the exit status will + * be -1, otherwise it will be the exit status as described in wait.3c. 
+ * If the zone is configured to restart init, then nothing will be logged if + * init exits unexpectedly (the kernel will never upcall in this case). + */ +static void +log_init_exit(int status) +{ + char p[MAXPATHLEN]; + char buf[128]; + struct timeval t; + int fd; + + if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p)) + return; + if (gettimeofday(&t, NULL) != 0) + return; + if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec, + status) > sizeof (buf)) + return; + if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0) + return; + + (void) write(fd, buf, strlen(buf)); + + (void) close(fd); +} + +/* * The main routine for the door server that deals with zone state transitions. */ /* ARGSUSED */ @@ -1203,9 +1730,11 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, zone_state_t zstate; zone_cmd_t cmd; + boolean_t debug; + int init_status; zone_cmd_arg_t *zargp; - boolean_t kernelcall = B_FALSE; + boolean_t kernelcall = B_TRUE; int rval = -1; uint64_t uniqid; @@ -1226,6 +1755,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, * it is time for us to shut down zoneadmd. */ if (zargp == DOOR_UNREF_DATA) { + logstream_close(platloghdl, B_TRUE); + /* * See comment at end of main() for info on the last rites. 
*/ @@ -1255,6 +1786,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, goto out; } cmd = zargp->cmd; + debug = zargp->debug; + init_status = zargp->status; if (door_ucred(&uc) != 0) { zerror(&logsys, B_TRUE, "door_ucred"); @@ -1335,7 +1868,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, rval = -1; goto out; } - zlogp = &logsys; /* Log errors to syslog */ + zlogp = &logplat; /* Log errors to platform.log */ } /* @@ -1361,23 +1894,25 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, case ZONE_STATE_INSTALLED: switch (cmd) { case Z_READY: - rval = zone_ready(zlogp, Z_MNT_BOOT, zstate); + rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug); if (rval == 0) eventstream_write(Z_EVT_ZONE_READIED); + zcons_statechanged(); break; case Z_BOOT: case Z_FORCEBOOT: eventstream_write(Z_EVT_ZONE_BOOTING); - if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) - == 0) { + if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, + debug)) == 0) { rval = zone_bootup(zlogp, zargp->bootbuf, - zstate); + zstate, debug); } audit_put_record(zlogp, uc, rval, "boot"); + zcons_statechanged(); if (rval != 0) { bringup_failure_recovery = B_TRUE; (void) zone_halt(zlogp, B_FALSE, B_FALSE, - zstate); + zstate, debug); eventstream_write(Z_EVT_ZONE_BOOTFAILED); } break; @@ -1429,7 +1964,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, rval = zone_ready(zlogp, strcmp(zargp->bootbuf, "-U") == 0 ? 
- Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate); + Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug); if (rval != 0) break; @@ -1495,12 +2030,14 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, (void) strlcpy(boot_args, zargp->bootbuf, sizeof (boot_args)); eventstream_write(Z_EVT_ZONE_BOOTING); - rval = zone_bootup(zlogp, zargp->bootbuf, zstate); + rval = zone_bootup(zlogp, zargp->bootbuf, zstate, + debug); audit_put_record(zlogp, uc, rval, "boot"); + zcons_statechanged(); if (rval != 0) { bringup_failure_recovery = B_TRUE; (void) zone_halt(zlogp, B_FALSE, B_TRUE, - zstate); + zstate, debug); eventstream_write(Z_EVT_ZONE_BOOTFAILED); } boot_args[0] = '\0'; @@ -1508,9 +2045,10 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, case Z_HALT: if (kernelcall) /* Invalid; can't happen */ abort(); - if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) - != 0) + if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate, + debug)) != 0) break; + zcons_statechanged(); eventstream_write(Z_EVT_ZONE_HALTED); break; case Z_SHUTDOWN: @@ -1534,7 +2072,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, case Z_UNMOUNT: if (kernelcall) /* Invalid; can't happen */ abort(); - rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate); + rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug); if (rval == 0) { eventstream_write(Z_EVT_ZONE_HALTED); (void) sema_post(&scratch_sem); @@ -1556,10 +2094,12 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp, case ZONE_STATE_DOWN: switch (cmd) { case Z_READY: - if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) - != 0) + if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate, + debug)) != 0) break; - if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0) + zcons_statechanged(); + if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, + debug)) == 0) eventstream_write(Z_EVT_ZONE_READIED); else eventstream_write(Z_EVT_ZONE_HALTED); @@ -1576,32 +2116,40 @@ server(void *cookie, char *args, size_t alen, 
door_desc_t *dp, rval = 0; break; case Z_HALT: - if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate)) - != 0) + if (kernelcall) { + log_init_exit(init_status); + } else { + log_init_exit(-1); + } + if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate, + debug)) != 0) break; eventstream_write(Z_EVT_ZONE_HALTED); + zcons_statechanged(); break; case Z_REBOOT: (void) strlcpy(boot_args, zargp->bootbuf, sizeof (boot_args)); eventstream_write(Z_EVT_ZONE_REBOOTING); - if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate)) - != 0) { + if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate, + debug)) != 0) { eventstream_write(Z_EVT_ZONE_BOOTFAILED); boot_args[0] = '\0'; break; } - if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) - != 0) { + zcons_statechanged(); + if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, + debug)) != 0) { eventstream_write(Z_EVT_ZONE_BOOTFAILED); boot_args[0] = '\0'; break; } - rval = zone_bootup(zlogp, zargp->bootbuf, zstate); + rval = zone_bootup(zlogp, zargp->bootbuf, zstate, + debug); audit_put_record(zlogp, uc, rval, "reboot"); if (rval != 0) { (void) zone_halt(zlogp, B_FALSE, B_TRUE, - zstate); + zstate, debug); eventstream_write(Z_EVT_ZONE_BOOTFAILED); } boot_args[0] = '\0'; @@ -1782,6 +2330,29 @@ top: "zoneadmd does not appear to be available; " "restarted zoneadmd to recover.", zone_name, zone_state_str(zstate)); + + /* + * Startup a thread to perform the zfd logging/tty svc + * for the zone. zlogp won't be valid for much longer + * so use logplat. 
+ */ + if (getzoneidbyname(zone_name) != -1) { + create_log_thread(&logplat); + } + + /* recover the global configuration snapshot */ + if (snap_hndl == NULL) { + if ((snap_hndl = zonecfg_init_handle()) + == NULL || + zonecfg_create_snapshot(zone_name) + != Z_OK || + zonecfg_get_snapshot_handle(zone_name, + snap_hndl) != Z_OK) { + zerror(zlogp, B_FALSE, "recovering " + "zone configuration handle"); + goto out; + } + } } (void) fdetach(zone_door_path); @@ -1795,21 +2366,62 @@ out: } /* - * Setup the brand's pre and post state change callbacks, as well as the - * query callback, if any of these exist. + * Run the query hook with the 'env' parameter. It should return a + * string of tab-delimited key-value pairs, each of which should be set + * in the environment. + * + * Because the env_vars string values become part of the environment, the + * string is static and we don't free it. + * + * This function is always called before zoneadmd forks and makes itself + * exclusive, so it is possible there could more than one instance of zoneadmd + * running in parallel at this point. Thus, we have no zonecfg snapshot and + * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't + * need any zonecfg info to query for a brand-specific env value. 
*/ static int -brand_callback_init(brand_handle_t bh, char *zone_name) +set_brand_env(zlog_t *zlogp) { - char zpath[MAXPATHLEN]; + int ret = 0; + static char *env_vars = NULL; + char buf[2 * MAXPATHLEN]; + + if (query_hook[0] == '\0' || env_vars != NULL) + return (0); + + if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf)) + return (-1); - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) + if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0) return (-1); + if (env_vars != NULL) { + char *sp; + + sp = strtok(env_vars, "\t"); + while (sp != NULL) { + if (putenv(sp) != 0) { + ret = -1; + break; + } + sp = strtok(NULL, "\t"); + } + } + + return (ret); +} + +/* + * Setup the brand's pre and post state change callbacks, as well as the + * query callback, if any of these exist. + */ +static int +brand_callback_init(brand_handle_t bh, char *zone_name) +{ (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, sizeof (pre_statechg_hook)); - if (brand_get_prestatechange(bh, zone_name, zpath, + if (brand_get_prestatechange(bh, zone_name, zonepath, pre_statechg_hook + EXEC_LEN, sizeof (pre_statechg_hook) - EXEC_LEN) != 0) return (-1); @@ -1820,7 +2432,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name) (void) strlcpy(post_statechg_hook, EXEC_PREFIX, sizeof (post_statechg_hook)); - if (brand_get_poststatechange(bh, zone_name, zpath, + if (brand_get_poststatechange(bh, zone_name, zonepath, post_statechg_hook + EXEC_LEN, sizeof (post_statechg_hook) - EXEC_LEN) != 0) return (-1); @@ -1831,7 +2443,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name) (void) strlcpy(query_hook, EXEC_PREFIX, sizeof (query_hook)); - if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, + if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN, sizeof (query_hook) - EXEC_LEN) != 0) return (-1); @@ -1959,6 +2571,11 @@ main(int argc, char *argv[]) return (1); } + if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { + 
zerror(zlogp, B_FALSE, "unable to determine zone path"); + return (-1); + } + if (zonecfg_default_brand(default_brand, sizeof (default_brand)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to determine default brand"); @@ -2030,6 +2647,11 @@ main(int argc, char *argv[]) } priv_freeset(privset); + if (set_brand_env(zlogp) != 0) { + zerror(zlogp, B_FALSE, "Unable to setup brand's environment"); + return (1); + } + if (mkzonedir(zlogp) != 0) return (1); @@ -2156,6 +2778,15 @@ main(int argc, char *argv[]) openlog("zoneadmd", LOG_PID, LOG_DAEMON); /* + * Allow logging to <zonepath>/logs/<file>. + */ + logstream_init(zlogp); + platloghdl = logstream_open("platform.log", "zoneadmd", 0); + + /* logplat looks the same as logsys, but logs to platform.log */ + logplat = logsys; + + /* * The eventstream is used to publish state changes in the zone * from the door threads to the console I/O poller. */ @@ -2174,7 +2805,6 @@ main(int argc, char *argv[]) if (make_daemon_exclusive(zlogp) == -1) goto child_out; - /* * Create/join a new session; we need to be careful of what we do with * the console from now on so we don't end up being the session leader @@ -2184,9 +2814,13 @@ main(int argc, char *argv[]) /* * This thread shouldn't be receiving any signals; in particular, - * SIGCHLD should be received by the thread doing the fork(). + * SIGCHLD should be received by the thread doing the fork(). The + * exceptions are SIGHUP and SIGUSR1 for log rotation, set up by + * logstream_init(). */ (void) sigfillset(&blockset); + (void) sigdelset(&blockset, SIGHUP); + (void) sigdelset(&blockset, SIGUSR1); (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); /* diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index d784a303b3..06353cbe61 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -22,6 +22,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2014 Nexenta Systems, Inc. 
All rights reserved. + * Copyright 2019 Joyent, Inc. */ #ifndef _ZONEADMD_H @@ -32,6 +33,9 @@ extern "C" { #endif #include <libdladm.h> +#include <libzonecfg.h> +#include <thread.h> +#include <synch.h> /* * Multi-threaded programs should avoid MT-unsafe library calls (i.e., any- @@ -69,6 +73,7 @@ extern "C" { #define DEFAULT_DIR_USER -1 /* user ID for chown: -1 means don't change */ #define DEFAULT_DIR_GROUP -1 /* grp ID for chown: -1 means don't change */ +#define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT) typedef struct zlog { FILE *logfile; /* file to log to */ @@ -83,24 +88,27 @@ typedef struct zlog { char *locale; /* locale to use for gettext() */ } zlog_t; -extern zlog_t logsys; +extern zlog_t logsys; /* syslog */ +extern zlog_t logplat; /* platform.log */ extern mutex_t lock; extern mutex_t msglock; extern boolean_t in_death_throes; extern boolean_t bringup_failure_recovery; extern char *zone_name; +extern char zonepath[MAXNAMELEN]; +extern zone_dochandle_t snap_hndl; extern char pool_name[MAXNAMELEN]; extern char brand_name[MAXNAMELEN]; extern char default_brand[MAXNAMELEN]; extern char boot_args[BOOTARGS_MAX]; -extern char bad_boot_arg[BOOTARGS_MAX]; extern boolean_t zone_isnative; extern boolean_t zone_iscluster; extern dladm_handle_t dld_handle; extern void zerror(zlog_t *, boolean_t, const char *, ...); extern char *localize_msg(char *locale, const char *msg); +extern void nwifent_free_attrs(struct zone_nwiftab *); /* * Eventstream interfaces. @@ -112,8 +120,7 @@ typedef enum { Z_EVT_ZONE_HALTED, Z_EVT_ZONE_READIED, Z_EVT_ZONE_UNINSTALLING, - Z_EVT_ZONE_BOOTFAILED, - Z_EVT_ZONE_BADARGS + Z_EVT_ZONE_BOOTFAILED } zone_evt_t; extern int eventstream_init(); @@ -135,9 +142,9 @@ typedef enum { /* * Virtual platform interfaces. 
*/ -extern zoneid_t vplat_create(zlog_t *, zone_mnt_t); +extern zoneid_t vplat_create(zlog_t *, zone_mnt_t, zoneid_t); extern int vplat_bringup(zlog_t *, zone_mnt_t, zoneid_t); -extern int vplat_teardown(zlog_t *, boolean_t, boolean_t); +extern int vplat_teardown(zlog_t *, boolean_t, boolean_t, boolean_t); extern int vplat_get_iptype(zlog_t *, zone_iptype_t *); /* @@ -154,6 +161,23 @@ extern void resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen); */ extern int init_console(zlog_t *); extern void serve_console(zlog_t *); +extern void zcons_statechanged(); + +/* + * Logging routines + */ +typedef enum { + LS_LINE_BUFFERED = 0x1 /* Write when \n found or full buffer */ +} logstream_flags_t; + +extern boolean_t logging_poisoned; + +extern void create_log_thread(zlog_t *); +extern void destroy_log_thread(zlog_t *); +extern void logstream_init(zlog_t *); +extern int logstream_open(const char *, const char *, logstream_flags_t); +extern void logstream_write(int, char *, int); +extern void logstream_close(int, boolean_t); /* * Contract handling. @@ -163,7 +187,13 @@ extern int init_template(void); /* * Routine to manage child processes. */ -extern int do_subproc(zlog_t *, char *, char **); +extern int do_subproc(zlog_t *, char *, char **, boolean_t); + +/* + * Resource handling. + */ +extern int resolve_device_match(zlog_t *, struct zone_devtab *, + char *, size_t); #ifdef __cplusplus } |