summaryrefslogtreecommitdiff
path: root/usr/src/cmd/zoneadmd
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/cmd/zoneadmd')
-rw-r--r--usr/src/cmd/zoneadmd/Makefile38
-rw-r--r--usr/src/cmd/zoneadmd/Makefile.com72
-rw-r--r--usr/src/cmd/zoneadmd/amd64/Makefile31
-rw-r--r--usr/src/cmd/zoneadmd/i386/Makefile30
-rw-r--r--usr/src/cmd/zoneadmd/log.c1027
-rw-r--r--usr/src/cmd/zoneadmd/vplat.c684
-rw-r--r--usr/src/cmd/zoneadmd/zcons.c189
-rw-r--r--usr/src/cmd/zoneadmd/zfd.c1238
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.c984
-rw-r--r--usr/src/cmd/zoneadmd/zoneadmd.h44
10 files changed, 3745 insertions, 592 deletions
diff --git a/usr/src/cmd/zoneadmd/Makefile b/usr/src/cmd/zoneadmd/Makefile
index e75453fe56..cfff59f12a 100644
--- a/usr/src/cmd/zoneadmd/Makefile
+++ b/usr/src/cmd/zoneadmd/Makefile
@@ -18,12 +18,10 @@
#
# CDDL HEADER END
-
-#
-
#
# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
#
PROG= zoneadmd
@@ -31,40 +29,46 @@ PROG= zoneadmd
include ../Makefile.cmd
include ../Makefile.ctf
-ROOTCMDDIR= $(ROOTLIB)/zones
+$(64ONLY)SUBDIRS= $(MACH)
+$(BUILD64)SUBDIRS += $(MACH64)
-OBJS= zoneadmd.o zcons.o vplat.o
-SRCS = $(OBJS:.o=.c)
-POFILE=zoneadmd_all.po
-POFILES= $(OBJS:%.o=%.po)
+all := TARGET = all
+install := TARGET = install
+clean := TARGET = clean
+clobber := TARGET = clobber
+lint := TARGET = lint
CFLAGS += $(CCVERBOSE)
-LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
- -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \
- -linetutil -lscf
XGETFLAGS += -a -x zoneadmd.xcl
+ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones
+
.KEEP_STATE:
.PARALLEL:
-all: $(PROG)
+all: $(SUBDIRS)
$(PROG): $(OBJS)
$(LINK.c) -o $@ $(OBJS) $(LDLIBS)
$(POST_PROCESS)
-install: all $(ROOTCMD)
+install: $(SUBDIRS)
+ -$(RM) $(ROOTUSRLIBZONES)/$(PROG)
+ -$(LN) $(ISAEXEC) $(ROOTUSRLIBZONES)/$(PROG)
-$(POFILE): $(POFILES)
- $(RM) $@
- $(CAT) $(POFILES) > $@
+$(POFILE):
clean:
$(RM) $(OBJS)
check:
- $(CSTYLE) -p -P $(SRCS:%=%)
+ $(CSTYLE) -p -P *.c
+
+$(SUBDIRS): FRC
+ @cd $@; pwd; $(MAKE) $(TARGET)
+
+FRC:
include ../Makefile.targ
diff --git a/usr/src/cmd/zoneadmd/Makefile.com b/usr/src/cmd/zoneadmd/Makefile.com
new file mode 100644
index 0000000000..aaf21c7f5b
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/Makefile.com
@@ -0,0 +1,72 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+
+#
+# Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2020 Joyent, Inc.
+#
+
+PROG= zoneadmd
+
+include ../../Makefile.cmd
+include ../../Makefile.ctf
+
+ROOTCMDDIR= $(ROOTLIB)/zones
+
+OBJS= zoneadmd.o zcons.o zfd.o vplat.o log.o
+
+CFLAGS += $(CCVERBOSE)
+LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \
+ -lgen -lbsm -lcontract -lzfs -luuid -lbrand -ldladm -ltsnet -ltsol \
+ -linetutil -lscf -lppt -lcustr
+
+CSTD= $(CSTD_GNU99)
+
+.KEEP_STATE:
+
+%.o: ../%.c
+ $(COMPILE.c) $<
+ $(POST_PROCESS_O)
+
+ROOTUSRLIBZONES = $(ROOT)/usr/lib/zones
+ROOTUSRLIBZONES32 = $(ROOTUSRLIBZONES)/$(MACH32)
+ROOTUSRLIBZONES64 = $(ROOTUSRLIBZONES)/$(MACH64)
+ROOTUSRLIBZONESPROG32 = $(ROOTUSRLIBZONES32)/$(PROG)
+ROOTUSRLIBZONESPROG64 = $(ROOTUSRLIBZONES64)/$(PROG)
+$(ROOTUSRLIBZONES32)/%: $(ROOTUSRLIBZONES32) %
+ $(INS.file)
+$(ROOTUSRLIBZONES64)/%: $(ROOTUSRLIBZONES64) %
+ $(INS.file)
+$(ROOTUSRLIBZONES32):
+ $(INS.dir)
+
+all: $(PROG)
+
+$(PROG): $(OBJS)
+ $(LINK.c) -o $@ $(OBJS) $(LDLIBS)
+ $(POST_PROCESS)
+
+clean:
+ $(RM) $(OBJS)
+
+lint:
+ $(LINT.c) ../*.c $(LDLIBS)
+
+include ../../Makefile.targ
diff --git a/usr/src/cmd/zoneadmd/amd64/Makefile b/usr/src/cmd/zoneadmd/amd64/Makefile
new file mode 100644
index 0000000000..75ac51db32
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/amd64/Makefile
@@ -0,0 +1,31 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+include ../Makefile.com
+include ../../Makefile.cmd.64
+
+install: all $(ROOTUSRLIBZONES64) $(ROOTUSRLIBZONESPROG64)
diff --git a/usr/src/cmd/zoneadmd/i386/Makefile b/usr/src/cmd/zoneadmd/i386/Makefile
new file mode 100644
index 0000000000..a8764e0638
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/i386/Makefile
@@ -0,0 +1,30 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License, Version 1.0 only
+# (the "License"). You may not use this file except in compliance
+# with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright (c) 2011, Joyent, Inc. All rights reserved.
+#
+
+.KEEP_STATE:
+
+include ../Makefile.com
+
+install: all $(ROOTUSRLIBZONES32) $(ROOTUSRLIBZONESPROG32)
diff --git a/usr/src/cmd/zoneadmd/log.c b/usr/src/cmd/zoneadmd/log.c
new file mode 100644
index 0000000000..a4ecc3e1e8
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/log.c
@@ -0,0 +1,1027 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2020 Joyent, Inc.
+ */
+
+/*
+ * zoneadmd logging
+ *
+ * zoneadmd logs to log files under <zonepath>/logs. Each log entry is a json
+ * structure of the form:
+ *
+ * {
+ * "log": "some message\n",
+ * "stream": "stderr",
+ * "time": "2018-03-28T13:25:02.670423000Z"
+ * }
+ *
+ * Unlike the example above, the entries in the log file are not pretty-printed.
+ * Messages are processed so that they have the proper json escapes for
+ * problematic characters. Excessively long messages may be truncated.
+ *
+ * To use these interfaces:
+ *
+ * int logid;
+ *
+ * logstream_init(zlogp);
+ *
+ * logid = logstream_open("stdio.log", "stdout", flags);
+ * ...
+ * logstream_write(logid, buf, len);
+ * ...
+ * logstream_close(logid);
+ *
+ * logstream_init() needs to be called only once.
+ *
+ * logstream_open() opens a log file (if not already open) and associates the
+ * specified stream with it.
+ *
+ * The following flag is supported:
+ *
+ * LS_LINE_BUFFERED Buffer writes until a newline is encountered or the
+ * buffer fills. This should only be used with streams
+ * that are written to by a single thread. The timestamp
+ * on log messages are the time that the log entry was
+ * written to the log file. This means the timestamp is
+ * the time when the console user hits enter, not the time
+ * that the prompt was printed.
+ *
+ * Line buffering is particularly useful for bhyve console logging because
+ * bhyve's UART emulation causes read() calls in zcons.c to return far fewer
+ * than 10 characters at a time. Without line buffering, a small number of
+ * logged characters are accompanied by about 64 characters of timestamp and
+ * other overhead. Line buffering saves quite a lot of space and makes the log
+ * much easier to read.
+ *
+ *
+ * Log rotation
+ *
+ * Two attributes, zlog-max-size and zlog-keep-rotated are used for automatic
+ * log rotation. zlog-max-size is the approximate maximum size of a log before
+ * it is automatically rotated. Rotated logs are renamed as
+ * <log>.<iso-8601-stamp>. If zlog-keep-rotated is specified and is an integer
+ * greater than zero, only that number of rotated logs will be retained.
+ *
+ * If zlog-max-size is not specified, log rotation will not happen
+ * automatically. An external log rotation program may rename the log file(s),
+ * then send SIGHUP to zoneadmd.
+ *
+ * Log rotation can be forced with SIGUSR1. In this case, the log will be
+ * rotated as though it hit the maximum size and will be subject to retention
+ * rules described above.
+ *
+ *
+ * Locking strategy
+ *
+ * Callers need not worry about locking. In the interest of simplicity, a
+ * single global lock is used to protect the state of the log files and the
+ * associated streams. Locking is necessary because reboots and log rotations
+ * can cause various state changes. Without locking, races could cause log
+ * entries to be directed to the wrong file descriptors.
+ *
+ * The simplistic global lock complicates error reporting within logging
+ * routines. zerror() must not be called while holding logging_lock. Rather,
+ * logstream_err() should be used to log via syslog.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <libcustr.h>
+#include <netdb.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <strings.h>
+#include <synch.h>
+#include <syslog.h>
+#include <time.h>
+#include <thread.h>
+#include <unistd.h>
+
+#include <sys/debug.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/varargs.h>
+
+#include "zoneadmd.h"
+
+/*
+ * Currently we only expect stdout, stderr, zoneadmd, and console. Increase
+ * MAX_ZLOG_STREAMS if more streams are added. If the count increases
+ * significantly, logfile_t and logstream_t elements should be dynamically
+ * allocated and the algorithms associated with opening and closing them should
+ * become more efficient.
+ */
+#define MAX_LOG_STREAMS 4
+
+#define ZLOG_MAXSZ "zlog-max-size" /* zonecfg attr */
+#define ZLOG_MAXSZ_MIN (1024 * 1024) /* min size for autorotate */
+#define ZLOG_KEEP "zlog-keep-rotated" /* zonecfg attr */
+#define ZLOG_KEEP_MAX 1000 /* number of log files */
+
+/*
+ * While we could get the numeric value of BUNYAN_LOG_INFO from bunyan.h,
+ * the log version is internal to the library, so we just define the values
+ * we need here.
+ */
+#define BUNYAN_VERSION 0
+#define BUNYAN_LOG_LEVEL 30 /* info */
+
+typedef struct logfile {
+ char lf_path[MAXPATHLEN]; /* log file name (absolute path) */
+ char lf_name[MAXNAMELEN]; /* tail of log file name */
+ char lf_buf[BUFSIZ]; /* Buffer for event messages */
+ custr_t *lf_cus; /* custr_t wrapper for lf_buf */
+ int lf_fd; /* file descriptor */
+ size_t lf_size; /* Current size */
+ boolean_t lf_write_err; /* Avoid spamming console via logsys */
+ boolean_t lf_closing; /* Avoid rotation recursion */
+} logfile_t;
+
+/* Large enough to hold BUFSIZ bytes with some escaping */
+#define LS_BUFSZ (BUFSIZ * 2)
+
+/* Large enough to hold LS_BUF contents + bunyan mandatory properties */
+#define LS_OBUFSZ (LS_BUFSZ + MAXNAMELEN + 128)
+
+typedef struct logstream {
+ char ls_stream[MAXNAMELEN]; /* stdout, stderr, etc. */
+ char ls_buf[LS_BUFSZ]; /* Not-yet written data, json */
+ char ls_obuf[LS_OBUFSZ]; /* Buffer to form output json */
+ custr_t *ls_cusbuf; /* custr_t wrapper to ls_buf */
+ custr_t *ls_cusobuf; /* custr_t wrapper to ls_ofbuf */
+ logstream_flags_t ls_flags;
+ logfile_t *ls_logfile; /* N streams per log file */
+} logstream_t;
+
+typedef struct jsonpair {
+ const char *jp_key;
+ const char *jp_val;
+} jsonpair_t;
+
+boolean_t logging_poisoned = B_FALSE;
+
+/*
+ * MAX_LOG_STREAMS is a small number so we allocate in the simplest way.
+ */
+static logstream_t streams[MAX_LOG_STREAMS];
+static logfile_t logfiles[MAX_LOG_STREAMS];
+
+static char host[MAXHOSTNAMELEN];
+static char pidstr[10];
+
+static boolean_t logging_initialized = B_FALSE;
+static uint64_t logging_rot_size; /* See ZLOG_MAXSZ */
+static uint64_t logging_rot_keep; /* See ZLOG_KEEP */
+static int logging_pending_sig = 0; /* Signal recvd while logging */
+static mutex_t logging_lock = ERRORCHECKMUTEX; /* The global logging lock */
+
+static void logstream_flush_all(logfile_t *);
+static void logstream_sighandler(int);
+static void rotate_log(logfile_t *);
+static size_t make_json(jsonpair_t *, size_t, custr_t *);
+static void logfile_write(logfile_t *, custr_t *);
+
+/*
+ * If errors are encountered while logging_lock is held, we can't use zerror().
+ */
+static void
+logstream_err(boolean_t use_strerror, const char *fmt, ...)
+{
+ va_list alist;
+ char buf[MAXPATHLEN * 2];
+ char *bp;
+ int saved_errno = errno;
+
+ (void) snprintf(buf, sizeof (buf), "[zone %s] ", zone_name);
+
+ bp = &buf[strlen(buf)];
+
+ va_start(alist, fmt);
+ (void) vsnprintf(bp, sizeof (buf) - (bp - buf), fmt, alist);
+ va_end(alist);
+
+ if (use_strerror) {
+ bp = &buf[strlen(buf)];
+ (void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
+ strerror(saved_errno));
+ }
+ syslog(LOG_ERR, "%s", buf);
+
+ errno = saved_errno;
+}
+
+static void
+logstream_lock(void)
+{
+ VERIFY(logging_initialized);
+ VERIFY(!logging_poisoned);
+
+ mutex_enter(&logging_lock);
+}
+
+static void
+logstream_unlock(void)
+{
+ int sig = logging_pending_sig;
+
+ logging_pending_sig = 0;
+ mutex_exit(&logging_lock);
+
+ /*
+ * If a signal arrived while this thread was holding the lock, call the
+ * handler.
+ */
+ if (sig != 0) {
+ logstream_sighandler(sig);
+ }
+}
+
+static void
+logfile_write_event(logfile_t *lfp, const char *stream, const char *event)
+{
+ size_t len;
+ jsonpair_t pairs[] = {
+ { "stream", stream },
+ { "msg", event }
+ };
+
+ len = make_json(pairs, ARRAY_SIZE(pairs), lfp->lf_cus);
+ if (len >= sizeof (lfp->lf_buf)) {
+ logstream_err(B_FALSE, "%s: buffer too small. Need %zu bytes, "
+ "have %zu bytes", __func__, len + 1, sizeof (lfp->lf_buf));
+ return;
+ }
+
+ logfile_write(lfp, lfp->lf_cus);
+}
+
+static void
+close_log(logfile_t *lfp, const char *why, boolean_t ign_err)
+{
+ int err;
+
+ VERIFY(MUTEX_HELD(&logging_lock));
+
+ /*
+ * Something may have gone wrong during log rotation, leading to a
+ * zombie log.
+ */
+ if (lfp->lf_fd == -1) {
+ return;
+ }
+
+ lfp->lf_closing = B_TRUE;
+
+ logstream_flush_all(lfp);
+
+ logfile_write_event(lfp, "logfile", why);
+
+ err = close(lfp->lf_fd);
+ if (!ign_err)
+ VERIFY0(err);
+
+ lfp->lf_size = 0;
+ lfp->lf_fd = -1;
+}
+
+static void
+open_log(logfile_t *lfp, const char *why)
+{
+ struct stat64 sb;
+
+ VERIFY(MUTEX_HELD(&logging_lock));
+ VERIFY3S(lfp->lf_fd, ==, -1);
+
+ lfp->lf_fd = open(lfp->lf_path,
+ O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC, 0600);
+ if (lfp->lf_fd == -1) {
+ logstream_err(B_TRUE, "Cannot open log file %s",
+ lfp->lf_path);
+ lfp->lf_write_err = B_TRUE;
+ return;
+ }
+
+ VERIFY0(fstat64(lfp->lf_fd, &sb));
+ lfp->lf_size = sb.st_size;
+ lfp->lf_write_err = B_FALSE;
+ lfp->lf_closing = B_FALSE;
+
+ logfile_write_event(lfp, "logfile", why);
+}
+
+static void
+logstream_sighandler(int sig)
+{
+ int i;
+
+ /*
+ * Protect against recursive mutex enters when a signal comes during
+ * logging. This will cause this function to be called again just after
+ * this thread drops the lock.
+ */
+ if (MUTEX_HELD(&logging_lock)) {
+ logging_pending_sig = sig;
+ return;
+ }
+
+ logstream_lock();
+ if (logging_poisoned) {
+ logstream_unlock();
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(logfiles); i++) {
+ /* Inactive logfile slot */
+ if (logfiles[i].lf_name[0] == '\0') {
+ continue;
+ }
+
+ switch (sig) {
+ case SIGHUP:
+ close_log(&logfiles[i], "close-rotate", B_FALSE);
+ open_log(&logfiles[i], "open-rotate");
+ break;
+ case SIGUSR1:
+ rotate_log(&logfiles[i]);
+ break;
+ default:
+ logstream_err(B_FALSE, "unhandled signal %d", sig);
+ }
+ }
+
+ logstream_unlock();
+}
+
+static void
+get_attr_uint64(zlog_t *zlogp, zone_dochandle_t handle, const char *name,
+ uint64_t max, uint64_t *valp)
+{
+ struct zone_attrtab tab = { 0 };
+ char *p;
+ uint64_t val;
+
+ ASSERT(!MUTEX_HELD(&logging_lock));
+
+ (void) strlcpy(tab.zone_attr_name, name, sizeof (tab.zone_attr_name));
+ if (zonecfg_lookup_attr(handle, &tab) != Z_OK) {
+ return;
+ }
+
+ errno = 0;
+ val = strtol(tab.zone_attr_value, &p, 10);
+ if (errno != 0 && *p == '\0') {
+ zerror(zlogp, errno != 0, "Bad value '%s' for 'attr name=%s'",
+ tab.zone_attr_value, tab.zone_attr_name);
+ return;
+ }
+ if (val > max) {
+ zerror(zlogp, B_FALSE, "Value of attr '%s' is too large. "
+ "Reducing to %llu", name, max);
+ val = max;
+ }
+
+ *valp = val;
+}
+
+static void
+logstream_atfork_prepare(void)
+{
+ logstream_lock();
+}
+
+static void
+logstream_atfork_parent(void)
+{
+ logstream_unlock();
+}
+
+/*
+ * logstream_*() should never be called in a child process, so we make sure this
+ * code is never called there.
+ *
+ * zerror() in a child process is still safe: it knows to check for poisoning,
+ * and in such a case will redirect its output to stderr on the presumption it
+ * is a pipe to the parent.
+ */
+static void
+logstream_atfork_child(void)
+{
+ logging_poisoned = B_TRUE;
+ logging_pending_sig = 0;
+ (void) snprintf(pidstr, sizeof (pidstr), "%d", getpid());
+ logstream_unlock();
+}
+
+void
+logstream_init(zlog_t *zlogp)
+{
+ zone_dochandle_t handle;
+ int i;
+
+ VERIFY(!logging_initialized);
+
+ VERIFY0(gethostname(host, sizeof (host)));
+ (void) snprintf(pidstr, sizeof (pidstr), "%d", getpid());
+
+ for (i = 0; i < ARRAY_SIZE(logfiles); i++) {
+ logfile_t *lfp = &logfiles[i];
+
+ lfp->lf_fd = -1;
+ if (custr_alloc_buf(&lfp->lf_cus, lfp->lf_buf,
+ sizeof (lfp->lf_buf)) != 0) {
+ (void) fprintf(stderr, "failed to allocate custr_t for "
+ "log file\n");
+ abort();
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(streams); i++) {
+ logstream_t *lsp = &streams[i];
+
+ if (custr_alloc_buf(&lsp->ls_cusbuf, lsp->ls_buf,
+ sizeof (lsp->ls_buf)) != 0 ||
+ custr_alloc_buf(&lsp->ls_cusobuf, lsp->ls_obuf,
+ sizeof (lsp->ls_obuf)) != 0) {
+ (void) fprintf(stderr, "failed to allocate custr_t for "
+ "log stream\n");
+ abort();
+ }
+ }
+
+ VERIFY0(pthread_atfork(logstream_atfork_prepare,
+ logstream_atfork_parent, logstream_atfork_child));
+
+ logging_initialized = B_TRUE;
+
+ /* Now it is safe to use zlogp */
+
+ if ((handle = zonecfg_init_handle()) == NULL ||
+ zonecfg_get_handle(zone_name, handle) != Z_OK) {
+ zerror(zlogp, B_FALSE, "failed to open zone configuration "
+ "while initializing logging");
+ } else {
+ get_attr_uint64(zlogp, handle, ZLOG_MAXSZ, UINT64_MAX,
+ &logging_rot_size);
+ if (logging_rot_size != 0 &&
+ logging_rot_size < ZLOG_MAXSZ_MIN) {
+ zerror(zlogp, B_FALSE, "%s value %llu is too small. "
+ "Setting to %d", ZLOG_MAXSZ, logging_rot_size,
+ ZLOG_MAXSZ_MIN);
+ logging_rot_size = ZLOG_MAXSZ_MIN;
+ }
+ get_attr_uint64(zlogp, handle, ZLOG_KEEP, ZLOG_KEEP_MAX,
+ &logging_rot_keep);
+ }
+
+ zonecfg_fini_handle(handle);
+
+ /*
+ * This thread should receive SIGHUP so that it can close the log
+ * file and reopen it during log rotation. SIGUSR1 can be used to force
+ * a log rotation.
+ */
+ (void) sigset(SIGHUP, logstream_sighandler);
+ (void) sigset(SIGUSR1, logstream_sighandler);
+}
+
+/*
+ * Rotate a single log file. The global lock must be held while this is called.
+ */
+static void
+rotate_log(logfile_t *lfp)
+{
+ time_t t;
+ struct tm gtm;
+ char path[MAXPATHLEN];
+ int64_t i;
+ size_t len;
+ glob_t glb = { 0 };
+ int err;
+
+ VERIFY(MUTEX_HELD(&logging_lock));
+
+ if (lfp->lf_closing) {
+ return;
+ }
+
+ if ((t = time(NULL)) == (time_t)-1 || gmtime_r(&t, &gtm) == NULL) {
+ logstream_err(B_TRUE, "failed to format time");
+ return;
+ }
+
+ (void) snprintf(path, sizeof (path), "%s.%04d%02d%02dT%02d%02d%02dZ",
+ lfp->lf_path, gtm.tm_year + 1900, gtm.tm_mon + 1, gtm.tm_mday,
+ gtm.tm_hour, gtm.tm_min, gtm.tm_sec);
+
+ if (rename(lfp->lf_path, path) != 0) {
+ logstream_err(B_TRUE, "failed to rotate log file "
+ "'%s' to '%s'", lfp->lf_path, path);
+ }
+
+ close_log(lfp, "close-rotate", B_FALSE);
+ open_log(lfp, "open-rotate");
+
+ if (logging_rot_keep == 0) {
+ return;
+ }
+
+ /*
+ * Remove old logs.
+ */
+ len = snprintf(path, sizeof (path),
+ /* <lf_path>.YYYYmmdd */
+ "%s.[12][0-9][0-9][0-9][01][0-9][0-3][0-9]"
+ /* THHMMSSZ */
+ "T[012][0-9][0-5][0-9][0-6][0-9]Z", lfp->lf_path);
+ if (len >= sizeof (path)) {
+ logstream_err(B_FALSE, "log rotation glob too long");
+ return;
+ }
+
+ if ((err = glob(path, GLOB_LIMIT, NULL, &glb)) != 0) {
+ if (err != GLOB_NOMATCH) {
+ logstream_err(B_TRUE, "glob terminated with error %d",
+ err);
+ }
+ globfree(&glb);
+ return;
+ }
+
+ if (glb.gl_pathc <= logging_rot_keep) {
+ globfree(&glb);
+ return;
+ }
+
+ for (i = glb.gl_pathc - logging_rot_keep - 1; i >= 0; i--) {
+ if (unlink(glb.gl_pathv[i]) != 0) {
+ logstream_err(B_TRUE, "log rotation could not remove "
+ "%s", glb.gl_pathv[i]);
+ }
+ }
+ globfree(&glb);
+}
+
+/*
+ * Modify the input string with json escapes. Since the destination can thus
+ * be larger than the source, multiple calls may be required to fully convert
+ * sbuf to json.
+ *
+ * sbuf, slen Source buffer and the number of bytes in it to process
+ * dest Destination custr_t containing escaped JSON.
+ * scntp On return, *scntp stores number of scnt bytes consumed
+ * flushp If non-NULL, line-buffered mode is enabled. Processing
+ * will stop at the first newline or when dest is full and
+ * *flushp will be set to B_TRUE.
+ *
+ * This function makes no attempt to handle wide characters properly because
+ * the messages that come in may be using any character encoding. Since
+ * characters other than 7-bit ASCII are not directly readable in the log
+ * anyway, it is better to log the raw data and leave it to specialized log
+ * readers to interpret non-ASCII data.
+ */
+static void
+escape_json(const char *sbuf, size_t slen, custr_t *dest, size_t *scntp,
+ boolean_t *flushp)
+{
+ char c;
+ const char *save_sbuf = sbuf;
+ const char *sbuf_end = sbuf + slen - 1;
+ char append_buf[7]; /* "\\u0000\0" */
+ const char *append;
+ int len;
+
+ if (slen == 0) {
+ *scntp = 0;
+ return;
+ }
+
+ if (flushp != NULL) {
+ *flushp = B_FALSE;
+ }
+
+ while (sbuf <= sbuf_end) {
+ c = sbuf[0];
+
+ switch (c) {
+ case '\\':
+ append = "\\\\";
+ break;
+
+ case '"':
+ append = "\\\"";
+ break;
+
+ case '\b':
+ append = "\\b";
+ break;
+
+ case '\f':
+ append = "\\f";
+ break;
+
+ case '\n':
+ append = "\\n";
+ if (flushp != NULL) {
+ *flushp = B_TRUE;
+ }
+ break;
+
+ case '\r':
+ append = "\\r";
+ break;
+
+ case '\t':
+ append = "\\t";
+ break;
+
+ default:
+ if (c >= 0x20 && c < 0x7f) {
+ append_buf[0] = c;
+ append_buf[1] = '\0';
+ } else {
+ len = snprintf(append_buf, sizeof (append_buf),
+ "\\u%04x", (int)(0xff & c));
+ VERIFY3S(len, <, sizeof (append_buf));
+ }
+ append = append_buf;
+ break;
+ }
+
+ if (custr_append(dest, append) != 0) {
+ VERIFY3S(errno, ==, EOVERFLOW);
+ if (flushp != NULL) {
+ *flushp = B_TRUE;
+ }
+ break;
+ }
+
+ sbuf++;
+
+ if (flushp != NULL && *flushp) {
+ break;
+ }
+ }
+
+ *scntp = sbuf - save_sbuf;
+
+ VERIFY3U(*scntp, <=, slen);
+}
+
+/*
+ * Like write(2), but to a logfile_t and with retries on short writes.
+ */
+static void
+logfile_write(logfile_t *lfp, custr_t *cus)
+{
+ const char *buf = custr_cstr(cus);
+ size_t buflen = custr_len(cus);
+ ssize_t wlen;
+ size_t wanted = buflen;
+
+ while (buflen > 0) {
+ wlen = write(lfp->lf_fd, buf, buflen);
+ if (wlen == -1) {
+ if (lfp->lf_write_err) {
+ lfp->lf_write_err = B_TRUE;
+ logstream_err(B_TRUE, "log file fd %d '%s': "
+ "failed to write %llu of %llu bytes",
+ lfp->lf_fd, lfp->lf_path, buflen, wanted);
+ }
+ return;
+ }
+ buf += wlen;
+ buflen -= wlen;
+ lfp->lf_size += wlen;
+
+ lfp->lf_write_err = B_FALSE;
+ }
+
+ if (logging_rot_size != 0 && lfp->lf_size > logging_rot_size) {
+ rotate_log(lfp);
+ }
+}
+
+static void
+add_bunyan_preamble(custr_t *cus)
+{
+ struct tm gtm;
+ struct timeval tv;
+ /* Large enough for YYYY-MM-DDTHH:MM:SS.000000000Z + NUL */
+ char timestr[32] = { 0 };
+ size_t len;
+
+ if (gettimeofday(&tv, NULL) != 0 ||
+ gmtime_r(&tv.tv_sec, &gtm) == NULL) {
+ logstream_err(B_TRUE, "failed to get time of day");
+ abort();
+ }
+
+ len = strftime(timestr, sizeof (timestr) - 1, "%FT%T", &gtm);
+ VERIFY3U(len, >, 0);
+ VERIFY3U(len, <, sizeof (timestr) - 1);
+
+ VERIFY0(custr_append_printf(cus, "\"time\": \"%s.%09ldZ\", ",
+ timestr, tv.tv_usec * 1000));
+ VERIFY0(custr_append_printf(cus, "\"v\": %d, ", BUNYAN_VERSION));
+ VERIFY0(custr_append_printf(cus, "\"hostname\": \"%s\", ", host));
+ VERIFY0(custr_append(cus, "\"name\": \"zoneadmd\","));
+ VERIFY0(custr_append_printf(cus, "\"pid\": %s, ", pidstr));
+ VERIFY0(custr_append_printf(cus, "\"level\": %d", BUNYAN_LOG_LEVEL));
+}
+
+/*
+ * Convert the json pairs into a json object. The properties required for
+ * bunyan-formatted json objects are added to every object.
+ * Returns the number of bytes that would have been written to
+ * buf if bufsz had buf been sufficiently large (excluding the terminating null
+ * byte). Like snprintf().
+ */
+static size_t
+make_json(jsonpair_t *pairs, size_t npairs, custr_t *cus)
+{
+ int i;
+ const char *key, *val;
+ const char *start = ", ";
+
+ VERIFY3S(npairs, >, 0);
+
+ custr_reset(cus);
+
+ VERIFY0(custr_append(cus, "{ "));
+
+ add_bunyan_preamble(cus);
+
+ for (i = 0; i < npairs; i++) {
+ size_t len;
+
+ key = pairs[i].jp_key;
+ val = pairs[i].jp_val;
+
+ /* The total number of bytes we're adding to cus */
+ len = 3 + strlen(key) + 3 + strlen(val) + 1;
+ if (custr_append_printf(cus, "%s\"%s\":\"%s\"",
+ start, key, val) != 0) {
+ VERIFY3S(errno, ==, EOVERFLOW);
+ return (custr_len(cus) + len);
+ }
+ }
+
+ if (custr_append(cus, " }\n") != 0) {
+ return (custr_len(cus) + 3);
+ }
+
+ return (custr_len(cus));
+}
+
+static void
+logstream_write_json(logstream_t *lsp)
+{
+ size_t len;
+ jsonpair_t pairs[] = {
+ { "msg", lsp->ls_buf },
+ { "stream", lsp->ls_stream },
+ };
+
+ if (custr_len(lsp->ls_cusbuf) == 0) {
+ return;
+ }
+
+ len = make_json(pairs, ARRAY_SIZE(pairs), lsp->ls_cusobuf);
+
+ custr_reset(lsp->ls_cusbuf);
+ if (len >= sizeof (lsp->ls_obuf)) {
+ logstream_err(B_FALSE, "%s: buffer too small. Need %llu bytes, "
+ "have %llu bytes", __func__, len + 1,
+ sizeof (lsp->ls_obuf));
+ return;
+ }
+
+ logfile_write(lsp->ls_logfile, lsp->ls_cusobuf);
+}
+
+/*
+ * We output to the log file as json.
+ * ex. for string 'msg\n' on the zone's stdout:
+ * {"log":"msg\n","stream":"stdout","time":"2014-10-24T20:12:11.101973117Z"}
+ *
+ * We use ns in the last field of the timestamp for compatibility.
+ *
+ * We keep track of the size of the log file and rotate it when we exceed
+ * the log size limit (if one is set).
+ */
+void
+logstream_write(int ls, char *buf, int len)
+{
+ logstream_t *lsp;
+ size_t scnt;
+ boolean_t newline;
+ boolean_t buffered;
+
+ if (ls == -1 || len == 0) {
+ return;
+ }
+ VERIFY3S(ls, >=, 0);
+ VERIFY3S(ls, <, ARRAY_SIZE(streams));
+
+ logstream_lock();
+
+ lsp = &streams[ls];
+ if (lsp->ls_stream[0] == '\0' || lsp->ls_logfile == NULL) {
+ logstream_unlock();
+ return;
+ }
+
+ buffered = !!(lsp->ls_flags & LS_LINE_BUFFERED);
+
+ do {
+ escape_json(buf, len, lsp->ls_cusbuf, &scnt,
+ buffered ? &newline : NULL);
+
+ buf += scnt;
+ len -= scnt;
+
+ if (!buffered || newline) {
+ logstream_write_json(lsp);
+ }
+ } while (len > 0 && (!buffered || newline));
+
+ logstream_unlock();
+}
+
+static void
+logstream_flush(int ls)
+{
+ logstream_t *lsp;
+
+ VERIFY(MUTEX_HELD(&logging_lock));
+
+ lsp = &streams[ls];
+ if (lsp->ls_stream[0] == '\0' || lsp->ls_logfile == NULL) {
+ return;
+ }
+ logstream_write_json(lsp);
+}
+
+static void
+logstream_flush_all(logfile_t *lfp)
+{
+ int i;
+
+ VERIFY(MUTEX_HELD(&logging_lock));
+
+ for (i = 0; i < ARRAY_SIZE(streams); i++) {
+ if (streams[i].ls_logfile == lfp) {
+ logstream_flush(i);
+ }
+ }
+}
+
+int
+logstream_open(const char *logname, const char *stream, logstream_flags_t flags)
+{
+ int ls = -1;
+ int i;
+ logstream_t *lsp;
+ logfile_t *lfp = NULL;
+
+ VERIFY3U(strlen(logname), <, sizeof (lfp->lf_name));
+ VERIFY3U(strlen(stream), <, sizeof (lsp->ls_stream));
+
+ logstream_lock();
+
+ /*
+ * Find an empty logstream_t and verify that the stream is not already
+ * open.
+ */
+ for (i = 0; i < ARRAY_SIZE(streams); i++) {
+ if (ls == -1 && streams[i].ls_stream[0] == '\0') {
+ VERIFY3P(streams[i].ls_logfile, ==, NULL);
+ ls = i;
+ continue;
+ }
+ if (strcmp(stream, streams[i].ls_stream) == 0) {
+ logstream_unlock();
+ logstream_err(B_FALSE, "log stream %s already open",
+ stream);
+ return (-1);
+ }
+ }
+ VERIFY3S(ls, !=, -1);
+
+ /* Find an existing or available logfile_t */
+ for (i = 0; i < ARRAY_SIZE(logfiles); i++) {
+ if (lfp == NULL && logfiles[i].lf_name[0] == '\0') {
+ lfp = &logfiles[i];
+ }
+ if (strcmp(logname, logfiles[i].lf_name) == 0) {
+ lfp = &logfiles[i];
+ break;
+ }
+ }
+ if (lfp->lf_name[0] == '\0') {
+ (void) strlcpy(lfp->lf_name, logname, sizeof (lfp->lf_name));
+ (void) snprintf(lfp->lf_path, sizeof (lfp->lf_path), "%s/logs",
+ zonepath);
+ (void) mkdir(lfp->lf_path, 0700);
+
+ (void) snprintf(lfp->lf_path, sizeof (lfp->lf_path),
+ "%s/logs/%s", zonepath, logname);
+
+ open_log(lfp, "open");
+ if (lfp->lf_fd == -1) {
+ logstream_unlock();
+ return (-1);
+ }
+ }
+
+ lsp = &streams[ls];
+ (void) strlcpy(lsp->ls_stream, stream, sizeof (lsp->ls_stream));
+
+ lsp->ls_flags = flags;
+ lsp->ls_logfile = lfp;
+
+ logstream_unlock();
+
+ return (ls);
+}
+
+static void
+logstream_reset(logstream_t *lsp)
+{
+ custr_t *buf = lsp->ls_cusbuf;
+ custr_t *obuf = lsp->ls_cusobuf;
+
+ (void) memset(lsp, 0, sizeof (*lsp));
+ lsp->ls_cusbuf = buf;
+ lsp->ls_cusobuf = obuf;
+
+ custr_reset(buf);
+ custr_reset(obuf);
+}
+
+static void
+logfile_reset(logfile_t *lfp)
+{
+ custr_t *buf = lfp->lf_cus;
+
+ (void) memset(lfp, 0, sizeof (*lfp));
+ lfp->lf_cus = buf;
+ lfp->lf_fd = -1;
+
+ custr_reset(buf);
+}
+
+void
+logstream_close(int ls, boolean_t abrupt)
+{
+ logstream_t *lsp;
+ logfile_t *lfp;
+ int i;
+
+ if (ls == -1) {
+ return;
+ }
+ VERIFY3S(ls, >=, 0);
+ VERIFY3S(ls, <, ARRAY_SIZE(streams));
+
+ logstream_lock();
+ logstream_flush(ls);
+
+ lsp = &streams[ls];
+ lfp = lsp->ls_logfile;
+
+ VERIFY(lsp->ls_stream[0] != '\0');
+ VERIFY3P(lfp, !=, NULL);
+
+ logstream_reset(lsp);
+
+ for (i = 0; i < ARRAY_SIZE(streams); i++) {
+ if (streams[i].ls_logfile == lfp) {
+ logstream_unlock();
+ return;
+ }
+ }
+
+ /* No more streams using this log file so return to initial state */
+
+ close_log(lfp, "close", abrupt);
+
+ logfile_reset(lfp);
+
+ logstream_unlock();
+}
diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c
index cbba769f94..0ace033738 100644
--- a/usr/src/cmd/zoneadmd/vplat.c
+++ b/usr/src/cmd/zoneadmd/vplat.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2016, Joyent Inc.
+ * Copyright 2018, Joyent Inc.
* Copyright (c) 2015, 2016 by Delphix. All rights reserved.
* Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
* Copyright 2020 RackTop Systems Inc.
@@ -80,10 +80,12 @@
#include <sys/conf.h>
#include <sys/systeminfo.h>
#include <sys/secflags.h>
+#include <sys/vnic.h>
#include <libdlpi.h>
#include <libdllink.h>
#include <libdlvlan.h>
+#include <libdlvnic.h>
#include <inet/tcp.h>
#include <arpa/inet.h>
@@ -139,6 +141,9 @@
#define DFSTYPES "/etc/dfs/fstypes"
#define MAXTNZLEN 2048
+/* Number of times to retry unmounting if it fails */
+#define UMOUNT_RETRIES 30
+
#define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
/* a reasonable estimate for the number of lwps per process */
@@ -164,11 +169,25 @@ static priv_set_t *zprivs = NULL;
static const char *DFLT_FS_ALLOWED = "hsfs,smbfs,nfs,nfs3,nfs4,nfsdyn";
+typedef struct zone_proj_rctl_map {
+ char *zpr_zone_rctl;
+ char *zpr_project_rctl;
+} zone_proj_rctl_map_t;
+
+static zone_proj_rctl_map_t zone_proj_rctl_map[] = {
+ {"zone.max-msg-ids", "project.max-msg-ids"},
+ {"zone.max-sem-ids", "project.max-sem-ids"},
+ {"zone.max-shm-ids", "project.max-shm-ids"},
+ {"zone.max-shm-memory", "project.max-shm-memory"},
+ {NULL, NULL}
+};
+
/* from libsocket, not in any header file */
extern int getnetmaskbyaddr(struct in_addr, struct in_addr *);
/* from zoneadmd */
extern char query_hook[];
+extern char post_statechg_hook[];
/*
* For each "net" resource configured in zonecfg, we track a zone_addr_list_t
@@ -209,7 +228,7 @@ autofs_cleanup(zoneid_t zoneid)
* Handle ENOSYS in the case that the autofs kernel module is not
* installed.
*/
- r = _autofssys(AUTOFS_UNMOUNTALL, (void *)zoneid);
+ r = _autofssys(AUTOFS_UNMOUNTALL, (void *)((uintptr_t)zoneid));
if (r != 0 && errno == ENOSYS) {
return (0);
}
@@ -604,6 +623,24 @@ root_to_lu(zlog_t *zlogp, char *zroot, size_t zrootlen, boolean_t isresolved)
}
/*
+ * Perform brand-specific cleanup if we are unable to unmount a FS.
+ */
+static void
+brand_umount_cleanup(zlog_t *zlogp, char *path)
+{
+ char cmdbuf[2 * MAXPATHLEN];
+
+ if (post_statechg_hook[0] == '\0')
+ return;
+
+ if (snprintf(cmdbuf, sizeof (cmdbuf), "%s %d %d %s", post_statechg_hook,
+ ZONE_STATE_DOWN, Z_UNMOUNT, path) > sizeof (cmdbuf))
+ return;
+
+ (void) do_subproc(zlogp, cmdbuf, NULL, B_FALSE);
+}
+
+/*
* The general strategy for unmounting filesystems is as follows:
*
* - Remote filesystems may be dead, and attempting to contact them as
@@ -636,6 +673,7 @@ static int
unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
{
int error = 0;
+ int fail = 0;
FILE *mnttab;
struct mnttab *mnts;
uint_t nmnt;
@@ -723,18 +761,39 @@ unmount_filesystems(zlog_t *zlogp, zoneid_t zoneid, boolean_t unmount_cmd)
if (umount2(path, MS_FORCE) == 0) {
unmounted = B_TRUE;
stuck = B_FALSE;
+ fail = 0;
} else {
/*
- * The first failure indicates a
- * mount we won't be able to get
- * rid of automatically, so we
- * bail.
+ * We may hit a failure here if there
+ * is an app in the GZ with an open
+ * pipe into the zone (commonly into
+ * the zone's /var/run). This type
+ * of app will notice the closed
+ * connection and cleanup, but it may
+ * take a while and we have no easy
+ * way to notice that. To deal with
+ * this case, we will wait and retry
+ * a few times before we give up.
*/
- error++;
- zerror(zlogp, B_FALSE,
- "unable to unmount '%s'", path);
- free_mnttable(mnts, nmnt);
- goto out;
+ fail++;
+ if (fail < (UMOUNT_RETRIES - 1)) {
+ zerror(zlogp, B_FALSE,
+ "unable to unmount '%s', "
+ "retrying in 2 seconds",
+ path);
+ (void) sleep(2);
+ } else if (fail > UMOUNT_RETRIES) {
+ error++;
+ zerror(zlogp, B_FALSE,
+ "unmount of '%s' failed",
+ path);
+ free_mnttable(mnts, nmnt);
+ goto out;
+ } else {
+ /* Try the hook 2 times */
+ brand_umount_cleanup(zlogp,
+ path);
+ }
}
}
/*
@@ -1072,23 +1131,10 @@ mount_one_dev_symlink_cb(void *arg, const char *source, const char *target)
int
vplat_get_iptype(zlog_t *zlogp, zone_iptype_t *iptypep)
{
- zone_dochandle_t handle;
-
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
- if (zonecfg_get_iptype(handle, iptypep) != Z_OK) {
+ if (zonecfg_get_iptype(snap_hndl, iptypep) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid ip-type configuration");
- zonecfg_fini_handle(handle);
return (-1);
}
- zonecfg_fini_handle(handle);
return (0);
}
@@ -1101,14 +1147,13 @@ static int
mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
{
char brand[MAXNAMELEN];
- zone_dochandle_t handle = NULL;
brand_handle_t bh = NULL;
struct zone_devtab ztab;
di_prof_t prof = NULL;
int err;
int retval = -1;
zone_iptype_t iptype;
- const char *curr_iptype;
+ const char *curr_iptype = NULL;
if (di_prof_init(devpath, &prof)) {
zerror(zlogp, B_TRUE, "failed to initialize profile");
@@ -1146,6 +1191,8 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
zerror(zlogp, B_FALSE, "bad ip-type");
goto cleanup;
}
+ if (curr_iptype == NULL)
+ abort();
if (brand_platform_iter_devices(bh, zone_name,
mount_one_dev_device_cb, prof, curr_iptype) != 0) {
@@ -1160,28 +1207,25 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
}
/* Add user-specified devices and directories */
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_FALSE, "can't initialize zone handle");
- goto cleanup;
- }
- if ((err = zonecfg_get_handle(zone_name, handle)) != 0) {
- zerror(zlogp, B_FALSE, "can't get handle for zone "
- "%s: %s", zone_name, zonecfg_strerror(err));
- goto cleanup;
- }
- if ((err = zonecfg_setdevent(handle)) != 0) {
+ if ((err = zonecfg_setdevent(snap_hndl)) != 0) {
zerror(zlogp, B_FALSE, "%s: %s", zone_name,
zonecfg_strerror(err));
goto cleanup;
}
- while (zonecfg_getdevent(handle, &ztab) == Z_OK) {
- if (di_prof_add_dev(prof, ztab.zone_dev_match)) {
+ while (zonecfg_getdevent(snap_hndl, &ztab) == Z_OK) {
+ char path[MAXPATHLEN];
+
+ if ((err = resolve_device_match(zlogp, &ztab,
+ path, sizeof (path))) != Z_OK)
+ goto cleanup;
+
+ if (di_prof_add_dev(prof, path)) {
zerror(zlogp, B_TRUE, "failed to add "
- "user-specified device");
+ "user-specified device '%s'", path);
goto cleanup;
}
}
- (void) zonecfg_enddevent(handle);
+ (void) zonecfg_enddevent(snap_hndl);
/* Send profile to kernel */
if (di_prof_commit(prof)) {
@@ -1194,8 +1238,6 @@ mount_one_dev(zlog_t *zlogp, char *devpath, zone_mnt_t mount_cmd)
cleanup:
if (bh != NULL)
brand_close(bh);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
if (prof)
di_prof_fini(prof);
return (retval);
@@ -1686,12 +1728,10 @@ static int
mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
{
char rootpath[MAXPATHLEN];
- char zonepath[MAXPATHLEN];
char brand[MAXNAMELEN];
char luroot[MAXPATHLEN];
int i, num_fs = 0;
struct zone_fstab *fs_ptr = NULL;
- zone_dochandle_t handle = NULL;
zone_state_t zstate;
brand_handle_t bh;
plat_gmount_cb_data_t cb;
@@ -1705,22 +1745,12 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
goto bad;
}
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_TRUE, "unable to determine zone path");
- goto bad;
- }
-
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine zone root");
goto bad;
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- goto bad;
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK ||
- zonecfg_setfsent(handle) != Z_OK) {
+ if (zonecfg_setfsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid configuration");
goto bad;
}
@@ -1738,7 +1768,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
/* Get a handle to the brand info for this zone */
if ((bh = brand_open(brand)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
- zonecfg_fini_handle(handle);
return (-1);
}
@@ -1753,7 +1782,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
plat_gmount_cb, &cb) != 0) {
zerror(zlogp, B_FALSE, "unable to mount filesystems");
brand_close(bh);
- zonecfg_fini_handle(handle);
return (-1);
}
brand_close(bh);
@@ -1764,13 +1792,10 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
* higher level directories (e.g., /usr) get mounted before
* any beneath them (e.g., /usr/local).
*/
- if (mount_filesystems_fsent(handle, zlogp, &fs_ptr, &num_fs,
+ if (mount_filesystems_fsent(snap_hndl, zlogp, &fs_ptr, &num_fs,
mount_cmd) != 0)
goto bad;
- zonecfg_fini_handle(handle);
- handle = NULL;
-
/*
* Normally when we mount a zone all the zone filesystems
* get mounted relative to rootpath, which is usually
@@ -1810,23 +1835,40 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
qsort(fs_ptr, num_fs, sizeof (*fs_ptr), fs_compare);
for (i = 0; i < num_fs; i++) {
- if (ALT_MOUNT(mount_cmd) &&
- strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
- size_t slen = strlen(rootpath) - 2;
+ if (ALT_MOUNT(mount_cmd)) {
+ if (strcmp(fs_ptr[i].zone_fs_dir, "/dev") == 0) {
+ size_t slen = strlen(rootpath) - 2;
- /*
- * By default we'll try to mount /dev as /a/dev
- * but /dev is special and always goes at the top
- * so strip the trailing '/a' from the rootpath.
- */
- assert(strcmp(&rootpath[slen], "/a") == 0);
- rootpath[slen] = '\0';
- if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd)
- != 0)
- goto bad;
- rootpath[slen] = '/';
- continue;
+ /*
+ * By default we'll try to mount /dev
+ * as /a/dev but /dev is special and
+ * always goes at the top so strip the
+ * trailing '/a' from the rootpath.
+ */
+ assert(strcmp(&rootpath[slen], "/a") == 0);
+ rootpath[slen] = '\0';
+ if (mount_one(zlogp, &fs_ptr[i], rootpath,
+ mount_cmd) != 0)
+ goto bad;
+ rootpath[slen] = '/';
+ continue;
+ } else if (strcmp(brand_name, default_brand) != 0) {
+ /*
+ * If mounting non-native brand, skip
+ * mounting global mounts and
+ * filesystem entries since they are
+ * only needed for native pkg upgrade
+ * tools.
+ *
+ * The only exception right now is
+ * /dev (handled above), which is
+ * needed in the luroot in order to
+ * zlogin -S into the zone.
+ */
+ continue;
+ }
}
+
if (mount_one(zlogp, &fs_ptr[i], rootpath, mount_cmd) != 0)
goto bad;
}
@@ -1849,8 +1891,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd)
return (0);
bad:
- if (handle != NULL)
- zonecfg_fini_handle(handle);
free_fs_data(fs_ptr, num_fs);
return (-1);
}
@@ -2210,9 +2250,6 @@ configure_one_interface(zlog_t *zlogp, zoneid_t zone_id,
* the console by zoneadm(8) so instead we log the
* message to syslog and continue.
*/
- zerror(&logsys, B_TRUE, "WARNING: skipping network interface "
- "'%s' which may not be present/plumbed in the "
- "global zone.", lifr.lifr_name);
(void) close(s);
return (Z_OK);
}
@@ -2425,7 +2462,6 @@ bad:
static int
configure_shared_network_interfaces(zlog_t *zlogp)
{
- zone_dochandle_t handle;
struct zone_nwiftab nwiftab, loopback_iftab;
zoneid_t zoneid;
@@ -2434,29 +2470,19 @@ configure_shared_network_interfaces(zlog_t *zlogp)
return (-1);
}
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
- if (zonecfg_setnwifent(handle) == Z_OK) {
+ if (zonecfg_setnwifent(snap_hndl) == Z_OK) {
for (;;) {
- if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
+ if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
break;
+ nwifent_free_attrs(&nwiftab);
if (configure_one_interface(zlogp, zoneid, &nwiftab) !=
Z_OK) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
return (-1);
}
}
- (void) zonecfg_endnwifent(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
}
- zonecfg_fini_handle(handle);
if (is_system_labeled()) {
/*
* Labeled zones share the loopback interface
@@ -2910,7 +2936,6 @@ free_ip_interface(zone_addr_list_t *zalist)
static int
configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
{
- zone_dochandle_t handle;
struct zone_nwiftab nwiftab;
char rootpath[MAXPATHLEN];
char path[MAXPATHLEN];
@@ -2919,30 +2944,18 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
boolean_t added = B_FALSE;
zone_addr_list_t *zalist = NULL, *new;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
- if (zonecfg_setnwifent(handle) != Z_OK) {
- zonecfg_fini_handle(handle);
+ if (zonecfg_setnwifent(snap_hndl) != Z_OK)
return (0);
- }
for (;;) {
- if (zonecfg_getnwifent(handle, &nwiftab) != Z_OK)
+ if (zonecfg_getnwifent(snap_hndl, &nwiftab) != Z_OK)
break;
+ nwifent_free_attrs(&nwiftab);
if (prof == NULL) {
if (zone_get_devroot(zone_name, rootpath,
sizeof (rootpath)) != Z_OK) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE,
"unable to determine dev root");
return (-1);
@@ -2950,8 +2963,7 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
(void) snprintf(path, sizeof (path), "%s%s", rootpath,
"/dev");
if (di_prof_init(path, &prof) != 0) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE,
"failed to initialize profile");
return (-1);
@@ -2975,17 +2987,17 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
nwiftab.zone_nwif_physical) == 0) {
added = B_TRUE;
} else {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
- zerror(zlogp, B_TRUE, "failed to add network device");
- return (-1);
+ /*
+ * Failed to add network device, but the brand hook
+ * might be doing this for us, so keep silent.
+ */
+ continue;
}
/* set up the new IP interface, and add them all later */
new = malloc(sizeof (*new));
if (new == NULL) {
zerror(zlogp, B_TRUE, "no memory for %s",
nwiftab.zone_nwif_physical);
- zonecfg_fini_handle(handle);
free_ip_interface(zalist);
}
bzero(new, sizeof (*new));
@@ -2995,16 +3007,14 @@ configure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
}
if (zalist != NULL) {
if ((errno = add_net(zlogp, zoneid, zalist)) != 0) {
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
zerror(zlogp, B_TRUE, "failed to add address");
free_ip_interface(zalist);
return (-1);
}
free_ip_interface(zalist);
}
- (void) zonecfg_endnwifent(handle);
- zonecfg_fini_handle(handle);
+ (void) zonecfg_endnwifent(snap_hndl);
if (prof != NULL && added) {
if (di_prof_commit(prof) != 0) {
@@ -3140,48 +3150,23 @@ remove_datalink_protect(zlog_t *zlogp, zoneid_t zoneid)
/* datalink does not belong to the GZ */
continue;
}
- if (dlstatus != DLADM_STATUS_OK) {
+ if (dlstatus != DLADM_STATUS_OK)
zerror(zlogp, B_FALSE,
+ "clear 'protection' link property: %s",
dladm_status2str(dlstatus, dlerr));
- free(dllinks);
- return (-1);
- }
+
dlstatus = dladm_set_linkprop(dld_handle, *dllink,
"allowed-ips", NULL, 0, DLADM_OPT_ACTIVE);
- if (dlstatus != DLADM_STATUS_OK) {
+ if (dlstatus != DLADM_STATUS_OK)
zerror(zlogp, B_FALSE,
+ "clear 'allowed-ips' link property: %s",
dladm_status2str(dlstatus, dlerr));
- free(dllinks);
- return (-1);
- }
}
free(dllinks);
return (0);
}
static int
-unconfigure_exclusive_network_interfaces(zlog_t *zlogp, zoneid_t zoneid)
-{
- int dlnum = 0;
-
- /*
- * The kernel shutdown callback for the dls module should have removed
- * all datalinks from this zone. If any remain, then there's a
- * problem.
- */
- if (zone_list_datalink(zoneid, &dlnum, NULL) != 0) {
- zerror(zlogp, B_TRUE, "unable to list network interfaces");
- return (-1);
- }
- if (dlnum != 0) {
- zerror(zlogp, B_FALSE,
- "datalinks remain in zone after shutdown");
- return (-1);
- }
- return (0);
-}
-
-static int
tcp_abort_conn(zlog_t *zlogp, zoneid_t zoneid,
const struct sockaddr_storage *local, const struct sockaddr_storage *remote)
{
@@ -3263,26 +3248,14 @@ static int
get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
{
int error = -1;
- zone_dochandle_t handle;
char *privname = NULL;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
if (ALT_MOUNT(mount_cmd)) {
zone_iptype_t iptype;
- const char *curr_iptype;
+ const char *curr_iptype = NULL;
- if (zonecfg_get_iptype(handle, &iptype) != Z_OK) {
+ if (zonecfg_get_iptype(snap_hndl, &iptype) != Z_OK) {
zerror(zlogp, B_TRUE, "unable to determine ip-type");
- zonecfg_fini_handle(handle);
return (-1);
}
@@ -3295,21 +3268,18 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
break;
default:
zerror(zlogp, B_FALSE, "bad ip-type");
- zonecfg_fini_handle(handle);
return (-1);
}
- if (zonecfg_default_privset(privs, curr_iptype) == Z_OK) {
- zonecfg_fini_handle(handle);
+ if (zonecfg_default_privset(privs, curr_iptype) == Z_OK)
return (0);
- }
+
zerror(zlogp, B_FALSE,
"failed to determine the zone's default privilege set");
- zonecfg_fini_handle(handle);
return (-1);
}
- switch (zonecfg_get_privset(handle, privs, &privname)) {
+ switch (zonecfg_get_privset(snap_hndl, privs, &privname)) {
case Z_OK:
error = 0;
break;
@@ -3332,10 +3302,22 @@ get_privset(zlog_t *zlogp, priv_set_t *privs, zone_mnt_t mount_cmd)
}
free(privname);
- zonecfg_fini_handle(handle);
return (error);
}
+static char *
+zone_proj_rctl(const char *name)
+{
+ int i;
+
+ for (i = 0; zone_proj_rctl_map[i].zpr_zone_rctl != NULL; i++) {
+ if (strcmp(name, zone_proj_rctl_map[i].zpr_zone_rctl) == 0) {
+ return (zone_proj_rctl_map[i].zpr_project_rctl);
+ }
+ }
+ return (NULL);
+}
+
static int
get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
{
@@ -3345,7 +3327,6 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
nvlist_t **nvlv = NULL;
int rctlcount = 0;
int error = -1;
- zone_dochandle_t handle;
struct zone_rctltab rctltab;
rctlblk_t *rctlblk = NULL;
uint64_t maxlwps;
@@ -3355,16 +3336,6 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
*bufp = NULL;
*bufsizep = 0;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
rctltab.zone_rctl_valptr = NULL;
if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) {
zerror(zlogp, B_TRUE, "%s failed", "nvlist_alloc");
@@ -3379,17 +3350,17 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
* from max-processes. If only the max-lwps property is set, we add a
* max-processes property with the same limit as max-lwps.
*/
- rproc = zonecfg_get_aliased_rctl(handle, ALIAS_MAXPROCS, &maxprocs);
- rlwp = zonecfg_get_aliased_rctl(handle, ALIAS_MAXLWPS, &maxlwps);
+ rproc = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXPROCS, &maxprocs);
+ rlwp = zonecfg_get_aliased_rctl(snap_hndl, ALIAS_MAXLWPS, &maxlwps);
if (rproc == Z_OK && rlwp == Z_NO_ENTRY) {
- if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXLWPS,
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXLWPS,
maxprocs * LWPS_PER_PROCESS) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to set max-lwps alias");
goto out;
}
} else if (rlwp == Z_OK && rproc == Z_NO_ENTRY) {
/* no scaling for max-proc value */
- if (zonecfg_set_aliased_rctl(handle, ALIAS_MAXPROCS,
+ if (zonecfg_set_aliased_rctl(snap_hndl, ALIAS_MAXPROCS,
maxlwps) != Z_OK) {
zerror(zlogp, B_FALSE,
"unable to set max-processes alias");
@@ -3397,7 +3368,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
}
}
- if (zonecfg_setrctlent(handle) != Z_OK) {
+ if (zonecfg_setrctlent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setrctlent");
goto out;
}
@@ -3406,10 +3377,11 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
zerror(zlogp, B_TRUE, "memory allocation failed");
goto out;
}
- while (zonecfg_getrctlent(handle, &rctltab) == Z_OK) {
+ while (zonecfg_getrctlent(snap_hndl, &rctltab) == Z_OK) {
struct zone_rctlvaltab *rctlval;
uint_t i, count;
const char *name = rctltab.zone_rctl_name;
+ char *proj_nm;
/* zoneadm should have already warned about unknown rctls. */
if (!zonecfg_is_rctl(name)) {
@@ -3476,6 +3448,26 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
}
zonecfg_free_rctl_value_list(rctltab.zone_rctl_valptr);
rctltab.zone_rctl_valptr = NULL;
+
+ /*
+ * With no action on our part we will start zsched with the
+ * project rctl values for our (zoneadmd) current project. For
+ * brands running a variant of Illumos, that's not a problem
+ * since they will setup their own projects, but for a
+ * non-native brand like lx, where there are no projects, we
+ * want to start things up with the same project rctls as the
+ * corresponding zone rctls, since nothing within the zone will
+ * ever change the project rctls.
+ */
+ if ((proj_nm = zone_proj_rctl(name)) != NULL) {
+ if (nvlist_add_nvlist_array(nvl, proj_nm, nvlv, count)
+ != 0) {
+ zerror(zlogp, B_FALSE,
+ "nvlist_add_nvlist_arrays failed");
+ goto out;
+ }
+ }
+
if (nvlist_add_nvlist_array(nvl, (char *)name, nvlv, count)
!= 0) {
zerror(zlogp, B_FALSE, "%s failed",
@@ -3488,7 +3480,7 @@ get_rctls(zlog_t *zlogp, char **bufp, size_t *bufsizep)
nvlv = NULL;
rctlcount++;
}
- (void) zonecfg_endrctlent(handle);
+ (void) zonecfg_endrctlent(snap_hndl);
if (rctlcount == 0) {
error = 0;
@@ -3512,8 +3504,6 @@ out:
nvlist_free(nvl);
if (nvlv != NULL)
free(nvlv);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
return (error);
}
@@ -3529,7 +3519,7 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr)
> sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, retstr) != 0)
+ if (do_subproc(zlogp, cmdbuf, retstr, B_FALSE) != 0)
return (-1);
return (0);
@@ -3538,7 +3528,6 @@ get_implicit_datasets(zlog_t *zlogp, char **retstr)
static int
get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
{
- zone_dochandle_t handle;
struct zone_dstab dstab;
size_t total, offset, len;
int error = -1;
@@ -3549,30 +3538,20 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
*bufp = NULL;
*bufsizep = 0;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
if (get_implicit_datasets(zlogp, &implicit_datasets) != 0) {
zerror(zlogp, B_FALSE, "getting implicit datasets failed");
goto out;
}
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
goto out;
}
total = 0;
- while (zonecfg_getdsent(handle, &dstab) == Z_OK)
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK)
total += strlen(dstab.zone_dataset_name) + 1;
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
if (implicit_datasets != NULL)
implicit_len = strlen(implicit_datasets);
@@ -3589,12 +3568,12 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
goto out;
}
- if (zonecfg_setdsent(handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "%s failed", "zonecfg_setdsent");
goto out;
}
offset = 0;
- while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
len = strlen(dstab.zone_dataset_name);
(void) strlcpy(str + offset, dstab.zone_dataset_name,
total - offset);
@@ -3602,7 +3581,7 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
if (offset < total - 1)
str[offset++] = ',';
}
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
if (implicit_len > 0)
(void) strlcpy(str + offset, implicit_datasets, total - offset);
@@ -3614,8 +3593,6 @@ get_datasets(zlog_t *zlogp, char **bufp, size_t *bufsizep)
out:
if (error != 0 && str != NULL)
free(str);
- if (handle != NULL)
- zonecfg_fini_handle(handle);
if (implicit_datasets != NULL)
free(implicit_datasets);
@@ -3625,40 +3602,26 @@ out:
static int
validate_datasets(zlog_t *zlogp)
{
- zone_dochandle_t handle;
struct zone_dstab dstab;
zfs_handle_t *zhp;
libzfs_handle_t *hdl;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (-1);
- }
- if (zonecfg_get_snapshot_handle(zone_name, handle) != Z_OK) {
+ if (zonecfg_setdsent(snap_hndl) != Z_OK) {
zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (-1);
- }
-
- if (zonecfg_setdsent(handle) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
return (-1);
}
if ((hdl = libzfs_init()) == NULL) {
zerror(zlogp, B_FALSE, "opening ZFS library");
- zonecfg_fini_handle(handle);
return (-1);
}
- while (zonecfg_getdsent(handle, &dstab) == Z_OK) {
+ while (zonecfg_getdsent(snap_hndl, &dstab) == Z_OK) {
if ((zhp = zfs_open(hdl, dstab.zone_dataset_name,
ZFS_TYPE_FILESYSTEM)) == NULL) {
zerror(zlogp, B_FALSE, "cannot open ZFS dataset '%s'",
dstab.zone_dataset_name);
- zonecfg_fini_handle(handle);
libzfs_fini(hdl);
return (-1);
}
@@ -3673,7 +3636,6 @@ validate_datasets(zlog_t *zlogp)
zerror(zlogp, B_FALSE, "cannot set 'zoned' "
"property for ZFS dataset '%s'\n",
dstab.zone_dataset_name);
- zonecfg_fini_handle(handle);
zfs_close(zhp);
libzfs_fini(hdl);
return (-1);
@@ -3681,9 +3643,8 @@ validate_datasets(zlog_t *zlogp)
zfs_close(zhp);
}
- (void) zonecfg_enddsent(handle);
+ (void) zonecfg_enddsent(snap_hndl);
- zonecfg_fini_handle(handle);
libzfs_fini(hdl);
return (0);
@@ -3737,17 +3698,11 @@ validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl)
zfs_handle_t *zhp;
libzfs_handle_t *hdl;
m_label_t ds_sl;
- char zonepath[MAXPATHLEN];
char ds_hexsl[MAXNAMELEN];
if (!is_system_labeled())
return (0);
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_TRUE, "unable to determine zone path");
- return (-1);
- }
-
if (!is_zonepath_zfs(zonepath))
return (0);
@@ -4418,62 +4373,52 @@ duplicate_reachable_path(zlog_t *zlogp, const char *rootpath)
}
/*
- * Set memory cap and pool info for the zone's resource management
- * configuration.
+ * Set pool info for the zone's resource management configuration.
*/
static int
setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
{
int res;
uint64_t tmp;
- struct zone_mcaptab mcap;
char sched[MAXNAMELEN];
- zone_dochandle_t handle = NULL;
char pool_err[128];
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
-
- if ((res = zonecfg_get_snapshot_handle(zone_name, handle)) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- zonecfg_fini_handle(handle);
- return (res);
- }
-
- /*
- * If a memory cap is configured, set the cap in the kernel using
- * zone_setattr() and make sure the rcapd SMF service is enabled.
- */
- if (zonecfg_getmcapent(handle, &mcap) == Z_OK) {
- uint64_t num;
- char smf_err[128];
-
- num = (uint64_t)strtoull(mcap.zone_physmem_cap, NULL, 10);
- if (zone_setattr(zoneid, ZONE_ATTR_PHYS_MCAP, &num, 0) == -1) {
- zerror(zlogp, B_TRUE, "could not set zone memory cap");
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
-
- if (zonecfg_enable_rcapd(smf_err, sizeof (smf_err)) != Z_OK) {
- zerror(zlogp, B_FALSE, "enabling system/rcap service "
- "failed: %s", smf_err);
- zonecfg_fini_handle(handle);
- return (Z_INVAL);
- }
- }
-
/* Get the scheduling class set in the zone configuration. */
- if (zonecfg_get_sched_class(handle, sched, sizeof (sched)) == Z_OK &&
+ if (zonecfg_get_sched_class(snap_hndl, sched, sizeof (sched)) == Z_OK &&
strlen(sched) > 0) {
if (zone_setattr(zoneid, ZONE_ATTR_SCHED_CLASS, sched,
strlen(sched)) == -1)
zerror(zlogp, B_TRUE, "WARNING: unable to set the "
"default scheduling class");
- } else if (zonecfg_get_aliased_rctl(handle, ALIAS_SHARES, &tmp)
+ if (strcmp(sched, "FX") == 0) {
+ /*
+ * When FX is specified then by default all processes
+ * will start at the lowest priority level (0) and
+ * stay there. We support an optional attr which
+ * indicates that all the processes should be "high
+ * priority". We set this on the zone so that starting
+ * init will set the priority high.
+ */
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "fixed-hi-prio",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+ strcmp(a.zone_attr_value, "true") == 0) {
+ boolean_t hi = B_TRUE;
+
+ if (zone_setattr(zoneid,
+ ZONE_ATTR_SCHED_FIXEDHI, (void *)hi,
+ sizeof (hi)) == -1)
+ zerror(zlogp, B_TRUE, "WARNING: unable "
+ "to set high priority");
+ }
+ }
+
+ } else if (zonecfg_get_aliased_rctl(snap_hndl, ALIAS_SHARES, &tmp)
== Z_OK) {
/*
* If the zone has the zone.cpu-shares rctl set then we want to
@@ -4484,7 +4429,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
*/
char class_name[PC_CLNMSZ];
- if (zonecfg_get_dflt_sched_class(handle, class_name,
+ if (zonecfg_get_dflt_sched_class(snap_hndl, class_name,
sizeof (class_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "WARNING: unable to determine "
"the zone's scheduling class");
@@ -4517,7 +4462,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
* right thing in all cases (reuse or create) based on the current
* zonecfg.
*/
- if ((res = zonecfg_bind_tmp_pool(handle, zoneid, pool_err,
+ if ((res = zonecfg_bind_tmp_pool(snap_hndl, zoneid, pool_err,
sizeof (pool_err))) != Z_OK) {
if (res == Z_POOL || res == Z_POOL_CREATE || res == Z_POOL_BIND)
zerror(zlogp, B_FALSE, "%s: %s\ndedicated-cpu setting "
@@ -4526,14 +4471,13 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
else
zerror(zlogp, B_FALSE, "could not bind zone to "
"temporary pool: %s", zonecfg_strerror(res));
- zonecfg_fini_handle(handle);
return (Z_POOL_BIND);
}
/*
* Check if we need to warn about poold not being enabled.
*/
- if (zonecfg_warn_poold(handle)) {
+ if (zonecfg_warn_poold(snap_hndl)) {
zerror(zlogp, B_FALSE, "WARNING: A range of dedicated-cpus has "
"been specified\nbut the dynamic pool service is not "
"enabled.\nThe system will not dynamically adjust the\n"
@@ -4543,7 +4487,7 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
}
/* The following is a warning, not an error. */
- if ((res = zonecfg_bind_pool(handle, zoneid, pool_err,
+ if ((res = zonecfg_bind_pool(snap_hndl, zoneid, pool_err,
sizeof (pool_err))) != Z_OK) {
if (res == Z_POOL_BIND)
zerror(zlogp, B_FALSE, "WARNING: unable to bind to "
@@ -4557,10 +4501,9 @@ setup_zone_rm(zlog_t *zlogp, char *zone_name, zoneid_t zoneid)
}
/* Update saved pool name in case it has changed */
- (void) zonecfg_get_poolname(handle, zone_name, pool_name,
+ (void) zonecfg_get_poolname(snap_hndl, zone_name, pool_name,
sizeof (pool_name));
- zonecfg_fini_handle(handle);
return (Z_OK);
}
@@ -4787,36 +4730,31 @@ setup_zone_fs_allowed(zone_dochandle_t handle, zlog_t *zlogp, zoneid_t zoneid)
}
static int
-setup_zone_attrs(zlog_t *zlogp, char *zone_namep, zoneid_t zoneid)
+setup_zone_attrs(zlog_t *zlogp, zoneid_t zoneid)
{
- zone_dochandle_t handle;
int res = Z_OK;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE, "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
- if ((res = zonecfg_get_snapshot_handle(zone_namep, handle)) != Z_OK) {
- zerror(zlogp, B_FALSE, "invalid configuration");
- goto out;
- }
-
- if ((res = setup_zone_hostid(handle, zlogp, zoneid)) != Z_OK)
+ if ((res = setup_zone_hostid(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
- if ((res = setup_zone_fs_allowed(handle, zlogp, zoneid)) != Z_OK)
+ if ((res = setup_zone_fs_allowed(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
- if ((res = setup_zone_secflags(handle, zlogp, zoneid)) != Z_OK)
+ if ((res = setup_zone_secflags(snap_hndl, zlogp, zoneid)) != Z_OK)
goto out;
out:
- zonecfg_fini_handle(handle);
return (res);
}
+/*
+ * The zone_did is a persistent debug ID. Each zone should have a unique ID
+ * in the kernel. This is used for things like DTrace which want to monitor
+ * zones across reboots. They can't use the zoneid since that changes on
+ * each boot.
+ */
zoneid_t
-vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
+vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zone_did)
{
zoneid_t rval = -1;
priv_set_t *privs;
@@ -4832,7 +4770,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
tsol_zcent_t *zcent = NULL;
int match = 0;
int doi = 0;
- int flags;
+ int flags = -1;
zone_iptype_t iptype;
if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) {
@@ -4851,6 +4789,8 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
} else {
flags = 0;
}
+ if (flags == -1)
+ abort();
if ((privs = priv_allocset()) == NULL) {
zerror(zlogp, B_TRUE, "%s failed", "priv_allocset");
@@ -4954,7 +4894,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
xerr = 0;
if ((zoneid = zone_create(kzone, rootpath, privs, rctlbuf,
rctlbufsz, zfsbuf, zfsbufsz, &xerr, match, doi, zlabel,
- flags)) == -1) {
+ flags, zone_did)) == -1) {
if (xerr == ZE_AREMOUNTS) {
if (zonecfg_find_mounts(rootpath, NULL, NULL) < 1) {
zerror(zlogp, B_FALSE,
@@ -5000,7 +4940,7 @@ vplat_create(zlog_t *zlogp, zone_mnt_t mount_cmd)
struct brand_attr attr;
char modname[MAXPATHLEN];
- if (setup_zone_attrs(zlogp, zone_name, zoneid) != Z_OK)
+ if (setup_zone_attrs(zlogp, zoneid) != Z_OK)
goto error;
if ((bh = brand_open(brand_name)) == NULL) {
@@ -5058,6 +4998,8 @@ error:
}
if (rctlbuf != NULL)
free(rctlbuf);
+ if (zfsbuf != NULL)
+ free(zfsbuf);
priv_freeset(privs);
if (fp != NULL)
zonecfg_close_scratch(fp);
@@ -5146,7 +5088,7 @@ write_index_file(zoneid_t zoneid)
int
vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
{
- char zonepath[MAXPATHLEN];
+ char zpath[MAXPATHLEN];
if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) {
lofs_discard_mnttab();
@@ -5157,15 +5099,11 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
* Before we try to mount filesystems we need to create the
* attribute backing store for /dev
*/
- if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
- lofs_discard_mnttab();
- return (-1);
- }
- resolve_lofs(zlogp, zonepath, sizeof (zonepath));
+ (void) strlcpy(zpath, zonepath, sizeof (zpath));
+ resolve_lofs(zlogp, zpath, sizeof (zpath));
/* Make /dev directory owned by root, grouped sys */
- if (make_one_dir(zlogp, zonepath, "/dev", DEFAULT_DIR_MODE,
- 0, 3) != 0) {
+ if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) {
lofs_discard_mnttab();
return (-1);
}
@@ -5200,6 +5138,8 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid)
return (-1);
}
break;
+ default:
+ abort();
}
}
@@ -5274,14 +5214,88 @@ unmounted:
}
}
+/*
+ * Delete all transient VNICs belonging to this zone. A transient VNIC
+ * is one that is created and destroyed along with the lifetime of the
+ * zone. Non-transient VNICs, ones that are assigned from the GZ to a
+ * NGZ, are reassigned to the GZ in zone_shutdown() via the
+ * zone-specific data (zsd) callbacks.
+ */
+static int
+delete_transient_vnics(zlog_t *zlogp, zoneid_t zoneid)
+{
+ dladm_status_t status;
+ int num_links = 0;
+ datalink_id_t *links, link;
+ uint32_t link_flags;
+ datalink_class_t link_class;
+ char link_name[MAXLINKNAMELEN];
+
+ if (zone_list_datalink(zoneid, &num_links, NULL) != 0) {
+ zerror(zlogp, B_TRUE, "unable to determine "
+ "number of network interfaces");
+ return (-1);
+ }
+
+ if (num_links == 0)
+ return (0);
+
+ links = malloc(num_links * sizeof (datalink_id_t));
+
+ if (links == NULL) {
+ zerror(zlogp, B_TRUE, "failed to delete "
+ "network interfaces because of alloc fail");
+ return (-1);
+ }
+
+ if (zone_list_datalink(zoneid, &num_links, links) != 0) {
+ zerror(zlogp, B_TRUE, "failed to delete "
+ "network interfaces because of failure "
+ "to list them");
+ return (-1);
+ }
+
+ for (int i = 0; i < num_links; i++) {
+ char dlerr[DLADM_STRSIZE];
+ link = links[i];
+
+ status = dladm_datalink_id2info(dld_handle, link, &link_flags,
+ &link_class, NULL, link_name, sizeof (link_name));
+
+ if (status != DLADM_STATUS_OK) {
+ zerror(zlogp, B_FALSE, "failed to "
+ "delete network interface (%u)"
+ "due to failure to get link info: %s",
+ link,
+ dladm_status2str(status, dlerr));
+ return (-1);
+ }
+
+ if (link_flags & DLADM_OPT_TRANSIENT) {
+ assert(link_class & DATALINK_CLASS_VNIC);
+ status = dladm_vnic_delete(dld_handle, link,
+ DLADM_OPT_ACTIVE);
+
+ if (status != DLADM_STATUS_OK) {
+ zerror(zlogp, B_TRUE, "failed to delete link "
+ "with id %d: %s", link,
+ dladm_status2str(status, dlerr));
+ return (-1);
+ }
+ }
+ }
+
+ return (0);
+}
+
int
-vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
+vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting,
+ boolean_t debug)
{
char *kzone;
zoneid_t zoneid;
int res;
char pool_err[128];
- char zpath[MAXPATHLEN];
char cmdbuf[MAXPATHLEN];
brand_handle_t bh = NULL;
dladm_status_t status;
@@ -5314,16 +5328,12 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
goto error;
}
- if (remove_datalink_pool(zlogp, zoneid) != 0) {
+ if (remove_datalink_pool(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE, "unable clear datalink pool property");
- goto error;
- }
- if (remove_datalink_protect(zlogp, zoneid) != 0) {
+ if (remove_datalink_protect(zlogp, zoneid) != 0)
zerror(zlogp, B_FALSE,
"unable clear datalink protect property");
- goto error;
- }
/*
* The datalinks assigned to the zone will be removed from the NGZ as
@@ -5337,12 +5347,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
goto error;
}
- /* Get the zonepath of this zone */
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- goto error;
- }
-
/* Get a handle to the brand info for this zone */
if ((bh = brand_open(brand_name)) == NULL) {
zerror(zlogp, B_FALSE, "unable to determine zone brand");
@@ -5353,7 +5357,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
* brand a chance to cleanup any custom configuration.
*/
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) < 0) {
brand_close(bh);
zerror(zlogp, B_FALSE, "unable to determine branded zone's "
@@ -5363,7 +5367,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
brand_close(bh);
if ((strlen(cmdbuf) > EXEC_LEN) &&
- (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
+ (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
goto error;
}
@@ -5395,17 +5399,18 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
}
break;
case ZS_EXCLUSIVE:
- if (unconfigure_exclusive_network_interfaces(zlogp,
- zoneid) != 0) {
- zerror(zlogp, B_FALSE, "unable to unconfigure "
- "network interfaces in zone");
+ if (delete_transient_vnics(zlogp, zoneid) != 0) {
+ zerror(zlogp, B_FALSE, "unable to delete "
+ "transient vnics in zone");
goto error;
}
+
status = dladm_zone_halt(dld_handle, zoneid);
if (status != DLADM_STATUS_OK) {
zerror(zlogp, B_FALSE, "unable to notify "
"dlmgmtd of zone halt: %s",
dladm_status2str(status, errmsg));
+ goto error;
}
break;
}
@@ -5437,14 +5442,9 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting)
if (rebooting) {
struct zone_psettab pset_tab;
- zone_dochandle_t handle;
- if ((handle = zonecfg_init_handle()) != NULL &&
- zonecfg_get_handle(zone_name, handle) == Z_OK &&
- zonecfg_lookup_pset(handle, &pset_tab) == Z_OK)
+ if (zonecfg_lookup_pset(snap_hndl, &pset_tab) == Z_OK)
destroy_tmp_pool = B_FALSE;
-
- zonecfg_fini_handle(handle);
}
if (destroy_tmp_pool) {
diff --git a/usr/src/cmd/zoneadmd/zcons.c b/usr/src/cmd/zoneadmd/zcons.c
index 130b97d984..09a9f9ba8e 100644
--- a/usr/src/cmd/zoneadmd/zcons.c
+++ b/usr/src/cmd/zoneadmd/zcons.c
@@ -22,7 +22,7 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
- * Copyright 2012 Joyent, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
@@ -40,10 +40,10 @@
*
* Global Zone | Non-Global Zone
* .--------------. |
- * .-----------. | zoneadmd -z | | .--------. .---------.
- * | zlogin -C | | myzone | | | ttymon | | syslogd |
- * `-----------' `--------------' | `--------' `---------'
- * | | | | | | |
+ * .-----------. | zoneadmd -z |--. | .--------. .---------.
+ * | zlogin -C | | myzone | | | | ttymon | | syslogd |
+ * `-----------' `--------------' V | `--------' `---------'
+ * | | | | console.log | | |
* User | | | | | V V
* - - - - - - - - -|- - - -|- - - -|-|- - - - - - -|- - /dev/zconsole - - -
* Kernel V V | | |
@@ -59,7 +59,7 @@
* V +-----------+
* +---manager--+-subsidiary+
* | |
- * | zcons driver |
+ * | Zcons driver |
* | zonename="myzone" |
* +------------------------+
*
@@ -81,6 +81,8 @@
* functions as a two-way proxy for console I/O, relaying user input
* to the manager side of the console, and relaying output from the
* zone to the user.
+ *
+ * - Logging output to <zonepath>/logs/console.log.
*/
#include <sys/types.h>
@@ -118,9 +120,10 @@
#define CONSOLE_SOCKPATH ZONES_TMPDIR "/%s.console_sock"
+#define ZCONS_RETRY 10
+
static int serverfd = -1; /* console server unix domain socket fd */
char boot_args[BOOTARGS_MAX];
-char bad_boot_arg[BOOTARGS_MAX];
/*
* The eventstream is a simple one-directional flow of messages from the
@@ -130,7 +133,10 @@ char bad_boot_arg[BOOTARGS_MAX];
*/
static int eventstream[2];
-
+/* flag used to cope with race creating manager zcons devlink */
+static boolean_t manager_zcons_failed = B_FALSE;
+/* flag to track if we've seen a state change when there is no manager zcons */
+static boolean_t state_changed = B_FALSE;
int
eventstream_init()
@@ -323,7 +329,7 @@ destroy_console_devs(zlog_t *zlogp)
* interfaces to instantiate a new zone console node. We do a lot of
* sanity checking, and are careful to reuse a console if one exists.
*
- * Once the device is in the device tree, we kick devfsadm via di_init_devs()
+ * Once the device is in the device tree, we kick devfsadm via di_devlink_init()
* to ensure that the appropriate symlinks (to the manager and subsidiary
* console devices) are placed in /dev in the global zone.
*/
@@ -410,45 +416,66 @@ devlinks:
* ioctl, which will cause the manager to retain a reference to the
* subsidiary. This prevents ttymon from blowing through the
* subsidiary's STREAMS anchor.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything setup yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't setup the zone's
+ * console, we still go ahead and boot the zone.
*/
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_MANAGER_NAME);
- if ((managerfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ managerfd = open(conspath, O_RDWR | O_NOCTTY);
+ if (managerfd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (managerfd == -1) {
zerror(zlogp, B_TRUE, "ERROR: could not open manager side of "
"zone console for %s to acquire subsidiary handle",
zone_name);
- goto error;
+ manager_zcons_failed = B_TRUE;
}
+
(void) snprintf(conspath, sizeof (conspath), "/dev/zcons/%s/%s",
zone_name, ZCONS_SUBSIDIARY_NAME);
- if ((subfd = open(conspath, O_RDWR | O_NOCTTY)) == -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ subfd = open(conspath, O_RDWR | O_NOCTTY);
+ if (subfd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (subfd == -1)
zerror(zlogp, B_TRUE, "ERROR: could not open subsidiary side "
"of zone console for %s to acquire subsidiary handle",
zone_name);
- (void) close(managerfd);
- goto error;
- }
+
/*
* This ioctl can occasionally return ENXIO if devfs doesn't have
* everything plumbed up yet due to heavy zone startup load. Wait for
* 1 sec. and retry a few times before we fail to boot the zone.
*/
- for (i = 0; i < 5; i++) {
- if (ioctl(managerfd, ZC_HOLDSUBSID, (caddr_t)(intptr_t)subfd)
- == 0) {
- rv = 0;
- break;
- } else if (errno != ENXIO) {
- break;
+ if (managerfd != -1 && subfd != -1) {
+ for (i = 0; i < ZCONS_RETRY; i++) {
+ if (ioctl(managerfd, ZC_HOLDSUBSID,
+ (caddr_t)(intptr_t)subfd) == 0) {
+ rv = 0;
+ break;
+ } else if (errno != ENXIO) {
+ break;
+ }
+ (void) sleep(1);
}
- (void) sleep(1);
+ if (rv != 0)
+ zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
+ "subsidiary handle of zone console for %s",
+ zone_name);
}
- if (rv != 0)
- zerror(zlogp, B_TRUE, "ERROR: error while acquiring "
- "subsidiary handle of zone console for %s", zone_name);
- (void) close(subfd);
- (void) close(managerfd);
+ if (subfd != -1)
+ (void) close(subfd);
+ if (managerfd != -1)
+ (void) close(managerfd);
error:
if (ddef_hdl)
@@ -521,6 +548,7 @@ get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
size_t buflen = sizeof (buf);
char c = '\0';
int i = 0, r;
+ ucred_t *cred = NULL;
/* "eat up the ident string" case, for simplicity */
if (pid == NULL) {
@@ -554,18 +582,22 @@ get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
break;
}
+ if (getpeerucred(clifd, &cred) == 0) {
+ *pid = ucred_getpid((const ucred_t *)cred);
+ ucred_free(cred);
+ } else {
+ return (-1);
+ }
+
/*
* Parse buffer for message of the form:
- * IDENT <pid> <locale> <disconnect flag>
+ * IDENT <locale> <disconnect flag>
*/
bufp = buf;
if (strncmp(bufp, "IDENT ", 6) != 0)
return (-1);
bufp += 6;
errno = 0;
- *pid = strtoll(bufp, &bufp, 10);
- if (errno != 0)
- return (-1);
while (*bufp != '\0' && isspace(*bufp))
bufp++;
@@ -671,14 +703,6 @@ event_message(int clifd, char *clilocale, zone_evt_t evt, int dflag)
else
str = "NOTICE: Zone boot failed";
break;
- case Z_EVT_ZONE_BADARGS:
- /*LINTED*/
- (void) snprintf(lmsg, sizeof (lmsg),
- localize_msg(clilocale,
- "WARNING: Ignoring invalid boot arguments: %s"),
- bad_boot_arg);
- lstr = lmsg;
- break;
default:
return;
}
@@ -717,7 +741,7 @@ test_client(int clifd)
* messages) can be output in the user's locale.
*/
static void
-do_console_io(zlog_t *zlogp, int consfd, int servfd)
+do_console_io(zlog_t *zlogp, int consfd, int servfd, int conslog)
{
struct pollfd pollfds[4];
char ibuf[BUFSIZ];
@@ -763,14 +787,21 @@ do_console_io(zlog_t *zlogp, int consfd, int servfd)
(POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
errno = 0;
cc = read(consfd, ibuf, BUFSIZ);
- if (cc <= 0 && (errno != EINTR) &&
- (errno != EAGAIN))
- break;
- /*
- * Lose I/O if no one is listening
- */
- if (clifd != -1 && cc > 0)
- (void) write(clifd, ibuf, cc);
+ if (cc <= 0) {
+ if (errno != EINTR &&
+ errno != EAGAIN) {
+ break;
+ }
+ } else {
+ logstream_write(conslog, ibuf, cc);
+
+ /*
+ * Lose I/O if no one is listening
+ */
+ if (clifd != -1) {
+ (void) write(clifd, ibuf, cc);
+ }
+ }
} else {
pollerr = pollfds[0].revents;
zerror(zlogp, B_FALSE,
@@ -882,7 +913,6 @@ init_console(zlog_t *zlogp)
if (init_console_dev(zlogp) == -1) {
zerror(zlogp, B_FALSE,
"console setup: device initialization failed");
- return (-1);
}
if ((serverfd = init_console_sock(zlogp)) == -1) {
@@ -894,6 +924,17 @@ init_console(zlog_t *zlogp)
}
/*
+ * Maintain a simple flag that tracks if we have seen at least one state
+ * change. This is currently only used to handle the special case where we are
+ * running without a console device, which is what normally drives shutdown.
+ */
+void
+zcons_statechanged()
+{
+ state_changed = B_TRUE;
+}
+
+/*
* serve_console() is the master loop for driving console I/O. It is also the
* routine which is ultimately responsible for "pulling the plug" on zoneadmd
* when it realizes that the daemon should shut down.
@@ -911,6 +952,10 @@ serve_console(zlog_t *zlogp)
int managerfd;
zone_state_t zstate;
char conspath[MAXPATHLEN];
+ static boolean_t cons_warned = B_FALSE;
+ int conslog;
+
+ conslog = logstream_open("console.log", "console", LS_LINE_BUFFERED);
(void) snprintf(conspath, sizeof (conspath),
"/dev/zcons/%s/%s", zone_name, ZCONS_MANAGER_NAME);
@@ -918,6 +963,46 @@ serve_console(zlog_t *zlogp)
for (;;) {
managerfd = open(conspath, O_RDWR|O_NONBLOCK|O_NOCTTY);
if (managerfd == -1) {
+ if (manager_zcons_failed) {
+ /*
+ * If we don't have a console and the zone is
+ * not shutting down, there may have been a
+ * race/failure with devfs while creating the
+ * console. In this case we want to leave the
+ * zone up, even without a console, so
+ * periodically recheck.
+ */
+ int i;
+
+ /*
+ * In the normal flow of this loop, we use
+ * do_console_io to give things a chance to get
+ * going first. However, in this case we can't
+ * use that, so we have to wait for at least
+ * one state change before checking the state.
+ */
+ for (i = 0; i < 60; i++) {
+ if (state_changed)
+ break;
+ (void) sleep(1);
+ }
+
+ if (i < 60 && zone_get_state(zone_name,
+ &zstate) == Z_OK &&
+ (zstate == ZONE_STATE_READY ||
+ zstate == ZONE_STATE_RUNNING)) {
+ if (!cons_warned) {
+ zerror(zlogp, B_FALSE,
+ "WARNING: missing zone "
+ "console for %s",
+ zone_name);
+ cons_warned = B_TRUE;
+ }
+ (void) sleep(ZCONS_RETRY);
+ continue;
+ }
+ }
+
zerror(zlogp, B_TRUE, "failed to open console manager");
(void) mutex_lock(&lock);
goto death;
@@ -937,7 +1022,7 @@ serve_console(zlog_t *zlogp)
goto death;
}
- do_console_io(zlogp, managerfd, serverfd);
+ do_console_io(zlogp, managerfd, serverfd, conslog);
/*
* We would prefer not to do this, but hostile zone processes
@@ -978,4 +1063,6 @@ death:
destroy_console_sock(serverfd);
(void) destroy_console_devs(zlogp);
+
+ logstream_close(conslog, B_FALSE);
}
diff --git a/usr/src/cmd/zoneadmd/zfd.c b/usr/src/cmd/zoneadmd/zfd.c
new file mode 100644
index 0000000000..6647ef0c5f
--- /dev/null
+++ b/usr/src/cmd/zoneadmd/zfd.c
@@ -0,0 +1,1238 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ * Copyright 2019 Joyent, Inc.
+ */
+
+/*
+ * Zone file descriptor support is used as a mechanism for a process inside the
+ * zone to log messages to the GZ zoneadmd and also as a way to interact
+ * directly with the process (via zlogin -I). The zfd thread is modeled on
+ * the zcons thread so see the comment header in zcons.c for a general overview.
+ * Unlike with zcons, which has a single endpoint within the zone and a single
+ * endpoint used by zoneadmd, we setup multiple endpoints within the zone.
+ *
+ * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat
+ * of a misnomer since its purpose has evolved. The attribute currently
+ * can have six values which are used to control:
+ * - how the zfd devices are used inside the zone
+ * - if the output on the device(s) is also teed into another stream within
+ * the zone
+ * - if we do logging in the GZ
+ * See the comment on get_mode_logmax() in this file, and the comment in
+ * uts/common/io/zfd.c for more details.
+ *
+ * Internally the zfd_mode_t struct holds the number of stdio devs (1 or 3),
+ * the number of additional devs corresponding to the zone attr value and the
+ * GZ logging flag.
+ *
+ * Note that although the mode indicates the number of devices needed, we always
+ * create all possible zfd devices for simplicity.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/termios.h>
+#include <sys/zfd.h>
+#include <sys/mkdev.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <stropts.h>
+#include <thread.h>
+#include <ucred.h>
+#include <unistd.h>
+#include <zone.h>
+#include <signal.h>
+#include <wchar.h>
+
+#include <libdevinfo.h>
+#include <libdevice.h>
+#include <libzonecfg.h>
+
+#include <syslog.h>
+#include <sys/modctl.h>
+
+#include "zoneadmd.h"
+
+static int shutting_down = 0;
+static thread_t logger_tid;
+static char log_name[MAXNAMELEN] = "stdio.log";
+
+/*
+ * The eventstream is a simple one-directional flow of messages implemented
+ * with a pipe. It is used to wake up the poller when it needs to shutdown.
+ */
+static int eventstream[2] = {-1, -1};
+
+#define ZLOG_MODE "zlog-mode"
+#define ZLOG_NAME "zlog-name"
+#define ZFDNEX_DEVTREEPATH "/pseudo/zfdnex@2"
+#define ZFDNEX_FILEPATH "/devices/pseudo/zfdnex@2"
+#define SERVER_SOCKPATH ZONES_TMPDIR "/%s.server_%s"
+#define ZTTY_RETRY 5
+
+#define NUM_ZFD_DEVS 5
+
+typedef struct zfd_mode {
+ uint_t zmode_n_stddevs;
+ uint_t zmode_n_addl_devs;
+ boolean_t zmode_gzlogging;
+} zfd_mode_t;
+static zfd_mode_t mode;
+
+/*
+ * cb_data is only used by destroy_cb.
+ */
+struct cb_data {
+ zlog_t *zlogp;
+ int killed;
+};
+
+/*
+ * destroy_zfd_devs() and its helper destroy_cb() tears down any zfd instances
+ * associated with this zone. If things went very wrong, we might have an
+ * incorrect number of instances hanging around. This routine hunts down and
+ * tries to remove all of them. Of course, if the fd is open, the instance will
+ * not detach, which is a potential issue.
+ */
+static int
+destroy_cb(di_node_t node, void *arg)
+{
+ struct cb_data *cb = (struct cb_data *)arg;
+ char *prop_data;
+ char *tmp;
+ char devpath[MAXPATHLEN];
+ devctl_hdl_t hdl;
+
+ if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname",
+ &prop_data) == -1)
+ return (DI_WALK_CONTINUE);
+
+ assert(prop_data != NULL);
+ if (strcmp(prop_data, zone_name) != 0) {
+ /* this is a zfd for a different zone */
+ return (DI_WALK_CONTINUE);
+ }
+
+ tmp = di_devfs_path(node);
+ (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp);
+ di_devfs_path_free(tmp);
+
+ if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) {
+ zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, "
+ "but it could not be controlled.", devpath);
+ return (DI_WALK_CONTINUE);
+ }
+ if (devctl_device_remove(hdl) == 0) {
+ cb->killed++;
+ } else {
+ zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, "
+ "but it could not be removed.", devpath);
+ }
+ devctl_release(hdl);
+ return (DI_WALK_CONTINUE);
+}
+
+static int
+destroy_zfd_devs(zlog_t *zlogp)
+{
+ di_node_t root;
+ struct cb_data cb;
+
+ bzero(&cb, sizeof (cb));
+ cb.zlogp = zlogp;
+
+ if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) {
+ zerror(zlogp, B_TRUE, "di_init failed");
+ return (-1);
+ }
+
+ (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb);
+
+ di_fini(root);
+ return (0);
+}
+
+static void
+make_tty(zlog_t *zlogp, int id)
+{
+ int i;
+ int fd = -1;
+ char stdpath[MAXPATHLEN];
+
+ /*
+ * Open the master side of the dev and issue the ZFD_MAKETTY ioctl,
+ * which will cause the the various tty-related streams modules to be
+ * pushed when the slave opens the device.
+ *
+ * In very rare cases the open returns ENOENT if devfs doesn't have
+ * everything setup yet due to heavy zone startup load. Wait for
+ * 1 sec. and retry a few times. Even if we can't setup tty mode
+ * we still move on.
+ */
+ (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d",
+ zone_name, id);
+
+ for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) {
+ fd = open(stdpath, O_RDWR | O_NOCTTY);
+ if (fd >= 0 || errno != ENOENT)
+ break;
+ (void) sleep(1);
+ }
+ if (fd == -1) {
+ zerror(zlogp, B_TRUE, "ERROR: could not open zfd %d for "
+ "zone %s to set tty mode", id, zone_name);
+ } else {
+ /*
+ * This ioctl can occasionally return ENXIO if devfs doesn't
+ * have everything plumbed up yet due to heavy zone startup
+ * load. Wait for 1 sec. and retry a few times before we give
+ * up.
+ */
+ for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) {
+ if (ioctl(fd, ZFD_MAKETTY) == 0) {
+ break;
+ } else if (errno != ENXIO) {
+ break;
+ }
+ (void) sleep(1);
+ }
+ }
+
+ if (fd != -1)
+ (void) close(fd);
+}
+
+/*
+ * init_zfd_devs() drives the device-tree configuration of the zone fd devices.
+ * The general strategy is to use the libdevice (devctl) interfaces to
+ * instantiate all of new zone fd nodes. We do a lot of sanity checking, and
+ * are careful to reuse a dev if one exists.
+ *
+ * Once the devices are in the device tree, we kick devfsadm via
+ * di_devlink_init() to ensure that the appropriate symlinks (to the master and
+ * slave fd devices) are placed in /dev in the global zone.
+ */
+static int
+init_zfd_dev(zlog_t *zlogp, devctl_hdl_t bus_hdl, int id)
+{
+ int rv = -1;
+ devctl_ddef_t ddef_hdl = NULL;
+ devctl_hdl_t dev_hdl = NULL;
+
+ if ((ddef_hdl = devctl_ddef_alloc("zfd", 0)) == NULL) {
+ zerror(zlogp, B_TRUE, "failed to allocate ddef handle");
+ goto error;
+ }
+
+ /*
+ * Set four properties on this node; the name of the zone, the dev name
+ * seen inside the zone, a flag which lets pseudo know that it is OK to
+ * automatically allocate an instance # for this device, and the last
+ * one tells the device framework not to auto-detach this node - we
+ * need the node to still be there when we ask devfsadmd to make links,
+ * and when we need to open it.
+ */
+ if (devctl_ddef_string(ddef_hdl, "zfd_zname", zone_name) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create zfd_zname property");
+ goto error;
+ }
+ if (devctl_ddef_int(ddef_hdl, "zfd_id", id) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create zfd_id property");
+ goto error;
+ }
+ if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create auto-assign-instance "
+ "property");
+ goto error;
+ }
+ if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach "
+ "property");
+ goto error;
+ }
+ if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) {
+ zerror(zlogp, B_TRUE, "failed to create zfd node");
+ goto error;
+ }
+ rv = 0;
+
+error:
+ if (ddef_hdl)
+ devctl_ddef_free(ddef_hdl);
+ if (dev_hdl)
+ devctl_release(dev_hdl);
+ return (rv);
+}
+
+static int
+init_zfd_devs(zlog_t *zlogp, zfd_mode_t *mode)
+{
+ devctl_hdl_t bus_hdl = NULL;
+ di_devlink_handle_t dl = NULL;
+ int rv = -1;
+ int i;
+
+ /*
+ * Time to make the devices.
+ */
+ if ((bus_hdl = devctl_bus_acquire(ZFDNEX_FILEPATH, 0)) == NULL) {
+ zerror(zlogp, B_TRUE, "devctl_bus_acquire failed");
+ goto error;
+ }
+
+ for (i = 0; i < NUM_ZFD_DEVS; i++) {
+ if (init_zfd_dev(zlogp, bus_hdl, i) != 0)
+ goto error;
+ }
+
+ if ((dl = di_devlink_init("zfd", DI_MAKE_LINK)) == NULL) {
+ zerror(zlogp, B_TRUE, "failed to create devlinks");
+ goto error;
+ }
+
+ (void) di_devlink_fini(&dl);
+ rv = 0;
+
+ if (mode->zmode_n_stddevs == 1) {
+ /* We want the primary stream to look like a tty. */
+ make_tty(zlogp, 0);
+ }
+
+error:
+ if (bus_hdl)
+ devctl_release(bus_hdl);
+ return (rv);
+}
+
+static int
+init_server_sock(int *servfd, char *nm)
+{
+ int resfd = -1;
+ struct sockaddr_un servaddr;
+
+ bzero(&servaddr, sizeof (servaddr));
+ servaddr.sun_family = AF_UNIX;
+ (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path),
+ SERVER_SOCKPATH, zone_name, nm);
+
+ if ((resfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+ zerror(&logplat, B_TRUE,
+ "server setup: could not create socket");
+ goto err;
+ }
+ (void) unlink(servaddr.sun_path);
+
+ if (bind(resfd, (struct sockaddr *)&servaddr, sizeof (servaddr))
+ == -1) {
+ zerror(&logplat, B_TRUE,
+ "server setup: could not bind to socket");
+ goto err;
+ }
+
+ if (listen(resfd, 4) == -1) {
+ zerror(&logplat, B_TRUE,
+ "server setup: could not listen on socket");
+ goto err;
+ }
+
+ *servfd = resfd;
+ return (0);
+
+err:
+ (void) unlink(servaddr.sun_path);
+ if (resfd != -1)
+ (void) close(resfd);
+ return (-1);
+}
+
+static void
+destroy_server_sock(int servfd, char *nm)
+{
+ char path[MAXPATHLEN];
+
+ (void) snprintf(path, sizeof (path), SERVER_SOCKPATH, zone_name, nm);
+ (void) unlink(path);
+ (void) shutdown(servfd, SHUT_RDWR);
+ (void) close(servfd);
+}
+
+/*
+ * Read the "ident" string from the client's descriptor; this routine also
+ * tolerates being called with pid=NULL, for times when you want to "eat"
+ * the ident string from a client without saving it.
+ */
+static int
+get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len,
+ uint_t *flagsp)
+{
+ char buf[BUFSIZ], *bufp;
+ size_t buflen = sizeof (buf);
+ char c = '\0';
+ int i = 0, r;
+ ucred_t *cred = NULL;
+
+ /* "eat up the ident string" case, for simplicity */
+ if (pid == NULL) {
+ assert(locale == NULL && locale_len == 0);
+ while (read(clifd, &c, 1) == 1) {
+ if (c == '\n')
+ return (0);
+ }
+ }
+
+ bzero(buf, sizeof (buf));
+ while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) {
+ buflen--;
+ if (c == '\n')
+ break;
+
+ buf[i] = c;
+ i++;
+ }
+ if (r == -1)
+ return (-1);
+
+ /*
+ * We've filled the buffer, but still haven't seen \n. Keep eating
+ * until we find it; we don't expect this to happen, but this is
+ * defensive.
+ */
+ if (c != '\n') {
+ while ((r = read(clifd, &c, sizeof (c))) > 0)
+ if (c == '\n')
+ break;
+ }
+
+ /*
+ * Parse buffer for message of the form:
+ * IDENT <locale> <flags>
+ */
+ bufp = buf;
+ if (strncmp(bufp, "IDENT ", 6) != 0)
+ return (-1);
+ bufp += 6;
+
+ if (getpeerucred(clifd, &cred) == 0) {
+ *pid = ucred_getpid((const ucred_t *)cred);
+ ucred_free(cred);
+ } else {
+ return (-1);
+ }
+
+ while (*bufp != '\0' && isspace(*bufp))
+ bufp++;
+ buflen = strlen(bufp) - 1;
+ bufp[buflen - 1] = '\0';
+ (void) strlcpy(locale, bufp, locale_len);
+
+ *flagsp = atoi(&bufp[buflen]);
+
+ return (0);
+}
+
+static int
+accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len,
+ uint_t *flagsp)
+{
+ int connfd;
+ struct sockaddr_un cliaddr;
+ socklen_t clilen;
+ int flags;
+
+ clilen = sizeof (cliaddr);
+ connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
+ if (connfd == -1)
+ return (-1);
+ if (pid != NULL) {
+ if (get_client_ident(connfd, pid, locale, locale_len, flagsp)
+ == -1) {
+ (void) shutdown(connfd, SHUT_RDWR);
+ (void) close(connfd);
+ return (-1);
+ }
+ (void) write(connfd, "OK\n", 3);
+ }
+
+ flags = fcntl(connfd, F_GETFL, 0);
+ if (flags != -1)
+ (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC);
+
+ return (connfd);
+}
+
+static void
+reject_client(int servfd, pid_t clientpid)
+{
+ int connfd;
+ struct sockaddr_un cliaddr;
+ socklen_t clilen;
+ char nak[MAXPATHLEN];
+
+ clilen = sizeof (cliaddr);
+ connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
+
+ /*
+ * After getting its ident string, tell client to get lost.
+ */
+ if (get_client_ident(connfd, NULL, NULL, 0, NULL) == 0) {
+ (void) snprintf(nak, sizeof (nak), "%lu\n",
+ clientpid);
+ (void) write(connfd, nak, strlen(nak));
+ }
+ (void) shutdown(connfd, SHUT_RDWR);
+ (void) close(connfd);
+}
+
+static int
+accept_socket(int servfd, pid_t verpid)
+{
+ int connfd;
+ struct sockaddr_un cliaddr;
+ socklen_t clilen = sizeof (cliaddr);
+ ucred_t *cred = NULL;
+ pid_t rpid = -1;
+ int flags;
+
+ connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen);
+ if (connfd == -1)
+ return (-1);
+
+ /* Confirm connecting process is who we expect */
+ if (getpeerucred(connfd, &cred) == 0) {
+ rpid = ucred_getpid((const ucred_t *)cred);
+ ucred_free(cred);
+ }
+ if (rpid == -1 || rpid != verpid) {
+ (void) shutdown(connfd, SHUT_RDWR);
+ (void) close(connfd);
+ return (-1);
+ }
+
+ flags = fcntl(connfd, F_GETFL, 0);
+ if (flags != -1)
+ (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC);
+
+ return (connfd);
+}
+
+static void
+ctlcmd_process(int sockfd, int stdoutfd, unsigned int *flags)
+{
+ char buf[BUFSIZ];
+ int i;
+ for (i = 0; i < BUFSIZ-1; i++) {
+ char c;
+ if (read(sockfd, &c, 1) != 1 ||
+ c == '\n' || c == '\0') {
+ break;
+ }
+ buf[i] = c;
+ }
+ if (i == 0) {
+ goto fail;
+ }
+ buf[i] = '\0';
+
+ if (strncmp(buf, "TIOCSWINSZ ", 11) == 0) {
+ char *next = buf + 11;
+ struct winsize ws;
+ errno = 0;
+ ws.ws_row = strtol(next, &next, 10);
+ if (errno == EINVAL) {
+ goto fail;
+ }
+ ws.ws_col = strtol(next + 1, &next, 10);
+ if (errno == EINVAL) {
+ goto fail;
+ }
+ if (ioctl(stdoutfd, TIOCSWINSZ, &ws) == 0) {
+ (void) write(sockfd, "OK\n", 3);
+ return;
+ }
+ }
+ if (strncmp(buf, "SETFLAGS ", 9) == 0) {
+ char *next = buf + 9;
+ unsigned int result;
+ errno = 0;
+ result = strtoul(next, &next, 10);
+ if (errno == EINVAL) {
+ goto fail;
+ }
+ *flags = result;
+ (void) write(sockfd, "OK\n", 3);
+ return;
+ }
+fail:
+ (void) write(sockfd, "FAIL\n", 5);
+}
+
+/*
+ * Check to see if the client at the other end of the socket is still alive; we
+ * know it is not if it throws EPIPE at us when we try to write an otherwise
+ * harmless 0-length message to it.
+ */
+static int
+test_client(int clifd)
+{
+ if ((write(clifd, "", 0) == -1) && errno == EPIPE)
+ return (-1);
+ return (0);
+}
+
+/*
+ * We want to sleep for a little while but need to be responsive if the zone is
+ * halting. We poll/sleep on the event stream so we can notice if we're halting.
+ * Return true if halting, otherwise false.
+ */
+static boolean_t
+halt_sleep(int slptime)
+{
+ struct pollfd evfd[1];
+
+ evfd[0].fd = eventstream[1];
+ evfd[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI;
+
+ if (poll(evfd, 1, slptime) > 0) {
+ /* zone halting */
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+/*
+ * This routine drives the logging and interactive I/O loop. It polls for
+ * input from the zone side of the fd (output to stdout/stderr), and from the
+ * client (input to the zone's stdin). Additionally, it polls on the server
+ * fd, and disconnects any clients that might try to hook up with the zone
+ * while the fd's are in use.
+ *
+ * Data from the zone's stdout and stderr is formatted in json and written to
+ * the log file whether an interactive client is connected or not.
+ *
+ * When the client first calls us up, it is expected to send a line giving its
+ * "identity"; this consists of the string 'IDENT <pid> <locale>'. This is so
+ * that we can report that the fd's are busy, along with some diagnostics
+ * about who has them busy; the locale is ignore here but kept for compatability
+ * with the zlogin code when running on the zone's console.
+ *
+ * We need to handle the case where there is no server within the zone (or
+ * the server gets stuck) and data that we're writing to the zone server's
+ * stdin fills the pipe. Because of the way the zfd device works writes can
+ * flow into the stream and simply be dropped, if there is no server, or writes
+ * could return -1 with EAGAIN if the server is stuck. Since we ignore errors
+ * on the write to stdin, we won't get blocked in that case but we'd like to
+ * avoid dropping initial input if the server within the zone hasn't started
+ * yet. To handle this we wait to read initial input until we detect that there
+ * is a server inside the zone. We have to poll for this so that we can
+ * re-run the ioctl to notice when a server shows up. This poll/wait is handled
+ * by halt_sleep() so that we can be responsive if the zone wants to halt.
+ * We only do this check to avoid dropping initial input so it is possible for
+ * the server within the zone to go away later. At that point zfd will just
+ * drop any new input flowing into the stream.
+ */
+static void
+do_zfd_io(int gzctlfd, int gzservfd, int gzerrfd, int stdinfd, int stdoutfd,
+ int stderrfd, int logout, int logerr)
+{
+ struct pollfd pollfds[8];
+ char ibuf[BUFSIZ + 1];
+ int cc, ret;
+ int ctlfd = -1;
+ int clifd = -1;
+ int clierrfd = -1;
+ int pollerr = 0;
+ char clilocale[MAXPATHLEN];
+ pid_t clipid = 0;
+ uint_t flags = 0;
+ boolean_t stdin_ready = B_FALSE;
+ int slptime = 250; /* initial poll sleep time in ms */
+
+ /* client control socket, watch for read events */
+ pollfds[0].fd = ctlfd;
+ pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND |
+ POLLPRI | POLLERR | POLLHUP | POLLNVAL;
+
+ /* client socket, watch for read events */
+ pollfds[1].fd = clifd;
+ pollfds[1].events = pollfds[0].events;
+
+ /* stdout, watch for read events */
+ pollfds[2].fd = stdoutfd;
+ pollfds[2].events = pollfds[0].events;
+
+ /* stderr, watch for read events */
+ pollfds[3].fd = stderrfd;
+ pollfds[3].events = pollfds[0].events;
+
+ /* the server control socket; watch for new connections */
+ pollfds[4].fd = gzctlfd;
+ pollfds[4].events = POLLIN | POLLRDNORM;
+
+ /* the server stdin/out socket; watch for new connections */
+ pollfds[5].fd = gzservfd;
+ pollfds[5].events = POLLIN | POLLRDNORM;
+
+ /* the server stderr socket; watch for new connections */
+ pollfds[6].fd = gzerrfd;
+ pollfds[6].events = POLLIN | POLLRDNORM;
+
+ /* the eventstream; any input means the zone is halting */
+ pollfds[7].fd = eventstream[1];
+ pollfds[7].events = pollfds[0].events;
+
+ while (!shutting_down) {
+ pollfds[0].revents = pollfds[1].revents = 0;
+ pollfds[2].revents = pollfds[3].revents = 0;
+ pollfds[4].revents = pollfds[5].revents = 0;
+ pollfds[6].revents = pollfds[7].revents = 0;
+
+ ret = poll(pollfds, 8, -1);
+ if (ret == -1 && errno != EINTR) {
+ zerror(&logplat, B_TRUE, "poll failed");
+ /* we are hosed, close connection */
+ break;
+ }
+
+ /* control events from client */
+ if (pollfds[0].revents &
+ (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ /* process control message */
+ ctlcmd_process(ctlfd, stdoutfd, &flags);
+ } else if (pollfds[0].revents) {
+ /* bail if any error occurs */
+ pollerr = pollfds[0].revents;
+ zerror(&logplat, B_FALSE, "closing connection "
+ "with control channel, pollerr %d\n", pollerr);
+ break;
+ }
+
+ /* event from client side */
+ if (pollfds[1].revents) {
+ if (stdin_ready) {
+ if (pollfds[1].revents & (POLLIN |
+ POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ errno = 0;
+ cc = read(clifd, ibuf, BUFSIZ);
+ if (cc > 0) {
+ /*
+ * See comment for this
+ * function on what happens if
+ * there is no reader in the
+ * zone. EOF is handled below.
+ */
+ (void) write(stdinfd, ibuf, cc);
+ }
+ } else if (pollfds[1].revents & (POLLERR |
+ POLLNVAL)) {
+ pollerr = pollfds[1].revents;
+ zerror(&logplat, B_FALSE,
+ "closing connection "
+ "with client, pollerr %d\n",
+ pollerr);
+ break;
+ }
+
+ if (pollfds[1].revents & POLLHUP) {
+ if (flags & ZLOGIN_ZFD_EOF) {
+ /*
+ * Let the client know. We've
+ * already serviced any pending
+ * regular input. Let the
+ * stream clear since the EOF
+ * ioctl jumps to the head.
+ */
+ (void) ioctl(stdinfd, I_FLUSH);
+ if (halt_sleep(250))
+ break;
+ (void) ioctl(stdinfd, ZFD_EOF);
+ }
+ break;
+ }
+ } else {
+ if (ioctl(stdinfd, ZFD_HAS_SLAVE) == 0) {
+ stdin_ready = B_TRUE;
+ } else {
+ /*
+ * There is nothing in the zone to read
+ * our input. Presumably the user
+ * providing input expects something to
+ * show up, but that is no guarantee.
+ * Since we haven't serviced the pending
+ * input poll yet, we don't want to
+ * immediately loop around but we also
+ * need to be responsive if the zone is
+ * halting.
+ */
+ if (halt_sleep(slptime))
+ break;
+
+ if (slptime < 5000)
+ slptime += 250;
+ }
+ }
+ }
+
+ /* event from the zone's stdout */
+ if (pollfds[2].revents) {
+ if (pollfds[2].revents &
+ (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ errno = 0;
+ cc = read(stdoutfd, ibuf, BUFSIZ);
+ /* zfd is a stream, so ignore 0 length read */
+ if (cc < 0 && (errno != EINTR) &&
+ (errno != EAGAIN))
+ break;
+ if (cc > 0) {
+ logstream_write(logout, ibuf, cc);
+
+ /*
+ * Lose output if no one is listening,
+ * otherwise pass it on.
+ */
+ if (clifd != -1)
+ (void) write(clifd, ibuf, cc);
+ }
+ } else {
+ pollerr = pollfds[2].revents;
+ zerror(&logplat, B_FALSE,
+ "closing connection with stdout zfd, "
+ "pollerr %d\n", pollerr);
+ break;
+ }
+ }
+
+ /* event from the zone's stderr */
+ if (pollfds[3].revents) {
+ if (pollfds[3].revents &
+ (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) {
+ errno = 0;
+ cc = read(stderrfd, ibuf, BUFSIZ);
+ /* zfd is a stream, so ignore 0 length read */
+ if (cc < 0 && (errno != EINTR) &&
+ (errno != EAGAIN))
+ break;
+ if (cc > 0) {
+ logstream_write(logerr, ibuf, cc);
+
+ /*
+ * Lose output if no one is listening,
+ * otherwise pass it on.
+ */
+ if (clierrfd != -1)
+ (void) write(clierrfd, ibuf,
+ cc);
+ }
+ } else {
+ pollerr = pollfds[3].revents;
+ zerror(&logplat, B_FALSE,
+ "closing connection with stderr zfd, "
+ "pollerr %d\n", pollerr);
+ break;
+ }
+ }
+
+ /* connect event from server control socket */
+ if (pollfds[4].revents) {
+ if (ctlfd != -1) {
+ /*
+ * Test the client to see if it is really
+ * still alive. If it has died but we
+ * haven't yet detected that, we might
+ * deny a legitimate connect attempt. If it
+ * is dead, we break out; once we tear down
+ * the old connection, the new connection
+ * will happen.
+ */
+ if (test_client(ctlfd) == -1) {
+ break;
+ }
+ /* we're already handling a client */
+ reject_client(gzctlfd, clipid);
+ } else {
+ ctlfd = accept_client(gzctlfd, &clipid,
+ clilocale, sizeof (clilocale), &flags);
+ if (ctlfd != -1) {
+ pollfds[0].fd = ctlfd;
+ } else {
+ break;
+ }
+ }
+ }
+
+ /* connect event from server stdin/out socket */
+ if (pollfds[5].revents) {
+ if (ctlfd == -1) {
+ /*
+ * This shouldn't happen since the client is
+ * expected to connect on the control socket
+ * first. If we see this, tear everything down
+ * and start over.
+ */
+ zerror(&logplat, B_FALSE, "GZ zfd stdin/stdout "
+ "connection attempt with no GZ control\n");
+ break;
+ }
+ assert(clifd == -1);
+ if ((clifd = accept_socket(gzservfd, clipid)) != -1) {
+ /* No need to watch for other new connections */
+ pollfds[5].fd = -1;
+ /* Client input is of interest, though */
+ pollfds[1].fd = clifd;
+ } else {
+ break;
+ }
+ }
+
+ /* connection event from server stderr socket */
+ if (pollfds[6].revents) {
+ if (ctlfd == -1) {
+ /*
+ * Same conditions apply to stderr as stdin/out.
+ */
+ zerror(&logplat, B_FALSE, "GZ zfd stderr "
+ "connection attempt with no GZ control\n");
+ break;
+ }
+ assert(clierrfd == -1);
+ if ((clierrfd = accept_socket(gzerrfd, clipid)) != -1) {
+ /* No need to watch for other new connections */
+ pollfds[6].fd = -1;
+ } else {
+ break;
+ }
+ }
+
+ /*
+ * Watch for events on the eventstream. This is how we get
+ * notified of the zone halting, etc. It provides us a
+ * "wakeup" from poll when important things happen, which
+ * is good.
+ */
+ if (pollfds[7].revents) {
+ break;
+ }
+ }
+
+ if (clifd != -1) {
+ (void) shutdown(clifd, SHUT_RDWR);
+ (void) close(clifd);
+ }
+
+ if (clierrfd != -1) {
+ (void) shutdown(clierrfd, SHUT_RDWR);
+ (void) close(clierrfd);
+ }
+}
+
+static int
+open_fd(int id, int rw)
+{
+ int fd;
+ int flag = O_NONBLOCK | O_NOCTTY | O_CLOEXEC;
+ int retried = 0;
+ char stdpath[MAXPATHLEN];
+
+ (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d",
+ zone_name, id);
+ flag |= rw;
+
+ while (!shutting_down) {
+ if ((fd = open(stdpath, flag)) != -1) {
+ /*
+ * Setting RPROTDIS on the stream means that the
+ * control portion of messages received (which we don't
+ * care about) will be discarded by the stream head. If
+ * we allowed such messages, we wouldn't be able to use
+ * read(2), as it fails (EBADMSG) when a message with a
+ * control element is received.
+ */
+ if (ioctl(fd, I_SRDOPT, RNORM|RPROTDIS) == -1) {
+ zerror(&logplat, B_TRUE,
+ "failed to set options on zfd");
+ return (-1);
+ }
+ return (fd);
+ }
+
+ if (retried++ > 60)
+ break;
+
+ (void) sleep(1);
+ }
+
+ zerror(&logplat, B_TRUE, "failed to open zfd");
+ return (-1);
+}
+
+/*
+ * Body of the worker thread to log the zfd's stdout and stderr to a log file
+ * and to perform interactive IO to the stdin, stdout and stderr zfd's.
+ *
+ * The stdin, stdout and stderr are from the perspective of the process inside
+ * the zone, so the zoneadmd view is opposite (i.e. we write to the stdin fd
+ * and read from the stdout/stderr fds).
+ */
+static void *
+srvr(void *modearg)
+{
+ zfd_mode_t *mode = (zfd_mode_t *)modearg;
+ int gzctlfd = -1;
+ int gzoutfd = -1;
+ int stdinfd = -1;
+ int stdoutfd = -1;
+ int gzerrfd = -1;
+ int stderrfd = -1;
+ int flags;
+ int len;
+ char ibuf[BUFSIZ + 1];
+ int logout = -1;
+ int logerr = -1;
+
+ if (!shutting_down && mode->zmode_gzlogging) {
+ logout = logstream_open(log_name, "stdout", 0);
+ logerr = logstream_open(log_name, "stderr", 0);
+ }
+
+ if (!shutting_down) {
+ if (pipe(eventstream) != 0) {
+ zerror(&logplat, B_TRUE, "failed to open logger "
+ "control pipe");
+ return (NULL);
+ }
+ }
+
+ while (!shutting_down) {
+ if (init_server_sock(&gzctlfd, "ctl") == -1) {
+ zerror(&logplat, B_FALSE,
+ "server setup: control socket init failed");
+ goto death;
+ }
+ if (init_server_sock(&gzoutfd, "out") == -1) {
+ zerror(&logplat, B_FALSE,
+ "server setup: stdout socket init failed");
+ goto death;
+ }
+ if (init_server_sock(&gzerrfd, "err") == -1) {
+ zerror(&logplat, B_FALSE,
+ "server setup: stderr socket init failed");
+ goto death;
+ }
+
+ if (mode->zmode_n_stddevs == 1) {
+ if ((stdinfd = open_fd(0, O_RDWR)) == -1) {
+ goto death;
+ }
+ stdoutfd = stdinfd;
+ } else {
+ if ((stdinfd = open_fd(0, O_WRONLY)) == -1 ||
+ (stdoutfd = open_fd(1, O_RDONLY)) == -1 ||
+ (stderrfd = open_fd(2, O_RDONLY)) == -1) {
+ goto death;
+ }
+ }
+
+ do_zfd_io(gzctlfd, gzoutfd, gzerrfd, stdinfd, stdoutfd,
+ stderrfd, logout, logerr);
+death:
+ destroy_server_sock(gzctlfd, "ctl");
+ destroy_server_sock(gzoutfd, "out");
+ destroy_server_sock(gzerrfd, "err");
+
+ /* when shutting down, leave open until drained */
+ if (!shutting_down) {
+ (void) close(stdinfd);
+ if (mode->zmode_n_stddevs == 3) {
+ (void) close(stdoutfd);
+ (void) close(stderrfd);
+ }
+ }
+ }
+
+ /*
+ * Attempt to drain remaining log output from the zone prior to closing
+ * the file descriptors. This helps ensure that complete logs are
+ * captured during shutdown.
+ */
+ flags = fcntl(stdoutfd, F_GETFL, 0);
+ if (fcntl(stdoutfd, F_SETFL, flags | O_NONBLOCK) != -1) {
+ while ((len = read(stdoutfd, ibuf, BUFSIZ)) > 0) {
+ logstream_write(logout, ibuf, len);
+ }
+ }
+ (void) close(stdoutfd);
+
+ if (mode->zmode_n_stddevs > 1) {
+ (void) close(stdinfd);
+ flags = fcntl(stderrfd, F_GETFL, 0);
+ if (fcntl(stderrfd, F_SETFL, flags | O_NONBLOCK) != -1) {
+ while ((len = read(stderrfd, ibuf, BUFSIZ)) > 0) {
+ logstream_write(logerr, ibuf, len);
+ }
+ }
+ (void) close(stderrfd);
+ }
+
+
+ (void) close(eventstream[0]);
+ eventstream[0] = -1;
+ (void) close(eventstream[1]);
+ eventstream[1] = -1;
+ logstream_close(logout, B_FALSE);
+ logstream_close(logerr, B_FALSE);
+ return (NULL);
+}
+
+/*
+ * The meaning of the original legacy values for the zlog-mode evolved over
+ * time, to the point where the old names no longer made sense. The current
+ * values are simply positional letters used to indicate various capabilities.
+ * The following table shows the meaning of the mode values, along with the
+ * legacy name which we continue to support for compatability. Any future
+ * capability can add a letter to the left and '-' is implied for existing
+ * strings.
+ *
+ * zlog-mode gz log - tty - ngz log
+ * --------- ------ --- -------
+ * gt- (int) y y n
+ * g-- (log) y n n
+ * gtn (nlint) y y y
+ * g-n (nolog) y n y
+ * -t- n y n
+ * --- n n n
+ *
+ * This function also obtains any custom name for stdio.log while it is reading
+ * the zone configuration.
+ */
+static void
+get_mode_logmax(zfd_mode_t *mode)
+{
+ zone_dochandle_t handle;
+ struct zone_attrtab attr;
+
+ bzero(mode, sizeof (zfd_mode_t));
+
+ if ((handle = zonecfg_init_handle()) == NULL)
+ return;
+
+ if (zonecfg_get_handle(zone_name, handle) != Z_OK)
+ goto done;
+
+ if (zonecfg_setattrent(handle) != Z_OK)
+ goto done;
+ while (zonecfg_getattrent(handle, &attr) == Z_OK) {
+ if (strcmp(ZLOG_MODE, attr.zone_attr_name) == 0) {
+ if (strcmp("g--", attr.zone_attr_value) == 0 ||
+ strncmp("log", attr.zone_attr_value, 3) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 3;
+ mode->zmode_n_addl_devs = 0;
+ } else if (strcmp("g-n", attr.zone_attr_value) == 0 ||
+ strncmp("nolog", attr.zone_attr_value, 5) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 3;
+ mode->zmode_n_addl_devs = 2;
+ } else if (strcmp("gt-", attr.zone_attr_value) == 0 ||
+ strncmp("int", attr.zone_attr_value, 3) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 1;
+ mode->zmode_n_addl_devs = 0;
+ } else if (strcmp("gtn", attr.zone_attr_value) == 0 ||
+ strncmp("nlint", attr.zone_attr_value, 5) == 0) {
+ mode->zmode_gzlogging = B_TRUE;
+ mode->zmode_n_stddevs = 1;
+ mode->zmode_n_addl_devs = 1;
+ } else if (strcmp("-t-", attr.zone_attr_value) == 0) {
+ mode->zmode_gzlogging = B_FALSE;
+ mode->zmode_n_stddevs = 1;
+ mode->zmode_n_addl_devs = 0;
+ } else if (strcmp("---", attr.zone_attr_value) == 0) {
+ mode->zmode_gzlogging = B_FALSE;
+ mode->zmode_n_stddevs = 3;
+ mode->zmode_n_addl_devs = 0;
+ }
+ continue;
+ }
+
+ if (strcmp(ZLOG_NAME, attr.zone_attr_name) == 0) {
+ (void) strlcpy(log_name, attr.zone_attr_value,
+ sizeof (log_name));
+ continue;
+ }
+ }
+ (void) zonecfg_endattrent(handle);
+
+done:
+ zonecfg_fini_handle(handle);
+}
+
+void
+create_log_thread(zlog_t *zlogp)
+{
+ int res;
+
+ shutting_down = 0;
+
+ get_mode_logmax(&mode);
+ if (mode.zmode_n_stddevs == 0)
+ return;
+
+ if (init_zfd_devs(zlogp, &mode) == -1) {
+ zerror(zlogp, B_FALSE,
+ "zfd setup: device initialization failed");
+ return;
+ }
+
+ res = thr_create(NULL, 0, srvr, (void *)&mode, 0,
+ &logger_tid);
+ if (res != 0) {
+ zerror(zlogp, B_FALSE, "error %d creating logger thread", res);
+ logger_tid = 0;
+ }
+}
+
+void
+destroy_log_thread(zlog_t *zlogp)
+{
+ if (logger_tid != 0) {
+ int stop = 1;
+
+ shutting_down = 1;
+ /* break out of poll to shutdown */
+ if (eventstream[0] != -1)
+ (void) write(eventstream[0], &stop, sizeof (stop));
+ (void) thr_join(logger_tid, NULL, NULL);
+ logger_tid = 0;
+ }
+
+ (void) destroy_zfd_devs(zlogp);
+}
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c
index b1c2d2bbf5..342b1bf958 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.c
+++ b/usr/src/cmd/zoneadmd/zoneadmd.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2021 Joyent, Inc.
* Copyright (c) 2016 by Delphix. All rights reserved.
*/
@@ -69,6 +70,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/time.h>
#include <bsm/adt.h>
#include <bsm/adt_event.h>
@@ -102,6 +104,8 @@
#include <libdladm.h>
#include <sys/dls_mgmt.h>
#include <libscf.h>
+#include <uuid/uuid.h>
+#include <libppt.h>
#include <libzonecfg.h>
#include <zonestat_impl.h>
@@ -109,6 +113,8 @@
static char *progname;
char *zone_name; /* zone which we are managing */
+zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */
+char zonepath[MAXNAMELEN];
char pool_name[MAXNAMELEN];
char default_brand[MAXNAMELEN];
char brand_name[MAXNAMELEN];
@@ -117,13 +123,15 @@ boolean_t zone_iscluster;
boolean_t zone_islabeled;
boolean_t shutdown_in_progress;
static zoneid_t zone_id;
+static zoneid_t zone_did = 0;
dladm_handle_t dld_handle = NULL;
-static char pre_statechg_hook[2 * MAXPATHLEN];
-static char post_statechg_hook[2 * MAXPATHLEN];
+char pre_statechg_hook[2 * MAXPATHLEN];
+char post_statechg_hook[2 * MAXPATHLEN];
char query_hook[2 * MAXPATHLEN];
-zlog_t logsys;
+zlog_t logsys; /* log to syslog */
+zlog_t logplat; /* log to platform.log */
mutex_t lock = DEFAULTMUTEX; /* to serialize stuff */
mutex_t msglock = DEFAULTMUTEX; /* for calling setlocale() */
@@ -136,12 +144,17 @@ static int zone_door = -1;
boolean_t in_death_throes = B_FALSE; /* daemon is dying */
boolean_t bringup_failure_recovery = B_FALSE; /* ignore certain failures */
+static int platloghdl = -1; /* Handle for <zonepath>/logs/platform.log */
+
#if !defined(TEXT_DOMAIN) /* should be defined by cc -D */
#define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
#endif
#define DEFAULT_LOCALE "C"
+#define RSRC_NET "net"
+#define RSRC_DEV "device"
+
static const char *
z_cmd_name(zone_cmd_t zcmd)
{
@@ -215,17 +228,14 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
{
va_list alist;
char buf[MAXPATHLEN * 2]; /* enough space for err msg with a path */
- char *bp;
+ char *bp, *bp_nozone;
int saved_errno = errno;
- if (zlogp == NULL)
- return;
if (zlogp == &logsys)
- (void) snprintf(buf, sizeof (buf), "[zone '%s'] ",
- zone_name);
+ (void) snprintf(buf, sizeof (buf), "[zone '%s'] ", zone_name);
else
buf[0] = '\0';
- bp = &(buf[strlen(buf)]);
+ bp = bp_nozone = &(buf[strlen(buf)]);
/*
* In theory, the locale pointer should be set to either "C" or a
@@ -242,15 +252,38 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
if (use_strerror)
(void) snprintf(bp, sizeof (buf) - (bp - buf), ": %s",
strerror(saved_errno));
+
+ (void) strlcat(buf, "\n", sizeof (buf));
+
+ /*
+ * If we don't have the platform log, we are in a child process, and
+ * should log to stderr (which is a pipe) instead of the file.
+ */
+ if (logging_poisoned) {
+ (void) fprintf(stderr, "%s", buf);
+
+ if (zlogp != &logsys && zlogp->logfile == stderr)
+ return;
+ } else {
+ logstream_write(platloghdl, bp_nozone, strlen(bp_nozone));
+
+ if (zlogp == &logplat)
+ return;
+ }
+
if (zlogp == &logsys) {
+ bp = strrchr(buf, '\n');
+ if (bp != NULL && bp[1] == '\0') {
+ *bp = '\0';
+ }
(void) syslog(LOG_ERR, "%s", buf);
} else if (zlogp->logfile != NULL) {
- (void) fprintf(zlogp->logfile, "%s\n", buf);
+ (void) fprintf(zlogp->logfile, "%s", buf);
} else {
size_t buflen;
size_t copylen;
- buflen = snprintf(zlogp->log, zlogp->loglen, "%s\n", buf);
+ buflen = snprintf(zlogp->log, zlogp->loglen, "%s", buf);
copylen = MIN(buflen, zlogp->loglen);
zlogp->log += copylen;
zlogp->loglen -= copylen;
@@ -258,34 +291,58 @@ zerror(zlog_t *zlogp, boolean_t use_strerror, const char *fmt, ...)
}
/*
+ * Append src to dest, modifying dest in the process. Prefix src with
+ * a space character if dest is a non-empty string. Assumes dest is already
+ * properly \0-terminated OR overruns destsize.
+ */
+static void
+strnappend(char *dest, size_t destsize, const char *src)
+{
+ size_t startpoint = strnlen(dest, destsize);
+
+ if (startpoint >= destsize - 1) {
+ /* We've run out of room. Record something?! */
+ return;
+ }
+
+ if (startpoint > 0) {
+ /* Add the space per the function's intro comment. */
+ dest[startpoint] = ' ';
+ startpoint++;
+ }
+
+ /* Arguably we should check here too... */
+ (void) strlcpy(dest + startpoint, src, destsize - startpoint);
+}
+
+/*
* Emit a warning for any boot arguments which are unrecognized. Since
* Solaris boot arguments are getopt(3c) compatible (see kernel(8)), we
* put the arguments into an argv style array, use getopt to process them,
- * and put the resultant argument string back into outargs.
+ * and put the resultant argument string back into outargs. Non-native brands
+ * may support alternate forms of boot arguments so we must handle that as well.
*
* During the filtering, we pull out any arguments which are truly "boot"
* arguments, leaving only those which are to be passed intact to the
* progenitor process. The one we support at the moment is -i, which
* indicates to the kernel which program should be launched as 'init'.
*
- * A return of Z_INVAL indicates specifically that the arguments are
- * not valid; this is a non-fatal error. Except for Z_OK, all other return
- * values are treated as fatal.
+ * Except for Z_OK, all other return values are treated as fatal.
*/
static int
filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
- char *init_file, char *badarg)
+ char *init_file)
{
int argc = 0, argc_save;
int i;
- int err;
+ int err = Z_OK;
char *arg, *lasts, **argv = NULL, **argv_save;
char zonecfg_args[BOOTARGS_MAX];
char scratchargs[BOOTARGS_MAX], *sargs;
+ char scratchopt[3];
char c;
bzero(outargs, BOOTARGS_MAX);
- bzero(badarg, BOOTARGS_MAX);
/*
* If the user didn't specify transient boot arguments, check
@@ -293,25 +350,10 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
* and use them if applicable.
*/
if (inargs == NULL || inargs[0] == '\0') {
- zone_dochandle_t handle;
- if ((handle = zonecfg_init_handle()) == NULL) {
- zerror(zlogp, B_TRUE,
- "getting zone configuration handle");
- return (Z_BAD_HANDLE);
- }
- err = zonecfg_get_snapshot_handle(zone_name, handle);
- if (err != Z_OK) {
- zerror(zlogp, B_FALSE,
- "invalid configuration snapshot");
- zonecfg_fini_handle(handle);
- return (Z_BAD_HANDLE);
- }
-
bzero(zonecfg_args, sizeof (zonecfg_args));
- (void) zonecfg_get_bootargs(handle, zonecfg_args,
+ (void) zonecfg_get_bootargs(snap_hndl, zonecfg_args,
sizeof (zonecfg_args));
inargs = zonecfg_args;
- zonecfg_fini_handle(handle);
}
if (strlen(inargs) >= BOOTARGS_MAX) {
@@ -348,14 +390,22 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
}
/*
- * We preserve compatibility with the Solaris system boot behavior,
+ * We preserve compatibility with the illumos system boot behavior,
* which allows:
*
* # reboot kernel/unix -s -m verbose
*
- * In this example, kernel/unix tells the booter what file to
- * boot. We don't want reboot in a zone to be gratuitously different,
- * so we silently ignore the boot file, if necessary.
+ * In this example, kernel/unix tells the booter what file to boot. The
+ * original intent of this was that we didn't want reboot in a zone to
+ * be gratuitously different, so we would silently ignore the boot
+ * file, if necessary. However, this usage is archaic and has never
+ * been common, since it is impossible to boot a zone onto a different
+ * kernel. Ignoring the first argument breaks for non-native brands
+ * which pass boot arguments in a different style. e.g.
+ * systemd.log_level=debug
+ * Thus, for backward compatibility we only ignore the first argument
+ * if it appears to be in the illumos form and attempting to specify a
+ * kernel.
*/
if (argv[0] == NULL)
goto done;
@@ -363,7 +413,7 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
assert(argv[0][0] != ' ');
assert(argv[0][0] != '\t');
- if (argv[0][0] != '-' && argv[0][0] != '\0') {
+ if (strncmp(argv[0], "kernel/", 7) == 0) {
argv = &argv[1];
argc--;
}
@@ -386,41 +436,35 @@ filter_bootargs(zlog_t *zlogp, const char *inargs, char *outargs,
case 'm':
case 's':
/* These pass through unmolested */
- (void) snprintf(outargs, BOOTARGS_MAX,
- "%s -%c %s ", outargs, c, optarg ? optarg : "");
+ (void) snprintf(scratchopt, sizeof (scratchopt),
+ "-%c", c);
+ strnappend(outargs, BOOTARGS_MAX, scratchopt);
+ if (optarg != NULL)
+ strnappend(outargs, BOOTARGS_MAX, optarg);
break;
case '?':
/*
- * We warn about unknown arguments but pass them
- * along anyway-- if someone wants to develop their
- * own init replacement, they can pass it whatever
- * args they want.
+ * If a brand has its own init, we need to pass along
+ * whatever the user provides. We use the entire
+ * unknown string here so that we correctly handle
+ * unknown long options (e.g. --debug).
*/
- err = Z_INVAL;
- (void) snprintf(outargs, BOOTARGS_MAX,
- "%s -%c", outargs, optopt);
- (void) snprintf(badarg, BOOTARGS_MAX,
- "%s -%c", badarg, optopt);
+ strnappend(outargs, BOOTARGS_MAX, argv[optind - 1]);
break;
}
}
/*
- * For Solaris Zones we warn about and discard non-option arguments.
- * Hence 'boot foo bar baz gub' --> 'boot'. However, to be similar
- * to the kernel, we concat up all the other remaining boot args.
- * and warn on them as a group.
+ * We need to pass along everything else since we don't know what
+ * the brand's init is expecting. For example, an argument list like:
+ * --confdir /foo --debug
+ * will cause the getopt parsing to stop at '/foo' but we need to pass
+ * that on, along with the '--debug'. This does mean that we require
+ * any of our known options (-ifms) to preceed the brand-specific ones.
*/
- if (optind < argc) {
- err = Z_INVAL;
- while (optind < argc) {
- (void) snprintf(badarg, BOOTARGS_MAX, "%s%s%s",
- badarg, strlen(badarg) > 0 ? " " : "",
- argv[optind]);
- optind++;
- }
- zerror(zlogp, B_FALSE, "WARNING: Unused or invalid boot "
- "arguments `%s'.", badarg);
+ while (optind < argc) {
+ strnappend(outargs, BOOTARGS_MAX, argv[optind]);
+ optind++;
}
done:
@@ -459,7 +503,7 @@ mkzonedir(zlog_t *zlogp)
* Run the brand's pre-state change callback, if it exists.
*/
static int
-brand_prestatechg(zlog_t *zlogp, int state, int cmd)
+brand_prestatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
{
char cmdbuf[2 * MAXPATHLEN];
const char *altroot;
@@ -472,7 +516,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd)
state, cmd, altroot) > sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, NULL) != 0)
+ if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
return (-1);
return (0);
@@ -482,7 +526,7 @@ brand_prestatechg(zlog_t *zlogp, int state, int cmd)
* Run the brand's post-state change callback, if it exists.
*/
static int
-brand_poststatechg(zlog_t *zlogp, int state, int cmd)
+brand_poststatechg(zlog_t *zlogp, int state, int cmd, boolean_t debug)
{
char cmdbuf[2 * MAXPATHLEN];
const char *altroot;
@@ -495,7 +539,7 @@ brand_poststatechg(zlog_t *zlogp, int state, int cmd)
state, cmd, altroot) > sizeof (cmdbuf))
return (-1);
- if (do_subproc(zlogp, cmdbuf, NULL) != 0)
+ if (do_subproc(zlogp, cmdbuf, NULL, debug) != 0)
return (-1);
return (0);
@@ -532,37 +576,51 @@ notify_zonestatd(zoneid_t zoneid)
* Bring a zone up to the pre-boot "ready" stage. The mount_cmd argument is
* 'true' if this is being invoked as part of the processing for the "mount"
* subcommand.
+ *
+ * If a scratch zone mount (ALT_MOUNT) is being performed then do not
+ * call the state change hooks.
*/
static int
-zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate)
+zone_ready(zlog_t *zlogp, zone_mnt_t mount_cmd, int zstate, boolean_t debug)
{
int err;
+ boolean_t snapped = B_FALSE;
- if (brand_prestatechg(zlogp, zstate, Z_READY) != 0)
- return (-1);
-
+ if ((snap_hndl = zonecfg_init_handle()) == NULL) {
+ zerror(zlogp, B_TRUE, "getting zone configuration handle");
+ goto bad;
+ }
if ((err = zonecfg_create_snapshot(zone_name)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to create snapshot: %s",
zonecfg_strerror(err));
goto bad;
}
+ snapped = B_TRUE;
- if ((zone_id = vplat_create(zlogp, mount_cmd)) == -1) {
- if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
- zerror(zlogp, B_FALSE, "destroying snapshot: %s",
- zonecfg_strerror(err));
+ if (zonecfg_get_snapshot_handle(zone_name, snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "invalid configuration snapshot");
goto bad;
}
+
+ if (zone_did == 0)
+ zone_did = zone_get_did(zone_name);
+
+ if (!ALT_MOUNT(mount_cmd) &&
+ brand_prestatechg(zlogp, zstate, Z_READY, debug) != 0)
+ goto bad;
+
+ if ((zone_id = vplat_create(zlogp, mount_cmd, zone_did)) == -1)
+ goto bad;
+
if (vplat_bringup(zlogp, mount_cmd, zone_id) != 0) {
bringup_failure_recovery = B_TRUE;
- (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE);
- if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
- zerror(zlogp, B_FALSE, "destroying snapshot: %s",
- zonecfg_strerror(err));
+ (void) vplat_teardown(NULL, (mount_cmd != Z_MNT_BOOT), B_FALSE,
+ debug);
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_READY) != 0)
+ if (!ALT_MOUNT(mount_cmd) &&
+ brand_poststatechg(zlogp, zstate, Z_READY, debug) != 0)
goto bad;
return (0);
@@ -572,7 +630,16 @@ bad:
* If something goes wrong, we up the zones's state to the target
* state, READY, and then invoke the hook as if we're halting.
*/
- (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT);
+ if (!ALT_MOUNT(mount_cmd))
+ (void) brand_poststatechg(zlogp, ZONE_STATE_READY, Z_HALT,
+ debug);
+
+ if (snapped)
+ if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
+ zerror(zlogp, B_FALSE, "destroying snapshot: %s",
+ zonecfg_strerror(err));
+ zonecfg_fini_handle(snap_hndl);
+ snap_hndl = NULL;
return (-1);
}
@@ -624,15 +691,8 @@ mount_early_fs(void *data, const char *spec, const char *dir,
/* determine the zone rootpath */
if (mount_cmd) {
- char zonepath[MAXPATHLEN];
char luroot[MAXPATHLEN];
- if (zone_get_zonepath(zone_name,
- zonepath, sizeof (zonepath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- return (-1);
- }
-
(void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath);
resolve_lofs(zlogp, luroot, sizeof (luroot));
(void) strlcpy(rootpath, luroot, sizeof (rootpath));
@@ -687,6 +747,8 @@ mount_early_fs(void *data, const char *spec, const char *dir,
char opt_buf[MAX_MNTOPT_STR];
int optlen = 0;
int mflag = MS_DATA;
+ int i;
+ int ret;
(void) ct_tmpl_clear(tmpl_fd);
/*
@@ -714,9 +776,26 @@ mount_early_fs(void *data, const char *spec, const char *dir,
optlen = MAX_MNTOPT_STR;
mflag = MS_OPTIONSTR;
}
- if (mount(spec, dir, mflag, fstype, NULL, 0, opt, optlen) != 0)
- _exit(errno);
- _exit(0);
+
+ /*
+ * There is an obscure race condition which can cause mount
+ * to return EBUSY. This happens for example on the mount
+ * of the zone's /etc/svc/volatile file system if there is
+ * a GZ process running svcs -Z, which will touch the
+ * mountpoint, just as we're trying to do the mount. To cope
+ * with this, we retry up to 3 times to let this transient
+ * process get out of the way.
+ */
+ for (i = 0; i < 3; i++) {
+ ret = 0;
+ if (mount(spec, dir, mflag, fstype, NULL, 0, opt,
+ optlen) != 0)
+ ret = errno;
+ if (ret != EBUSY)
+ break;
+ (void) sleep(1);
+ }
+ _exit(ret);
}
/* parent */
@@ -740,18 +819,275 @@ mount_early_fs(void *data, const char *spec, const char *dir,
}
/*
+ * Replace characters other than [A-Za-z0-9_] with '_' so that the string is a
+ * valid environment variable name.
+ */
+static void
+sanitize_env_var_name(char *var)
+{
+ for (char *p = var; *p != '\0'; p++) {
+ if (!isalnum(*p)) {
+ *p = '_';
+ }
+ }
+}
+
+/*
+ * env variable name format
+ * _ZONECFG_{resource name}_{identifying attr. name}_{property name}
+ * Any dashes (-) in the property names are replaced with underscore (_).
+ */
+static void
+set_zonecfg_env(char *rsrc, char *attr, char *name, char *val)
+{
+ /* Enough for maximal name, rsrc + attr, & slop for ZONECFG & _'s */
+ char nm[2 * MAXNAMELEN + 32];
+
+ if (attr == NULL)
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s", rsrc,
+ name);
+ else
+ (void) snprintf(nm, sizeof (nm), "_ZONECFG_%s_%s_%s", rsrc,
+ attr, name);
+
+ sanitize_env_var_name(nm);
+
+ (void) setenv(nm, val, 1);
+}
+
+/*
+ * Resolve a device:match value to a path. This is only different for PPT
+ * devices, where we expect the match property to be a /devices/... path, and
+ * configured for PPT already.
+ */
+int
+resolve_device_match(zlog_t *zlogp, struct zone_devtab *dtab,
+ char *path, size_t len)
+{
+ struct zone_res_attrtab *rap;
+
+ for (rap = dtab->zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next) {
+ if (strcmp(rap->zone_res_attr_name, "model") == 0 &&
+ strcmp(rap->zone_res_attr_value, "passthru") == 0)
+ break;
+ }
+
+ if (rap == NULL) {
+ if (strlcpy(path, dtab->zone_dev_match, len) >= len)
+ return (Z_INVAL);
+ return (Z_OK);
+ }
+
+ if (strncmp(dtab->zone_dev_match, "/devices",
+ strlen("/devices")) != 0) {
+ zerror(zlogp, B_FALSE, "invalid passthru match value '%s'",
+ dtab->zone_dev_match);
+ return (Z_INVAL);
+ }
+
+ if (ppt_devpath_to_dev(dtab->zone_dev_match, path, len) != 0) {
+ zerror(zlogp, B_TRUE, "failed to resolve passthru device %s",
+ dtab->zone_dev_match);
+ return (Z_INVAL);
+ }
+
+ return (Z_OK);
+}
+
+/*
+ * Export various zonecfg properties into environment for the boot and state
+ * change hooks.
+ *
+ * If debug is true, _ZONEADMD_brand_debug is set to 1, else it is set to an
+ * empty string. Brand hooks consider any non-empty string as an indication
+ * that debug output is requested.
+ *
+ * We could export more of the config in the future, as necessary. A better
+ * solution would be to make it so brand-specific behavior is handled by
+ * brand-specific callbacks written in C. Then the normal libzonecfg interfaces
+ * can be used for accessing any parts of the configuration that are needed.
+ *
+ * All of the environment variables set by this function are specific to
+ * SmartOS.
+ */
+static int
+setup_subproc_env(zlog_t *zlogp, boolean_t debug)
+{
+ int res;
+ struct zone_nwiftab ntab;
+ struct zone_devtab dtab;
+ struct zone_attrtab atab;
+ char net_resources[MAXNAMELEN * 2];
+ char dev_resources[MAXNAMELEN * 2];
+ char didstr[16];
+ char uuidstr[UUID_PRINTABLE_STRING_LENGTH];
+ uuid_t uuid;
+
+ /* snap_hndl is null when called through the set_brand_env code path */
+ if (snap_hndl == NULL)
+ return (Z_OK);
+
+ if ((res = zonecfg_get_uuid(zone_name, uuid)) != Z_OK)
+ return (res);
+
+ uuid_unparse(uuid, uuidstr);
+ (void) setenv("_ZONECFG_uuid", uuidstr, 1);
+
+ (void) snprintf(didstr, sizeof (didstr), "%d", zone_did);
+ (void) setenv("_ZONECFG_did", didstr, 1);
+
+ /*
+ * "net" resources are exported because zoneadmd does not handle
+ * automatic configuration of vnics and so that the bhyve boot hook
+ * can generate the argument list for the brand's init program. At such
+ * a time as vnic creation is handled in zoneadmd and brand callbacks
+ * can be executed as part of the zoneadmd process this should be
+ * removed.
+ */
+ net_resources[0] = '\0';
+ if ((res = zonecfg_setnwifent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getnwifent(snap_hndl, &ntab) == Z_OK) {
+ struct zone_res_attrtab *rap;
+ char *phys;
+
+ phys = ntab.zone_nwif_physical;
+
+ (void) strlcat(net_resources, phys, sizeof (net_resources));
+ (void) strlcat(net_resources, " ", sizeof (net_resources));
+
+ set_zonecfg_env(RSRC_NET, phys, "physical", phys);
+
+ set_zonecfg_env(RSRC_NET, phys, "address",
+ ntab.zone_nwif_address);
+ set_zonecfg_env(RSRC_NET, phys, "allowed-address",
+ ntab.zone_nwif_allowed_address);
+ set_zonecfg_env(RSRC_NET, phys, "defrouter",
+ ntab.zone_nwif_defrouter);
+ set_zonecfg_env(RSRC_NET, phys, "global-nic",
+ ntab.zone_nwif_gnic);
+ set_zonecfg_env(RSRC_NET, phys, "mac-addr", ntab.zone_nwif_mac);
+ set_zonecfg_env(RSRC_NET, phys, "vlan-id",
+ ntab.zone_nwif_vlan_id);
+
+ for (rap = ntab.zone_nwif_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next)
+ set_zonecfg_env(RSRC_NET, phys, rap->zone_res_attr_name,
+ rap->zone_res_attr_value);
+ nwifent_free_attrs(&ntab);
+ }
+
+ (void) setenv("_ZONECFG_net_resources", net_resources, 1);
+
+ (void) zonecfg_endnwifent(snap_hndl);
+
+ /*
+ * "device" resources are exported because the bhyve boot brand callback
+ * needs them to generate the argument list for the brand's init
+ * program. At such a time as brand callbacks can be executed as part
+ * of the zoneadmd process, this should be removed.
+ *
+ * The bhyve brand only supports disk-like and ppt devices and does not
+ * support regular expressions.
+ */
+ if ((res = zonecfg_setdevent(snap_hndl)) != Z_OK)
+ goto done;
+
+ dev_resources[0] = '\0';
+ while (zonecfg_getdevent(snap_hndl, &dtab) == Z_OK) {
+ char *match = dtab.zone_dev_match;
+ struct zone_res_attrtab *rap;
+ char path[MAXPATHLEN];
+
+ res = resolve_device_match(zlogp, &dtab, path, sizeof (path));
+ if (res != Z_OK)
+ goto done;
+
+ /*
+ * Even if not modified, the match path will be mangled in the
+ * environment variable name, so we always store the value here.
+ */
+ set_zonecfg_env(RSRC_DEV, match, "path", path);
+
+ for (rap = dtab.zone_dev_attrp; rap != NULL;
+ rap = rap->zone_res_attr_next) {
+ set_zonecfg_env(RSRC_DEV, match,
+ rap->zone_res_attr_name, rap->zone_res_attr_value);
+ }
+
+ /*
+ * _ZONECFG_device_resources will contain a space separated list
+ * of devices that have _ZONECFG_device_<device>* environment
+ * variables. So that each element of the list matches up with
+ * <device>, each list item needs to be sanitized in the same
+ * way that environment variable names are sanitized.
+ */
+ sanitize_env_var_name(match);
+ (void) strlcat(dev_resources, match, sizeof (dev_resources));
+ (void) strlcat(dev_resources, " ", sizeof (dev_resources));
+ }
+ (void) zonecfg_enddevent(snap_hndl);
+
+ (void) setenv("_ZONECFG_device_resources", dev_resources, 1);
+
+ /*
+ * "attr" resources are exported because the bhyve brand's boot hook
+ * needs access to the "ram", "cpu", "bootrom", etc. to form the
+ * argument list for the brand's init program. Once the bhyve brand is
+ * configured via proper resources and properties, this should be
+ * removed.
+ */
+ if ((res = zonecfg_setattrent(snap_hndl)) != Z_OK)
+ goto done;
+
+ while (zonecfg_getattrent(snap_hndl, &atab) == Z_OK) {
+ set_zonecfg_env("attr", NULL, atab.zone_attr_name,
+ atab.zone_attr_value);
+ }
+
+ (void) zonecfg_endattrent(snap_hndl);
+
+ if (debug)
+ (void) setenv("_ZONEADMD_brand_debug", "1", 1);
+ else
+ (void) setenv("_ZONEADMD_brand_debug", "", 1);
+
+ res = Z_OK;
+
+done:
+ return (res);
+}
+
+void
+nwifent_free_attrs(struct zone_nwiftab *np)
+{
+ struct zone_res_attrtab *rap;
+
+ for (rap = np->zone_nwif_attrp; rap != NULL; ) {
+ struct zone_res_attrtab *tp = rap;
+
+ rap = rap->zone_res_attr_next;
+ free(tp);
+ }
+}
+
+/*
* If retstr is not NULL, the output of the subproc is returned in the str,
* otherwise it is output using zerror(). Any memory allocated for retstr
* should be freed by the caller.
*/
int
-do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
+do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr, boolean_t debug)
{
char buf[1024]; /* arbitrary large amount */
char *inbuf;
FILE *file;
int status;
int rd_cnt;
+ int fds[2];
+ pid_t child;
if (retstr != NULL) {
if ((*retstr = malloc(1024)) == NULL) {
@@ -764,31 +1100,104 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
inbuf = buf;
}
- file = popen(cmdbuf, "r");
- if (file == NULL) {
- zerror(zlogp, B_TRUE, "could not launch: %s", cmdbuf);
+ if (pipe(fds) != 0) {
+ zerror(zlogp, B_TRUE, "failed to create pipe for subprocess");
return (-1);
}
+ if ((child = fork()) == 0) {
+ int in;
+
+ /*
+ * SIGINT is currently ignored. It probably shouldn't be so
+ * hard to kill errant children, so we revert to SIG_DFL.
+ * SIGHUP and SIGUSR1 are used to perform log rotation. We
+ * leave those as-is because we don't want a 'pkill -HUP
+ * zoneadmd' to kill this child process before exec(). On
+ * exec(), SIGHUP and SIGUSR1 will become SIG_DFL.
+ */
+ (void) sigset(SIGINT, SIG_DFL);
+
+ /*
+ * Set up a pipe for the child to log to.
+ */
+ if (dup2(fds[1], STDERR_FILENO) == -1) {
+ (void) snprintf(buf, sizeof (buf),
+ "subprocess failed to dup2(STDERR_FILENO): %s\n",
+ strerror(errno));
+ (void) write(fds[1], buf, strlen(buf));
+ _exit(127);
+ }
+ if (dup2(fds[1], STDOUT_FILENO) == -1) {
+ perror("subprocess failed to dup2(STDOUT_FILENO)");
+ _exit(127);
+ }
+ /*
+ * Some naughty children may try to read from stdin. Be sure
+ * that the first file that a child opens doesn't get stdin's
+ * file descriptor.
+ */
+ if ((in = open("/dev/null", O_RDONLY)) == -1 ||
+ dup2(in, STDIN_FILENO) == -1) {
+ zerror(zlogp, B_TRUE,
+ "subprocess failed to set up STDIN_FILENO");
+ _exit(127);
+ }
+ closefrom(STDERR_FILENO + 1);
+
+ if (setup_subproc_env(zlogp, debug) != Z_OK) {
+ zerror(zlogp, B_FALSE, "failed to setup environment");
+ _exit(127);
+ }
+
+ (void) execl("/bin/sh", "sh", "-c", cmdbuf, NULL);
+
+ zerror(zlogp, B_TRUE, "subprocess execl failed");
+ _exit(127);
+ } else if (child == -1) {
+ zerror(zlogp, B_TRUE, "failed to create subprocess for '%s'",
+ cmdbuf);
+ (void) close(fds[0]);
+ (void) close(fds[1]);
+ return (-1);
+ }
+
+ (void) close(fds[1]);
+
+ file = fdopen(fds[0], "r");
while (fgets(inbuf, 1024, file) != NULL) {
if (retstr == NULL) {
- if (zlogp != &logsys)
+ if (zlogp != &logsys) {
+ int last = strlen(inbuf) - 1;
+
+ if (inbuf[last] == '\n')
+ inbuf[last] = '\0';
zerror(zlogp, B_FALSE, "%s", inbuf);
+ }
} else {
char *p;
rd_cnt += 1024 - 1;
if ((p = realloc(*retstr, rd_cnt + 1024)) == NULL) {
zerror(zlogp, B_FALSE, "out of memory");
- (void) pclose(file);
- return (-1);
+ break;
}
*retstr = p;
inbuf = *retstr + rd_cnt;
}
}
- status = pclose(file);
+
+ while (fclose(file) != 0) {
+ assert(errno == EINTR);
+ }
+ while (waitpid(child, &status, 0) == -1) {
+ if (errno != EINTR) {
+ zerror(zlogp, B_TRUE,
+ "failed to get exit status of '%s'", cmdbuf);
+ return (-1);
+ }
+ }
if (WIFSIGNALED(status)) {
zerror(zlogp, B_FALSE, "%s unexpectedly terminated due to "
@@ -803,24 +1212,91 @@ do_subproc(zlog_t *zlogp, char *cmdbuf, char **retstr)
return (WEXITSTATUS(status));
}
+/*
+ * Get the path for this zone's init(1M) (or equivalent) process. First look
+ * for a zone-specific init-name attr, then get it from the brand.
+ */
+static int
+get_initname(brand_handle_t bh, char *initname, int len)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "init-name",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
+ (void) strlcpy(initname, a.zone_attr_value, len);
+ return (0);
+ }
+
+ return (brand_get_initname(bh, initname, len));
+}
+
+/*
+ * Get the restart-init flag for this zone's init(1M) (or equivalent) process.
+ * First look for a zone-specific restart-init attr, then get it from the brand.
+ */
+static boolean_t
+restartinit(brand_handle_t bh)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "restart-init",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK) {
+ if (strcmp(a.zone_attr_value, "false") == 0)
+ return (B_FALSE);
+ return (B_TRUE);
+ }
+
+ return (brand_restartinit(bh));
+}
+
+/*
+ * Get the app-svc-dependent flag for this zone's init process. This is a
+ * zone-specific attr which controls the type of contract we create for the
+ * zone's init. When true, the contract will include CT_PR_EV_EXIT in the fatal
+ * set, so that when any service which is in the same contract exits, the init
+ * application will be terminated.
+ */
+static boolean_t
+is_app_svc_dep(void)
+{
+ struct zone_attrtab a;
+
+ bzero(&a, sizeof (a));
+ (void) strlcpy(a.zone_attr_name, "app-svc-dependent",
+ sizeof (a.zone_attr_name));
+
+ if (zonecfg_lookup_attr(snap_hndl, &a) == Z_OK &&
+ strcmp(a.zone_attr_value, "true") == 0) {
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
static int
-zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
+zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug)
{
zoneid_t zoneid;
struct stat st;
- char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
+ char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN];
char nbootargs[BOOTARGS_MAX];
char cmdbuf[MAXPATHLEN];
fs_callback_t cb;
brand_handle_t bh;
zone_iptype_t iptype;
- boolean_t links_loaded = B_FALSE;
dladm_status_t status;
char errmsg[DLADM_STRSIZE];
int err;
+ boolean_t app_svc_dep;
boolean_t restart_init, restart_init0, restart_initreboot;
- if (brand_prestatechg(zlogp, zstate, Z_BOOT) != 0)
+ if (brand_prestatechg(zlogp, zstate, Z_BOOT, debug) != 0)
return (-1);
if ((zoneid = getzoneidbyname(zone_name)) == -1) {
@@ -853,13 +1329,8 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
/*
* Get the brand's boot callback if it exists.
*/
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- brand_close(bh);
- goto bad;
- }
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) != 0) {
zerror(zlogp, B_FALSE,
"unable to determine branded zone's boot callback");
@@ -868,7 +1339,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
}
/* Get the path for this zone's init(8) (or equivalent) process. */
- if (brand_get_initname(bh, init_file, MAXPATHLEN) != 0) {
+ if (get_initname(bh, init_file, MAXPATHLEN) != 0) {
zerror(zlogp, B_FALSE,
"unable to determine zone's init(8) location");
brand_close(bh);
@@ -876,35 +1347,44 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
}
/* See if this zone's brand should restart init if it dies. */
- restart_init = brand_restartinit(bh);
+ restart_init = restartinit(bh);
restart_init0 = brand_restartinit0(bh);
restart_initreboot = brand_restartinitreboot(bh);
+ /*
+ * See if we need to setup contract dependencies between the zone's
+ * primary application and any of its services.
+ */
+ app_svc_dep = is_app_svc_dep();
+
brand_close(bh);
- err = filter_bootargs(zlogp, bootargs, nbootargs, init_file,
- bad_boot_arg);
- if (err == Z_INVAL)
- eventstream_write(Z_EVT_ZONE_BADARGS);
- else if (err != Z_OK)
+ err = filter_bootargs(zlogp, bootargs, nbootargs, init_file);
+ if (err != Z_OK)
goto bad;
assert(init_file[0] != '\0');
- /* Try to anticipate possible problems: Make sure init is executable. */
- if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
+ /*
+ * Try to anticipate possible problems: If possible, make sure init is
+ * executable.
+ */
+ if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine zone root");
goto bad;
}
- (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file);
+ (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file);
- if (stat(initpath, &st) == -1) {
+ if (lstat(initpath, &st) == -1) {
zerror(zlogp, B_TRUE, "could not stat %s", initpath);
goto bad;
}
- if ((st.st_mode & S_IXUSR) == 0) {
+ /* LINTED: E_NOP_IF_STMT */
+ if ((st.st_mode & S_IFMT) == S_IFLNK) {
+ /* symlink, we'll have to wait and resolve when we boot */
+ } else if ((st.st_mode & S_IXUSR) == 0) {
zerror(zlogp, B_FALSE, "%s is not executable", initpath);
goto bad;
}
@@ -922,7 +1402,6 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
" %s", dladm_status2str(status, errmsg));
goto bad;
}
- links_loaded = B_TRUE;
}
/*
@@ -931,7 +1410,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
* is booted.
*/
if ((strlen(cmdbuf) > EXEC_LEN) &&
- (do_subproc(zlogp, cmdbuf, NULL) != Z_OK)) {
+ (do_subproc(zlogp, cmdbuf, NULL, debug) != Z_OK)) {
zerror(zlogp, B_FALSE, "%s failed", cmdbuf);
goto bad;
}
@@ -963,19 +1442,31 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate)
goto bad;
}
+ if (app_svc_dep && zone_setattr(zoneid, ZONE_ATTR_APP_SVC_CT,
+ (void *)B_TRUE, sizeof (boolean_t)) == -1) {
+ zerror(zlogp, B_TRUE, "could not set zone app-die");
+ goto bad;
+ }
+
/*
* Inform zonestatd of a new zone so that it can install a door for
* the zone to contact it.
*/
notify_zonestatd(zone_id);
+ /* Startup a thread to perform zfd logging/tty svc for the zone. */
+ create_log_thread(zlogp);
+
if (zone_boot(zoneid) == -1) {
zerror(zlogp, B_TRUE, "unable to boot zone");
+ destroy_log_thread(zlogp);
goto bad;
}
- if (brand_poststatechg(zlogp, zstate, Z_BOOT) != 0)
+ if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) {
+ destroy_log_thread(zlogp);
goto bad;
+ }
return (0);
@@ -984,32 +1475,45 @@ bad:
* If something goes wrong, we up the zones's state to the target
* state, RUNNING, and then invoke the hook as if we're halting.
*/
- (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT);
- if (links_loaded)
- (void) dladm_zone_halt(dld_handle, zoneid);
+ (void) brand_poststatechg(zlogp, ZONE_STATE_RUNNING, Z_HALT, debug);
+
return (-1);
}
static int
-zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate)
+zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate,
+ boolean_t debug)
{
int err;
- if (brand_prestatechg(zlogp, zstate, Z_HALT) != 0)
+ /*
+ * If performing a scratch zone unmount then do not call the
+ * state change hooks.
+ */
+ if (unmount_cmd == B_FALSE &&
+ brand_prestatechg(zlogp, zstate, Z_HALT, debug) != 0)
return (-1);
- if (vplat_teardown(zlogp, unmount_cmd, rebooting) != 0) {
+ if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) {
if (!bringup_failure_recovery)
zerror(zlogp, B_FALSE, "unable to destroy zone");
+ destroy_log_thread(zlogp);
return (-1);
}
+ /* Shut down is done, stop the log thread */
+ destroy_log_thread(zlogp);
+
+ if (unmount_cmd == B_FALSE &&
+ brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0)
+ return (-1);
+
if ((err = zonecfg_destroy_snapshot(zone_name)) != Z_OK)
zerror(zlogp, B_FALSE, "destroying snapshot: %s",
zonecfg_strerror(err));
- if (brand_poststatechg(zlogp, zstate, Z_HALT) != 0)
- return (-1);
+ zonecfg_fini_handle(snap_hndl);
+ snap_hndl = NULL;
return (0);
}
@@ -1021,7 +1525,6 @@ zone_graceful_shutdown(zlog_t *zlogp)
pid_t child;
char cmdbuf[MAXPATHLEN];
brand_handle_t bh = NULL;
- char zpath[MAXPATHLEN];
ctid_t ct;
int tmpl_fd;
int child_status;
@@ -1042,18 +1545,12 @@ zone_graceful_shutdown(zlog_t *zlogp)
return (-1);
}
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) {
- zerror(zlogp, B_FALSE, "unable to determine zone path");
- brand_close(bh);
- return (-1);
- }
-
/*
* If there is a brand 'shutdown' callback, execute it now to give the
* brand a chance to cleanup any custom configuration.
*/
(void) strcpy(cmdbuf, EXEC_PREFIX);
- if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN,
+ if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN,
sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) {
(void) strcat(cmdbuf, SHUTDOWN_DEFAULT);
}
@@ -1191,6 +1688,36 @@ audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val,
}
/*
+ * Log the exit time and status of the zone's init process into
+ * {zonepath}/lastexited. If the zone shutdown normally, the exit status will
+ * be -1, otherwise it will be the exit status as described in wait.3c.
+ * If the zone is configured to restart init, then nothing will be logged if
+ * init exits unexpectedly (the kernel will never upcall in this case).
+ */
+static void
+log_init_exit(int status)
+{
+ char p[MAXPATHLEN];
+ char buf[128];
+ struct timeval t;
+ int fd;
+
+ if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p))
+ return;
+ if (gettimeofday(&t, NULL) != 0)
+ return;
+ if (snprintf(buf, sizeof (buf), "%ld.%ld %d\n", t.tv_sec, t.tv_usec,
+ status) > sizeof (buf))
+ return;
+ if ((fd = open(p, O_WRONLY | O_CREAT | O_TRUNC, 0644)) < 0)
+ return;
+
+ (void) write(fd, buf, strlen(buf));
+
+ (void) close(fd);
+}
+
+/*
* The main routine for the door server that deals with zone state transitions.
*/
/* ARGSUSED */
@@ -1203,9 +1730,11 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
zone_state_t zstate;
zone_cmd_t cmd;
+ boolean_t debug;
+ int init_status;
zone_cmd_arg_t *zargp;
- boolean_t kernelcall = B_FALSE;
+ boolean_t kernelcall = B_TRUE;
int rval = -1;
uint64_t uniqid;
@@ -1226,6 +1755,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
* it is time for us to shut down zoneadmd.
*/
if (zargp == DOOR_UNREF_DATA) {
+ logstream_close(platloghdl, B_TRUE);
+
/*
* See comment at end of main() for info on the last rites.
*/
@@ -1255,6 +1786,8 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
goto out;
}
cmd = zargp->cmd;
+ debug = zargp->debug;
+ init_status = zargp->status;
if (door_ucred(&uc) != 0) {
zerror(&logsys, B_TRUE, "door_ucred");
@@ -1335,7 +1868,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = -1;
goto out;
}
- zlogp = &logsys; /* Log errors to syslog */
+ zlogp = &logplat; /* Log errors to platform.log */
}
/*
@@ -1361,23 +1894,25 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case ZONE_STATE_INSTALLED:
switch (cmd) {
case Z_READY:
- rval = zone_ready(zlogp, Z_MNT_BOOT, zstate);
+ rval = zone_ready(zlogp, Z_MNT_BOOT, zstate, debug);
if (rval == 0)
eventstream_write(Z_EVT_ZONE_READIED);
+ zcons_statechanged();
break;
case Z_BOOT:
case Z_FORCEBOOT:
eventstream_write(Z_EVT_ZONE_BOOTING);
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- == 0) {
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) == 0) {
rval = zone_bootup(zlogp, zargp->bootbuf,
- zstate);
+ zstate, debug);
}
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_FALSE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
break;
@@ -1429,7 +1964,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = zone_ready(zlogp,
strcmp(zargp->bootbuf, "-U") == 0 ?
- Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate);
+ Z_MNT_UPDATE : Z_MNT_SCRATCH, zstate, debug);
if (rval != 0)
break;
@@ -1495,12 +2030,14 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_BOOTING);
- rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
+ rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
+ debug);
audit_put_record(zlogp, uc, rval, "boot");
+ zcons_statechanged();
if (rval != 0) {
bringup_failure_recovery = B_TRUE;
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
boot_args[0] = '\0';
@@ -1508,9 +2045,10 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case Z_HALT:
if (kernelcall) /* Invalid; can't happen */
abort();
- if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
- != 0)
+ if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
+ debug)) != 0)
break;
+ zcons_statechanged();
eventstream_write(Z_EVT_ZONE_HALTED);
break;
case Z_SHUTDOWN:
@@ -1534,7 +2072,7 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case Z_UNMOUNT:
if (kernelcall) /* Invalid; can't happen */
abort();
- rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate);
+ rval = zone_halt(zlogp, B_TRUE, B_FALSE, zstate, debug);
if (rval == 0) {
eventstream_write(Z_EVT_ZONE_HALTED);
(void) sema_post(&scratch_sem);
@@ -1556,10 +2094,12 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
case ZONE_STATE_DOWN:
switch (cmd) {
case Z_READY:
- if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
- != 0)
+ if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
+ debug)) != 0)
break;
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate)) == 0)
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) == 0)
eventstream_write(Z_EVT_ZONE_READIED);
else
eventstream_write(Z_EVT_ZONE_HALTED);
@@ -1576,32 +2116,40 @@ server(void *cookie, char *args, size_t alen, door_desc_t *dp,
rval = 0;
break;
case Z_HALT:
- if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate))
- != 0)
+ if (kernelcall) {
+ log_init_exit(init_status);
+ } else {
+ log_init_exit(-1);
+ }
+ if ((rval = zone_halt(zlogp, B_FALSE, B_FALSE, zstate,
+ debug)) != 0)
break;
eventstream_write(Z_EVT_ZONE_HALTED);
+ zcons_statechanged();
break;
case Z_REBOOT:
(void) strlcpy(boot_args, zargp->bootbuf,
sizeof (boot_args));
eventstream_write(Z_EVT_ZONE_REBOOTING);
- if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate))
- != 0) {
+ if ((rval = zone_halt(zlogp, B_FALSE, B_TRUE, zstate,
+ debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate))
- != 0) {
+ zcons_statechanged();
+ if ((rval = zone_ready(zlogp, Z_MNT_BOOT, zstate,
+ debug)) != 0) {
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
boot_args[0] = '\0';
break;
}
- rval = zone_bootup(zlogp, zargp->bootbuf, zstate);
+ rval = zone_bootup(zlogp, zargp->bootbuf, zstate,
+ debug);
audit_put_record(zlogp, uc, rval, "reboot");
if (rval != 0) {
(void) zone_halt(zlogp, B_FALSE, B_TRUE,
- zstate);
+ zstate, debug);
eventstream_write(Z_EVT_ZONE_BOOTFAILED);
}
boot_args[0] = '\0';
@@ -1782,6 +2330,29 @@ top:
"zoneadmd does not appear to be available; "
"restarted zoneadmd to recover.",
zone_name, zone_state_str(zstate));
+
+ /*
+ * Startup a thread to perform the zfd logging/tty svc
+ * for the zone. zlogp won't be valid for much longer
+ * so use logplat.
+ */
+ if (getzoneidbyname(zone_name) != -1) {
+ create_log_thread(&logplat);
+ }
+
+ /* recover the global configuration snapshot */
+ if (snap_hndl == NULL) {
+ if ((snap_hndl = zonecfg_init_handle())
+ == NULL ||
+ zonecfg_create_snapshot(zone_name)
+ != Z_OK ||
+ zonecfg_get_snapshot_handle(zone_name,
+ snap_hndl) != Z_OK) {
+ zerror(zlogp, B_FALSE, "recovering "
+ "zone configuration handle");
+ goto out;
+ }
+ }
}
(void) fdetach(zone_door_path);
@@ -1795,21 +2366,62 @@ out:
}
/*
- * Setup the brand's pre and post state change callbacks, as well as the
- * query callback, if any of these exist.
+ * Run the query hook with the 'env' parameter. It should return a
+ * string of tab-delimited key-value pairs, each of which should be set
+ * in the environment.
+ *
+ * Because the env_vars string values become part of the environment, the
+ * string is static and we don't free it.
+ *
+ * This function is always called before zoneadmd forks and makes itself
+ * exclusive, so it is possible there could more than one instance of zoneadmd
+ * running in parallel at this point. Thus, we have no zonecfg snapshot and
+ * shouldn't take one yet (i.e. snap_hndl is NULL). Thats ok, since we don't
+ * need any zonecfg info to query for a brand-specific env value.
*/
static int
-brand_callback_init(brand_handle_t bh, char *zone_name)
+set_brand_env(zlog_t *zlogp)
{
- char zpath[MAXPATHLEN];
+ int ret = 0;
+ static char *env_vars = NULL;
+ char buf[2 * MAXPATHLEN];
+
+ if (query_hook[0] == '\0' || env_vars != NULL)
+ return (0);
+
+ if (snprintf(buf, sizeof (buf), "%s env", query_hook) > sizeof (buf))
+ return (-1);
- if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK)
+ if (do_subproc(zlogp, buf, &env_vars, B_FALSE) != 0)
return (-1);
+ if (env_vars != NULL) {
+ char *sp;
+
+ sp = strtok(env_vars, "\t");
+ while (sp != NULL) {
+ if (putenv(sp) != 0) {
+ ret = -1;
+ break;
+ }
+ sp = strtok(NULL, "\t");
+ }
+ }
+
+ return (ret);
+}
+
+/*
+ * Setup the brand's pre and post state change callbacks, as well as the
+ * query callback, if any of these exist.
+ */
+static int
+brand_callback_init(brand_handle_t bh, char *zone_name)
+{
(void) strlcpy(pre_statechg_hook, EXEC_PREFIX,
sizeof (pre_statechg_hook));
- if (brand_get_prestatechange(bh, zone_name, zpath,
+ if (brand_get_prestatechange(bh, zone_name, zonepath,
pre_statechg_hook + EXEC_LEN,
sizeof (pre_statechg_hook) - EXEC_LEN) != 0)
return (-1);
@@ -1820,7 +2432,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name)
(void) strlcpy(post_statechg_hook, EXEC_PREFIX,
sizeof (post_statechg_hook));
- if (brand_get_poststatechange(bh, zone_name, zpath,
+ if (brand_get_poststatechange(bh, zone_name, zonepath,
post_statechg_hook + EXEC_LEN,
sizeof (post_statechg_hook) - EXEC_LEN) != 0)
return (-1);
@@ -1831,7 +2443,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name)
(void) strlcpy(query_hook, EXEC_PREFIX,
sizeof (query_hook));
- if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN,
+ if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN,
sizeof (query_hook) - EXEC_LEN) != 0)
return (-1);
@@ -1959,6 +2571,11 @@ main(int argc, char *argv[])
return (1);
}
+ if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) {
+ zerror(zlogp, B_FALSE, "unable to determine zone path");
+ return (-1);
+ }
+
if (zonecfg_default_brand(default_brand,
sizeof (default_brand)) != Z_OK) {
zerror(zlogp, B_FALSE, "unable to determine default brand");
@@ -2030,6 +2647,11 @@ main(int argc, char *argv[])
}
priv_freeset(privset);
+ if (set_brand_env(zlogp) != 0) {
+ zerror(zlogp, B_FALSE, "Unable to setup brand's environment");
+ return (1);
+ }
+
if (mkzonedir(zlogp) != 0)
return (1);
@@ -2156,6 +2778,15 @@ main(int argc, char *argv[])
openlog("zoneadmd", LOG_PID, LOG_DAEMON);
/*
+ * Allow logging to <zonepath>/logs/<file>.
+ */
+ logstream_init(zlogp);
+ platloghdl = logstream_open("platform.log", "zoneadmd", 0);
+
+ /* logplat looks the same as logsys, but logs to platform.log */
+ logplat = logsys;
+
+ /*
* The eventstream is used to publish state changes in the zone
* from the door threads to the console I/O poller.
*/
@@ -2174,7 +2805,6 @@ main(int argc, char *argv[])
if (make_daemon_exclusive(zlogp) == -1)
goto child_out;
-
/*
* Create/join a new session; we need to be careful of what we do with
* the console from now on so we don't end up being the session leader
@@ -2184,9 +2814,13 @@ main(int argc, char *argv[])
/*
* This thread shouldn't be receiving any signals; in particular,
- * SIGCHLD should be received by the thread doing the fork().
+ * SIGCHLD should be received by the thread doing the fork(). The
+ * exceptions are SIGHUP and SIGUSR1 for log rotation, set up by
+ * logstream_init().
*/
(void) sigfillset(&blockset);
+ (void) sigdelset(&blockset, SIGHUP);
+ (void) sigdelset(&blockset, SIGUSR1);
(void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL);
/*
diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h
index d784a303b3..06353cbe61 100644
--- a/usr/src/cmd/zoneadmd/zoneadmd.h
+++ b/usr/src/cmd/zoneadmd/zoneadmd.h
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2019 Joyent, Inc.
*/
#ifndef _ZONEADMD_H
@@ -32,6 +33,9 @@ extern "C" {
#endif
#include <libdladm.h>
+#include <libzonecfg.h>
+#include <thread.h>
+#include <synch.h>
/*
* Multi-threaded programs should avoid MT-unsafe library calls (i.e., any-
@@ -69,6 +73,7 @@ extern "C" {
#define DEFAULT_DIR_USER -1 /* user ID for chown: -1 means don't change */
#define DEFAULT_DIR_GROUP -1 /* grp ID for chown: -1 means don't change */
+#define ALT_MOUNT(mount_cmd) ((mount_cmd) != Z_MNT_BOOT)
typedef struct zlog {
FILE *logfile; /* file to log to */
@@ -83,24 +88,27 @@ typedef struct zlog {
char *locale; /* locale to use for gettext() */
} zlog_t;
-extern zlog_t logsys;
+extern zlog_t logsys; /* syslog */
+extern zlog_t logplat; /* platform.log */
extern mutex_t lock;
extern mutex_t msglock;
extern boolean_t in_death_throes;
extern boolean_t bringup_failure_recovery;
extern char *zone_name;
+extern char zonepath[MAXNAMELEN];
+extern zone_dochandle_t snap_hndl;
extern char pool_name[MAXNAMELEN];
extern char brand_name[MAXNAMELEN];
extern char default_brand[MAXNAMELEN];
extern char boot_args[BOOTARGS_MAX];
-extern char bad_boot_arg[BOOTARGS_MAX];
extern boolean_t zone_isnative;
extern boolean_t zone_iscluster;
extern dladm_handle_t dld_handle;
extern void zerror(zlog_t *, boolean_t, const char *, ...);
extern char *localize_msg(char *locale, const char *msg);
+extern void nwifent_free_attrs(struct zone_nwiftab *);
/*
* Eventstream interfaces.
@@ -112,8 +120,7 @@ typedef enum {
Z_EVT_ZONE_HALTED,
Z_EVT_ZONE_READIED,
Z_EVT_ZONE_UNINSTALLING,
- Z_EVT_ZONE_BOOTFAILED,
- Z_EVT_ZONE_BADARGS
+ Z_EVT_ZONE_BOOTFAILED
} zone_evt_t;
extern int eventstream_init();
@@ -135,9 +142,9 @@ typedef enum {
/*
* Virtual platform interfaces.
*/
-extern zoneid_t vplat_create(zlog_t *, zone_mnt_t);
+extern zoneid_t vplat_create(zlog_t *, zone_mnt_t, zoneid_t);
extern int vplat_bringup(zlog_t *, zone_mnt_t, zoneid_t);
-extern int vplat_teardown(zlog_t *, boolean_t, boolean_t);
+extern int vplat_teardown(zlog_t *, boolean_t, boolean_t, boolean_t);
extern int vplat_get_iptype(zlog_t *, zone_iptype_t *);
/*
@@ -154,6 +161,23 @@ extern void resolve_lofs(zlog_t *zlogp, char *path, size_t pathlen);
*/
extern int init_console(zlog_t *);
extern void serve_console(zlog_t *);
+extern void zcons_statechanged();
+
+/*
+ * Logging routines
+ */
+typedef enum {
+ LS_LINE_BUFFERED = 0x1 /* Write when \n found or full buffer */
+} logstream_flags_t;
+
+extern boolean_t logging_poisoned;
+
+extern void create_log_thread(zlog_t *);
+extern void destroy_log_thread(zlog_t *);
+extern void logstream_init(zlog_t *);
+extern int logstream_open(const char *, const char *, logstream_flags_t);
+extern void logstream_write(int, char *, int);
+extern void logstream_close(int, boolean_t);
/*
* Contract handling.
@@ -163,7 +187,13 @@ extern int init_template(void);
/*
* Routine to manage child processes.
*/
-extern int do_subproc(zlog_t *, char *, char **);
+extern int do_subproc(zlog_t *, char *, char **, boolean_t);
+
+/*
+ * Resource handling.
+ */
+extern int resolve_device_match(zlog_t *, struct zone_devtab *,
+ char *, size_t);
#ifdef __cplusplus
}