diff options
Diffstat (limited to 'usr/src/cmd/zoneadmd/zfd.c')
-rw-r--r-- | usr/src/cmd/zoneadmd/zfd.c | 1238 |
1 files changed, 1238 insertions, 0 deletions
diff --git a/usr/src/cmd/zoneadmd/zfd.c b/usr/src/cmd/zoneadmd/zfd.c new file mode 100644 index 0000000000..6647ef0c5f --- /dev/null +++ b/usr/src/cmd/zoneadmd/zfd.c @@ -0,0 +1,1238 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * Copyright 2019 Joyent, Inc. + */ + +/* + * Zone file descriptor support is used as a mechanism for a process inside the + * zone to log messages to the GZ zoneadmd and also as a way to interact + * directly with the process (via zlogin -I). The zfd thread is modeled on + * the zcons thread so see the comment header in zcons.c for a general overview. + * Unlike with zcons, which has a single endpoint within the zone and a single + * endpoint used by zoneadmd, we setup multiple endpoints within the zone. + * + * The mode, which is controlled by the zone attribute "zlog-mode" is somewhat + * of a misnomer since its purpose has evolved. The attribute currently + * can have six values which are used to control: + * - how the zfd devices are used inside the zone + * - if the output on the device(s) is also teed into another stream within + * the zone + * - if we do logging in the GZ + * See the comment on get_mode_logmax() in this file, and the comment in + * uts/common/io/zfd.c for more details. + * + * Internally the zfd_mode_t struct holds the number of stdio devs (1 or 3), + * the number of additional devs corresponding to the zone attr value and the + * GZ logging flag. + * + * Note that although the mode indicates the number of devices needed, we always + * create all possible zfd devices for simplicity. + */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/termios.h> +#include <sys/zfd.h> +#include <sys/mkdev.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <stropts.h> +#include <thread.h> +#include <ucred.h> +#include <unistd.h> +#include <zone.h> +#include <signal.h> +#include <wchar.h> + +#include <libdevinfo.h> +#include <libdevice.h> +#include <libzonecfg.h> + +#include <syslog.h> +#include <sys/modctl.h> + +#include "zoneadmd.h" + +static int shutting_down = 0; +static thread_t logger_tid; +static char log_name[MAXNAMELEN] = "stdio.log"; + +/* + * The eventstream is a simple one-directional flow of messages implemented + * with a pipe. It is used to wake up the poller when it needs to shutdown. + */ +static int eventstream[2] = {-1, -1}; + +#define ZLOG_MODE "zlog-mode" +#define ZLOG_NAME "zlog-name" +#define ZFDNEX_DEVTREEPATH "/pseudo/zfdnex@2" +#define ZFDNEX_FILEPATH "/devices/pseudo/zfdnex@2" +#define SERVER_SOCKPATH ZONES_TMPDIR "/%s.server_%s" +#define ZTTY_RETRY 5 + +#define NUM_ZFD_DEVS 5 + +typedef struct zfd_mode { + uint_t zmode_n_stddevs; + uint_t zmode_n_addl_devs; + boolean_t zmode_gzlogging; +} zfd_mode_t; +static zfd_mode_t mode; + +/* + * cb_data is only used by destroy_cb. + */ +struct cb_data { + zlog_t *zlogp; + int killed; +}; + +/* + * destroy_zfd_devs() and its helper destroy_cb() tears down any zfd instances + * associated with this zone. If things went very wrong, we might have an + * incorrect number of instances hanging around. This routine hunts down and + * tries to remove all of them. Of course, if the fd is open, the instance will + * not detach, which is a potential issue. + */ +static int +destroy_cb(di_node_t node, void *arg) +{ + struct cb_data *cb = (struct cb_data *)arg; + char *prop_data; + char *tmp; + char devpath[MAXPATHLEN]; + devctl_hdl_t hdl; + + if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname", + &prop_data) == -1) + return (DI_WALK_CONTINUE); + + assert(prop_data != NULL); + if (strcmp(prop_data, zone_name) != 0) { + /* this is a zfd for a different zone */ + return (DI_WALK_CONTINUE); + } + + tmp = di_devfs_path(node); + (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp); + di_devfs_path_free(tmp); + + if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) { + zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, " + "but it could not be controlled.", devpath); + return (DI_WALK_CONTINUE); + } + if (devctl_device_remove(hdl) == 0) { + cb->killed++; + } else { + zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, " + "but it could not be removed.", devpath); + } + devctl_release(hdl); + return (DI_WALK_CONTINUE); +} + +static int +destroy_zfd_devs(zlog_t *zlogp) +{ + di_node_t root; + struct cb_data cb; + + bzero(&cb, sizeof (cb)); + cb.zlogp = zlogp; + + if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) { + zerror(zlogp, B_TRUE, "di_init failed"); + return (-1); + } + + (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb); + + di_fini(root); + return (0); +} + +static void +make_tty(zlog_t *zlogp, int id) +{ + int i; + int fd = -1; + char stdpath[MAXPATHLEN]; + + /* + * Open the master side of the dev and issue the ZFD_MAKETTY ioctl, + * which will cause the the various tty-related streams modules to be + * pushed when the slave opens the device. + * + * In very rare cases the open returns ENOENT if devfs doesn't have + * everything setup yet due to heavy zone startup load. Wait for + * 1 sec. and retry a few times. Even if we can't setup tty mode + * we still move on. + */ + (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d", + zone_name, id); + + for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) { + fd = open(stdpath, O_RDWR | O_NOCTTY); + if (fd >= 0 || errno != ENOENT) + break; + (void) sleep(1); + } + if (fd == -1) { + zerror(zlogp, B_TRUE, "ERROR: could not open zfd %d for " + "zone %s to set tty mode", id, zone_name); + } else { + /* + * This ioctl can occasionally return ENXIO if devfs doesn't + * have everything plumbed up yet due to heavy zone startup + * load. Wait for 1 sec. and retry a few times before we give + * up. + */ + for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) { + if (ioctl(fd, ZFD_MAKETTY) == 0) { + break; + } else if (errno != ENXIO) { + break; + } + (void) sleep(1); + } + } + + if (fd != -1) + (void) close(fd); +} + +/* + * init_zfd_devs() drives the device-tree configuration of the zone fd devices. + * The general strategy is to use the libdevice (devctl) interfaces to + * instantiate all of new zone fd nodes. We do a lot of sanity checking, and + * are careful to reuse a dev if one exists. + * + * Once the devices are in the device tree, we kick devfsadm via + * di_devlink_init() to ensure that the appropriate symlinks (to the master and + * slave fd devices) are placed in /dev in the global zone. + */ +static int +init_zfd_dev(zlog_t *zlogp, devctl_hdl_t bus_hdl, int id) +{ + int rv = -1; + devctl_ddef_t ddef_hdl = NULL; + devctl_hdl_t dev_hdl = NULL; + + if ((ddef_hdl = devctl_ddef_alloc("zfd", 0)) == NULL) { + zerror(zlogp, B_TRUE, "failed to allocate ddef handle"); + goto error; + } + + /* + * Set four properties on this node; the name of the zone, the dev name + * seen inside the zone, a flag which lets pseudo know that it is OK to + * automatically allocate an instance # for this device, and the last + * one tells the device framework not to auto-detach this node - we + * need the node to still be there when we ask devfsadmd to make links, + * and when we need to open it. + */ + if (devctl_ddef_string(ddef_hdl, "zfd_zname", zone_name) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd_zname property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "zfd_id", id) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd_id property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) { + zerror(zlogp, B_TRUE, "failed to create auto-assign-instance " + "property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) { + zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach " + "property"); + goto error; + } + if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd node"); + goto error; + } + rv = 0; + +error: + if (ddef_hdl) + devctl_ddef_free(ddef_hdl); + if (dev_hdl) + devctl_release(dev_hdl); + return (rv); +} + +static int +init_zfd_devs(zlog_t *zlogp, zfd_mode_t *mode) +{ + devctl_hdl_t bus_hdl = NULL; + di_devlink_handle_t dl = NULL; + int rv = -1; + int i; + + /* + * Time to make the devices. + */ + if ((bus_hdl = devctl_bus_acquire(ZFDNEX_FILEPATH, 0)) == NULL) { + zerror(zlogp, B_TRUE, "devctl_bus_acquire failed"); + goto error; + } + + for (i = 0; i < NUM_ZFD_DEVS; i++) { + if (init_zfd_dev(zlogp, bus_hdl, i) != 0) + goto error; + } + + if ((dl = di_devlink_init("zfd", DI_MAKE_LINK)) == NULL) { + zerror(zlogp, B_TRUE, "failed to create devlinks"); + goto error; + } + + (void) di_devlink_fini(&dl); + rv = 0; + + if (mode->zmode_n_stddevs == 1) { + /* We want the primary stream to look like a tty. */ + make_tty(zlogp, 0); + } + +error: + if (bus_hdl) + devctl_release(bus_hdl); + return (rv); +} + +static int +init_server_sock(int *servfd, char *nm) +{ + int resfd = -1; + struct sockaddr_un servaddr; + + bzero(&servaddr, sizeof (servaddr)); + servaddr.sun_family = AF_UNIX; + (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path), + SERVER_SOCKPATH, zone_name, nm); + + if ((resfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + zerror(&logplat, B_TRUE, + "server setup: could not create socket"); + goto err; + } + (void) unlink(servaddr.sun_path); + + if (bind(resfd, (struct sockaddr *)&servaddr, sizeof (servaddr)) + == -1) { + zerror(&logplat, B_TRUE, + "server setup: could not bind to socket"); + goto err; + } + + if (listen(resfd, 4) == -1) { + zerror(&logplat, B_TRUE, + "server setup: could not listen on socket"); + goto err; + } + + *servfd = resfd; + return (0); + +err: + (void) unlink(servaddr.sun_path); + if (resfd != -1) + (void) close(resfd); + return (-1); +} + +static void +destroy_server_sock(int servfd, char *nm) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), SERVER_SOCKPATH, zone_name, nm); + (void) unlink(path); + (void) shutdown(servfd, SHUT_RDWR); + (void) close(servfd); +} + +/* + * Read the "ident" string from the client's descriptor; this routine also + * tolerates being called with pid=NULL, for times when you want to "eat" + * the ident string from a client without saving it. + */ +static int +get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len, + uint_t *flagsp) +{ + char buf[BUFSIZ], *bufp; + size_t buflen = sizeof (buf); + char c = '\0'; + int i = 0, r; + ucred_t *cred = NULL; + + /* "eat up the ident string" case, for simplicity */ + if (pid == NULL) { + assert(locale == NULL && locale_len == 0); + while (read(clifd, &c, 1) == 1) { + if (c == '\n') + return (0); + } + } + + bzero(buf, sizeof (buf)); + while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) { + buflen--; + if (c == '\n') + break; + + buf[i] = c; + i++; + } + if (r == -1) + return (-1); + + /* + * We've filled the buffer, but still haven't seen \n. Keep eating + * until we find it; we don't expect this to happen, but this is + * defensive. + */ + if (c != '\n') { + while ((r = read(clifd, &c, sizeof (c))) > 0) + if (c == '\n') + break; + } + + /* + * Parse buffer for message of the form: + * IDENT <locale> <flags> + */ + bufp = buf; + if (strncmp(bufp, "IDENT ", 6) != 0) + return (-1); + bufp += 6; + + if (getpeerucred(clifd, &cred) == 0) { + *pid = ucred_getpid((const ucred_t *)cred); + ucred_free(cred); + } else { + return (-1); + } + + while (*bufp != '\0' && isspace(*bufp)) + bufp++; + buflen = strlen(bufp) - 1; + bufp[buflen - 1] = '\0'; + (void) strlcpy(locale, bufp, locale_len); + + *flagsp = atoi(&bufp[buflen]); + + return (0); +} + +static int +accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len, + uint_t *flagsp) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen; + int flags; + + clilen = sizeof (cliaddr); + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + if (connfd == -1) + return (-1); + if (pid != NULL) { + if (get_client_ident(connfd, pid, locale, locale_len, flagsp) + == -1) { + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); + return (-1); + } + (void) write(connfd, "OK\n", 3); + } + + flags = fcntl(connfd, F_GETFL, 0); + if (flags != -1) + (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC); + + return (connfd); +} + +static void +reject_client(int servfd, pid_t clientpid) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen; + char nak[MAXPATHLEN]; + + clilen = sizeof (cliaddr); + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + + /* + * After getting its ident string, tell client to get lost. + */ + if (get_client_ident(connfd, NULL, NULL, 0, NULL) == 0) { + (void) snprintf(nak, sizeof (nak), "%lu\n", + clientpid); + (void) write(connfd, nak, strlen(nak)); + } + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); +} + +static int +accept_socket(int servfd, pid_t verpid) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen = sizeof (cliaddr); + ucred_t *cred = NULL; + pid_t rpid = -1; + int flags; + + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + if (connfd == -1) + return (-1); + + /* Confirm connecting process is who we expect */ + if (getpeerucred(connfd, &cred) == 0) { + rpid = ucred_getpid((const ucred_t *)cred); + ucred_free(cred); + } + if (rpid == -1 || rpid != verpid) { + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); + return (-1); + } + + flags = fcntl(connfd, F_GETFL, 0); + if (flags != -1) + (void) fcntl(connfd, F_SETFL, flags | O_NONBLOCK | FD_CLOEXEC); + + return (connfd); +} + +static void +ctlcmd_process(int sockfd, int stdoutfd, unsigned int *flags) +{ + char buf[BUFSIZ]; + int i; + for (i = 0; i < BUFSIZ-1; i++) { + char c; + if (read(sockfd, &c, 1) != 1 || + c == '\n' || c == '\0') { + break; + } + buf[i] = c; + } + if (i == 0) { + goto fail; + } + buf[i] = '\0'; + + if (strncmp(buf, "TIOCSWINSZ ", 11) == 0) { + char *next = buf + 11; + struct winsize ws; + errno = 0; + ws.ws_row = strtol(next, &next, 10); + if (errno == EINVAL) { + goto fail; + } + ws.ws_col = strtol(next + 1, &next, 10); + if (errno == EINVAL) { + goto fail; + } + if (ioctl(stdoutfd, TIOCSWINSZ, &ws) == 0) { + (void) write(sockfd, "OK\n", 3); + return; + } + } + if (strncmp(buf, "SETFLAGS ", 9) == 0) { + char *next = buf + 9; + unsigned int result; + errno = 0; + result = strtoul(next, &next, 10); + if (errno == EINVAL) { + goto fail; + } + *flags = result; + (void) write(sockfd, "OK\n", 3); + return; + } +fail: + (void) write(sockfd, "FAIL\n", 5); +} + +/* + * Check to see if the client at the other end of the socket is still alive; we + * know it is not if it throws EPIPE at us when we try to write an otherwise + * harmless 0-length message to it. + */ +static int +test_client(int clifd) +{ + if ((write(clifd, "", 0) == -1) && errno == EPIPE) + return (-1); + return (0); +} + +/* + * We want to sleep for a little while but need to be responsive if the zone is + * halting. We poll/sleep on the event stream so we can notice if we're halting. + * Return true if halting, otherwise false. + */ +static boolean_t +halt_sleep(int slptime) +{ + struct pollfd evfd[1]; + + evfd[0].fd = eventstream[1]; + evfd[0].events = POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI; + + if (poll(evfd, 1, slptime) > 0) { + /* zone halting */ + return (B_TRUE); + } + return (B_FALSE); +} + +/* + * This routine drives the logging and interactive I/O loop. It polls for + * input from the zone side of the fd (output to stdout/stderr), and from the + * client (input to the zone's stdin). Additionally, it polls on the server + * fd, and disconnects any clients that might try to hook up with the zone + * while the fd's are in use. + * + * Data from the zone's stdout and stderr is formatted in json and written to + * the log file whether an interactive client is connected or not. + * + * When the client first calls us up, it is expected to send a line giving its + * "identity"; this consists of the string 'IDENT <pid> <locale>'. This is so + * that we can report that the fd's are busy, along with some diagnostics + * about who has them busy; the locale is ignore here but kept for compatability + * with the zlogin code when running on the zone's console. + * + * We need to handle the case where there is no server within the zone (or + * the server gets stuck) and data that we're writing to the zone server's + * stdin fills the pipe. Because of the way the zfd device works writes can + * flow into the stream and simply be dropped, if there is no server, or writes + * could return -1 with EAGAIN if the server is stuck. Since we ignore errors + * on the write to stdin, we won't get blocked in that case but we'd like to + * avoid dropping initial input if the server within the zone hasn't started + * yet. To handle this we wait to read initial input until we detect that there + * is a server inside the zone. We have to poll for this so that we can + * re-run the ioctl to notice when a server shows up. This poll/wait is handled + * by halt_sleep() so that we can be responsive if the zone wants to halt. + * We only do this check to avoid dropping initial input so it is possible for + * the server within the zone to go away later. At that point zfd will just + * drop any new input flowing into the stream. + */ +static void +do_zfd_io(int gzctlfd, int gzservfd, int gzerrfd, int stdinfd, int stdoutfd, + int stderrfd, int logout, int logerr) +{ + struct pollfd pollfds[8]; + char ibuf[BUFSIZ + 1]; + int cc, ret; + int ctlfd = -1; + int clifd = -1; + int clierrfd = -1; + int pollerr = 0; + char clilocale[MAXPATHLEN]; + pid_t clipid = 0; + uint_t flags = 0; + boolean_t stdin_ready = B_FALSE; + int slptime = 250; /* initial poll sleep time in ms */ + + /* client control socket, watch for read events */ + pollfds[0].fd = ctlfd; + pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | + POLLPRI | POLLERR | POLLHUP | POLLNVAL; + + /* client socket, watch for read events */ + pollfds[1].fd = clifd; + pollfds[1].events = pollfds[0].events; + + /* stdout, watch for read events */ + pollfds[2].fd = stdoutfd; + pollfds[2].events = pollfds[0].events; + + /* stderr, watch for read events */ + pollfds[3].fd = stderrfd; + pollfds[3].events = pollfds[0].events; + + /* the server control socket; watch for new connections */ + pollfds[4].fd = gzctlfd; + pollfds[4].events = POLLIN | POLLRDNORM; + + /* the server stdin/out socket; watch for new connections */ + pollfds[5].fd = gzservfd; + pollfds[5].events = POLLIN | POLLRDNORM; + + /* the server stderr socket; watch for new connections */ + pollfds[6].fd = gzerrfd; + pollfds[6].events = POLLIN | POLLRDNORM; + + /* the eventstream; any input means the zone is halting */ + pollfds[7].fd = eventstream[1]; + pollfds[7].events = pollfds[0].events; + + while (!shutting_down) { + pollfds[0].revents = pollfds[1].revents = 0; + pollfds[2].revents = pollfds[3].revents = 0; + pollfds[4].revents = pollfds[5].revents = 0; + pollfds[6].revents = pollfds[7].revents = 0; + + ret = poll(pollfds, 8, -1); + if (ret == -1 && errno != EINTR) { + zerror(&logplat, B_TRUE, "poll failed"); + /* we are hosed, close connection */ + break; + } + + /* control events from client */ + if (pollfds[0].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + /* process control message */ + ctlcmd_process(ctlfd, stdoutfd, &flags); + } else if (pollfds[0].revents) { + /* bail if any error occurs */ + pollerr = pollfds[0].revents; + zerror(&logplat, B_FALSE, "closing connection " + "with control channel, pollerr %d\n", pollerr); + break; + } + + /* event from client side */ + if (pollfds[1].revents) { + if (stdin_ready) { + if (pollfds[1].revents & (POLLIN | + POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(clifd, ibuf, BUFSIZ); + if (cc > 0) { + /* + * See comment for this + * function on what happens if + * there is no reader in the + * zone. EOF is handled below. + */ + (void) write(stdinfd, ibuf, cc); + } + } else if (pollfds[1].revents & (POLLERR | + POLLNVAL)) { + pollerr = pollfds[1].revents; + zerror(&logplat, B_FALSE, + "closing connection " + "with client, pollerr %d\n", + pollerr); + break; + } + + if (pollfds[1].revents & POLLHUP) { + if (flags & ZLOGIN_ZFD_EOF) { + /* + * Let the client know. We've + * already serviced any pending + * regular input. Let the + * stream clear since the EOF + * ioctl jumps to the head. + */ + (void) ioctl(stdinfd, I_FLUSH); + if (halt_sleep(250)) + break; + (void) ioctl(stdinfd, ZFD_EOF); + } + break; + } + } else { + if (ioctl(stdinfd, ZFD_HAS_SLAVE) == 0) { + stdin_ready = B_TRUE; + } else { + /* + * There is nothing in the zone to read + * our input. Presumably the user + * providing input expects something to + * show up, but that is no guarantee. + * Since we haven't serviced the pending + * input poll yet, we don't want to + * immediately loop around but we also + * need to be responsive if the zone is + * halting. + */ + if (halt_sleep(slptime)) + break; + + if (slptime < 5000) + slptime += 250; + } + } + } + + /* event from the zone's stdout */ + if (pollfds[2].revents) { + if (pollfds[2].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stdoutfd, ibuf, BUFSIZ); + /* zfd is a stream, so ignore 0 length read */ + if (cc < 0 && (errno != EINTR) && + (errno != EAGAIN)) + break; + if (cc > 0) { + logstream_write(logout, ibuf, cc); + + /* + * Lose output if no one is listening, + * otherwise pass it on. + */ + if (clifd != -1) + (void) write(clifd, ibuf, cc); + } + } else { + pollerr = pollfds[2].revents; + zerror(&logplat, B_FALSE, + "closing connection with stdout zfd, " + "pollerr %d\n", pollerr); + break; + } + } + + /* event from the zone's stderr */ + if (pollfds[3].revents) { + if (pollfds[3].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stderrfd, ibuf, BUFSIZ); + /* zfd is a stream, so ignore 0 length read */ + if (cc < 0 && (errno != EINTR) && + (errno != EAGAIN)) + break; + if (cc > 0) { + logstream_write(logerr, ibuf, cc); + + /* + * Lose output if no one is listening, + * otherwise pass it on. + */ + if (clierrfd != -1) + (void) write(clierrfd, ibuf, + cc); + } + } else { + pollerr = pollfds[3].revents; + zerror(&logplat, B_FALSE, + "closing connection with stderr zfd, " + "pollerr %d\n", pollerr); + break; + } + } + + /* connect event from server control socket */ + if (pollfds[4].revents) { + if (ctlfd != -1) { + /* + * Test the client to see if it is really + * still alive. If it has died but we + * haven't yet detected that, we might + * deny a legitimate connect attempt. If it + * is dead, we break out; once we tear down + * the old connection, the new connection + * will happen. + */ + if (test_client(ctlfd) == -1) { + break; + } + /* we're already handling a client */ + reject_client(gzctlfd, clipid); + } else { + ctlfd = accept_client(gzctlfd, &clipid, + clilocale, sizeof (clilocale), &flags); + if (ctlfd != -1) { + pollfds[0].fd = ctlfd; + } else { + break; + } + } + } + + /* connect event from server stdin/out socket */ + if (pollfds[5].revents) { + if (ctlfd == -1) { + /* + * This shouldn't happen since the client is + * expected to connect on the control socket + * first. If we see this, tear everything down + * and start over. + */ + zerror(&logplat, B_FALSE, "GZ zfd stdin/stdout " + "connection attempt with no GZ control\n"); + break; + } + assert(clifd == -1); + if ((clifd = accept_socket(gzservfd, clipid)) != -1) { + /* No need to watch for other new connections */ + pollfds[5].fd = -1; + /* Client input is of interest, though */ + pollfds[1].fd = clifd; + } else { + break; + } + } + + /* connection event from server stderr socket */ + if (pollfds[6].revents) { + if (ctlfd == -1) { + /* + * Same conditions apply to stderr as stdin/out. + */ + zerror(&logplat, B_FALSE, "GZ zfd stderr " + "connection attempt with no GZ control\n"); + break; + } + assert(clierrfd == -1); + if ((clierrfd = accept_socket(gzerrfd, clipid)) != -1) { + /* No need to watch for other new connections */ + pollfds[6].fd = -1; + } else { + break; + } + } + + /* + * Watch for events on the eventstream. This is how we get + * notified of the zone halting, etc. It provides us a + * "wakeup" from poll when important things happen, which + * is good. + */ + if (pollfds[7].revents) { + break; + } + } + + if (clifd != -1) { + (void) shutdown(clifd, SHUT_RDWR); + (void) close(clifd); + } + + if (clierrfd != -1) { + (void) shutdown(clierrfd, SHUT_RDWR); + (void) close(clierrfd); + } +} + +static int +open_fd(int id, int rw) +{ + int fd; + int flag = O_NONBLOCK | O_NOCTTY | O_CLOEXEC; + int retried = 0; + char stdpath[MAXPATHLEN]; + + (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d", + zone_name, id); + flag |= rw; + + while (!shutting_down) { + if ((fd = open(stdpath, flag)) != -1) { + /* + * Setting RPROTDIS on the stream means that the + * control portion of messages received (which we don't + * care about) will be discarded by the stream head. If + * we allowed such messages, we wouldn't be able to use + * read(2), as it fails (EBADMSG) when a message with a + * control element is received. + */ + if (ioctl(fd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(&logplat, B_TRUE, + "failed to set options on zfd"); + return (-1); + } + return (fd); + } + + if (retried++ > 60) + break; + + (void) sleep(1); + } + + zerror(&logplat, B_TRUE, "failed to open zfd"); + return (-1); +} + +/* + * Body of the worker thread to log the zfd's stdout and stderr to a log file + * and to perform interactive IO to the stdin, stdout and stderr zfd's. + * + * The stdin, stdout and stderr are from the perspective of the process inside + * the zone, so the zoneadmd view is opposite (i.e. we write to the stdin fd + * and read from the stdout/stderr fds). + */ +static void * +srvr(void *modearg) +{ + zfd_mode_t *mode = (zfd_mode_t *)modearg; + int gzctlfd = -1; + int gzoutfd = -1; + int stdinfd = -1; + int stdoutfd = -1; + int gzerrfd = -1; + int stderrfd = -1; + int flags; + int len; + char ibuf[BUFSIZ + 1]; + int logout = -1; + int logerr = -1; + + if (!shutting_down && mode->zmode_gzlogging) { + logout = logstream_open(log_name, "stdout", 0); + logerr = logstream_open(log_name, "stderr", 0); + } + + if (!shutting_down) { + if (pipe(eventstream) != 0) { + zerror(&logplat, B_TRUE, "failed to open logger " + "control pipe"); + return (NULL); + } + } + + while (!shutting_down) { + if (init_server_sock(&gzctlfd, "ctl") == -1) { + zerror(&logplat, B_FALSE, + "server setup: control socket init failed"); + goto death; + } + if (init_server_sock(&gzoutfd, "out") == -1) { + zerror(&logplat, B_FALSE, + "server setup: stdout socket init failed"); + goto death; + } + if (init_server_sock(&gzerrfd, "err") == -1) { + zerror(&logplat, B_FALSE, + "server setup: stderr socket init failed"); + goto death; + } + + if (mode->zmode_n_stddevs == 1) { + if ((stdinfd = open_fd(0, O_RDWR)) == -1) { + goto death; + } + stdoutfd = stdinfd; + } else { + if ((stdinfd = open_fd(0, O_WRONLY)) == -1 || + (stdoutfd = open_fd(1, O_RDONLY)) == -1 || + (stderrfd = open_fd(2, O_RDONLY)) == -1) { + goto death; + } + } + + do_zfd_io(gzctlfd, gzoutfd, gzerrfd, stdinfd, stdoutfd, + stderrfd, logout, logerr); +death: + destroy_server_sock(gzctlfd, "ctl"); + destroy_server_sock(gzoutfd, "out"); + destroy_server_sock(gzerrfd, "err"); + + /* when shutting down, leave open until drained */ + if (!shutting_down) { + (void) close(stdinfd); + if (mode->zmode_n_stddevs == 3) { + (void) close(stdoutfd); + (void) close(stderrfd); + } + } + } + + /* + * Attempt to drain remaining log output from the zone prior to closing + * the file descriptors. This helps ensure that complete logs are + * captured during shutdown. + */ + flags = fcntl(stdoutfd, F_GETFL, 0); + if (fcntl(stdoutfd, F_SETFL, flags | O_NONBLOCK) != -1) { + while ((len = read(stdoutfd, ibuf, BUFSIZ)) > 0) { + logstream_write(logout, ibuf, len); + } + } + (void) close(stdoutfd); + + if (mode->zmode_n_stddevs > 1) { + (void) close(stdinfd); + flags = fcntl(stderrfd, F_GETFL, 0); + if (fcntl(stderrfd, F_SETFL, flags | O_NONBLOCK) != -1) { + while ((len = read(stderrfd, ibuf, BUFSIZ)) > 0) { + logstream_write(logerr, ibuf, len); + } + } + (void) close(stderrfd); + } + + + (void) close(eventstream[0]); + eventstream[0] = -1; + (void) close(eventstream[1]); + eventstream[1] = -1; + logstream_close(logout, B_FALSE); + logstream_close(logerr, B_FALSE); + return (NULL); +} + +/* + * The meaning of the original legacy values for the zlog-mode evolved over + * time, to the point where the old names no longer made sense. The current + * values are simply positional letters used to indicate various capabilities. + * The following table shows the meaning of the mode values, along with the + * legacy name which we continue to support for compatability. Any future + * capability can add a letter to the left and '-' is implied for existing + * strings. + * + * zlog-mode gz log - tty - ngz log + * --------- ------ --- ------- + * gt- (int) y y n + * g-- (log) y n n + * gtn (nlint) y y y + * g-n (nolog) y n y + * -t- n y n + * --- n n n + * + * This function also obtains any custom name for stdio.log while it is reading + * the zone configuration. + */ +static void +get_mode_logmax(zfd_mode_t *mode) +{ + zone_dochandle_t handle; + struct zone_attrtab attr; + + bzero(mode, sizeof (zfd_mode_t)); + + if ((handle = zonecfg_init_handle()) == NULL) + return; + + if (zonecfg_get_handle(zone_name, handle) != Z_OK) + goto done; + + if (zonecfg_setattrent(handle) != Z_OK) + goto done; + while (zonecfg_getattrent(handle, &attr) == Z_OK) { + if (strcmp(ZLOG_MODE, attr.zone_attr_name) == 0) { + if (strcmp("g--", attr.zone_attr_value) == 0 || + strncmp("log", attr.zone_attr_value, 3) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 3; + mode->zmode_n_addl_devs = 0; + } else if (strcmp("g-n", attr.zone_attr_value) == 0 || + strncmp("nolog", attr.zone_attr_value, 5) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 3; + mode->zmode_n_addl_devs = 2; + } else if (strcmp("gt-", attr.zone_attr_value) == 0 || + strncmp("int", attr.zone_attr_value, 3) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 1; + mode->zmode_n_addl_devs = 0; + } else if (strcmp("gtn", attr.zone_attr_value) == 0 || + strncmp("nlint", attr.zone_attr_value, 5) == 0) { + mode->zmode_gzlogging = B_TRUE; + mode->zmode_n_stddevs = 1; + mode->zmode_n_addl_devs = 1; + } else if (strcmp("-t-", attr.zone_attr_value) == 0) { + mode->zmode_gzlogging = B_FALSE; + mode->zmode_n_stddevs = 1; + mode->zmode_n_addl_devs = 0; + } else if (strcmp("---", attr.zone_attr_value) == 0) { + mode->zmode_gzlogging = B_FALSE; + mode->zmode_n_stddevs = 3; + mode->zmode_n_addl_devs = 0; + } + continue; + } + + if (strcmp(ZLOG_NAME, attr.zone_attr_name) == 0) { + (void) strlcpy(log_name, attr.zone_attr_value, + sizeof (log_name)); + continue; + } + } + (void) zonecfg_endattrent(handle); + +done: + zonecfg_fini_handle(handle); +} + +void +create_log_thread(zlog_t *zlogp) +{ + int res; + + shutting_down = 0; + + get_mode_logmax(&mode); + if (mode.zmode_n_stddevs == 0) + return; + + if (init_zfd_devs(zlogp, &mode) == -1) { + zerror(zlogp, B_FALSE, + "zfd setup: device initialization failed"); + return; + } + + res = thr_create(NULL, 0, srvr, (void *)&mode, 0, + &logger_tid); + if (res != 0) { + zerror(zlogp, B_FALSE, "error %d creating logger thread", res); + logger_tid = 0; + } +} + +void +destroy_log_thread(zlog_t *zlogp) +{ + if (logger_tid != 0) { + int stop = 1; + + shutting_down = 1; + /* break out of poll to shutdown */ + if (eventstream[0] != -1) + (void) write(eventstream[0], &stop, sizeof (stop)); + (void) thr_join(logger_tid, NULL, NULL); + logger_tid = 0; + } + + (void) destroy_zfd_devs(zlogp); +} |