diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2014-12-23 15:59:39 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2014-12-23 15:59:39 +0000 |
commit | 7559f59bb4c3691722b46b2a0e0ede29e8f4a777 (patch) | |
tree | 6cc3316c468e09ff9cc758026a5965b847835caf | |
parent | 6cc95dc1252c4c529be1b7da28efe7dc6918924a (diff) | |
download | illumos-joyent-7559f59bb4c3691722b46b2a0e0ede29e8f4a777.tar.gz |
OS-3524 in order to support interaction with docker containers, need to be able to connect to stdio for init from GZ
OS-3525 in order to support 'docker logs' need to be able to get stdio from zone to log file
-rw-r--r-- | manifest | 3 | ||||
-rw-r--r-- | usr/src/cmd/devfsadm/misc_link.c | 37 | ||||
-rw-r--r-- | usr/src/cmd/zlogin/zlogin.c | 110 | ||||
-rw-r--r-- | usr/src/cmd/zoneadm/zoneadm.c | 4 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/Makefile.com | 4 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/mcap.c | 15 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/vplat.c | 35 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zfd.c | 1248 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.c | 65 | ||||
-rw-r--r-- | usr/src/cmd/zoneadmd/zoneadmd.h | 7 | ||||
-rw-r--r-- | usr/src/lib/brand/lx/zone/platform.xml | 3 | ||||
-rw-r--r-- | usr/src/man/man1/zlogin.1 | 25 | ||||
-rw-r--r-- | usr/src/man/man7d/Makefile | 4 | ||||
-rw-r--r-- | usr/src/man/man7d/zfd.7d | 39 | ||||
-rw-r--r-- | usr/src/uts/common/Makefile.files | 2 | ||||
-rw-r--r-- | usr/src/uts/common/io/pseudo.conf | 9 | ||||
-rw-r--r-- | usr/src/uts/common/io/zfd.c | 815 | ||||
-rw-r--r-- | usr/src/uts/common/sys/Makefile | 1 | ||||
-rw-r--r-- | usr/src/uts/common/sys/zfd.h | 53 | ||||
-rw-r--r-- | usr/src/uts/intel/Makefile.intel | 1 | ||||
-rw-r--r-- | usr/src/uts/intel/zfd/Makefile | 48 | ||||
-rw-r--r-- | usr/src/uts/sparc/Makefile.sparc | 2 | ||||
-rw-r--r-- | usr/src/uts/sparc/zfd/Makefile | 50 |
23 files changed, 2466 insertions, 114 deletions
@@ -4434,6 +4434,7 @@ f usr/include/sys/xti_inet.h 0644 root bin f usr/include/sys/xti_osi.h 0644 root bin f usr/include/sys/xti_xtiopt.h 0644 root bin f usr/include/sys/zcons.h 0644 root bin +f usr/include/sys/zfd.h 0644 root bin f usr/include/sys/zmod.h 0644 root bin f usr/include/sys/zone.h 0644 root bin f usr/include/sysexits.h 0644 root bin @@ -4544,6 +4545,7 @@ f usr/kernel/drv/amd64/smbsrv 0755 root sys f usr/kernel/drv/amd64/sppp 0755 root sys f usr/kernel/drv/amd64/sppptun 0755 root sys f usr/kernel/drv/amd64/zcons 0755 root sys +f usr/kernel/drv/amd64/zfd 0755 root sys f usr/kernel/drv/bpf.conf 0644 root sys f usr/kernel/drv/dump.conf 0644 root sys f usr/kernel/drv/eventfd.conf 0644 root sys @@ -18394,6 +18396,7 @@ f usr/share/man/man7d/xge.7d 0444 root bin f usr/share/man/man7d/yge.7d 0444 root bin f usr/share/man/man7d/zcons.7d 0444 root bin f usr/share/man/man7d/zero.7d 0444 root bin +f usr/share/man/man7d/zfd.7d 0444 root bin d usr/share/man/man7fs 0755 root bin f usr/share/man/man7fs/bootfs.7fs 0444 root bin f usr/share/man/man7fs/ctfs.7fs 0444 root bin diff --git a/usr/src/cmd/devfsadm/misc_link.c b/usr/src/cmd/devfsadm/misc_link.c index 9f4c20ac5e..f37a1227b6 100644 --- a/usr/src/cmd/devfsadm/misc_link.c +++ b/usr/src/cmd/devfsadm/misc_link.c @@ -32,6 +32,7 @@ #include <limits.h> #include <sys/zone.h> #include <sys/zcons.h> +#include <sys/zfd.h> #include <sys/cpuid_drv.h> static int display(di_minor_t minor, di_node_t node); @@ -53,6 +54,7 @@ static int av_create(di_minor_t minor, di_node_t node); static int tsalarm_create(di_minor_t minor, di_node_t node); static int ntwdt_create(di_minor_t minor, di_node_t node); static int zcons_create(di_minor_t minor, di_node_t node); +static int zfd_create(di_minor_t minor, di_node_t node); static int cpuid(di_minor_t minor, di_node_t node); static int glvc(di_minor_t minor, di_node_t node); static int ses_callback(di_minor_t minor, di_node_t node); @@ -177,6 +179,9 @@ static devfsadm_create_t misc_cbt[] = { 
{ "pseudo", "ddi_pseudo", "zcons", TYPE_EXACT | DRV_EXACT, ILEVEL_0, zcons_create, }, + { "pseudo", "ddi_pseudo", "zfd", + TYPE_EXACT | DRV_EXACT, ILEVEL_0, zfd_create, + }, { "pseudo", "ddi_pseudo", CPUID_DRIVER_NAME, TYPE_EXACT | DRV_EXACT, ILEVEL_0, cpuid, }, @@ -225,6 +230,9 @@ static devfsadm_remove_t misc_remove_cbt[] = { ZCONS_SLAVE_NAME ")$", RM_PRE | RM_HOT | RM_ALWAYS, ILEVEL_0, devfsadm_rm_all }, + { "pseudo", "^zfd/" ZONENAME_REGEXP "/(master|slave)/[0-9]+$", + RM_PRE | RM_HOT | RM_ALWAYS, ILEVEL_0, devfsadm_rm_all + }, { "pseudo", "^" CPUID_SELF_NAME "$", RM_ALWAYS | RM_PRE | RM_HOT, ILEVEL_0, devfsadm_rm_all }, @@ -672,6 +680,35 @@ zcons_create(di_minor_t minor, di_node_t node) return (DEVFSADM_CONTINUE); } +static int +zfd_create(di_minor_t minor, di_node_t node) +{ + char *minor_str; + char *zonename; + int *id; + char path[MAXPATHLEN]; + + minor_str = di_minor_name(minor); + + if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname", + &zonename) == -1) + return (DEVFSADM_CONTINUE); + + if (di_prop_lookup_ints(DDI_DEV_T_ANY, node, "zfd_id", &id) == -1) + return (DEVFSADM_CONTINUE); + + if (strncmp(minor_str, "slave", 5) == 0) { + (void) snprintf(path, sizeof (path), "zfd/%s/slave/%d", + zonename, id[0]); + } else { + (void) snprintf(path, sizeof (path), "zfd/%s/master/%d", + zonename, id[0]); + } + (void) devfsadm_mklink(path, node, minor, 0); + + return (DEVFSADM_CONTINUE); +} + /* * /dev/cpu/self/cpuid -> /devices/pseudo/cpuid@0:self */ diff --git a/usr/src/cmd/zlogin/zlogin.c b/usr/src/cmd/zlogin/zlogin.c index a5bd206b11..6a31b9c354 100644 --- a/usr/src/cmd/zlogin/zlogin.c +++ b/usr/src/cmd/zlogin/zlogin.c @@ -156,7 +156,7 @@ static boolean_t forced_login = B_FALSE; static void usage(void) { - (void) fprintf(stderr, gettext("usage: %s [ -inQCES ] [ -e cmdchar ] " + (void) fprintf(stderr, gettext("usage: %s [-inQCIES] [-e cmdchar] " "[-l user] zonename [command [args ...] 
]\n"), pname); exit(2); } @@ -256,7 +256,7 @@ postfork_dropprivs() * with it to determine whether it will allow us to connect. */ static int -get_console_master(const char *zname) +get_interactive_master(const char *zname, int notcons) { int sockfd = -1; struct sockaddr_un servaddr; @@ -264,20 +264,32 @@ get_console_master(const char *zname) char handshake[MAXPATHLEN], c; int msglen; int i = 0, err = 0; + char *sock_str; if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { zperror(gettext("could not create socket")); return (-1); } + if (notcons) { + sock_str = "%s/%s.server_sock"; + } else { + sock_str = "%s/%s.console_sock"; + } + bzero(&servaddr, sizeof (servaddr)); servaddr.sun_family = AF_UNIX; (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path), - "%s/%s.console_sock", ZONES_TMPDIR, zname); + sock_str, ZONES_TMPDIR, zname); if (connect(sockfd, (struct sockaddr *)&servaddr, sizeof (servaddr)) == -1) { - zperror(gettext("Could not connect to zone console")); + if (errno == ENOENT && notcons) + (void) fprintf(stderr, "%s: %s\n", pname, + gettext("Could not connect to zone (is interactive " + "mode enabled?)")); + else + zperror(gettext("Could not connect to zone")); goto bad; } masterfd = sockfd; @@ -315,15 +327,14 @@ get_console_master(const char *zname) * the server died off. 
*/ if (err == -1) { - zperror(gettext("Could not connect to zone console")); + zperror(gettext("Could not connect to zone")); goto bad; } if (strncmp(handshake, "OK", sizeof (handshake)) == 0) return (0); - zerror(gettext("Console is already in use by process ID %s."), - handshake); + zerror(gettext("Zone is already in use by process ID %s."), handshake); bad: (void) close(sockfd); masterfd = -1; @@ -1752,10 +1763,40 @@ get_username() return (nptr->pw_name); } +static boolean_t +is_standalone_int_mode(char *zonename) +{ + boolean_t sa = B_FALSE; + zone_dochandle_t handle; + struct zone_attrtab attr; + + if ((handle = zonecfg_init_handle()) == NULL) + return (sa); + + if (zonecfg_get_handle(zonename, handle) != Z_OK) + goto done; + + if (zonecfg_setattrent(handle) != Z_OK) + goto done; + while (zonecfg_getattrent(handle, &attr) == Z_OK) { + if (strcmp("zlog-mode", attr.zone_attr_name) == 0) { + if (strncmp("int", attr.zone_attr_value, 3) == 0) + sa = B_TRUE; + break; + } + } + (void) zonecfg_endattrent(handle); + +done: + zonecfg_fini_handle(handle); + return (sa); +} + + int main(int argc, char **argv) { - int arg, console = 0; + int arg, console = 0, imode = 0; zoneid_t zoneid; zone_state_t st; char *login = "root"; @@ -1784,7 +1825,7 @@ main(int argc, char **argv) (void) getpname(argv[0]); username = get_username(); - while ((arg = getopt(argc, argv, "inECR:Se:l:Q")) != EOF) { + while ((arg = getopt(argc, argv, "inECIR:Se:l:Q")) != EOF) { switch (arg) { case 'C': console = 1; @@ -1792,6 +1833,14 @@ main(int argc, char **argv) case 'E': nocmdchar = 1; break; + case 'I': + /* + * interactive mode is just a slight variation on the + * console mode. 
+ */ + console = 1; + imode = 1; + break; case 'R': /* undocumented */ if (*optarg != '/') { zerror(gettext("root path must be absolute.")); @@ -1856,7 +1905,7 @@ main(int argc, char **argv) } - if (iflag !=0 && nflag != 0) { + if (iflag != 0 && nflag != 0) { zerror(gettext("-i and -n flags are incompatible")); usage(); } @@ -1975,10 +2024,15 @@ main(int argc, char **argv) } /* - * The console is a separate case from the rest of the code; handle - * it first. + * The console (or standalone interactive mode) is a separate case from + * the rest of the code; handle it first. */ if (console) { + if (imode && !is_standalone_int_mode(zonename)) { + zerror(gettext("the zlog-mode is not interactive")); + return (1); + } + /* * Ensure that zoneadmd for this zone is running. */ @@ -1988,15 +2042,19 @@ main(int argc, char **argv) /* * Make contact with zoneadmd. */ - if (get_console_master(zonename) == -1) + if (get_interactive_master(zonename, imode) == -1) return (1); - if (!quiet) - (void) printf( - gettext("[Connected to zone '%s' console]\n"), - zonename); + if (!quiet) { + if (imode) + (void) printf(gettext("[Connected to zone '%s' " + "interactively]\n"), zonename); + else + (void) printf(gettext("[Connected to zone '%s' " + "console]\n"), zonename); + } - if (set_tty_rawmode(STDIN_FILENO) == -1) { + if (!imode && set_tty_rawmode(STDIN_FILENO) == -1) { reset_tty(); zperror(gettext("failed to set stdin pty to raw mode")); return (1); @@ -2009,11 +2067,17 @@ main(int argc, char **argv) * Run the I/O loop until we get disconnected. 
*/ doio(masterfd, -1, masterfd, -1, -1, B_FALSE); - reset_tty(); - if (!quiet) - (void) printf( - gettext("\n[Connection to zone '%s' console " - "closed]\n"), zonename); + if (!imode) + reset_tty(); + if (!quiet) { + if (imode) + (void) printf(gettext("\n[Interactive " + "connection to zone '%s' closed]\n"), + zonename); + else + (void) printf(gettext("\n[Connection to zone " + "'%s' console closed]\n"), zonename); + } return (0); } diff --git a/usr/src/cmd/zoneadm/zoneadm.c b/usr/src/cmd/zoneadm/zoneadm.c index 6fdd00e39c..396fc91699 100644 --- a/usr/src/cmd/zoneadm/zoneadm.c +++ b/usr/src/cmd/zoneadm/zoneadm.c @@ -3946,8 +3946,8 @@ cleanup_zonepath(char *zonepath, boolean_t all) * exist if the zone was force-attached after a * migration. */ - char *std_entries[] = {"dev", "lastexited", "lu", "root", - "SUNWattached.xml", NULL}; + char *std_entries[] = {"dev", "lastexited", "logs", "lu", + "root", "SUNWattached.xml", NULL}; /* (MAXPATHLEN * 5) is for the 5 std_entries dirs */ char cmdbuf[sizeof (RMCOMMAND) + (MAXPATHLEN * 5) + 64]; diff --git a/usr/src/cmd/zoneadmd/Makefile.com b/usr/src/cmd/zoneadmd/Makefile.com index 162d1f0219..c8becc3e8c 100644 --- a/usr/src/cmd/zoneadmd/Makefile.com +++ b/usr/src/cmd/zoneadmd/Makefile.com @@ -20,7 +20,7 @@ # # Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. -# Copyright (c) 2011, Joyent, Inc. All rights reserved. +# Copyright 2014, Joyent, Inc. All rights reserved. 
# PROG= zoneadmd @@ -30,7 +30,7 @@ include ../../Makefile.ctf ROOTCMDDIR= $(ROOTLIB)/zones -OBJS= zoneadmd.o zcons.o vplat.o mcap.o +OBJS= zoneadmd.o zcons.o zfd.o vplat.o mcap.o CFLAGS += $(CCVERBOSE) LDLIBS += -lsocket -lzonecfg -lnsl -ldevinfo -ldevice -lnvpair \ diff --git a/usr/src/cmd/zoneadmd/mcap.c b/usr/src/cmd/zoneadmd/mcap.c index 44917b0024..16cd2dd07a 100644 --- a/usr/src/cmd/zoneadmd/mcap.c +++ b/usr/src/cmd/zoneadmd/mcap.c @@ -139,8 +139,6 @@ uint64_t prev_fast_rss = 0; uint64_t fast_rss = 0; uint64_t accurate_rss = 0; -static char zonename[ZONENAME_MAX]; -static char zonepath[MAXPATHLEN]; static char zoneproc[MAXPATHLEN]; static char debug_log[MAXPATHLEN]; static zoneid_t zid; @@ -907,7 +905,7 @@ get_mcap_tunables() if ((handle = zonecfg_init_handle()) == NULL) return; - if (zonecfg_get_handle(zonename, handle) != Z_OK) + if (zonecfg_get_handle(zone_name, handle) != Z_OK) goto done; /* Reset to defaults in case rebooting and settings have changed */ @@ -1146,22 +1144,13 @@ void create_mcap_thread(zlog_t *zlogp, zoneid_t id) { int res; - char brandname[MAXNAMELEN]; shutting_down = 0; zid = id; logp = zlogp; - (void) getzonenamebyid(zid, zonename, sizeof (zonename)); - - if (zone_get_zonepath(zonename, zonepath, sizeof (zonepath)) != 0) - zerror(zlogp, B_FALSE, "zone %s missing zonepath", zonename); - - brandname[0] = '\0'; - if (zone_get_brand(zonename, brandname, sizeof (brandname)) != 0) - zerror(zlogp, B_FALSE, "zone %s missing brand", zonename); /* all but the lx brand currently use /proc */ - if (strcmp(brandname, "lx") == 0) { + if (strcmp(brand_name, "lx") == 0) { (void) snprintf(zoneproc, sizeof (zoneproc), "%s/root/native/proc", zonepath); } else { diff --git a/usr/src/cmd/zoneadmd/vplat.c b/usr/src/cmd/zoneadmd/vplat.c index e63f87d2a0..5a86b1cf50 100644 --- a/usr/src/cmd/zoneadmd/vplat.c +++ b/usr/src/cmd/zoneadmd/vplat.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 
- * Copyright (c) 2013, Joyent Inc. All rights reserved. + * Copyright 2014, Joyent Inc. All rights reserved. */ /* @@ -1691,7 +1691,6 @@ static int mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) { char rootpath[MAXPATHLEN]; - char zonepath[MAXPATHLEN]; char brand[MAXNAMELEN]; char luroot[MAXPATHLEN]; int i, num_fs = 0; @@ -1709,11 +1708,6 @@ mount_filesystems(zlog_t *zlogp, zone_mnt_t mount_cmd) goto bad; } - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone path"); - goto bad; - } - if (zone_get_rootpath(zone_name, rootpath, sizeof (rootpath)) != Z_OK) { zerror(zlogp, B_TRUE, "unable to determine zone root"); goto bad; @@ -3601,17 +3595,11 @@ validate_rootds_label(zlog_t *zlogp, char *rootpath, m_label_t *zone_sl) zfs_handle_t *zhp; libzfs_handle_t *hdl; m_label_t ds_sl; - char zonepath[MAXPATHLEN]; char ds_hexsl[MAXNAMELEN]; if (!is_system_labeled()) return (0); - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_TRUE, "unable to determine zone path"); - return (-1); - } - if (!is_zonepath_zfs(zonepath)) return (0); @@ -4843,7 +4831,7 @@ write_index_file(zoneid_t zoneid) int vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) { - char zonepath[MAXPATHLEN]; + char zpath[MAXPATHLEN]; if (mount_cmd == Z_MNT_BOOT && validate_datasets(zlogp) != 0) { lofs_discard_mnttab(); @@ -4854,15 +4842,11 @@ vplat_bringup(zlog_t *zlogp, zone_mnt_t mount_cmd, zoneid_t zoneid) * Before we try to mount filesystems we need to create the * attribute backing store for /dev */ - if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { - lofs_discard_mnttab(); - return (-1); - } - resolve_lofs(zlogp, zonepath, sizeof (zonepath)); + (void) strlcpy(zpath, zonepath, sizeof (zpath)); + resolve_lofs(zlogp, zpath, sizeof (zpath)); /* Make /dev directory owned by root, grouped sys */ - if (make_one_dir(zlogp, zonepath, "/dev", 
DEFAULT_DIR_MODE, - 0, 3) != 0) { + if (make_one_dir(zlogp, zpath, "/dev", DEFAULT_DIR_MODE, 0, 3) != 0) { lofs_discard_mnttab(); return (-1); } @@ -4981,7 +4965,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, zoneid_t zoneid; int res; char pool_err[128]; - char zpath[MAXPATHLEN]; char cmdbuf[MAXPATHLEN]; brand_handle_t bh = NULL; dladm_status_t status; @@ -5033,12 +5016,6 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, goto error; } - /* Get the zonepath of this zone */ - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - goto error; - } - /* Get a handle to the brand info for this zone */ if ((bh = brand_open(brand_name)) == NULL) { zerror(zlogp, B_FALSE, "unable to determine zone brand"); @@ -5049,7 +5026,7 @@ vplat_teardown(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, * brand a chance to cleanup any custom configuration. */ (void) strcpy(cmdbuf, EXEC_PREFIX); - if (brand_get_halt(bh, zone_name, zpath, cmdbuf + EXEC_LEN, + if (brand_get_halt(bh, zone_name, zonepath, cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN) < 0) { brand_close(bh); zerror(zlogp, B_FALSE, "unable to determine branded zone's " diff --git a/usr/src/cmd/zoneadmd/zfd.c b/usr/src/cmd/zoneadmd/zfd.c new file mode 100644 index 0000000000..1e1ac48d15 --- /dev/null +++ b/usr/src/cmd/zoneadmd/zfd.c @@ -0,0 +1,1248 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + * Copyright 2014 Joyent, Inc. All rights reserved. + */ + +/* + * Zone file descriptor support is used as a mechanism for a process inside the + * zone to either log messages to the GZ zoneadmd or as a way to interact + * directly with the process (via zlogin -I). The zfd thread is modeled on + * the zcons thread so see the comment header in zcons.c for a general overview. + * Unlike with zcons, which has a single endpoint within the zone and a single + * endpoint used by zoneadmd, we setup multiple endpoints within the zone. + * In the interactive mode we setup fd 0, 1 and 2 for use as stdin, stdout and + * stderr. In the logging mode we only setup fd 1 and 2 for use as stdout and + * stderr. 
+ */ + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/termios.h> +#include <sys/zfd.h> +#include <sys/mkdev.h> + +#include <assert.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <stropts.h> +#include <thread.h> +#include <ucred.h> +#include <unistd.h> +#include <zone.h> +#include <signal.h> +#include <wchar.h> + +#include <libdevinfo.h> +#include <libdevice.h> +#include <libzonecfg.h> + +#include <syslog.h> +#include <sys/modctl.h> + +#include "zoneadmd.h" + +static zlog_t *zlogp; +static int shutting_down = 0; +static thread_t logger_tid; +static int logfd = -1; + +/* + * The eventstream is a simple one-directional flow of messages implemented + * with a pipe. It is used to wake up the poller when it needs to shutdown. + */ +static int eventstream[2] = {-1, -1}; + +#define LOGNAME "stdio.log" +#define ZLOG_MODE "zlog-mode" +#define ZFDNEX_DEVTREEPATH "/pseudo/zfdnex@2" +#define ZFDNEX_FILEPATH "/devices/pseudo/zfdnex@2" +#define SERVER_SOCKPATH ZONES_TMPDIR "/%s.server_sock" +#define ZTTY_RETRY 5 + +typedef enum { + ZLOG_NONE = 0, + ZLOG_LOG, + ZLOG_INTERACTIVE, +} zlog_mode_t; + +/* + * count_zfd_devs() and its helper count_cb() do a walk of the subtree of the + * device tree where zfd nodes are represented. The goal is to count zfd + * instances already setup for a zone with the given name. + * + * Note: this algorithm is a linear search of nodes in the zfdnex subtree + * of the device tree, and could be a scalability problem, but I don't see + * how to avoid it. + */ + +/* + * cb_data is shared by count_cb and destroy_cb for simplicity. 
+ */ +struct cb_data { + zlog_t *zlogp; + int found; + int killed; +}; + +static int +count_cb(di_node_t node, void *arg) +{ + struct cb_data *cb = (struct cb_data *)arg; + char *prop_data; + + if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname", + &prop_data) != -1) { + assert(prop_data != NULL); + if (strcmp(prop_data, zone_name) == 0) { + cb->found++; + return (DI_WALK_CONTINUE); + } + } + return (DI_WALK_CONTINUE); +} + +static int +count_zfd_devs(zlog_t *zlogp) +{ + di_node_t root; + struct cb_data cb; + + bzero(&cb, sizeof (cb)); + cb.zlogp = zlogp; + + if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) { + zerror(zlogp, B_TRUE, "di_init failed"); + return (-1); + } + + (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, count_cb); + di_fini(root); + return (cb.found); +} + +/* + * destroy_zfd_devs() and its helper destroy_cb() tears down any zfd instances + * associated with this zone. If things went very wrong, we might have an + * incorrect number of instances hanging around. This routine hunts down and + * tries to remove all of them. Of course, if the fd is open, the instance will + * not detach, which is a potential issue. 
+ */ +static int +destroy_cb(di_node_t node, void *arg) +{ + struct cb_data *cb = (struct cb_data *)arg; + char *prop_data; + char *tmp; + char devpath[MAXPATHLEN]; + devctl_hdl_t hdl; + + if (di_prop_lookup_strings(DDI_DEV_T_ANY, node, "zfd_zname", + &prop_data) == -1) + return (DI_WALK_CONTINUE); + + assert(prop_data != NULL); + if (strcmp(prop_data, zone_name) != 0) { + /* this is a zfd for a different zone */ + return (DI_WALK_CONTINUE); + } + + cb->found++; + tmp = di_devfs_path(node); + (void) snprintf(devpath, sizeof (devpath), "/devices/%s", tmp); + di_devfs_path_free(tmp); + + if ((hdl = devctl_device_acquire(devpath, 0)) == NULL) { + zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, " + "but it could not be controlled.", devpath); + return (DI_WALK_CONTINUE); + } + if (devctl_device_remove(hdl) == 0) { + cb->killed++; + } else { + zerror(cb->zlogp, B_TRUE, "WARNING: zfd %s found, " + "but it could not be removed.", devpath); + } + devctl_release(hdl); + return (DI_WALK_CONTINUE); +} + +static int +destroy_zfd_devs(zlog_t *zlogp) +{ + di_node_t root; + struct cb_data cb; + + bzero(&cb, sizeof (cb)); + cb.zlogp = zlogp; + + if ((root = di_init(ZFDNEX_DEVTREEPATH, DINFOCPYALL)) == DI_NODE_NIL) { + zerror(zlogp, B_TRUE, "di_init failed"); + return (-1); + } + + (void) di_walk_node(root, DI_WALK_CLDFIRST, (void *)&cb, destroy_cb); + + di_fini(root); + return (0); +} + +static void +make_tty(zlog_t *zlogp, int id) +{ + int i; + int fd = -1; + char stdpath[MAXPATHLEN]; + + /* + * Open the master side of the dev and issue the ZFD_MAKETTY ioctl, + * which will cause the various tty-related streams modules to be + * pushed when the slave opens the device. + * + * In very rare cases the open returns ENOENT if devfs doesn't have + * everything setup yet due to heavy zone startup load. Wait for + * 1 sec. and retry a few times. Even if we can't setup tty mode + * we still move on. 
+ */ + (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d", + zone_name, id); + + for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) { + fd = open(stdpath, O_RDWR | O_NOCTTY); + if (fd >= 0 || errno != ENOENT) + break; + (void) sleep(1); + } + if (fd == -1) { + zerror(zlogp, B_TRUE, "ERROR: could not open zfd %d for " + "zone %s to set tty mode", id, zone_name); + } else { + /* + * This ioctl can occasionally return ENXIO if devfs doesn't + * have everything plumbed up yet due to heavy zone startup + * load. Wait for 1 sec. and retry a few times before we give + * up. + */ + for (i = 0; !shutting_down && i < ZTTY_RETRY; i++) { + if (ioctl(fd, ZFD_MAKETTY) == 0) { + break; + } else if (errno != ENXIO) { + break; + } + (void) sleep(1); + } + } + + if (fd != -1) + (void) close(fd); +} + +/* + * init_zfd_devs() drives the device-tree configuration of the zone fd devices. + * The general strategy is to use the libdevice (devctl) interfaces to + * instantiate 2 or 3 new zone fd nodes. We do a lot of sanity checking, and + * are careful to reuse a dev if one exists. + * + * Once the devices are in the device tree, we kick devfsadm via + * di_devlink_init() to ensure that the appropriate symlinks (to the master and + * slave fd devices) are placed in /dev in the global zone. + */ +static int +init_zfd_dev(zlog_t *zlogp, devctl_hdl_t bus_hdl, int id) +{ + int rv = -1; + devctl_ddef_t ddef_hdl = NULL; + devctl_hdl_t dev_hdl = NULL; + + if ((ddef_hdl = devctl_ddef_alloc("zfd", 0)) == NULL) { + zerror(zlogp, B_TRUE, "failed to allocate ddef handle"); + goto error; + } + + /* + * Set four properties on this node; the first is the name of the + * zone; the second is the id of this zfd device; the third is a + * flag which lets pseudo know that it is OK to automatically + * allocate an instance # for this device; the fourth tells the + * device framework not to auto-detach this node-- we need the node + * to still be there when we ask devfsadmd to make links, and when + * we need to open it. 
+ */ + if (devctl_ddef_string(ddef_hdl, "zfd_zname", zone_name) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd_zname property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "zfd_id", id) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd_id property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "auto-assign-instance", 1) == -1) { + zerror(zlogp, B_TRUE, "failed to create auto-assign-instance " + "property"); + goto error; + } + if (devctl_ddef_int(ddef_hdl, "ddi-no-autodetach", 1) == -1) { + zerror(zlogp, B_TRUE, "failed to create ddi-no-auto-detach " + "property"); + goto error; + } + if (devctl_bus_dev_create(bus_hdl, ddef_hdl, 0, &dev_hdl) == -1) { + zerror(zlogp, B_TRUE, "failed to create zfd node"); + goto error; + } + rv = 0; + +error: + if (ddef_hdl) + devctl_ddef_free(ddef_hdl); + if (dev_hdl) + devctl_release(dev_hdl); + return (rv); +} + +static int +init_zfd_devs(zlog_t *zlogp, int start) +{ + devctl_hdl_t bus_hdl = NULL; + di_devlink_handle_t dl = NULL; + int rv = -1; + int ndevs; + int i; + + /* + * Don't re-setup zone fd devs if they already exist; just + * skip ahead to making devlinks, which we do for sanity's sake. + */ + ndevs = count_zfd_devs(zlogp); + if (ndevs == (3 - start)) + goto devlinks; + + if (ndevs > 0 || ndevs == -1) { + if (destroy_zfd_devs(zlogp) == -1) + goto error; + } + + /* + * Time to make the devices. + */ + if ((bus_hdl = devctl_bus_acquire(ZFDNEX_FILEPATH, 0)) == NULL) { + zerror(zlogp, B_TRUE, "devctl_bus_acquire failed"); + goto error; + } + + for (i = start; i < 3; i++) { + if (init_zfd_dev(zlogp, bus_hdl, i) != 0) + goto error; + } + +devlinks: + if ((dl = di_devlink_init("zfd", DI_MAKE_LINK)) == NULL) { + zerror(zlogp, B_TRUE, "failed to create devlinks"); + goto error; + } + + (void) di_devlink_fini(&dl); + rv = 0; + + /* + * We know that start is 0 when we're interactive and that is the + * only time we want to look like a tty. 
+ */ + if (start == 0) { + for (i = start; i < 3; i++) + make_tty(zlogp, i); + } + +error: + if (bus_hdl) + devctl_release(bus_hdl); + return (rv); +} + +static int +init_server_sock(zlog_t *zlogp) +{ + int servfd; + struct sockaddr_un servaddr; + + bzero(&servaddr, sizeof (servaddr)); + servaddr.sun_family = AF_UNIX; + (void) snprintf(servaddr.sun_path, sizeof (servaddr.sun_path), + SERVER_SOCKPATH, zone_name); + + if ((servfd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + zerror(zlogp, B_TRUE, "server setup: could not create socket"); + return (-1); + } + (void) unlink(servaddr.sun_path); + + if (bind(servfd, (struct sockaddr *)&servaddr, + sizeof (servaddr)) == -1) { + zerror(zlogp, B_TRUE, + "server setup: could not bind to socket"); + goto out; + } + + if (listen(servfd, 4) == -1) { + zerror(zlogp, B_TRUE, + "server setup: could not listen on socket"); + goto out; + } + return (servfd); + +out: + (void) unlink(servaddr.sun_path); + (void) close(servfd); + return (-1); +} + +static void +destroy_server_sock(int servfd) +{ + char path[MAXPATHLEN]; + + (void) snprintf(path, sizeof (path), SERVER_SOCKPATH, zone_name); + (void) unlink(path); + (void) shutdown(servfd, SHUT_RDWR); + (void) close(servfd); +} + +/* + * Read the "ident" string from the client's descriptor; this routine also + * tolerates being called with pid=NULL, for times when you want to "eat" + * the ident string from a client without saving it. 
+ */ +static int +get_client_ident(int clifd, pid_t *pid, char *locale, size_t locale_len) +{ + char buf[BUFSIZ], *bufp; + size_t buflen = sizeof (buf); + char c = '\0'; + int i = 0, r; + + /* "eat up the ident string" case, for simplicity */ + if (pid == NULL) { + assert(locale == NULL && locale_len == 0); + while (read(clifd, &c, 1) == 1) { + if (c == '\n') + return (0); + } + } + + bzero(buf, sizeof (buf)); + while ((buflen > 1) && (r = read(clifd, &c, 1)) == 1) { + buflen--; + if (c == '\n') + break; + + buf[i] = c; + i++; + } + if (r == -1) + return (-1); + + /* + * We've filled the buffer, but still haven't seen \n. Keep eating + * until we find it; we don't expect this to happen, but this is + * defensive. + */ + if (c != '\n') { + while ((r = read(clifd, &c, sizeof (c))) > 0) + if (c == '\n') + break; + } + + /* + * Parse buffer for message of the form: IDENT <pid> <locale> + */ + bufp = buf; + if (strncmp(bufp, "IDENT ", 6) != 0) + return (-1); + bufp += 6; + errno = 0; + *pid = strtoll(bufp, &bufp, 10); + if (errno != 0) + return (-1); + + while (*bufp != '\0' && isspace(*bufp)) + bufp++; + (void) strlcpy(locale, bufp, locale_len); + + return (0); +} + +static int +accept_client(int servfd, pid_t *pid, char *locale, size_t locale_len) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen; + int flags; + + clilen = sizeof (cliaddr); + connfd = accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + if (connfd == -1) + return (-1); + if (get_client_ident(connfd, pid, locale, locale_len) == -1) { + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); + return (-1); + } + (void) write(connfd, "OK\n", 3); + + flags = fcntl(connfd, F_GETFD, 0); + if (flags != -1) + (void) fcntl(connfd, F_SETFD, flags | O_NONBLOCK | FD_CLOEXEC); + + return (connfd); +} + +static void +reject_client(int servfd, pid_t clientpid) +{ + int connfd; + struct sockaddr_un cliaddr; + socklen_t clilen; + char nak[MAXPATHLEN]; + + clilen = sizeof (cliaddr); + connfd = 
accept(servfd, (struct sockaddr *)&cliaddr, &clilen); + + /* + * After getting its ident string, tell client to get lost. + */ + if (get_client_ident(connfd, NULL, NULL, 0) == 0) { + (void) snprintf(nak, sizeof (nak), "%lu\n", + clientpid); + (void) write(connfd, nak, strlen(nak)); + } + (void) shutdown(connfd, SHUT_RDWR); + (void) close(connfd); +} + +/* + * Check to see if the client at the other end of the socket is still alive; we + * know it is not if it throws EPIPE at us when we try to write an otherwise + * harmless 0-length message to it. + */ +static int +test_client(int clifd) +{ + if ((write(clifd, "", 0) == -1) && errno == EPIPE) + return (-1); + return (0); +} + +/* + * This routine drives the interactive I/O loop. It polls for input from the + * zone side of the fd (output to stdout/stderr), and from the client + * (input to the zone's stdin). Additionally, it polls on the server fd, + * and disconnects any clients that might try to hook up with the zone while + * the fd's are in use. + * + * When the client first calls us up, it is expected to send a line giving its + * "identity"; this consists of the string 'IDENT <pid> <locale>'. This is so + * that we can report that the fd's are busy, along with some diagnostics + * about who has them busy; the locale is ignored here but kept for + * compatibility with the zlogin code when running on the zone's console. + * + * We need to handle the case where there is no server within the zone (or + * the server gets stuck) and data that we're writing to the zone server's + * stdin fills the pipe. Because open_fd() always opens non-blocking our + * writes could return -1 with EAGAIN. Since we ignore errors on the write + * to stdin, we won't get blocked. 
+ */ +static void +do_zfd_io(int servfd, int stdinfd, int stdoutfd, int stderrfd) +{ + struct pollfd pollfds[5]; + char ibuf[BUFSIZ]; + int cc, ret; + int clifd = -1; + int pollerr = 0; + char clilocale[MAXPATHLEN]; + pid_t clipid = 0; + + /* client, watch for read events */ + pollfds[0].fd = clifd; + pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | + POLLPRI | POLLERR | POLLHUP | POLLNVAL; + + /* stdout, watch for read events */ + pollfds[1].fd = stdoutfd; + pollfds[1].events = pollfds[0].events; + + /* stderr, watch for read events */ + pollfds[2].fd = stderrfd; + pollfds[2].events = pollfds[0].events; + + /* the server socket; watch for events (new connections) */ + pollfds[3].fd = servfd; + pollfds[3].events = pollfds[0].events; + + /* the eventstram; any input means the zone is halting */ + pollfds[4].fd = eventstream[1]; + pollfds[4].events = pollfds[0].events; + + while (!shutting_down) { + pollfds[0].revents = pollfds[1].revents = 0; + pollfds[2].revents = pollfds[3].revents = 0; + pollfds[4].revents = 0; + + ret = poll(pollfds, 5, -1); + if (ret == -1 && errno != EINTR) { + zerror(zlogp, B_TRUE, "poll failed"); + /* we are hosed, close connection */ + break; + } + + /* event from client side */ + if (pollfds[0].revents) { + if (pollfds[0].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(clifd, ibuf, BUFSIZ); + if (cc <= 0 && (errno != EINTR) && + (errno != EAGAIN)) { + break; + } + /* + * See comment for this function on what + * happens if there is no reader in the zone. 
+ */ + /* cc is -1 after EINTR/EAGAIN; only forward real data */ + if (cc > 0) + (void) write(stdinfd, ibuf, cc); + } else { + pollerr = pollfds[0].revents; + zerror(zlogp, B_FALSE, "closing connection " + "with client, pollerr %d\n", pollerr); + break; + } + } + + /* event from stdout */ + if (pollfds[1].revents) { + if (pollfds[1].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stdoutfd, ibuf, BUFSIZ); + if (cc <= 0 && (errno != EINTR) && + (errno != EAGAIN)) + break; + /* + * Lose I/O if no one is listening + */ + if (clifd != -1 && cc > 0) + (void) write(clifd, ibuf, cc); + } else { + pollerr = pollfds[1].revents; + zerror(zlogp, B_FALSE, + "closing connection with stdout zfd, " + "pollerr %d\n", pollerr); + break; + } + } + + /* event from stderr */ + if (pollfds[2].revents) { + if (pollfds[2].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stderrfd, ibuf, BUFSIZ); + if (cc <= 0 && (errno != EINTR) && + (errno != EAGAIN)) + break; + /* + * Lose I/O if no one is listening + */ + if (clifd != -1 && cc > 0) + (void) write(clifd, ibuf, cc); + } else { + pollerr = pollfds[2].revents; + zerror(zlogp, B_FALSE, + "closing connection with stderr zfd, " + "pollerr %d\n", pollerr); + break; + } + } + + /* event from server socket */ + if (pollfds[3].revents && + (pollfds[3].revents & (POLLIN | POLLRDNORM))) { + if (clifd != -1) { + /* + * Test the client to see if it is really + * still alive. If it has died but we + * haven't yet detected that, we might + * deny a legitimate connect attempt. If it + * is dead, we break out; once we tear down + * the old connection, the new connection + * will happen. + */ + if (test_client(clifd) == -1) { + break; + } + /* we're already handling a client */ + reject_client(servfd, clipid); + + } else if ((clifd = accept_client(servfd, &clipid, + clilocale, sizeof (clilocale))) != -1) { + pollfds[0].fd = clifd; + + } else { + break; + } + } + + /* + * Watch for events on the eventstream.
This is how we get + * notified of the zone halting, etc. It provides us a + * "wakeup" from poll when important things happen, which + * is good. + */ + if (pollfds[4].revents) { + break; + } + } + + if (clifd != -1) { + (void) shutdown(clifd, SHUT_RDWR); + (void) close(clifd); + } +} + +/* + * Copy the first slen bytes of sbuf into dbuf with json escapes applied and + * NUL-terminate the result. The input is not modified and does not need to + * be NUL-terminated. Since the escaped text can be larger than the source, + * the output is truncated (at a whole character) once dbuf cannot hold + * another worst-case escape. Invalid multibyte bytes are skipped and wide + * characters above 0xffff are dropped. + */ +static void +escape_json(char *sbuf, int slen, char *dbuf, int dlen) +{ + int i = 0; + mbstate_t mbr; + wchar_t c; + size_t sz; + + if (dlen <= 0) + return; + + bzero(&mbr, sizeof (mbr)); + + /* + * The longest escape ("\uXXXX") is 6 bytes; only start a character if + * it and the terminating NUL are guaranteed to fit in dbuf. + */ + while (slen > 0 && (dlen - i) > 6) { + sz = mbrtowc(&c, sbuf, (size_t)slen, &mbr); + if (sz == (size_t)-2) { + /* incomplete character at the end of the input */ + break; + } + if (sz == (size_t)-1) { + /* invalid sequence; skip a byte and resynchronize */ + bzero(&mbr, sizeof (mbr)); + sbuf++; + slen--; + continue; + } + if (sz == 0) { + /* embedded NUL; it occupies one input byte */ + sz = 1; + } + + switch (c) { + case '\\': + dbuf[i++] = '\\'; + dbuf[i++] = '\\'; + break; + + case '"': + dbuf[i++] = '\\'; + dbuf[i++] = '"'; + break; + + case '\b': + dbuf[i++] = '\\'; + dbuf[i++] = 'b'; + break; + + case '\f': + dbuf[i++] = '\\'; + dbuf[i++] = 'f'; + break; + + case '\n': + dbuf[i++] = '\\'; + dbuf[i++] = 'n'; + break; + + case '\r': + dbuf[i++] = '\\'; + dbuf[i++] = 'r'; + break; + + case '\t': + dbuf[i++] = '\\'; + dbuf[i++] = 't'; + break; + + default: + if ((c >= 0x00 && c <= 0x1f) || + (c > 0x7f && c <= 0xffff)) { + + i += snprintf(&dbuf[i], (dlen - i), "\\u%04x", + (int)(0xffff & c)); + } else if (c >= 0x20 && c <= 0x7f) { + dbuf[i++] = 0xff & c; + } + + break; + } + sbuf += sz; + slen -= (int)sz; + } + + dbuf[i] = '\0'; +} + +/* + * We output to the log file as json. + * ex. for string 'msg\n' on the zone's stdout: + * {"log":"msg\n","stream":"stdout","time":"2014-10-24T20:12:11.101973117Z"} + * + * We use ns in the last field of the timestamp for compatibility.
+ */ +static void +wr_log_msg(char *buf, int len, int from) +{ + struct timeval tv; + int olen; + char ts[64]; + char nbuf[BUFSIZ * 2]; + /* room for the escaped message plus the fixed json framing */ + char obuf[BUFSIZ * 2 + 128]; + + escape_json(buf, len, nbuf, sizeof (nbuf)); + + if (gettimeofday(&tv, NULL) != 0) + return; + (void) strftime(ts, sizeof (ts), "%FT%T", gmtime(&tv.tv_sec)); + + /* zero-pad the nanoseconds so they read as a 9 digit fraction */ + olen = snprintf(obuf, sizeof (obuf), + "{\"log\":\"%s\",\"stream\":\"%s\",\"time\":\"%s.%09ldZ\"}\n", + nbuf, (from == 1) ? "stdout" : "stderr", ts, + (long)(tv.tv_usec * 1000)); + if (olen < 0) + return; + /* snprintf returns the untruncated length; never write past obuf */ + if ((size_t)olen >= sizeof (obuf)) + olen = sizeof (obuf) - 1; + + (void) write(logfd, obuf, olen); +} + +/* + * This routine runs the log file I/O loop. It polls for input from the + * zone's stdout and stderr, formats the msg in json and writes it to the + * log file. + */ +static void +do_zfd_logging(int stdoutfd, int stderrfd) +{ + struct pollfd pollfds[3]; + char ibuf[BUFSIZ]; + int cc, ret; + int pollerr = 0; + + /* stdout, watch for read events */ + pollfds[0].fd = stdoutfd; + pollfds[0].events = POLLIN | POLLRDNORM | POLLRDBAND | + POLLPRI | POLLERR | POLLHUP | POLLNVAL; + + /* stderr, watch for read events */ + pollfds[1].fd = stderrfd; + pollfds[1].events = pollfds[0].events; + + /* the eventstream; any input means the zone is halting */ + pollfds[2].fd = eventstream[1]; + pollfds[2].events = pollfds[0].events; + + while (!shutting_down) { + pollfds[0].revents = 0; + pollfds[1].revents = 0; + pollfds[2].revents = 0; + + ret = poll(pollfds, 3, -1); + if (ret == -1 && errno != EINTR) { + zerror(zlogp, B_TRUE, "poll failed"); + /* we are hosed, shutdown logger */ + break; + } + + /* event from zone's stdout */ + if (pollfds[0].revents) { + if (pollfds[0].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stdoutfd, ibuf, BUFSIZ); + if (cc <= 0 && errno != EINTR && + errno != EAGAIN) + break; + if (cc > 0) + wr_log_msg(ibuf, cc, 1); + } else { + pollerr = pollfds[0].revents; + zerror(zlogp, B_FALSE, "closing connection " + "with zfd stdout, pollerr %d\n", pollerr); + break; + } + } + + /* event from zone's
stderr */ + if (pollfds[1].revents) { + if (pollfds[1].revents & + (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) { + errno = 0; + cc = read(stderrfd, ibuf, BUFSIZ); + if (cc <= 0 && errno != EINTR && + errno != EAGAIN) + break; + if (cc > 0) + wr_log_msg(ibuf, cc, 2); + } else { + pollerr = pollfds[1].revents; + zerror(zlogp, B_FALSE, "closing connection " + "with zfd stderr, pollerr %d\n", pollerr); + break; + } + } + + + /* + * Watch for events on the eventstream. This is how we get + * notified of the zone halting. It provides us a "wakeup" + * from poll. + */ + if (pollfds[2].revents) + break; + } + + (void) close(logfd); + logfd = -1; +} + +static int +open_fd(int id) +{ + int fd; + int flag = O_NONBLOCK | O_NOCTTY | O_CLOEXEC; + int retried = 0; + char stdpath[MAXPATHLEN]; + + (void) snprintf(stdpath, sizeof (stdpath), "/dev/zfd/%s/master/%d", + zone_name, id); + + if (id == 0) { + /* zone's stdin, so we're writing to it */ + flag |= O_WRONLY; + } else { + /* zone's stdout or stderr, so we're reading from it */ + flag |= O_RDONLY; + } + + while (!shutting_down) { + if ((fd = open(stdpath, flag)) != -1) + return (fd); + + if (retried++ > 60) + break; + + (void) sleep(1); + } + + return (-1); +} + +/* + * Body of the worker thread to perform interactive IO to the stdin, stdout and + * stderr zfd's. + * + * The stdin, stdout and stderr are from the perspective of the process inside + * the zone, so the zoneadmd view is opposite (i.e. we write to the stdin fd + * and read from the stdout/stderr fds). 
+ */ +static void +interactive() +{ + int serverfd = -1; + int stdinfd = -1; + int stdoutfd = -1; + int stderrfd = -1; + + if (pipe(eventstream) != 0) { + zerror(zlogp, B_TRUE, "failed to open interactive control " + "pipe"); + return; + } + + while (!shutting_down) { + if ((serverfd = init_server_sock(zlogp)) == -1) { + zerror(zlogp, B_FALSE, + "server setup: socket initialization failed"); + goto death; + } + + if (!shutting_down) { + if ((stdinfd = open_fd(0)) == -1) { + zerror(zlogp, B_TRUE, + "failed to open stdin zfd"); + goto death; + } + + /* + * Setting RPROTDIS on the stream means that the + * control portion of messages received (which we don't + * care about) will be discarded by the stream head. If + * we allowed such messages, we wouldn't be able to use + * read(2), as it fails (EBADMSG) when a message with a + * control element is received. + */ + if (ioctl(stdinfd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(zlogp, B_TRUE, + "failed to set options on stdin zfd"); + goto death; + } + } + + if (!shutting_down) { + if ((stdoutfd = open_fd(1)) == -1) { + zerror(zlogp, B_TRUE, + "failed to open stdout zfd"); + goto death; + } + + if (ioctl(stdoutfd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(zlogp, B_TRUE, + "failed to set options on stdout zfd"); + goto death; + } + } + + if (!shutting_down) { + if ((stderrfd = open_fd(2)) == -1) { + zerror(zlogp, B_TRUE, + "failed to open stderr zfd"); + goto death; + } + + if (ioctl(stderrfd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(zlogp, B_TRUE, + "failed to set options on stderr zfd"); + goto death; + } + } + + do_zfd_io(serverfd, stdinfd, stdoutfd, stderrfd); +death: + destroy_server_sock(serverfd); + + (void) close(stdinfd); + (void) close(stdoutfd); + (void) close(stderrfd); + } + + (void) close(eventstream[0]); + eventstream[0] = -1; + (void) close(eventstream[1]); + eventstream[1] = -1; +} + +static void +open_logfile() +{ + char logpath[MAXPATHLEN]; + + logfd = -1; + + (void) snprintf(logpath, sizeof 
(logpath), "%s/logs", zonepath); + (void) mkdir(logpath, 0700); + + (void) snprintf(logpath, sizeof (logpath), "%s/logs/%s", zonepath, + LOGNAME); + + if ((logfd = open(logpath, O_WRONLY | O_APPEND | O_CREAT, 0600)) == -1) + zerror(zlogp, B_TRUE, "failed to open log file"); +} + +/* ARGSUSED */ +void +hup_handler(int i) +{ + (void) close(logfd); + open_logfile(); +} + +/* + * Body of the worker thread to log the zfd's stdout and stderr to a log file. + * + * The stdout and stderr are from the perspective of the process inside the + * zone, so the zoneadmd view is opposite (i.e. we read from the stdout/stderr + * fds). Since this is the logger worker we ignore the zone's stdin fd. + */ +static void +logger() +{ + int stdoutfd = -1; + int stderrfd = -1; + sigset_t blockset; + + if (!shutting_down) { + open_logfile(); + } + + /* + * This thread should receive SIGHUP so that it can close the log + * file, and reopen it, during log rotation. + */ + sigset(SIGHUP, hup_handler); + (void) sigfillset(&blockset); + (void) sigdelset(&blockset, SIGHUP); + (void) thr_sigsetmask(SIG_BLOCK, &blockset, NULL); + + if (!shutting_down) { + if (pipe(eventstream) != 0) { + zerror(zlogp, B_TRUE, "failed to open logger control " + "pipe"); + goto death; + } + } + + if (!shutting_down) { + if ((stdoutfd = open_fd(1)) == -1) { + zerror(zlogp, B_TRUE, "failed to open stdout zfd"); + goto death; + } + + /* + * Setting RPROTDIS on the stream means that the control + * portion of messages received (which we don't care about) + * will be discarded by the stream head. If we allowed such + * messages, we wouldn't be able to use read(2), as it fails + * (EBADMSG) when a message with a control element is received. 
+ */ + if (ioctl(stdoutfd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(zlogp, B_TRUE, "failed to set options on " + "stdout zfd"); + goto death; + } + } + + if (!shutting_down) { + if ((stderrfd = open_fd(2)) == -1) { + zerror(zlogp, B_TRUE, "failed to open stderr zfd"); + goto death; + } + + if (ioctl(stderrfd, I_SRDOPT, RNORM|RPROTDIS) == -1) { + zerror(zlogp, B_TRUE, "failed to set options on " + "stderr zfd"); + goto death; + } + } + + do_zfd_logging(stdoutfd, stderrfd); + +death: + (void) close(eventstream[0]); + eventstream[0] = -1; + (void) close(eventstream[1]); + eventstream[1] = -1; + (void) close(logfd); + (void) close(stdoutfd); + (void) close(stderrfd); +} + +static zlog_mode_t +get_logger_mode() +{ + zlog_mode_t mode = ZLOG_NONE; + zone_dochandle_t handle; + struct zone_attrtab attr; + + if ((handle = zonecfg_init_handle()) == NULL) + return (mode); + + if (zonecfg_get_handle(zone_name, handle) != Z_OK) + goto done; + + if (zonecfg_setattrent(handle) != Z_OK) + goto done; + while (zonecfg_getattrent(handle, &attr) == Z_OK) { + if (strcmp(ZLOG_MODE, attr.zone_attr_name) == 0) { + if (strncmp("log", attr.zone_attr_value, 3) == 0) { + mode = ZLOG_LOG; + } else if (strncmp("int", + attr.zone_attr_value, 3) == 0) { + mode = ZLOG_INTERACTIVE; + } + break; + } + } + (void) zonecfg_endattrent(handle); + +done: + zonecfg_fini_handle(handle); + return (mode); +} + +void +create_log_thread(zlog_t *logp, zoneid_t id) +{ + int res; + int zdev_start; + zlog_mode_t mode; + void *(*worker) (void*); + + shutting_down = 0; + zlogp = logp; + + mode = get_logger_mode(); + if (mode == ZLOG_NONE) + return; + + if (mode == ZLOG_INTERACTIVE) { + worker = (void *(*)(void *))interactive; + zdev_start = 0; + } else { + worker = (void *(*)(void *))logger; + zdev_start = 1; + } + + if (init_zfd_devs(zlogp, zdev_start) == -1) { + zerror(zlogp, B_FALSE, + "zfd setup: device initialization failed"); + return; + } + + res = thr_create(NULL, NULL, worker, NULL, NULL, &logger_tid); + 
if (res != 0) { + zerror(zlogp, B_FALSE, "error %d creating logger thread", res); + logger_tid = 0; + } +} + +void +destroy_log_thread() +{ + if (logger_tid != 0) { + int stop = 1; + + shutting_down = 1; + /* break out of poll to shutdown */ + if (eventstream[0] != -1) + (void) write(eventstream[0], &stop, sizeof (stop)); + (void) thr_join(logger_tid, NULL, NULL); + logger_tid = 0; + } + + (void) destroy_zfd_devs(zlogp); +} diff --git a/usr/src/cmd/zoneadmd/zoneadmd.c b/usr/src/cmd/zoneadmd/zoneadmd.c index bb53b01d16..72c14bc9ff 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.c +++ b/usr/src/cmd/zoneadmd/zoneadmd.c @@ -111,6 +111,7 @@ static char *progname; char *zone_name; /* zone which we are managing */ zone_dochandle_t snap_hndl; /* handle for snapshot created when ready */ +char zonepath[MAXNAMELEN]; char pool_name[MAXNAMELEN]; char default_brand[MAXNAMELEN]; char brand_name[MAXNAMELEN]; @@ -620,15 +621,8 @@ mount_early_fs(void *data, const char *spec, const char *dir, /* determine the zone rootpath */ if (mount_cmd) { - char zonepath[MAXPATHLEN]; char luroot[MAXPATHLEN]; - if (zone_get_zonepath(zone_name, - zonepath, sizeof (zonepath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - return (-1); - } - (void) snprintf(luroot, sizeof (luroot), "%s/lu", zonepath); resolve_lofs(zlogp, luroot, sizeof (luroot)); (void) strlcpy(rootpath, luroot, sizeof (rootpath)); @@ -1014,7 +1008,7 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug) { zoneid_t zoneid; struct stat st; - char zpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; + char rpath[MAXPATHLEN], initpath[MAXPATHLEN], init_file[MAXPATHLEN]; char nbootargs[BOOTARGS_MAX]; char cmdbuf[MAXPATHLEN]; fs_callback_t cb; @@ -1058,13 +1052,8 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug) /* * Get the brand's boot callback if it exists. 
*/ - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - brand_close(bh); - goto bad; - } (void) strcpy(cmdbuf, EXEC_PREFIX); - if (brand_get_boot(bh, zone_name, zpath, cmdbuf + EXEC_LEN, + if (brand_get_boot(bh, zone_name, zonepath, cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN) != 0) { zerror(zlogp, B_FALSE, "unable to determine branded zone's boot callback"); @@ -1092,12 +1081,12 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug) assert(init_file[0] != '\0'); /* Try to anticipate possible problems: Make sure init is executable. */ - if (zone_get_rootpath(zone_name, zpath, sizeof (zpath)) != Z_OK) { + if (zone_get_rootpath(zone_name, rpath, sizeof (rpath)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to determine zone root"); goto bad; } - (void) snprintf(initpath, sizeof (initpath), "%s%s", zpath, init_file); + (void) snprintf(initpath, sizeof (initpath), "%s%s", rpath, init_file); if (stat(initpath, &st) == -1) { zerror(zlogp, B_TRUE, "could not stat %s", initpath); @@ -1165,6 +1154,9 @@ zone_bootup(zlog_t *zlogp, const char *bootargs, int zstate, boolean_t debug) if (brand_poststatechg(zlogp, zstate, Z_BOOT, debug) != 0) goto bad; + /* Startup a thread to perform zfd logging/tty svc for the zone. */ + create_log_thread(zlogp, zone_id); + /* Startup a thread to perform memory capping for the zone. 
*/ create_mcap_thread(zlogp, zone_id); @@ -1195,9 +1187,13 @@ zone_halt(zlog_t *zlogp, boolean_t unmount_cmd, boolean_t rebooting, int zstate, if (vplat_teardown(zlogp, unmount_cmd, rebooting, debug) != 0) { if (!bringup_failure_recovery) zerror(zlogp, B_FALSE, "unable to destroy zone"); + destroy_log_thread(); return (-1); } + /* Shut down is done, stop the log thread */ + destroy_log_thread(); + if (brand_poststatechg(zlogp, zstate, Z_HALT, debug) != 0) return (-1); @@ -1218,7 +1214,6 @@ zone_graceful_shutdown(zlog_t *zlogp) pid_t child; char cmdbuf[MAXPATHLEN]; brand_handle_t bh = NULL; - char zpath[MAXPATHLEN]; ctid_t ct; int tmpl_fd; int child_status; @@ -1239,18 +1234,12 @@ zone_graceful_shutdown(zlog_t *zlogp) return (-1); } - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) { - zerror(zlogp, B_FALSE, "unable to determine zone path"); - brand_close(bh); - return (-1); - } - /* * If there is a brand 'shutdown' callback, execute it now to give the * brand a chance to cleanup any custom configuration. 
*/ (void) strcpy(cmdbuf, EXEC_PREFIX); - if (brand_get_shutdown(bh, zone_name, zpath, cmdbuf + EXEC_LEN, + if (brand_get_shutdown(bh, zone_name, zonepath, cmdbuf + EXEC_LEN, sizeof (cmdbuf) - EXEC_LEN) != 0 || strlen(cmdbuf) <= EXEC_LEN) { (void) strcat(cmdbuf, SHUTDOWN_DEFAULT); } @@ -1397,15 +1386,12 @@ audit_put_record(zlog_t *zlogp, ucred_t *uc, int return_val, static void log_init_exit(int status) { - char zpath[MAXPATHLEN]; char p[MAXPATHLEN]; char buf[128]; struct timeval t; int fd; - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) - return; - if (snprintf(p, sizeof (p), "%s/lastexited", zpath) > sizeof (p)) + if (snprintf(p, sizeof (p), "%s/lastexited", zonepath) > sizeof (p)) return; if (gettimeofday(&t, NULL) != 0) return; @@ -2035,12 +2021,15 @@ top: zone_name, zone_state_str(zstate)); /* - * Startup a thread to perform memory capping for the + * Startup a thread to perform the zfd logging/tty svc + * and a thread to perform memory capping for the * zone. zlogp won't be valid for much longer so use * logsys. 
*/ - if ((zid = getzoneidbyname(zone_name)) != -1) + if ((zid = getzoneidbyname(zone_name)) != -1) { + create_log_thread(&logsys, zid); create_mcap_thread(&logsys, zid); + } /* recover the global configuration snapshot */ if (snap_hndl == NULL) { @@ -2120,15 +2109,10 @@ set_brand_env(zlog_t *zlogp) static int brand_callback_init(brand_handle_t bh, char *zone_name) { - char zpath[MAXPATHLEN]; - - if (zone_get_zonepath(zone_name, zpath, sizeof (zpath)) != Z_OK) - return (-1); - (void) strlcpy(pre_statechg_hook, EXEC_PREFIX, sizeof (pre_statechg_hook)); - if (brand_get_prestatechange(bh, zone_name, zpath, + if (brand_get_prestatechange(bh, zone_name, zonepath, pre_statechg_hook + EXEC_LEN, sizeof (pre_statechg_hook) - EXEC_LEN) != 0) return (-1); @@ -2139,7 +2123,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name) (void) strlcpy(post_statechg_hook, EXEC_PREFIX, sizeof (post_statechg_hook)); - if (brand_get_poststatechange(bh, zone_name, zpath, + if (brand_get_poststatechange(bh, zone_name, zonepath, post_statechg_hook + EXEC_LEN, sizeof (post_statechg_hook) - EXEC_LEN) != 0) return (-1); @@ -2150,7 +2134,7 @@ brand_callback_init(brand_handle_t bh, char *zone_name) (void) strlcpy(query_hook, EXEC_PREFIX, sizeof (query_hook)); - if (brand_get_query(bh, zone_name, zpath, query_hook + EXEC_LEN, + if (brand_get_query(bh, zone_name, zonepath, query_hook + EXEC_LEN, sizeof (query_hook) - EXEC_LEN) != 0) return (-1); @@ -2278,6 +2262,11 @@ main(int argc, char *argv[]) return (1); } + if (zone_get_zonepath(zone_name, zonepath, sizeof (zonepath)) != Z_OK) { + zerror(zlogp, B_FALSE, "unable to determine zone path"); + return (-1); + } + if (zonecfg_default_brand(default_brand, sizeof (default_brand)) != Z_OK) { zerror(zlogp, B_FALSE, "unable to determine default brand"); diff --git a/usr/src/cmd/zoneadmd/zoneadmd.h b/usr/src/cmd/zoneadmd/zoneadmd.h index ceab787dab..7e5dcea432 100644 --- a/usr/src/cmd/zoneadmd/zoneadmd.h +++ b/usr/src/cmd/zoneadmd/zoneadmd.h @@ -91,6 
+91,7 @@ extern mutex_t msglock; extern boolean_t in_death_throes; extern boolean_t bringup_failure_recovery; extern char *zone_name; +extern char zonepath[MAXNAMELEN]; extern zone_dochandle_t snap_hndl; extern char pool_name[MAXNAMELEN]; extern char brand_name[MAXNAMELEN]; @@ -164,6 +165,12 @@ extern void create_mcap_thread(zlog_t *, zoneid_t); extern void destroy_mcap_thread(); /* + * Zone FD log thread creation. + */ +extern void create_log_thread(zlog_t *, zoneid_t); +extern void destroy_log_thread(); + +/* * Contract handling. */ extern int init_template(void); diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml index 3df4b62922..e6a2ef46e3 100644 --- a/usr/src/lib/brand/lx/zone/platform.xml +++ b/usr/src/lib/brand/lx/zone/platform.xml @@ -121,6 +121,9 @@ <!-- Renamed devices to create under /dev --> <device match="brand/lx/ptmx" name="ptmx" /> <device match="zcons/%z/zoneconsole" name="console" /> + <device match="zfd/%z/slave/0" name="zfd/0" /> + <device match="zfd/%z/slave/1" name="zfd/1" /> + <device match="zfd/%z/slave/2" name="zfd/2" /> <!-- Audio devices to create under /dev --> <device match="brand/lx/dsp" name="dsp" /> diff --git a/usr/src/man/man1/zlogin.1 b/usr/src/man/man1/zlogin.1 index 129718e11e..4c9c8734a5 100644 --- a/usr/src/man/man1/zlogin.1 +++ b/usr/src/man/man1/zlogin.1 @@ -14,13 +14,13 @@ .\" Copyright 2013 DEY Storage Systems, Inc. .\" Copyright (c) 2014 Gary Mills .\" Copyright (c) 2014, Joyent, Inc. All Rights Reserved -.TH ZLOGIN 1 "Jan 22, 2014" +.TH ZLOGIN 1 "Dec 22, 2014" .SH NAME zlogin \- enter a zone .SH SYNOPSIS .LP .nf -\fBzlogin\fR [\fB-CEQ\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR +\fBzlogin\fR [\fB-CEIQ\fR] [\fB-e\fR \fIc\fR] [\fB-l\fR \fIusername\fR] \fIzonename\fR .fi .LP @@ -37,7 +37,7 @@ system zone. Only a superuser operating in the global system zone can use this utility. 
.sp .LP -\fBzlogin\fR operates in one of three modes: +\fBzlogin\fR operates in one of four modes: .sp .ne 2 .na @@ -81,6 +81,16 @@ available once the zone is in the installed state. Connections to the console are persistent across reboot of the zone. .RE +.sp +.ne 2 +.na +\fBStandalone-process Interactive Mode\fR +.ad +.RS 24n +If the \fB-I\fR option is specified the user is connected to the zone's stdin, +stdout and stderr \fBzfd(7D)\fR devices. +.RE + .SH OPTIONS .sp .LP @@ -125,6 +135,15 @@ Forces interactive mode when a utility argument is specified. .RE .sp +.ne 2 +.na +\fB\fB-I\fR\fR +.ad +.RS 15n +Connects to the zone's \fBzfd(7D)\fR devices. +.RE + +.sp .sp .ne 2 .na diff --git a/usr/src/man/man7d/Makefile b/usr/src/man/man7d/Makefile index 5140f21c65..ddcb5ed45b 100644 --- a/usr/src/man/man7d/Makefile +++ b/usr/src/man/man7d/Makefile @@ -13,6 +13,7 @@ # Copyright 2011, Richard Lowe # Copyright 2013 Nexenta Systems, Inc. All rights reserved. # Copyright 2014 Garrett D'Amore <garrett@damore.org> +# Copyright 2014 Joyent, Inc. All rights reserved. # include $(SRC)/Makefile.master @@ -151,7 +152,8 @@ _MANFILES= aac.7d \ xge.7d \ yge.7d \ zcons.7d \ - zero.7d + zero.7d \ + zfd.7d sparc_MANFILES= audiocs.7d \ bbc_beep.7d \ diff --git a/usr/src/man/man7d/zfd.7d b/usr/src/man/man7d/zfd.7d new file mode 100644 index 0000000000..f06777fee8 --- /dev/null +++ b/usr/src/man/man7d/zfd.7d @@ -0,0 +1,39 @@ +.\" +.\" This file and its contents are supplied under the terms of the +.\" Common Development and Distribution License ("CDDL"), version 1.0. +.\" You may only use this file in accordance with the terms of version +.\" 1.0 of the CDDL. +.\" +.\" A full copy of the text of the CDDL should have accompanied this +.\" source. A copy of the CDDL is also available via the Internet at +.\" http://www.illumos.org/license/CDDL. +.\" +.\" +.\" Copyright 2014, Joyent, Inc. All rights reserved.
+.\" +.Dd "Dec 22, 2014" +.Dt ZFD 7D +.Os +.Sh NAME +.Nm zfd +.Nd Zone file descriptor driver +.Sh DESCRIPTION +The +.Nm zfd +character driver exports up to three file descriptors into the zone. These can +be used by a standalone process within the zone as +.Vt stdin , +.Vt stdout , +and +.Vt stderr . +The +.Nm zfd +driver behaves in a similar manner as the +.Nm zcons(7D) +device. +Inside a zone, the slave side file descriptors appear as +.Nm /dev/zfd/[0-2] . +.Sh SEE ALSO +.Xr zlogin 1 , +.Xr zoneadmd 1M , +.Xr zcons 7D diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index 517a7a6363..03c50355d7 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -1083,6 +1083,8 @@ QLGE_OBJS += qlge.o qlge_dbg.o qlge_flash.o qlge_fm.o qlge_gld.o qlge_mpi.o ZCONS_OBJS += zcons.o +ZFD_OBJS += zfd.o + NV_SATA_OBJS += nv_sata.o SI3124_OBJS += si3124.o diff --git a/usr/src/uts/common/io/pseudo.conf b/usr/src/uts/common/io/pseudo.conf index 42248e93d6..08affec609 100644 --- a/usr/src/uts/common/io/pseudo.conf +++ b/usr/src/uts/common/io/pseudo.conf @@ -22,8 +22,7 @@ # # Copyright 2003 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. -# -# ident "%Z%%M% %I% %E% SMI" +# Copyright 2014 Joyent, Inc. All rights reserved. # # This file is private to the pseudonex driver. It should not be edited. # @@ -38,3 +37,9 @@ name="pseudo" class="root" instance=0; # /pseudo; it has as its children the zone console pseudo nodes. # name="zconsnex" parent="/pseudo" instance=1 valid-children="zcons"; + +# +# zfdnex is an alias for pseudo; this node is instantiated as a child of +# /pseudo; it has as its children the zone fd pseudo nodes. 
+# +name="zfdnex" parent="/pseudo" instance=2 valid-children="zfd"; diff --git a/usr/src/uts/common/io/zfd.c b/usr/src/uts/common/io/zfd.c new file mode 100644 index 0000000000..f70115653f --- /dev/null +++ b/usr/src/uts/common/io/zfd.c @@ -0,0 +1,815 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Copyright 2014 Joyent, Inc. All rights reserved. + */ + +/* + * Zone File Descriptor Driver. + * + * This driver is derived from the zcons driver which is in turn derived from + * the pts/ptm drivers. The purpose is to expose file descriptors within the + * zone which are connected to zoneadmd and used for logging or an interactive + * connection to a process within the zone. + * + * Its implementation is straightforward. Each instance of the driver + * represents a global-zone/local-zone pair. Unlike the zcons device, zoneadmd + * uses these devices unidirectionally to provide stdin, stdout and stderr to + * the process within the zone. + * + * Instances of zfd are onlined as children of /pseudo/zfdnex@2/ by zoneadmd, + * using the devctl framework; thus the driver does not need to maintain any + * sort of "admin" node. + * + * The driver shuttles I/O from master side to slave side and back. In a break + * from the pts/ptm semantics, if one side is not open, I/O directed towards + * it will simply be discarded. This is so that if zoneadmd is not holding the + * master side fd open (i.e. it has died somehow), processes in the zone do not + * experience any errors and I/O to the fd does not cause the process to hang. 
+ */ + +#include <sys/types.h> +#include <sys/cmn_err.h> +#include <sys/conf.h> +#include <sys/cred.h> +#include <sys/ddi.h> +#include <sys/debug.h> +#include <sys/devops.h> +#include <sys/errno.h> +#include <sys/file.h> +#include <sys/kstr.h> +#include <sys/modctl.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <sys/stream.h> +#include <sys/stropts.h> +#include <sys/strsun.h> +#include <sys/sunddi.h> +#include <sys/sysmacros.h> +#include <sys/systm.h> +#include <sys/types.h> +#include <sys/zfd.h> +#include <sys/vnode.h> +#include <sys/fs/snode.h> +#include <sys/zone.h> + +static int zfd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); +static int zfd_attach(dev_info_t *, ddi_attach_cmd_t); +static int zfd_detach(dev_info_t *, ddi_detach_cmd_t); + +static int zfd_open(queue_t *, dev_t *, int, int, cred_t *); +static int zfd_close(queue_t *, int, cred_t *); +static void zfd_wput(queue_t *, mblk_t *); +static void zfd_rsrv(queue_t *); +static void zfd_wsrv(queue_t *); + +/* + * The instance number is encoded in the dev_t in the minor number; the lowest + * bit of the minor number is used to track the master vs. slave side of the + * fd. The rest of the bits in the minor number are the instance. + */ +#define ZFD_MASTER_MINOR 0 +#define ZFD_SLAVE_MINOR 1 + +#define ZFD_INSTANCE(x) (getminor((x)) >> 1) +#define ZFD_NODE(x) (getminor((x)) & 0x01) + +/* + * This macro converts a zfd_state_t pointer to the associated slave minor + * node's dev_t. + */ +#define ZFD_STATE_TO_SLAVEDEV(x) \ + (makedevice(ddi_driver_major((x)->zfd_devinfo), \ + (minor_t)(ddi_get_instance((x)->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR))) + +int zfd_debug = 0; +#define DBG(a) if (zfd_debug) cmn_err(CE_NOTE, a) +#define DBG1(a, b) if (zfd_debug) cmn_err(CE_NOTE, a, b) + +/* + * ZFD Pseudo Terminal Module: stream data structure definitions, + * based on zcons. 
+ */ +static struct module_info zfd_info = { + 0x20FD, /* ZOFD - 8445 */ + "zfd", + 0, /* min packet size */ + INFPSZ, /* max packet size - infinity */ + 2048, /* high water */ + 128 /* low water */ +}; + +static struct qinit zfd_rinit = { + NULL, + (int (*)()) zfd_rsrv, + zfd_open, + zfd_close, + NULL, + &zfd_info, + NULL +}; + +static struct qinit zfd_winit = { + (int (*)()) zfd_wput, + (int (*)()) zfd_wsrv, + NULL, + NULL, + NULL, + &zfd_info, + NULL +}; + +static struct streamtab zfd_tab_info = { + &zfd_rinit, + &zfd_winit, + NULL, + NULL +}; + +#define ZFD_CONF_FLAG (D_MP | D_MTQPAIR | D_MTOUTPERIM | D_MTOCEXCL) + +/* + * this will define (struct cb_ops cb_zfd_ops) and (struct dev_ops zfd_ops) + */ +DDI_DEFINE_STREAM_OPS(zfd_ops, nulldev, nulldev, zfd_attach, zfd_detach, \ + nodev, zfd_getinfo, ZFD_CONF_FLAG, &zfd_tab_info, \ + ddi_quiesce_not_needed); + +/* + * Module linkage information for the kernel. + */ + +static struct modldrv modldrv = { + &mod_driverops, /* Type of module (this is a pseudo driver) */ + "Zone FD driver", /* description of module */ + &zfd_ops /* driver ops */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, + &modldrv, + NULL +}; + +typedef struct zfd_state { + dev_info_t *zfd_devinfo; + queue_t *zfd_master_rdq; + queue_t *zfd_slave_rdq; + vnode_t *zfd_slave_vnode; + int zfd_state; + int zfd_tty; +} zfd_state_t; + +#define ZFD_STATE_MOPEN 0x01 +#define ZFD_STATE_SOPEN 0x02 + +static void *zfd_soft_state; + +/* + * List of STREAMS modules that is pushed onto a slave instance after the + * ZFD_MAKETTY ioctl has been received. 
+ */ +static char *zfd_mods[] = { + "ptem", + "ldterm", + "ttcompat", + NULL +}; + +int +_init(void) +{ + int err; + + if ((err = ddi_soft_state_init(&zfd_soft_state, sizeof (zfd_state_t), + 0)) != 0) { + return (err); + } + + if ((err = mod_install(&modlinkage)) != 0) + ddi_soft_state_fini(zfd_soft_state); + + return (err); +} + + +int +_fini(void) +{ + int err; + + if ((err = mod_remove(&modlinkage)) != 0) { + return (err); + } + + ddi_soft_state_fini(&zfd_soft_state); + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +static int +zfd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + zfd_state_t *zfds; + int instance; + char masternm[ZFD_NAME_LEN], slavenm[ZFD_NAME_LEN]; + + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + instance = ddi_get_instance(dip); + if (ddi_soft_state_zalloc(zfd_soft_state, instance) != DDI_SUCCESS) + return (DDI_FAILURE); + + (void) snprintf(masternm, sizeof (masternm), "%s%d", ZFD_MASTER_NAME, + instance); + (void) snprintf(slavenm, sizeof (slavenm), "%s%d", ZFD_SLAVE_NAME, + instance); + + /* + * Create the master and slave minor nodes. 
+ */ + if ((ddi_create_minor_node(dip, slavenm, S_IFCHR, + instance << 1 | ZFD_SLAVE_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE) || + (ddi_create_minor_node(dip, masternm, S_IFCHR, + instance << 1 | ZFD_MASTER_MINOR, DDI_PSEUDO, 0) == DDI_FAILURE)) { + ddi_remove_minor_node(dip, NULL); + ddi_soft_state_free(zfd_soft_state, instance); + return (DDI_FAILURE); + } + + VERIFY((zfds = ddi_get_soft_state(zfd_soft_state, instance)) != NULL); + zfds->zfd_devinfo = dip; + zfds->zfd_tty = 0; + return (DDI_SUCCESS); +} + +static int +zfd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + zfd_state_t *zfds; + int instance; + + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + instance = ddi_get_instance(dip); + if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL) + return (DDI_FAILURE); + + if ((zfds->zfd_state & ZFD_STATE_MOPEN) || + (zfds->zfd_state & ZFD_STATE_SOPEN)) { + DBG1("zfd_detach: device (dip=%p) still open\n", (void *)dip); + return (DDI_FAILURE); + } + + ddi_remove_minor_node(dip, NULL); + ddi_soft_state_free(zfd_soft_state, instance); + + return (DDI_SUCCESS); +} + +/* + * zfd_getinfo() + * getinfo(9e) entrypoint. + */ +/*ARGSUSED*/ +static int +zfd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + zfd_state_t *zfds; + int instance = ZFD_INSTANCE((dev_t)arg); + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + if ((zfds = ddi_get_soft_state(zfd_soft_state, + instance)) == NULL) + return (DDI_FAILURE); + *result = zfds->zfd_devinfo; + return (DDI_SUCCESS); + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)(uintptr_t)instance; + return (DDI_SUCCESS); + } + return (DDI_FAILURE); +} + +/* + * Return the equivalent queue from the other side of the relationship. + * e.g.: given the slave's write queue, return the master's write queue. 
+ */ +static queue_t * +zfd_switch(queue_t *qp) +{ + zfd_state_t *zfds = qp->q_ptr; + ASSERT(zfds != NULL); + + if (qp == zfds->zfd_master_rdq) + return (zfds->zfd_slave_rdq); + else if (OTHERQ(qp) == zfds->zfd_master_rdq && zfds->zfd_slave_rdq + != NULL) + return (OTHERQ(zfds->zfd_slave_rdq)); + else if (qp == zfds->zfd_slave_rdq) + return (zfds->zfd_master_rdq); + else if (OTHERQ(qp) == zfds->zfd_slave_rdq && zfds->zfd_master_rdq + != NULL) + return (OTHERQ(zfds->zfd_master_rdq)); + else + return (NULL); +} + +/* + * For debugging and outputting messages. Returns the name of the side of + * the relationship associated with this queue. + */ +static const char * +zfd_side(queue_t *qp) +{ + zfd_state_t *zfds = qp->q_ptr; + ASSERT(zfds != NULL); + + if (qp == zfds->zfd_master_rdq || + OTHERQ(qp) == zfds->zfd_master_rdq) { + return ("master"); + } + ASSERT(qp == zfds->zfd_slave_rdq || OTHERQ(qp) == zfds->zfd_slave_rdq); + return ("slave"); +} + +/*ARGSUSED*/ +static int +zfd_master_open(zfd_state_t *zfds, + queue_t *rqp, /* pointer to the read side queue */ + dev_t *devp, /* pointer to stream tail's dev */ + int oflag, /* the user open(2) supplied flags */ + int sflag, /* open state flag */ + cred_t *credp) /* credentials */ +{ + mblk_t *mop; + struct stroptions *sop; + + /* + * Enforce exclusivity on the master side; the only consumer should + * be the zoneadmd for the zone. + */ + if ((zfds->zfd_state & ZFD_STATE_MOPEN) != 0) + return (EBUSY); + + if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) { + DBG("zfd_master_open(): mop allocation failed\n"); + return (ENOMEM); + } + + zfds->zfd_state |= ZFD_STATE_MOPEN; + + /* + * q_ptr stores driver private data; stash the soft state data on both + * read and write sides of the queue. + */ + WR(rqp)->q_ptr = rqp->q_ptr = zfds; + qprocson(rqp); + + /* + * Following qprocson(), the master side is fully plumbed into the + * STREAM and may send/receive messages. 
Setting zfds->zfd_master_rdq + * will allow the slave to send messages to us (the master). + * This cannot occur before qprocson() because the master is not + * ready to process them until that point. + */ + zfds->zfd_master_rdq = rqp; + + /* + * set up hi/lo water marks on stream head read queue and add + * controlling tty as needed. + */ + mop->b_datap->db_type = M_SETOPTS; + mop->b_wptr += sizeof (struct stroptions); + sop = (struct stroptions *)(void *)mop->b_rptr; + if (oflag & FNOCTTY) + sop->so_flags = SO_HIWAT | SO_LOWAT; + else + sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY; + sop->so_hiwat = 512; + sop->so_lowat = 256; + putnext(rqp, mop); + + return (0); +} + +/*ARGSUSED*/ +static int +zfd_slave_open(zfd_state_t *zfds, + queue_t *rqp, /* pointer to the read side queue */ + dev_t *devp, /* pointer to stream tail's dev */ + int oflag, /* the user open(2) supplied flags */ + int sflag, /* open state flag */ + cred_t *credp) /* credentials */ +{ + mblk_t *mop; + struct stroptions *sop; + /* + * The slave side can be opened as many times as needed. + */ + if ((zfds->zfd_state & ZFD_STATE_SOPEN) != 0) { + ASSERT((rqp != NULL) && (WR(rqp)->q_ptr == zfds)); + return (0); + } + + if (zfds->zfd_tty == 1) { + major_t major; + minor_t minor; + minor_t lastminor; + uint_t anchorindex; + + /* + * Set up sad(7D) so that the necessary STREAMS modules will + * be in place. A wrinkle is that 'ptem' must be anchored + * in place (see streamio(7i)) because we always want the + * fd to have terminal semantics. 
+ */ + minor = + ddi_get_instance(zfds->zfd_devinfo) << 1 | ZFD_SLAVE_MINOR; + major = ddi_driver_major(zfds->zfd_devinfo); + lastminor = 0; + anchorindex = 1; + if (kstr_autopush(SET_AUTOPUSH, &major, &minor, &lastminor, + &anchorindex, zfd_mods) != 0) { + DBG("zfd_slave_open(): kstr_autopush() failed\n"); + return (EIO); + } + } + + if ((mop = allocb(sizeof (struct stroptions), BPRI_MED)) == NULL) { + DBG("zfd_slave_open(): mop allocation failed\n"); + return (ENOMEM); + } + + zfds->zfd_state |= ZFD_STATE_SOPEN; + + /* + * q_ptr stores driver private data; stash the soft state data on both + * read and write sides of the queue. + */ + WR(rqp)->q_ptr = rqp->q_ptr = zfds; + + qprocson(rqp); + + /* + * Must follow qprocson(), since we aren't ready to process until then. + */ + zfds->zfd_slave_rdq = rqp; + + /* + * set up hi/lo water marks on stream head read queue and add + * controlling tty as needed. + */ + mop->b_datap->db_type = M_SETOPTS; + mop->b_wptr += sizeof (struct stroptions); + sop = (struct stroptions *)(void *)mop->b_rptr; + sop->so_flags = SO_HIWAT | SO_LOWAT | SO_ISTTY; + sop->so_hiwat = 512; + sop->so_lowat = 256; + putnext(rqp, mop); + + return (0); +} + +/* + * open(9e) entrypoint; checks sflag, and rejects anything unordinary. 
+ */
+static int
+zfd_open(queue_t *rqp,		/* pointer to the read side queue */
+    dev_t *devp,		/* pointer to stream tail's dev */
+    int oflag,			/* the user open(2) supplied flags */
+    int sflag,			/* open state flag */
+    cred_t *credp)		/* credentials */
+{
+	int instance = ZFD_INSTANCE(*devp);
+	int ret;
+	zfd_state_t *zfds;
+
+	if (sflag != 0)		/* any non-zero sflag is refused */
+		return (EINVAL);
+
+	if ((zfds = ddi_get_soft_state(zfd_soft_state, instance)) == NULL)
+		return (ENXIO);	/* no such attached instance */
+
+	switch (ZFD_NODE(*devp)) {	/* dispatch on the side bit */
+	case ZFD_MASTER_MINOR:
+		ret = zfd_master_open(zfds, rqp, devp, oflag, sflag, credp);
+		break;
+	case ZFD_SLAVE_MINOR:
+		ret = zfd_slave_open(zfds, rqp, devp, oflag, sflag, credp);
+		break;
+	default:
+		ret = ENXIO;
+		break;
+	}
+
+	return (ret);
+}
+
+/*
+ * close(9e) entrypoint.
+ *
+ * Decides which side is closing by comparing rqp with the read queues
+ * recorded in the soft state, clears that side's open flag and queue
+ * pointer, wakes the peer's write queue so it can dispose of what it has
+ * queued, and then turns the queue procedures off.  On the slave side the
+ * messages still on the write queue are first handed to the master (or
+ * nak'ed/freed if the master is closed), and the autopush configuration is
+ * cleared when the instance was made a tty.  Always returns 0.
+ */
+/*ARGSUSED1*/
+static int
+zfd_close(queue_t *rqp, int flag, cred_t *credp)
+{
+	queue_t *wqp;
+	mblk_t *bp;
+	zfd_state_t *zfds;
+	major_t major;
+	minor_t minor;
+
+	zfds = (zfd_state_t *)rqp->q_ptr;
+
+	if (rqp == zfds->zfd_master_rdq) {
+		DBG("Closing master side");
+
+		zfds->zfd_master_rdq = NULL;
+		zfds->zfd_state &= ~ZFD_STATE_MOPEN;
+
+		/*
+		 * qenable slave side write queue so that it can flush
+		 * its messages as master's read queue is going away
+		 */
+		if (zfds->zfd_slave_rdq != NULL) {
+			qenable(WR(zfds->zfd_slave_rdq));
+		}
+
+		qprocsoff(rqp);
+		WR(rqp)->q_ptr = rqp->q_ptr = NULL;
+
+	} else if (rqp == zfds->zfd_slave_rdq) {
+
+		DBG("Closing slave side");
+		zfds->zfd_state &= ~ZFD_STATE_SOPEN;
+		zfds->zfd_slave_rdq = NULL;
+
+		wqp = WR(rqp);
+		while ((bp = getq(wqp)) != NULL) {	/* drain our write queue */
+			if (zfds->zfd_master_rdq != NULL)
+				putnext(zfds->zfd_master_rdq, bp);
+			else if (bp->b_datap->db_type == M_IOCTL)
+				miocnak(wqp, bp, 0, 0);
+			else
+				freemsg(bp);
+		}
+
+		/*
+		 * Qenable master side write queue so that it can flush its
+		 * messages as slave's read queue is going away.
+		 */
+		if (zfds->zfd_master_rdq != NULL)
+			qenable(WR(zfds->zfd_master_rdq));
+
+		qprocsoff(rqp);
+		WR(rqp)->q_ptr = rqp->q_ptr = NULL;
+
+		if (zfds->zfd_tty == 1) {
+			/*
+			 * Clear the sad configuration so that reopening
+			 * doesn't fail to set up sad configuration.
+			 */
+			major = ddi_driver_major(zfds->zfd_devinfo);
+			minor = ddi_get_instance(zfds->zfd_devinfo) << 1 |
+			    ZFD_SLAVE_MINOR;
+			(void) kstr_autopush(CLR_AUTOPUSH, &major, &minor,
+			    NULL, NULL, NULL);
+		}
+	}
+
+	return (0);
+}
+/*
+ * Handle an M_FLUSH message that arrived on write queue qp.
+ *
+ * FLUSHW flushes the data on qp and is passed to the other side as FLUSHR;
+ * FLUSHR is turned around with qreply().  The message is consumed on every
+ * path: it is forwarded, reflected or freed.
+ */
+static void
+handle_mflush(queue_t *qp, mblk_t *mp)
+{
+	mblk_t *nmp;
+	DBG1("M_FLUSH on %s side", zfd_side(qp));
+
+	if (*mp->b_rptr & FLUSHW) {
+		DBG1("M_FLUSH, FLUSHW, %s side", zfd_side(qp));
+		flushq(qp, FLUSHDATA);
+		*mp->b_rptr &= ~FLUSHW;
+		if ((*mp->b_rptr & FLUSHR) == 0) {
+			/*
+			 * FLUSHW only. Change to FLUSHR and putnext other side,
+			 * then we are done.
+			 *
+			 * If there is no other side we fall out of this
+			 * block with FLUSHR now set, so the FLUSHR handling
+			 * below reflects the message back up this side.
+			 * NOTE(review): confirm that reflecting a FLUSHW-only
+			 * request is intended when no peer is open.
+			 */
+			*mp->b_rptr |= FLUSHR;
+			if (zfd_switch(RD(qp)) != NULL) {
+				putnext(zfd_switch(RD(qp)), mp);
+				return;
+			}
+		} else if ((zfd_switch(RD(qp)) != NULL) &&
+		    (nmp = copyb(mp)) != NULL) {
+			/*
+			 * It is a FLUSHRW; we copy the mblk and send
+			 * it to the other side, since we still need to use
+			 * the mblk in FLUSHR processing, below.
+			 *
+			 * FLUSHW has already been cleared, so the copy
+			 * carries FLUSHR only.  If copyb() fails the other
+			 * side is simply not told about the flush.
+			 */
+			putnext(zfd_switch(RD(qp)), nmp);
+		}
+	}
+
+	if (*mp->b_rptr & FLUSHR) {
+		DBG("qreply(qp) turning FLUSHR around\n");
+		qreply(qp, mp);
+		return;
+	}
+	freemsg(mp);
+}
+
+/*
+ * wput(9E) is symmetric for master and slave sides, so this handles both
+ * without splitting the codepath. (The only exception to this is the
+ * processing of zfd ioctls, which is restricted to the master side.)
+ *
+ * zfd_wput() looks at the other side; if there is no process holding that
+ * side open, it frees the message. This prevents processes from hanging
+ * if no one is holding open the fd.
Otherwise, it putnext's high + * priority messages, putnext's normal messages if possible, and otherwise + * enqueues the messages; in the case that something is enqueued, wsrv(9E) + * will take care of eventually shuttling I/O to the other side. + */ +static void +zfd_wput(queue_t *qp, mblk_t *mp) +{ + unsigned char type = mp->b_datap->db_type; + zfd_state_t *zfds; + struct iocblk *iocbp; + + ASSERT(qp->q_ptr); + + DBG1("entering zfd_wput, %s side", zfd_side(qp)); + + /* + * Process zfd ioctl messages if qp is the master side's write queue. + */ + zfds = (zfd_state_t *)qp->q_ptr; + if (zfds->zfd_master_rdq != NULL && qp == WR(zfds->zfd_master_rdq) && + type == M_IOCTL) { + iocbp = (struct iocblk *)(void *)mp->b_rptr; + switch (iocbp->ioc_cmd) { + case ZFD_MAKETTY: + /* + * The process that passed the ioctl must be running in + * the global zone. + */ + if (crgetzoneid(iocbp->ioc_cr) != GLOBAL_ZONEID) { + miocack(qp, mp, 0, EINVAL); + return; + } + zfds->zfd_tty = 1; + miocack(qp, mp, 0, 0); + return; + default: + break; + } + } + + if (zfd_switch(RD(qp)) == NULL) { + DBG1("wput to %s side (no one listening)", zfd_side(qp)); + switch (type) { + case M_FLUSH: + handle_mflush(qp, mp); + break; + case M_IOCTL: + miocnak(qp, mp, 0, 0); + break; + default: + freemsg(mp); + break; + } + return; + } + + if (type >= QPCTL) { + DBG1("(hipri) wput, %s side", zfd_side(qp)); + switch (type) { + case M_READ: /* supposedly from ldterm? */ + DBG("zfd_wput: tossing M_READ\n"); + freemsg(mp); + break; + case M_FLUSH: + handle_mflush(qp, mp); + break; + default: + /* + * Put this to the other side. + */ + ASSERT(zfd_switch(RD(qp)) != NULL); + putnext(zfd_switch(RD(qp)), mp); + break; + } + DBG1("done (hipri) wput, %s side", zfd_side(qp)); + return; + } + + /* + * Only putnext if there isn't already something in the queue. + * otherwise things would wind up out of order. 
+ */ + if (qp->q_first == NULL && + bcanputnext(RD(zfd_switch(qp)), mp->b_band)) { + DBG("wput: putting message to other side\n"); + putnext(RD(zfd_switch(qp)), mp); + } else { + DBG("wput: putting msg onto queue\n"); + (void) putq(qp, mp); + } + DBG1("done wput, %s side", zfd_side(qp)); +} + +/* + * rsrv(9E) is symmetric for master and slave, so zfd_rsrv() handles both + * without splitting up the codepath. + * + * Enable the write side of the partner. This triggers the partner to send + * messages queued on its write side to this queue's read side. + */ +static void +zfd_rsrv(queue_t *qp) +{ + zfd_state_t *zfds; + zfds = (zfd_state_t *)qp->q_ptr; + + /* + * Care must be taken here, as either of the master or slave side + * qptr could be NULL. + */ + ASSERT(qp == zfds->zfd_master_rdq || qp == zfds->zfd_slave_rdq); + if (zfd_switch(qp) == NULL) { + DBG("zfd_rsrv: other side isn't listening\n"); + return; + } + qenable(WR(zfd_switch(qp))); +} + +/* + * This routine is symmetric for master and slave, so it handles both without + * splitting up the codepath. + * + * If there are messages on this queue that can be sent to the other, send + * them via putnext(). Else, if queued messages cannot be sent, leave them + * on this queue. + */ +static void +zfd_wsrv(queue_t *qp) +{ + mblk_t *mp; + + DBG1("zfd_wsrv master (%s) side", zfd_side(qp)); + + /* + * Partner has no read queue, so take the data, and throw it away. + */ + if (zfd_switch(RD(qp)) == NULL) { + DBG("zfd_wsrv: other side isn't listening"); + while ((mp = getq(qp)) != NULL) { + if (mp->b_datap->db_type == M_IOCTL) + miocnak(qp, mp, 0, 0); + else + freemsg(mp); + } + flushq(qp, FLUSHALL); + return; + } + + /* + * while there are messages on this write queue... + */ + while ((mp = getq(qp)) != NULL) { + /* + * Due to the way zfd_wput is implemented, we should never + * see a control message here. 
+ */ + ASSERT(mp->b_datap->db_type < QPCTL); + + if (bcanputnext(RD(zfd_switch(qp)), mp->b_band)) { + DBG("wsrv: send message to other side\n"); + putnext(RD(zfd_switch(qp)), mp); + } else { + DBG("wsrv: putting msg back on queue\n"); + (void) putbq(qp, mp); + break; + } + } +} diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index b52be94c90..6f28942704 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -659,6 +659,7 @@ CHKHDRS= \ watchpoint.h \ winlockio.h \ zcons.h \ + zfd.h \ zone.h \ xti_inet.h \ xti_osi.h \ diff --git a/usr/src/uts/common/sys/zfd.h b/usr/src/uts/common/sys/zfd.h new file mode 100644 index 0000000000..c676f143ac --- /dev/null +++ b/usr/src/uts/common/sys/zfd.h @@ -0,0 +1,53 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ +/* + * Copyright 2014 Joyent, Inc. All rights reserved. + */ + +#ifndef _SYS_ZFD_H +#define _SYS_ZFD_H + +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Minor node name of the global zone side (often called the "master" side) + * of the zfd dev. + */ +#define ZFD_MASTER_NAME "master" + +/* + * Minor node name of the non-global zone side (often called the "slave" + * side) of the zfd dev. + */ +#define ZFD_SLAVE_NAME "slave" + +#define ZFD_NAME_LEN 16 + +/* + * ZFD_IOC forms the base for all zfd ioctls. + */ +#define ZFD_IOC (('Z' << 24) | ('f' << 16) | ('d' << 8)) + +/* + * This ioctl tells the slave side it should push the TTY stream modules + * so that the fd looks like a tty. 
+ */ +#define ZFD_MAKETTY (ZFD_IOC | 0) + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_ZFD_H */ diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel index 49ff0b8a2e..830580e390 100644 --- a/usr/src/uts/intel/Makefile.intel +++ b/usr/src/uts/intel/Makefile.intel @@ -364,6 +364,7 @@ DRV_KMODS += wpi DRV_KMODS += xge DRV_KMODS += yge DRV_KMODS += zcons +DRV_KMODS += zfd DRV_KMODS += zyd DRV_KMODS += simnet DRV_KMODS += stmf diff --git a/usr/src/uts/intel/zfd/Makefile b/usr/src/uts/intel/zfd/Makefile new file mode 100644 index 0000000000..c270466d08 --- /dev/null +++ b/usr/src/uts/intel/zfd/Makefile @@ -0,0 +1,48 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2014 Joyent, Inc. All rights reserved. +# +# uts/intel/zfd/Makefile + +UTSBASE = ../.. 
+ +MODULE = zfd +OBJECTS = $(ZFD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(ZFD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) + +include $(UTSBASE)/intel/Makefile.intel + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/intel/Makefile.targ diff --git a/usr/src/uts/sparc/Makefile.sparc b/usr/src/uts/sparc/Makefile.sparc index cd7206a353..a5b719ae80 100644 --- a/usr/src/uts/sparc/Makefile.sparc +++ b/usr/src/uts/sparc/Makefile.sparc @@ -217,7 +217,7 @@ DRV_KMODS += log logindmux kssl mm nca physmem pm poll pool DRV_KMODS += pseudo ptc ptm pts ptsl ramdisk random rsm rts sad DRV_KMODS += simnet softmac sppp sppptun sy sysevent sysmsg DRV_KMODS += spdsock -DRV_KMODS += tcp tcp6 tl tnf ttymux udp udp6 wc winlock zcons +DRV_KMODS += tcp tcp6 tl tnf ttymux udp udp6 wc winlock zcons zfd DRV_KMODS += ippctl DRV_KMODS += dld DRV_KMODS += ipd diff --git a/usr/src/uts/sparc/zfd/Makefile b/usr/src/uts/sparc/zfd/Makefile new file mode 100644 index 0000000000..ebdba686b4 --- /dev/null +++ b/usr/src/uts/sparc/zfd/Makefile @@ -0,0 +1,50 @@ +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +# +# Copyright 2014 Joyent, Inc. All rights reserved. +# +# uts/sparc/zfd/Makefile + +UTSBASE = ../.. 
+ +MODULE = zfd +OBJECTS = $(ZFD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(ZFD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(USR_DRV_DIR)/$(MODULE) + +include $(UTSBASE)/sparc/Makefile.sparc + +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +CFLAGS += $(CCVERBOSE) + +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +include $(UTSBASE)/sparc/Makefile.targ |