summaryrefslogtreecommitdiff
path: root/usr/src/uts/common
diff options
context:
space:
mode:
authornn35248 <none@none>2006-09-11 22:51:59 -0700
committernn35248 <none@none>2006-09-11 22:51:59 -0700
commit9acbbeaf2a1ffe5c14b244867d427714fab43c5c (patch)
treed1ecd54896325c19a463220e9cbc50864874fc82 /usr/src/uts/common
parentda51466dc253d7c98dda4956059042bd0c476328 (diff)
downloadillumos-gate-9acbbeaf2a1ffe5c14b244867d427714fab43c5c.tar.gz
PSARC/2005/471 BrandZ: Support for non-native zones
6374606 ::nm -D without an object may not work on processes in zones 6409350 BrandZ project integration into Solaris 6455289 pthread_setschedparam() should return EPERM rather than panic libc 6455591 setpriority(3C) gets errno wrong for deficient privileges failure 6458178 fifofs doesn't support lofs mounts of fifos 6460380 Attempted open() of a symlink with the O_NOFOLLOW flag set returns EINVAL, not ELOOP 6463857 renice(1) errors erroneously --HG-- rename : usr/src/lib/libzonecfg/zones/SUNWblank.xml => usr/src/lib/brand/native/zone/SUNWblank.xml rename : usr/src/lib/libzonecfg/zones/SUNWdefault.xml => usr/src/lib/brand/native/zone/SUNWdefault.xml
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--usr/src/uts/common/Makefile.files11
-rw-r--r--usr/src/uts/common/Makefile.rules7
-rw-r--r--usr/src/uts/common/brand/lx/autofs/lx_autofs.c1558
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.c395
-rw-r--r--usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/ldlinux.c297
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_audio.c2026
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_audio.conf27
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.c1137
-rw-r--r--usr/src/uts/common/brand/lx/io/lx_ptm.conf27
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_brand.c836
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_misc.c383
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_pid.c348
-rw-r--r--usr/src/uts/common/brand/lx/os/lx_syscall.c409
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_proc.h233
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prsubr.c494
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c373
-rw-r--r--usr/src/uts/common/brand/lx/procfs/lx_prvnops.c2951
-rw-r--r--usr/src/uts/common/brand/lx/sys/ldlinux.h117
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_audio.h130
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs.h334
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h121
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_brand.h210
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_futex.h51
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_impl.h62
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ldt.h93
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_pid.h61
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_ptm.h44
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_sched.h60
-rw-r--r--usr/src/uts/common/brand/lx/sys/lx_syscalls.h68
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_brk.c59
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_clone.c135
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_futex.c471
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_getpid.c72
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_id.c297
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_kill.c249
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c121
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sched.c513
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c118
-rw-r--r--usr/src/uts/common/brand/lx/syscall/lx_thread_area.c128
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.c288
-rw-r--r--usr/src/uts/common/brand/sn1/sn1_brand.h48
-rw-r--r--usr/src/uts/common/c2/audit_event.c42
-rw-r--r--usr/src/uts/common/c2/audit_kevents.h3
-rw-r--r--usr/src/uts/common/disp/class.c13
-rw-r--r--usr/src/uts/common/disp/priocntl.c87
-rw-r--r--usr/src/uts/common/disp/thread.c2
-rw-r--r--usr/src/uts/common/disp/ts.c19
-rw-r--r--usr/src/uts/common/exec/aout/aout.c19
-rw-r--r--usr/src/uts/common/exec/elf/elf.c155
-rw-r--r--usr/src/uts/common/exec/elf/elf_impl.h8
-rw-r--r--usr/src/uts/common/exec/intp/intp.c14
-rw-r--r--usr/src/uts/common/exec/java/java.c13
-rw-r--r--usr/src/uts/common/fs/fifofs/fifosubr.c4
-rw-r--r--usr/src/uts/common/fs/fifofs/fifovnops.c24
-rw-r--r--usr/src/uts/common/fs/nfs/nfs4_subr.c6
-rw-r--r--usr/src/uts/common/fs/nfs/nfs_subr.c22
-rw-r--r--usr/src/uts/common/fs/specfs/specvnops.c15
-rw-r--r--usr/src/uts/common/fs/vnode.c2
-rw-r--r--usr/src/uts/common/io/gentty.c114
-rw-r--r--usr/src/uts/common/io/l_strplumb.c27
-rw-r--r--usr/src/uts/common/io/ptm.c62
-rw-r--r--usr/src/uts/common/nfs/nfs.h1
-rw-r--r--usr/src/uts/common/os/brand.c323
-rw-r--r--usr/src/uts/common/os/ddi.c20
-rw-r--r--usr/src/uts/common/os/exec.c116
-rw-r--r--usr/src/uts/common/os/exit.c36
-rw-r--r--usr/src/uts/common/os/fork.c16
-rw-r--r--usr/src/uts/common/os/lwp.c43
-rw-r--r--usr/src/uts/common/os/main.c13
-rw-r--r--usr/src/uts/common/os/modconf.c34
-rw-r--r--usr/src/uts/common/os/pid.c49
-rw-r--r--usr/src/uts/common/os/printf.c31
-rw-r--r--usr/src/uts/common/os/procset.c21
-rw-r--r--usr/src/uts/common/os/session.c651
-rw-r--r--usr/src/uts/common/os/streamio.c231
-rw-r--r--usr/src/uts/common/os/strsubr.c74
-rw-r--r--usr/src/uts/common/os/sysent.c15
-rw-r--r--usr/src/uts/common/os/zone.c80
-rw-r--r--usr/src/uts/common/rpc/clnt_gen.c5
-rw-r--r--usr/src/uts/common/sys/Makefile1
-rw-r--r--usr/src/uts/common/sys/audioio.h15
-rw-r--r--usr/src/uts/common/sys/auxv.h17
-rw-r--r--usr/src/uts/common/sys/bitmap.h10
-rw-r--r--usr/src/uts/common/sys/brand.h134
-rw-r--r--usr/src/uts/common/sys/class.h26
-rw-r--r--usr/src/uts/common/sys/exec.h16
-rw-r--r--usr/src/uts/common/sys/klwp.h9
-rw-r--r--usr/src/uts/common/sys/modctl.h8
-rw-r--r--usr/src/uts/common/sys/proc.h14
-rw-r--r--usr/src/uts/common/sys/ptms.h53
-rw-r--r--usr/src/uts/common/sys/session.h115
-rw-r--r--usr/src/uts/common/sys/socketvar.h12
-rw-r--r--usr/src/uts/common/sys/strsubr.h9
-rw-r--r--usr/src/uts/common/sys/syscall.h8
-rw-r--r--usr/src/uts/common/sys/systm.h1
-rw-r--r--usr/src/uts/common/sys/termios.h26
-rw-r--r--usr/src/uts/common/sys/zone.h56
-rw-r--r--usr/src/uts/common/syscall/brandsys.c56
-rw-r--r--usr/src/uts/common/syscall/pgrpsys.c27
-rw-r--r--usr/src/uts/common/syscall/uucopy.c59
101 files changed, 17861 insertions, 610 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index b022fcd0c9..f0203dfeb9 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -41,6 +41,7 @@ sparc_CORE_OBJS +=
COMMON_CORE_OBJS += \
atomic.o \
bp_map.o \
+ brand.o \
chip.o \
cpu.o \
cpu_intr.o \
@@ -78,6 +79,7 @@ GENUNIX_OBJS += \
bio.o \
bitmap.o \
blabel.o \
+ brandsys.o \
callb.o \
callout.o \
chdir.o \
@@ -318,6 +320,7 @@ GENUNIX_OBJS += \
urw.o \
utime.o \
utssys.o \
+ uucopy.o \
vfs.o \
vfs_conf.o \
vmem.o \
@@ -360,6 +363,8 @@ PROFILE_OBJS += profile.o
SYSTRACE_OBJS += systrace.o
+LX_SYSTRACE_OBJS += lx_systrace.o
+
LOCKSTAT_OBJS += lockstat.o
FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o
@@ -397,6 +402,10 @@ PTSL_OBJS += tty_pts.o
PTM_OBJS += ptm.o
+LX_PTM_OBJS += lx_ptm.o
+
+LX_AUDIO_OBJS += lx_audio.o
+
PTS_OBJS += pts.o
PTY_OBJS += ptms_conf.o
@@ -937,6 +946,8 @@ DEDUMP_OBJS += dedump.o
DRCOMPAT_OBJS += drcompat.o
+LDLINUX_OBJS += ldlinux.o
+
LDTERM_OBJS += ldterm.o uwidth.o
PCKT_OBJS += pckt.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 69e32b7ee5..27b347c937 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -70,6 +70,10 @@ $(OBJS_DIR)/%.o: $(COMMONBASE)/avl/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
+$(OBJS_DIR)/%.o: $(UTSBASE)/common/brand/sn1/%.c
+ $(COMPILE.c) -o $@ $<
+ $(CTFCONVERT_O)
+
$(OBJS_DIR)/%.o: $(UTSBASE)/common/c2/%.c
$(COMPILE.c) -o $@ $<
$(CTFCONVERT_O)
@@ -911,6 +915,9 @@ $(LINTS_DIR)/%.ln: $(COMMONBASE)/acl/%.c
$(LINTS_DIR)/%.ln: $(COMMONBASE)/avl/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
+$(LINTS_DIR)/%.ln: $(UTSBASE)/common/brand/sn1/%.c
+ @($(LHEAD) $(LINT.c) $< $(LTAIL))
+
$(LINTS_DIR)/%.ln: $(UTSBASE)/common/c2/%.c
@($(LHEAD) $(LINT.c) $< $(LTAIL))
diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
new file mode 100644
index 0000000000..ecd4e8e44d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
@@ -0,0 +1,1558 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <fs/fs_subr.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/dirent.h>
+#include <sys/fs/fifonode.h>
+#include <sys/modctl.h>
+#include <sys/mount.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+
+#include <sys/lx_autofs_impl.h>
+
+/*
+ * External functions
+ *
+ * NOTE(review): these are declared locally rather than through a header;
+ * neither is called in the portion of the file reviewed here, so confirm
+ * the prototypes against their definition.
+ */
+extern uintptr_t space_fetch(char *key);
+extern int space_store(char *key, uintptr_t ptr);
+
+/*
+ * Globals
+ *
+ * lx_autofs_vn_ops is the vnode operations vector that i_vn_alloc()
+ * installs on every vnode it creates.
+ */
+static vfsops_t *lx_autofs_vfsops;
+static vnodeops_t *lx_autofs_vn_ops = NULL;
+static int lx_autofs_fstype;
+static major_t lx_autofs_major;
+static minor_t lx_autofs_minor = 0;
+
+/*
+ * Support functions
+ */
+/*
+ * Free a string whose allocation is exactly strlen(str) + 1 bytes (the
+ * size i_strdup() allocates).  The size is recomputed from the string, so
+ * the string must not have been shortened or lengthened since allocation.
+ */
+static void
+i_strfree(char *str)
+{
+	size_t	nbytes = strlen(str) + 1;
+
+	kmem_free(str, nbytes);
+}
+
+/*
+ * Return a freshly allocated copy of str (including the terminating NUL).
+ * The allocation uses KM_SLEEP.  Free the copy with i_strfree().
+ */
+static char *
+i_strdup(char *str)
+{
+	int	len = strlen(str);
+	char	*copy;
+
+	copy = kmem_alloc(len + 1, KM_SLEEP);
+	bcopy(str, copy, len + 1);
+
+	return (copy);
+}
+
+/*
+ * Convert the base-10 string str into an int stored in *val.
+ *
+ * Returns 0 on success.  Returns -1, leaving *val untouched, if str is
+ * NULL, if ddi_strtol() reports an error, or if the value does not fit
+ * in an int.
+ */
+static int
+i_str_to_int(char *str, int *val)
+{
+	long	parsed;
+
+	if (str == NULL)
+		return (-1);
+
+	if (ddi_strtol(str, NULL, 10, &parsed) != 0)
+		return (-1);
+
+	if (parsed < INT_MIN || parsed > INT_MAX)
+		return (-1);
+
+	*val = parsed;
+	return (0);
+}
+
+/*
+ * Initialize lp as an empty stack of stack_elem_t entries, linked
+ * through their se_list member.
+ */
+static void
+i_stack_init(list_t *lp)
+{
+	size_t	elem_size = sizeof (stack_elem_t);
+	size_t	link_offset = offsetof(stack_elem_t, se_list);
+
+	list_create(lp, elem_size, link_offset);
+}
+
+/*
+ * Tear down a stack created by i_stack_init().  The stack must already
+ * be empty; remaining elements are not freed here.
+ */
+static void
+i_stack_fini(list_t *lp)
+{
+ ASSERT(list_head(lp) == NULL);
+ list_destroy(lp);
+}
+
+/*
+ * Push a new element carrying the three opaque pointers onto the top
+ * (head) of the stack.  The element is allocated with KM_SLEEP and is
+ * freed again by i_stack_pop().
+ */
+static void
+i_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, caddr_t ptr3)
+{
+	stack_elem_t *elem = kmem_alloc(sizeof (stack_elem_t), KM_SLEEP);
+
+	elem->se_ptr1 = ptr1;
+	elem->se_ptr2 = ptr2;
+	elem->se_ptr3 = ptr3;
+
+	list_insert_head(lp, elem);
+}
+
+/*
+ * Pop the top element off the stack and hand back its three pointers.
+ * Any of ptr1, ptr2 and ptr3 may be NULL, in which case that value is
+ * discarded.
+ *
+ * Returns 0 if an element was popped, -1 if the stack was empty.
+ */
+static int
+i_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3)
+{
+	stack_elem_t *top = list_head(lp);
+
+	if (top == NULL)
+		return (-1);
+
+	list_remove(lp, top);
+
+	if (ptr1 != NULL)
+		*ptr1 = top->se_ptr1;
+	if (ptr2 != NULL)
+		*ptr2 = top->se_ptr2;
+	if (ptr3 != NULL)
+		*ptr3 = top->se_ptr3;
+
+	kmem_free(top, sizeof (stack_elem_t));
+	return (0);
+}
+
+/*
+ * Given the vnode for one end of a fifo, return the vnode of the
+ * fifonode recorded as its destination (fn_dest).  No hold is taken on
+ * the returned vnode.
+ */
+static vnode_t *
+fifo_peer_vp(vnode_t *vp)
+{
+	fifonode_t *peer = VTOF(vp)->fn_dest;
+
+	return (FTOV(peer));
+}
+
+/*
+ * Return the lx_autofs vnode that shadows the underlying vnode uvp on
+ * the mount vfsp, creating it if one does not exist yet.
+ *
+ * A new vnode is always allocated up front (outside lav_lock) and is
+ * thrown away if the vnode hash already has an entry for uvp.  On the
+ * "new" path the vnode stores uvp in v_data; on the "existing" path the
+ * existing vnode gains a hold and uvp is released with VN_RELE().
+ * Either way the returned vnode is held.
+ */
+static vnode_t *
+i_vn_alloc(vfs_t *vfsp, vnode_t *uvp)
+{
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *vp, *vp_old;
+
+ /* Allocate a new vnode structure in case we need it. */
+ vp = vn_alloc(KM_SLEEP);
+ vn_setops(vp, lx_autofs_vn_ops);
+ VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev);
+ vp->v_data = uvp;
+ ASSERT(vp->v_count == 1);
+
+ /*
+ * Take a hold on the vfs structure. This is how unmount will
+ * determine if there are any active vnodes in the file system.
+ */
+ VFS_HOLD(vfsp);
+
+ /*
+ * Check if we already have a vnode allocated for this underlying
+ * vnode_t.
+ */
+ mutex_enter(&data->lav_lock);
+ if (mod_hash_find(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) {
+
+ /*
+ * Didn't find an existing node.
+ * Add this node to the hash and return.
+ */
+ VERIFY(mod_hash_insert(data->lav_vn_hash,
+ (mod_hash_key_t)uvp,
+ (mod_hash_val_t)vp) == 0);
+ mutex_exit(&data->lav_lock);
+ return (vp);
+ }
+
+ /*
+ * Take a hold on the existing vnode while lav_lock still keeps it
+ * in the hash; the vnode we allocated above is discarded below.
+ */
+ VN_HOLD(vp_old);
+ mutex_exit(&data->lav_lock);
+
+ /*
+ * Free up the new vnode we allocated, undoing the vfs hold taken
+ * for it and dropping a hold on the underlying vnode.
+ */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+
+ return (vp_old);
+}
+
+/*
+ * Destroy an lx_autofs vnode whose hold count has dropped to zero.
+ *
+ * Must be called with both the mount's lav_lock and vp->v_lock held;
+ * both locks are dropped before this function returns.  The vnode is
+ * removed from the vnode hash, the holds on the underlying vnode and on
+ * the vfs are released, and the vnode itself is freed.
+ */
+static void
+i_vn_free(vnode_t *vp)
+{
+ vfs_t *vfsp = vp->v_vfsp;
+ lx_autofs_vfs_t *data = vfsp->vfs_data;
+ vnode_t *uvp = vp->v_data;
+ vnode_t *vp_tmp;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+ ASSERT(MUTEX_HELD((&vp->v_lock)));
+
+ ASSERT(vp->v_count == 0);
+
+ /* We're about to free this vnode so take it out of the hash. */
+ (void) mod_hash_remove(data->lav_vn_hash,
+ (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp);
+
+ /*
+ * No one else can lookup this vnode any more so there's no need
+ * to hold locks.
+ */
+ mutex_exit(&data->lav_lock);
+ mutex_exit(&vp->v_lock);
+
+ /* Release the underlying vnode. */
+ VN_RELE(uvp);
+ VFS_RELE(vfsp);
+ vn_invalid(vp);
+ vn_free(vp);
+}
+
+/*
+ * Get an automounter lookup request for the directory entry nm.
+ *
+ * If a request for nm is already outstanding, that request is returned
+ * with its reference count bumped and *dup_request set to 1.  Otherwise
+ * a new request (reference count 1) is entered into both the id hash
+ * and the path hash and *dup_request is set to 0.
+ *
+ * Returns NULL (and logs a note) if nm does not fit in the request
+ * packet's name field; *dup_request is not written in that case.
+ */
+static lx_autofs_lookup_req_t *
+i_lalr_alloc(lx_autofs_vfs_t *data, int *dup_request, char *nm)
+{
+	lx_autofs_lookup_req_t *lalr, *lalr_dup;
+
+	/* Pre-allocate a new automounter request before grabbing locks. */
+	lalr = kmem_zalloc(sizeof (*lalr), KM_SLEEP);
+	mutex_init(&lalr->lalr_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&lalr->lalr_cv, NULL, CV_DEFAULT, NULL);
+	lalr->lalr_ref = 1;
+	lalr->lalr_pkt.lap_protover = LX_AUTOFS_PROTO_VERSION;
+
+	/* Assign a unique id for this request. */
+	lalr->lalr_pkt.lap_id = id_alloc(data->lav_ids);
+
+	/*
+	 * The token expected by the linux automount is the name of
+	 * the directory entry to look up.  (And not the entire
+	 * path that is being accessed.)
+	 */
+	lalr->lalr_pkt.lap_name_len = strlen(nm);
+	if (lalr->lalr_pkt.lap_name_len >
+	    (sizeof (lalr->lalr_pkt.lap_name) - 1)) {
+		zcmn_err(getzoneid(), CE_NOTE,
+		    "invalid autofs lookup: \"%s\"", nm);
+		id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+		/* Tear down the sync primitives before freeing their memory. */
+		cv_destroy(&lalr->lalr_cv);
+		mutex_destroy(&lalr->lalr_lock);
+		kmem_free(lalr, sizeof (*lalr));
+		return (NULL);
+	}
+	(void) strlcpy(lalr->lalr_pkt.lap_name, nm,
+	    sizeof (lalr->lalr_pkt.lap_name));
+
+	/* Check for an outstanding request for this path. */
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_path_hash,
+	    (mod_hash_key_t)nm, (mod_hash_val_t *)&lalr_dup) == 0) {
+		/*
+		 * There's already an outstanding request for this
+		 * path so we don't need a new one.  The request we
+		 * pre-allocated was never published, so it can be torn
+		 * down without further synchronization.
+		 */
+		id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+		cv_destroy(&lalr->lalr_cv);
+		mutex_destroy(&lalr->lalr_lock);
+		kmem_free(lalr, sizeof (*lalr));
+		lalr = lalr_dup;
+
+		/* Bump the ref count on the old request. */
+		atomic_add_int(&lalr->lalr_ref, 1);
+
+		*dup_request = 1;
+	} else {
+		/* Add it to the hashes. */
+		VERIFY(mod_hash_insert(data->lav_id_hash,
+		    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+		    (mod_hash_val_t)lalr) == 0);
+		VERIFY(mod_hash_insert(data->lav_path_hash,
+		    (mod_hash_key_t)i_strdup(nm),
+		    (mod_hash_val_t)lalr) == 0);
+
+		*dup_request = 0;
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (lalr);
+}
+
+/*
+ * Look up the outstanding request with the given id.  On success the
+ * request is returned with an extra reference, which the caller must
+ * drop with i_lalr_release().  Returns NULL if no such request exists.
+ */
+static lx_autofs_lookup_req_t *
+i_lalr_find(lx_autofs_vfs_t *data, int id)
+{
+	lx_autofs_lookup_req_t *req;
+	mod_hash_key_t key = (mod_hash_key_t)(uintptr_t)id;
+
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_id_hash, key,
+	    (mod_hash_val_t *)&req) == 0) {
+		/* Take our reference while the hash lock pins the entry. */
+		atomic_add_int(&req->lalr_ref, 1);
+	} else {
+		req = NULL;
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (req);
+}
+
+/*
+ * Finish a lookup request: unpublish it from the id and path hashes,
+ * flag it complete and wake every thread sleeping on it.  The caller's
+ * reference on the request is not consumed.
+ */
+static void
+i_lalr_complete(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	lx_autofs_lookup_req_t *removed;
+
+	/* Once out of the hashes, no new thread can find this request. */
+	mutex_enter(&data->lav_lock);
+	(void) mod_hash_remove(data->lav_id_hash,
+	    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+	    (mod_hash_val_t *)&removed);
+	(void) mod_hash_remove(data->lav_path_hash,
+	    (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+	    (mod_hash_val_t *)&removed);
+	mutex_exit(&data->lav_lock);
+
+	/* Mark the request as complete and wake up all waiters. */
+	mutex_enter(&lalr->lalr_lock);
+	lalr->lalr_complete = 1;
+	cv_broadcast(&lalr->lalr_cv);
+	mutex_exit(&lalr->lalr_lock);
+}
+
+/*
+ * Drop one reference on a lookup request.  When the last reference goes
+ * away the request's id is returned to the id space, its mutex and
+ * condition variable are destroyed and its memory is freed.
+ *
+ * Must not be called with lalr_lock held.
+ */
+static void
+i_lalr_release(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	ASSERT(!MUTEX_HELD(&lalr->lalr_lock));
+	if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0)
+		return;
+	ASSERT(lalr->lalr_ref == 0);
+	id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+
+	/*
+	 * We held the last reference, so nobody can be using the lock or
+	 * sleeping on the cv; destroy them before the memory goes away.
+	 */
+	cv_destroy(&lalr->lalr_cv);
+	mutex_destroy(&lalr->lalr_lock);
+	kmem_free(lalr, sizeof (*lalr));
+}
+
+/*
+ * Give up waiting on a lookup request (the waiter was interrupted).
+ * The caller's reference is dropped.  If other threads still reference
+ * the request it is left alone; otherwise it is removed from the hashes
+ * and destroyed.
+ */
+static void
+i_lalr_abort(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	lx_autofs_lookup_req_t *lalr_tmp;
+
+	/*
+	 * This is a little tricky.  We're aborting the wait for this
+	 * request.  So if anyone else is waiting for this request we
+	 * can't free it, but if no one else is waiting for the request
+	 * we should free it.  The decrement is done under lav_lock so
+	 * that no lookup through the hashes can take a new reference
+	 * between the count reaching zero and the hash removal below.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) {
+		mutex_exit(&data->lav_lock);
+		return;
+	}
+	ASSERT(lalr->lalr_ref == 0);
+
+	/* Remove this request from the hashes so no one can look it up. */
+	(void) mod_hash_remove(data->lav_id_hash,
+	    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+	    (mod_hash_val_t *)&lalr_tmp);
+	(void) mod_hash_remove(data->lav_path_hash,
+	    (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+	    (mod_hash_val_t *)&lalr_tmp);
+	mutex_exit(&data->lav_lock);
+
+	/*
+	 * It's ok to free this now because the ref count was zero.
+	 * Destroy the sync primitives before releasing their memory.
+	 */
+	id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+	cv_destroy(&lalr->lalr_cv);
+	mutex_destroy(&lalr->lalr_lock);
+	kmem_free(lalr, sizeof (*lalr));
+}
+
+/*
+ * Locate both ends of the automounter's communication fifo.
+ *
+ * pgrp names the automounter process (the process group leader, so its
+ * pid equals pgrp) and fd is the write end of the fifo in that process.
+ * On success an extra f_count reference is taken on both file_t's,
+ * *fpp_wr and *fpp_rd are set, and 0 is returned.  On any failure -1 is
+ * returned and the output pointers are not written.
+ */
+static int
+i_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd)
+{
+	proc_t *prp;
+	uf_info_t *fip;
+	uf_entry_t *ufp_wr, *ufp_rd;
+	file_t *fp_wr, *fp_rd;
+	vnode_t *vp_wr, *vp_rd;
+	int i;
+
+	/*
+	 * sprlock() is zone aware, so assuming this mount call was
+	 * initiated by a process in a zone, if it tries to specify
+	 * a pgrp outside of its zone this call will fail.
+	 *
+	 * Also, we want to grab hold of the main automounter process
+	 * and it's going to be the group leader for pgrp, so its
+	 * pid will be equal to pgrp.
+	 */
+	prp = sprlock(pgrp);
+	if (prp == NULL)
+		return (-1);
+	mutex_exit(&prp->p_lock);
+
+	/* Now we want to access the process's open file descriptors. */
+	fip = P_FINFO(prp);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * Sanity check fifo write fd.  The fd comes from a mount option
+	 * and may be negative, which must not be used as a table index.
+	 */
+	if ((fd < 0) || (fd >= fip->fi_nfiles))
+		goto fail;
+
+	/* Get a pointer to the write fifo. */
+	UF_ENTER(ufp_wr, fip, fd);
+	if (((fp_wr = ufp_wr->uf_file) == NULL) ||
+	    ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) {
+		/* Invalid fifo fd. */
+		UF_EXIT(ufp_wr);
+		goto fail;
+	}
+
+	/*
+	 * Now we need to find the read end of the fifo (so that we can
+	 * hold it open, see below.)  We assume that the read end of the
+	 * fifo is in the same process as the write end.
+	 */
+	vp_rd = fifo_peer_vp(vp_wr);
+	for (i = 0; i < fip->fi_nfiles; i++) {
+		/*
+		 * We already hold the uf_lock for the write fd; entering
+		 * it a second time would be a recursive mutex_enter().
+		 */
+		if (i == fd)
+			continue;
+		UF_ENTER(ufp_rd, fip, i);
+		if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+		    (fp_rd->f_vnode == vp_rd))
+			break;
+		UF_EXIT(ufp_rd);
+	}
+	if (i == fip->fi_nfiles) {
+		/* Didn't find it. */
+		UF_EXIT(ufp_wr);
+		goto fail;
+	}
+
+	/*
+	 * We need to drop fi_lock before we can try to acquire f_tlock;
+	 * the good news is that the file pointers are protected because
+	 * we're still holding uf_lock.
+	 */
+	mutex_exit(&fip->fi_lock);
+
+	/*
+	 * Here we bump the open counts on the fifos.  The reason
+	 * that we do this is because when we go to write to the
+	 * fifo we want to ensure that they are actually open (and
+	 * not in the process of being closed) without having to
+	 * stop the automounter.  (If the write end of the fifo
+	 * were closed and we tried to write to it we would panic.
+	 * If the read end of the fifo was closed and we tried to
+	 * write to the other end, the process that invoked the
+	 * lookup operation would get an unexpected SIGPIPE.)
+	 */
+	mutex_enter(&fp_wr->f_tlock);
+	fp_wr->f_count++;
+	ASSERT(fp_wr->f_count >= 2);
+	mutex_exit(&fp_wr->f_tlock);
+
+	mutex_enter(&fp_rd->f_tlock);
+	fp_rd->f_count++;
+	ASSERT(fp_rd->f_count >= 2);
+	mutex_exit(&fp_rd->f_tlock);
+
+	/* Release all our locks. */
+	UF_EXIT(ufp_wr);
+	UF_EXIT(ufp_rd);
+	mutex_enter(&prp->p_lock);
+	sprunlock(prp);
+
+	/* Return the file pointers. */
+	*fpp_rd = fp_rd;
+	*fpp_wr = fp_wr;
+	return (0);
+
+fail:
+	/* Common failure exit: only fi_lock and the sprlock() are held. */
+	mutex_exit(&fip->fi_lock);
+	mutex_enter(&prp->p_lock);
+	sprunlock(prp);
+	return (-1);
+}
+
+/*
+ * mod_hash_walk() callback used by i_fifo_close(): store the key of the
+ * first entry visited into the int that arg points at and stop the walk.
+ */
+/*ARGSUSED*/
+static uint_t
+i_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+	int *idp = arg;
+
+	*idp = (uintptr_t)key;
+	return (MH_WALK_TERMINATE);
+}
+
+/*
+ * Shut down communication with the automounter: close our references
+ * to both ends of the fifo (so lav_fifo_wr/lav_fifo_rd become NULL) and
+ * then complete every request still in the id hash so that its waiters
+ * wake up.
+ */
+static void
+i_fifo_close(lx_autofs_vfs_t *data)
+{
+ /*
+ * Close the fifo to prevent any future requests from
+ * getting sent to the automounter.
+ */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr != NULL) {
+ (void) closef(data->lav_fifo_wr);
+ data->lav_fifo_wr = NULL;
+ }
+ if (data->lav_fifo_rd != NULL) {
+ (void) closef(data->lav_fifo_rd);
+ data->lav_fifo_rd = NULL;
+ }
+ mutex_exit(&data->lav_lock);
+
+ /*
+ * Wake up any threads currently waiting for the automounter.
+ * Note that it's possible for multiple threads to have entered
+ * this function and to be doing the work below simultaneously.
+ */
+ for (;;) {
+ lx_autofs_lookup_req_t *lalr;
+ int id;
+
+ /*
+ * Lookup the first entry in the hash.  The callback stops
+ * the walk at the first entry and stores its id; id stays
+ * -1 if the hash is empty.
+ */
+ id = -1;
+ mod_hash_walk(data->lav_id_hash,
+ i_fifo_close_cb, &id);
+ if (id == -1) {
+ /* No more id's in the hash. */
+ break;
+ }
+ if ((lalr = i_lalr_find(data, id)) == NULL) {
+ /* Someone else beat us to it. */
+ continue;
+ }
+
+ /* Mark the request as complete and release it. */
+ i_lalr_complete(data, lalr);
+ i_lalr_release(data, lalr);
+ }
+}
+
+/*
+ * Check that the automounter process still has the read end of the
+ * fifo open.  Must be called with lav_lock held.
+ *
+ * Returns 0 if some file descriptor in the automounter process refers
+ * to the peer vnode of our write end.  Returns -1 if the fifo has
+ * already been shut down, the process cannot be locked, or no such
+ * descriptor exists.
+ */
+static int
+i_fifo_verify_rd(lx_autofs_vfs_t *data)
+{
+ proc_t *prp;
+ uf_info_t *fip;
+ uf_entry_t *ufp_rd;
+ file_t *fp_rd;
+ vnode_t *vp_rd;
+ int i;
+
+ ASSERT(MUTEX_HELD((&data->lav_lock)));
+
+ /* Check if we've already been shut down. */
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ return (-1);
+ }
+ vp_rd = fifo_peer_vp(data->lav_fifo_wr->f_vnode);
+
+ /*
+ * Grab hold of the main automounter process.  It is the group
+ * leader for lav_pgrp, so its pid is equal to lav_pgrp.  If that
+ * process can no longer be locked we report failure.
+ */
+ prp = sprlock(data->lav_pgrp);
+ if (prp == NULL)
+ return (-1);
+ mutex_exit(&prp->p_lock);
+
+ /* Now we want to access the process's open file descriptors. */
+ fip = P_FINFO(prp);
+ mutex_enter(&fip->fi_lock);
+
+ /*
+ * Scan every descriptor for one that refers to the read end of
+ * the fifo.  We assume that the read end of the fifo is in the
+ * same process as the write end.  On a match the loop exits with
+ * that entry's uf_lock still held.
+ */
+ for (i = 0; i < fip->fi_nfiles; i++) {
+ UF_ENTER(ufp_rd, fip, i);
+ if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+ (fp_rd->f_vnode == vp_rd))
+ break;
+ UF_EXIT(ufp_rd);
+ }
+ if (i == fip->fi_nfiles) {
+ /* Didn't find it. */
+ mutex_exit(&fip->fi_lock);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+ return (-1);
+ }
+
+ /*
+ * Seems the automounter still has the read end of the fifo
+ * open, we're done here. Release all our locks and exit.
+ */
+ mutex_exit(&fip->fi_lock);
+ UF_EXIT(ufp_rd);
+ mutex_enter(&prp->p_lock);
+ sprunlock(prp);
+
+ return (0);
+}
+
+/*
+ * Send one request packet to the automounter over the fifo.
+ *
+ * Returns 0 on success.  Returns ENOENT if the fifo has already been
+ * shut down.  If, after the write, the automounter no longer has the
+ * read end open, the fifo is shut down via i_fifo_close() and the write
+ * error (or ENOENT if the write itself succeeded) is returned.
+ * Otherwise the result of VOP_WRITE() is returned.
+ */
+static int
+i_fifo_write(lx_autofs_vfs_t *data, lx_autofs_pkt_t *lap)
+{
+	struct uio uio;
+	struct iovec iov;
+	file_t *fp_wr, *fp_rd;
+	int error;
+
+	/*
+	 * The catch here is we need to make sure _we_ don't close
+	 * the fifo while writing to it.  (Another thread could come
+	 * along and realize the automounter process is gone and close
+	 * the fifo.)  To do this we bump the open count before we
+	 * write to the fifo.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (data->lav_fifo_wr == NULL) {
+		ASSERT(data->lav_fifo_rd == NULL);
+		mutex_exit(&data->lav_lock);
+		return (ENOENT);
+	}
+	fp_wr = data->lav_fifo_wr;
+	fp_rd = data->lav_fifo_rd;
+
+	/* Bump the open count on the write fifo. */
+	mutex_enter(&fp_wr->f_tlock);
+	fp_wr->f_count++;
+	mutex_exit(&fp_wr->f_tlock);
+
+	/* Bump the open count on the read fifo. */
+	mutex_enter(&fp_rd->f_tlock);
+	fp_rd->f_count++;
+	mutex_exit(&fp_rd->f_tlock);
+
+	mutex_exit(&data->lav_lock);
+
+	/* Describe a single kernel-space buffer holding the packet. */
+	iov.iov_base = (caddr_t)lap;
+	iov.iov_len = sizeof (*lap);
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_loffset = 0;
+	uio.uio_segflg = (short)UIO_SYSSPACE;
+	uio.uio_resid = sizeof (*lap);
+	uio.uio_llimit = 0;
+	uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK;
+	/* Don't hand the write path an uninitialized uio_extflg. */
+	uio.uio_extflg = UIO_COPY_CACHED;
+
+	error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL);
+
+	/* Drop the extra open counts taken above. */
+	(void) closef(fp_wr);
+	(void) closef(fp_rd);
+
+	/*
+	 * After every write we verify that the automounter still has
+	 * these files open.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (i_fifo_verify_rd(data) != 0) {
+		/*
+		 * Something happened to the automounter.
+		 * Close down the communication pipe we setup.
+		 */
+		mutex_exit(&data->lav_lock);
+		i_fifo_close(data);
+		if (error != 0)
+			return (error);
+		return (ENOENT);
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (error);
+}
+
+/*
+ * Read every entry of the directory dvp (skipping "." and "..") and
+ * look each one up.  Directories are pushed onto dir_stack and all other
+ * entries onto file_stack, each as (dvp, held vnode, copy of name).  If
+ * the stack for an entry's kind is NULL the entry's vnode is released
+ * and the entry is ignored.
+ *
+ * Returns 0 on success and -1 if reading the directory or looking up an
+ * entry fails; entries pushed before the failure stay on the stacks.
+ */
+static int
+i_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack)
+{
+	struct uio	uio;
+	struct iovec	iov;
+	dirent64_t	*buf, *ent;
+	vnode_t		*vp;
+	list_t		*stack;
+	size_t		buflen, used;
+	char		*name;
+	int		eof = 0;
+	int		rv = -1;
+	int		ndirents = 64;
+
+	buflen = ndirents * (sizeof (*buf));
+	buf = kmem_alloc(buflen, KM_SLEEP);
+
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = UIO_COPY_CACHED;
+	uio.uio_loffset = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+
+	while (!eof) {
+		int rderr;
+
+		/* Reset the transfer window; uio_loffset carries on. */
+		uio.uio_resid = buflen;
+		iov.iov_base = (char *)buf;
+		iov.iov_len = buflen;
+
+		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+		rderr = VOP_READDIR(dvp, &uio, kcred, &eof);
+		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+		if (rderr != 0)
+			goto out;
+
+		used = buflen - uio.uio_resid;
+		if (used == 0) {
+			/* Nothing more was returned, we're done. */
+			break;
+		}
+
+		/* Walk the variable-length records that were returned. */
+		for (ent = buf; ((intptr_t)ent < (intptr_t)buf + used);
+		    ent = (dirent64_t *)((intptr_t)ent + ent->d_reclen)) {
+
+			name = ent->d_name;
+			if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
+				continue;
+
+			if (VOP_LOOKUP(dvp,
+			    name, &vp, NULL, 0, NULL, kcred) != 0)
+				goto out;
+
+			/* Pick the destination stack by entry kind. */
+			if (vp->v_type == VDIR)
+				stack = dir_stack;
+			else
+				stack = file_stack;
+
+			if (stack == NULL) {
+				VN_RELE(vp);
+				continue;
+			}
+			i_stack_push(stack, (caddr_t)dvp,
+			    (caddr_t)vp, i_strdup(name));
+		}
+	}
+	rv = 0;
+
+out:
+	kmem_free(buf, buflen);
+	return (rv);
+}
+
+/*
+ * Remove the directory entry named path from directory dvp.  If the
+ * entry is a directory, everything beneath it is removed first.  The
+ * operation is best effort: it stops at the first failure and returns
+ * nothing, but always releases the vnode holds and name strings it
+ * accumulated along the way.
+ *
+ * The tree walk is iterative (explicit stacks) because kernel stacks
+ * are too small for recursion.
+ */
+static void
+i_bs_destroy(vnode_t *dvp, char *path)
+{
+	list_t search_stack;
+	list_t dir_stack;
+	list_t file_stack;
+	vnode_t *pdvp, *cdvp, *vp;
+	char *dpath, *fpath;
+	int ret;
+
+	if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred) != 0) {
+		/* A directory entry with this name doesn't actually exist. */
+		return;
+	}
+
+	/*
+	 * v_type is an enumeration, not a bit mask, so it must be compared
+	 * against VDIR for equality.
+	 */
+	if (vp->v_type != VDIR) {
+		/* Easy, the directory entry is a file so delete it. */
+		VN_RELE(vp);
+		(void) VOP_REMOVE(dvp, path, kcred);
+		return;
+	}
+
+	/*
+	 * The directory entry is a subdirectory, now we have a bit more
+	 * work to do.  (We'll have to recurse into the sub directory.)
+	 * It would have been much easier to do this recursively but kernel
+	 * stacks are notoriously small.
+	 *
+	 * Every stack element is (parent directory vnode, held vnode of
+	 * the entry, allocated copy of the entry's name).  Whoever pops an
+	 * element owns the hold and the string.
+	 */
+	i_stack_init(&search_stack);
+	i_stack_init(&dir_stack);
+	i_stack_init(&file_stack);
+
+	/* Save our newfound subdirectory into a list. */
+	i_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp, i_strdup(path));
+
+	/* Do a depth first search into the subdirectories. */
+	while (i_stack_pop(&search_stack,
+	    (caddr_t *)&pdvp, (caddr_t *)&cdvp, &dpath) == 0) {
+
+		/* Get a list of the subdirectories in this directory. */
+		if (i_bs_readdir(cdvp, &search_stack, NULL) != 0) {
+			/* The popped element is ours to clean up. */
+			VN_RELE(cdvp);
+			i_strfree(dpath);
+			goto exit;
+		}
+
+		/* Save the current directory on a separate stack. */
+		i_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)cdvp, dpath);
+	}
+
+	/*
+	 * Now dir_stack contains a list of directories, the deepest paths
+	 * are at the top of the list.  So let's go through and process them.
+	 */
+	while (i_stack_pop(&dir_stack,
+	    (caddr_t *)&pdvp, (caddr_t *)&cdvp, &dpath) == 0) {
+
+		/* Get a list of the files in this directory. */
+		if (i_bs_readdir(cdvp, NULL, &file_stack) != 0) {
+			VN_RELE(cdvp);
+			i_strfree(dpath);
+			goto exit;
+		}
+
+		/* Delete all the files in this directory. */
+		while (i_stack_pop(&file_stack,
+		    NULL, (caddr_t *)&vp, &fpath) == 0) {
+			VN_RELE(vp);
+			ret = VOP_REMOVE(cdvp, fpath, kcred);
+			i_strfree(fpath);
+			if (ret != 0) {
+				/* Don't leak the hold on this directory. */
+				VN_RELE(cdvp);
+				i_strfree(dpath);
+				goto exit;
+			}
+		}
+
+		/* Delete this directory. */
+		VN_RELE(cdvp);
+		ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred);
+		i_strfree(dpath);
+		if (ret != 0)
+			goto exit;
+	}
+
+exit:
+	/* Drain whatever is left on any of the stacks. */
+	while (
+	    (i_stack_pop(&search_stack, NULL, (caddr_t *)&vp, &fpath) == 0) ||
+	    (i_stack_pop(&dir_stack, NULL, (caddr_t *)&vp, &fpath) == 0) ||
+	    (i_stack_pop(&file_stack, NULL, (caddr_t *)&vp, &fpath) == 0)) {
+		VN_RELE(vp);
+		i_strfree(fpath);
+	}
+	i_stack_fini(&search_stack);
+	i_stack_fini(&dir_stack);
+	i_stack_fini(&file_stack);
+}
+
+/*
+ * Create the directory bs_name (mode 0755) inside directory dvp.
+ * Returns the vnode produced by VOP_MKDIR(), or NULL if the mkdir
+ * failed.
+ */
+static vnode_t *
+i_bs_create(vnode_t *dvp, char *bs_name)
+{
+	vattr_t	attrs;
+	vnode_t	*newvp;
+
+	/*
+	 * Only the type and mode are filled in; the mkdir syscall path
+	 * does not appear to need the rest of the vattr_t initialized.
+	 */
+	bzero(&attrs, sizeof (attrs));
+	attrs.va_mask = AT_TYPE|AT_MODE;
+	attrs.va_type = VDIR;
+	attrs.va_mode = 0755;	/* u+rwx,og=rx */
+
+	if (VOP_MKDIR(dvp, bs_name, &attrs, &newvp, kcred) == 0)
+		return (newvp);
+
+	return (NULL);
+}
+
+/*
+ * Ask the automounter to handle a lookup of nm in directory dvp and
+ * wait for it to answer.
+ *
+ * Returns 0 once the request has been marked complete (whether the
+ * automounter reported success or failure).  Returns ENOENT if dvp is
+ * not the mount's root, the caller is in the automounter's process
+ * group, the fifo is gone, or the request could not be allocated.
+ * Returns the i_fifo_write() error if the request could not be sent,
+ * and EINTR if the wait was interrupted by a signal.
+ */
+static int
+i_automounter_call(vnode_t *dvp, char *nm)
+{
+ lx_autofs_lookup_req_t *lalr;
+ lx_autofs_vfs_t *data;
+ int error, dup_request;
+
+ /* Get a pointer to the vfs mount data. */
+ data = dvp->v_vfsp->vfs_data;
+
+ /* The automounter only supports queries in the root directory. */
+ if (dvp != data->lav_root)
+ return (ENOENT);
+
+ /*
+ * Check if the current process is in the automounter's process
+ * group. (If it is, the current process is either the automounter
+ * itself or one of its forked child processes.) If so, don't
+ * redirect this lookup back into the automounter because we'll
+ * hang.
+ */
+ mutex_enter(&pidlock);
+ if (data->lav_pgrp == curproc->p_pgrp) {
+ mutex_exit(&pidlock);
+ return (ENOENT);
+ }
+ mutex_exit(&pidlock);
+
+ /* Verify that the automount process pipe still exists. */
+ mutex_enter(&data->lav_lock);
+ if (data->lav_fifo_wr == NULL) {
+ ASSERT(data->lav_fifo_rd == NULL);
+ mutex_exit(&data->lav_lock);
+ return (ENOENT);
+ }
+ mutex_exit(&data->lav_lock);
+
+ /* Allocate an automounter request structure. */
+ if ((lalr = i_lalr_alloc(data, &dup_request, nm)) == NULL)
+ return (ENOENT);
+
+ /*
+ * If we were the first one to allocate this request then we
+ * need to send it to the automounter.
+ */
+ if ((!dup_request) &&
+ ((error = i_fifo_write(data, &lalr->lalr_pkt)) != 0)) {
+ /*
+ * Unable to send the request to the automounter.
+ * Unblock any other threads waiting on the request
+ * and release the request.
+ */
+ i_lalr_complete(data, lalr);
+ i_lalr_release(data, lalr);
+ return (error);
+ }
+
+ /* Wait for someone to signal us that this request has completed. */
+ mutex_enter(&lalr->lalr_lock);
+ while (!lalr->lalr_complete) {
+ if (cv_wait_sig(&lalr->lalr_cv, &lalr->lalr_lock) == 0) {
+ /* We got a signal, abort this lookup. */
+ mutex_exit(&lalr->lalr_lock);
+ i_lalr_abort(data, lalr);
+ return (EINTR);
+ }
+ }
+ mutex_exit(&lalr->lalr_lock);
+
+ /* Drop the reference we got from i_lalr_alloc(). */
+ i_lalr_release(data, lalr);
+
+ return (0);
+}
+
+/*
+ * Handle the automounter control ioctls that lx_autofs_ioctl() intercepts.
+ *
+ * Callers outside the automounter's process group get ENOENT.  READY and
+ * FAIL both complete the request whose id is passed in 'arg' (ENXIO if no
+ * such request exists); CATATONIC closes the fifos to the automounter.
+ * Every other command returns ENOTSUP.
+ */
+static int
+i_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg)
+{
+	lx_autofs_vfs_t		*data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+	lx_autofs_lookup_req_t	*req;
+	int			is_automounter;
+
+	/* Be strict: only the automounter process group may talk to us. */
+	mutex_enter(&pidlock);
+	is_automounter = (data->lav_pgrp == curproc->p_pgrp);
+	mutex_exit(&pidlock);
+	if (!is_automounter)
+		return (ENOENT);
+
+	switch ((uint_t)cmd) {
+	case LX_AUTOFS_IOC_READY:
+	case LX_AUTOFS_IOC_FAIL:
+		/*
+		 * Success and failure are treated alike: either way the
+		 * request is finished and its waiters must be released.
+		 */
+		req = i_lalr_find(data, (int)arg);
+		if (req == NULL)
+			return (ENXIO);
+
+		i_lalr_complete(data, req);
+		i_lalr_release(data, req);
+		return (0);
+
+	case LX_AUTOFS_IOC_CATATONIC:
+		/* The automounter is shutting down. */
+		i_fifo_close(data);
+		return (0);
+
+	default:
+		return (ENOTSUP);
+	}
+}
+
+/*
+ * Parse the mount options for an lx_autofs mount and record them in
+ * 'data'.
+ *
+ * All four options (fd, pgrp, minproto, maxproto) must be present and
+ * must convert to integers, the protocol range must include version 2,
+ * and the automounter fifos must be found by i_fifo_lookup().  On success
+ * lav_fd, lav_pgrp, lav_fifo_rd and lav_fifo_wr are filled in and 0 is
+ * returned; any failure returns EINVAL and leaves 'data' untouched.
+ */
+static int
+i_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data)
+{
+	char	*fd_opt, *pgrp_opt, *min_opt, *max_opt;
+	int	fd, pgrp, proto_min, proto_max;
+	file_t	*wr_fp, *rd_fp;
+
+	/* Every option is mandatory. */
+	if (vfs_optionisset(vfsp, LX_MNTOPT_FD, &fd_opt) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &pgrp_opt) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &min_opt) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &max_opt) != 1)
+		return (EINVAL);
+
+	/* Convert each option value to an integer. */
+	if (i_str_to_int(fd_opt, &fd) != 0)
+		return (EINVAL);
+	if (i_str_to_int(pgrp_opt, &pgrp) != 0)
+		return (EINVAL);
+	if (i_str_to_int(min_opt, &proto_min) != 0)
+		return (EINVAL);
+	if (i_str_to_int(max_opt, &proto_max) != 0)
+		return (EINVAL);
+
+	/*
+	 * Only v2 of the linux kernel automounter protocol is implemented,
+	 * so the range offered by the mount request has to include it.
+	 */
+	if (proto_min > 2 || proto_max < 2)
+		return (EINVAL);
+
+	/* Find the fifos used to talk to the userland automounter. */
+	if (i_fifo_lookup(pgrp, fd, &wr_fp, &rd_fp) != 0)
+		return (EINVAL);
+
+	/* Everything checked out; remember the options and the fifos. */
+	data->lav_fd = fd;
+	data->lav_pgrp = pgrp;
+	data->lav_fifo_rd = rd_fp;
+	data->lav_fifo_wr = wr_fp;
+	return (0);
+}
+
+/*
+ * VFS entry points
+ */
+/*
+ * VFS_MOUNT entry point.
+ *
+ * Validates the request (privilege, directory mount point, busy check,
+ * non-global zone, not stacked on another lx_autofs vnode), parses the
+ * mount options, creates the backing store directory underneath the mount
+ * point and then builds the per-mount state: device number, request id
+ * space, the path/id/vnode hashes and the root vnode.
+ *
+ * Returns 0 on success.  On failure an errno value is returned and every
+ * resource acquired so far -- including the automounter fifos obtained by
+ * i_parse_mntopt() -- is released again.
+ */
+static int
+lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+	lx_autofs_vfs_t *data;
+	dev_t dev;
+	char name[40];
+	int error;
+
+	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+		return (EPERM);
+
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT)))
+		return (EBUSY);
+
+	/* We don't support mounts in the global zone. */
+	if (getzoneid() == GLOBAL_ZONEID)
+		return (EPERM);
+
+	/* We don't support mounting on top of ourselves. */
+	if (vn_matchops(mvp, lx_autofs_vn_ops))
+		return (EPERM);
+
+	/* Allocate a vfs struct. */
+	data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP);
+
+	/* Parse mount options. */
+	if ((error = i_parse_mntopt(vfsp, data)) != 0) {
+		kmem_free(data, sizeof (lx_autofs_vfs_t));
+		return (error);
+	}
+
+	/* Initialize the backing store. */
+	i_bs_destroy(mvp, LX_AUTOFS_BS_DIR);
+	if ((data->lav_bs_vp = i_bs_create(mvp, LX_AUTOFS_BS_DIR)) == NULL) {
+		/*
+		 * i_parse_mntopt() left us owning the automounter fifos.
+		 * Close them the same way lx_autofs_unmount() does so the
+		 * file references are not leaked.  lav_lock has not been
+		 * initialized yet, so closef() is called directly.
+		 */
+		if (data->lav_fifo_wr != NULL)
+			(void) closef(data->lav_fifo_wr);
+		if (data->lav_fifo_rd != NULL)
+			(void) closef(data->lav_fifo_rd);
+		kmem_free(data, sizeof (lx_autofs_vfs_t));
+		return (EBUSY);
+	}
+	data->lav_bs_name = LX_AUTOFS_BS_DIR;
+
+	/* We have to hold the underlying vnode we're mounted on. */
+	data->lav_mvp = mvp;
+	VN_HOLD(mvp);
+
+	/* Initialize vfs fields */
+	vfsp->vfs_bsize = DEV_BSIZE;
+	vfsp->vfs_fstype = lx_autofs_fstype;
+	vfsp->vfs_data = data;
+
+	/* Invent a dev_t (sigh) */
+	do {
+		dev = makedevice(lx_autofs_major,
+		    atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32);
+	} while (vfs_devismounted(dev));
+	vfsp->vfs_dev = dev;
+	vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype);
+
+	/* Create an id space arena for automounter requests. */
+	(void) snprintf(name, sizeof (name), "lx_autofs_id_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_ids = id_space_create(name, 1, INT_MAX);
+
+	/* Create hashes to keep track of automounter requests. */
+	mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL);
+	(void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_path_hash = mod_hash_create_strhash(name,
+	    LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor);
+	(void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_id_hash = mod_hash_create_idhash(name,
+	    LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor);
+
+	/* Create a hash to keep track of vnodes. */
+	(void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_vn_hash = mod_hash_create_ptrhash(name,
+	    LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor,
+	    sizeof (vnode_t));
+
+	/* Create root vnode */
+	data->lav_root = i_vn_alloc(vfsp, data->lav_bs_vp);
+	data->lav_root->v_flag |=
+	    VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+	return (0);
+}
+
+/*
+ * VFS_UNMOUNT entry point.
+ *
+ * Forced unmounts are not supported (ENOTSUP).  The unmount is refused
+ * with EBUSY while any vnode other than the root is outstanding or while
+ * the root vnode has extra holds.  Otherwise everything lx_autofs_mount()
+ * set up is torn down: the automounter fifos, the root vnode, the backing
+ * store directory, the hold on the covered vnode, the hashes, the id
+ * space, the per-mount lock and finally the per-mount structure itself.
+ */
+static int
+lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
+{
+	lx_autofs_vfs_t *data;
+
+	if (secpolicy_fs_unmount(cr, vfsp) != 0)
+		return (EPERM);
+
+	/* We do not currently support forced unmounts. */
+	if (flag & MS_FORCE)
+		return (ENOTSUP);
+
+	/*
+	 * We should never have a reference count of less than 2: one for the
+	 * caller, one for the root vnode.
+	 */
+	ASSERT(vfsp->vfs_count >= 2);
+
+	/* If there are any outstanding vnodes, we can't unmount. */
+	if (vfsp->vfs_count > 2)
+		return (EBUSY);
+
+	/* Check for any remaining holds on the root vnode. */
+	data = vfsp->vfs_data;
+	ASSERT(data->lav_root->v_vfsp == vfsp);
+	if (data->lav_root->v_count > 1)
+		return (EBUSY);
+
+	/* Close the fifo to the automount process. */
+	if (data->lav_fifo_wr != NULL)
+		(void) closef(data->lav_fifo_wr);
+	if (data->lav_fifo_rd != NULL)
+		(void) closef(data->lav_fifo_rd);
+
+	/*
+	 * We have to release our hold on our root vnode before we can
+	 * delete the backing store. (Since the root vnode is linked
+	 * to the backing store.)
+	 */
+	VN_RELE(data->lav_root);
+
+	/* Cleanup the backing store. */
+	i_bs_destroy(data->lav_mvp, data->lav_bs_name);
+	VN_RELE(data->lav_mvp);
+
+	/* Cleanup our remaining data structures. */
+	mod_hash_destroy_strhash(data->lav_path_hash);
+	mod_hash_destroy_idhash(data->lav_id_hash);
+	mod_hash_destroy_ptrhash(data->lav_vn_hash);
+	id_space_destroy(data->lav_ids);
+
+	/* Pair the mutex_init() done in lx_autofs_mount(). */
+	mutex_destroy(&data->lav_lock);
+	kmem_free(data, sizeof (lx_autofs_vfs_t));
+
+	return (0);
+}
+
+/*
+ * VFS_ROOT entry point: hand back the root vnode of this mount with an
+ * additional hold taken on behalf of the caller.  Always returns 0.
+ */
+static int
+lx_autofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	vnode_t *rootvp = ((lx_autofs_vfs_t *)vfsp->vfs_data)->lav_root;
+
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * VFS_STATVFS entry point.
+ *
+ * The statistics come from the filesystem that holds the vnode underneath
+ * our root; we then overwrite the fields that identify the filesystem
+ * (fsid, base type, flags, fstr) so that they describe this mount
+ * instead.  Returns 0, or the error from the underlying VFS_STATVFS().
+ */
+static int
+lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+	lx_autofs_vfs_t	*data = vfsp->vfs_data;
+	vnode_t		*under_root = data->lav_root->v_data;
+	dev32_t		fsid32;
+	int		err;
+
+	err = VFS_STATVFS(under_root->v_vfsp, sp);
+	if (err != 0)
+		return (err);
+
+	/* Replace the identifying fields with our own values. */
+	(void) cmpldev(&fsid32, vfsp->vfs_dev);
+	sp->f_fsid = fsid32;
+	(void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name,
+	    sizeof (sp->f_basetype));
+	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	bzero(sp->f_fstr, sizeof (sp->f_fstr));
+	return (0);
+}
+
+static const fs_operation_def_t lx_autofs_vfstops[] = { /* VFS operations template; passed to vfs_setfsops() by lx_autofs_init() */
+ { VFSNAME_MOUNT, lx_autofs_mount }, /* parse options and build the per-mount state */
+ { VFSNAME_UNMOUNT, lx_autofs_unmount }, /* tear the per-mount state down again */
+ { VFSNAME_ROOT, lx_autofs_root }, /* return a held root vnode */
+ { VFSNAME_STATVFS, lx_autofs_statvfs }, /* underlying fs statistics with our identity */
+ { NULL, NULL } /* end of table */
+};
+
+/*
+ * VOP entry points - simple passthrough
+ *
+ * For most VOP entry points we can simply pass the request on to
+ * the underlying filesystem we're mounted on.
+ */
+/*
+ * VOP_CLOSE: forwarded unchanged to the underlying vnode stored in v_data.
+ */
+static int
+lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
+{
+	vnode_t *under_vp = (vnode_t *)vp->v_data;
+	int rv;
+
+	rv = VOP_CLOSE(under_vp, flag, count, offset, cr);
+	return (rv);
+}
+
+/*
+ * VOP_READDIR: forwarded unchanged to the underlying vnode stored in
+ * v_data.
+ */
+static int
+lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
+{
+	vnode_t *under_vp = (vnode_t *)vp->v_data;
+	int rv;
+
+	rv = VOP_READDIR(under_vp, uiop, cr, eofp);
+	return (rv);
+}
+
+/*
+ * VOP_ACCESS: forwarded unchanged to the underlying vnode stored in
+ * v_data.
+ */
+static int
+lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr)
+{
+	vnode_t *under_vp = (vnode_t *)vp->v_data;
+	int rv;
+
+	rv = VOP_ACCESS(under_vp, mode, flags, cr);
+	return (rv);
+}
+
+/*
+ * VOP_RWLOCK: forwarded unchanged to the underlying vnode stored in
+ * v_data.
+ */
+static int
+lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+	vnode_t *under_vp = (vnode_t *)vp->v_data;
+	int rv;
+
+	rv = VOP_RWLOCK(under_vp, write_lock, ctp);
+	return (rv);
+}
+
+/*
+ * VOP_RWUNLOCK: forwarded unchanged to the underlying vnode stored in
+ * v_data.
+ */
+static void
+lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+	vnode_t *under_vp = (vnode_t *)vp->v_data;
+
+	VOP_RWUNLOCK(under_vp, write_lock, ctp);
+}
+
+/*
+ * VOP_RMDIR: forwarded to the underlying directory vnode.
+ *
+ * cdir is the calling process's current directory.  When it is one of our
+ * own vnodes it is translated to its underlying vnode first, so that the
+ * underlying filesystem is handed vnodes it knows about.  (cdir appears to
+ * be used only to stop a user from deleting their current directory.)
+ */
+/*ARGSUSED*/
+static int
+lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr)
+{
+	vnode_t *under_dvp = dvp->v_data;
+	vnode_t *under_cdir = cdir;
+
+	if (vn_matchops(cdir, lx_autofs_vn_ops))
+		under_cdir = cdir->v_data;
+
+	return (VOP_RMDIR(under_dvp, nm, under_cdir, cr));
+}
+
+/*
+ * VOP entry points - special passthrough
+ *
+ * For some VOP entry points we will first pass the request on to
+ * the underlying filesystem we're mounted on. If there's an error
+ * then we immediately return the error, but if the request succeeds
+ * we have to do some extra work before returning.
+ */
+/*
+ * VOP_OPEN: open the underlying vnode.
+ *
+ * If the underlying open hands back a different vnode (a clone open), a
+ * new lx_autofs vnode wrapping it is returned through vpp and the hold on
+ * the original vnode is dropped.  Otherwise *vpp is left alone.  Returns
+ * 0, or the error from the underlying VOP_OPEN().
+ */
+static int
+lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr)
+{
+	vnode_t	*orig_vp = *vpp;
+	vnode_t	*under_vp = orig_vp->v_data;
+	int	err;
+
+	err = VOP_OPEN(&under_vp, flag, cr);
+	if (err != 0)
+		return (err);
+
+	/* A changed underlying vnode means this was a clone open. */
+	if (under_vp != orig_vp->v_data) {
+		*vpp = i_vn_alloc(orig_vp->v_vfsp, under_vp);
+		VN_RELE(orig_vp);
+	}
+	return (0);
+}
+
+/*
+ * VOP_GETATTR: fetch the attributes of the underlying vnode and then
+ * report our own device as the filesystem id.  Returns 0, or the error
+ * from the underlying VOP_GETATTR().
+ */
+static int
+lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
+{
+	vnode_t	*under_vp = vp->v_data;
+	int	err;
+
+	err = VOP_GETATTR(under_vp, vap, flags, cr);
+	if (err == 0) {
+		/* Stamp the result with this mount's filesystem id. */
+		vap->va_fsid = vp->v_vfsp->vfs_dev;
+	}
+	return (err);
+}
+
+/*
+ * VOP_MKDIR: create the directory in the underlying filesystem and wrap
+ * the resulting vnode in a new lx_autofs vnode, which is returned through
+ * vpp.  On success va_fsid in the caller's vattr is also set to our own
+ * device.  Returns 0, or the error from the underlying VOP_MKDIR().
+ */
+static int
+lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp,
+    cred_t *cr)
+{
+	vnode_t	*under_dvp = dvp->v_data;
+	vnode_t	*new_under_vp = NULL;
+	vfs_t	*vfsp = dvp->v_vfsp;
+	int	err;
+
+	err = VOP_MKDIR(under_dvp, nm, vap, &new_under_vp, cr);
+	if (err != 0)
+		return (err);
+
+	/* Stamp the attributes with this mount's filesystem id. */
+	vap->va_fsid = vfsp->vfs_dev;
+
+	/* Hand back one of our vnodes on top of the new directory. */
+	*vpp = i_vn_alloc(vfsp, new_under_vp);
+	return (0);
+}
+
+/*
+ * VOP entry points - custom
+ */
+/*
+ * VOP_INACTIVE: drop the hold vn_rele() passed to us and, if it was the
+ * last one, free the vnode.
+ *
+ * The per-mount lock is taken before the vnode lock: if the vnode is
+ * about to be freed, nobody may be allowed to find it in the vnode hash
+ * in the meantime.  Note that i_vn_free() is entered with both locks
+ * still held and neither is released here afterwards.
+ */
+/*ARGSUSED*/
+static void
+lx_autofs_inactive(struct vnode *vp, struct cred *cr)
+{
+	lx_autofs_vfs_t *data = vp->v_vfsp->vfs_data;
+
+	mutex_enter(&data->lav_lock);
+	mutex_enter(&vp->v_lock);
+
+	if (vp->v_count < 1) {
+		panic("lx_autofs_inactive: bad v_count");
+		/*NOTREACHED*/
+	}
+
+	/* Give up the temporary hold taken by vn_rele(). */
+	vp->v_count--;
+	if (vp->v_count == 0) {
+		/*
+		 * That was the last hold.  Nobody should be blocked on
+		 * the vnode lock, since the vnode is about to go away.
+		 */
+		i_vn_free(vp);
+		return;
+	}
+
+	/* Somebody else still holds the vnode; leave it alone. */
+	mutex_exit(&vp->v_lock);
+	mutex_exit(&data->lav_lock);
+}
+
+/*
+ * VOP_LOOKUP: look the name up in the underlying directory and, only if
+ * it does not exist there (ENOENT), ask the automounter to mount it and
+ * retry the lookup once.  On success a new lx_autofs vnode wrapping the
+ * underlying vnode is returned through vpp.
+ *
+ * Returns 0 on success, otherwise the error from the underlying lookup or
+ * from i_automounter_call().
+ */
+static int
+lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, cred_t *cr)
+{
+	vnode_t	*under_dvp = dvp->v_data;
+	vnode_t	*under_vp = NULL;
+	int	err;
+
+	/* First see whether this path component already exists. */
+	err = VOP_LOOKUP(under_dvp, nm, &under_vp, pnp, flags, rdir, cr);
+
+	/* Only a missing name is worth bothering the automounter with. */
+	if (err == ENOENT) {
+		err = i_automounter_call(dvp, nm);
+		if (err != 0)
+			return (err);
+
+		/* The automounter is done; try the lookup again. */
+		err = VOP_LOOKUP(under_dvp, nm, &under_vp, pnp, flags,
+		    rdir, cr);
+	}
+	if (err != 0)
+		return (err);
+
+	*vpp = i_vn_alloc(dvp->v_vfsp, under_vp);
+	return (0);
+}
+
+/*
+ * VOP_IOCTL: the automounter control ioctls are handled by
+ * i_automounter_ioctl(); every other command is passed through to the
+ * underlying vnode.
+ */
+/*ARGSUSED*/
+static int
+lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr,
+    int *rvalp)
+{
+	int rv;
+
+	switch ((uint_t)cmd) {
+	case LX_AUTOFS_IOC_READY:
+	case LX_AUTOFS_IOC_FAIL:
+	case LX_AUTOFS_IOC_CATATONIC:
+	case LX_AUTOFS_IOC_EXPIRE:
+	case LX_AUTOFS_IOC_PROTOVER:
+	case LX_AUTOFS_IOC_SETTIMEOUT:
+		/* One of ours; never reaches the underlying filesystem. */
+		rv = i_automounter_ioctl(vp, cmd, arg);
+		break;
+
+	default:
+		/* Anything else belongs to the underlying vnode. */
+		rv = VOP_IOCTL((vnode_t *)vp->v_data, cmd, arg, mode, cr,
+		    rvalp);
+		break;
+	}
+
+	return (rv);
+}
+
+/*
+ * VOP entry points definitions
+ *
+ * Vnode operations template for lx_autofs vnodes.  lx_autofs_init()
+ * passes this table to vn_make_ops() to build lx_autofs_vn_ops.  The
+ * list is terminated by the { NULL } entry.
+ */
+static const fs_operation_def_t lx_autofs_tops_root[] = {
+ { VOPNAME_OPEN, lx_autofs_open },
+ { VOPNAME_CLOSE, lx_autofs_close },
+ { VOPNAME_IOCTL, lx_autofs_ioctl },
+ { VOPNAME_RWLOCK, lx_autofs_rwlock },
+ { VOPNAME_RWUNLOCK, (fs_generic_func_p)lx_autofs_rwunlock }, /* cast: handler returns void */
+ { VOPNAME_GETATTR, lx_autofs_getattr },
+ { VOPNAME_ACCESS, lx_autofs_access },
+ { VOPNAME_READDIR, lx_autofs_readdir },
+ { VOPNAME_LOOKUP, lx_autofs_lookup },
+ { VOPNAME_INACTIVE, (fs_generic_func_p)lx_autofs_inactive }, /* cast: handler returns void */
+ { VOPNAME_MKDIR, lx_autofs_mkdir },
+ { VOPNAME_RMDIR, lx_autofs_rmdir },
+ { NULL } /* end of table */
+};
+
+/*
+ * lx_autofs_init() gets invoked via the mod_install() call in
+ * this module's _init() routine. Therefore, the code that cleans
+ * up the structures we allocate below is actually found in
+ * our _fini() routine.
+ */
+/*
+ * Filesystem type initialization routine, registered through the vfsdef_t
+ * of this module.
+ *
+ * Picks the major number used for the device numbers invented at mount
+ * time: a value previously saved under LX_AUTOFS_SPACE_KEY_UDEV is reused
+ * if there is one, otherwise a fresh one is obtained from getudev() and
+ * saved.  Then the VFS operations and the vnode operations are
+ * registered.
+ *
+ * Returns 0 on success, EAGAIN if no major number could be obtained or
+ * saved, or the error from vfs_setfsops() / vn_make_ops().
+ */
+/* ARGSUSED */
+static int
+lx_autofs_init(int fstype, char *name)
+{
+	int err;
+
+	lx_autofs_major = (major_t)space_fetch(LX_AUTOFS_SPACE_KEY_UDEV);
+	if (lx_autofs_major == 0) {
+		/* Nothing saved yet; allocate a major number and keep it. */
+		lx_autofs_major = getudev();
+		if (lx_autofs_major == (major_t)-1) {
+			cmn_err(CE_WARN, "lx_autofs_init: "
+			    "can't get unique device number");
+			return (EAGAIN);
+		}
+
+		if (space_store(LX_AUTOFS_SPACE_KEY_UDEV,
+		    (uintptr_t)lx_autofs_major) != 0) {
+			cmn_err(CE_WARN, "lx_autofs_init: "
+			    "can't save unique device number");
+			return (EAGAIN);
+		}
+	}
+
+	lx_autofs_fstype = fstype;
+
+	err = vfs_setfsops(fstype, lx_autofs_vfstops, &lx_autofs_vfsops);
+	if (err != 0) {
+		cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template");
+		return (err);
+	}
+
+	err = vn_make_ops("lx_autofs vnode ops", lx_autofs_tops_root,
+	    &lx_autofs_vn_ops);
+	if (err != 0) {
+		/* Undo the vfs_setfsops() above before failing. */
+		VERIFY(vfs_freevfsops_by_type(fstype) == 0);
+		lx_autofs_vn_ops = NULL;
+		return (err);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Module linkage
+ *
+ * lx_autofs_mntopt lists the mount options this filesystem recognizes;
+ * each one carries a value (MO_HASVALUE) and all four are required by
+ * i_parse_mntopt().  The vfsdef_t names the filesystem type and points
+ * at lx_autofs_init(), and modlfs/modlinkage are what _init(), _info()
+ * and _fini() hand to the module framework.
+ */
+static mntopt_t lx_autofs_mntopt[] = {
+ { LX_MNTOPT_FD, NULL, 0, MO_HASVALUE }, /* saved as lav_fd */
+ { LX_MNTOPT_PGRP, NULL, 0, MO_HASVALUE }, /* saved as lav_pgrp */
+ { LX_MNTOPT_MINPROTO, NULL, 0, MO_HASVALUE }, /* must be <= 2 */
+ { LX_MNTOPT_MAXPROTO, NULL, 0, MO_HASVALUE } /* must be >= 2 */
+};
+
+static mntopts_t lx_autofs_mntopts = {
+ sizeof (lx_autofs_mntopt) / sizeof (mntopt_t), /* number of entries */
+ lx_autofs_mntopt
+};
+
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ LX_AUTOFS_NAME,
+ lx_autofs_init, /* filesystem type initialization routine */
+ VSW_HASPROTO | VSW_VOLATILEDEV,
+ &lx_autofs_mntopts /* mount option prototype */
+};
+
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "linux autofs filesystem", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+/*
+ * Module load entry point: install the filesystem module.
+ */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module information entry point.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point.
+ *
+ * Once mod_remove() has succeeded, the vnode operations created by
+ * lx_autofs_init() are released.  If mod_remove() fails its error is
+ * returned and nothing is freed.
+ */
+int
+_fini(void)
+{
+	int err = mod_remove(&modlinkage);
+
+	if (err != 0)
+		return (err);
+
+	if (lx_autofs_vn_ops != NULL) {
+		vn_freevnodeops(lx_autofs_vn_ops);
+		lx_autofs_vn_ops = NULL;
+	}
+
+	/*
+	 * lx_autofs_init() calls vfs_freevfsops_by_type() when it fails
+	 * after vfs_setfsops().  No such call is needed here: the fs
+	 * framework already did it as part of the mod_remove() above.
+	 */
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
new file mode 100644
index 0000000000..ae049e2792
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
@@ -0,0 +1,395 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/modctl.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/frame.h>
+#include <sys/dtrace.h>
+#include <sys/dtrace_impl.h>
+
+#include <sys/lx_impl.h>
+
+#define LX_SYSTRACE_SHIFT 16 /* bit position of the entry flag in a probe's parg */
+#define LX_SYSTRACE_ISENTRY(x) ((int)(x) >> LX_SYSTRACE_SHIFT) /* non-zero for a parg built by LX_SYSTRACE_ENTRY() */
+#define LX_SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << LX_SYSTRACE_SHIFT) - 1)) /* low 16 bits: index into lx_systrace_sysent[] */
+#define LX_SYSTRACE_ENTRY(id) ((1 << LX_SYSTRACE_SHIFT) | (id)) /* parg of an "entry" probe */
+#define LX_SYSTRACE_RETURN(id) (id) /* parg of a "return" probe */
+
+#define LX_SYSTRACE_ENTRY_AFRAMES 2 /* aframes given to dtrace_probe_create() for "entry" probes */
+#define LX_SYSTRACE_RETURN_AFRAMES 4 /* aframes given to dtrace_probe_create() for "return" probes */
+
+typedef struct lx_systrace_sysent {
+ const char *lss_name; /* system call name, taken from lx_sysent[].sy_name */
+ dtrace_id_t lss_entry; /* id of the enabled "entry" probe, else DTRACE_IDNONE */
+ dtrace_id_t lss_return; /* id of the enabled "return" probe, else DTRACE_IDNONE */
+} lx_systrace_sysent_t;
+
+static dev_info_t *lx_systrace_devi; /* set by lx_systrace_attach() */
+static dtrace_provider_id_t lx_systrace_id; /* filled in by dtrace_register() */
+static kmutex_t lx_systrace_lock; /* held while updating lx_systrace_nenabled */
+static uint_t lx_systrace_nenabled; /* count of currently enabled probes */
+
+static int lx_systrace_nsysent; /* number of entries in lx_systrace_sysent[] */
+static lx_systrace_sysent_t *lx_systrace_sysent; /* per-system-call table, allocated in attach */
+
+/*
+ * Probe trigger installed as lx_systrace_entry_ptr.  Fires the "entry"
+ * probe for system call 'sysnum' if one is enabled; out-of-range system
+ * call numbers are ignored.  Only arg0..arg4 are passed to dtrace_probe()
+ * directly.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+    ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+	dtrace_id_t probe;
+
+	if (sysnum >= lx_systrace_nsysent)
+		return;
+
+	probe = lx_systrace_sysent[sysnum].lss_entry;
+	if (probe == DTRACE_IDNONE)
+		return;
+
+	dtrace_probe(probe, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*
+ * Probe trigger installed as lx_systrace_return_ptr.  Fires the "return"
+ * probe for system call 'sysnum' if one is enabled; out-of-range system
+ * call numbers are ignored.  Only arg0..arg4 are passed to dtrace_probe()
+ * directly.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+    ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+	dtrace_id_t probe;
+
+	if (sysnum >= lx_systrace_nsysent)
+		return;
+
+	probe = lx_systrace_sysent[sysnum].lss_return;
+	if (probe == DTRACE_IDNONE)
+		return;
+
+	dtrace_probe(probe, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*
+ * DTrace provide callback.
+ *
+ * Requests for a specific probe description are ignored.  Otherwise an
+ * "entry" and a "return" probe are created for every system call in the
+ * table that does not have its "entry" probe yet, and the enabled probe
+ * ids of that table slot are reset to DTRACE_IDNONE.  The probe argument
+ * encodes the system call number and the entry/return flag.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc)
+{
+	lx_systrace_sysent_t	*ent;
+	int			sysnum;
+
+	if (desc != NULL)
+		return;
+
+	for (sysnum = 0; sysnum < lx_systrace_nsysent; sysnum++) {
+		ent = &lx_systrace_sysent[sysnum];
+
+		/* Probes for this system call already exist. */
+		if (dtrace_probe_lookup(lx_systrace_id, NULL,
+		    ent->lss_name, "entry") != 0)
+			continue;
+
+		(void) dtrace_probe_create(lx_systrace_id, NULL,
+		    ent->lss_name, "entry", LX_SYSTRACE_ENTRY_AFRAMES,
+		    (void *)((uintptr_t)LX_SYSTRACE_ENTRY(sysnum)));
+
+		(void) dtrace_probe_create(lx_systrace_id, NULL,
+		    ent->lss_name, "return", LX_SYSTRACE_RETURN_AFRAMES,
+		    (void *)((uintptr_t)LX_SYSTRACE_RETURN(sysnum)));
+
+		ent->lss_entry = DTRACE_IDNONE;
+		ent->lss_return = DTRACE_IDNONE;
+	}
+}
+
+/*
+ * DTrace enable callback.
+ *
+ * Bumps the count of enabled probes, switching system call tracing on in
+ * the brand when the first probe is enabled, and then records the probe
+ * id in the entry or return slot selected by 'parg'.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_enable(void *arg, dtrace_id_t id, void *parg)
+{
+	uintptr_t	cookie = (uintptr_t)parg;
+	int		sysnum = LX_SYSTRACE_SYSNUM(cookie);
+	dtrace_id_t	*slot;
+
+	ASSERT(sysnum < lx_systrace_nsysent);
+
+	mutex_enter(&lx_systrace_lock);
+	lx_systrace_nenabled++;
+	if (lx_systrace_nenabled == 1)
+		lx_brand_systrace_enable();
+	mutex_exit(&lx_systrace_lock);
+
+	slot = LX_SYSTRACE_ISENTRY(cookie) ?
+	    &lx_systrace_sysent[sysnum].lss_entry :
+	    &lx_systrace_sysent[sysnum].lss_return;
+	*slot = id;
+}
+
+/*
+ * DTrace disable callback.
+ *
+ * Clears the entry or return slot selected by 'parg' and then drops the
+ * count of enabled probes, switching system call tracing off in the
+ * brand when the last probe goes away.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_disable(void *arg, dtrace_id_t id, void *parg)
+{
+	uintptr_t	cookie = (uintptr_t)parg;
+	int		sysnum = LX_SYSTRACE_SYSNUM(cookie);
+	dtrace_id_t	*slot;
+
+	ASSERT(sysnum < lx_systrace_nsysent);
+
+	slot = LX_SYSTRACE_ISENTRY(cookie) ?
+	    &lx_systrace_sysent[sysnum].lss_entry :
+	    &lx_systrace_sysent[sysnum].lss_return;
+	*slot = DTRACE_IDNONE;
+
+	mutex_enter(&lx_systrace_lock);
+	lx_systrace_nenabled--;
+	if (lx_systrace_nenabled == 0)
+		lx_brand_systrace_disable();
+	mutex_exit(&lx_systrace_lock);
+}
+
+/*ARGSUSED*/
+static void
+lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg)
+{ /* Intentionally empty: parg is an encoded integer, so there is no per-probe state to free. */
+}
+
+/*ARGSUSED*/
+static uint64_t
+lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+ int aframes)
+{
+ struct frame *fp = (struct frame *)dtrace_getfp();
+ uintptr_t *stack;
+ uint64_t val = 0;
+ int i;
+
+ if (argno >= 6) /* the callbacks only take arg0..arg5 */
+ return (0);
+
+ /*
+ * Fetch a probe argument straight from the stack frame of the
+ * lx_systrace_entry()/lx_systrace_return() callback.
+ *
+ * Walk the four frames down the stack to the entry or return callback.
+ * Our callback calls dtrace_probe() which calls dtrace_dif_variable()
+ * which invokes this function to get the extended arguments. We get
+ * the frame pointer in via call to dtrace_getfp() above which makes for
+ * four frames.
+ *
+ * NOTE(review): this depends on that exact call chain; anything that
+ * adds or removes a frame between the callback and here would break
+ * it.
+ */
+ for (i = 0; i < 4; i++) {
+ fp = (struct frame *)fp->fr_savfp;
+ }
+
+ stack = (uintptr_t *)&fp[1]; /* first word past the saved frame header */
+
+ /*
+ * Skip the first argument to the callback -- the system call number.
+ */
+ argno++;
+
+#ifdef __amd64
+ /*
+ * On amd64, the first 6 arguments are passed in registers while
+ * subsequent arguments are on the stack.
+ *
+ * NOTE(review): after this adjustment the index is negative unless
+ * the caller asked for argno 5.  Presumably DTrace only calls this
+ * routine for arguments that were not passed to dtrace_probe()
+ * directly -- confirm.
+ */
+ argno -= 6;
+#endif
+
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); /* the load below must not take a fatal fault */
+ val = stack[argno];
+ DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+ return (val);
+}
+
+
+static const dtrace_pattr_t lx_systrace_attr = { /* stability attributes passed to dtrace_register() */
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+static dtrace_pops_t lx_systrace_pops = { /* provider callbacks passed to dtrace_register() */
+ lx_systrace_provide, /* creates the entry/return probes */
+ NULL,
+ lx_systrace_enable, /* records the probe id, bumps the enable count */
+ lx_systrace_disable, /* clears the probe id, drops the enable count */
+ NULL,
+ NULL,
+ NULL,
+ lx_systrace_getarg, /* reads an argument from the callback's stack frame */
+ NULL,
+ lx_systrace_destroy /* no-op */
+};
+
+/*
+ * attach(9E) entry point.
+ *
+ * DDI_RESUME succeeds without doing anything and every command other
+ * than DDI_ATTACH fails.  For DDI_ATTACH the minor node is created, the
+ * "lx-syscall" provider is registered with DTrace, a probe table with
+ * one slot per lx_sysent[] entry is allocated and filled in, and finally
+ * the probe triggers are pointed at our callbacks.
+ */
+static int
+lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+	int nsys, idx, registered;
+
+	if (cmd == DDI_RESUME)
+		return (DDI_SUCCESS);
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	registered =
+	    (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR,
+	    0, DDI_PSEUDO, NULL) != DDI_FAILURE) &&
+	    (dtrace_register("lx-syscall", &lx_systrace_attr,
+	    DTRACE_PRIV_KERNEL, 0, &lx_systrace_pops, NULL,
+	    &lx_systrace_id) == 0);
+	if (!registered) {
+		ddi_remove_minor_node(devi, NULL);
+		return (DDI_FAILURE);
+	}
+
+	ddi_report_dev(devi);
+	lx_systrace_devi = devi;
+
+	/* Count the lx_brand system calls; the list ends at a NULL sy_callc. */
+	nsys = 0;
+	while (lx_sysent[nsys].sy_callc != NULL)
+		nsys++;
+
+	/* Build our table, one slot per system call. */
+	lx_systrace_sysent = kmem_zalloc(nsys * sizeof (lx_systrace_sysent_t),
+	    KM_SLEEP);
+	lx_systrace_nsysent = nsys;
+
+	for (idx = 0; idx < lx_systrace_nsysent; idx++) {
+		lx_systrace_sysent_t *ent = &lx_systrace_sysent[idx];
+
+		ent->lss_name = lx_sysent[idx].sy_name;
+		ent->lss_entry = DTRACE_IDNONE;
+		ent->lss_return = DTRACE_IDNONE;
+	}
+
+	/* Everything is in place; install the probe triggers. */
+	lx_systrace_entry_ptr = lx_systrace_entry;
+	lx_systrace_return_ptr = lx_systrace_return;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.
+ *
+ * DDI_SUSPEND succeeds without doing anything and every command other
+ * than DDI_DETACH fails.  For DDI_DETACH the provider is unregistered
+ * (the detach fails if DTrace refuses) and everything lx_systrace_attach()
+ * set up is undone.
+ *
+ * The probe triggers are reset and the table is unpublished before its
+ * memory is freed, so that the trigger callbacks, which index the table,
+ * are never pointed at freed memory.  The minor node created in attach
+ * is removed as well.
+ */
+static int
+lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+	lx_systrace_sysent_t	*table;
+	int			nsysent;
+
+	switch (cmd) {
+	case DDI_DETACH:
+		break;
+	case DDI_SUSPEND:
+		return (DDI_SUCCESS);
+	default:
+		return (DDI_FAILURE);
+	}
+
+	if (dtrace_unregister(lx_systrace_id) != 0)
+		return (DDI_FAILURE);
+
+	/*
+	 * Reset probe triggers.
+	 */
+	lx_systrace_entry_ptr = NULL;
+	lx_systrace_return_ptr = NULL;
+
+	/*
+	 * Unpublish the table, then free it.
+	 */
+	table = lx_systrace_sysent;
+	nsysent = lx_systrace_nsysent;
+	lx_systrace_nsysent = 0;
+	lx_systrace_sysent = NULL;
+	kmem_free(table, nsysent * sizeof (lx_systrace_sysent_t));
+
+	/*
+	 * Remove the minor node created by lx_systrace_attach().
+	 */
+	ddi_remove_minor_node(devi, NULL);
+
+	return (DDI_SUCCESS);
+}
+
+/*ARGSUSED*/
+static int
+lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+ return (0); /* opening the device always succeeds; no state is kept per open */
+}
+
+static struct cb_ops lx_systrace_cb_ops = {
+ lx_systrace_open, /* open: always succeeds */
+ nodev, /* close */
+ nulldev, /* strategy */
+ nulldev, /* print */
+ nodev, /* dump */
+ nodev, /* read */
+ nodev, /* write */
+ nodev, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ ddi_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_NEW | D_MP /* Driver compatibility flag */
+};
+
+static struct dev_ops lx_systrace_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* refcnt */
+ ddi_getinfo_1to1, /* get_dev_info */
+ nulldev, /* identify */
+ nulldev, /* probe */
+ lx_systrace_attach, /* attach: registers the provider, builds the probe table */
+ lx_systrace_detach, /* detach: unregisters the provider, frees the probe table */
+ nodev, /* reset */
+ &lx_systrace_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ nodev /* dev power */
+};
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * modldrv ties the module name to the driver operations above, and
+ * modlinkage is what _init(), _info() and _fini() pass to mod_install(),
+ * mod_info() and mod_remove().
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* module type (this is a pseudo driver) */
+ "Linux Brand System Call Tracing", /* name of module */
+ &lx_systrace_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1,
+ (void *)&modldrv,
+ NULL
+};
+
+/*
+ * Module load entry point: install the driver module.
+ */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module information entry point.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point: remove the driver module.
+ */
+int
+_fini(void)
+{
+	int rv = mod_remove(&modlinkage);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
new file mode 100644
index 0000000000..e4499c8a5b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_systrace" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/ldlinux.c b/usr/src/uts/common/brand/lx/io/ldlinux.c
new file mode 100644
index 0000000000..76c5e1d255
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/ldlinux.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/ddi.h>
+#include <sys/cmn_err.h>
+#include <sys/modctl.h>
+#include <sys/ptms.h>
+#include <sys/stropts.h>
+#include <sys/strsun.h>
+#include <sys/sunddi.h>
+
+#include <sys/ldlinux.h>
+
+
+/*
+ * ldlinuxopen - open routine gets called when the module gets pushed onto the
+ * stream.
+ *
+ * Only module opens (MODOPEN) are accepted; anything else fails with
+ * EINVAL.  A repeated open of an already attached queue succeeds without
+ * doing anything.  Otherwise the per-stream state is allocated and seeded
+ * from the "ttymodes" property, and an M_SETOPTS message carrying
+ * SO_ISTTY is sent to the stream head.  If the property is missing or has
+ * an unexpected size, a warning is logged and the state is left zeroed.
+ *
+ * Returns 0 on success or ENOSR if the M_SETOPTS message can not be
+ * allocated.
+ */
+/* ARGSUSED */
+static int
+ldlinuxopen(
+	queue_t *q,	/* pointer to the read side queue */
+	dev_t *devp,	/* pointer to stream tail's dev */
+	int oflag,	/* the user open(2) supplied flags */
+	int sflag,	/* open state flag */
+	cred_t *credp)	/* credentials */
+{
+	struct ldlinux *tp;	/* ldlinux entry for this module */
+	mblk_t *mop;
+	struct stroptions *sop;
+	struct termios *termiosp;
+	int len;
+	int have_defaults = 0;
+
+	if (sflag != MODOPEN)
+		return (EINVAL);
+
+	if (q->q_ptr != NULL) {
+		/* It's already attached. */
+		return (0);
+	}
+
+	mop = allocb(sizeof (struct stroptions), BPRI_MED);
+	if (mop == NULL)
+		return (ENOSR);
+	mop->b_datap->db_type = M_SETOPTS;
+	mop->b_wptr += sizeof (struct stroptions);
+	sop = (struct stroptions *)mop->b_rptr;
+	sop->so_flags = SO_ISTTY;
+
+	/*
+	 * Allocate state structure.  It is zeroed so that every field has
+	 * a defined value no matter which path is taken below.
+	 */
+	tp = kmem_zalloc(sizeof (*tp), KM_SLEEP);
+
+	/* Stash a pointer to our private data in q_ptr. */
+	q->q_ptr = WR(q)->q_ptr = tp;
+
+	/*
+	 * Get termios defaults. These are stored as
+	 * a property in the "options" node.
+	 */
+	if (ddi_getlongprop(DDI_DEV_T_ANY, ddi_root_node(), 0, "ttymodes",
+	    (caddr_t)&termiosp, &len) == DDI_PROP_SUCCESS) {
+		if (len == sizeof (struct termios)) {
+			if (termiosp->c_lflag & ICANON) {
+				tp->veof = termiosp->c_cc[VEOF];
+				tp->veol = termiosp->c_cc[VEOL];
+				tp->vmin = 1;
+				tp->vtime = 0;
+			} else {
+				tp->veof = 0;
+				tp->veol = 0;
+				tp->vmin = termiosp->c_cc[VMIN];
+				tp->vtime = termiosp->c_cc[VTIME];
+			}
+			have_defaults = 1;
+		}
+
+		/*
+		 * The property buffer was allocated for us, so it has to
+		 * be freed even when its size is not what we expected.
+		 */
+		kmem_free(termiosp, len);
+	}
+
+	if (!have_defaults) {
+		/*
+		 * winge winge winge...
+		 */
+		cmn_err(CE_WARN,
+		    "ldlinuxopen: Couldn't get ttymodes property!");
+	}
+
+	tp->state = 0;
+
+	/*
+	 * Commit to the open and send the M_SETOPTS off to the stream head.
+	 */
+	qprocson(q);
+	putnext(q, mop);
+
+	return (0);
+}
+
+
+/*
+ * ldlinuxclose - called when the module is popped off of the stream.
+ *
+ * Turns the queue's put procedures off, frees the per-stream state
+ * allocated by ldlinuxopen() and clears both q_ptr references to it.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+static int
+ldlinuxclose(queue_t *q, int flag, cred_t *credp)
+{
+	struct ldlinux *state;
+
+	qprocsoff(q);
+
+	state = q->q_ptr;
+	kmem_free(state, sizeof (*state));
+
+	WR(q)->q_ptr = NULL;
+	q->q_ptr = NULL;
+	return (0);
+}
+
+
+/*
+ * Handle an M_IOCTL message travelling down the write side.
+ *
+ * TIOCSETLD stores the caller's lx_cc values in the per-stream state and
+ * TIOCGETLD returns them; both are answered here and go no further.
+ * PTSSTTY sets ISPTSTTY in the per-stream state and, like every other
+ * ioctl, is passed on to the next module.
+ */
+static void
+do_ioctl(queue_t *q, mblk_t *mp)
+{
+	struct ldlinux	*state = q->q_ptr;
+	struct iocblk	*ioc = (struct iocblk *)mp->b_rptr;
+	struct lx_cc	*cc;
+	mblk_t		*reply;
+	int		err;
+
+	switch (ioc->ioc_cmd) {
+	case TIOCSETLD:
+		/* Make the caller supplied data safe to access. */
+		err = miocpullup(mp, sizeof (struct lx_cc));
+		if (err != 0) {
+			miocnak(q, mp, 0, err);
+			return;
+		}
+
+		/* Copy the caller's values into our per-stream cache. */
+		cc = (struct lx_cc *)mp->b_cont->b_rptr;
+		state->veof = cc->veof;
+		state->veol = cc->veol;
+		state->vmin = cc->vmin;
+		state->vtime = cc->vtime;
+
+		/* Acknowledge; no data goes back to the caller. */
+		miocack(q, mp, 0, 0);
+		return;
+
+	case TIOCGETLD:
+		/* We need a data block to carry the answer. */
+		reply = allocb(sizeof (struct lx_cc), BPRI_MED);
+		if (reply == NULL) {
+			miocnak(q, mp, 0, ENOSR);
+			return;
+		}
+
+		/* Turn the request into an ack carrying that block. */
+		mioc2ack(mp, reply, sizeof (struct lx_cc), 0);
+
+		/* Fill the reply from our per-stream cache. */
+		cc = (struct lx_cc *)mp->b_cont->b_rptr;
+		cc->veof = state->veof;
+		cc->veol = state->veol;
+		cc->vmin = state->vmin;
+		cc->vtime = state->vtime;
+
+		/* Send the answer back up the stream. */
+		qreply(q, mp);
+		return;
+
+	case PTSSTTY:
+		/* Remember it, then let the message continue downstream. */
+		state->state |= ISPTSTTY;
+		break;
+
+	default:
+		break;
+	}
+
+	putnext(q, mp);
+}
+
+
+/*
+ * ldlinuxput - Module read and write queue put procedure.
+ *
+ * M_IOCTL messages on the write side are handed to do_ioctl(), which
+ * disposes of them itself.  M_FLUSH messages flush this queue when the
+ * flush direction matches the side we are on.  Everything, including the
+ * M_FLUSH itself, is then passed along to the next queue.
+ */
+static void
+ldlinuxput(queue_t *q, mblk_t *mp)
+{
+	struct ldlinux	*state = q->q_ptr;
+	int		on_read_side = ((q->q_flag & QREADR) != 0);
+	int		flush_bit;
+
+	switch (DB_TYPE(mp)) {
+	case M_IOCTL:
+		if (!on_read_side) {
+			do_ioctl(q, mp);
+			return;
+		}
+		break;
+
+	case M_FLUSH:
+		/* Only act on a flush aimed at our side of the stream. */
+		flush_bit = on_read_side ? FLUSHR : FLUSHW;
+		if (mp->b_rptr[0] & flush_bit) {
+			if ((state->state & ISPTSTTY) &&
+			    (mp->b_rptr[0] & FLUSHBAND))
+				flushband(q, mp->b_rptr[1], FLUSHDATA);
+			else
+				flushq(q, FLUSHDATA);
+		}
+		break;
+
+	default:
+		break;
+	}
+	putnext(q, mp);
+}
+
+
+static struct module_info ldlinux_info = {
+ LDLINUX_MODID, /* module id number */
+ LDLINUX_MOD, /* module name */
+ 0, /* minimum packet size */
+ INFPSZ, /* maximum packet size */
+ 0, /* high water mark */
+ 0 /* low water mark */
+};
+
+static struct qinit ldlinuxinit = { /* shared by the read and the write side */
+ (int (*)()) ldlinuxput, /* put procedure; cast because it returns void */
+ NULL, /* no service procedure */
+ ldlinuxopen,
+ ldlinuxclose,
+ NULL,
+ &ldlinux_info
+};
+
+static struct streamtab ldlinuxinfo = {
+ &ldlinuxinit, /* read side */
+ &ldlinuxinit /* write side */
+};
+
+/*
+ * Module linkage information for the kernel.
+ *
+ * fsw names the STREAMS module and points at its streamtab, and
+ * modlinkage is what _init(), _fini() and _info() pass to mod_install(),
+ * mod_remove() and mod_info().
+ */
+static struct fmodsw fsw = {
+ LDLINUX_MOD,
+ &ldlinuxinfo,
+ D_MTQPAIR | D_MP
+};
+
+static struct modlstrmod modlstrmod = {
+ &mod_strmodops, "termios extensions for lx brand", &fsw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, &modlstrmod, NULL
+};
+
+/*
+ * Module load entry point: install the STREAMS module.
+ */
+int
+_init()
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point: remove the STREAMS module.
+ */
+int
+_fini()
+{
+	int rv = mod_remove(&modlinkage);
+
+	return (rv);
+}
+
+/*
+ * Module information entry point.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c
new file mode 100644
index 0000000000..07c3bd0949
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.c
@@ -0,0 +1,2026 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/audio.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/disp.h>
+#include <sys/ddi.h>
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/kmem.h>
+#include <sys/lx_audio.h>
+#include <sys/mixer.h>
+#include <sys/modhash.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/stropts.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+
+/*
+ * Properties used by the lx_audio driver.  The name actually looked up on
+ * the device node is "<zonename>_<property>" (see lxa_devprop_name()).
+ */
+#define LXA_PROP_INPUTDEV "inputdev"
+#define LXA_PROP_OUTPUTDEV "outputdev"
+
+/*
+ * device paths used by this driver: LXA_DEV_DEFAULT is used when a
+ * property value is "default"; any other accepted value is appended to
+ * LXA_DEV_CUSTOM_DIR (see lxa_devprop_lookup()).
+ */
+#define LXA_DEV_DEFAULT "/dev/audio"
+#define LXA_DEV_CUSTOM_DIR "/dev/sound/"
+
+/* maximum possible number of concurrent opens of this driver */
+#define LX_AUDIO_MAX_OPENS 1024
+
+/*
+ * these are the default fragment size and fragment count values that
+ * lxa_dev_getfeatures() installs in each open's state.
+ * these values were chosen to make quake work well on my
+ * laptop: 2Ghz Pentium M + NVIDIA GeForce Go 6400.
+ *
+ * for reference:
+ * - 1 sec of stereo output at 44Khz is about 171 Kb of data
+ * - 1 sec of mono output at 8Khz is about 8Kb of data
+ */
+#define LXA_OSS_FRAG_SIZE (1024) /* 1/8 sec at 8Khz mono */
+#define LXA_OSS_FRAG_CNT (1024 * 2)
+
+/* maximum amount of fragment memory we'll allow a process to mmap */
+#define LXA_OSS_FRAG_MEM (1024 * 1024 * 2) /* 2Mb */
+
+/*
+ * forward declarations: struct lxa_zstate holds lxa_state_t pointers and
+ * struct lxa_state holds an lxa_zstate_t pointer, so both names are needed
+ * before either structure is defined.
+ */
+typedef struct lxa_state lxa_state_t;
+typedef struct lxa_zstate lxa_zstate_t;
+
+/*
+ * Structure and enum declarations
+ */
+typedef enum { /* kind of device node an open refers to */
+ LXA_TYPE_INVALID = 0, /* rejected by the ASSERTs in lxa_dev_open() */
+ LXA_TYPE_AUDIO = 1, /* audio device */
+ LXA_TYPE_AUDIOCTL = 2 /* audio control/mixer device ("ctl" path suffix) */
+} lxa_dev_type_t;
+
+struct lxa_zstate { /* per-zone audio state, shared by the opens in a zone */
+ char *lxa_zs_zonename; /* presumably the owning zone's name -- confirm */
+
+ /*
+ * we could store the input/output audio device setting here,
+ * but instead we're keeping them as device node properties
+ * so that a user can easily see the audio configuration for
+ * a zone via prtconf.
+ */
+
+ /*
+ * OSS doesn't support multiple opens of the audio device.
+ * (multiple opens of the mixer device are supported.)
+ * so here we'll keep a pointer to any open input/output
+ * streams. (OSS does support two opens if one is for input
+ * and the other is for output.)
+ *
+ * both pointers are cleared under lxa_lock by lxa_state_close().
+ */
+ lxa_state_t *lxa_zs_istate;
+ lxa_state_t *lxa_zs_ostate;
+
+ /*
+ * we need to cache channel gain and balance. channel gain and
+ * balance map to PCM volume in OSS, which are supposedly a property
+ * of the underlying hardware. but in solaris, channels are
+ * implemented in software and only exist when an audio device
+ * is actually open. (each open returns a unique channel.) OSS
+ * apps will expect consistent PCM volume set/get operations to
+ * work even if no audio device is open. hence, if no underlying
+ * device is open we need to cache the gain and balance setting.
+ */
+ lxa_mixer_levels_t lxa_zs_pcm_levels;
+};
+
+struct lxa_state { /* state for one open of the lx_audio driver */
+ lxa_zstate_t *lxas_zs; /* zone state pointer */
+
+ dev_t lxas_dev_old; /* dev_t used to open the device */
+ dev_t lxas_dev_new; /* new dev_t assigned to an open */
+ int lxas_flags; /* original flags passed to open */
+ lxa_dev_type_t lxas_type; /* type of device that was opened */
+
+ int lxas_devs_same; /* input and output device the same? */
+
+ /* input device variables */
+ ldi_handle_t lxas_idev_lh; /* ldi handle for access (NULL if none) */
+ int lxas_idev_flags; /* flags used for open */
+
+ /* output device variables */
+ ldi_handle_t lxas_odev_lh; /* ldi handle for access (NULL if none) */
+ int lxas_odev_flags; /* flags used for open */
+
+ /*
+ * since we support multiplexing of devices we need to remember
+ * certain parameters about the devices. the feature masks are
+ * computed by lxa_dev_getfeatures().
+ */
+ uint_t lxas_hw_features;
+ uint_t lxas_sw_features;
+
+ uint_t lxas_frag_size; /* bytes per fragment */
+ uint_t lxas_frag_cnt; /* number of fragments */
+
+ /*
+ * members needed to support mmap device access. note that to
+ * simplify things we only support one mmap access per open.
+ */
+ ddi_umem_cookie_t lxas_umem_cookie; /* NULL until the device is mmaped */
+ char *lxas_umem_ptr; /* buffer the mmap output thread plays from */
+ size_t lxas_umem_len; /* buffer length; a multiple of lxas_frag_size */
+ kthread_t *lxas_mmap_thread; /* mmap output thread, NULL when not running */
+ int lxas_mmap_thread_running; /* set by enable, cleared when thread exits */
+ int lxas_mmap_thread_exit; /* set to ask the output thread to exit */
+ int lxas_mmap_thread_frag; /* index of the next fragment to output */
+};
+
+/*
+ * Global variables
+ */
+dev_info_t *lxa_dip = NULL; /* dev_info node the zone properties are read from */
+kmutex_t lxa_lock; /* guards zone istate/ostate and the mmap thread fields */
+id_space_t *lxa_minor_id = NULL; /* id space that per-open minors are freed to */
+mod_hash_t *lxa_state_hash = NULL; /* lxa_state_t lookup, keyed by minor number */
+mod_hash_t *lxa_zstate_hash = NULL; /* presumably lxa_zstate_t lookup -- confirm */
+size_t lxa_state_hash_size = 15; /* presumably lxa_state_hash sizing -- confirm */
+size_t lxa_zstate_hash_size = 15; /* presumably lxa_zstate_hash sizing -- confirm */
+size_t lxa_registered_zones = 0; /* NOTE(review): not used in this part of the file */
+
+/*
+ * function declarations
+ *
+ * lxa_mmap_output_disable() is called by lxa_state_close() before its
+ * definition appears in this file.
+ */
+static void lxa_mmap_output_disable(lxa_state_t *);
+
+/*
+ * functions
+ */
+/*
+ * lxa_state_close - tear down the state associated with one open.
+ *
+ * Stops any mmap output, detaches the state from its zone, removes it from
+ * the state hash, closes the underlying audio devices, frees any mmap
+ * memory, releases the minor number and finally frees the structure.
+ */
+static void
+lxa_state_close(lxa_state_t *lxa_state)
+{
+	lxa_zstate_t	*zs = lxa_state->lxas_zs;
+	minor_t		id = getminor(lxa_state->lxas_dev_new);
+	mod_hash_key_t	key = (mod_hash_key_t)(uintptr_t)id;
+
+	/* the output thread uses this state, so stop it before anything else */
+	lxa_mmap_output_disable(lxa_state);
+
+	/*
+	 * stop being the zone's active input and/or output stream so that
+	 * later opens of the audio device can succeed.
+	 */
+	mutex_enter(&lxa_lock);
+	if (zs->lxa_zs_istate == lxa_state) {
+		zs->lxa_zs_istate = NULL;
+	}
+	if (zs->lxa_zs_ostate == lxa_state) {
+		zs->lxa_zs_ostate = NULL;
+	}
+	mutex_exit(&lxa_lock);
+
+	/* the state may or may not be in the hash; failure is not an error */
+	(void) mod_hash_remove(lxa_state_hash, key,
+	    (mod_hash_val_t *)&lxa_state);
+
+	/* close whichever underlying audio devices are open */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		(void) ldi_close(lxa_state->lxas_idev_lh,
+		    lxa_state->lxas_idev_flags, kcred);
+	}
+	if (lxa_state->lxas_odev_lh != NULL) {
+		(void) ldi_close(lxa_state->lxas_odev_lh,
+		    lxa_state->lxas_odev_flags, kcred);
+	}
+
+	/* release memory that was handed out through mmap */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		ddi_umem_free(lxa_state->lxas_umem_cookie);
+	}
+
+	/* give the minor number back and free the state itself */
+	id_free(lxa_minor_id, id);
+	kmem_free(lxa_state, sizeof (*lxa_state));
+}
+
+/* Return the name of the zone that the current process belongs to. */
+static char *
+getzonename(void)
+{
+	zone_t *zone = curproc->p_zone;
+
+	return (zone->zone_name);
+}
+
+/*
+ * Free a NUL-terminated string whose allocation is exactly strlen() + 1
+ * bytes (for example one returned by strdup() in this file).
+ */
+static void
+strfree(char *str)
+{
+	size_t size = strlen(str) + 1;
+
+	kmem_free(str, size);
+}
+
+/*
+ * Return a kmem_alloc()ed copy of a NUL-terminated string.  The allocation
+ * sleeps (KM_SLEEP); release the copy with strfree().
+ */
+static char *
+strdup(char *str)
+{
+	size_t	size = strlen(str) + 1;
+	char	*copy;
+
+	copy = kmem_alloc(size, KM_SLEEP);
+	bcopy(str, copy, size);
+	return (copy);
+}
+
+/*
+ * Build the per-zone property name "<zname>_<pname>".  The result is sized
+ * to exactly strlen() + 1 bytes, so the caller releases it with strfree().
+ */
+static char *
+lxa_devprop_name(char *zname, char *pname)
+{
+	char	*name;
+	int	size;
+
+	ASSERT((pname != NULL) && (zname != NULL));
+
+	/* first pass measures the string, second pass formats it */
+	size = snprintf(NULL, 0, "%s_%s", zname, pname) + 1;
+	name = kmem_alloc(size, KM_SLEEP);
+	(void) snprintf(name, size, "%s_%s", zname, pname);
+
+	return (name);
+}
+
+/*
+ * lxa_devprop_verify - sanity check an audio device property value.
+ *
+ * A value is acceptable if it is one of the keywords "default" or "none"
+ * (both are interpreted by lxa_devprop_lookup()) or consists solely of
+ * decimal digits.
+ *
+ * Returns 0 if the value is acceptable and -1 otherwise.
+ */
+static int
+lxa_devprop_verify(char *pval)
+{
+	int n;
+
+	ASSERT(pval != NULL);
+
+	if ((strcmp(pval, "default") == 0) || (strcmp(pval, "none") == 0))
+		return (0);
+
+	/*
+	 * make sure the value is an integer.  a character is a non-digit
+	 * if it lies below '0' OR above '9'; testing with "&&" can never
+	 * be true and would accept any string at all.
+	 */
+	for (n = 0; pval[n] != '\0'; n++) {
+		if ((pval[n] < '0') || (pval[n] > '9')) {
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * lxa_devprop_lookup - translate a zone's audio property into a device path.
+ *
+ * Looks up the property "<zname>_<pname>" on the driver's device node and
+ * maps its value onto a path:
+ *	"none"		no device, NULL is returned
+ *	"default"	LXA_DEV_DEFAULT
+ *	anything else	LXA_DEV_CUSTOM_DIR followed by the value
+ * For LXA_TYPE_AUDIOCTL "ctl" is appended to the path.
+ *
+ * Returns NULL if the property does not exist, fails verification or is
+ * "none".  Otherwise returns an allocated string that the caller must
+ * release with strfree().
+ */
+static char *
+lxa_devprop_lookup(char *zname, char *pname, lxa_dev_type_t lxa_type)
+{
+	char *zprop_name, *pval;
+	char *dev_path;
+	int n, rv;
+
+	ASSERT((pname != NULL) && (zname != NULL));
+	ASSERT((lxa_type == LXA_TYPE_AUDIO) || (lxa_type == LXA_TYPE_AUDIOCTL));
+
+	zprop_name = lxa_devprop_name(zname, pname);
+
+	/* attempt to lookup the property */
+	rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, zprop_name, &pval);
+	strfree(zprop_name);
+
+	if (rv != DDI_PROP_SUCCESS)
+		return (NULL);
+
+	if (lxa_devprop_verify(pval) != 0) {
+		ddi_prop_free(pval);
+		return (NULL);
+	}
+
+	if (strcmp(pval, "none") == 0) {
+		/*
+		 * there is no audio device specified.  the property value
+		 * still has to be released on this path.
+		 */
+		ddi_prop_free(pval);
+		return (NULL);
+	} else if (strcmp(pval, "default") == 0) {
+		/* use the default audio device on the system */
+		dev_path = strdup(LXA_DEV_DEFAULT);
+	} else {
+		/* a custom audio device was specified, generate a path */
+		n = snprintf(NULL, 0, "%s%s", LXA_DEV_CUSTOM_DIR, pval) + 1;
+		dev_path = kmem_alloc(n, KM_SLEEP);
+		(void) snprintf(dev_path, n, "%s%s", LXA_DEV_CUSTOM_DIR, pval);
+	}
+	ddi_prop_free(pval);
+
+	/*
+	 * if this is an audio control device we need to append
+	 * "ctl" to the path
+	 */
+	if (lxa_type == LXA_TYPE_AUDIOCTL) {
+		char *tmp;
+		n = snprintf(NULL, 0, "%s%s", dev_path, "ctl") + 1;
+		tmp = kmem_alloc(n, KM_SLEEP);
+		(void) snprintf(tmp, n, "%s%s", dev_path, "ctl");
+		strfree(dev_path);
+		dev_path = tmp;
+	}
+
+	return (dev_path);
+}
+
+/*
+ * lxa_dev_getfeatures - work out the feature set to report for an open.
+ *
+ * Installs the default fragment size/count and fills in lxas_sw_features
+ * and lxas_hw_features from AUDIO_GETINFO on the underlying device(s):
+ *  - only one device open: that device's features are used as is.
+ *  - input and output are the same device: the two queries must agree,
+ *    otherwise EIO is returned.
+ *  - two different devices: a feature is reported if both devices have
+ *    it, and play, record and duplex support are always added.
+ *
+ * Assumes at least one of the two device handles is open.
+ * Returns 0 on success or the error from a failed ioctl (or EIO).
+ */
+static int
+lxa_dev_getfeatures(lxa_state_t *lxa_state)
+{
+	audio_info_t ai_idev, ai_odev;
+	int n, rv;
+
+	/* set a default fragment size */
+	lxa_state->lxas_frag_size = LXA_OSS_FRAG_SIZE;
+	lxa_state->lxas_frag_cnt = LXA_OSS_FRAG_CNT;
+
+	/* get info for the currently open audio devices */
+	if ((lxa_state->lxas_idev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_idev_lh,
+	    AUDIO_GETINFO, (intptr_t)&ai_idev, FKIOCTL, kcred, &n)) != 0))
+		return (rv);
+	if ((lxa_state->lxas_odev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_GETINFO, (intptr_t)&ai_odev, FKIOCTL, kcred, &n)) != 0))
+		return (rv);
+
+	/* if we're only open for reading or writing then it's easy */
+	if (lxa_state->lxas_idev_lh == NULL) {
+		lxa_state->lxas_sw_features = ai_odev.sw_features;
+		lxa_state->lxas_hw_features = ai_odev.hw_features;
+		return (0);
+	} else if (lxa_state->lxas_odev_lh == NULL) {
+		lxa_state->lxas_sw_features = ai_idev.sw_features;
+		lxa_state->lxas_hw_features = ai_idev.hw_features;
+		return (0);
+	}
+
+	/*
+	 * well if we're open for reading and writing but the underlying
+	 * device is the same then it's also pretty easy
+	 */
+	if (lxa_state->lxas_devs_same) {
+		if ((ai_odev.sw_features != ai_idev.sw_features) ||
+		    (ai_odev.hw_features != ai_idev.hw_features)) {
+			zcmn_err(getzoneid(), CE_WARN, "lx_audio error: "
+			    "audio device reported inconsistent features");
+			return (EIO);
+		}
+		lxa_state->lxas_sw_features = ai_odev.sw_features;
+		lxa_state->lxas_hw_features = ai_odev.hw_features;
+		return (0);
+	}
+
+	/*
+	 * figure out which software features we're going to support.
+	 * we will report a feature as supported if both the input
+	 * and output device support it.
+	 */
+	lxa_state->lxas_sw_features = 0;
+	n = ai_idev.sw_features & ai_odev.sw_features;
+	if (n & AUDIO_SWFEATURE_MIXER)
+		lxa_state->lxas_sw_features |= AUDIO_SWFEATURE_MIXER;
+
+	/*
+	 * figure out which hardware features we're going to support.
+	 * for a first pass we will report a feature as supported if
+	 * both the input and output device support it.
+	 */
+	lxa_state->lxas_hw_features = 0;
+	n = ai_idev.hw_features & ai_odev.hw_features;
+	if (n & AUDIO_HWFEATURE_MSCODEC)
+		lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_MSCODEC;
+
+	/*
+	 * if we made it here then we have different audio input and output
+	 * devices. this will allow us to report support for additional
+	 * hardware features that may not be supported by just the input or
+	 * output device alone.  these are ORed in: plain assignment here
+	 * would throw away everything computed above.
+	 */
+
+	/* always report that we support both playback and recording */
+	lxa_state->lxas_hw_features |=
+	    AUDIO_HWFEATURE_PLAY | AUDIO_HWFEATURE_RECORD;
+
+	/* always report full duplex support */
+	lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_DUPLEX;
+
+	/* never report that we have input to output loopback support */
+	ASSERT((lxa_state->lxas_hw_features & AUDIO_HWFEATURE_IN2OUT) == 0);
+	return (0);
+}
+
+/*
+ * lxa_dev_open - open the underlying audio device(s) for a new open.
+ *
+ * The input and output device paths come from the current zone's
+ * properties.  For LXA_TYPE_AUDIO the input device is opened only if FREAD
+ * was requested and the output device only if FWRITE was requested; for
+ * LXA_TYPE_AUDIOCTL every configured device is opened with the caller's
+ * flags.  FEXCL is never passed down.
+ *
+ * On success lxas_idev_lh/lxas_odev_lh hold the open handles (NULL for a
+ * device that was not opened) and 0 is returned.  On failure both handles
+ * are NULL and an errno value is returned: ENODEV if no usable device is
+ * configured, otherwise the error from ldi_open_by_name().
+ */
+static int
+lxa_dev_open(lxa_state_t *lxa_state)
+{
+	char *idev, *odev;
+	int flags;
+	int rv = 0;	/* stays 0 if no open ends up being attempted */
+	ldi_handle_t lh;
+	ldi_ident_t li = NULL;
+
+	ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+	    (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+	/*
+	 * no device is open yet.  do this up front so that every error
+	 * path below leaves well defined handles behind.
+	 */
+	lxa_state->lxas_idev_lh = NULL;
+	lxa_state->lxas_odev_lh = NULL;
+
+	/*
+	 * check if we have configuration properties for this zone.
+	 * if we don't then audio isn't supported in this zone.
+	 */
+	idev = lxa_devprop_lookup(getzonename(), LXA_PROP_INPUTDEV,
+	    lxa_state->lxas_type);
+	odev = lxa_devprop_lookup(getzonename(), LXA_PROP_OUTPUTDEV,
+	    lxa_state->lxas_type);
+
+	/* make sure there is at least one device to read from or write to */
+	if ((idev == NULL) && (odev == NULL))
+		return (ENODEV);
+
+	/* see if the input and output devices are actually the same device */
+	if (((idev != NULL) && (odev != NULL)) &&
+	    (strcmp(idev, odev) == 0))
+		lxa_state->lxas_devs_same = 1;
+
+	/* we don't respect FEXCL */
+	flags = lxa_state->lxas_flags & ~FEXCL;
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		/*
+		 * if we're opening audio devices then we need to muck
+		 * with the FREAD/FWRITE flags.
+		 *
+		 * certain audio device may only support input or output
+		 * (but not both.) so if we're multiplexing input/output
+		 * to different devices we need to make sure we don't try
+		 * and open the output device for reading and the input
+		 * device for writing.
+		 *
+		 * if we're using the same device for input/output we still
+		 * need to do this because some audio devices won't let
+		 * themselves be opened multiple times for read access.
+		 */
+		lxa_state->lxas_idev_flags = flags & ~FWRITE;
+		lxa_state->lxas_odev_flags = flags & ~FREAD;
+
+		/* make sure we have devices to read from and write to */
+		if (((flags & FREAD) && (idev == NULL)) ||
+		    ((flags & FWRITE) && (odev == NULL))) {
+			rv = ENODEV;
+			goto out;
+		}
+	} else {
+		lxa_state->lxas_idev_flags = lxa_state->lxas_odev_flags = flags;
+	}
+
+	/* get an ident to open the devices */
+	if (ldi_ident_from_dev(lxa_state->lxas_dev_new, &li) != 0) {
+		rv = ENODEV;
+		goto out;
+	}
+
+	/* open the input device */
+	if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+	    (lxa_state->lxas_idev_flags & FREAD)) &&
+	    (idev != NULL)) {
+		rv = ldi_open_by_name(idev, lxa_state->lxas_idev_flags,
+		    kcred, &lh, li);
+		if (rv != 0) {
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "unable to open audio device: %s", idev);
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "possible zone audio configuration error");
+			goto out;
+		}
+		lxa_state->lxas_idev_lh = lh;
+	}
+
+	/* open the output device */
+	if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+	    (lxa_state->lxas_odev_flags & FWRITE)) &&
+	    (odev != NULL)) {
+		rv = ldi_open_by_name(odev, lxa_state->lxas_odev_flags,
+		    kcred, &lh, li);
+		if (rv != 0) {
+			/*
+			 * if we opened an input device, close it now and
+			 * forget the handle so nobody closes it again.
+			 */
+			if (lxa_state->lxas_idev_lh != NULL) {
+				(void) ldi_close(lxa_state->lxas_idev_lh,
+				    lxa_state->lxas_idev_flags, kcred);
+				lxa_state->lxas_idev_lh = NULL;
+			}
+
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "unable to open audio device: %s", odev);
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "possible zone audio configuration error");
+			goto out;
+		}
+		lxa_state->lxas_odev_lh = lh;
+	}
+
+	/* free up stuff */
+out:
+	if (li != NULL)
+		ldi_ident_release(li);
+	if (idev != NULL)
+		strfree(idev);
+	if (odev != NULL)
+		strfree(odev);
+
+	return (rv);
+}
+
+/*
+ * lxa_mmap_thread_exit - called by the mmap output thread to terminate.
+ *
+ * Resets all of the per-open thread bookkeeping under lxa_lock and then
+ * exits the calling thread; it does not return.  Must be called without
+ * lxa_lock held.
+ */
+void
+lxa_mmap_thread_exit(lxa_state_t *lxa_state)
+{
+	mutex_enter(&lxa_lock);
+	/* mark the thread as gone and clear any pending exit request */
+	lxa_state->lxas_mmap_thread_running = 0;
+	lxa_state->lxas_mmap_thread_exit = 0;
+	lxa_state->lxas_mmap_thread_frag = 0;
+	lxa_state->lxas_mmap_thread = NULL;
+	mutex_exit(&lxa_lock);
+
+	thread_exit();
+	/*NOTREACHED*/
+}
+
+/*
+ * lxa_mmap_thread - body of the thread that plays an mmaped buffer.
+ *
+ * The buffer (lxas_umem_ptr, lxas_umem_len) is treated as a ring of
+ * lxas_frag_size byte fragments.  Each fragment is written to the output
+ * device followed by a zero length write, which makes the device bump
+ * ai.play.eof once the fragment has been played.  Three fragments are
+ * queued initially; after that the thread polls play.eof (sleeping for
+ * roughly one fragment's worth of time between polls) and queues one new
+ * fragment for each fragment the device reports as finished.
+ *
+ * Errors while writing or polling restart the whole sequence at
+ * lxa_mmap_thread_top.  Errors during setup terminate the thread.  The
+ * thread also terminates when lxas_mmap_thread_exit is set.  In all cases
+ * it leaves via lxa_mmap_thread_exit() and never returns.
+ *
+ * lxa_lock is held while the shared fragment index and exit flag are
+ * examined and dropped around every call into the audio device.
+ */
+void
+lxa_mmap_thread(lxa_state_t *lxa_state)
+{
+	struct uio uio, uio_null;
+	iovec_t iovec, iovec_null;
+	uint_t bytes_per_sec, usec_per_frag, ticks_per_frag;
+	int rv, junk, eof, retry;
+	audio_info_t ai;
+
+	/* we better be setup for writing to the output device */
+	ASSERT((lxa_state->lxas_flags & FWRITE) != 0);
+	ASSERT(lxa_state->lxas_odev_lh != NULL);
+
+	/* setup a uio to output one fragment */
+	uio.uio_iov = &iovec;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = 0;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+
+	/* setup a uio to output a eof (a fragment with a length of 0) */
+	uio_null.uio_iov = &iovec_null;
+	uio_null.uio_iov->iov_len = 0;
+	uio_null.uio_iov->iov_base = NULL;
+	uio_null.uio_iovcnt = 1;
+	uio_null.uio_offset = 0;
+	uio_null.uio_segflg = UIO_SYSSPACE;
+	uio_null.uio_fmode = 0;
+	uio_null.uio_extflg = 0;
+	uio_null.uio_llimit = MAXOFFSET_T;
+	uio_null.uio_resid = 0;
+
+lxa_mmap_thread_top:
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+
+	/* first drain any pending audio output */
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_DRAIN, NULL, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_DRAIN failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * we depend on the ai.play.eof value to keep track of
+	 * audio output progress so reset it here.
+	 */
+	AUDIO_INITINFO(&ai);
+	ai.play.eof = 0;
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_SETINFO failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * we're going to need to know the sampling rate and number
+	 * of output channels to estimate how long we can sleep between
+	 * requests.
+	 */
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+	    (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_GETINFO failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/* estimate how many ticks it takes to output a fragment of data */
+	bytes_per_sec = (ai.play.sample_rate * ai.play.channels *
+	    ai.play.precision) / 8;
+	if (bytes_per_sec == 0) {
+		/* a bogus format would make us divide by zero below */
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "invalid output format, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+	/* do the multiplication in 64 bits, large fragments overflow 32 */
+	usec_per_frag = (uint_t)(((uint64_t)MICROSEC *
+	    lxa_state->lxas_frag_size) / bytes_per_sec);
+	ticks_per_frag = drv_usectohz(usec_per_frag);
+
+	/* queue up three fragments of data into the output stream */
+	eof = 3;
+
+	/* sanity check the eof value */
+	ASSERT(ai.play.eof == 0);
+	ai.play.eof = 0;
+
+	/* we always start audio output at fragment 0 */
+	mutex_enter(&lxa_lock);
+	lxa_state->lxas_mmap_thread_frag = 0;
+
+	/*
+	 * we shouldn't have allowed the mapping if it isn't a multiple
+	 * of the fragment size
+	 */
+	ASSERT((lxa_state->lxas_umem_len % lxa_state->lxas_frag_size) == 0);
+
+	while (!lxa_state->lxas_mmap_thread_exit) {
+		size_t start, end;
+
+		/*
+		 * calculate the start and ending offsets of the next
+		 * fragment to output
+		 */
+		start = lxa_state->lxas_mmap_thread_frag *
+		    lxa_state->lxas_frag_size;
+		end = start + lxa_state->lxas_frag_size;
+
+		ASSERT(start < lxa_state->lxas_umem_len);
+		ASSERT(end <= lxa_state->lxas_umem_len);
+
+		/* setup the uio to output one fragment of audio */
+		uio.uio_resid = end - start;
+		uio.uio_iov->iov_len = end - start;
+		uio.uio_iov->iov_base = &lxa_state->lxas_umem_ptr[start];
+
+		/* increment the current fragment index */
+		lxa_state->lxas_mmap_thread_frag =
+		    (lxa_state->lxas_mmap_thread_frag + 1) %
+		    (lxa_state->lxas_umem_len / lxa_state->lxas_frag_size);
+
+		/* drop the audio lock before actually outputting data */
+		mutex_exit(&lxa_lock);
+
+		/*
+		 * write the fragment of audio data to the device stream
+		 * then write a eof to the stream to tell the device to
+		 * increment ai.play.eof when it's done processing the
+		 * fragment we just wrote
+		 */
+		if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+		    &uio, kcred)) != 0) {
+			cmn_err(CE_WARN, "lxa_mmap_thread: "
+			    "ldi_write() failed (%d), "
+			    "resetting audio output", rv);
+			goto lxa_mmap_thread_top;
+		}
+		if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+		    &uio_null, kcred)) != 0) {
+			cmn_err(CE_WARN, "lxa_mmap_thread: "
+			    "ldi_write(eof) failed (%d), "
+			    "resetting audio output", rv);
+			goto lxa_mmap_thread_top;
+		}
+
+		/*
+		 * we want to avoid buffer underrun so ensure that
+		 * there is always at least one fragment of data in the
+		 * output stream.
+		 */
+		mutex_enter(&lxa_lock);
+		if (--eof > 0) {
+			continue;
+		}
+
+		/*
+		 * now we wait until the audio device has finished outputting
+		 * at least one fragment of data.
+		 */
+		retry = 0;
+		while (!lxa_state->lxas_mmap_thread_exit && (eof == 0)) {
+			uint_t ai_eof_old = ai.play.eof;
+
+			mutex_exit(&lxa_lock);
+
+			/*
+			 * delay for the number of ticks it takes
+			 * to output one fragment of data
+			 */
+			if (ticks_per_frag > 0)
+				delay(ticks_per_frag);
+
+			/* check if we've managed to output any fragments */
+			if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+			    AUDIO_GETINFO, (intptr_t)&ai,
+			    FKIOCTL, kcred, &junk)) != 0) {
+				cmn_err(CE_WARN, "lxa_mmap_thread: "
+				    "AUDIO_GETINFO failed (%d), "
+				    "resetting audio output", rv);
+				/* re-start mmap audio output */
+				goto lxa_mmap_thread_top;
+			}
+
+			if (ai_eof_old == ai.play.eof) {
+				/* institute a random retry limit */
+				if (retry++ < 100) {
+					mutex_enter(&lxa_lock);
+					continue;
+				}
+				cmn_err(CE_WARN, "lxa_mmap_thread: "
+				    "output stalled, "
+				    "resetting audio output");
+				/* re-start mmap audio output */
+				goto lxa_mmap_thread_top;
+			}
+
+			/*
+			 * number of fragments finished since the last poll.
+			 * both counters are unsigned, so the subtraction
+			 * is done modulo 2^32 and gives the right answer
+			 * even if the eof counter wrapped around.
+			 */
+			eof = (int)(ai.play.eof - ai_eof_old);
+
+			/* we're done with this loop so re-acquire the lock */
+			ASSERT(eof != 0);
+			mutex_enter(&lxa_lock);
+		}
+	}
+	mutex_exit(&lxa_lock);
+	lxa_mmap_thread_exit(lxa_state);
+	/*NOTREACHED*/
+}
+
+/*
+ * lxa_mmap_output_disable - stop mmap output for this open, if any.
+ *
+ * If the output thread is running it is asked to exit and this function
+ * blocks until it has done so.  Otherwise this is a no-op.
+ */
+static void
+lxa_mmap_output_disable(lxa_state_t *lxa_state)
+{
+	kt_did_t	did;
+	int		running;
+
+	mutex_enter(&lxa_lock);
+	running = (lxa_state->lxas_mmap_thread_running != 0);
+	if (running) {
+		/* ask the pcm mmap output thread to exit */
+		lxa_state->lxas_mmap_thread_exit = 1;
+
+		/* remember who to wait for while we still hold the lock */
+		did = lxa_state->lxas_mmap_thread->t_did;
+	}
+	mutex_exit(&lxa_lock);
+
+	/* wait (without the lock) for the output thread to go away */
+	if (running)
+		thread_join(did);
+}
+
+/*
+ * lxa_mmap_output_enable - start mmap output for this open.
+ *
+ * Creates the output thread unless one is already running.  The thread
+ * bookkeeping is initialized under lxa_lock before the thread is created.
+ */
+static void
+lxa_mmap_output_enable(lxa_state_t *lxa_state)
+{
+	mutex_enter(&lxa_lock);
+
+	if (lxa_state->lxas_mmap_thread_running == 0) {
+		/* output always starts at fragment 0 with no exit pending */
+		lxa_state->lxas_mmap_thread_running = 1;
+		lxa_state->lxas_mmap_thread_exit = 0;
+		lxa_state->lxas_mmap_thread_frag = 0;
+
+		/* kick off a thread to do the mmap pcm output */
+		lxa_state->lxas_mmap_thread = thread_create(NULL, 0,
+		    (void (*)())lxa_mmap_thread, lxa_state,
+		    0, &p0, TS_RUN, minclsyspri);
+		ASSERT(lxa_state->lxas_mmap_thread != NULL);
+	}
+
+	mutex_exit(&lxa_lock);
+}
+
+/*
+ * lxa_ioc_mmap_output - enable or disable mmap output.
+ *
+ * arg points to a uint_t trigger: zero disables output, any other value
+ * enables it.  Returns EINVAL if the device isn't open for writing or
+ * hasn't been mmaped, EFAULT if the trigger can't be copied in, else 0.
+ */
+static int
+lxa_ioc_mmap_output(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	uint_t enable;
+
+	/*
+	 * mmap access is output only, and there must be a mapping
+	 * to play from.
+	 */
+	if (((lxa_state->lxas_flags & FWRITE) == 0) ||
+	    (lxa_state->lxas_umem_cookie == NULL))
+		return (EINVAL);
+
+	/* fetch the caller's trigger value */
+	if (ddi_copyin((void *)arg, &enable, sizeof (enable), mode) != 0)
+		return (EFAULT);
+
+	if (enable != 0)
+		lxa_mmap_output_enable(lxa_state);
+	else
+		lxa_mmap_output_disable(lxa_state);
+
+	return (0);
+}
+
+/*
+ * lxa_ioc_mmap_ptr - report the current mmap output position.
+ *
+ * Copies out, as an int, the byte offset of the next fragment the output
+ * thread will play.  Returns EINVAL if the device isn't open for writing,
+ * hasn't been mmaped or has no running output thread, EFAULT if the
+ * result can't be copied out, else 0.
+ */
+static int
+lxa_ioc_mmap_ptr(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	int offset;
+
+	/* mmap access is output only */
+	if ((lxa_state->lxas_flags & FWRITE) == 0)
+		return (EINVAL);
+
+	/* without a mapping and a running thread there is no position */
+	if ((lxa_state->lxas_umem_cookie == NULL) ||
+	    (lxa_state->lxas_mmap_thread_running == 0))
+		return (EINVAL);
+
+	/* the fragment index is updated by the output thread under the lock */
+	mutex_enter(&lxa_lock);
+	offset = lxa_state->lxas_mmap_thread_frag * lxa_state->lxas_frag_size;
+	mutex_exit(&lxa_lock);
+
+	if (ddi_copyout(&offset, (void *)arg, sizeof (offset), mode) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * lxa_ioc_get_frag_info - copy the current fragment size and count out to
+ * the caller.  Returns EFAULT if the copyout fails, else 0.
+ */
+static int
+lxa_ioc_get_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_frag_info_t info;
+
+	info.lxa_fi_cnt = lxa_state->lxas_frag_cnt;
+	info.lxa_fi_size = lxa_state->lxas_frag_size;
+
+	return ((ddi_copyout(&info, (void *)arg, sizeof (info), mode) != 0) ?
+	    EFAULT : 0);
+}
+
+/*
+ * lxa_ioc_set_frag_info - install a new fragment size and count.
+ *
+ * Returns EINVAL if the device is currently mmaped, if the count is zero
+ * or if the size is less than 16; EFAULT if the request can't be copied
+ * in; else 0.
+ */
+static int
+lxa_ioc_set_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_frag_info_t req;
+
+	/* the fragment layout is frozen while the device is mmaped */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		return (EINVAL);
+
+	/* fetch the caller's request */
+	if (ddi_copyin((void *)arg, &req, sizeof (req), mode) != 0)
+		return (EFAULT);
+
+	/* there must be at least one fragment */
+	if (req.lxa_fi_cnt == 0)
+		return (EINVAL);
+
+	/* don't accept size values less than 16 */
+	if (req.lxa_fi_size < 16)
+		return (EINVAL);
+
+	lxa_state->lxas_frag_cnt = req.lxa_fi_cnt;
+	lxa_state->lxas_frag_size = req.lxa_fi_size;
+
+	return (0);
+}
+
+/*
+ * lxa_audio_drain - issue AUDIO_DRAIN on the output device.
+ *
+ * Returns EINVAL if there is no output device.  Otherwise returns 0; the
+ * result of the ioctl itself is deliberately ignored.
+ */
+static int
+lxa_audio_drain(lxa_state_t *lxa_state)
+{
+	ldi_handle_t	out = lxa_state->lxas_odev_lh;
+	int		rval;
+
+	/* draining only makes sense for output */
+	if (out == NULL)
+		return (EINVAL);
+
+	/* can't fail so ignore the return value */
+	(void) ldi_ioctl(out, AUDIO_DRAIN, NULL, FKIOCTL, kcred, &rval);
+	return (0);
+}
+
+/*
+ * lxa_audio_info_merge() - combine the audio_info of the input and output
+ * devices into the single audio_info reported for this open.
+ *
+ * usage notes:
+ *
+ * - it's important to make sure NOT to get the ai_idev and ai_odev
+ *   parameters mixed up when calling lxa_audio_info_merge().
+ *
+ * - it's important for the caller to make sure that AUDIO_GETINFO
+ *   was called for the input device BEFORE the output device. (see
+ *   the comments for merging the monitor_gain setting to see why.)
+ *
+ * - ai_merged must not point at the same structure as ai_idev or
+ *   ai_odev, since it is cleared before the merge.
+ *
+ * - if only one device is open, the other device's structure is never
+ *   read and may be left uninitialized by the caller.
+ */
+static void
+lxa_audio_info_merge(lxa_state_t *lxa_state,
+    audio_info_t *ai_idev, audio_info_t *ai_odev, audio_info_t *ai_merged)
+{
+	/* if we're not setup for output return the input device info */
+	if (lxa_state->lxas_odev_lh == NULL) {
+		*ai_merged = *ai_idev;
+		return;
+	}
+
+	/* if we're not setup for input return the output device info */
+	if (lxa_state->lxas_idev_lh == NULL) {
+		*ai_merged = *ai_odev;
+		return;
+	}
+
+	/*
+	 * the result is filled in member by member below and our callers
+	 * copy the whole structure out to userland, in some cases from an
+	 * uninitialized stack variable.  clear it first so that any member
+	 * not explicitly merged can't leak stale kernel stack contents.
+	 */
+	bzero(ai_merged, sizeof (*ai_merged));
+
+	/* get record values from the input device */
+	ai_merged->record = ai_idev->record;
+
+	/* get play values from the output device */
+	ai_merged->play = ai_odev->play;
+
+	/* muting status only matters for the output device */
+	ai_merged->output_muted = ai_odev->output_muted;
+
+	/* we don't support device reference counts, always return 1 */
+	ai_merged->ref_cnt = 1;
+
+	/*
+	 * for supported hw/sw features report the combined feature
+	 * set we calculated earlier.
+	 */
+	ai_merged->hw_features = lxa_state->lxas_hw_features;
+	ai_merged->sw_features = lxa_state->lxas_sw_features;
+
+	if (!lxa_state->lxas_devs_same) {
+		/*
+		 * if the input and output devices are different
+		 * physical devices then we don't support input to
+		 * output loopback so we always report the input
+		 * to output loopback gain to be zero.
+		 */
+		ai_merged->monitor_gain = 0;
+	} else {
+		/*
+		 * the input and output devices are actually the
+		 * same physical device. hence it probably supports
+		 * input to output loopback. regardless we should
+		 * pass back the input to output gain reported by
+		 * the device. when we pick a value to pass back we
+		 * use the output device value since that was
+		 * the most recently queried. (we base this
+		 * decision on the assumption that io gain is
+		 * actually a hardware setting in the device and
+		 * hence if it is changed on one open instance of
+		 * the device the change will be visible to all
+		 * other instances of the device.)
+		 */
+		ai_merged->monitor_gain = ai_odev->monitor_gain;
+	}
+
+	/*
+	 * for currently enabled software features always return the
+	 * merger of the two. (of course the enabled software features
+	 * for the input and output devices should always be the same,
+	 * so if it isn't complain.)
+	 */
+	if (ai_idev->sw_features_enabled != ai_odev->sw_features_enabled)
+		zcmn_err(getzoneid(), CE_WARN, "lx_audio: "
+		    "unexpected sofware feature state");
+	ai_merged->sw_features_enabled =
+	    ai_idev->sw_features_enabled & ai_odev->sw_features_enabled;
+}
+
+/*
+ * lxa_audio_setinfo - apply an audio_info request to the open device(s).
+ *
+ * The caller's request is split in two: the input device receives it with
+ * the play and output_muted settings blanked, the output device with the
+ * record settings blanked.  The values the devices hand back are merged
+ * and copied out over the caller's buffer.
+ *
+ * Returns EFAULT on a copyin/copyout failure, EINVAL if the request
+ * enables a software feature we didn't report, EIO if play.eof is being
+ * changed while the device is mmaped, the error of a failed ioctl, or 0.
+ */
+static int
+lxa_audio_setinfo(lxa_state_t *lxa_state, int cmd, intptr_t arg,
+    int mode)
+{
+	audio_info_t	req, blank, in_req, out_req;
+	int		rv, rval;
+
+	/* fetch the caller's request */
+	if (ddi_copyin((void *)arg, &req, sizeof (req), mode) != 0)
+		return (EFAULT);
+
+	/* refuse to enable software features that we never advertised */
+	if (req.sw_features_enabled != -1) {
+		if (req.sw_features_enabled & ~lxa_state->lxas_sw_features)
+			return (EINVAL);
+	}
+
+	/*
+	 * mmap output tracks its progress through play.eof, so once
+	 * the device has been mmaped that field may not be changed.
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		if (req.play.eof != -1)
+			return (EIO);
+	}
+
+	/* start both per-device requests from the caller's request */
+	AUDIO_INITINFO(&blank);
+	in_req = req;
+	out_req = req;
+
+	/* the output device must not see the record settings */
+	out_req.record = blank.record;
+
+	/* the input device must not see the output settings */
+	in_req.play = blank.play;
+	in_req.output_muted = blank.output_muted;
+
+	/* input device first ... */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_idev_lh, cmd,
+		    (intptr_t)&in_req, FKIOCTL, kcred, &rval);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/* ... then the output device */
+	if (lxa_state->lxas_odev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_odev_lh, cmd,
+		    (intptr_t)&out_req, FKIOCTL, kcred, &rval);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/*
+	 * setting the info also returns the resulting values (see
+	 * audioio.h), so fold what both devices reported back into
+	 * the caller's structure.
+	 */
+	lxa_audio_info_merge(lxa_state, &in_req, &out_req, &req);
+
+	/* hand the merged result back to the caller */
+	if (ddi_copyout(&req, (void *)arg, sizeof (req), mode) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * lxa_audio_getinfo - report the merged audio_info of the open device(s).
+ *
+ * The input device is queried before the output device, as required by
+ * lxa_audio_info_merge().  Returns the error of a failed ioctl, EFAULT if
+ * the result can't be copied out, or 0.
+ */
+static int
+lxa_audio_getinfo(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	audio_info_t	in_info, out_info, merged;
+	int		rv, rval;
+
+	/* query the input device, if there is one */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_idev_lh, AUDIO_GETINFO,
+		    (intptr_t)&in_info, FKIOCTL, kcred, &rval);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/* query the output device, if there is one */
+	if (lxa_state->lxas_odev_lh != NULL) {
+		rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+		    (intptr_t)&out_info, FKIOCTL, kcred, &rval);
+		if (rv != 0)
+			return (rv);
+	}
+
+	/* fold both answers into the single structure the user gets */
+	lxa_audio_info_merge(lxa_state, &in_info, &out_info, &merged);
+
+	if (ddi_copyout(&merged, (void *)arg, sizeof (merged), mode) != 0)
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * lxa_mixer_ai_from_lh - fetch the device level audio_info through a
+ * mixer control handle.
+ *
+ * Asks the device how many channels it has, allocates an am_control_t
+ * sized for that many channels, fills it with AUDIO_MIXERCTL_GETINFO and
+ * copies its dev_info member into *ai.  Returns 0 or the error of the
+ * failed ioctl (in which case *ai is not modified).
+ */
+static int
+lxa_mixer_ai_from_lh(ldi_handle_t lh, audio_info_t *ai)
+{
+	am_control_t	*ctl;
+	size_t		ctl_size;
+	int		err, nchs, rval;
+
+	ASSERT((lh != NULL) && (ai != NULL));
+
+	/* the control structure is sized by the device's channel count */
+	err = ldi_ioctl(lh, AUDIO_GET_NUM_CHS,
+	    (intptr_t)&nchs, FKIOCTL, kcred, &rval);
+	if (err != 0)
+		return (err);
+
+	ctl_size = AUDIO_MIXER_CTL_STRUCT_SIZE(nchs);
+	ctl = kmem_alloc(ctl_size, KM_SLEEP);
+
+	/* get the device state and channel state; keep only the former */
+	err = ldi_ioctl(lh, AUDIO_MIXERCTL_GETINFO,
+	    (intptr_t)ctl, FKIOCTL, kcred, &rval);
+	if (err == 0)
+		*ai = ctl->dev_info;
+
+	kmem_free(ctl, ctl_size);
+	return (err);
+}
+
+/*
+ * Fill in *ai with the mixer view of the device(s) behind lxa_state.
+ * When only one of the input/output handles is open that device is
+ * queried directly; when both are open each is queried and the results
+ * are merged.  Returns 0 or the error from lxa_mixer_ai_from_lh().
+ */
+static int
+lxa_mixer_get_ai(lxa_state_t *lxa_state, audio_info_t *ai)
+{
+	ldi_handle_t	in_lh = lxa_state->lxas_idev_lh;
+	ldi_handle_t	out_lh = lxa_state->lxas_odev_lh;
+	audio_info_t	in_info, out_info;
+	int		err;
+
+	/* if there is no input device, query the output device */
+	if (in_lh == NULL)
+		return (lxa_mixer_ai_from_lh(out_lh, ai));
+
+	/* if there is no output device, query the input device */
+	if (out_lh == NULL)
+		return (lxa_mixer_ai_from_lh(in_lh, ai));
+
+	/*
+	 * both devices are present: get the audio_info for the input
+	 * device first and then for the output device.
+	 */
+	err = lxa_mixer_ai_from_lh(in_lh, &in_info);
+	if (err != 0)
+		return (err);
+
+	err = lxa_mixer_ai_from_lh(out_lh, &out_info);
+	if (err != 0)
+		return (err);
+
+	/* now merge the two audio_info structures for the caller */
+	lxa_audio_info_merge(lxa_state, &in_info, &out_info, ai);
+	return (0);
+}
+
+/*
+ * Common handler for the LXA_IOC_MIXER_GET_VOL and LXA_IOC_MIXER_GET_MIC
+ * ioctls.  Reads the current audio_info for the open mixer and copies the
+ * play (VOL) or record (MIC) gain and balance out to the
+ * lxa_mixer_levels_t buffer at arg.
+ *
+ * Returns 0 on success, the error from lxa_mixer_get_ai(), EINVAL for
+ * any other cmd, or EFAULT if the copyout fails.
+ */
+static int
+lxa_mixer_get_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	lxa_ml;
+	audio_info_t		ai;
+	int			rv;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	if ((rv = lxa_mixer_get_ai(lxa_state, &ai)) != 0)
+		return (rv);
+
+	switch (cmd) {
+	case LXA_IOC_MIXER_GET_VOL:
+		lxa_ml.lxa_ml_gain = ai.play.gain;
+		lxa_ml.lxa_ml_balance = ai.play.balance;
+		break;
+	case LXA_IOC_MIXER_GET_MIC:
+		lxa_ml.lxa_ml_gain = ai.record.gain;
+		lxa_ml.lxa_ml_balance = ai.record.balance;
+		break;
+	default:
+		/*
+		 * lxa_ml was never filled in; refuse rather than copy
+		 * uninitialized kernel stack out to the caller.
+		 */
+		return (EINVAL);
+	}
+
+	if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * Common handler for the LXA_IOC_MIXER_SET_VOL and LXA_IOC_MIXER_SET_MIC
+ * ioctls.  Copies in an lxa_mixer_levels_t from arg, validates it, and
+ * applies the gain and balance to the play (VOL) or record (MIC) side
+ * through lxa_audio_setinfo().
+ * Returns EFAULT, EINVAL, or whatever lxa_audio_setinfo() returns.
+ */
+static int
+lxa_mixer_set_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	levels;
+	audio_info_t		new_ai;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* fetch the requested mixer levels from the caller */
+	if (ddi_copyin((void *)arg, &levels, sizeof (levels), mode) != 0)
+		return (EFAULT);
+
+	/* reject levels that fail the sanity check */
+	if (!LXA_MIXER_LEVELS_OK(&levels))
+		return (EINVAL);
+
+	/* build an audio_info struct that carries only the new levels */
+	AUDIO_INITINFO(&new_ai);
+	if (cmd == LXA_IOC_MIXER_SET_VOL) {
+		new_ai.play.gain = levels.lxa_ml_gain;
+		new_ai.play.balance = levels.lxa_ml_balance;
+	} else if (cmd == LXA_IOC_MIXER_SET_MIC) {
+		new_ai.record.gain = levels.lxa_ml_gain;
+		new_ai.record.balance = levels.lxa_ml_balance;
+	}
+
+	/*
+	 * we're going to cheat here.  normally the MIXERCTL_SETINFO
+	 * ioctl takes an am_control_t and the AUDIO_SETINFO ioctl takes
+	 * an audio_info_t.  as it turns out the first element in an
+	 * am_control_t is an audio_info_t, and the rest of the
+	 * am_control_t structure is normally ignored for a
+	 * MIXERCTL_SETINFO ioctl.  so we simply reuse the code that
+	 * handles AUDIO_SETINFO ioctls.  the buffer is a kernel one,
+	 * hence FKIOCTL instead of the caller's mode.
+	 */
+	return (lxa_audio_setinfo(lxa_state, AUDIO_MIXERCTL_SETINFO,
+	    (intptr_t)&new_ai, FKIOCTL));
+}
+
+/*
+ * Handler for LXA_IOC_MIXER_GET_PCM: copy the zone's cached PCM mixer
+ * levels out to the buffer at arg.  Returns 0 or EFAULT.
+ */
+static int
+lxa_mixer_get_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	levels;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/*
+	 * take a snapshot of the cached levels under the lock and copy it
+	 * out afterwards.  ddi_copyout() touches user memory and may block
+	 * on a page fault, so the global lxa_lock should not be held
+	 * across it.
+	 */
+	mutex_enter(&lxa_lock);
+	levels = lxa_state->lxas_zs->lxa_zs_pcm_levels;
+	mutex_exit(&lxa_lock);
+
+	if (ddi_copyout(&levels, (void *)arg, sizeof (levels), mode) != 0)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * Handler for LXA_IOC_MIXER_SET_PCM: copy in and validate new PCM mixer
+ * levels, push them to the zone's active output stream (if there is one)
+ * and, when that succeeds, remember them in the zone state.
+ * Returns 0, EFAULT, EINVAL, or the error from lxa_audio_setinfo().
+ */
+static int
+lxa_mixer_set_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	levels;
+	lxa_state_t		*ostate;
+	int			err = 0;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* fetch the requested mixer levels from the caller */
+	if (ddi_copyin((void *)arg, &levels, sizeof (levels), mode) != 0)
+		return (EFAULT);
+
+	/* reject levels that fail the sanity check */
+	if (!LXA_MIXER_LEVELS_OK(&levels))
+		return (EINVAL);
+
+	mutex_enter(&lxa_lock);
+
+	/* an active output channel gets the new levels right away */
+	ostate = lxa_state->lxas_zs->lxa_zs_ostate;
+	if (ostate != NULL) {
+		audio_info_t new_ai;
+
+		AUDIO_INITINFO(&new_ai);
+		new_ai.play.gain = levels.lxa_ml_gain;
+		new_ai.play.balance = levels.lxa_ml_balance;
+
+		err = lxa_audio_setinfo(ostate,
+		    AUDIO_SETINFO, (intptr_t)&new_ai, FKIOCTL);
+	}
+
+	/* the cache is only updated when the device accepted the levels */
+	if (err == 0)
+		lxa_state->lxas_zs->lxa_zs_pcm_levels = levels;
+
+	mutex_exit(&lxa_lock);
+	return (err);
+}
+
+/*
+ * Handler for LXA_IOC_ZONE_REG: register a zone with this driver.
+ *
+ * arg points at an lxa_zone_reg_t naming the zone and its input and
+ * output device property values.  After validating the strings, the
+ * two per-zone device properties are created on lxa_dip and a new zone
+ * state structure is inserted into lxa_zstate_hash.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, EINVAL for an
+ * unterminated string or invalid device property value, and EIO if the
+ * zone is already registered or the registration cannot be recorded.
+ */
+static int
+lxa_zone_reg(intptr_t arg, int mode)
+{
+	lxa_zone_reg_t	lxa_zr;
+	lxa_zstate_t	*lxa_zs = NULL;
+	lxa_zstate_t	*existing = NULL;
+	char		*idev_name = NULL, *odev_name = NULL, *pval = NULL;
+	int		i;
+
+	if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+		return (EFAULT);
+
+	/* make sure that zone_name is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+		if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+		return (EINVAL);
+
+	/* make sure that inputdev is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_inputdev); i++)
+		if (lxa_zr.lxa_zr_inputdev[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_inputdev))
+		return (EINVAL);
+
+	/* make sure it's a valid inputdev property value */
+	if (lxa_devprop_verify(lxa_zr.lxa_zr_inputdev) != 0)
+		return (EINVAL);
+
+	/* make sure that outputdev is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_outputdev); i++)
+		if (lxa_zr.lxa_zr_outputdev[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_outputdev))
+		return (EINVAL);
+
+	/* make sure it's a valid outputdev property value */
+	if (lxa_devprop_verify(lxa_zr.lxa_zr_outputdev) != 0)
+		return (EINVAL);
+
+	/* get the property names */
+	idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_INPUTDEV);
+	odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_OUTPUTDEV);
+
+	/*
+	 * allocate and initialize a zone state structure.
+	 * since the audio device can't possibly be opened yet
+	 * (we're setting it up now and the zone isn't booted yet)
+	 * assign some reasonable default pcm channel settings.
+	 */
+	lxa_zs = kmem_zalloc(sizeof (*lxa_zs), KM_SLEEP);
+	lxa_zs->lxa_zs_zonename = strdup(lxa_zr.lxa_zr_zone_name);
+	lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain = AUDIO_MID_GAIN;
+	lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance = AUDIO_MID_BALANCE;
+
+	mutex_enter(&lxa_lock);
+
+	/*
+	 * make sure this zone isn't already registered.
+	 * a zone is registered if properties for that zone exist
+	 * or there is a zone state structure for that zone.
+	 */
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    idev_name, &pval) == DDI_PROP_SUCCESS) {
+		goto err_unlock;
+	}
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    odev_name, &pval) == DDI_PROP_SUCCESS) {
+		goto err_unlock;
+	}
+
+	/*
+	 * mod_hash_find() stores a pointer-sized mod_hash_val_t through
+	 * its last argument, so the destination must be a pointer-sized
+	 * object (an int would be overrun on a 64-bit kernel).
+	 */
+	if (mod_hash_find(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zs->lxa_zs_zonename,
+	    (mod_hash_val_t *)&existing) == 0)
+		goto err_unlock;
+
+	/*
+	 * create the new properties and insert the zone state structure
+	 * into the global hash
+	 */
+	if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+	    idev_name, lxa_zr.lxa_zr_inputdev) != DDI_PROP_SUCCESS)
+		goto err_prop_remove;
+	if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+	    odev_name, lxa_zr.lxa_zr_outputdev) != DDI_PROP_SUCCESS)
+		goto err_prop_remove;
+	if (mod_hash_insert(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zs->lxa_zs_zonename,
+	    (mod_hash_val_t)lxa_zs) != 0)
+		goto err_prop_remove;
+
+	/* success! */
+	lxa_registered_zones++;
+	mutex_exit(&lxa_lock);
+
+	/* cleanup (lxa_zs and its zone name now belong to the hash) */
+	strfree(idev_name);
+	strfree(odev_name);
+	return (0);
+
+err_prop_remove:
+	/* undo whichever of the two properties did get created */
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+err_unlock:
+	mutex_exit(&lxa_lock);
+
+	/* the zone state never made it into the hash, so free it here */
+	if (lxa_zs != NULL) {
+		strfree(lxa_zs->lxa_zs_zonename);
+		kmem_free(lxa_zs, sizeof (*lxa_zs));
+	}
+	if (pval != NULL)
+		ddi_prop_free(pval);
+	if (idev_name != NULL)
+		strfree(idev_name);
+	if (odev_name != NULL)
+		strfree(odev_name);
+	return (EIO);
+}
+
+/*
+ * Handler for LXA_IOC_ZONE_UNREG: remove a zone's registration.
+ *
+ * arg points at an lxa_zone_reg_t of which only the zone name is used.
+ * The zone's device properties are removed from lxa_dip and its zone
+ * state structure is removed from lxa_zstate_hash and freed.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, EINVAL for an
+ * unterminated zone name, ENOENT if the zone is not registered, and
+ * EBUSY if the zone still has an active input or output stream.
+ */
+static int
+lxa_zone_unreg(intptr_t arg, int mode)
+{
+	lxa_zone_reg_t	lxa_zr;
+	lxa_zstate_t	*lxa_zs = NULL;
+	char		*idev_name = NULL, *odev_name = NULL, *pval = NULL;
+	int		rv, i;
+
+	if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+		return (EFAULT);
+
+	/* make sure that zone_name is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+		if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+		return (EINVAL);
+
+	/* get the property names */
+	idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_INPUTDEV);
+	odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_OUTPUTDEV);
+
+	mutex_enter(&lxa_lock);
+
+	if (lxa_registered_zones <= 0) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+
+	/* make sure this zone is actually registered */
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    idev_name, &pval) != DDI_PROP_SUCCESS) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ddi_prop_free(pval);
+	pval = NULL;
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    odev_name, &pval) != DDI_PROP_SUCCESS) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ddi_prop_free(pval);
+	pval = NULL;
+	if (mod_hash_find(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+	    (mod_hash_val_t *)&lxa_zs) != 0) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ASSERT(strcmp(lxa_zr.lxa_zr_zone_name, lxa_zs->lxa_zs_zonename) == 0);
+
+	/*
+	 * if the audio device is currently in use, for input or for
+	 * output, then refuse to unregister the zone
+	 */
+	if ((lxa_zs->lxa_zs_istate != NULL) ||
+	    (lxa_zs->lxa_zs_ostate != NULL)) {
+		rv = EBUSY;
+		goto err_unlock;
+	}
+
+	/* success! cleanup zone config state */
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+	/*
+	 * note, the action of removing the zone state structure from the
+	 * hash will automatically free lxa_zs->lxa_zs_zonename.
+	 *
+	 * the reason for this is that we used lxa_zs->lxa_zs_zonename
+	 * as the hash key and by default mod_hash_create_strhash() uses
+	 * mod_hash_strkey_dtor() as the hash key destructor (which
+	 * frees the key for us).
+	 */
+	(void) mod_hash_remove(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+	    (mod_hash_val_t *)&lxa_zs);
+	lxa_registered_zones--;
+	mutex_exit(&lxa_lock);
+
+	/* cleanup */
+	kmem_free(lxa_zs, sizeof (*lxa_zs));
+	strfree(idev_name);
+	strfree(odev_name);
+	return (0);
+
+err_unlock:
+	mutex_exit(&lxa_lock);
+
+	if (pval != NULL)
+		ddi_prop_free(pval);
+	if (idev_name != NULL)
+		strfree(idev_name);
+	if (odev_name != NULL)
+		strfree(odev_name);
+	return (rv);
+}
+
+/*
+ * Dispatch an ioctl issued on the devctl minor node.  Only the zone
+ * register and unregister commands are recognized; anything else, or
+ * any caller whose zone id is not 0, gets EINVAL.
+ */
+static int
+lxa_ioctl_devctl(int cmd, intptr_t arg, int mode)
+{
+	/* devctl ioctls are only allowed from the global zone */
+	ASSERT(getzoneid() == 0);
+	if (getzoneid() != 0)
+		return (EINVAL);
+
+	if (cmd == LXA_IOC_ZONE_REG)
+		return (lxa_zone_reg(arg, mode));
+
+	if (cmd == LXA_IOC_ZONE_UNREG)
+		return (lxa_zone_unreg(arg, mode));
+
+	/* unknown devctl command */
+	return (EINVAL);
+}
+
+/*
+ * open(9E) entry point.
+ *
+ * An open of the devctl minor only checks the caller's zone.  Every
+ * other open is a clone open: the zone state for the calling zone is
+ * looked up, a new minor number and lxa_state_t are allocated, the
+ * underlying devices are opened, and the new state is entered into
+ * lxa_state_hash keyed by the new minor.  For audio (dsp) opens the
+ * zone's cached PCM levels are applied and the open is recorded as the
+ * zone's active input and/or output stream; a second such open fails
+ * with EBUSY.  On success *devp is replaced with the cloned dev_t.
+ */
+static int
+/*ARGSUSED*/
+lxa_open(dev_t *devp, int flags, int otyp, cred_t *credp)
+{
+	lxa_dev_type_t	open_type = LXA_TYPE_INVALID;
+	lxa_zstate_t	*lxa_zs;
+	lxa_state_t	*lxa_state;
+	minor_t		opened_minor = getminor(*devp);
+	minor_t		minor;
+	int		rv;
+
+	if (opened_minor == LXA_MINORNUM_DEVCTL) {
+		/*
+		 * the devctl node exists only to administer this pseudo
+		 * driver, so it needs no access to any underlying audio
+		 * device and there is nothing to set up.  it may only
+		 * be used from the global zone.
+		 */
+		ASSERT(getzoneid() == 0);
+		if (getzoneid() != 0)
+			return (EINVAL);
+		return (0);
+	}
+
+	/* find the state of the zone the caller is running in */
+	if (mod_hash_find(lxa_zstate_hash, (mod_hash_key_t)getzonename(),
+	    (mod_hash_val_t *)&lxa_zs) != 0)
+		return (EIO);
+
+	/* map the minor node that was opened onto a device type */
+	if (opened_minor == LXA_MINORNUM_DSP)
+		open_type = LXA_TYPE_AUDIO;
+	else if (opened_minor == LXA_MINORNUM_MIXER)
+		open_type = LXA_TYPE_AUDIOCTL;
+	else
+		return (EINVAL);
+	ASSERT(open_type != LXA_TYPE_INVALID);
+
+	/* this is a clone open, so it gets a minor number of its own */
+	minor = id_alloc(lxa_minor_id);
+
+	/* set up the per-open state */
+	lxa_state = kmem_zalloc(sizeof (*lxa_state), KM_SLEEP);
+	lxa_state->lxas_zs = lxa_zs;
+	lxa_state->lxas_dev_old = *devp;
+	lxa_state->lxas_dev_new = makedevice(getmajor(*devp), minor);
+	lxa_state->lxas_flags = flags;
+	lxa_state->lxas_type = open_type;
+
+	/* open the input and output devices and read their features */
+	rv = lxa_dev_open(lxa_state);
+	if (rv == 0)
+		rv = lxa_dev_getfeatures(lxa_state);
+	if (rv != 0) {
+		lxa_state_close(lxa_state);
+		return (rv);
+	}
+
+	/*
+	 * remember the state in a hash indexed by the new minor number
+	 * so that later operations on this open can find it again.
+	 */
+	if (mod_hash_insert(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t)lxa_state) != 0) {
+		lxa_state_close(lxa_state);
+		return (EIO);
+	}
+
+	mutex_enter(&lxa_lock);
+
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		/* apply the currently cached zone PCM mixer levels */
+		if (lxa_state->lxas_odev_lh != NULL) {
+			audio_info_t ai;
+
+			AUDIO_INITINFO(&ai);
+			ai.play.gain = lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain;
+			ai.play.balance =
+			    lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance;
+
+			rv = lxa_audio_setinfo(lxa_state,
+			    AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL);
+			if (rv != 0)
+				goto fail_unlock;
+		}
+
+		/*
+		 * only one active open of the input device and one of
+		 * the output device is allowed per zone.
+		 */
+		if (((lxa_state->lxas_idev_lh != NULL) &&
+		    (lxa_zs->lxa_zs_istate != NULL)) ||
+		    ((lxa_state->lxas_odev_lh != NULL) &&
+		    (lxa_zs->lxa_zs_ostate != NULL))) {
+			rv = EBUSY;
+			goto fail_unlock;
+		}
+
+		/* not a duplicate open, update the zone state */
+		if (lxa_state->lxas_idev_lh != NULL)
+			lxa_zs->lxa_zs_istate = lxa_state;
+		if (lxa_state->lxas_odev_lh != NULL)
+			lxa_zs->lxa_zs_ostate = lxa_state;
+	}
+
+	mutex_exit(&lxa_lock);
+
+	/* make sure to return our newly allocated dev_t */
+	*devp = lxa_state->lxas_dev_new;
+	return (0);
+
+fail_unlock:
+	mutex_exit(&lxa_lock);
+	lxa_state_close(lxa_state);
+	return (rv);
+}
+
+/*
+ * close(9E) entry point.  The devctl minor has no per-open state, so
+ * closing it is a no-op; for any other minor the state structure is
+ * looked up by minor number and handed to lxa_state_close().
+ * Returns EINVAL when no state exists for the minor.
+ */
+static int
+/*ARGSUSED*/
+lxa_close(dev_t dev, int flags, int otyp, cred_t *credp)
+{
+	lxa_state_t	*lxa_state;
+	minor_t		minor = getminor(dev);
+
+	/* devctl minor nodes don't have a state handle */
+	if (minor == LXA_MINORNUM_DEVCTL)
+		return (0);
+
+	/* find the state that belongs to this minor */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	lxa_state_close(lxa_state);
+	return (0);
+}
+
+/*
+ * read(9E) entry point: forward the read to the underlying input
+ * device.  Returns EINVAL when there is no state for the minor, EIO
+ * once the device has been mmaped, EBADF when the open has no input
+ * device, and otherwise whatever ldi_read() returns.
+ */
+static int
+/*ARGSUSED*/
+lxa_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	lxa_state_t	*lxa_state;
+
+	/* find the state that belongs to this minor */
+	if (mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)getminor(dev),
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/*
+	 * once a process has mmaped this device, no further reads or
+	 * writes are allowed on it
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		return (EIO);
+
+	/* without an input device there is nothing to read from */
+	if (lxa_state->lxas_idev_lh == NULL)
+		return (EBADF);
+
+	/* let the underlying device do the work */
+	return (ldi_read(lxa_state->lxas_idev_lh, uiop, kcred));
+}
+
+/*
+ * write(9E) entry point: forward the write to the underlying output
+ * device.  Returns EINVAL when there is no state for the minor, EIO
+ * once the device has been mmaped, EBADF when the open has no output
+ * device, and otherwise whatever ldi_write() returns.
+ */
+static int
+/*ARGSUSED*/
+lxa_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	lxa_state_t	*lxa_state;
+
+	/* find the state that belongs to this minor */
+	if (mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)getminor(dev),
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/*
+	 * once a process has mmaped this device, no further reads or
+	 * writes are allowed on it
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		return (EIO);
+
+	/* without an output device there is nothing to write to */
+	if (lxa_state->lxas_odev_lh == NULL)
+		return (EBADF);
+
+	/* let the underlying device do the work */
+	return (ldi_write(lxa_state->lxas_odev_lh, uiop, kcred));
+}
+
+/*
+ * ioctl(9E) entry point.  Requests on the devctl minor go to
+ * lxa_ioctl_devctl().  For all other minors the per-open state is
+ * looked up and the command is dispatched according to whether the
+ * open is an audio (dsp) or an audioctl (mixer) one.  Commands that
+ * are not recognized for the open's type return EINVAL.
+ */
+static int
+/*ARGSUSED*/
+lxa_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	lxa_state_t	*lxa_state;
+	minor_t		minor = getminor(dev);
+
+	/* devctl minor nodes don't have a state handle */
+	if (minor == LXA_MINORNUM_DEVCTL)
+		return (lxa_ioctl_devctl(cmd, arg, mode));
+
+	/* find the state that belongs to this minor */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+	    (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+	/* report the minor number of the node that was originally opened */
+	if (cmd == LXA_IOC_GETMINORNUM) {
+		int minornum = getminor(lxa_state->lxas_dev_old);
+
+		if (ddi_copyout(&minornum, (void *)arg,
+		    sizeof (minornum), mode) != 0)
+			return (EFAULT);
+		return (0);
+	}
+
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		/* ioctls implemented by this driver */
+		switch (cmd) {
+		case LXA_IOC_MMAP_OUTPUT:
+			return (lxa_ioc_mmap_output(lxa_state, arg, mode));
+		case LXA_IOC_MMAP_PTR:
+			return (lxa_ioc_mmap_ptr(lxa_state, arg, mode));
+		case LXA_IOC_GET_FRAG_INFO:
+			return (lxa_ioc_get_frag_info(lxa_state, arg, mode));
+		case LXA_IOC_SET_FRAG_INFO:
+			return (lxa_ioc_set_frag_info(lxa_state, arg, mode));
+		}
+
+		/* ioctls layered over the underlying audio devices */
+		switch (cmd) {
+		case AUDIO_DRAIN:
+			return (lxa_audio_drain(lxa_state));
+		case AUDIO_SETINFO:
+			return (lxa_audio_setinfo(lxa_state,
+			    AUDIO_SETINFO, arg, mode));
+		case AUDIO_GETINFO:
+			return (lxa_audio_getinfo(lxa_state, arg, mode));
+		}
+	}
+
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) {
+		/* mixer ioctls implemented by this driver */
+		switch (cmd) {
+		case LXA_IOC_MIXER_GET_VOL:
+		case LXA_IOC_MIXER_GET_MIC:
+			return (lxa_mixer_get_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_SET_VOL:
+		case LXA_IOC_MIXER_SET_MIC:
+			return (lxa_mixer_set_common(lxa_state,
+			    cmd, arg, mode));
+		case LXA_IOC_MIXER_GET_PCM:
+			return (lxa_mixer_get_pcm(lxa_state, arg, mode));
+		case LXA_IOC_MIXER_SET_PCM:
+			return (lxa_mixer_set_pcm(lxa_state, arg, mode));
+		}
+	}
+
+	/* nothing above claimed the command */
+	return (EINVAL);
+}
+
+/*
+ * devmap(9E) entry point.  Allocates len bytes of user-mappable kernel
+ * memory, exports it through dhp, and records it in the per-open state.
+ * Only audio opens made with FWRITE may map, len must be a non-zero
+ * multiple of the fragment size no larger than LXA_OSS_FRAG_MEM, and
+ * only one mapping per open is allowed.
+ * Returns 0, EINVAL, ENOMEM, EIO or EBUSY.
+ */
+static int
+/*ARGSUSED*/
+lxa_devmap(dev_t dev, devmap_cookie_t dhp,
+    offset_t off, size_t len, size_t *maplen, uint_t model)
+{
+	lxa_state_t		*lxa_state;
+	ddi_umem_cookie_t	cookie;
+	void			*kaddr;
+
+	/* find the state that belongs to this minor */
+	if (mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)getminor(dev),
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/* mmap is only supported on audio devices ... */
+	if (lxa_state->lxas_type != LXA_TYPE_AUDIO)
+		return (EINVAL);
+
+	/* ... and only for output */
+	if ((lxa_state->lxas_flags & FWRITE) == 0)
+		return (EINVAL);
+
+	/* sanity check the amount of memory the user is asking for */
+	if (len == 0)
+		return (EINVAL);
+	if (len > LXA_OSS_FRAG_MEM)
+		return (EINVAL);
+	if ((len % lxa_state->lxas_frag_size) != 0)
+		return (EINVAL);
+
+	/* allocate and clear the memory that will back the mapping */
+	kaddr = ddi_umem_alloc(len, DDI_UMEM_NOSLEEP, &cookie);
+	if (kaddr == NULL)
+		return (ENOMEM);
+	bzero(kaddr, len);
+
+	/* export the memory to the user, readable and writable */
+	if (devmap_umem_setup(dhp, lxa_dip, NULL, cookie, 0, len,
+	    PROT_USER | PROT_READ | PROT_WRITE, 0, NULL) != 0) {
+		ddi_umem_free(cookie);
+		return (EIO);
+	}
+
+	mutex_enter(&lxa_lock);
+
+	/* a second mmap of the same open is refused */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		ASSERT(lxa_state->lxas_umem_ptr != NULL);
+		mutex_exit(&lxa_lock);
+		ddi_umem_free(cookie);
+		return (EBUSY);
+	}
+	ASSERT(lxa_state->lxas_umem_ptr == NULL);
+
+	/* record the mapping in the per-open state */
+	*maplen = len;
+	lxa_state->lxas_umem_len = len;
+	lxa_state->lxas_umem_ptr = kaddr;
+	lxa_state->lxas_umem_cookie = cookie;
+
+	mutex_exit(&lxa_lock);
+	return (0);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH of instance 0 is supported.
+ * Initializes the driver lock, creates the devctl, dsp and mixer minor
+ * nodes, and allocates the minor number space and the two state hashes.
+ *
+ * If any minor node cannot be created, the nodes created so far are
+ * removed and the lock is destroyed again, so that a failed attach
+ * leaves nothing behind.
+ */
+static int
+/*ARGSUSED*/
+lxa_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int	instance = ddi_get_instance(dip);
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	ASSERT(instance == 0);
+	if (instance != 0)
+		return (DDI_FAILURE);
+
+	lxa_dip = dip;
+	mutex_init(&lxa_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/* create our minor nodes */
+	if ((ddi_create_minor_node(dip, LXA_MINORNAME_DEVCTL, S_IFCHR,
+	    LXA_MINORNUM_DEVCTL, DDI_PSEUDO, 0) != DDI_SUCCESS) ||
+	    (ddi_create_minor_node(dip, LXA_MINORNAME_DSP, S_IFCHR,
+	    LXA_MINORNUM_DSP, DDI_PSEUDO, 0) != DDI_SUCCESS) ||
+	    (ddi_create_minor_node(dip, LXA_MINORNAME_MIXER, S_IFCHR,
+	    LXA_MINORNUM_MIXER, DDI_PSEUDO, 0) != DDI_SUCCESS)) {
+		/* a NULL name removes every minor node of this dip */
+		ddi_remove_minor_node(dip, NULL);
+		mutex_destroy(&lxa_lock);
+		lxa_dip = NULL;
+		return (DDI_FAILURE);
+	}
+
+	/* allocate our data structures */
+	lxa_minor_id = id_space_create("lxa_minor_id",
+	    LXA_MINORNUM_COUNT, LX_AUDIO_MAX_OPENS);
+	lxa_state_hash = mod_hash_create_idhash("lxa_state_hash",
+	    lxa_state_hash_size, mod_hash_null_valdtor);
+	lxa_zstate_hash = mod_hash_create_strhash("lxa_zstate_hash",
+	    lxa_zstate_hash_size, mod_hash_null_valdtor);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported, and the detach
+ * is refused while any zone is still registered.  Undoes lxa_attach():
+ * removes the minor nodes, destroys both hashes and the minor number
+ * space, destroys the driver lock and clears the driver globals.
+ */
+static int
+/*ARGSUSED*/
+lxa_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+	if (lxa_registered_zones > 0)
+		return (DDI_FAILURE);
+
+	/* a NULL name removes every minor node of this dip */
+	ddi_remove_minor_node(dip, NULL);
+
+	/*
+	 * lxa_state_hash was created as an id hash and lxa_zstate_hash
+	 * as a string hash, so destroy each with the matching routine.
+	 */
+	mod_hash_destroy_idhash(lxa_state_hash);
+	mod_hash_destroy_strhash(lxa_zstate_hash);
+	id_space_destroy(lxa_minor_id);
+
+	/* lxa_attach() initializes the lock afresh on every attach */
+	mutex_destroy(&lxa_lock);
+
+	lxa_state_hash = NULL;
+	lxa_zstate_hash = NULL;
+	lxa_minor_id = NULL;
+	lxa_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo(9E) entry point.  This driver has a single instance, so the
+ * dev_info pointer is always lxa_dip and the instance number always 0.
+ * Any other request fails.
+ */
+static int
+/*ARGSUSED*/
+lxa_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
+{
+	if (infocmd == DDI_INFO_DEVT2DEVINFO) {
+		*resultp = lxa_dip;
+		return (DDI_SUCCESS);
+	}
+
+	if (infocmd == DDI_INFO_DEVT2INSTANCE) {
+		*resultp = (void *)0;
+		return (DDI_SUCCESS);
+	}
+
+	return (DDI_FAILURE);
+}
+
+/*
+ * Driver flags
+ */
+static struct cb_ops lxa_cb_ops = {
+ lxa_open, /* open */
+ lxa_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lxa_read, /* read */
+ lxa_write, /* write */
+ lxa_ioctl, /* ioctl */
+ lxa_devmap, /* devmap */
+ nodev, /* mmap */
+ ddi_devmap_segmap, /* segmap */
+ nochpoll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP | D_DEVMAP, /* cb_flag */
+ CB_REV, /* cb_rev */
+ NULL, /* cb_aread */
+ NULL /* cb_awrite */
+};
+
+static struct dev_ops lxa_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ lxa_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ lxa_attach, /* devo_attach */
+ lxa_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &lxa_cb_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ NULL /* devo_power */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module */
+ "linux audio driver 'lx_audio' %I%", /* drv_linkinfo */
+ &lxa_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* ml_rev */
+ &modldrv, /* ml_linkage: the one driver linkage */
+ NULL /* ml_linkage list terminator */
+};
+
+/*
+ * standard module entry points
+ */
+int
+_init(void)
+{
+	int	err;
+
+	/* install the module described by modlinkage */
+	err = mod_install(&modlinkage);
+	return (err);
+}
+
+int
+_fini(void)
+{
+	int	err;
+
+	/* remove the module described by modlinkage */
+	err = mod_remove(&modlinkage);
+	return (err);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	int	rv;
+
+	/* report on the module described by modlinkage */
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.conf b/usr/src/uts/common/brand/lx/io/lx_audio.conf
new file mode 100644
index 0000000000..2eeb5eb7ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_audio" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c
new file mode 100644
index 0000000000..e4079df133
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c
@@ -0,0 +1,1137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * This driver attempts to emulate some of the behaviors of
+ * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris.
+ *
+ * It does this by layering over the /dev/ptmx device and intercepting
+ * opens to it.
+ *
+ * This driver makes the following assumptions about the way the ptm/pts
+ * drivers on Solaris work:
+ *
+ * - all opens of the /dev/ptmx device node return a unique dev_t.
+ *
+ * - the dev_t minor node value for each open ptm instance corresponds
+ * to its associated slave terminal device number. ie. the path to
+ * the slave terminal device associated with an open ptm instance
+ * whose dev_t minor node value is 5, is /dev/pts/5.
+ *
+ * - the ptm driver always allocates the lowest numbered slave terminal
+ * device possible.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/kstr.h>
+#include <sys/ldlinux.h>
+#include <sys/lx_ptm.h>
+#include <sys/modctl.h>
+#include <sys/pathname.h>
+#include <sys/ptms.h>
+#include <sys/ptyvar.h>
+#include <sys/stat.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#define LP_PTM_PATH "/dev/ptmx" /* device opened by lx_ptm_open() */
+#define LP_PTS_PATH "/dev/pts/"
+#define LP_PTS_DRV_NAME "pts" /* name used to look up the pts major */
+#define LP_PTS_USEC_DELAY (5 * 1000) /* 5 ms */
+#define LP_PTS_USEC_DELAY_MAX (5 * MILLISEC) /* 5 ms if MILLISEC is 1000 - TODO confirm */
+
+/*
+ * this driver is layered on top of the ptm driver. we'd like to
+ * make this driver's minor name space a mirror of the ptm driver's
+ * namespace, but we can't actually do this. the reason is that the
+ * ptm driver is opened via the clone driver. therefore no minor nodes
+ * of the ptm driver are actually accessible via the filesystem.
+ * since we're not a streams device we can't be opened by the clone
+ * driver. therefore we need to have at least one minor node accessible
+ * via the filesystem so that consumers can open it. we use the device
+ * node with a minor number of 0 for this purpose. what this means is
+ * that minor node 0 can't be used to map ptm minor node 0. since this
+ * minor node is now reserved we need to shift our ptm minor node
+ * mappings by one. ie. a ptm minor node with a value of 0 will
+ * correspond to our minor node with a value of 1. these mappings are
+ * managed with the following macros.
+ */
+#define DEVT_TO_INDEX(x) LX_PTM_DEV_TO_PTS(x) /* our dev_t -> ptm index; helper presumably from <sys/lx_ptm.h> */
+#define INDEX_TO_MINOR(x) ((x) + 1) /* ptm index -> our minor number */
+
+/*
+ * grow our layered handle array by the same size increment that the ptm
+ * driver uses to grow the pty device space - PTY_MAXDELTA.
+ * lx_ptm_lh_grow() uses this as its minimum growth step.
+ */
+#define LP_PTY_INC 128
+
+/*
+ * lx_ptm_ops contains state information about outstanding operations on the
+ * underlying master terminal device. Currently we only track information
+ * for read operations.
+ *
+ * Note that this data has not been rolled directly into the lx_ptm_handle
+ * structure because we can't put mutexes or condition variables into the
+ * lx_ptm_handle structure. The reason is that the array of lx_ptm_handle
+ * structures linked to from the global lx_ptm state can be resized
+ * dynamically, and when it's resized, the new array is at a different
+ * memory location and the old array memory is discarded. Mutexes and cvs
+ * are accessed based off their address, so if this array was re-sized while
+ * there were outstanding operations on any mutexes or cvs in the array
+ * then the system would tip over. In the future the lx_ptm_handle structure
+ * array should probably be replaced with either an array of pointers to
+ * lx_ptm_handle structures or some other kind of data structure containing
+ * pointers to lx_ptm_handle structures. Then the lx_ptm_ops structure
+ * could be folded directly into the lx_ptm_handle structures. (This will
+ * also require the definition of a new locking mechanism to protect the
+ * contents of lx_ptm_handle structures.)
+ */
+typedef struct lx_ptm_ops {
+ int lpo_rops; /* serialized operations in progress: 0 or 1 */
+ kcondvar_t lpo_rops_cv; /* signalled when lpo_rops returns to 0 */
+ kmutex_t lpo_rops_lock; /* protects lpo_rops */
+} lx_ptm_ops_t;
+
+/*
+ * Every open of the master terminal device in a zone results in a new
+ * lx_ptm_handle handle allocation. These handles are stored in an array
+ * hanging off the lx_ptm_state structure. A slot whose lph_handle is
+ * NULL is unused.
+ */
+typedef struct lx_ptm_handle {
+ /* Device handle to the underlying real /dev/ptmx master terminal. */
+ ldi_handle_t lph_handle;
+
+ /* Flag to indicate if TIOCPKT mode has been enabled (0 or 1). */
+ int lph_pktio;
+
+ /*
+ * Count of EOFs seen on the master stream. Non-zero means the slave
+ * device has been opened and closed at least once.
+ */
+ int lph_eofed;
+
+ /* Callback handler in the ptm driver to check if slave is open. */
+ ptmptsopencb_t lph_ppocb;
+
+ /* Pointer to state for operations on underlying device. */
+ lx_ptm_ops_t *lph_lpo;
+} lx_ptm_handle_t;
+
+/*
+ * Global state for the lx_ptm driver. The fields are set up in
+ * lx_ptm_attach().
+ */
+typedef struct lx_ptm_state {
+ /* lx_ptm device devinfo pointer */
+ dev_info_t *lps_dip;
+
+ /* LDI ident used to open underlying real /dev/ptmx master terminals. */
+ ldi_ident_t lps_li;
+
+ /* pts driver's major number; used when configuring autopush */
+ major_t lps_pts_major;
+
+ /* rw lock used to manage access and growth of lps_lh_array */
+ krwlock_t lps_lh_rwlock;
+
+ /* number of elements in lps_lh_array */
+ uint_t lps_lh_count;
+
+ /*
+ * Array of handles to underlying real /dev/ptmx master terminals,
+ * indexed by the ptm minor number.
+ */
+ lx_ptm_handle_t *lps_lh_array;
+} lx_ptm_state_t;
+
+/* The lx_ptm global state structure (a single static instance). */
+static lx_ptm_state_t lps;
+
+/*
+ * List of modules to be autopushed onto slave terminal devices when they
+ * are opened in an lx branded zone. The list is NULL terminated and is
+ * passed to kstr_autopush() by lx_ptm_open().
+ */
+static char *lx_pts_mods[] = {
+ "ptem",
+ "ldterm",
+ "ttcompat",
+ LDLINUX_MOD,
+ NULL
+};
+
+/*
+ * Enlarge the global layered handle array so that slot 'index' exists.
+ *
+ * The caller must not hold lps_lh_rwlock. The allocation, which may
+ * block, is done before the lock is taken so that readers can keep
+ * using the current array; the lock is then held as writer only long
+ * enough to copy the old contents and install the new array.
+ */
+static void
+lx_ptm_lh_grow(uint_t index)
+{
+	lx_ptm_handle_t	*fresh, *stale;
+	uint_t		fresh_cnt, stale_cnt;
+
+	fresh_cnt = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
+	fresh = kmem_zalloc(sizeof (lx_ptm_handle_t) * fresh_cnt, KM_SLEEP);
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	/*
+	 * re-check under the lock: another thread may already have grown
+	 * the array far enough, in which case our allocation is unneeded.
+	 */
+	if (index < lps.lps_lh_count) {
+		rw_exit(&lps.lps_lh_rwlock);
+		kmem_free(fresh, sizeof (lx_ptm_handle_t) * fresh_cnt);
+		return;
+	}
+
+	/* remember the current array so it can be copied and then freed */
+	stale = lps.lps_lh_array;
+	stale_cnt = lps.lps_lh_count;
+	ASSERT((stale_cnt != 0) || (stale == NULL));
+	ASSERT((stale_cnt == 0) || (stale != NULL));
+
+	/* carry the existing slots over to the new array */
+	if (stale_cnt != 0)
+		bcopy(stale, fresh, sizeof (lx_ptm_handle_t) * stale_cnt);
+
+	/* publish the new array */
+	lps.lps_lh_array = fresh;
+	lps.lps_lh_count = fresh_cnt;
+
+	rw_exit(&lps.lps_lh_rwlock);
+
+	/* the old array is no longer reachable; release it */
+	if (stale != NULL)
+		kmem_free(stale, sizeof (lx_ptm_handle_t) * stale_cnt);
+}
+
+/*
+ * Record the layered handle 'lh' in slot 'index' of the global handle
+ * array, growing the array first if the slot does not exist yet. A new
+ * lx_ptm_ops_t is allocated and attached to the slot.
+ */
+static void
+lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
+{
+	lx_ptm_handle_t	*slot;
+	lx_ptm_ops_t	*ops;
+
+	ASSERT(lh != NULL);
+
+	/* build the per-handle operation state before taking the lock */
+	ops = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
+	mutex_init(&ops->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&ops->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	/* lx_ptm_lh_grow() takes the lock itself, so drop it around the call */
+	if (index >= lps.lps_lh_count) {
+		rw_exit(&lps.lps_lh_rwlock);
+		lx_ptm_lh_grow(index);
+		rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	}
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+
+	/* the slot must currently be unused */
+	ASSERT(slot->lph_handle == NULL);
+	ASSERT(slot->lph_pktio == 0);
+	ASSERT(slot->lph_eofed == 0);
+	ASSERT(slot->lph_lpo == NULL);
+
+	/* fill in the slot */
+	slot->lph_handle = lh;
+	slot->lph_pktio = 0;
+	slot->lph_eofed = 0;
+	slot->lph_lpo = ops;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Clear slot 'index' of the global handle array and return the layered
+ * handle that was stored there. The slot's lx_ptm_ops_t is torn down and
+ * freed; no read operation may be outstanding on it.
+ */
+static ldi_handle_t
+lx_ptm_lh_remove(uint_t index)
+{
+	ldi_handle_t	lh;
+	lx_ptm_ops_t	*lpo;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+	ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
+	ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
+
+	/*
+	 * free the ops structure. the mutex and cv were initialized in
+	 * lx_ptm_lh_insert(), so destroy them before releasing the memory
+	 * that contains them.
+	 */
+	lpo = lps.lps_lh_array[index].lph_lpo;
+	cv_destroy(&lpo->lpo_rops_cv);
+	mutex_destroy(&lpo->lpo_rops_lock);
+	kmem_free(lpo, sizeof (lx_ptm_ops_t));
+	lps.lps_lh_array[index].lph_lpo = NULL;
+
+	/* remove the handle and return it */
+	lh = lps.lps_lh_array[index].lph_handle;
+	lps.lps_lh_array[index].lph_handle = NULL;
+	lps.lps_lh_array[index].lph_pktio = 0;
+	lps.lps_lh_array[index].lph_eofed = 0;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (lh);
+}
+
+/*
+ * Copy the "is the slave open" callback stored in slot 'index' into
+ * *ppocb.
+ *
+ * This only reads the slot, so the array lock is taken as a reader,
+ * matching the other lookup/get helpers in this file.
+ */
+static void
+lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	*ppocb = lps.lps_lh_array[index].lph_ppocb;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Store a copy of the callback *ppocb in slot 'index' of the global
+ * handle array. The slot must already hold a handle.
+ */
+static void
+lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	lx_ptm_handle_t	*slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+
+	slot->lph_ppocb = *ppocb;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the layered handle stored in slot 'index' of the global handle
+ * array. The slot must be in use.
+ */
+static ldi_handle_t
+lx_ptm_lh_lookup(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+	ldi_handle_t	handle;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+
+	handle = slot->lph_handle;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (handle);
+}
+
+/*
+ * Return the lx_ptm_ops_t attached to slot 'index' of the global handle
+ * array. The slot must have an ops structure.
+ */
+static lx_ptm_ops_t *
+lx_ptm_lpo_lookup(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+	lx_ptm_ops_t	*ops;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_lpo != NULL);
+
+	ops = slot->lph_lpo;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (ops);
+}
+
+/*
+ * Return the TIOCPKT (packet mode) flag recorded in slot 'index'.
+ */
+static int
+lx_ptm_lh_pktio_get(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+	int		enabled;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+
+	enabled = slot->lph_pktio;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (enabled);
+}
+
+/*
+ * Record 'pktio' as the TIOCPKT (packet mode) flag for slot 'index'.
+ */
+static void
+lx_ptm_lh_pktio_set(uint_t index, int pktio)
+{
+	lx_ptm_handle_t	*slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+
+	slot->lph_pktio = pktio;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the EOF counter recorded in slot 'index'. A non-zero value
+ * means at least one EOF has been recorded for this master terminal.
+ */
+static int
+lx_ptm_lh_eofed_get(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+	int		count;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+
+	count = slot->lph_eofed;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (count);
+}
+
+/*
+ * Bump the EOF counter recorded in slot 'index' by one.
+ */
+static void
+lx_ptm_lh_eofed_set(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+
+	slot->lph_eofed += 1;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Begin a serialized operation on the master terminal identified by
+ * 'dev'. Waits until no other such operation is in progress.
+ *
+ * Returns 0 once the operation has been started, or -1 if the wait was
+ * interrupted (cv_wait_sig() returned 0).
+ */
+static int
+lx_ptm_read_start(dev_t dev)
+{
+	lx_ptm_ops_t	*ops = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+	int		rv = 0;
+
+	mutex_enter(&ops->lpo_rops_lock);
+	ASSERT(ops->lpo_rops >= 0);
+
+	for (;;) {
+		if (ops->lpo_rops == 0) {
+			/* nobody else is active; claim the operation */
+			ops->lpo_rops++;
+			VERIFY(ops->lpo_rops == 1);
+			break;
+		}
+
+		/* wait for the current operation to finish */
+		if (cv_wait_sig(&ops->lpo_rops_cv, &ops->lpo_rops_lock) == 0) {
+			rv = -1;
+			break;
+		}
+	}
+
+	mutex_exit(&ops->lpo_rops_lock);
+	return (rv);
+}
+
+/*
+ * Finish the serialized operation started by lx_ptm_read_start() on the
+ * master terminal identified by 'dev' and wake one waiter, if any.
+ */
+static void
+lx_ptm_read_end(dev_t dev)
+{
+	lx_ptm_ops_t	*ops = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+	mutex_enter(&ops->lpo_rops_lock);
+	ASSERT(ops->lpo_rops >= 0);
+
+	/* exactly one operation must have been in progress */
+	ops->lpo_rops--;
+	VERIFY(ops->lpo_rops == 0);
+
+	cv_signal(&ops->lpo_rops_cv);
+	mutex_exit(&ops->lpo_rops_lock);
+}
+
+/*
+ * Invoke the callback saved for 'dev' at open time and return its
+ * result. Callers in this file treat 0 as "slave device is closed".
+ */
+static int
+lx_ptm_pts_isopen(dev_t dev)
+{
+	ptmptsopencb_t	cb;
+	int		is_open;
+
+	lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &cb);
+	is_open = cb.ppocb_func(cb.ppocb_arg);
+	return (is_open);
+}
+
+/*
+ * Issue a zero byte read on 'lh'. Doing so removes any EOF message that
+ * is sitting at the head of the stream. The result of the read is
+ * deliberately ignored.
+ */
+static void
+lx_ptm_eof_read(ldi_handle_t lh)
+{
+	char		scratch[1];
+	iovec_t		vec;
+	struct uio	req;
+
+	/* a single, empty, kernel-space buffer */
+	vec.iov_base = scratch;
+	vec.iov_len = 0;
+
+	req.uio_iov = &vec;
+	req.uio_iovcnt = 1;
+	req.uio_resid = vec.iov_len;
+	req.uio_offset = 0;
+	req.uio_segflg = UIO_SYSSPACE;
+	req.uio_fmode = 0;
+	req.uio_extflg = 0;
+	req.uio_llimit = MAXOFFSET_T;
+
+	(void) ldi_read(lh, &req, kcred);
+}
+
+/*
+ * Drop at most one EOF message from the head of the master stream for
+ * 'dev'.
+ *
+ * On success returns 0 and sets *rvalp to 1 if an EOF was dropped, or to
+ * 0 if there was none. If the I_NREAD ioctl fails its error is returned
+ * and *rvalp is 0.
+ */
+static int
+lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		rv, first_msg_bytes, nmsgs;
+
+	*rvalp = 0;
+
+	/*
+	 * I_NREAD reports the size of the first message and the number
+	 * of messages queued. It is a streams framework ioctl so it will
+	 * succeed even if there have been previous write errors on this
+	 * stream.
+	 */
+	rv = ldi_ioctl(lh, I_NREAD, (intptr_t)&first_msg_bytes,
+	    FKIOCTL, kcred, &nmsgs);
+	if (rv != 0)
+		return (rv);
+
+	/* An EOF is represented by a queued zero length data message. */
+	if ((nmsgs != 0) && (first_msg_bytes == 0)) {
+		/* Record the fact that the slave device has been closed. */
+		lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+
+		/* consume the EOF */
+		lx_ptm_eof_read(lh);
+		*rvalp = 1;
+	}
+	return (0);
+}
+
+/*
+ * Drop every EOF message at the head of the master stream for 'dev'.
+ *
+ * Returns 0 on success or the error from lx_ptm_eof_drop_1(). If rvalp
+ * is not NULL, *rvalp is set to 1 when at least one EOF was dropped and
+ * to 0 otherwise.
+ */
+static int
+lx_ptm_eof_drop(dev_t dev, int *rvalp)
+{
+	int	dropped, rv;
+
+	if (rvalp != NULL)
+		*rvalp = 0;
+
+	/* keep going for as long as each pass removes an EOF */
+	while (((rv = lx_ptm_eof_drop_1(dev, &dropped)) == 0) &&
+	    (dropped != 0)) {
+		if (rvalp != NULL)
+			*rvalp = 1;
+	}
+	return (rv);
+}
+
+/*
+ * Check whether anything is waiting to be read on the master stream for
+ * 'dev'.
+ *
+ * With ignore_eof set, FIONREAD is used and *rvalp becomes 1 only if
+ * there are bytes to read. Otherwise I_NREAD is used and *rvalp becomes
+ * 1 if any message at all is queued. Returns 0 on success or the ioctl
+ * error, in which case *rvalp is 0.
+ */
+static int
+lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		rv, nbytes, nmsgs, unused;
+
+	*rvalp = 0;
+
+	if (ignore_eof != 0) {
+		/* only real data counts */
+		rv = ldi_ioctl(lh, FIONREAD, (intptr_t)&nbytes,
+		    FKIOCTL, kcred, &unused);
+		if (rv != 0)
+			return (rv);
+		*rvalp = (nbytes != 0) ? 1 : 0;
+		return (0);
+	}
+
+	/* any queued message counts, including zero length ones */
+	rv = ldi_ioctl(lh, I_NREAD, (intptr_t)&nbytes,
+	    FKIOCTL, kcred, &nmsgs);
+	if (rv != 0)
+		return (rv);
+	*rvalp = (nmsgs != 0) ? 1 : 0;
+	return (0);
+}
+
+/*
+ * attach entry point. Creates the LX_PTM_MINOR_NODE minor node, obtains
+ * the LDI ident used for layered opens, and initializes the global
+ * driver state. Only DDI_ATTACH is supported.
+ *
+ * Returns DDI_SUCCESS, or DDI_FAILURE if the command is not DDI_ATTACH
+ * or if a setup step fails.
+ */
+static int
+lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int err;
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
+	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	err = ldi_ident_from_dip(dip, &lps.lps_li);
+	if (err != 0) {
+		/*
+		 * undo the minor node creation. the node must be removed
+		 * by the name it was created with, not by the devinfo
+		 * node name.
+		 */
+		ddi_remove_minor_node(dip, LX_PTM_MINOR_NODE);
+		return (DDI_FAILURE);
+	}
+
+	lps.lps_dip = dip;
+	lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
+
+	/* the handle array starts out empty and is grown on demand */
+	rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
+	lps.lps_lh_count = 0;
+	lps.lps_lh_array = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach entry point. Undoes everything lx_ptm_attach() set up: removes
+ * the minor node, releases the LDI ident, frees the handle array and
+ * destroys the array lock. Only DDI_DETACH is supported.
+ */
+static int
+lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	/* a NULL name removes every minor node created for this dip */
+	ddi_remove_minor_node(dip, NULL);
+
+	ldi_ident_release(lps.lps_li);
+	lps.lps_dip = NULL;
+
+	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+	if (lps.lps_lh_array != NULL) {
+		kmem_free(lps.lps_lh_array,
+		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+		lps.lps_lh_array = NULL;
+		lps.lps_lh_count = 0;
+	}
+
+	/* pairs with the rw_init() in lx_ptm_attach() */
+	rw_destroy(&lps.lps_lh_rwlock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * open entry point. Opens the real /dev/ptmx through LDI, rewrites
+ * *devp so that our minor number mirrors the ptm minor number (shifted
+ * by one), fetches the "is the slave open" callback from the ptm driver,
+ * configures autopush for the matching slave device and finally records
+ * the layered handle.
+ *
+ * Returns 0 on success, ENOTSUP if FNDELAY or FNONBLOCK is requested,
+ * the LDI error if the open or ldi_get_dev() fails, and EIO if either
+ * the callback ioctl or the autopush setup fails.
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+	ptmptsopencb_t	cb = { NULL, NULL };
+	struct strioctl	req;
+	ldi_handle_t	ptm_lh;
+	dev_t		ptm_devt;
+	major_t		pts_major;
+	minor_t		pts_minor, pts_lastminor;
+	uint_t		slot, anchor = 1;
+	int		rv, ioc_rval = 0;
+
+	/*
+	 * Don't support the FNDELAY flag and FNONBLOCK until we either
+	 * find a Linux app that opens /dev/ptmx with the O_NDELAY
+	 * or O_NONBLOCK flags explicitly, or until we create test cases
+	 * to determine how reads of master terminal devices opened with
+	 * these flags behave in different situations on Linux. Supporting
+	 * these flags will involve enhancing our read implementation
+	 * and changing the way it deals with EOF notifications.
+	 */
+	if ((flag & (FNDELAY | FNONBLOCK)) != 0)
+		return (ENOTSUP);
+
+	/*
+	 * we're layered on top of the ptm driver so open that driver
+	 * first. (note that we're opening /dev/ptmx in the global
+	 * zone, not ourselves in the Linux zone.)
+	 */
+	rv = ldi_open_by_name(LP_PTM_PATH, flag, credp, &ptm_lh, lps.lps_li);
+	if (rv != 0)
+		return (rv);
+
+	/* from here on every failure has to close ptm_lh again */
+
+	/* get the devt returned by the ptmx open */
+	if ((rv = ldi_get_dev(ptm_lh, &ptm_devt)) != 0)
+		goto fail;
+
+	/*
+	 * we're a cloning driver so here we'll change the devt that we
+	 * return. the ptmx is also a cloning driver so we'll just use
+	 * its minor number as our minor number (it already manages its
+	 * minor name space so no reason to duplicate the effort.)
+	 */
+	slot = getminor(ptm_devt);
+	*devp = makedevice(getmajor(*devp), INDEX_TO_MINOR(slot));
+
+	/* Get a callback function to query if the pts device is open. */
+	req.ic_cmd = PTMPTSOPENCB;
+	req.ic_timout = 0;
+	req.ic_len = sizeof (cb);
+	req.ic_dp = (char *)&cb;
+
+	rv = ldi_ioctl(ptm_lh, I_STR, (intptr_t)&req, FKIOCTL, kcred,
+	    &ioc_rval);
+	if ((rv != 0) || (ioc_rval != 0)) {
+		rv = EIO;	/* XXX return something else here? */
+		goto fail;
+	}
+	ASSERT(cb.ppocb_func != NULL);
+
+	/*
+	 * now setup autopush for the terminal slave device. this is
+	 * necessary so that when a Linux program opens the device we
+	 * can push required strmod modules onto the stream. in Solaris
+	 * this is normally done by the application that actually
+	 * allocates the terminal.
+	 */
+	pts_major = lps.lps_pts_major;
+	pts_minor = slot;
+	pts_lastminor = 0;
+	if (kstr_autopush(SET_AUTOPUSH, &pts_major, &pts_minor,
+	    &pts_lastminor, &anchor, lx_pts_mods) != 0) {
+		rv = EIO;	/* XXX return something else here? */
+		goto fail;
+	}
+
+	/* save off this layered handle for future accesses */
+	lx_ptm_lh_insert(slot, ptm_lh);
+	lx_ptm_lh_set_ppocb(slot, &cb);
+	return (0);
+
+fail:
+	(void) ldi_close(ptm_lh, flag, credp);
+	return (rv);
+}
+
+/*
+ * close entry point. Forgets the layered handle for 'dev', clears the
+ * autopush configuration of the matching slave device and then closes
+ * the underlying ptm device. Returns the result of that ldi_close().
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+	uint_t		slot = DEVT_TO_INDEX(dev);
+	ldi_handle_t	ptm_lh;
+	major_t		pts_major;
+	minor_t		pts_minor, pts_lastminor;
+
+	/*
+	 * we must cleanup all the state associated with this major/minor
+	 * terminal pair before actually closing the ptm master device.
+	 * this is required because once the close of the ptm device is
+	 * complete the major/minor terminal pair is immediately available
+	 * for re-use in any zone.
+	 */
+
+	/* free up our saved reference for this layered handle */
+	ptm_lh = lx_ptm_lh_remove(slot);
+
+	/*
+	 * unconfigure autopush for the associated terminal slave device.
+	 *
+	 * we loop here because we don't want to release this ptm
+	 * node if autopush can't be disabled on the associated
+	 * slave device because then bad things could happen if
+	 * another brand were to get this terminal allocated
+	 * to them.
+	 *
+	 * XXX should we ever give up?
+	 */
+	pts_major = lps.lps_pts_major;
+	pts_minor = slot;
+	pts_lastminor = 0;
+	while (kstr_autopush(CLR_AUTOPUSH, &pts_major, &pts_minor,
+	    &pts_lastminor, 0, NULL) != 0)
+		continue;
+
+	/*
+	 * note that we don't have to bother with changing the permissions
+	 * on the associated slave device here. the reason is that no one
+	 * can actually open the device until its associated master
+	 * device is re-opened, which will result in the permissions on
+	 * it being reset.
+	 */
+	return (ldi_close(ptm_lh, flag, credp));
+}
+
+/*
+ * One attempt at reading from the master terminal for 'dev'.
+ *
+ * Returns 0 with *loop == 0 when data was read, 0 with *loop == 1 when
+ * the read consumed an EOF and should be retried, EIO when the slave is
+ * closed after having been open and no data is queued, or the error from
+ * the underlying calls.
+ */
+static int
+lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	ssize_t		resid_before = uiop->uio_resid;
+	int		rv, have_data;
+
+	*loop = 0;
+
+	/*
+	 * Here's another way that Linux master terminals behave differently
+	 * from Solaris master terminals. If you do a read on a Linux
+	 * master terminal (that was opened without NDELAY and NONBLOCK)
+	 * whose corresponding slave terminal is currently closed and
+	 * has been opened and closed at least once, Linux returns -1 and
+	 * sets errno to EIO whereas Solaris blocks.
+	 *
+	 * So if the slave has been opened and closed at least once and
+	 * is closed right now, make sure that data is available before
+	 * attempting a read.
+	 */
+	if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
+	    (lx_ptm_pts_isopen(dev) == 0)) {
+		if ((rv = lx_ptm_data_check(dev, 0, &have_data)) != 0)
+			return (rv);
+
+		/* If there is no data available then return. */
+		if (have_data == 0)
+			return (EIO);
+	}
+
+	/* Actually do the read operation. */
+	if ((rv = ldi_read(lh, uiop, credp)) != 0)
+		return (rv);
+
+	/*
+	 * An unchanged residual count means this was a zero byte read
+	 * (ie, an EOF). This indicates that the slave terminal device
+	 * has been closed. Record the fact that the slave device has
+	 * been closed and ask the caller to retry the read operation.
+	 */
+	if (uiop->uio_resid == resid_before) {
+		lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+		*loop = 1;
+	}
+	return (0);
+}
+
+/*
+ * read entry point. Reads from the underlying master terminal, retrying
+ * for as long as lx_ptm_read_loop() asks for it. When packet mode
+ * (TIOCPKT) is enabled the data is preceded by one status byte.
+ *
+ * Returns 0 on success, EINTR if waiting for our turn to read was
+ * interrupted, EFAULT if the status byte can't be reserved or written,
+ * or the error from lx_ptm_read_loop().
+ */
+static int
+lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	int		pkt_mode = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
+	int		rv, again;
+	struct uio	hdr_uio;
+	struct iovec	hdr_iov;
+
+	ASSERT(uiop->uio_iovcnt > 0);
+
+	/*
+	 * If packet mode has been enabled (via TIOCPKT) we need to pad
+	 * all read requests with a leading byte that indicates any
+	 * relevant control status information.
+	 */
+	if (pkt_mode != 0) {
+		/*
+		 * We'd like to write the control information into
+		 * the current buffer but we can't yet. We don't
+		 * want to modify userspace memory here only to have
+		 * the read operation fail later. So instead
+		 * what we'll do here is read one character from the
+		 * beginning of the memory pointed to by the uio
+		 * structure. This will advance the output pointer
+		 * by one. Then when the read completes successfully
+		 * we can update the byte that we passed over. Before
+		 * we do the read make a copy of the current uiop and
+		 * iovec structs so we can write to them later.
+		 */
+		hdr_uio = *uiop;
+		hdr_iov = *uiop->uio_iov;
+		hdr_uio.uio_iov = &hdr_iov;
+
+		if (uwritec(uiop) == -1)
+			return (EFAULT);
+	}
+
+	for (;;) {
+		/*
+		 * Serialize all reads. We need to do this so that we can
+		 * properly emulate the behavior of master terminals on Linux.
+		 * In reality this serialization should not pose any kind of
+		 * performance problem since it would be very strange to have
+		 * multiple threads trying to read from the same master
+		 * terminal device concurrently.
+		 */
+		if (lx_ptm_read_start(dev) != 0)
+			return (EINTR);
+
+		rv = lx_ptm_read_loop(dev, uiop, credp, &again);
+		lx_ptm_read_end(dev);
+		if (rv != 0)
+			return (rv);
+		if (again == 0)
+			break;
+	}
+
+	if (pkt_mode != 0) {
+		uint8_t status = TIOCPKT_DATA;
+
+		/*
+		 * Note that the control status information we
+		 * pass back is faked up in the sense that we
+		 * don't actually report any events, we always
+		 * report a status of TIOCPKT_DATA.
+		 */
+		if (uiomove(&status, 1, UIO_READ, &hdr_uio) != 0)
+			return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * write entry point. Passes the write straight through to the
+ * underlying master terminal and returns its result.
+ */
+static int
+lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+
+	return (ldi_write(lh, uiop, credp));
+}
+
+/*
+ * ioctl entry point. I_SETSIG and I_ESETSIG are rejected with EINVAL,
+ * TIOCPKT is emulated here, and everything else is passed through to
+ * the underlying master terminal.
+ */
+static int
+lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+
+	switch (cmd) {
+	case I_SETSIG:
+	case I_ESETSIG:
+		/*
+		 * here we need to make sure that we never allow the
+		 * I_SETSIG and I_ESETSIG ioctls to pass through. we
+		 * do this because we can't support them.
+		 *
+		 * the native Solaris ptm device supports these ioctls
+		 * because they are streams framework ioctls and all
+		 * streams devices support them by default. these ioctls
+		 * cause the current process to be registered with a
+		 * stream and receive signals when certain stream events
+		 * occur.
+		 *
+		 * a problem arises with cleanup of these registrations
+		 * for layered drivers.
+		 *
+		 * normally the streams framework is notified whenever a
+		 * process closes any reference to a stream and it goes
+		 * ahead and cleans up these registrations. but actual
+		 * device drivers are not notified when a process performs
+		 * a close operation unless the process is closing the
+		 * last opened reference to the device on the entire
+		 * system.
+		 *
+		 * so while we could pass these ioctls on and allow
+		 * processes to register for signal delivery, we would
+		 * never receive any notification when those processes
+		 * exit (or close a stream) and we wouldn't be able to
+		 * unregister them.
+		 *
+		 * luckily these operations are streams specific and Linux
+		 * doesn't support streams devices. so it doesn't actually
+		 * seem like we need to support these ioctls. if it turns
+		 * out that we do need to support them for some reason in
+		 * the future, the current driver model will have to be
+		 * enhanced to better support streams device layering.
+		 */
+		return (EINVAL);
+
+	case TIOCPKT: {
+		/*
+		 * here we fake up support for TIOCPKT. Linux applications
+		 * expect /dev/ptmx to support this ioctl, but on Solaris
+		 * it doesn't. (it is supported on older bsd style ptys.)
+		 *
+		 * the reason that this ioctl is emulated here instead of
+		 * in userland is that this ioctl affects the results
+		 * returned from read() operations. if this ioctl was
+		 * emulated in userland the brand library would need to
+		 * intercept all read operations and check to see if pktio
+		 * was enabled for the fd being read from. since this
+		 * ioctl only needs to be supported on the ptmx device it
+		 * makes more sense to support it here where we can easily
+		 * update the results returned for read() operations
+		 * performed on ourselves.
+		 */
+		int enable;
+
+		if (ddi_copyin((void *)arg, &enable, sizeof (enable),
+		    mode) != DDI_SUCCESS)
+			return (EFAULT);
+
+		/* normalize the flag to 0 or 1 before saving it */
+		lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev),
+		    (enable == 0) ? 0 : 1);
+		return (0);
+	}
+
+	default:
+		break;
+	}
+
+	return (ldi_ioctl(lh, cmd, arg, mode, credp, rvalp));
+}
+
+static int
+lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
+ struct pollhead **phpp, int *loop)
+{
+ ldi_handle_t lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+ short reventsp2;
+ int err, rval;
+
+ *loop = 0; /* set to 1 below when the caller must retry the poll */
+
+ /*
+ * This is one pass of the poll emulation; lx_ptm_poll() calls us
+ * again for as long as we set *loop to 1. We return 0 on success
+ * or the error from the underlying calls.
+ *
+ * If the slave device has been opened and closed at least
+ * once and the slave device is currently closed, then poll
+ * always needs to return immediately.
+ */
+ if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
+ (lx_ptm_pts_isopen(dev) == 0)) {
+ /* In this case always return POLLHUP */
+ *reventsp = POLLHUP;
+
+ /*
+ * Check if there really is data on the stream.
+ * If so set the correct return flags.
+ */
+ if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
+ /* Something went wrong. */
+ return (err);
+ }
+ if (rval != 0)
+ *reventsp |= (events & (POLLIN | POLLRDNORM));
+
+ /*
+ * Is the user checking for writability? Note that for ptm
+ * devices Linux seems to ignore the POLLWRBAND write flag.
+ */
+ if ((events & POLLWRNORM) == 0)
+ return (0);
+
+ /*
+ * To check if the stream is writable we have to actually
+ * call poll, but make sure to set anyyet to 1 to prevent
+ * the streams framework from setting up callbacks.
+ */
+ if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
+ return (err);
+
+ *reventsp |= (reventsp2 & POLLWRNORM);
+ } else {
+ int lockstate;
+
+ /*
+ * Either no EOF has been recorded for this device yet or
+ * the slave device is open; do the real poll.
+ */
+ if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
+ return (err);
+
+ /*
+ * Drop any leading EOFs on the stream.
+ *
+ * Note that we have to use pollunlock() here to avoid
+ * recursive mutex enters in the poll framework. The
+ * reason is that if there is an EOF message on the stream
+ * then the act of reading from the queue to remove the
+ * message can cause the ptm driver's event service
+ * routine to be invoked, and if there is no open
+ * slave device then the ptm driver may generate
+ * error messages and put them on the stream. This
+ * in turn will generate a poll event and the poll
+ * framework will try to invoke any poll callbacks
+ * associated with the stream. In the process of
+ * doing that the poll framework will try to acquire
+ * locks that we are already holding. So we need to
+ * drop those locks here before we do our read.
+ */
+ lockstate = pollunlock();
+ err = lx_ptm_eof_drop(dev, &rval);
+ pollrelock(lockstate);
+ if (err)
+ return (err);
+
+ /* If no EOF was dropped then return */
+ if (rval == 0)
+ return (0);
+
+ /*
+ * An EOF was removed from the stream. Retry the entire
+ * poll operation from the top because polls on the ptm
+ * device should behave differently now.
+ */
+ *loop = 1;
+ }
+ return (0);
+}
+
+/*
+ * chpoll entry point. Runs lx_ptm_poll_loop() until it no longer asks
+ * for a retry. Returns 0 on success, EINTR if waiting for our turn was
+ * interrupted, or the error from lx_ptm_poll_loop().
+ */
+static int
+lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	int	retry, rv;
+
+	for (;;) {
+		/* Serialize ourself wrt read operations. */
+		if (lx_ptm_read_start(dev) != 0)
+			return (EINTR);
+
+		rv = lx_ptm_poll_loop(dev, events, anyyet, reventsp, phpp,
+		    &retry);
+		lx_ptm_read_end(dev);
+
+		if (rv != 0)
+			return (rv);
+		if (retry == 0)
+			return (0);
+	}
+}
+
+static struct cb_ops lx_ptm_cb_ops = {
+ lx_ptm_open, /* open */
+ lx_ptm_close, /* close */
+ nodev, /* strategy */
+ nodev, /* print */
+ nodev, /* dump */
+ lx_ptm_read, /* read */
+ lx_ptm_write, /* write */
+ lx_ptm_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ lx_ptm_poll, /* chpoll */
+ ddi_prop_op, /* prop_op */
+ NULL, /* cb_str */
+ D_NEW | D_MP, /* cb_flag */
+ CB_REV, /* cb_rev */
+ NULL, /* cb_aread */
+ NULL /* cb_awrite */
+};
+
+static struct dev_ops lx_ptm_ops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ ddi_getinfo_1to1, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ lx_ptm_attach, /* devo_attach */
+ lx_ptm_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &lx_ptm_cb_ops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ NULL /* devo_power */
+};
+
+static struct modldrv modldrv = {
+ &mod_driverops, /* type of module: device driver */
+ "Linux master terminal driver 'lx_ptm' %I%", /* description string */
+ &lx_ptm_ops /* driver ops */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, /* linkage structure revision */
+ &modldrv, /* the only linkage element: the driver linkage */
+ NULL /* terminates the linkage list */
+};
+
+int
+_init(void)
+{
+	int status;
+
+	/* hand this module's linkage structure to mod_install() */
+	status = mod_install(&modlinkage);
+	return (status);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+	int status;
+
+	/* let mod_info() fill in *modinfop from our linkage structure */
+	status = mod_info(&modlinkage, modinfop);
+	return (status);
+}
+
+int
+_fini(void)
+{
+	int status;
+
+	/* ask mod_remove() to unlink this module; pass its result back */
+	status = mod_remove(&modlinkage);
+	return (status);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
new file mode 100644
index 0000000000..481b4e3c74
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+#ident "%Z%%M% %I% %E% SMI"
+
+name="lx_ptm" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
new file mode 100644
index 0000000000..d993c1eefc
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -0,0 +1,836 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/exec.h>
+#include <sys/lx_impl.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_brand.h>
+#include <sys/termios.h>
+#include <sys/sunddi.h>
+#include <sys/ddi.h>
+#include <sys/exec.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/machelf.h>
+#include <sys/auxv.h>
+#include <sys/priv.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/archsystm.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+
+int lx_debug = 0;
+
+void lx_setbrand(proc_t *);
+int lx_getattr(zone_t *, int, void *, size_t *);
+int lx_setattr(zone_t *, int, void *, size_t);
+int lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t);
+void lx_copy_procdata(proc_t *, proc_t *);
+
+extern void lx_setrval(klwp_t *, int, int);
+extern void lx_proc_exit(proc_t *, klwp_t *);
+extern void lx_exec();
+extern int lx_initlwp(klwp_t *);
+extern void lx_forklwp(klwp_t *, klwp_t *);
+extern void lx_exitlwp(klwp_t *);
+extern void lx_freelwp(klwp_t *);
+extern greg_t lx_fixsegreg(greg_t, model_t);
+extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *);
+
+int lx_systrace_brand_enabled;
+
+lx_systrace_f *lx_systrace_entry_ptr;
+lx_systrace_f *lx_systrace_return_ptr;
+
+static int lx_systrace_enabled;
+
+static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int brand_action);
+
+/*
+ * Brand operations vector for the lx brand; lx_brand points at it. The
+ * initializers are positional, so their order has to follow the member
+ * order of struct brand_ops, which is declared outside this file.
+ */
+struct brand_ops lx_brops = {
+ lx_brandsys,
+ lx_setbrand,
+ lx_getattr,
+ lx_setattr,
+ lx_copy_procdata,
+ lx_proc_exit,
+ lx_exec,
+ lx_setrval,
+ lx_initlwp,
+ lx_forklwp,
+ lx_freelwp,
+ lx_exitlwp,
+ lx_elfexec
+};
+
+/*
+ * Machine-dependent brand hooks; lx_brand points at it. Only the int80
+ * callback and the segment register fixup are supplied, every other slot
+ * is NULL. The initializers are positional and must follow the member
+ * order of struct brand_mach_ops, which is declared outside this file.
+ */
+struct brand_mach_ops lx_mops = {
+ NULL,
+ lx_brand_int80_callback,
+ NULL,
+ NULL,
+ NULL,
+ lx_fixsegreg,
+};
+
+/*
+ * The lx brand descriptor: interface version, brand name, and the two
+ * operations vectors defined above.
+ */
+struct brand lx_brand = {
+ BRAND_VER_1,
+ "lx",
+ &lx_brops,
+ &lx_mops
+};
+
+/* Brand module linkage: module type, description string, and the brand. */
+static struct modlbrand modlbrand = {
+ &mod_brandops, "lx brand %I%", &lx_brand
+};
+
+/* Module linkage passed to mod_install(), mod_info() and mod_remove(). */
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlbrand, NULL
+};
+
+/*
+ * Brand hook run when a branded process exits.
+ *
+ * For any process other than the zone's init, the exiting lwp is torn down
+ * with lx_exitlwp(), the per-process brand data is freed and the process is
+ * handed back to the native brand.  For init, nothing is released here;
+ * instead the zone may be rebooted (see below).
+ */
+void
+lx_proc_exit(proc_t *p, klwp_t *lwp)
+{
+	zone_t *zone = p->p_zone;
+
+	ASSERT(p->p_brand != NULL);
+	ASSERT(p->p_brand_data != NULL);
+
+	if (p->p_pid != zone->zone_proc_initpid) {
+		lx_exitlwp(lwp);
+		kmem_free(p->p_brand_data, sizeof (struct lx_proc_data));
+		p->p_brand_data = NULL;
+		p->p_brand = &native_brand;
+		return;
+	}
+
+	/*
+	 * If init is dying and we aren't explicitly shutting down the zone
+	 * or the system, then Solaris is about to restart init.  The Linux
+	 * init is not designed to handle a restart, which it interprets as
+	 * a reboot.  To give it a sane environment in which to run, we
+	 * reboot the zone.
+	 */
+	if (zone->zone_boot_err != 0 || !zone->zone_restart_init)
+		return;
+	if (zone_status_get(zone) >= ZONE_IS_SHUTTING_DOWN ||
+	    zone_status_get(global_zone) >= ZONE_IS_SHUTTING_DOWN)
+		return;
+
+	(void) zone_kadmin(A_REBOOT, 0, NULL, CRED());
+}
+
+/*
+ * Brand hook that turns process p into an lx process: allocate zeroed
+ * per-process brand data and initialize the brand data of the first
+ * thread on p's thread list.
+ */
+void
+lx_setbrand(proc_t *p)
+{
+	int rv;
+
+	ASSERT(p->p_brand_data == NULL);
+	ASSERT(ttolxlwp(curthread) == NULL);
+
+	p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP);
+
+	/*
+	 * This routine can only be called for single-threaded processes.
+	 * Since lx_initlwp() can only fail if we run out of PIDs for
+	 * multithreaded processes, we know that this can never fail.
+	 */
+	rv = lx_initlwp(p->p_tlist->t_lwp);
+	ASSERT(rv == 0);
+}
+
+/*
+ * Brand hook to set a zone attribute.  Only LX_ATTR_RESTART_INIT is
+ * recognized; its value is a boolean_t copied in from buf and stored in
+ * zone->zone_restart_init.
+ *
+ * Returns 0 on success, EINVAL for an unknown attribute or a value that is
+ * neither B_TRUE nor B_FALSE, ERANGE if bufsize is larger than a
+ * boolean_t, and EFAULT if the copyin fails.
+ *
+ * NOTE(review): a bufsize smaller than sizeof (boolean_t) is accepted, yet
+ * a full boolean_t is still copied in -- confirm that this is intended.
+ */
+/* ARGSUSED */
+int
+lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+	boolean_t flag;
+
+	if (attr != LX_ATTR_RESTART_INIT)
+		return (EINVAL);
+
+	if (bufsize > sizeof (boolean_t))
+		return (ERANGE);
+	if (copyin(buf, &flag, sizeof (flag)) != 0)
+		return (EFAULT);
+	if (flag != B_TRUE && flag != B_FALSE)
+		return (EINVAL);
+
+	zone->zone_restart_init = flag;
+	return (0);
+}
+
+/*
+ * Brand hook to read a zone attribute.  Only LX_ATTR_RESTART_INIT is
+ * recognized; zone->zone_restart_init is copied out to buf as a boolean_t
+ * and *bufsize is updated to the number of bytes written.
+ *
+ * Returns 0 on success, EINVAL for an unknown attribute, ERANGE if
+ * *bufsize is too small to hold a boolean_t, and EFAULT if the copyout
+ * fails.  All error values are positive errnos, as in lx_setattr().
+ */
+/* ARGSUSED */
+int
+lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+	if (attr != LX_ATTR_RESTART_INIT)
+		return (EINVAL);
+
+	if (*bufsize < sizeof (boolean_t))
+		return (ERANGE);
+	if (copyout(&zone->zone_restart_init, buf, sizeof (boolean_t)) != 0)
+		return (EFAULT);
+
+	*bufsize = sizeof (boolean_t);
+	return (0);
+}
+
+/*
+ * Enable (set != 0) or disable (set == 0) ptrace system call tracing for
+ * the given LWP. Enabling is done by both setting the flag in that LWP's
+ * brand data (in the kernel) and setting the process-wide trace flag (in
+ * the brand library of the traced process). Disabling only clears the
+ * LWP's flag.
+ *
+ * Returns 0 on success, ESRCH if the process or LWP cannot be found or
+ * carries no brand data, EPERM if the caller lacks write permission on the
+ * target, and EIO if the trace flag could not be written into the target.
+ */
+static int
+lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
+{
+ proc_t *p;
+ kthread_t *t;
+ klwp_t *lwp;
+ lx_proc_data_t *lpdp;
+ lx_lwp_data_t *lldp;
+ uintptr_t addr;
+ int ret, flag = 1;
+
+ if ((p = sprlock(pid)) == NULL)
+ return (ESRCH);
+
+ if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
+ sprunlock(p);
+ return (EPERM);
+ }
+
+ if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if ((lpdp = p->p_brand_data) == NULL ||
+ (lldp = lwp->lwp_brand) == NULL) {
+ sprunlock(p);
+ return (ESRCH);
+ }
+
+ if (set) {
+ /*
+ * Enable the ptrace flag for this LWP and this process. Note
+ * that we will turn off the LWP's ptrace flag, but we don't
+ * turn off the process's ptrace flag.
+ */
+ lldp->br_ptrace = 1;
+ lpdp->l_ptrace = 1;
+
+ addr = lpdp->l_traceflag;
+
+ /* p_lock is dropped around uwrite() and re-taken afterwards. */
+ mutex_exit(&p->p_lock);
+
+ /*
+ * This can fail only in some rare corner cases where the
+ * process is exiting or we're completely out of memory. In
+ * these cases, it's sufficient to return an error to the ptrace
+ * consumer and leave the process-wide flag set.
+ */
+ ret = uwrite(p, &flag, sizeof (flag), addr);
+
+ mutex_enter(&p->p_lock);
+
+ /*
+ * If we couldn't set the trace flag, unset the LWP's ptrace
+ * flag as the ptrace consumer won't expect this LWP to stop.
+ */
+ if (ret != 0)
+ lldp->br_ptrace = 0;
+ } else {
+ lldp->br_ptrace = 0;
+ ret = 0;
+ }
+
+ sprunlock(p);
+
+ /* Any uwrite() failure is reported to the caller as EIO. */
+ if (ret != 0)
+ ret = EIO;
+
+ return (ret);
+}
+
+/*
+ * If the current LWP has its ptrace flag set, clear the flag and post
+ * SIGTRAP to the current thread.
+ */
+static void
+lx_ptrace_fire(void)
+{
+	lx_lwp_data_t *lwpd = ttolwp(curthread)->lwp_brand;
+
+	if (!lwpd->br_ptrace)
+		return;
+
+	/*
+	 * The ptrace flag only applies until the next event is encountered
+	 * for the given LWP, so turn it off before poking the controlling
+	 * process by raising a signal.
+	 */
+	lwpd->br_ptrace = 0;
+	tsignal(curthread, SIGTRAP);
+}
+
+/*
+ * Turn on system call tracing for the brand: enable the int80 hook and
+ * record that tracing is active. Must not be called while tracing is
+ * already enabled.
+ */
+void
+lx_brand_systrace_enable(void)
+{
+ extern void lx_brand_int80_enable(void);
+
+ ASSERT(!lx_systrace_enabled);
+
+ lx_brand_int80_enable();
+
+ lx_systrace_enabled = 1;
+}
+
+/*
+ * Turn off system call tracing for the brand: disable the int80 hook and
+ * record that tracing is inactive. Must only be called while tracing is
+ * enabled.
+ */
+void
+lx_brand_systrace_disable(void)
+{
+ extern void lx_brand_int80_disable(void);
+
+ ASSERT(lx_systrace_enabled);
+
+ lx_brand_int80_disable();
+
+ lx_systrace_enabled = 0;
+}
+
+/*
+ * Brand system call entry point for the lx brand.
+ *
+ * cmd selects the operation and arg1 through arg6 are its operands.  Any
+ * cmd that is not handled explicitly is treated as an emulated Linux
+ * system call, numbered relative to B_EMULATE_SYSCALL.
+ *
+ * Returning 0 indicates success; the value returned by the system call is
+ * the value stored in rval.  Returning a non-zero value indicates a
+ * failure; the value returned is used to set errno, -1 is returned from
+ * the syscall and the contents of rval are ignored.  To set errno and
+ * have the syscall return a value other than -1 we can manually set errno
+ * and rval and return 0.
+ *
+ * Every failure path below therefore returns the errno itself; it must not
+ * return the result of set_errno() or a bare -1.
+ */
+int
+lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	kthread_t *t = curthread;
+	proc_t *p = ttoproc(t);
+	lx_proc_data_t *pd;
+	int linux_call;
+	struct termios *termios;
+	uint_t termios_len;
+	int error;
+	lx_brand_registration_t reg;
+
+	/*
+	 * There is one operation that is supported for a non-branded
+	 * process: B_EXEC_BRAND.  This is the equivalent of an exec call,
+	 * but the new process that is created will be a branded process.
+	 */
+	if (cmd == B_EXEC_BRAND) {
+		ASSERT(p->p_zone != NULL);
+		ASSERT(p->p_zone->zone_brand == &lx_brand);
+		return (exec_common(
+		    (char *)arg1, (const char **)arg2, (const char **)arg3,
+		    EBA_BRAND));
+	}
+
+	/* For all other operations this must be a branded process. */
+	if (p->p_brand == NULL)
+		return (ENOSYS);
+
+	ASSERT(p->p_brand == &lx_brand);
+	ASSERT(p->p_brand_data != NULL);
+
+	switch (cmd) {
+	case B_REGISTER:
+		/*
+		 * Get the addresses of the user-space system call handler,
+		 * trace handler and trace flag, and attach them to the
+		 * per-process brand data.
+		 */
+		if (p->p_model == DATAMODEL_NATIVE) {
+			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0) {
+				lx_print("Failed to copyin brand registration "
+				    "at 0x%p\n", (void *)arg1);
+				return (EFAULT);
+			}
+#ifdef _LP64
+		} else {
+			lx_brand_registration32_t reg32;
+
+			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0) {
+				lx_print("Failed to copyin brand registration "
+				    "at 0x%p\n", (void *)arg1);
+				return (EFAULT);
+			}
+
+			reg.lxbr_version = (uint_t)reg32.lxbr_version;
+			reg.lxbr_handler =
+			    (void *)(uintptr_t)reg32.lxbr_handler;
+			reg.lxbr_tracehandler =
+			    (void *)(uintptr_t)reg32.lxbr_tracehandler;
+			reg.lxbr_traceflag =
+			    (void *)(uintptr_t)reg32.lxbr_traceflag;
+#endif
+		}
+
+		if (reg.lxbr_version != LX_VERSION_1) {
+			lx_print("Invalid brand library version (%u)\n",
+			    reg.lxbr_version);
+			return (EINVAL);
+		}
+
+		lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n",
+		    (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p);
+		pd = p->p_brand_data;
+		pd->l_handler = (uintptr_t)reg.lxbr_handler;
+		pd->l_tracehandler = (uintptr_t)reg.lxbr_tracehandler;
+		pd->l_traceflag = (uintptr_t)reg.lxbr_traceflag;
+		*rval = 0;
+		return (0);
+
+	case B_TTYMODES:
+		/* This is necessary for emulating TCGETS ioctls. */
+		if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+		    DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios,
+		    &termios_len) != DDI_SUCCESS)
+			return (EIO);
+
+		ASSERT(termios_len == sizeof (*termios));
+
+		/*
+		 * Copy out the termios structure that the property lookup
+		 * handed back, not the local pointer that refers to it.
+		 */
+		if (copyout(termios, (void *)arg1, sizeof (*termios)) != 0) {
+			ddi_prop_free(termios);
+			return (EFAULT);
+		}
+
+		ddi_prop_free(termios);
+		*rval = 0;
+		return (0);
+
+	case B_ELFDATA:
+		pd = curproc->p_brand_data;
+		if (copyout(&pd->l_elf_data, (void *)arg1,
+		    sizeof (lx_elf_data_t)) != 0)
+			return (EFAULT);
+		*rval = 0;
+		return (0);
+
+	case B_EXEC_NATIVE:
+		error = exec_common(
+		    (char *)arg1, (const char **)arg2, (const char **)arg3,
+		    EBA_NATIVE);
+		if (error != 0)
+			return (error);
+		*rval = 0;
+		return (0);
+
+	case B_LPID_TO_SPAIR:
+		/*
+		 * Given a Linux pid as arg1, return the Solaris pid in arg2 and
+		 * the Solaris LWP in arg3.  We also translate pid 1 (which is
+		 * hardcoded in many applications) to the zone's init process.
+		 */
+		{
+			pid_t s_pid;
+			id_t s_tid;
+
+			if ((pid_t)arg1 == 1) {
+				s_pid = p->p_zone->zone_proc_initpid;
+				/* handle the dead/missing init(1M) case */
+				if (s_pid == -1)
+					s_pid = 1;
+				s_tid = 1;
+			} else if (lx_lpid_to_spair((pid_t)arg1, &s_pid,
+			    &s_tid) < 0)
+				return (ESRCH);
+
+			if (copyout(&s_pid, (void *)arg2,
+			    sizeof (s_pid)) != 0 ||
+			    copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0)
+				return (EFAULT);
+
+			*rval = 0;
+			return (0);
+		}
+
+	case B_PTRACE_SYSCALL:
+		/*
+		 * The result of lx_ptrace_syscall_set() is handed back as
+		 * the system call's return value rather than through errno.
+		 */
+		*rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2,
+		    (int)arg3);
+		return (0);
+
+	case B_SYSENTRY:
+		if (lx_systrace_enabled) {
+			uint32_t args[6];
+
+			ASSERT(lx_systrace_entry_ptr != NULL);
+
+			if (copyin((void *)arg2, args, sizeof (args)) != 0)
+				return (EFAULT);
+
+			(*lx_systrace_entry_ptr)(arg1, args[0], args[1],
+			    args[2], args[3], args[4], args[5]);
+		}
+
+		lx_ptrace_fire();
+
+		pd = p->p_brand_data;
+
+		/*
+		 * If neither DTrace nor ptrace are interested in tracing
+		 * this process any more, turn off the trace flag.
+		 */
+		if (!lx_systrace_enabled && !pd->l_ptrace)
+			(void) suword32((void *)pd->l_traceflag, 0);
+
+		*rval = 0;
+		return (0);
+
+	case B_SYSRETURN:
+		if (lx_systrace_enabled) {
+			ASSERT(lx_systrace_return_ptr != NULL);
+
+			(*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0);
+		}
+
+		lx_ptrace_fire();
+
+		pd = p->p_brand_data;
+
+		/*
+		 * If neither DTrace nor ptrace are interested in tracing
+		 * this process any more, turn off the trace flag.
+		 */
+		if (!lx_systrace_enabled && !pd->l_ptrace)
+			(void) suword32((void *)pd->l_traceflag, 0);
+
+		*rval = 0;
+		return (0);
+
+	case B_SET_AFFINITY_MASK:
+	case B_GET_AFFINITY_MASK:
+		/*
+		 * Retrieve or store the CPU affinity mask for the
+		 * requested linux pid.
+		 *
+		 * arg1 is a linux PID (0 means curthread).
+		 * arg2 is the size of the given mask.
+		 * arg3 is the address of the affinity mask.
+		 */
+		return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
+
+	default:
+		linux_call = cmd - B_EMULATE_SYSCALL;
+		if (linux_call >= 0 && linux_call < LX_NSYSCALLS) {
+			*rval = lx_emulate_syscall(linux_call, arg1, arg2,
+			    arg3, arg4, arg5, arg6);
+			return (0);
+		}
+	}
+
+	/* Unknown command, or a Linux system call number out of range. */
+	return (EINVAL);
+}
+
+/*
+ * Give the child process its own copy of the parent's per-process brand
+ * data.  The parent must already carry brand data.
+ */
+void
+lx_copy_procdata(proc_t *child, proc_t *parent)
+{
+	lx_proc_data_t *src = parent->p_brand_data;
+	lx_proc_data_t *dst;
+
+	ASSERT(src != NULL);
+
+	dst = kmem_alloc(sizeof (lx_proc_data_t), KM_SLEEP);
+	bcopy(src, dst, sizeof (lx_proc_data_t));
+
+	child->p_brand_data = dst;
+}
+
+#if defined(_ELF32_COMPAT)
+/*
+ * Currently, only 32-bit branded ELF executables are supported.
+ */
+#define elfexec elf32exec
+#define mapexec_brand mapexec32_brand
+#endif /* _ELF32_COMPAT */
+
+extern int elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
+ intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
+ cred_t *cred, int brand_action);
+extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *ehdr, Elf32_Addr *,
+ intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *);
+
+/*
+ * Exec routine called by elfexec() to load 32-bit Linux binaries.
+ *
+ * vp is the Linux executable. The brand library (LX_LIB_PATH) is what
+ * actually gets exec'd, via elfexec() with the remaining arguments passed
+ * through; the Linux executable and, if it needs one, the Linux linker
+ * (LX_LINKER) are then mapped in with mapexec_brand(). The properties of
+ * the Linux executable are saved in the process's lx_elf_data_t.
+ *
+ * Returns 0 on success, the error from lookupname(), elfexec() or
+ * mapexec_brand() on failure, or EFAULT if the aux vector entry cannot be
+ * copied out.
+ */
+static int
+lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+ struct intpdata *idata, int level, long *execsz, int setid,
+ caddr_t exec_file, struct cred *cred, int brand_action)
+{
+ int error;
+ vnode_t *nvp;
+ auxv32_t phdr_auxv = { AT_SUN_BRAND_PHDR, 0 };
+ Ehdr ehdr;
+ Elf32_Addr uphdr_vaddr;
+ intptr_t voffset;
+ int interp;
+ int i;
+ struct execenv env;
+ struct user *up = PTOU(ttoproc(curthread));
+ lx_elf_data_t *edp =
+ &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data;
+
+ ASSERT(ttoproc(curthread)->p_brand == &lx_brand);
+ ASSERT(ttoproc(curthread)->p_brand_data != NULL);
+
+ /*
+ * Set the brandname and library name for the new process so that
+ * elfexec() puts them onto the stack.
+ */
+ args->brandname = LX_BRANDNAME;
+ args->emulator = LX_LIB_PATH;
+
+ /*
+ * We will exec the brand library, and map in the linux linker and the
+ * linux executable.
+ */
+ if (error = lookupname(LX_LIB_PATH, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &nvp)) {
+ uprintf("%s: not found.", LX_LIB);
+ return (error);
+ }
+
+ if (error = elfexec(nvp, uap, args, idata, level + 1, execsz, setid,
+ exec_file, cred, brand_action)) {
+ VN_RELE(nvp);
+ return (error);
+ }
+ VN_RELE(nvp);
+
+ bzero(&env, sizeof (env));
+
+ /* Map in the Linux executable itself. */
+ if (error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
+ exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
+ &env.ex_brksize))
+ return (error);
+
+ /*
+ * Save off the important properties of the lx executable. The brand
+ * library will ask us for this data later, when it is ready to set
+ * things up for the lx executable.
+ */
+ edp->ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff :
+ voffset + uphdr_vaddr;
+ edp->ed_entry = voffset + ehdr.e_entry;
+ edp->ed_phent = ehdr.e_phentsize;
+ edp->ed_phnum = ehdr.e_phnum;
+
+ if (interp) {
+ if (ehdr.e_type == ET_DYN) {
+ /*
+ * This is a shared object executable, so we need to
+ * pick a reasonable place to put the heap. Just don't
+ * use the first page.
+ */
+ env.ex_brkbase = (caddr_t)PAGESIZE;
+ env.ex_bssbase = (caddr_t)PAGESIZE;
+ }
+
+ /*
+ * If the program needs an interpreter (most do), map it in and
+ * store relevant information about it in the aux vector, where
+ * the brand library can find it.
+ */
+ if (error = lookupname(LX_LINKER, UIO_SYSSPACE, FOLLOW, NULLVPP,
+ &nvp)) {
+ uprintf("%s: not found.", LX_LINKER);
+ return (error);
+ }
+ /*
+ * From here on ehdr, uphdr_vaddr, voffset and interp describe
+ * the linker rather than the executable.
+ */
+ if (error = mapexec_brand(nvp, args, &ehdr, &uphdr_vaddr,
+ &voffset, exec_file, &interp, NULL, NULL, NULL)) {
+ VN_RELE(nvp);
+ return (error);
+ }
+ VN_RELE(nvp);
+
+ /*
+ * Now that we know the base address of the brand's linker,
+ * place it in the aux vector.
+ */
+ edp->ed_base = voffset;
+ edp->ed_ldentry = voffset + ehdr.e_entry;
+ } else {
+ /*
+ * This program has no interpreter. The lx brand library will
+ * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
+ * so in this case, put the entry point of the main executable
+ * there.
+ */
+ if (ehdr.e_type == ET_EXEC) {
+ /*
+ * An executable with no interpreter, this must be a
+ * statically linked executable, which means we loaded
+ * it at the address specified in the elf header, in
+ * which case the e_entry field of the elf header is an
+ * absolute address.
+ */
+ edp->ed_ldentry = ehdr.e_entry;
+ edp->ed_entry = ehdr.e_entry;
+ } else {
+ /*
+ * A shared object with no interpreter, we use the
+ * calculated address from above.
+ */
+ edp->ed_ldentry = edp->ed_entry;
+ }
+
+ /*
+ * Delay setting the brkbase until the first call to brk();
+ * see elfexec() for details.
+ */
+ env.ex_bssbase = (caddr_t)0;
+ env.ex_brkbase = (caddr_t)0;
+ env.ex_brksize = 0;
+ }
+
+ env.ex_vp = vp;
+ setexecenv(&env);
+
+ /*
+ * We don't need to copy this stuff out. It is only used by our
+ * tools to locate the lx linker's debug section. But we should at
+ * least try to keep /proc's view of the aux vector consistent with
+ * what's on the process stack.
+ */
+ phdr_auxv.a_un.a_val = edp->ed_phdr;
+ if (copyout(&phdr_auxv, args->brand_auxp, sizeof (phdr_auxv)) == -1)
+ return (EFAULT);
+
+ /*
+ * /proc uses the AT_ENTRY aux vector entry to deduce
+ * the location of the executable in the address space. The user
+ * structure contains a copy of the aux vector that needs to have those
+ * entries patched with the values of the real lx executable (they
+ * currently contain the values from the lx brand library that was
+ * elfexec'd, above).
+ *
+ * For live processes, AT_BASE is used to locate the linker segment,
+ * which /proc and friends will later use to find Solaris symbols
+ * (such as rtld_db_preinit). However, for core files, /proc uses
+ * AT_ENTRY to find the right segment to label as the executable.
+ * So we set AT_ENTRY to be the entry point of the linux executable,
+ * but leave AT_BASE to be the address of the Solaris linker.
+ */
+ for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
+ if (up->u_auxv[i].a_type == AT_ENTRY)
+ up->u_auxv[i].a_un.a_val = edp->ed_entry;
+ if (up->u_auxv[i].a_type == AT_SUN_BRAND_PHDR)
+ up->u_auxv[i].a_un.a_val = edp->ed_phdr;
+ }
+
+ return (0);
+}
+
+/*
+ * Module load entry point.  Sets up the pid/tid hash tables and the futex
+ * state, then installs the brand module.  If the install fails, both are
+ * torn down again so that a failed load leaves nothing behind.
+ *
+ * Returns 0 on success or the error from mod_install().
+ */
+int
+_init(void)
+{
+	int err;
+
+	/* pid/tid conversion hash tables */
+	lx_pid_init();
+
+	/* for lx_futex() */
+	lx_futex_init();
+
+	err = mod_install(&modlinkage);
+	if (err == 0)
+		return (0);
+
+	cmn_err(CE_WARN, "Couldn't install lx brand module");
+
+	/*
+	 * This looks drastic, but it should never happen.  These
+	 * two data structures should be completely free-able until
+	 * they are used by Linux processes.  Since the brand
+	 * wasn't loaded there should be no Linux processes, and
+	 * thus no way for these data structures to be modified.
+	 */
+	lx_pid_fini();
+	if (lx_futex_fini())
+		panic("lx brand module cannot be loaded or unloaded.");
+
+	return (err);
+}
+
+/* Module information entry point: report on the lx brand module linkage. */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Module unload entry point.  Refuses with EBUSY while any zone uses the
+ * brand.  Otherwise the pid tables and futex state are torn down and the
+ * module is removed; if a later step fails, whatever was already torn
+ * down is set up again and the error is returned.
+ */
+int
+_fini(void)
+{
+	int err;
+
+	/*
+	 * If there are any zones using this brand, we can't allow it to be
+	 * unloaded.
+	 */
+	if (brand_zone_count(&lx_brand))
+		return (EBUSY);
+
+	lx_pid_fini();
+
+	err = lx_futex_fini();
+	if (err != 0) {
+		/* Only the pid tables were torn down; bring them back. */
+		lx_pid_init();
+		return (err);
+	}
+
+	err = mod_remove(&modlinkage);
+	if (err != 0) {
+		/*
+		 * If we can't unload the module, then we have to get it
+		 * back into a sane state.
+		 */
+		lx_pid_init();
+		lx_futex_init();
+	}
+
+	return (err);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
new file mode 100644
index 0000000000..375b99fa46
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -0,0 +1,383 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/archsystm.h>
+#include <sys/privregs.h>
+#include <sys/exec.h>
+#include <sys/lwp.h>
+#include <sys/sem.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+
+/* Linux specific functions and definitions */
+void lx_setrval(klwp_t *, int, int);
+void lx_exec();
+int lx_initlwp(klwp_t *);
+void lx_forklwp(klwp_t *, klwp_t *);
+void lx_exitlwp(klwp_t *);
+void lx_freelwp(klwp_t *);
+static void lx_save(klwp_t *);
+static void lx_restore(klwp_t *);
+extern void lx_ptrace_free(proc_t *);
+
+/*
+ * Set the return code for the forked child, always zero. The v1 and v2
+ * arguments are ignored; r_r0 in the lwp's saved registers is set to 0.
+ */
+/*ARGSUSED*/
+void
+lx_setrval(klwp_t *lwp, int v1, int v2)
+{
+ lwptoregs(lwp)->r_r0 = 0;
+}
+
+/*
+ * Reset process state on exec(2): make sure the current lwp has brand
+ * data and the right Linux PID, install the context-switch handlers,
+ * clear the lwp's TLS array and reload the TLS entries in the GDT.
+ */
+void
+lx_exec()
+{
+ klwp_t *lwp = ttolwp(curthread);
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ int err;
+
+ /*
+ * There are two mutually exclusive special cases we need to
+ * address. First, if this was a native process prior to this
+ * exec(), then this lwp won't have its brand-specific data
+ * initialized and it won't be assigned a Linux PID yet. Second,
+ * if this was a multi-threaded Linux process and this lwp wasn't
+ * the main lwp, then we need to make its Solaris and Linux PIDS
+ * match.
+ */
+ if (lwpd == NULL) {
+ err = lx_initlwp(lwp);
+ /*
+ * Only possible failure from this routine should be an
+ * inability to allocate a new PID. Since single-threaded
+ * processes don't need a new PID, we should never hit this
+ * error.
+ */
+ ASSERT(err == 0);
+ lwpd = lwptolxlwp(lwp);
+ } else if (curthread->t_tid != 1) {
+ lx_pid_reassign(curthread);
+ }
+
+ /*
+ * NOTE(review): lx_initlwp() already calls installctx() with these
+ * same arguments, so on the lwpd == NULL path the handlers are
+ * installed twice -- confirm that installctx() tolerates this.
+ */
+ installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
+ NULL);
+
+ /*
+ * clear out the tls array
+ */
+ bzero(lwpd->br_tls, sizeof (lwpd->br_tls));
+
+ /*
+ * reset the tls entries in the gdt
+ */
+ kpreempt_disable();
+ lx_restore(lwp);
+ kpreempt_enable();
+}
+
+/*
+ * Brand hook run when an lwp exits. Clears and wakes the clear-child-tid
+ * word if one was registered, queues the lwp's exit signal (if any) for
+ * its parent, and releases the lwp's brand data via lx_freelwp(). A
+ * second call for the same lwp is a no-op because the brand data is gone.
+ */
+void
+lx_exitlwp(klwp_t *lwp)
+{
+ struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+ proc_t *p;
+ kthread_t *t;
+ sigqueue_t *sqp = NULL;
+ pid_t ppid;
+ id_t ptid;
+
+ if (lwpd == NULL)
+ return; /* second time thru' */
+
+ if (lwpd->br_clear_ctidp != NULL) {
+ (void) suword32(lwpd->br_clear_ctidp, 0);
+ (void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
+ NULL, NULL, 0);
+ }
+
+ if (lwpd->br_signal != 0) {
+ /*
+ * The first thread in a process doesn't cause a signal to
+ * be sent when it exits. It was created by a fork(), not
+ * a clone(), so the parent should get signalled when the
+ * process exits.
+ */
+ if (lwpd->br_ptid == -1)
+ goto free;
+
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+ /*
+ * If br_ppid is 0, it means this is a CLONE_PARENT thread,
+ * so the signal goes to the parent process - not to a
+ * specific thread in this process.
+ */
+ p = lwptoproc(lwp);
+ if (lwpd->br_ppid == 0) {
+ /*
+ * NOTE(review): p still refers to this lwp's own process
+ * here and ppid is assigned but not used afterwards, so
+ * the signal below is queued on p rather than on the
+ * process identified by ppid -- confirm against the
+ * comment above.
+ */
+ mutex_enter(&p->p_lock);
+ ppid = p->p_ppid;
+ t = NULL;
+ } else {
+ /*
+ * If we have been reparented to init or if our
+ * parent thread is gone, then nobody gets
+ * signaled.
+ */
+ if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) ||
+ (ptid == -1))
+ goto free;
+
+ /* pidlock is held only until the parent's p_lock is taken. */
+ mutex_enter(&pidlock);
+ if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ goto free;
+ }
+ mutex_enter(&p->p_lock);
+ mutex_exit(&pidlock);
+
+ if ((t = idtot(p, ptid)) == NULL) {
+ mutex_exit(&p->p_lock);
+ goto free;
+ }
+ }
+
+ sqp->sq_info.si_signo = lwpd->br_signal;
+ sqp->sq_info.si_code = lwpd->br_exitwhy;
+ sqp->sq_info.si_status = lwpd->br_exitwhat;
+ sqp->sq_info.si_pid = lwpd->br_pid;
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ sigaddqa(p, t, sqp);
+ mutex_exit(&p->p_lock);
+ /* sqp was handed to sigaddqa(); don't free it below. */
+ sqp = NULL;
+ }
+
+free:
+ if (sqp)
+ kmem_free(sqp, sizeof (sigqueue_t));
+
+ lx_freelwp(lwp);
+}
+
+/*
+ * Release an lwp's brand data: remove the context-switch handlers, drop
+ * the Linux PID if one was assigned, detach the data from the lwp and
+ * free it.  Does nothing if the lwp carries no brand data.
+ */
+void
+lx_freelwp(klwp_t *lwp)
+{
+	struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+
+	if (lwpd == NULL)
+		return;
+
+	(void) removectx(lwptot(lwp), lwp, lx_save, lx_restore,
+	    NULL, NULL, lx_save, NULL);
+
+	if (lwpd->br_pid != 0)
+		lx_pid_rele(lwptoproc(lwp)->p_pid, lwptot(lwp)->t_tid);
+
+	lwp->lwp_brand = NULL;
+	kmem_free(lwpd, sizeof (struct lx_lwp_data));
+}
+
+/*
+ * Allocate and initialize the brand data for an lwp, assign it a Linux
+ * PID and install the context-switch handlers.
+ *
+ * Returns 0 on success.  If lx_pid_assign() fails, the brand data is
+ * released again and -1 is returned.
+ */
+int
+lx_initlwp(klwp_t *lwp)
+{
+	kthread_t *thr = lwptot(lwp);
+	struct lx_lwp_data *lwpd;
+	struct lx_lwp_data *creator;
+
+	lwpd = kmem_zalloc(sizeof (struct lx_lwp_data), KM_SLEEP);
+	lwpd->br_lwp = lwp;
+	lwpd->br_exitwhy = CLD_EXITED;
+	lwpd->br_signal = 0;
+	lwpd->br_affinitymask = 0;
+	lwpd->br_set_ctidp = NULL;
+	lwpd->br_clear_ctidp = NULL;
+
+	/*
+	 * The first thread in a process has ppid set to the parent
+	 * process's pid, and ptid set to -1.  Subsequent threads in the
+	 * process have their ppid set to the pid of the thread that
+	 * created them, and their ptid to that thread's tid.
+	 */
+	if (thr->t_next == thr) {
+		lwpd->br_ppid = thr->t_procp->p_ppid;
+		lwpd->br_ptid = -1;
+	} else {
+		creator = ttolxlwp(curthread);
+		if (creator == NULL) {
+			/*
+			 * Oddball case: the parent thread isn't a Linux
+			 * process.
+			 */
+			lwpd->br_ppid = 0;
+			lwpd->br_ptid = -1;
+		} else {
+			/* The new thread also inherits the creator's TLS. */
+			bcopy(creator->br_tls, lwpd->br_tls,
+			    sizeof (lwpd->br_tls));
+			lwpd->br_ppid = creator->br_pid;
+			lwpd->br_ptid = curthread->t_tid;
+		}
+	}
+
+	/* The brand data is attached before the PID is assigned. */
+	lwp->lwp_brand = lwpd;
+
+	if (lx_pid_assign(thr)) {
+		lwp->lwp_brand = NULL;
+		kmem_free(lwpd, sizeof (struct lx_lwp_data));
+		return (-1);
+	}
+	lwpd->br_tgid = lwpd->br_pid;
+
+	installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL,
+	    lx_save, NULL);
+
+	return (0);
+}
+
+/*
+ * Carry brand state from a forking lwp over to the lwp created for it:
+ * the child records the parent's pid and tid, inherits its TLS entries
+ * and, of the lwp flags, only BR_CPU_BOUND.
+ *
+ * There is no need to have any locking for either the source or
+ * destination struct lx_lwp_data structs.  This is always run in the
+ * thread context of the source thread, and the destination thread is
+ * always newly created and not referred to from anywhere else.
+ */
+void
+lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp)
+{
+	struct lx_lwp_data *parent = srclwp->lwp_brand;
+	struct lx_lwp_data *child = dstlwp->lwp_brand;
+
+	bcopy(parent->br_tls, child->br_tls, sizeof (child->br_tls));
+
+	child->br_ptid = lwptot(srclwp)->t_tid;
+	child->br_ppid = parent->br_pid;
+
+	/*
+	 * copy only these flags
+	 */
+	child->br_lwp_flags = parent->br_lwp_flags & BR_CPU_BOUND;
+	child->br_clone_args = NULL;
+}
+
+/*
+ * When switching a Linux process off the CPU, clear its GDT entries.
+ *
+ * The LX_TLSNUM descriptors starting at GDT_TLSMIN in the current CPU's
+ * GDT are overwritten with an all-zero descriptor.  The lwp argument is
+ * not used.
+ */
+/* ARGSUSED */
+static void
+lx_save(klwp_t *t)
+{
+	/*
+	 * Objects with static storage duration start out zeroed, so this
+	 * needs no run-time initialization (and hence no unsynchronized
+	 * "initialized" flag shared between CPUs).
+	 */
+	static user_desc_t null_desc;
+	user_desc_t *gdt;
+	int i;
+
+	gdt = cpu_get_gdt();
+	for (i = 0; i < LX_TLSNUM; i++)
+		gdt[GDT_TLSMIN + i] = null_desc;
+}
+
+/*
+ * When switching a Linux process on the CPU, set its GDT entries.
+ *
+ * The lwp's LX_TLSNUM saved TLS descriptors are copied into the current
+ * CPU's GDT, starting at slot GDT_TLSMIN.  The lwp must carry brand data.
+ */
+static void
+lx_restore(klwp_t *t)
+{
+	struct lx_lwp_data *lwpd = lwptolxlwp(t);
+	user_desc_t *gdt;
+	int slot;
+
+	ASSERT(lwpd);
+
+	gdt = cpu_get_gdt();
+	for (slot = 0; slot < LX_TLSNUM; slot++)
+		gdt[GDT_TLSMIN + slot] = lwpd->br_tls[slot];
+}
+
+/*
+ * Store the descriptor *descrp into slot 'entry' of the current CPU's GDT.
+ */
+void
+lx_set_gdt(int entry, user_desc_t *descrp)
+{
+	cpu_get_gdt()[entry] = *descrp;
+}
+
+/*
+ * Zero out slot 'entry' of the current CPU's GDT.
+ */
+void
+lx_clear_gdt(int entry)
+{
+	bzero(cpu_get_gdt() + entry, sizeof (user_desc_t));
+}
+
+/* Handler that fails with ENOSYS; returns the result of set_errno(). */
+longlong_t
+lx_nosys()
+{
+ return (set_errno(ENOSYS));
+}
+
+/* Handler that fails with EOPNOTSUPP; returns the result of set_errno(). */
+longlong_t
+lx_opnotsupp()
+{
+ return (set_errno(EOPNOTSUPP));
+}
+
+/*
+ * Brand-specific routine to check if given non-Solaris standard segment
+ * register values should be used as-is or if they should be modified to
+ * other values.
+ *
+ * sr must fit in 16 bits.  Returns the selector value that should be used
+ * in its place.
+ */
+/*ARGSUSED*/
+greg_t
+lx_fixsegreg(greg_t sr, model_t datamodel)
+{
+	struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+
+	ASSERT(sr == (sr & 0xffff));
+
+	/*
+	 * If the segreg is the same as the %gs the brand callback was last
+	 * entered with, allow it to be used unmodified.
+	 */
+	if ((lwpd->br_ugs & 0xffff) == sr)
+		return (sr);
+
+	/*
+	 * Force the SR into the LDT in ring 3 for 32-bit processes.
+	 *
+	 * 64-bit processes get the null GDT selector since they are not
+	 * allowed to have a private LDT.
+	 */
+#if defined(__amd64)
+	return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0);
+#elif defined(__i386)
+	datamodel = datamodel;	/* datamodel currently unused for 32-bit */
+	return (sr | SEL_TI_LDT | SEL_UPL);
+#endif	/* __amd64 */
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
new file mode 100644
index 0000000000..4f22efd1ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -0,0 +1,348 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/bitmap.h>
+#include <sys/var.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/zone.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+/*
+ * Sizing parameters for the pid conversion hash tables.  lx_pid_init()
+ * computes hash_size as 1 << highbit(v.v_proc / (hash_len *
+ * LINUX_PROC_FACTOR)), so hash_size is always a power of two.
+ */
+#define LINUX_PROC_FACTOR 8 /* factor down the hash table by this */
+static int hash_len = 4; /* desired average hash chain length */
+static int hash_size; /* no of buckets in the hash table */
+
+/*
+ * Two chained hash tables over the same set of lx_pid entries:
+ * stol_pid_hash is keyed by the Solaris (pid, tid) pair and linked through
+ * stol_next; ltos_pid_hash is keyed by the Linux pid and linked through
+ * ltos_next.
+ */
+static struct lx_pid **stol_pid_hash;
+static struct lx_pid **ltos_pid_hash;
+
+/*
+ * Bucket index computations.  Masking with (hash_size - 1) relies on
+ * hash_size being a power of two.
+ */
+#define LTOS_HASH(pid) ((pid) & (hash_size - 1))
+#define STOL_HASH(pid, tid) (((pid) + (tid)) & (hash_size - 1))
+
+/*
+ * Held around every traversal or modification of the two hash tables in
+ * this file.
+ */
+static kmutex_t hash_lock;
+
+/*
+ * Link 'lpidp' at the head of its bucket in both the solaris-to-linux and
+ * linux-to-solaris hash tables.  The caller must hold hash_lock.
+ */
+static void
+lx_pid_insert_hash(struct lx_pid *lpidp)
+{
+	struct lx_pid **sbucket;
+	struct lx_pid **lbucket;
+
+	ASSERT(MUTEX_HELD(&hash_lock));
+
+	sbucket = &stol_pid_hash[STOL_HASH(lpidp->s_pid, lpidp->s_tid)];
+	lbucket = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)];
+
+	lpidp->stol_next = *sbucket;
+	*sbucket = lpidp;
+
+	lpidp->ltos_next = *lbucket;
+	*lbucket = lpidp;
+}
+
+/*
+ * Unlink the entry for the Solaris (pid, tid) pair from both hash tables.
+ * The caller must hold hash_lock.
+ *
+ * Returns the unlinked entry (still allocated; the caller owns it), or NULL
+ * if no entry for that pair exists.
+ */
+static struct lx_pid *
+lx_pid_remove_hash(pid_t pid, id_t tid)
+{
+	struct lx_pid **hpp;
+	/*
+	 * Must start out NULL: if the search below finds nothing, the
+	 * "not found" test would otherwise read an indeterminate pointer.
+	 */
+	struct lx_pid *lpidp = NULL;
+
+	ASSERT(MUTEX_HELD(&hash_lock));
+
+	for (hpp = &stol_pid_hash[STOL_HASH(pid, tid)]; *hpp != NULL;
+	    hpp = &(*hpp)->stol_next) {
+		if ((*hpp)->s_pid == pid && (*hpp)->s_tid == tid) {
+			lpidp = *hpp;
+			*hpp = lpidp->stol_next;
+			break;
+		}
+	}
+
+	/*
+	 * when called during error recovery the pid may already
+	 * be released
+	 */
+	if (lpidp == NULL)
+		return (NULL);
+
+	/* Now drop the same entry from the linux-to-solaris chain. */
+	for (hpp = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)]; *hpp != NULL;
+	    hpp = &(*hpp)->ltos_next) {
+		if (*hpp == lpidp) {
+			*hpp = lpidp->ltos_next;
+			break;
+		}
+	}
+
+	return (lpidp);
+}
+
+/*
+ * Create the Linux pid for thread 't', identified by its Solaris (pid, tid)
+ * pair, and record the mapping in both conversion hash tables.
+ *
+ * Returns 0 on success, or -1 if a new pid could not be allocated.
+ */
+int
+lx_pid_assign(kthread_t *t)
+{
+	proc_t *p = ttoproc(t);
+	lx_lwp_data_t *lwpd = ttolxlwp(t);
+	pid_t s_pid = p->p_pid;
+	id_t s_tid = t->t_tid;
+	struct pid *pidp = NULL;
+	pid_t newpid = s_pid;
+	struct lx_pid *ent;
+
+	/*
+	 * The first thread simply uses the process's own pid.  Any thread
+	 * other than the first gets a pid allocated for it.
+	 */
+	if (p->p_lwpcnt > 0) {
+		newpid = pid_allocate(p, 0);
+		if (newpid < 0)
+			return (-1);
+
+		pidp = pid_find(newpid);
+	}
+
+	ent = kmem_alloc(sizeof (*ent), KM_SLEEP);
+	ent->l_pid = newpid;
+	ent->s_pid = s_pid;
+	ent->s_tid = s_tid;
+	ent->l_pidp = pidp;
+	ent->l_start = t->t_start;
+
+	/*
+	 * Publish the new entry in the linux-solaris and solaris-linux
+	 * conversion hash tables.
+	 */
+	mutex_enter(&hash_lock);
+	lx_pid_insert_hash(ent);
+	mutex_exit(&hash_lock);
+
+	lwpd->br_pid = newpid;
+
+	return (0);
+}
+
+/*
+ * On exec() the calling thread's tid is about to be reset to 1.  Rewrite
+ * the thread's Linux PID bookkeeping so it is registered as (pid, 1) and
+ * release any separately allocated pid it held before.
+ */
+void
+lx_pid_reassign(kthread_t *t)
+{
+	proc_t *p = ttoproc(t);
+	struct lx_pid *lpidp;
+	struct pid *stale;
+
+	ASSERT(p->p_lwpcnt == 1);
+
+	mutex_enter(&hash_lock);
+
+	/*
+	 * Pull the entry for this thread's 'fake' Linux PID out of both
+	 * hashes and detach the pid structure it referenced.
+	 */
+	lpidp = lx_pid_remove_hash(p->p_pid, t->t_tid);
+	ASSERT(lpidp != NULL);
+	stale = lpidp->l_pidp;
+	lpidp->l_pidp = NULL;
+
+	/*
+	 * Re-register the same entry as (pid, 1).
+	 */
+	lpidp->s_tid = 1;
+	lpidp->s_pid = p->p_pid;
+	lpidp->l_pid = p->p_pid;
+	lx_pid_insert_hash(lpidp);
+
+	mutex_exit(&hash_lock);
+
+	/* The old pid reference is dropped only after hash_lock is released. */
+	if (stale != NULL)
+		(void) pid_rele(stale);
+}
+
+/*
+ * Release the bookkeeping for a Solaris (pid, tid) pair: remove its entry
+ * from the hash tables, drop the pid reference it holds (if any) and free
+ * the entry.  Does nothing if the pair has no entry.
+ */
+void
+lx_pid_rele(pid_t pid, id_t tid)
+{
+	struct lx_pid *ent;
+
+	mutex_enter(&hash_lock);
+	ent = lx_pid_remove_hash(pid, tid);
+	mutex_exit(&hash_lock);
+
+	if (ent == NULL)
+		return;
+
+	if (ent->l_pidp != NULL)
+		(void) pid_rele(ent->l_pidp);
+
+	kmem_free(ent, sizeof (struct lx_pid));
+}
+
+/*
+ * Translate a Linux pid into the Solaris (pid, tid) pair it stands for.
+ * Either output pointer may be NULL if the caller does not want that value.
+ *
+ * Returns 0 if the Linux pid is known, -1 otherwise (outputs untouched).
+ */
+int
+lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
+{
+	struct lx_pid *hp;
+	int rv = -1;
+
+	mutex_enter(&hash_lock);
+
+	hp = ltos_pid_hash[LTOS_HASH(l_pid)];
+	while (hp != NULL && hp->l_pid != l_pid)
+		hp = hp->ltos_next;
+
+	if (hp != NULL) {
+		if (s_pid != NULL)
+			*s_pid = hp->s_pid;
+		if (s_tid != NULL)
+			*s_tid = hp->s_tid;
+		rv = 0;
+	}
+
+	mutex_exit(&hash_lock);
+
+	return (rv);
+}
+
+/*
+ * Return the Linux pid of the parent of 'lwp'.  When the caller supplies
+ * them, *ppidp and *ptidp receive the corresponding Solaris (pid, tid);
+ * the tid is reported as -1 when no specific parent thread is identified.
+ */
+pid_t
+lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp)
+{
+	lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+	proc_t *p = lwptoproc(lwp);
+	pid_t zoneinit = curproc->p_zone->zone_proc_initpid;
+	struct lx_pid *hp;
+	pid_t ppid;
+	pid_t lppid;
+
+	/*
+	 * Never expose a parent pid that should be invisible inside this
+	 * zone: a process at the top of the zone reports zsched as its
+	 * parent.
+	 */
+	if (p->p_flag & SZONETOP)
+		ppid = curproc->p_zone->zone_zsched->p_pid;
+	else
+		ppid = p->p_ppid;
+
+	/*
+	 * The zone's init process is presented with the Linux init pid
+	 * value of 1.
+	 */
+	if (ppid == zoneinit)
+		ppid = 1;
+
+	/*
+	 * The Linux and Solaris notions of 'parent' coincide in two cases:
+	 *
+	 * - our tgid equals our PID, meaning we are either the first thread
+	 *   in the process or a CLONE_THREAD thread.
+	 *
+	 * - the brand lwp value for ppid is 0, meaning we are either the
+	 *   child of a differently-branded process or a CLONE_PARENT thread.
+	 */
+	if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) {
+		if (ppidp != NULL)
+			*ppidp = ppid;
+		if (ptidp != NULL)
+			*ptidp = -1;
+		return (ppid);
+	}
+
+	/*
+	 * Default the Linux parent pid to the zone's init pid; it is turned
+	 * back into the Linux default of 1 at the bottom of this function.
+	 */
+	lppid = zoneinit;
+
+	/*
+	 * Unless the process's parent is init, look for the Linux "pid" of
+	 * the thread that clone()d this one.  That PID was stashed in the
+	 * brand lwp data when this thread was created.
+	 */
+	if (ppid != 1) {
+		mutex_enter(&hash_lock);
+
+		hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)];
+		while (hp != NULL && hp->l_pid != lwpd->br_ppid)
+			hp = hp->ltos_next;
+
+		/*
+		 * A matching entry that started after this lwp did means the
+		 * cached PID has exited and been reused by another process,
+		 * so it is ignored and the init default stands.
+		 */
+		if (hp != NULL && hp->l_start <= lwptot(lwp)->t_start) {
+			lppid = lwpd->br_ppid;
+			if (ppidp != NULL)
+				*ppidp = hp->s_pid;
+			if (ptidp != NULL)
+				*ptidp = hp->s_tid;
+		}
+
+		mutex_exit(&hash_lock);
+	}
+
+	if (lppid != zoneinit)
+		return (lppid);
+
+	/* Fell back to init: report Linux pid 1 with no specific thread. */
+	if (ppidp != NULL)
+		*ppidp = 1;
+	if (ptidp != NULL)
+		*ptidp = -1;
+
+	return (1);
+}
+
+/*
+ * Size and allocate the two (zero-filled) pid conversion hash tables and
+ * initialize the lock that protects them.
+ */
+void
+lx_pid_init(void)
+{
+	size_t tblsz;
+
+	hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR));
+	tblsz = sizeof (struct lx_pid *) * hash_size;
+
+	stol_pid_hash = kmem_zalloc(tblsz, KM_SLEEP);
+	ltos_pid_hash = kmem_zalloc(tblsz, KM_SLEEP);
+
+	mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * Undo lx_pid_init(): free both pid conversion hash tables and destroy the
+ * lock that protected them.  The tables are freed with the same size
+ * expression they were allocated with, so hash_size must not have changed
+ * since lx_pid_init().
+ */
+void
+lx_pid_fini(void)
+{
+	kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size);
+	kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size);
+
+	/* Pair the mutex_init() done in lx_pid_init(). */
+	mutex_destroy(&hash_lock);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
new file mode 100644
index 0000000000..686afea458
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -0,0 +1,409 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+
+/*
+ * Some system calls return either a 32-bit or a 64-bit value, depending
+ * on the datamodel.
+ *
+ * V_RVAL is the return-value flag for such calls: SE_64RVAL when the kernel
+ * is built _LP64, SE_32RVAL1 otherwise.
+ */
+#ifdef _LP64
+#define V_RVAL SE_64RVAL
+#else
+#define V_RVAL SE_32RVAL1
+#endif
+
+/*
+ * Define system calls that return a native 'long' quantity i.e. a 32-bit
+ * or 64-bit integer - depending on how the kernel is itself compiled
+ * e.g. read(2) returns 'ssize_t' in the kernel and in userland.
+ *
+ * Expands to one table initializer: { flags, name, handler, arg count }.
+ */
+#define LX_CL(name, call, narg) \
+ { V_RVAL, (name), (llfcn_t)(call), (narg) }
+
+/*
+ * Returns a 32 bit quantity regardless of datamodel
+ */
+#define LX_CI(name, call, narg) \
+ { SE_32RVAL1, (name), (llfcn_t)(call), (narg) }
+
+/*
+ * Table initializer for a system call with no in-kernel handler: the entry
+ * takes zero arguments and dispatches to lx_nosys().
+ */
+extern longlong_t lx_nosys(void);
+#define LX_NOSYS(name) \
+ {SE_64RVAL, (name), (llfcn_t)lx_nosys, 0}
+
+/*
+ * Linux system call table.  lx_emulate_syscall() indexes this array
+ * directly with the system call number, so the position of every entry is
+ * significant; the numeric comments mark every tenth index.  LX_CL entries
+ * name an in-kernel handler and its argument count; LX_NOSYS entries
+ * dispatch to lx_nosys().  The array ends with a terminator entry.
+ */
+lx_sysent_t lx_sysent[] =
+{
+ LX_NOSYS("lx_nosys"), /* 0 */
+ LX_NOSYS("exit"), /* 1 */
+ LX_NOSYS("lx_fork"),
+ LX_NOSYS("read"),
+ LX_NOSYS("write"),
+ LX_NOSYS("open"),
+ LX_NOSYS("close"),
+ LX_NOSYS("waitpid"),
+ LX_NOSYS("creat"),
+ LX_NOSYS("link"),
+ LX_NOSYS("unlink"), /* 10 */
+ LX_NOSYS("exec"),
+ LX_NOSYS("chdir"),
+ LX_NOSYS("gtime"),
+ LX_NOSYS("mknod"),
+ LX_NOSYS("chmod"),
+ LX_NOSYS("lchown16"),
+ LX_NOSYS("break"),
+ LX_NOSYS("stat"),
+ LX_NOSYS("lseek"),
+ LX_CL("getpid", lx_getpid, 0), /* 20 */
+ LX_NOSYS("mount"),
+ LX_NOSYS("umount"),
+ LX_NOSYS("setuid16"),
+ LX_NOSYS("getuid16"),
+ LX_NOSYS("stime"),
+ LX_NOSYS("ptrace"),
+ LX_NOSYS("alarm"),
+ LX_NOSYS("fstat"),
+ LX_NOSYS("pause"),
+ LX_NOSYS("utime"), /* 30 */
+ LX_NOSYS("stty"),
+ LX_NOSYS("gtty"),
+ LX_NOSYS("access"),
+ LX_NOSYS("nice"),
+ LX_NOSYS("ftime"),
+ LX_NOSYS("sync"),
+ LX_CL("kill", lx_kill, 2),
+ LX_NOSYS("rename"),
+ LX_NOSYS("mkdir"),
+ LX_NOSYS("rmdir"), /* 40 */
+ LX_NOSYS("dup"),
+ LX_NOSYS("pipe"),
+ LX_NOSYS("times"),
+ LX_NOSYS("prof"),
+ LX_CL("brk", lx_brk, 1),
+ LX_NOSYS("setgid16"),
+ LX_NOSYS("getgid16"),
+ LX_NOSYS("signal"),
+ LX_NOSYS("geteuid16"),
+ LX_NOSYS("getegid16"), /* 50 */
+ LX_NOSYS("sysacct"),
+ LX_NOSYS("umount2"),
+ LX_NOSYS("lock"),
+ LX_NOSYS("ioctl"),
+ LX_NOSYS("fcntl"),
+ LX_NOSYS("mpx"),
+ LX_NOSYS("setpgid"),
+ LX_NOSYS("ulimit"),
+ LX_NOSYS("olduname"),
+ LX_NOSYS("umask"), /* 60 */
+ LX_NOSYS("chroot"),
+ LX_NOSYS("ustat"),
+ LX_NOSYS("dup2"),
+ LX_CL("getppid", lx_getppid, 0),
+ LX_NOSYS("pgrp"),
+ LX_NOSYS("setsid"),
+ LX_NOSYS("sigaction"),
+ LX_NOSYS("sgetmask"),
+ LX_NOSYS("ssetmask"),
+ LX_NOSYS("setreuid16"), /* 70 */
+ LX_NOSYS("setregid16"),
+ LX_NOSYS("sigsuspend"),
+ LX_NOSYS("sigpending"),
+ LX_NOSYS("sethostname"),
+ LX_NOSYS("setrlimit"),
+ LX_NOSYS("old_getrlimit"),
+ LX_NOSYS("getrusage"),
+ LX_NOSYS("gettimeofday"),
+ LX_NOSYS("settimeofday"),
+ LX_NOSYS("getgroups16"), /* 80 */
+ LX_NOSYS("setgroups16"),
+ LX_NOSYS("old_select"),
+ LX_NOSYS("symlink"),
+ LX_NOSYS("oldlstat"),
+ LX_NOSYS("readlink"),
+ LX_NOSYS("uselib"),
+ LX_NOSYS("swapon"),
+ LX_NOSYS("reboot"),
+ LX_NOSYS("old_readdir"),
+ LX_NOSYS("old_mmap"), /* 90 */
+ LX_NOSYS("munmap"),
+ LX_NOSYS("truncate"),
+ LX_NOSYS("ftruncate"),
+ LX_NOSYS("fchmod"),
+ LX_NOSYS("fchown16"),
+ LX_NOSYS("getpriority"),
+ LX_NOSYS("setpriority"),
+ LX_NOSYS("profil"),
+ LX_NOSYS("statfs"),
+ LX_NOSYS("fstatfs"), /* 100 */
+ LX_NOSYS("ioperm"),
+ LX_NOSYS("socketcall"),
+ LX_NOSYS("syslog"),
+ LX_NOSYS("setitimer"),
+ LX_NOSYS("getitimer"),
+ LX_NOSYS("newstat"),
+ LX_NOSYS("newsltat"), /* NOTE(review): probably meant "newlstat" */
+ LX_NOSYS("newsftat"), /* NOTE(review): probably meant "newfstat" */
+ LX_NOSYS("uname"),
+ LX_NOSYS("oldiopl"), /* 110 */
+ LX_NOSYS("oldvhangup"),
+ LX_NOSYS("idle"),
+ LX_NOSYS("vm86old"),
+ LX_NOSYS("wait4"),
+ LX_NOSYS("swapoff"),
+ LX_CL("sysinfo", lx_sysinfo, 1),
+ LX_NOSYS("ipc"),
+ LX_NOSYS("fsync"),
+ LX_NOSYS("sigreturn"),
+ LX_CL("clone", lx_clone, 5), /* 120 */
+ LX_NOSYS("setdomainname"),
+ LX_NOSYS("newuname"),
+ LX_CL("modify_ldt", lx_modify_ldt, 3),
+ LX_NOSYS("adjtimex"),
+ LX_NOSYS("mprotect"),
+ LX_NOSYS("sigprocmask"),
+ LX_NOSYS("create_module"),
+ LX_NOSYS("init_module"),
+ LX_NOSYS("delete_module"),
+ LX_NOSYS("get_kernel_syms"), /* 130 */
+ LX_NOSYS("quotactl"),
+ LX_NOSYS("getpgid"),
+ LX_NOSYS("fchdir"),
+ LX_NOSYS("bdflush"),
+ LX_NOSYS("sysfs"),
+ LX_NOSYS("personality"),
+ LX_NOSYS("afs_syscall"),
+ LX_NOSYS("setfsuid16"),
+ LX_NOSYS("setfsgid16"),
+ LX_NOSYS("llseek"), /* 140 */
+ LX_NOSYS("getdents"),
+ LX_NOSYS("select"),
+ LX_NOSYS("flock"),
+ LX_NOSYS("msync"),
+ LX_NOSYS("readv"),
+ LX_NOSYS("writev"),
+ LX_NOSYS("getsid"),
+ LX_NOSYS("fdatasync"),
+ LX_NOSYS("sysctl"),
+ LX_NOSYS("mlock"), /* 150 */
+ LX_NOSYS("munlock"),
+ LX_NOSYS("mlockall"),
+ LX_NOSYS("munlockall"),
+ LX_CL("sched_setparam", lx_sched_setparam, 2),
+ LX_CL("sched_getparam", lx_sched_getparam, 2),
+ LX_NOSYS("sched_setscheduler"),
+ LX_NOSYS("sched_getscheduler"),
+ LX_NOSYS("yield"),
+ LX_NOSYS("sched_get_priority_max"),
+ LX_NOSYS("sched_get_priority_min"), /* 160 */
+ LX_CL("sched_rr_get_interval", lx_sched_rr_get_interval, 2),
+ LX_NOSYS("nanosleep"),
+ LX_NOSYS("mremap"),
+ LX_CL("setresuid16", lx_setresuid16, 3),
+ LX_NOSYS("getresuid16"),
+ LX_NOSYS("vm86"),
+ LX_NOSYS("query_module"),
+ LX_NOSYS("poll"),
+ LX_NOSYS("nfsserctl"),
+ LX_CL("setresgid16", lx_setresgid16, 3), /* 170 */
+ LX_NOSYS("getresgid16"),
+ LX_NOSYS("prctl"),
+ LX_NOSYS("rt_sigreturn"),
+ LX_NOSYS("rt_sigaction"),
+ LX_NOSYS("rt_sigprocmask"),
+ LX_NOSYS("rt_sigpending"),
+ LX_NOSYS("rt_sigtimedwait"),
+ LX_NOSYS("rt_sigqueueinfo"),
+ LX_NOSYS("rt_sigsuspend"),
+ LX_NOSYS("pread64"), /* 180 */
+ LX_NOSYS("pwrite64"),
+ LX_NOSYS("chown16"),
+ LX_NOSYS("getcwd"),
+ LX_NOSYS("capget"),
+ LX_NOSYS("capset"),
+ LX_NOSYS("sigaltstack"),
+ LX_NOSYS("sendfile"),
+ LX_NOSYS("getpmsg"),
+ LX_NOSYS("putpmsg"),
+ LX_NOSYS("vfork"), /* 190 */
+ LX_NOSYS("getrlimit"),
+ LX_NOSYS("mmap2"),
+ LX_NOSYS("truncate64"),
+ LX_NOSYS("ftruncate64"),
+ LX_NOSYS("stat64"),
+ LX_NOSYS("lstat64"),
+ LX_NOSYS("fstat64"),
+ LX_NOSYS("lchown"),
+ LX_NOSYS("getuid"),
+ LX_NOSYS("getgid"), /* 200 */
+ LX_NOSYS("geteuid"),
+ LX_NOSYS("getegid"),
+ LX_NOSYS("setreuid"),
+ LX_NOSYS("setregid"),
+ LX_NOSYS("getgroups"),
+ LX_CL("setgroups", lx_setgroups, 2),
+ LX_NOSYS("fchown"),
+ LX_CL("setresuid", lx_setresuid, 3),
+ LX_NOSYS("getresuid"),
+ LX_CL("setresgid", lx_setresgid, 3), /* 210 */
+ LX_NOSYS("getresgid"),
+ LX_NOSYS("chown"),
+ LX_NOSYS("setuid"),
+ LX_NOSYS("setgid"),
+ LX_NOSYS("setfsuid"),
+ LX_NOSYS("setfsgid"),
+ LX_NOSYS("pivot_root"),
+ LX_NOSYS("mincore"),
+ LX_NOSYS("madvise"),
+ LX_NOSYS("getdents64"), /* 220 */
+ LX_NOSYS("fcntl64"),
+ LX_NOSYS("lx_nosys"),
+ LX_NOSYS("security"),
+ LX_CL("gettid", lx_gettid, 0),
+ LX_NOSYS("readahead"),
+ LX_NOSYS("setxattr"),
+ LX_NOSYS("lsetxattr"),
+ LX_NOSYS("fsetxattr"),
+ LX_NOSYS("getxattr"),
+ LX_NOSYS("lgetxattr"), /* 230 */
+ LX_NOSYS("fgetxattr"),
+ LX_NOSYS("listxattr"),
+ LX_NOSYS("llistxattr"),
+ LX_NOSYS("flistxattr"),
+ LX_NOSYS("removexattr"),
+ LX_NOSYS("lremovexattr"),
+ LX_NOSYS("fremovexattr"),
+ LX_CL("tkill", lx_tkill, 2),
+ LX_NOSYS("sendfile64"),
+ LX_CL("futex", lx_futex, 6), /* 240 */
+ LX_NOSYS("sched_setaffinity"),
+ LX_NOSYS("sched_getaffinity"),
+ LX_CL("set_thread_area", lx_set_thread_area, 1),
+ LX_CL("get_thread_area", lx_get_thread_area, 1),
+ LX_NOSYS("io_setup"),
+ LX_NOSYS("io_destroy"),
+ LX_NOSYS("io_getevents"),
+ LX_NOSYS("io_submit"),
+ LX_NOSYS("io_cancel"),
+ LX_NOSYS("fadvise64"), /* 250 */
+ LX_NOSYS("lx_nosys"),
+ LX_NOSYS("exit_group"),
+ LX_NOSYS("lookup_dcookie"),
+ LX_NOSYS("epoll_create"),
+ LX_NOSYS("epoll_ctl"),
+ LX_NOSYS("epoll_wait"),
+ LX_NOSYS("remap_file_pages"),
+ LX_CL("set_tid_address", lx_set_tid_address, 1),
+ LX_NOSYS("timer_create"),
+ LX_NOSYS("timer_settime"), /* 260 */
+ LX_NOSYS("timer_gettime"),
+ LX_NOSYS("timer_getoverrun"),
+ LX_NOSYS("timer_delete"),
+ LX_NOSYS("clock_settime"),
+ LX_NOSYS("clock_gettime"),
+ LX_NOSYS("clock_getres"),
+ LX_NOSYS("clock_nanosleep"),
+ LX_NOSYS("statfs64"),
+ LX_NOSYS("fstatfs64"),
+ LX_NOSYS("tgkill"), /* 270 */
+ LX_NOSYS("utimes"),
+ LX_NOSYS("fadvise64_64"),
+ LX_NOSYS("vserver"),
+ NULL /* NULL-termination is required for lx_systrace */
+};
+
+/*
+ * Dispatch Linux system call number 'num' through lx_sysent[], passing as
+ * many of arg1..arg6 as the table entry declares (sy_narg).
+ *
+ * Returns the handler's return value widened to int64_t.  A 'num' that does
+ * not name a table entry fails with ENOSYS instead of indexing outside the
+ * table.  Panics if a table entry declares more than six arguments.
+ */
+int64_t
+lx_emulate_syscall(int num, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	/* The last slot of lx_sysent[] is the terminator, not a syscall. */
+	const size_t nsys = sizeof (lx_sysent) / sizeof (lx_sysent[0]) - 1;
+	struct lx_sysent *jsp;
+	int64_t rval = 0;
+
+	/*
+	 * Reject numbers outside the table before using them as an index;
+	 * this also keeps the terminator entry, whose handler is NULL, from
+	 * ever being called.
+	 */
+	if (num < 0 || (size_t)num >= nsys)
+		return ((int64_t)set_errno(ENOSYS));
+
+	jsp = &(lx_sysent[num]);
+
+	switch (jsp->sy_narg) {
+	case 0:
+		lx_print("--> %s()\n", jsp->sy_name);
+		rval = (int64_t)jsp->sy_callc();
+		break;
+	case 1:
+		lx_print("--> %s(0x%lx)\n", jsp->sy_name, arg1);
+		rval = (int64_t)jsp->sy_callc(arg1);
+		break;
+	case 2:
+		lx_print("--> %s(0x%lx, 0x%lx)\n", jsp->sy_name, arg1, arg2);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2);
+		break;
+	case 3:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3);
+		break;
+	case 4:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3, arg4);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4);
+		break;
+	case 5:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3, arg4, arg5);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5);
+		break;
+	case 6:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx,"
+		    " 0x%lx, 0x%lx)\n",
+		    jsp->sy_name, arg1, arg2, arg3, arg4, arg5, arg6);
+		rval = (int64_t)jsp->sy_callc(arg1, arg2, arg3, arg4, arg5,
+		    arg6);
+		break;
+	default:
+		/* A malformed table entry is a kernel bug, not a user error. */
+		panic("Invalid syscall entry: #%d at 0x%p\n", num, jsp);
+	}
+	lx_print("----------> return (0x%llx)\n", (long long)rval);
+	return (rval);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
new file mode 100644
index 0000000000..c79e3fa590
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
@@ -0,0 +1,233 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LXPROC_H
+#define _LXPROC_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * lx_proc.h: declarations, data structures and macros for lxprocfs
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred_impl.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+/*
+ * Convert a vnode into an lxpr_mnt_t: the per-mount data hung off vfs_data
+ * of the vnode's vfs.
+ */
+#define VTOLXPM(vp) ((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * Convert a vnode into an lxpr_node_t: the per-node data stored in v_data.
+ */
+#define VTOLXP(vp) ((lxpr_node_t *)(vp)->v_data)
+
+/*
+ * Convert an lxpr_node_t into its vnode.
+ */
+#define LXPTOV(lxpnp) ((lxpnp)->lxpr_vnode)
+
+/*
+ * Convert an lxpr_node_t into the zone of the file system it belongs to,
+ * by way of the node's vnode and that vnode's per-mount data.
+ */
+#define LXPTOZ(lxpnp) \
+ (((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone)
+
+#define LXPNSIZ 256 /* max size of lx /proc file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define LXPR_SDSIZE 16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ *
+ * Each enumerator is annotated with the path it represents.  LXPR_NFILES
+ * is not a node type; it is the count of the types above it and must
+ * therefore stay last.
+ */
+typedef enum lxpr_nodetype {
+ LXPR_PROCDIR, /* /proc */
+ LXPR_PIDDIR, /* /proc/<pid> */
+ LXPR_PID_CMDLINE, /* /proc/<pid>/cmdline */
+ LXPR_PID_CPU, /* /proc/<pid>/cpu */
+ LXPR_PID_CURDIR, /* /proc/<pid>/cwd */
+ LXPR_PID_ENV, /* /proc/<pid>/environ */
+ LXPR_PID_EXE, /* /proc/<pid>/exe */
+ LXPR_PID_MAPS, /* /proc/<pid>/maps */
+ LXPR_PID_MEM, /* /proc/<pid>/mem */
+ LXPR_PID_ROOTDIR, /* /proc/<pid>/root */
+ LXPR_PID_STAT, /* /proc/<pid>/stat */
+ LXPR_PID_STATM, /* /proc/<pid>/statm */
+ LXPR_PID_STATUS, /* /proc/<pid>/status */
+ LXPR_PID_FDDIR, /* /proc/<pid>/fd */
+ LXPR_PID_FD_FD, /* /proc/<pid>/fd/nn */
+ LXPR_CMDLINE, /* /proc/cmdline */
+ LXPR_CPUINFO, /* /proc/cpuinfo */
+ LXPR_DEVICES, /* /proc/devices */
+ LXPR_DMA, /* /proc/dma */
+ LXPR_FILESYSTEMS, /* /proc/filesystems */
+ LXPR_INTERRUPTS, /* /proc/interrupts */
+ LXPR_IOPORTS, /* /proc/ioports */
+ LXPR_KCORE, /* /proc/kcore */
+ LXPR_KMSG, /* /proc/kmsg */
+ LXPR_LOADAVG, /* /proc/loadavg */
+ LXPR_MEMINFO, /* /proc/meminfo */
+ LXPR_MOUNTS, /* /proc/mounts */
+ LXPR_NETDIR, /* /proc/net */
+ LXPR_NET_ARP, /* /proc/net/arp */
+ LXPR_NET_DEV, /* /proc/net/dev */
+ LXPR_NET_DEV_MCAST, /* /proc/net/dev_mcast */
+ LXPR_NET_IGMP, /* /proc/net/igmp */
+ LXPR_NET_IP_MR_CACHE, /* /proc/net/ip_mr_cache */
+ LXPR_NET_IP_MR_VIF, /* /proc/net/ip_mr_vif */
+ LXPR_NET_MCFILTER, /* /proc/net/mcfilter */
+ LXPR_NET_NETSTAT, /* /proc/net/netstat */
+ LXPR_NET_RAW, /* /proc/net/raw */
+ LXPR_NET_ROUTE, /* /proc/net/route */
+ LXPR_NET_RPC, /* /proc/net/rpc */
+ LXPR_NET_RT_CACHE, /* /proc/net/rt_cache */
+ LXPR_NET_SOCKSTAT, /* /proc/net/sockstat */
+ LXPR_NET_SNMP, /* /proc/net/snmp */
+ LXPR_NET_STAT, /* /proc/net/stat */
+ LXPR_NET_TCP, /* /proc/net/tcp */
+ LXPR_NET_UDP, /* /proc/net/udp */
+ LXPR_NET_UNIX, /* /proc/net/unix */
+ LXPR_PARTITIONS, /* /proc/partitions */
+ LXPR_SELF, /* /proc/self */
+ LXPR_STAT, /* /proc/stat */
+ LXPR_UPTIME, /* /proc/uptime */
+ LXPR_VERSION, /* /proc/version */
+ LXPR_NFILES /* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds then inode numbers
+ * may be duplicated)
+ */
+#define LXPR_FD_PERPROC 2000
+
+/*
+ * external dirent characteristics
+ *
+ * An lxpr_dirent_t pairs a node type with the name under which it appears
+ * in its directory.  The name buffer is LXPRMAXNAMELEN bytes long;
+ * presumably that includes the terminating NUL -- confirm against the
+ * tables that use this type.
+ */
+#define LXPRMAXNAMELEN 14
+typedef struct {
+ lxpr_nodetype_t d_type;
+ char d_name[LXPRMAXNAMELEN];
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ *
+ * VTOLXP() recovers this structure from a vnode and LXPTOV() goes the
+ * other way through lxpr_vnode.
+ */
+typedef struct lxpr_node {
+ lxpr_nodetype_t lxpr_type; /* type of this node */
+ vnode_t *lxpr_vnode; /* vnode for the node */
+ vnode_t *lxpr_parent; /* parent directory */
+ vnode_t *lxpr_realvp; /* real vnode, file in dirs */
+ timestruc_t lxpr_time; /* creation etc time for file */
+ mode_t lxpr_mode; /* file mode bits */
+ uid_t lxpr_uid; /* file owner */
+ gid_t lxpr_gid; /* file group owner */
+ pid_t lxpr_pid; /* pid of proc referred to */
+ ino_t lxpr_ino; /* node id */
+ ldi_handle_t lxpr_cons_ldih; /* ldi handle for console device */
+} lxpr_node_t;
+
+struct zone; /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ *
+ * VTOLXPM() recovers this structure from any vnode of the mount, and
+ * LXPTOZ() reads lxprm_zone from it.
+ */
+typedef struct lxpr_mnt {
+ lxpr_node_t *lxprm_node; /* node at root of proc mount */
+ struct zone *lxprm_zone; /* zone for this mount */
+ ldi_ident_t lxprm_li; /* ident for ldi */
+} lxpr_mnt_t;
+
+/*
+ * Globals and functions shared between the lxprocfs source files.
+ */
+extern vnodeops_t *lxpr_vnodeops;
+extern int nproc_highbit;	/* highbit(v.v_proc) */
+
+typedef struct mounta mounta_t;
+
+/*
+ * Node management.  The two cache routines take no arguments and are
+ * declared with (void) so that they are real prototypes and calls to them
+ * are argument-checked.
+ */
+extern void lxpr_initnodecache(void);
+extern void lxpr_fininodecache(void);
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+
+/*
+ * Buffered output to a uio.  lxpr_uiobuf_t is opaque to users of this
+ * header; it is created by lxpr_uiobuf_new() and released by
+ * lxpr_uiobuf_free().
+ */
+typedef struct lxpr_uiobuf lxpr_uiobuf_t;
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+/* Process lookup/lock and the matching unlock. */
+proc_t *lxpr_lock(pid_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
new file mode 100644
index 0000000000..5d252207fb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
@@ -0,0 +1,494 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lx_prsubr.c: Various support functions for the lx /proc vnodeops.
+ */
+
+#include <sys/varargs.h>
+
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lx_proc.h"
+
+/* Name under which the lxpr_node kmem cache is created */
+#define LXPRCACHE_NAME "lxpr_cache"
+
+/* kmem cache constructor/destructor for lxpr_node_t objects */
+static int lxpr_node_constructor(void*, void*, int);
+static void lxpr_node_destructor(void*, void*);
+
+/* Cache from which lxpr_getnode() allocates; set up by lxpr_initnodecache() */
+static kmem_cache_t *lxpr_node_cache;
+
+/*
+ * Staging buffer used to build the contents of a file before copying
+ * them out to the caller's uio.
+ */
+struct lxpr_uiobuf {
+ uio_t *uiop; /* uio the data is eventually moved to */
+ char *buffer; /* start of the staging buffer */
+ uint32_t buffsize; /* size of the staging buffer in bytes */
+ char *pos; /* next free byte in the staging buffer */
+ size_t beg; /* file offset of the first byte in buffer */
+ int error; /* first error recorded, 0 if none */
+};
+
+/* Size of the staging buffer allocated by lxpr_uiobuf_new() */
+#define BUFSIZE 4000
+
+/*
+ * Create a new output buffer bound to the given uio.  The buffer header
+ * and its BUFSIZE bytes of staging space come from a single allocation,
+ * with the staging space placed directly after the header.
+ */
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+	struct lxpr_uiobuf *ub;
+
+	ub = kmem_alloc(sizeof (*ub) + BUFSIZE, KM_SLEEP);
+
+	/* Staging area starts immediately after the header */
+	ub->buffer = (char *)(ub + 1);
+	ub->buffsize = BUFSIZE;
+	ub->pos = ub->buffer;
+
+	ub->uiop = uiop;
+	ub->beg = 0;
+	ub->error = 0;
+
+	return (ub);
+}
+
+/*
+ * Release a buffer obtained from lxpr_uiobuf_new().  The buffer is
+ * expected to have been flushed (nothing pending in the staging area).
+ */
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+	size_t total;
+
+	ASSERT(uiobuf != NULL);
+	ASSERT(uiobuf->pos == uiobuf->buffer);
+
+	/* Header and staging area were allocated as one chunk */
+	total = sizeof (struct lxpr_uiobuf) + uiobuf->buffsize;
+	kmem_free(uiobuf, total);
+}
+
+/*
+ * Set the file offset of the uio underlying this buffer.
+ */
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+	uio_t *uiop = uiobuf->uiop;
+
+	uiop->uio_offset = offset;
+}
+
+/*
+ * Record an error on the buffer.  Once an error is set the write, printf
+ * and flush routines stop producing output and lxpr_uiobuf_flush()
+ * returns the error.  Only one error may be recorded per buffer.
+ */
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+ ASSERT(uiobuf->error == 0);
+
+ uiobuf->error = err;
+}
+
+/*
+ * Push the staged data out to the uio and reset the staging buffer.
+ *
+ * Only the part of the staged data that lies at or beyond the uio's
+ * current offset is copied; data generated for earlier offsets is
+ * dropped.  Nothing is copied once an error has been recorded or the
+ * uio has no room left.  Returns the buffer's error state.
+ */
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+	uio_t *uiop = uiobuf->uiop;
+	caddr_t base = uiobuf->buffer;
+	off_t off = uiop->uio_offset;
+	size_t beg = uiobuf->beg;
+	size_t size = uiobuf->pos - base;
+
+	if (uiobuf->error == 0 && uiop->uio_resid != 0) {
+		ASSERT(off >= beg);
+
+		/* Does the staged window reach past the requested offset? */
+		if (off >= 0 && beg + size > off) {
+			uiobuf->error = uiomove(base + (off - beg),
+			    size - (off - beg), UIO_READ, uiop);
+		}
+
+		/* The window now starts where the staged data ended */
+		uiobuf->beg += size;
+	}
+
+	/* Staging area is empty again */
+	uiobuf->pos = base;
+
+	return (uiobuf->error);
+}
+
+/*
+ * Append size bytes from buf to the output.  Data is staged in the
+ * buffer and flushed to the uio whenever the staging area fills up.
+ * Output stops silently once an error is recorded or the uio is full.
+ */
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+	while (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+		uint_t space =
+		    uiobuf->buffsize - (uiobuf->pos - uiobuf->buffer);
+
+		if (size > space) {
+			/* Fill what is left, flush, then go round again */
+			bcopy(buf, uiobuf->pos, space);
+			uiobuf->pos += space;
+			(void) lxpr_uiobuf_flush(uiobuf);
+			buf += space;
+			size -= space;
+			continue;
+		}
+
+		/* Everything that remains fits in the staging area */
+		bcopy(buf, uiobuf->pos, size);
+		uiobuf->pos += size;
+		return;
+	}
+}
+
+/* Size of the on-stack buffer tried first by lxpr_uiobuf_printf() */
+#define TYPBUFFSIZE 256
+
+/*
+ * printf-style formatted output to a uiobuf.
+ *
+ * The text is first formatted into a small stack buffer.  If it does not
+ * fit, a heap buffer of exactly the required size is allocated and the
+ * text is formatted a second time.  Nothing is produced once an error has
+ * been recorded on the buffer or the underlying uio is full.
+ */
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+	va_list args;
+	char buff[TYPBUFFSIZE];
+	int len;
+	char *buffer;
+
+	/* Can we still do any output */
+	if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+		return;
+
+	/* Try using stack allocated buffer */
+	va_start(args, fmt);
+	len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+	va_end(args);
+
+	if (len < TYPBUFFSIZE) {
+		lxpr_uiobuf_write(uiobuf, buff, len);
+		return;
+	}
+
+	/* Not enough space in pre-allocated buffer */
+	buffer = kmem_alloc(len+1, KM_SLEEP);
+
+	/*
+	 * The first vsnprintf() consumed the argument list, so it must be
+	 * restarted before formatting again; reusing a va_list that has
+	 * already been traversed is undefined behaviour.
+	 *
+	 * We know we allocated the correct amount of space
+	 * so no check on the return value
+	 */
+	va_start(args, fmt);
+	(void) vsnprintf(buffer, len+1, fmt, args);
+	va_end(args);
+
+	lxpr_uiobuf_write(uiobuf, buffer, len);
+	kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_lock held and P_PR_LOCK set
+ * in p_proc_flag.  A pid of 1 is translated to the init process of the
+ * caller's zone.  Returns NULL if the process is not found or is still
+ * being created (SIDL).  If P_PR_LOCK is already set on the process we
+ * wait on its pr_pid_cv slot and then redo the lookup from scratch.
+ *
+ * Must not be called with pidlock held.  Undo with lxpr_unlock().
+ */
+proc_t *
+lxpr_lock(pid_t pid)
+{
+ proc_t *p;
+ kmutex_t *mp;
+
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&pidlock);
+
+ /*
+ * If the pid is 1, we really want the zone's init process
+ */
+ p = prfind((pid == 1) ?
+ curproc->p_zone->zone_proc_initpid : pid);
+
+ if (p == NULL || p->p_stat == SIDL) {
+ mutex_exit(&pidlock);
+ return (NULL);
+ }
+ /*
+ * p_lock is persistent, but p itself is not -- it could
+ * vanish during cv_wait(). Load p->p_lock now so we can
+ * drop it after cv_wait() without referencing p.
+ */
+ mp = &p->p_lock;
+ mutex_enter(mp);
+
+ /* p_lock is taken before pidlock is dropped */
+ mutex_exit(&pidlock);
+
+ /* Nobody else has the process locked: it is ours */
+ if (!(p->p_proc_flag & P_PR_LOCK))
+ break;
+
+ /* Wait for the current holder, then look the pid up again */
+ cv_wait(&pr_pid_cv[p->p_slot], mp);
+ mutex_exit(mp);
+ }
+ p->p_proc_flag |= P_PR_LOCK;
+ THREAD_KPRI_REQUEST();
+ return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock a process locked by lxpr_lock(): signal the process's pr_pid_cv
+ * slot, clear P_PR_LOCK and drop p_lock.  The caller must hold p_lock
+ * with P_PR_LOCK set and must not hold pidlock.
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+ ASSERT(p->p_proc_flag & P_PR_LOCK);
+ ASSERT(MUTEX_HELD(&p->p_lock));
+ ASSERT(!MUTEX_HELD(&pidlock));
+
+ cv_signal(&pr_pid_cv[p->p_slot]);
+ p->p_proc_flag &= ~P_PR_LOCK;
+ mutex_exit(&p->p_lock);
+ /* Pairs with the THREAD_KPRI_REQUEST() in lxpr_lock() */
+ THREAD_KPRI_RELEASE();
+}
+
+/*
+ * Create the kmem cache that lxpr_node_t objects are allocated from.
+ */
+void
+lxpr_initnodecache()
+{
+	lxpr_node_cache = kmem_cache_create(
+	    LXPRCACHE_NAME,		/* cache name */
+	    sizeof (lxpr_node_t),	/* object size */
+	    0,				/* alignment */
+	    lxpr_node_constructor,
+	    lxpr_node_destructor,
+	    NULL, NULL, NULL, 0);
+}
+
+/*
+ * Destroy the lxpr_node_t cache created by lxpr_initnodecache().
+ */
+void
+lxpr_fininodecache()
+{
+ kmem_cache_destroy(lxpr_node_cache);
+}
+
+/*
+ * kmem cache constructor for lxpr_node_t.
+ *
+ * Allocates the vnode that belongs to the node, points it at the lxproc
+ * vnode operations and links vnode and node together.  The allocation
+ * honours the kmflags passed in by the cache, so a KM_NOSLEEP request
+ * never blocks; returns 0 on success or -1 if the vnode could not be
+ * allocated.
+ */
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+	lxpr_node_t *lxpnp = buf;
+	vnode_t *vp;
+
+	vp = lxpnp->lxpr_vnode = vn_alloc(kmflags);
+	if (vp == NULL)
+		return (-1);
+
+	(void) vn_setops(vp, lxpr_vnodeops);
+	vp->v_data = (caddr_t)lxpnp;
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for lxpr_node_t: give back the vnode that the
+ * constructor attached to the node.
+ */
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+	vn_free(LXPTOV((lxpr_node_t *)buf));
+}
+
+/*
+ * Calculate an inode number
+ *
+ * Derives the inode number of an lxproc node from its type, the pid it
+ * refers to and, for /proc/<pid>/fd/<n> entries, the file descriptor.
+ * A pid of 1 is first translated to the real pid of the calling
+ * process's zone init.
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd)
+{
+	if (pid == 1)
+		pid = curproc->p_zone->zone_proc_initpid;
+
+	/* Per-process directory */
+	if (type == LXPR_PIDDIR)
+		return (pid + 1);
+
+	/* Root of the file system */
+	if (type == LXPR_PROCDIR)
+		return (maxpid + 2);
+
+	/* File descriptor entries follow the fixed per-process files */
+	if (type == LXPR_PID_FD_FD) {
+		return (maxpid + 2 +
+		    (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+		    LXPR_NFILES + fd);
+	}
+
+	/* Everything else is distinguished by its node type */
+	return (maxpid + 2 +
+	    (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+	    type);
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+	/*
+	 * The parent of the root node is the mounted-on directory, so
+	 * the root simply reports its own inode number.
+	 */
+	if (lxpnp->lxpr_type == LXPR_PROCDIR)
+		return (lxpnp->lxpr_ino);
+
+	return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * The node comes from the lxpr_node cache together with its vnode, which
+ * is reinitialised here.  A hold is taken on the parent vnode dp, and on
+ * the underlying real vnode for the cwd, root and exe links; the holds
+ * are dropped again by lxpr_freenode().
+ *
+ * dp	parent directory vnode
+ * type	kind of node to create
+ * p	process the node refers to, or NULL for nodes without a process
+ * fd	file descriptor number, used for the inode number calculation
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
+{
+ lxpr_node_t *lxpnp;
+ vnode_t *vp;
+ user_t *up;
+ timestruc_t now;
+
+ /*
+ * Allocate a new node. It is deallocated in vop_inactive
+ */
+ lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+ /*
+ * Set defaults (may be overridden below)
+ */
+ gethrestime(&now);
+ lxpnp->lxpr_type = type;
+ lxpnp->lxpr_realvp = NULL;
+ lxpnp->lxpr_parent = dp;
+ VN_HOLD(dp);
+ if (p != NULL) {
+ /* The zone's init process is always presented as pid 1 */
+ lxpnp->lxpr_pid = ((p->p_pid ==
+ curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);
+
+ lxpnp->lxpr_time = PTOU(p)->u_start;
+ lxpnp->lxpr_uid = crgetruid(p->p_cred);
+ lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+ lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
+ } else {
+ /* Pretend files without a proc belong to sched */
+ lxpnp->lxpr_pid = 0;
+ lxpnp->lxpr_time = now;
+ lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+ lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+ }
+
+ /* initialize the vnode data */
+ vp = lxpnp->lxpr_vnode;
+ vn_reinit(vp);
+ vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+ vp->v_vfsp = dp->v_vfsp;
+
+ /*
+ * Do node specific stuff: set the vnode type and file mode and,
+ * for links to real files, remember and hold the real vnode.
+ */
+ switch (type) {
+ case LXPR_PROCDIR:
+ vp->v_flag |= VROOT;
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by everyone */
+ break;
+
+ case LXPR_PID_CURDIR:
+ ASSERT(p != NULL);
+ up = PTOU(p);
+ lxpnp->lxpr_realvp = up->u_cdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_ROOTDIR:
+ ASSERT(p != NULL);
+ up = PTOU(p);
+ /* Fall back to the system root if u_rdir is not set */
+ lxpnp->lxpr_realvp = up->u_rdir != NULL ? up->u_rdir : rootdir;
+ ASSERT(lxpnp->lxpr_realvp != NULL);
+ VN_HOLD(lxpnp->lxpr_realvp);
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_EXE:
+ ASSERT(p != NULL);
+ /* p_exec may be NULL, in which case there is no real vnode */
+ lxpnp->lxpr_realvp = p->p_exec;
+ if (lxpnp->lxpr_realvp != NULL) {
+ VN_HOLD(lxpnp->lxpr_realvp);
+ }
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777;
+ break;
+
+ case LXPR_SELF:
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0777; /* anyone does anything ! */
+ break;
+
+ case LXPR_PID_FD_FD:
+ ASSERT(p != NULL);
+ /* lxpr_realvp is set after we return */
+ vp->v_type = VLNK;
+ lxpnp->lxpr_mode = 0700; /* read-write-exe owner only */
+ break;
+
+ case LXPR_PID_FDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0500; /* read-search by owner only */
+ break;
+
+ case LXPR_PIDDIR:
+ ASSERT(p != NULL);
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0511; /* owner read-search, others search */
+ break;
+
+ case LXPR_NETDIR:
+ vp->v_type = VDIR;
+ lxpnp->lxpr_mode = 0555; /* read-search by all */
+ break;
+
+ case LXPR_PID_ENV:
+ case LXPR_PID_MEM:
+ ASSERT(p != NULL);
+ /*FALLTHRU*/
+ case LXPR_KCORE:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0400; /* read-only by owner only */
+ break;
+
+ default:
+ vp->v_type = VREG;
+ lxpnp->lxpr_mode = 0444; /* read-only by all */
+ break;
+ }
+
+ return (lxpnp);
+}
+
+
+/*
+ * Free the storage obtained from lxpr_getnode().
+ *
+ * Drops the holds on the real vnode and on the parent vnode, if the node
+ * has them, and returns the node to the lxpr_node cache.
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+	vnode_t *realvp;
+	vnode_t *parentvp;
+
+	ASSERT(lxpnp != NULL);
+	ASSERT(LXPTOV(lxpnp) != NULL);
+
+	/* Let go of the underlying file, if this node refers to one */
+	realvp = lxpnp->lxpr_realvp;
+	if (realvp != NULL) {
+		VN_RELE(realvp);
+	}
+
+	/* Let go of the parent directory */
+	parentvp = lxpnp->lxpr_parent;
+	if (parentvp != NULL) {
+		VN_RELE(parentvp);
+	}
+
+	/* Hand the node back to the cache */
+	kmem_cache_free(lxpr_node_cache, lxpnp);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
new file mode 100644
index 0000000000..44891dc612
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
@@ -0,0 +1,373 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lx_prvfsops.c: vfs operations for the lx /proc file system (lxprocfs).
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/lx_impl.h>
+
+#include "lx_proc.h"
+
+/* Module level parameters */
+static int lxprocfstype; /* file system type index, set by lxpr_init() */
+static dev_t lxprocdev; /* pseudo device number, set by lxpr_init() */
+static kmutex_t lxpr_mount_lock; /* held across mount and unmount */
+
+int nproc_highbit; /* highbit(v.v_proc) */
+
+/* VFS operations and the file system initialisation routine */
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+/*
+ * File system definition: registers the "lx_proc" file system type with
+ * lxpr_init() as its initialisation routine.
+ */
+static vfsdef_t vfw = {
+ VFSDEF_VERSION,
+ "lx_proc",
+ lxpr_init,
+ 0,
+ NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+ &mod_fsops, "generic linux procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlfs, NULL
+};
+
+/*
+ * Loadable module entry point: install the module described by modlinkage.
+ */
+int
+_init(void)
+{
+ return (mod_install(&modlinkage));
+}
+
+/*
+ * Loadable module entry point: report information about this module.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Loadable module entry point: remove the module.  The module-wide
+ * resources are only torn down once mod_remove() has succeeded;
+ * otherwise its error is returned and nothing is touched.
+ */
+int
+_fini(void)
+{
+	int retval = mod_remove(&modlinkage);
+
+	/* Removal was refused: leave everything in place */
+	if (retval != 0)
+		return (retval);
+
+	/* Node cache first, then the vfs and vnode operation vectors */
+	lxpr_fininodecache();
+	(void) vfs_freevfsops_by_type(lxprocfstype);
+	vn_freevnodeops(lxpr_vnodeops);
+
+	mutex_destroy(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * File system initialisation routine, referenced from the vfsdef.
+ *
+ * Records the file system type, initialises the mount lock, installs the
+ * vfs and vnode operation vectors, obtains a pseudo device number and
+ * creates the lxpr_node cache.
+ *
+ * Returns 0 on success, or the error from vfs_setfsops()/vn_make_ops().
+ * On failure everything set up so far, including the mount lock, is torn
+ * down again.
+ */
+static int
+lxpr_init(int fstype, char *name)
+{
+	static const fs_operation_def_t lxpr_vfsops_template[] = {
+		VFSNAME_MOUNT, lxpr_mount,
+		VFSNAME_UNMOUNT, lxpr_unmount,
+		VFSNAME_ROOT, lxpr_root,
+		VFSNAME_STATVFS, lxpr_statvfs,
+		NULL, NULL
+	};
+	extern const fs_operation_def_t lxpr_vnodeops_template[];
+	int error;
+	major_t dev;
+
+	nproc_highbit = highbit(v.v_proc);
+	lxprocfstype = fstype;
+	ASSERT(lxprocfstype != 0);
+
+	mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/*
+	 * Associate VFS ops vector with this fstype.
+	 */
+	error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+	if (error != 0) {
+		cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+		mutex_destroy(&lxpr_mount_lock);
+		return (error);
+	}
+
+	/*
+	 * Set up vnode ops vector too.
+	 */
+	error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+	if (error != 0) {
+		(void) vfs_freevfsops_by_type(fstype);
+		cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+		mutex_destroy(&lxpr_mount_lock);
+		return (error);
+	}
+
+	/*
+	 * Assign a unique "device" number (used by stat(2)).
+	 * Failure is not fatal; we fall back to major number 0.
+	 */
+	if ((dev = getudev()) == (major_t)-1) {
+		cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+		dev = 0;
+	}
+
+	/*
+	 * Make the pseudo device
+	 */
+	lxprocdev = makedevice(dev, 0);
+
+	/*
+	 * Initialise cache for lxpr_nodes
+	 */
+	lxpr_initnodecache();
+
+	return (0);
+}
+
+/*
+ * VFS_MOUNT entry point.
+ *
+ * Checks that the caller may mount and that the mount point is a
+ * directory, allocates the per-mount data (including an ldi ident used
+ * later to open devices), creates the root node and fills in the vfs.
+ *
+ * Returns 0 on success; EPERM if the caller lacks privilege; ENOTDIR if
+ * the mount point is not a directory; EBUSY if a global-zone caller
+ * tries to mount on a path that belongs to another zone, or if the
+ * mount point is in use and MS_OVERLAY was not given; or the error from
+ * ldi_ident_from_mod().
+ */
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+	lxpr_mnt_t *lxpr_mnt;
+	zone_t *zone = curproc->p_zone;
+	ldi_ident_t li;
+	int err;
+
+	/*
+	 * must be root to mount
+	 */
+	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+		return (EPERM);
+
+	/*
+	 * mount point must be a directory
+	 */
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/*
+	 * From the global zone, only allow mounts on paths that belong
+	 * to the global zone itself.  Only the pointer value of mntzone
+	 * is compared after the hold has been dropped.
+	 */
+	if (zone == global_zone) {
+		zone_t *mntzone;
+
+		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+		zone_rele(mntzone);
+		if (zone != mntzone)
+			return (EBUSY);
+	}
+
+	/*
+	 * Having the resource be anything but "lxproc" doesn't make sense
+	 */
+	vfs_setresource(vfsp, "lxproc");
+
+	lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+	if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+		kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+		return (err);
+	}
+
+	lxpr_mnt->lxprm_li = li;
+
+	mutex_enter(&lxpr_mount_lock);
+
+	/*
+	 * Ensure we don't allow overlaying mounts
+	 */
+	mutex_enter(&mvp->v_lock);
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+		mutex_exit(&mvp->v_lock);
+		mutex_exit(&lxpr_mount_lock);
+		/* Give back the ident as well as the mount structure */
+		ldi_ident_release(li);
+		kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+		return (EBUSY);
+	}
+	mutex_exit(&mvp->v_lock);
+
+	/*
+	 * allocate the first vnode
+	 */
+	zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+	/* Arbitrarily set the parent vnode to the mounted over directory */
+	lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+	/* Correctly set the fs for the root node */
+	lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+	vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+	vfsp->vfs_bsize = DEV_BSIZE;
+	vfsp->vfs_fstype = lxprocfstype;
+	vfsp->vfs_data = (caddr_t)lxpr_mnt;
+	vfsp->vfs_dev = lxprocdev;
+
+	mutex_exit(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * VFS_UNMOUNT entry point.
+ *
+ * Returns 0 on success; EPERM if the caller lacks privilege; ENOTSUP for
+ * a forced unmount, which this file system does not support; EBUSY if
+ * the root vnode still has holds other than the mount's own.
+ *
+ * On success the root node, the zone hold, the ldi ident and the
+ * per-mount structure obtained in lxpr_mount() are all released.
+ */
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+	lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+	vnode_t *vp;
+	int count;
+
+	ASSERT(lxpr_mnt != NULL);
+	vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+	mutex_enter(&lxpr_mount_lock);
+
+	/*
+	 * must be root to unmount
+	 */
+	if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+		mutex_exit(&lxpr_mount_lock);
+		return (EPERM);
+	}
+
+	/*
+	 * forced unmount is not supported by this file system
+	 */
+	if (flag & MS_FORCE) {
+		mutex_exit(&lxpr_mount_lock);
+		return (ENOTSUP);
+	}
+
+	/*
+	 * Ensure that no vnodes are in use on this mount point.
+	 * Every node holds its parent, so any live node shows up as an
+	 * extra hold on the root vnode.
+	 */
+	mutex_enter(&vp->v_lock);
+	count = vp->v_count;
+	mutex_exit(&vp->v_lock);
+	if (count > 1) {
+		mutex_exit(&lxpr_mount_lock);
+		return (EBUSY);
+	}
+
+	/*
+	 * purge the dnlc cache for vnode entries
+	 * associated with this file system
+	 */
+	(void) dnlc_purge_vfsp(vfsp, 0);
+
+	/*
+	 * free up the lxprnode and everything else lxpr_mount() acquired
+	 */
+	lxpr_freenode(lxpr_mnt->lxprm_node);
+	zone_rele(lxpr_mnt->lxprm_zone);
+	ldi_ident_release(lxpr_mnt->lxprm_li);
+	kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+	mutex_exit(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * VFS_ROOT entry point: return the root vnode of the mount in *vpp with
+ * a new hold on it.  Always succeeds.
+ */
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	lxpr_mnt_t *mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+	vnode_t *rootvp = LXPTOV(mnt->lxprm_node);
+
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * VFS_STATVFS entry point.
+ *
+ * The file system has no blocks; the file counts are derived from the
+ * size of the process table (v.v_proc) and the number of processes
+ * currently in it (nproc).  Always returns 0.
+ */
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+	extern uint_t nproc;
+	dev32_t fsid32;
+	int nfree;
+
+	/* Process table slots that are not in use */
+	nfree = v.v_proc - nproc;
+
+	bzero((caddr_t)sp, sizeof (*sp));
+
+	/* Block information: there are no blocks */
+	sp->f_bsize = DEV_BSIZE;
+	sp->f_frsize = DEV_BSIZE;
+	sp->f_blocks = sp->f_bfree = sp->f_bavail = (fsblkcnt64_t)0;
+
+	/* File information */
+	sp->f_files = (fsfilcnt64_t)v.v_proc + 2;
+	sp->f_ffree = sp->f_favail = (fsfilcnt64_t)nfree;
+
+	/* File system identification */
+	(void) cmpldev(&fsid32, vfsp->vfs_dev);
+	sp->f_fsid = fsid32;
+	/* It is guaranteed that vsw_name will fit in f_basetype */
+	(void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	sp->f_namemax = 64; /* quite arbitrary */
+
+	/* Two NUL-separated strings; we know f_fstr is 32 chars */
+	bzero(sp->f_fstr, sizeof (sp->f_fstr));
+	(void) strcpy(sp->f_fstr, "/proc");
+	(void) strcpy(&sp->f_fstr[6], "/proc");
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
new file mode 100644
index 0000000000..45bff38e16
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -0,0 +1,2951 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * lx_prvnops.c: Vnode operations for the lx /proc file system
+ *
+ * Assumptions and Gotchas:
+ *
+ * In order to preserve Solaris' security policy, this file system's
+ * functionality does not override Solaris' security policies even if
+ * that means breaking Linux compatibility.
+ *
+ * Linux has no concept of lwps so we only implement procs here as in the
+ * old /proc interface.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+
+/* Dependent on the Solaris procfs */
+extern kthread_t *prchoose(proc_t *);
+
+#include "lx_proc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern volatile clock_t lbolt;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxpr_init() in lx_prvfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+ pathname_t *, int, vnode_t *, cred_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *);
+static int lxpr_readlink(vnode_t *, uio_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+
+/*
+ * Simple conversions to kilobytes.  ptok() shifts left by
+ * (PAGESHIFT - 10), so it relies on PAGESHIFT being at least 10.
+ */
+#define btok(x) ((x) >> 10) /* bytes to kbytes */
+#define ptok(x) ((x) << (PAGESHIFT - 10)) /* pages to kbytes */
+
+/*
+ * The lx /proc vnode operations vector
+ *
+ * Name/function pairs terminated by a NULL pair.  Note that both
+ * VOPNAME_FSYNC and VOPNAME_SEEK are wired to lxpr_sync().
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+ VOPNAME_OPEN, lxpr_open,
+ VOPNAME_CLOSE, lxpr_close,
+ VOPNAME_READ, lxpr_read,
+ VOPNAME_GETATTR, lxpr_getattr,
+ VOPNAME_ACCESS, lxpr_access,
+ VOPNAME_LOOKUP, lxpr_lookup,
+ VOPNAME_READDIR, lxpr_readdir,
+ VOPNAME_READLINK, lxpr_readlink,
+ VOPNAME_FSYNC, lxpr_sync,
+ VOPNAME_SEEK, lxpr_sync,
+ VOPNAME_INACTIVE, (fs_generic_func_p) lxpr_inactive,
+ VOPNAME_CMP, lxpr_cmp,
+ VOPNAME_REALVP, lxpr_realvp,
+ NULL, NULL
+};
+
+
+/*
+ * Fixed contents of the top-level lx /proc directory: each entry pairs a
+ * node type with the name it appears under.
+ */
+static lxpr_dirent_t lx_procdir[] = {
+ { LXPR_CMDLINE, "cmdline" },
+ { LXPR_CPUINFO, "cpuinfo" },
+ { LXPR_DEVICES, "devices" },
+ { LXPR_DMA, "dma" },
+ { LXPR_FILESYSTEMS, "filesystems" },
+ { LXPR_INTERRUPTS, "interrupts" },
+ { LXPR_IOPORTS, "ioports" },
+ { LXPR_KCORE, "kcore" },
+ { LXPR_KMSG, "kmsg" },
+ { LXPR_LOADAVG, "loadavg" },
+ { LXPR_MEMINFO, "meminfo" },
+ { LXPR_MOUNTS, "mounts" },
+ { LXPR_NETDIR, "net" },
+ { LXPR_PARTITIONS, "partitions" },
+ { LXPR_SELF, "self" },
+ { LXPR_STAT, "stat" },
+ { LXPR_UPTIME, "uptime" },
+ { LXPR_VERSION, "version" }
+};
+
+/* Number of entries in lx_procdir[] */
+#define PROCDIRFILES (sizeof (lx_procdir) / sizeof (lx_procdir[0]))
+
+/*
+ * Contents of an lx /proc/<pid> directory: each entry pairs a node type
+ * with the name it appears under.
+ */
+static lxpr_dirent_t piddir[] = {
+ { LXPR_PID_CMDLINE, "cmdline" },
+ { LXPR_PID_CPU, "cpu" },
+ { LXPR_PID_CURDIR, "cwd" },
+ { LXPR_PID_ENV, "environ" },
+ { LXPR_PID_EXE, "exe" },
+ { LXPR_PID_MAPS, "maps" },
+ { LXPR_PID_MEM, "mem" },
+ { LXPR_PID_ROOTDIR, "root" },
+ { LXPR_PID_STAT, "stat" },
+ { LXPR_PID_STATM, "statm" },
+ { LXPR_PID_STATUS, "status" },
+ { LXPR_PID_FDDIR, "fd" }
+};
+
+/* Number of entries in piddir[] */
+#define PIDDIRFILES (sizeof (piddir) / sizeof (piddir[0]))
+
+/*
+ * Contents of the lx /proc/net directory: each entry pairs a node type
+ * with the name it appears under.
+ */
+static lxpr_dirent_t netdir[] = {
+ { LXPR_NET_ARP, "arp" },
+ { LXPR_NET_DEV, "dev" },
+ { LXPR_NET_DEV_MCAST, "dev_mcast" },
+ { LXPR_NET_IGMP, "igmp" },
+ { LXPR_NET_IP_MR_CACHE, "ip_mr_cache" },
+ { LXPR_NET_IP_MR_VIF, "ip_mr_vif" },
+ { LXPR_NET_MCFILTER, "mcfilter" },
+ { LXPR_NET_NETSTAT, "netstat" },
+ { LXPR_NET_RAW, "raw" },
+ { LXPR_NET_ROUTE, "route" },
+ { LXPR_NET_RPC, "rpc" },
+ { LXPR_NET_RT_CACHE, "rt_cache" },
+ { LXPR_NET_SOCKSTAT, "sockstat" },
+ { LXPR_NET_SNMP, "snmp" },
+ { LXPR_NET_STAT, "stat" },
+ { LXPR_NET_TCP, "tcp" },
+ { LXPR_NET_UDP, "udp" },
+ { LXPR_NET_UNIX, "unix" }
+};
+
+/* Number of entries in netdir[] */
+#define NETDIRFILES (sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ *
+ * Opens are read-only; any open for writing fails with EROFS.
+ *
+ * For nodes that stand in for a real vnode (cwd, root, exe and fd
+ * entries) the open is passed on to the real vnode and, on success,
+ * *vpp is replaced by it.  For /proc/kmsg the log device is opened
+ * through the layered driver interface and asked to deliver console
+ * messages.
+ *
+ * Returns 0 on success or an errno value.  On failure no resources
+ * acquired by this routine are left behind.
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr)
+{
+	vnode_t *vp = *vpp;
+	lxpr_node_t *lxpnp = VTOLXP(vp);
+	lxpr_nodetype_t type = lxpnp->lxpr_type;
+	vnode_t *rvp;
+	int error = 0;
+
+	/*
+	 * We only allow reading in this file system
+	 */
+	if (flag & FWRITE)
+		return (EROFS);
+
+	/*
+	 * If this node refers to an underlying vnode, open that instead.
+	 * For file descriptor entries only regular files may be opened;
+	 * the current directory, root directory and executable are
+	 * opened whatever their type.
+	 */
+	if (lxpnp->lxpr_realvp != NULL) {
+		rvp = lxpnp->lxpr_realvp;
+
+		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
+			error = EACCES;
+		else {
+			/*
+			 * Need to hold rvp since VOP_OPEN() may release it.
+			 */
+			VN_HOLD(rvp);
+			error = VOP_OPEN(&rvp, flag, cr);
+			if (error) {
+				VN_RELE(rvp);
+			} else {
+				/* Hand the real vnode back to the caller */
+				*vpp = rvp;
+				VN_RELE(vp);
+			}
+		}
+	}
+
+	if (type == LXPR_KMSG) {
+		ldi_ident_t li = VTOLXPM(vp)->lxprm_li;
+		struct strioctl str;
+		int rv;
+
+		/*
+		 * Open the zone's console device using the layered driver
+		 * interface.
+		 */
+		if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
+		    &lxpnp->lxpr_cons_ldih, li)) != 0)
+			return (error);
+
+		/*
+		 * Send an ioctl to the underlying console device, letting it
+		 * know we're interested in getting console messages.
+		 */
+		str.ic_cmd = I_CONSLOG;
+		str.ic_timout = 0;
+		str.ic_len = 0;
+		str.ic_dp = NULL;
+		if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
+		    (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) {
+			/*
+			 * The open is failing, so lxpr_close() will never
+			 * run for it; close the handle here or it leaks.
+			 */
+			(void) ldi_close(lxpnp->lxpr_cons_ldih, FREAD, cr);
+			return (error);
+		}
+	}
+
+	return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ *
+ * Only /proc/kmsg has anything to tear down: the ldi handle opened in
+ * lxpr_open().  Returns the result of ldi_close() for kmsg and 0 for
+ * every other node.
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
+{
+	lxpr_node_t *lxpr = VTOLXP(vp);
+	lxpr_nodetype_t type = lxpr->lxpr_type;
+
+	/*
+	 * Nodes backed by a real vnode are closed through that vnode,
+	 * so they should never show up here.
+	 */
+	ASSERT(type != LXPR_PID_FD_FD &&
+	    type != LXPR_PID_CURDIR &&
+	    type != LXPR_PID_ROOTDIR &&
+	    type != LXPR_PID_EXE);
+
+	if (type != LXPR_KMSG)
+		return (0);
+
+	return (ldi_close(lxpr->lxpr_cons_ldih, 0, cr));
+}
+
+/*
+ * Array of read functions, one per lx /proc file type.
+ * NOTE(review): presumably indexed by lxpr_nodetype_t, so the order here
+ * must match the order of that enum -- confirm against lx_proc.h.
+ */
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+ lxpr_read_isdir, /* /proc */
+ lxpr_read_isdir, /* /proc/<pid> */
+ lxpr_read_pid_cmdline, /* /proc/<pid>/cmdline */
+ lxpr_read_empty, /* /proc/<pid>/cpu */
+ lxpr_read_invalid, /* /proc/<pid>/cwd */
+ lxpr_read_empty, /* /proc/<pid>/environ */
+ lxpr_read_invalid, /* /proc/<pid>/exe */
+ lxpr_read_pid_maps, /* /proc/<pid>/maps */
+ lxpr_read_empty, /* /proc/<pid>/mem */
+ lxpr_read_invalid, /* /proc/<pid>/root */
+ lxpr_read_pid_stat, /* /proc/<pid>/stat */
+ lxpr_read_pid_statm, /* /proc/<pid>/statm */
+ lxpr_read_pid_status, /* /proc/<pid>/status */
+ lxpr_read_isdir, /* /proc/<pid>/fd */
+ lxpr_read_fd, /* /proc/<pid>/fd/nn */
+ lxpr_read_empty, /* /proc/cmdline */
+ lxpr_read_cpuinfo, /* /proc/cpuinfo */
+ lxpr_read_empty, /* /proc/devices */
+ lxpr_read_empty, /* /proc/dma */
+ lxpr_read_empty, /* /proc/filesystems */
+ lxpr_read_empty, /* /proc/interrupts */
+ lxpr_read_empty, /* /proc/ioports */
+ lxpr_read_empty, /* /proc/kcore */
+ lxpr_read_kmsg, /* /proc/kmsg */
+ lxpr_read_loadavg, /* /proc/loadavg */
+ lxpr_read_meminfo, /* /proc/meminfo */
+ lxpr_read_mounts, /* /proc/mounts */
+ lxpr_read_isdir, /* /proc/net */
+ lxpr_read_net_arp, /* /proc/net/arp */
+ lxpr_read_net_dev, /* /proc/net/dev */
+ lxpr_read_net_dev_mcast, /* /proc/net/dev_mcast */
+ lxpr_read_net_igmp, /* /proc/net/igmp */
+ lxpr_read_net_ip_mr_cache, /* /proc/net/ip_mr_cache */
+ lxpr_read_net_ip_mr_vif, /* /proc/net/ip_mr_vif */
+ lxpr_read_net_mcfilter, /* /proc/net/mcfilter */
+ lxpr_read_net_netstat, /* /proc/net/netstat */
+ lxpr_read_net_raw, /* /proc/net/raw */
+ lxpr_read_net_route, /* /proc/net/route */
+ lxpr_read_net_rpc, /* /proc/net/rpc */
+ lxpr_read_net_rt_cache, /* /proc/net/rt_cache */
+ lxpr_read_net_sockstat, /* /proc/net/sockstat */
+ lxpr_read_net_snmp, /* /proc/net/snmp */
+ lxpr_read_net_stat, /* /proc/net/stat */
+ lxpr_read_net_tcp, /* /proc/net/tcp */
+ lxpr_read_net_udp, /* /proc/net/udp */
+ lxpr_read_net_unix, /* /proc/net/unix */
+ lxpr_read_partitions, /* /proc/partitions */
+ lxpr_read_invalid, /* /proc/self */
+ lxpr_read_stat, /* /proc/stat */
+ lxpr_read_uptime, /* /proc/uptime */
+ lxpr_read_version, /* /proc/version */
+};
+
+/*
+ * Array of lookup functions, indexed by lx /proc file type.
+ *
+ * The table has LXPR_NFILES slots and each entry is annotated with the path
+ * it serves.  Only the four directory nodes (/proc, /proc/<pid>,
+ * /proc/<pid>/fd and /proc/net) have a dedicated lookup routine; every other
+ * node is wired to lxpr_lookup_not_a_dir.
+ *
+ * NOTE(review): the entries presumably have to stay in the same order as the
+ * node type enumeration -- confirm against its definition before adding or
+ * reordering entries.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+ lxpr_lookup_procdir, /* /proc */
+ lxpr_lookup_piddir, /* /proc/<pid> */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/root */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/status */
+ lxpr_lookup_fddir, /* /proc/<pid>/fd */
+ lxpr_lookup_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_lookup_not_a_dir, /* /proc/cmdline */
+ lxpr_lookup_not_a_dir, /* /proc/cpuinfo */
+ lxpr_lookup_not_a_dir, /* /proc/devices */
+ lxpr_lookup_not_a_dir, /* /proc/dma */
+ lxpr_lookup_not_a_dir, /* /proc/filesystems */
+ lxpr_lookup_not_a_dir, /* /proc/interrupts */
+ lxpr_lookup_not_a_dir, /* /proc/ioports */
+ lxpr_lookup_not_a_dir, /* /proc/kcore */
+ lxpr_lookup_not_a_dir, /* /proc/kmsg */
+ lxpr_lookup_not_a_dir, /* /proc/loadavg */
+ lxpr_lookup_not_a_dir, /* /proc/meminfo */
+ lxpr_lookup_not_a_dir, /* /proc/mounts */
+ lxpr_lookup_netdir, /* /proc/net */
+ lxpr_lookup_not_a_dir, /* /proc/net/arp */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev */
+ lxpr_lookup_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_lookup_not_a_dir, /* /proc/net/igmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_lookup_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_lookup_not_a_dir, /* /proc/net/netstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/raw */
+ lxpr_lookup_not_a_dir, /* /proc/net/route */
+ lxpr_lookup_not_a_dir, /* /proc/net/rpc */
+ lxpr_lookup_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_lookup_not_a_dir, /* /proc/net/sockstat */
+ lxpr_lookup_not_a_dir, /* /proc/net/snmp */
+ lxpr_lookup_not_a_dir, /* /proc/net/stat */
+ lxpr_lookup_not_a_dir, /* /proc/net/tcp */
+ lxpr_lookup_not_a_dir, /* /proc/net/udp */
+ lxpr_lookup_not_a_dir, /* /proc/net/unix */
+ lxpr_lookup_not_a_dir, /* /proc/partitions */
+ lxpr_lookup_not_a_dir, /* /proc/self */
+ lxpr_lookup_not_a_dir, /* /proc/stat */
+ lxpr_lookup_not_a_dir, /* /proc/uptime */
+ lxpr_lookup_not_a_dir, /* /proc/version */
+};
+
+/*
+ * Array of readdir functions, indexed by lx /proc file type.
+ *
+ * The table has LXPR_NFILES slots and each entry is annotated with the path
+ * it serves.  Only the four directory nodes (/proc, /proc/<pid>,
+ * /proc/<pid>/fd and /proc/net) have a dedicated readdir routine; every other
+ * node is wired to lxpr_readdir_not_a_dir.
+ *
+ * NOTE(review): the entries presumably have to stay in the same order as the
+ * node type enumeration -- confirm against its definition before adding or
+ * reordering entries.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+ lxpr_readdir_procdir, /* /proc */
+ lxpr_readdir_piddir, /* /proc/<pid> */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cpu */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/cwd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/environ */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/exe */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/maps */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/mem */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/root */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/stat */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/statm */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/status */
+ lxpr_readdir_fddir, /* /proc/<pid>/fd */
+ lxpr_readdir_not_a_dir, /* /proc/<pid>/fd/nn */
+ lxpr_readdir_not_a_dir, /* /proc/cmdline */
+ lxpr_readdir_not_a_dir, /* /proc/cpuinfo */
+ lxpr_readdir_not_a_dir, /* /proc/devices */
+ lxpr_readdir_not_a_dir, /* /proc/dma */
+ lxpr_readdir_not_a_dir, /* /proc/filesystems */
+ lxpr_readdir_not_a_dir, /* /proc/interrupts */
+ lxpr_readdir_not_a_dir, /* /proc/ioports */
+ lxpr_readdir_not_a_dir, /* /proc/kcore */
+ lxpr_readdir_not_a_dir, /* /proc/kmsg */
+ lxpr_readdir_not_a_dir, /* /proc/loadavg */
+ lxpr_readdir_not_a_dir, /* /proc/meminfo */
+ lxpr_readdir_not_a_dir, /* /proc/mounts */
+ lxpr_readdir_netdir, /* /proc/net */
+ lxpr_readdir_not_a_dir, /* /proc/net/arp */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev */
+ lxpr_readdir_not_a_dir, /* /proc/net/dev_mcast */
+ lxpr_readdir_not_a_dir, /* /proc/net/igmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/ip_mr_vif */
+ lxpr_readdir_not_a_dir, /* /proc/net/mcfilter */
+ lxpr_readdir_not_a_dir, /* /proc/net/netstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/raw */
+ lxpr_readdir_not_a_dir, /* /proc/net/route */
+ lxpr_readdir_not_a_dir, /* /proc/net/rpc */
+ lxpr_readdir_not_a_dir, /* /proc/net/rt_cache */
+ lxpr_readdir_not_a_dir, /* /proc/net/sockstat */
+ lxpr_readdir_not_a_dir, /* /proc/net/snmp */
+ lxpr_readdir_not_a_dir, /* /proc/net/stat */
+ lxpr_readdir_not_a_dir, /* /proc/net/tcp */
+ lxpr_readdir_not_a_dir, /* /proc/net/udp */
+ lxpr_readdir_not_a_dir, /* /proc/net/unix */
+ lxpr_readdir_not_a_dir, /* /proc/partitions */
+ lxpr_readdir_not_a_dir, /* /proc/self */
+ lxpr_readdir_not_a_dir, /* /proc/stat */
+ lxpr_readdir_not_a_dir, /* /proc/uptime */
+ lxpr_readdir_not_a_dir, /* /proc/version */
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * Every file readable through the lx procfs is human-readable text rather
+ * than a binary structure, so a single read path serves callers of either
+ * data model; there is nothing that would justify separate 32-bit and 64-bit
+ * read routines.
+ *
+ * The node's type selects a handler from lxpr_read_function[].  The handler
+ * deposits its output (or an error) in a freshly created uiobuf, which is
+ * then flushed and released.  The value returned is whatever
+ * lxpr_uiobuf_flush() reports.
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+    caller_context_t *ct)
+{
+	lxpr_node_t *node;
+	lxpr_nodetype_t ntype;
+	lxpr_uiobuf_t *outbuf;
+	int rv;
+
+	node = VTOLXP(vp);
+	ntype = node->lxpr_type;
+	outbuf = lxpr_uiobuf_new(uiop);
+
+	ASSERT(ntype >= 0 && ntype < LXPR_NFILES);
+
+	/* Let the per-type handler generate the file contents. */
+	(*lxpr_read_function[ntype])(node, outbuf);
+
+	rv = lxpr_uiobuf_flush(outbuf);
+	lxpr_uiobuf_free(outbuf);
+
+	return (rv);
+}
+
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory
+ * - invalid file (used to mean a file that should be implemented,
+ * but isn't yet)
+ * - empty file
+ * - wait to be able to read a file that will never have anything to read
+ *
+ * lxpr_read_isdir() covers the first case: it produces no data and sets
+ * EISDIR on the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_seterr(uiobuf, EINVAL); /* no data is produced; the read is marked as failing with EINVAL */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* writes nothing to uiobuf and sets no error */
+}
+
+/*
+ * lxpr_read_pid_cmdline():
+ *
+ * This is not precisely compatible with linux:
+ *
+ * The linux cmdline returns argv with the correct separation
+ * using \0 between the arguments, we cannot do that without
+ * copying the real argv from the correct process context.
+ * This is too difficult to attempt so we pretend that the
+ * entire cmdline is just argv[0]. This is good enough for
+ * ps to display correctly, but might cause some other things
+ * not to work correctly.
+ *
+ * The psargs string lives inside the target process's user area, so it is
+ * copied into a local buffer while the process is still locked.  Only the
+ * private copy is handed to lxpr_uiobuf_write() once the process has been
+ * unlocked; the process's own storage is never touched after lxpr_unlock().
+ *
+ * If the pid cannot be locked EINVAL is set on uiobuf.  If the process has
+ * no argv (u_argv is 0) nothing is written.
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	char psargs[PSARGSZ];	/* same size as u_psargs */
+	int len;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	if (PTOU(p)->u_argv == 0) {
+		lxpr_unlock(p);
+		return;
+	}
+
+	/* Snapshot the string before letting go of the process. */
+	(void) strncpy(psargs, PTOU(p)->u_psargs, sizeof (psargs));
+	psargs[sizeof (psargs) - 1] = '\0';
+	lxpr_unlock(p);
+
+	/* The terminating NUL is part of what gets written. */
+	len = strlen(psargs);
+	lxpr_uiobuf_write(uiobuf, psargs, len + 1);
+}
+
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ *
+ * Works in two passes.  The first pass walks the segments of the target's
+ * address space under the address space reader lock and snapshots one
+ * print_data record per segment, taking a hold on the backing vnode when the
+ * segment is a segvn mapping of a regular file.  The second pass runs after
+ * the process has been unlocked: it formats one line per record, releases
+ * the vnode hold and frees the record.
+ *
+ * If the pid cannot be locked EINVAL is set on uiobuf.  If the process's
+ * address space is kas, nothing is written.
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ struct as *as;
+ struct seg *seg;
+ char *buf;
+ int buflen = MAXPATHLEN;
+ struct print_data {
+ caddr_t saddr;
+ caddr_t eaddr;
+ int type;
+ char prot[5];
+ uint32_t offset;
+ vnode_t *vp;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head; /* always points at the link to fill in next, so records stay in segment order */
+ struct print_data *pbuf;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ as = p->p_as;
+
+ if (as == &kas) {
+ lxpr_unlock(p);
+ return;
+ }
+
+ mutex_exit(&p->p_lock); /* p_lock is dropped for the segment walk and re-taken just before lxpr_unlock() */
+
+ /* Iterate over all segments in the address space */
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+ vnode_t *vp;
+ uint_t protbits;
+
+ pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+ pbuf->saddr = seg->s_base;
+ pbuf->eaddr = seg->s_base+seg->s_size;
+ pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+ /*
+ * Cheat and only use the protection bits of the first page
+ * in the segment
+ *
+ * NOTE(review): the SEGOP_GETPROT() result is discarded and
+ * protbits has no initial value, so the flags below depend on
+ * the call having filled it in -- confirm it cannot fail here.
+ */
+ (void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+ (void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+ if (protbits & PROT_READ) pbuf->prot[0] = 'r';
+ if (protbits & PROT_WRITE) pbuf->prot[1] = 'w';
+ if (protbits & PROT_EXEC) pbuf->prot[2] = 'x';
+ if (pbuf->type & MAP_SHARED) pbuf->prot[3] = 's';
+ else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+ if (seg->s_ops == &segvn_ops &&
+ SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+ vp != NULL && vp->v_type == VREG) {
+ VN_HOLD(vp); /* released with VN_RELE() in the print loop below */
+ pbuf->vp = vp;
+ } else {
+ pbuf->vp = NULL;
+ }
+
+ pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr); /* offset is narrowed to 32 bits */
+
+ pbuf->next = NULL;
+ *print_tail = pbuf;
+ print_tail = &pbuf->next;
+ }
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+ lxpr_unlock(p);
+
+ buf = kmem_alloc(buflen, KM_SLEEP);
+
+ /* print the data we've extracted */
+ pbuf = print_head;
+ while (pbuf != NULL) {
+ struct print_data *pbuf_next;
+ vattr_t vattr;
+
+ int maj = 0;
+ int min = 0;
+ int inode = 0;
+
+ *buf = '\0'; /* an empty path selects the format without a trailing name */
+ if (pbuf->vp != NULL) {
+ vattr.va_mask = AT_FSID | AT_NODEID;
+ if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED()) == 0) {
+ maj = getmajor(vattr.va_fsid);
+ min = getminor(vattr.va_fsid);
+ inode = vattr.va_nodeid;
+ }
+ (void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+ VN_RELE(pbuf->vp);
+ }
+
+ if (*buf != '\0') {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %d %s\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode, buf);
+ } else {
+ lxpr_uiobuf_printf(uiobuf,
+ "%08x-%08x %s %08x %02d:%03d %d\n",
+ pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+ maj, min, inode);
+ }
+
+ pbuf_next = pbuf->next; /* fetch the link before the record is freed */
+ kmem_free(pbuf, sizeof (*pbuf));
+ pbuf = pbuf_next;
+ }
+
+ kmem_free(buf, buflen);
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ *
+ * Emits one line of seven numbers.  The first is the address space size
+ * converted with btopr(); the second and fourth are both the value returned
+ * by rm_asrss(); the remaining fields are reported as zero.
+ *
+ * If the pid cannot be locked EINVAL is set on uiobuf.
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *procp;
+	struct as *asp;
+	size_t total_pages;
+	size_t resident;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
+
+	if ((procp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	asp = procp->p_as;
+
+	/* p_lock is not held while the address space lock is taken. */
+	mutex_exit(&procp->p_lock);
+
+	AS_LOCK_ENTER(asp, &asp->a_lock, RW_READER);
+	total_pages = btopr(rm_assize(asp));
+	resident = rm_asrss(asp);
+	AS_LOCK_EXIT(asp, &asp->a_lock);
+
+	mutex_enter(&procp->p_lock);
+	lxpr_unlock(procp);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%lu %lu %lu %lu %lu %lu %lu\n",
+	    total_pages, resident, 0L, resident, 0L, 0L, 0L);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ *
+ * Emits the Name/State/Tgid/Pid/PPid/TracerPid/Uid/Gid/FDSize/Groups lines,
+ * then (for a process that is not a zombie, not a system process and not
+ * using kas) the Vm* lines, and finally the signal and capability masks.
+ *
+ * Pid handling: the zone's init process is reported as pid 1 with parent 0,
+ * and a parent that is the zone's init is reported as 1.  A process flagged
+ * SZONETOP reports zsched's pid as its parent so that no pid from outside
+ * the zone is exposed.
+ *
+ * Signal handling: for each native signal i that stol_signo[] maps to a
+ * signal number in range, the pending bit and the disposition are folded
+ * into Linux-numbered sets.  The disposition array u_signal[] is indexed by
+ * (signal number - 1), hence the "i - 1" below.
+ *
+ * If the pid cannot be locked EINVAL is set on uiobuf.
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	kthread_t *t;
+	user_t *up;
+	cred_t *cr;
+	const gid_t *groups;
+	int ngroups;
+	struct as *as;
+	char *status;
+	pid_t pid, ppid;
+	size_t vsize;
+	size_t rss;
+	k_sigset_t current, ignore, handle;
+	int i, lx_sig;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	pid = p->p_pid;
+
+	/*
+	 * Convert pid to the Linux default of 1 if we're the zone's init
+	 * process
+	 */
+	if (pid == curproc->p_zone->zone_proc_initpid) {
+		pid = 1;
+		ppid = 0;	/* parent pid for init is 0 */
+	} else {
+		/*
+		 * Make sure not to reference parent PIDs that reside outside
+		 * the zone
+		 */
+		ppid = ((p->p_flag & SZONETOP)
+		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+		/*
+		 * Convert ppid to the Linux default of 1 if our parent is the
+		 * zone's init process
+		 */
+		if (ppid == curproc->p_zone->zone_proc_initpid)
+			ppid = 1;
+	}
+
+	t = prchoose(p);
+	if (t != NULL) {
+		switch (t->t_state) {
+		case TS_SLEEP:
+			status = "S (sleeping)";
+			break;
+		case TS_RUN:
+		case TS_ONPROC:
+			status = "R (running)";
+			break;
+		case TS_ZOMB:
+			status = "Z (zombie)";
+			break;
+		case TS_STOPPED:
+			status = "T (stopped)";
+			break;
+		default:
+			status = "! (unknown)";
+			break;
+		}
+		thread_unlock(t);
+	} else {
+		/*
+		 * there is a hole in the exit code, where a proc can have
+		 * no threads but it is yet to be flagged SZOMB. We will
+		 * assume we are about to become a zombie
+		 */
+		status = "Z (zombie)";
+	}
+
+	up = PTOU(p);
+	mutex_enter(&p->p_crlock);
+	crhold(cr = p->p_cred);
+	mutex_exit(&p->p_crlock);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "Name:\t%s\n"
+	    "State:\t%s\n"
+	    "Tgid:\t%d\n"
+	    "Pid:\t%d\n"
+	    "PPid:\t%d\n"
+	    "TracerPid:\t%d\n"
+	    "Uid:\t%d\t%d\t%d\t%d\n"
+	    "Gid:\t%d\t%d\t%d\t%d\n"
+	    "FDSize:\t%d\n"
+	    "Groups:\t",
+	    up->u_comm,
+	    status,
+	    pid, /* thread group id - same as pid until we map lwps to procs */
+	    pid,
+	    ppid,
+	    0,
+	    crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+	    crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+	    p->p_fno_ctl);
+
+	ngroups = crgetngroups(cr);
+	groups = crgetgroups(cr);
+	for (i = 0; i < ngroups; i++) {
+		lxpr_uiobuf_printf(uiobuf,
+		    "%d ",
+		    groups[i]);
+	}
+	crfree(cr);
+
+	as = p->p_as;
+	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+		/* p_lock is not held while the address space lock is taken */
+		mutex_exit(&p->p_lock);
+		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+		vsize = rm_assize(as);
+		rss = rm_asrss(as);
+		AS_LOCK_EXIT(as, &as->a_lock);
+		mutex_enter(&p->p_lock);
+
+		lxpr_uiobuf_printf(uiobuf,
+		    "\n"
+		    "VmSize:\t%8lu kB\n"
+		    "VmLck:\t%8lu kB\n"
+		    "VmRSS:\t%8lu kB\n"
+		    "VmData:\t%8lu kB\n"
+		    "VmStk:\t%8lu kB\n"
+		    "VmExe:\t%8lu kB\n"
+		    "VmLib:\t%8lu kB",
+		    btok(vsize),
+		    0l,
+		    ptok(rss),
+		    0l,
+		    btok(p->p_stksize),
+		    ptok(rss),
+		    0l);
+	}
+
+	sigemptyset(&current);
+	sigemptyset(&ignore);
+	sigemptyset(&handle);
+
+	for (i = 1; i < MAXSIG; i++) {
+		lx_sig = stol_signo[i];
+
+		if ((lx_sig > 0) && (lx_sig < MAXSIG)) {
+			if (sigismember(&p->p_sig, i))
+				sigaddset(&current, lx_sig);
+
+			/* u_signal[] holds signal i's disposition at i - 1 */
+			if (up->u_signal[i - 1] == SIG_IGN)
+				sigaddset(&ignore, lx_sig);
+			else if (up->u_signal[i - 1] != SIG_DFL)
+				sigaddset(&handle, lx_sig);
+		}
+	}
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "\n"
+	    "SigPnd:\t%08x%08x\n"
+	    "SigBlk:\t%08x%08x\n"
+	    "SigIgn:\t%08x%08x\n"
+	    "SigCgt:\t%08x%08x\n"
+	    "CapInh:\t%016x\n"
+	    "CapPrm:\t%016x\n"
+	    "CapEff:\t%016x\n",
+	    current.__sigbits[1], current.__sigbits[0],
+	    0, 0, /* signals blocked on per thread basis */
+	    ignore.__sigbits[1], ignore.__sigbits[0],
+	    handle.__sigbits[1], handle.__sigbits[0],
+	    /* Can't do anything with linux capabilities */
+	    0,
+	    0,
+	    0);
+
+	lxpr_unlock(p);
+}
+
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ *
+ * Emits a single line of space-separated fields.  The process identity
+ * fields (pid, ppid, process group, session id, session device, gid) are
+ * gathered first, with fixed Linux-style values substituted when the target
+ * is the zone's init process.  Scheduling data (state letter, nice,
+ * priority, wait channel, cpu) comes from the thread returned by prchoose(),
+ * or is defaulted when there is no thread.  The address space size and rss
+ * are sampled with p_lock dropped.  Fields with no native source are
+ * written as constant zeros, as labelled in the argument list.
+ *
+ * If the pid cannot be locked EINVAL is set on uiobuf.
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ proc_t *p;
+ kthread_t *t;
+ struct as *as;
+ char stat;
+ pid_t pid, ppid, pgpid, spid;
+ gid_t psgid;
+ dev_t psdev;
+ size_t rss, vsize;
+ int nice, pri;
+ caddr_t wchan;
+ processorid_t cpu;
+
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+ p = lxpr_lock(lxpnp->lxpr_pid);
+ if (p == NULL) {
+ lxpr_uiobuf_seterr(uiobuf, EINVAL);
+ return;
+ }
+
+ pid = p->p_pid;
+
+ /*
+ * Set Linux defaults if we're the zone's init process
+ */
+ if (pid == curproc->p_zone->zone_proc_initpid) {
+ pid = 1; /* PID for init */
+ ppid = 0; /* parent PID for init is 0 */
+ pgpid = 0; /* process group for init is 0 */
+ psgid = -1; /* credential GID for init is -1 */
+ spid = 0; /* session id for init is 0 */
+ psdev = 0; /* session device for init is 0 */
+ } else {
+ /*
+ * Make sure not to reference parent PIDs that reside outside
+ * the zone
+ */
+ ppid = ((p->p_flag & SZONETOP)
+ ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+ /*
+ * Convert ppid to the Linux default of 1 if our parent is the
+ * zone's init process
+ */
+ if (ppid == curproc->p_zone->zone_proc_initpid)
+ ppid = 1;
+
+ pgpid = p->p_pgrp;
+
+ mutex_enter(&p->p_splock);
+ mutex_enter(&p->p_sessp->s_lock);
+ spid = p->p_sessp->s_sid;
+ /* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */
+ psdev = p->p_sessp->s_dev;
+ if (p->p_sessp->s_cred)
+ psgid = crgetgid(p->p_sessp->s_cred);
+ else
+ psgid = crgetgid(p->p_cred); /* NOTE(review): p_cred is read here without p_crlock, unlike lxpr_read_pid_status() -- confirm this is safe */
+
+ mutex_exit(&p->p_sessp->s_lock);
+ mutex_exit(&p->p_splock);
+ }
+
+ t = prchoose(p);
+ if (t != NULL) {
+ switch (t->t_state) {
+ case TS_SLEEP:
+ stat = 'S'; break;
+ case TS_RUN:
+ case TS_ONPROC:
+ stat = 'R'; break;
+ case TS_ZOMB:
+ stat = 'Z'; break;
+ case TS_STOPPED:
+ stat = 'T'; break;
+ default:
+ stat = '!'; break;
+ }
+
+ if (CL_DONICE(t, NULL, 0, &nice) != 0)
+ nice = 0;
+
+ pri = v.v_maxsyspri - t->t_pri;
+ wchan = t->t_wchan;
+ cpu = t->t_cpu->cpu_seqid;
+ thread_unlock(t); /* pairs with the thread returned by prchoose() */
+ } else {
+ /* Only zombies have no threads */
+ stat = 'Z';
+ nice = 0;
+ pri = 0;
+ wchan = 0;
+ cpu = 0;
+ }
+ as = p->p_as;
+ mutex_exit(&p->p_lock); /* p_lock is dropped around the address space lock and re-taken afterwards */
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ vsize = rm_assize(as);
+ rss = rm_asrss(as);
+ AS_LOCK_EXIT(as, &as->a_lock);
+ mutex_enter(&p->p_lock);
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%d (%s) %c %d %d %d %d %d "
+ "%lu %lu %lu %lu %lu "
+ "%lu %lu %ld %ld "
+ "%d %d "
+ "0 "
+ "%ld %lu "
+ "%lu %ld %llu "
+ "%lu %lu %u "
+ "%lu %lu "
+ "%lu %lu %lu %lu "
+ "%lu "
+ "%lu %lu "
+ "%d "
+ "%d"
+ "\n",
+ pid,
+ PTOU(p)->u_comm,
+ stat,
+ ppid, pgpid,
+ spid, psdev, psgid,
+ 0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+ p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
+ pri, nice,
+ 0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */
+ vsize, rss, p->p_vmem_ctl,
+ 0l, 0l, USRSTACK, /* startcode, endcode, startstack */
+ 0l, 0l, /* kstkesp, kstkeip */
+ 0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
+ wchan,
+ 0l, 0l, /* nswap, cnswap */
+ 0, /* exit_signal */
+ cpu);
+
+ lxpr_unlock(p);
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf, "Inter-| Receive "
+ " | Transmit\n");
+ lxpr_uiobuf_printf(uiobuf, " face |bytes packets errs drop fifo"
+ " frame compressed multicast|bytes packets errs drop fifo"
+ " colls carrier compressed\n");
+
+ /*
+ * XXX: data about each interface should go here, but we'll wait to
+ * see if anybody wants to use it.
+ *
+ * As written, only the two column-header lines above are emitted;
+ * no per-interface rows follow them.
+ */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{ /* stub: writes nothing to uiobuf and sets no error */
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue. We
+ * translate this into the reception of console messages for this lx zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced.
+ *
+ * The message is fetched through the node's console LDI handle.  The text
+ * is taken from the message's continuation block and is prefixed with
+ * LX_KMSG_PRI.  A message that arrives without a continuation block
+ * carries no text to print, so it is consumed and freed without output
+ * rather than being dereferenced.  Nothing is written if ldi_getmsg() fails.
+ */
+
+#define LX_KMSG_PRI "<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
+{
+	ldi_handle_t lh = lxpnp->lxpr_cons_ldih;
+	mblk_t *mp;
+
+	if (ldi_getmsg(lh, &mp, NULL) != 0)
+		return;
+
+	/*
+	 * lx procfs doesn't like successive reads to the same file
+	 * descriptor unless we do an explicit rewind each time.
+	 */
+	lxpr_uiobuf_seek(uiobuf, 0);
+
+	if (mp->b_cont != NULL) {
+		lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI,
+		    mp->b_cont->b_rptr);
+	}
+
+	freemsg(mp);
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file.
+ *
+ * Just enough for uptime to work
+ *
+ * Output is "<1min> <5min> <15min> <runnable>/<lwps> 0".  Each load average
+ * is a fixed-point value scaled by FSCALE; it is split into an integer part
+ * and a two-digit hundredths part for printing.  The last field is always
+ * written as 0.
+ *
+ * The values handed to lxpr_uiobuf_printf() are converted to the exact types
+ * the conversion specifiers expect (int for %d), since several of the locals
+ * are wider than int.
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	ulong_t avenrun1;
+	ulong_t avenrun5;
+	ulong_t avenrun15;
+	ulong_t avenrun1_cs;
+	ulong_t avenrun5_cs;
+	ulong_t avenrun15_cs;
+	int loadavg[3];
+	int *loadbuf;
+	cpupart_t *cp;
+
+	uint_t nrunnable = 0;
+	rctl_qty_t nlwps;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+	mutex_enter(&cpu_lock);
+
+	/*
+	 * Need to add up values over all CPU partitions. If pools are active,
+	 * only report the values of the zone's partition, which by definition
+	 * includes the current CPU.
+	 */
+	if (pool_pset_enabled()) {
+		psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+		ASSERT(curproc->p_zone != &zone0);
+		cp = CPU->cpu_part;
+
+		nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+		(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+		loadbuf = &loadavg[0];
+
+		/*
+		 * We'll report the total number of lwps in the zone for the
+		 * "nproc" parameter of /proc/loadavg; good enough for lx.
+		 */
+		nlwps = curproc->p_zone->zone_nlwps;
+	} else {
+		cp = cp_list_head;
+		do {
+			nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+		} while ((cp = cp->cp_next) != cp_list_head);
+
+		loadbuf = &avenrun[0];
+
+		/*
+		 * This will report kernel threads as well as user lwps, but it
+		 * should be good enough for lx consumers.
+		 */
+		nlwps = nthread;
+	}
+
+	mutex_exit(&cpu_lock);
+
+	/* Integer part, then the fraction rescaled to hundredths. */
+	avenrun1 = loadbuf[0] >> FSHIFT;
+	avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+	avenrun5 = loadbuf[1] >> FSHIFT;
+	avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+	avenrun15 = loadbuf[2] >> FSHIFT;
+	avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+	    (long)avenrun1, (int)avenrun1_cs,
+	    (long)avenrun5, (int)avenrun5_cs,
+	    (long)avenrun15, (int)avenrun15_cs,
+	    (int)nrunnable, (int)nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ *
+ * Memory figures are derived from physmem and freemem, swap figures from
+ * k_anoninfo (ani_max as the total, ani_phys_resv as the amount used), all
+ * converted from pages to bytes.  The summary table is printed in bytes and
+ * the per-item lines in kB.  Items with no value computed here (shared,
+ * buffers, cached, active, inactive, high memory) are written as 0, and the
+ * "Low" lines repeat the overall memory totals.
+ *
+ * The byte counts are kept as unsigned long to match the %lu conversions,
+ * and the LowTotal/LowFree lines use %8lu because their arguments are long
+ * sized.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	ulong_t total_mem = physmem * PAGESIZE;
+	ulong_t free_mem = freemem * PAGESIZE;
+	ulong_t total_swap = k_anoninfo.ani_max * PAGESIZE;
+	ulong_t used_swap = k_anoninfo.ani_phys_resv * PAGESIZE;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    " total: used: free: shared: buffers: cached:\n"
+	    "Mem: %8lu %8lu %8lu %8u %8u %8u\n"
+	    "Swap: %8lu %8lu %8lu\n"
+	    "MemTotal: %8lu kB\n"
+	    "MemFree: %8lu kB\n"
+	    "MemShared: %8u kB\n"
+	    "Buffers: %8u kB\n"
+	    "Cached: %8u kB\n"
+	    "SwapCached:%8u kB\n"
+	    "Active: %8u kB\n"
+	    "Inactive: %8u kB\n"
+	    "HighTotal: %8u kB\n"
+	    "HighFree: %8u kB\n"
+	    "LowTotal: %8lu kB\n"
+	    "LowFree: %8lu kB\n"
+	    "SwapTotal: %8lu kB\n"
+	    "SwapFree: %8lu kB\n",
+	    total_mem, total_mem - free_mem, free_mem, 0, 0, 0,
+	    total_swap, used_swap, total_swap - used_swap,
+	    btok(total_mem),				/* MemTotal */
+	    btok(free_mem),				/* MemFree */
+	    0,						/* MemShared */
+	    0,						/* Buffers */
+	    0,						/* Cached */
+	    0,						/* SwapCached */
+	    0,						/* Active */
+	    0,						/* Inactive */
+	    0,						/* HighTotal */
+	    0,						/* HighFree */
+	    btok(total_mem),				/* LowTotal */
+	    btok(free_mem),				/* LowFree */
+	    btok(total_swap),				/* SwapTotal */
+	    btok(total_swap - used_swap));		/* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts():
+ *
+ * Writes one "resource mountpoint fstype ro|rw 0 0" line per mount.  The
+ * global zone walks the list starting at rootvfs; any other zone walks its
+ * own zone_vfslist, and gets a synthesized "/ /" line first when that list
+ * is empty or does not start with the zone's root path.
+ *
+ * The work is split in two phases: the mount data is copied into a private
+ * list (holding the refstrs) while the vfs list lock is held, and the list
+ * is then printed and torn down with the lock released.  Entries whose
+ * mount point cannot be looked up, or whose vnode is not flagged VROOT, are
+ * skipped.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ struct vfs *vfsp;
+ struct vfs *vfslist;
+ zone_t *zone = LXPTOZ(lxpnp);
+ struct print_data {
+ refstr_t *vfs_mntpt;
+ refstr_t *vfs_resource;
+ uint_t vfs_flag;
+ int vfs_fstype;
+ struct print_data *next;
+ } *print_head = NULL;
+ struct print_data **print_tail = &print_head; /* always points at the link to fill in next, so the copy keeps list order */
+ struct print_data *printp;
+
+ vfs_list_read_lock();
+
+ if (zone == global_zone) {
+ vfsp = vfslist = rootvfs;
+ } else {
+ vfsp = vfslist = zone->zone_vfslist;
+ /*
+ * If the zone has a root entry, it will be the first in
+ * the list. If it doesn't, we conjure one up.
+ */
+ if (vfslist == NULL ||
+ strcmp(refstr_value(vfsp->vfs_mntpt),
+ zone->zone_rootpath) != 0) {
+ struct vfs *tvfsp;
+ /*
+ * The root of the zone is not a mount point. The vfs
+ * we want to report is that of the zone's root vnode.
+ */
+ tvfsp = zone->zone_rootvp->v_vfsp;
+
+ lxpr_uiobuf_printf(uiobuf,
+ "/ / %s %s 0 0\n",
+ vfssw[tvfsp->vfs_fstype].vsw_name,
+ tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+ }
+ if (vfslist == NULL) {
+ vfs_list_unlock();
+ return;
+ }
+ }
+
+ /*
+ * Later on we have to do a lookupname, which can end up causing
+ * another vfs_list_read_lock() to be called. Which can lead to a
+ * deadlock. To avoid this, we extract the data we need into a local
+ * list, then we can run this list without holding vfs_list_read_lock()
+ * We keep the list in the same order as the vfs_list
+ */
+ do {
+ /* Skip mounts we shouldn't show */
+ if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+ goto nextfs;
+ }
+
+ printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+ refstr_hold(vfsp->vfs_mntpt); /* both holds are dropped with refstr_rele() in the print loop */
+ printp->vfs_mntpt = vfsp->vfs_mntpt;
+ refstr_hold(vfsp->vfs_resource);
+ printp->vfs_resource = vfsp->vfs_resource;
+ printp->vfs_flag = vfsp->vfs_flag;
+ printp->vfs_fstype = vfsp->vfs_fstype;
+ printp->next = NULL;
+
+ *print_tail = printp;
+ print_tail = &printp->next;
+
+nextfs:
+ vfsp = (zone == global_zone) ?
+ vfsp->vfs_next : vfsp->vfs_zone_next;
+
+ } while (vfsp != vfslist); /* the walk ends when it arrives back at its starting entry */
+
+ vfs_list_unlock();
+
+ /*
+ * now we can run through what we've extracted without holding
+ * vfs_list_read_lock()
+ */
+ printp = print_head;
+ while (printp != NULL) {
+ struct print_data *printp_next;
+ const char *resource;
+ char *mntpt;
+ struct vnode *vp;
+ int error;
+
+ mntpt = (char *)refstr_value(printp->vfs_mntpt);
+ resource = refstr_value(printp->vfs_resource);
+
+ if (mntpt != NULL && mntpt[0] != '\0')
+ mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+ else
+ mntpt = "-";
+
+ error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+ if (error != 0)
+ goto nextp;
+
+ if (!(vp->v_flag & VROOT)) {
+ VN_RELE(vp);
+ goto nextp;
+ }
+ VN_RELE(vp);
+
+ if (resource != NULL && resource[0] != '\0') {
+ if (resource[0] == '/') {
+ resource = ZONE_PATH_VISIBLE(resource, zone) ?
+ ZONE_PATH_TRANSLATE(resource, zone) :
+ mntpt; /* a path resource the zone cannot see is replaced by the mount point */
+ }
+ } else {
+ resource = "-";
+ }
+
+ lxpr_uiobuf_printf(uiobuf,
+ "%s %s %s %s 0 0\n",
+ resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
+ printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+ printp_next = printp->next; /* fetch the link before the record is freed */
+ refstr_rele(printp->vfs_mntpt);
+ refstr_rele(printp->vfs_resource);
+ kmem_free(printp, sizeof (*printp));
+ printp = printp_next;
+
+ }
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * We don't support partitions in a local zone because it requires access to
+ * physical devices. But we need to fake up enough of the file to show that we
+ * have no partitions.
+ *
+ * The output is therefore just the column-header line followed by an empty
+ * line; no partition rows are ever written.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "major minor #blocks name rio rmerge rsect ruse "
+ "wio wmerge wsect wuse running use aveq\n\n");
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file.
+ *
+ * Produces a single line built from the LX_UNAME_SYSNAME, LX_UNAME_RELEASE
+ * and LX_UNAME_VERSION constants plus the name and version of the compiler
+ * that built this file: gcc's version macros when __GNUC__ is defined,
+ * otherwise a three-part number unpacked from __SUNPRO_C.  The trailing
+ * timestamp is the compile-time __TIME__ and __DATE__ of this file.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ lxpr_uiobuf_printf(uiobuf,
+ "%s version %s (%s version %d.%d.%d) "
+ "#%s SMP %s\n",
+ LX_UNAME_SYSNAME, LX_UNAME_RELEASE,
+#if defined(__GNUC__)
+ "gcc",
+ __GNUC__,
+ __GNUC_MINOR__,
+ __GNUC_PATCHLEVEL__,
+#else
+ "Sun C",
+ __SUNPRO_C / 0x100,
+ (__SUNPRO_C & 0xff) / 0x10,
+ __SUNPRO_C & 0xf,
+#endif
+ LX_UNAME_VERSION,
+ __TIME__ " " __DATE__);
+}
+
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ * Output, in order: one cumulative "cpu" line, one "cpuN" line per CPU, and
+ * then the page, swap, intr, ctxt, btime and processes lines.  CPU times
+ * come from get_cpu_mstate() converted to ticks; the second column of every
+ * cpu line is always 0.
+ *
+ * Both loops walk a circular CPU list starting at the current CPU, following
+ * cpu_next_part when pools are enabled and cpu_next otherwise, with cpu_lock
+ * held.  A CPU without CPU_EXISTS contributes nothing, but the walk still
+ * advances past it; skipping the advance would leave the loop stuck on that
+ * CPU forever.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	ulong_t idle_cum = 0;
+	ulong_t sys_cum = 0;
+	ulong_t user_cum = 0;
+	ulong_t pgpgin_cum = 0;
+	ulong_t pgpgout_cum = 0;
+	ulong_t pgswapout_cum = 0;
+	ulong_t pgswapin_cum = 0;
+	ulong_t intr_cum = 0;
+	ulong_t pswitch_cum = 0;
+	ulong_t forks_cum = 0;
+	hrtime_t msnsecs[NCMSTATES];
+
+	ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	/* Calculate cumulative stats */
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system
+		 * or aren't up yet.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			int i;
+
+			get_cpu_mstate(cp, msnsecs);
+
+			idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+			sys_cum += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+			user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+			pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+			pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+			pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+			pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+			for (i = 0; i < PIL_MAX; i++)
+				intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+			pswitch_cum += CPU_STATS(cp, sys.pswitch);
+			forks_cum += CPU_STATS(cp, sys.sysfork);
+			forks_cum += CPU_STATS(cp, sys.sysvfork);
+		}
+
+		/* Always step to the next CPU, counted or not. */
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "cpu %ld %ld %ld %ld\n",
+	    user_cum, 0L, sys_cum, idle_cum);
+
+	/* Do per processor stats */
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system
+		 * or aren't up yet.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			ulong_t idle_ticks;
+			ulong_t sys_ticks;
+			ulong_t user_ticks;
+
+			get_cpu_mstate(cp, msnsecs);
+
+			idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+			sys_ticks = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+			user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+			lxpr_uiobuf_printf(uiobuf,
+			    "cpu%d %ld %ld %ld %ld\n",
+			    cp->cpu_id,
+			    user_ticks, 0L, sys_ticks, idle_ticks);
+		}
+
+		/* Always step to the next CPU, printed or not. */
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	mutex_exit(&cpu_lock);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "page %lu %lu\n"
+	    "swap %lu %lu\n"
+	    "intr %lu\n"
+	    "ctxt %lu\n"
+	    "btime %lu\n"
+	    "processes %lu\n",
+	    pgpgin_cum, pgpgout_cum,
+	    pgswapin_cum, pgswapout_cum,
+	    intr_cum,
+	    pswitch_cum,
+	    boot_time,
+	    forks_cum);
+}
+
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * format is: "<uptime_secs> <idle_secs>", each with two decimal places.
+ * Use fixed point arithmetic to get 2 decimal places
+ *
+ * Uptime is derived from lbolt.  Idle time is the sum of the idle and wait
+ * tick counters over the CPUs visited, divided by the number of CPUs
+ * counted.  The CPU list is circular and is walked from the current CPU
+ * with cpu_lock held, following cpu_next_part when pools are enabled and
+ * cpu_next otherwise.  A CPU without CPU_EXISTS is not counted, but the
+ * walk still advances past it so the loop always terminates.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	ulong_t idle_cum = 0;
+	ulong_t cpu_count = 0;
+	ulong_t idle_s;
+	ulong_t idle_cs;
+	ulong_t up_s;
+	ulong_t up_cs;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+	/* Calculate cumulative stats */
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system
+		 * or aren't up yet.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+			idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+			cpu_count += 1;
+		}
+
+		/* Always step to the next CPU, counted or not. */
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+	mutex_exit(&cpu_lock);
+
+	/* Capture lbolt in case it changes */
+	up_cs = lbolt;
+	up_s = up_cs / hz;
+	up_cs %= hz;
+	up_cs *= 100;
+	up_cs /= hz;
+
+	/* Average the idle ticks; never divide by zero on non-DEBUG kernels */
+	ASSERT(cpu_count > 0);
+	if (cpu_count > 0)
+		idle_cum /= cpu_count;
+	idle_s = idle_cum / hz;
+	idle_cs = idle_cum % hz;
+	idle_cs *= 100;
+	idle_cs /= hz;
+
+	/* The hundredths are below 100, so narrowing them for %02d is safe. */
+	lxpr_uiobuf_printf(uiobuf,
+	    "%ld.%02d %ld.%02d\n",
+	    (long)up_s, (int)up_cs, (long)idle_s, (int)idle_cs);
+}
+
+/*
+ * Flag names printed by lxpr_read_cpuinfo() for AMD processors from the
+ * %edx result of cpuid function 0x80000001.  Entry i names bit i (four
+ * entries per row, so rows start at bits 0, 4, 8, ...); a NULL entry means
+ * the bit is not reported.
+ */
+static const char *amd_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "mp",
+ "nx", NULL, "mmxext", NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", "3dnowext", "3dnow"
+};
+
+/*
+ * Flag names printed by lxpr_read_cpuinfo() for AMD processors from the
+ * %ecx result of cpuid function 0x80000001.  Entry i names bit i; only
+ * bits 0 through 4 are covered and NULL entries are not reported.
+ */
+static const char *amd_x_ecx[] = {
+ "lahf_lm", NULL, "svm", NULL,
+ "altmovcr8"
+};
+
+/*
+ * Flag names printed by lxpr_read_cpuinfo() when x86_vendor is
+ * X86_VENDOR_TM.  Entry i names bit i of the value tested; NULL entries
+ * are not reported.
+ *
+ * NOTE(review): lxpr_read_cpuinfo() tests these against the %edx result of
+ * cpuid function 0x80000001 -- confirm that is the register these
+ * Transmeta flag names are really defined for.
+ */
+static const char *tm_x_edx[] = {
+ "recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Flag names printed by lxpr_read_cpuinfo() for Intel processors from the
+ * %edx result of cpuid function 0x80000001.  Entry i names bit i (four
+ * entries per row); a NULL entry means the bit is not reported.
+ *
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ */
+static const char *intc_x_edx[] = {
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, "syscall",
+ NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ "nx", NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL,
+ NULL, "lm", NULL, NULL
+};
+
+/*
+ * Flag names printed by lxpr_read_cpuinfo() for every vendor from the
+ * %edx result of cpuid function 1 (the standard feature flags).  Entry i
+ * names bit i (four entries per row); a NULL entry means the bit is not
+ * reported.
+ */
+static const char *intc_edx[] = {
+ "fpu", "vme", "de", "pse",
+ "tsc", "msr", "pae", "mce",
+ "cx8", "apic", NULL, "sep",
+ "mtrr", "pge", "mca", "cmov",
+ "pat", "pse36", "pn", "clflush",
+ NULL, "dts", "acpi", "mmx",
+ "fxsr", "sse", "sse2", "ss",
+ "ht", "tm", "ia64", "pbe"
+};
+
+/*
+ * Flag names printed by lxpr_read_cpuinfo() for every vendor from the
+ * %ecx result of cpuid function 1.  Entry i names bit i (four entries per
+ * row); only bits 0 through 14 are covered and NULL entries are not
+ * reported.
+ *
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ */
+static const char *intc_ecx[] = {
+ "pni", NULL, NULL, "monitor",
+ "ds_cpl", NULL, NULL, "est",
+ "tm2", NULL, "cid", NULL,
+ NULL, "cx16", "xtpr"
+};
+
+/*
+ * Print " <name>" for every bit that is set in "bits" and has a non-NULL
+ * entry in "names".  names[i] names bit i; nnames is the number of entries
+ * in the table and must not exceed 32.
+ */
+static void
+lxpr_print_cpu_flags(lxpr_uiobuf_t *uiobuf, uint32_t bits,
+    const char **names, int nnames)
+{
+	int i;
+
+	for (i = 0; i < nnames; i++) {
+		/*
+		 * The mask is built from an unsigned 1: shifting a signed
+		 * 1 left by 31 (needed for the 32-entry tables) is
+		 * undefined behaviour.
+		 */
+		if ((bits & (1U << i)) != 0 && names[i] != NULL)
+			lxpr_uiobuf_printf(uiobuf, " %s", names[i]);
+	}
+}
+
+/*
+ * lxpr_read_cpuinfo(): read the contents of the "cpuinfo" file.
+ *
+ * Walks the CPU list under cpu_lock and prints one Linux-style record per
+ * CPU: identification, clock rate, cache size, the fixed "bug" answers and
+ * finally the feature flags.  Flags are printed in this order: standard
+ * %edx, vendor specific extended flags, standard %ecx.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	char brandstr[CPU_IDSTRLEN];
+	struct cpuid_regs cpr;
+	int maxeax;
+	uint32_t std_ecx, std_edx, ext_ecx, ext_edx;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * This returns the maximum eax value for standard cpuid
+		 * functions in eax.
+		 */
+		cpr.cp_eax = 0;
+		(void) cpuid_insn(cp, &cpr);
+		maxeax = cpr.cp_eax;
+
+		/*
+		 * Get standard x86 feature flags.
+		 */
+		cpr.cp_eax = 1;
+		(void) cpuid_insn(cp, &cpr);
+		std_ecx = cpr.cp_ecx;
+		std_edx = cpr.cp_edx;
+
+		/*
+		 * Now get extended feature flags.
+		 */
+		cpr.cp_eax = 0x80000001;
+		(void) cpuid_insn(cp, &cpr);
+		ext_ecx = cpr.cp_ecx;
+		ext_edx = cpr.cp_edx;
+
+		(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+		lxpr_uiobuf_printf(uiobuf,
+		    "processor\t: %d\n"
+		    "vendor_id\t: %s\n"
+		    "cpu family\t: %d\n"
+		    "model\t\t: %d\n"
+		    "model name\t: %s\n"
+		    "stepping\t: %d\n"
+		    "cpu MHz\t\t: %u.%03u\n",
+		    cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+		    cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+		    (uint32_t)(cpu_freq_hz / 1000000),
+		    ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+		lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+		    getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+		if (x86_feature & X86_HTT) {
+			/*
+			 * 'siblings' is used for HT-style threads
+			 */
+			lxpr_uiobuf_printf(uiobuf,
+			    "physical id\t: %lu\n"
+			    "siblings\t: %u\n", chip_plat_get_chipid(cp),
+			    cpuid_get_ncpu_per_chip(cp));
+		}
+
+		/*
+		 * Since we're relatively picky about running on older hardware,
+		 * we can be somewhat cavalier about the answers to these ones.
+		 *
+		 * In fact, given the hardware we support, we just say:
+		 *
+		 *	fdiv_bug	: no	(if we're on a 64-bit kernel)
+		 *	hlt_bug		: no
+		 *	f00f_bug	: no
+		 *	coma_bug	: no
+		 *	wp		: yes	(write protect in supervisor mode)
+		 */
+		lxpr_uiobuf_printf(uiobuf,
+		    "fdiv_bug\t: %s\n"
+		    "hlt_bug \t: no\n"
+		    "f00f_bug\t: no\n"
+		    "coma_bug\t: no\n"
+		    "fpu\t\t: %s\n"
+		    "fpu_exception\t: %s\n"
+		    "cpuid level\t: %d\n"
+		    "flags\t\t:",
+#if defined(__i386)
+		    fpu_pentium_fdivbug ? "yes" : "no",
+#else
+		    "no",
+#endif	/* __i386 */
+		    fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+		    maxeax);
+
+		/* standard %edx flags, common to all vendors */
+		lxpr_print_cpu_flags(uiobuf, std_edx, intc_edx,
+		    (int)(sizeof (intc_edx) / sizeof (intc_edx[0])));
+
+		/*
+		 * name additional features where appropriate
+		 */
+		switch (x86_vendor) {
+		case X86_VENDOR_Intel:
+			lxpr_print_cpu_flags(uiobuf, ext_edx, intc_x_edx,
+			    (int)(sizeof (intc_x_edx) /
+			    sizeof (intc_x_edx[0])));
+			break;
+
+		case X86_VENDOR_AMD:
+			lxpr_print_cpu_flags(uiobuf, ext_edx, amd_x_edx,
+			    (int)(sizeof (amd_x_edx) /
+			    sizeof (amd_x_edx[0])));
+			lxpr_print_cpu_flags(uiobuf, ext_ecx, amd_x_ecx,
+			    (int)(sizeof (amd_x_ecx) /
+			    sizeof (amd_x_ecx[0])));
+			break;
+
+		case X86_VENDOR_TM:
+			lxpr_print_cpu_flags(uiobuf, ext_edx, tm_x_edx,
+			    (int)(sizeof (tm_x_edx) / sizeof (tm_x_edx[0])));
+			break;
+
+		default:
+			break;
+		}
+
+		/* standard %ecx flags, common to all vendors */
+		lxpr_print_cpu_flags(uiobuf, std_ecx, intc_ecx,
+		    (int)(sizeof (intc_ecx) / sizeof (intc_ecx[0])));
+
+		lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	mutex_exit(&cpu_lock);
+}
+
+/*
+ * lxpr_read_fd(): read handler for LXPR_PID_FD_FD nodes (the entries of a
+ * process's fd directory).  It produces no data; it only records EFAULT
+ * as the error on the output buffer.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+ lxpr_uiobuf_seterr(uiobuf, EFAULT);
+}
+
+
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ *
+ * When the node shadows a real vnode and the caller passed ATTR_REAL, the
+ * attributes come from that vnode (after an access check on it); fd
+ * entries are then patched so that they keep looking like symlinks.
+ * Otherwise the attributes are synthesized from the lx /proc node itself.
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
+{
+	extern uint_t nproc;
+	lxpr_node_t *node = VTOLXP(vp);
+	lxpr_nodetype_t type = node->lxpr_type;
+	vnode_t *realvp = node->lxpr_realvp;
+	int error;
+
+	if (realvp != NULL && (flags & ATTR_REAL)) {
+		/* attribute information is withheld unless access is allowed */
+		error = VOP_ACCESS(realvp, 0, 0, cr);
+		if (error != 0)
+			return (error);
+
+		/* fetch the underlying vnode's attributes */
+		error = VOP_GETATTR(realvp, vap, flags, cr);
+		if (error != 0)
+			return (error);
+
+		/*
+		 * An entry in lx /proc/pid/fd keeps its own mode and type
+		 * so that it continues to look like a symlink.
+		 */
+		if (type == LXPR_PID_FD_FD) {
+			vap->va_mode = node->lxpr_mode;
+			vap->va_type = vp->v_type;
+			vap->va_size = 0;
+			vap->va_nlink = 1;
+		}
+		return (0);
+	}
+
+	/* Start from defaults; some node types adjust them below */
+	bzero(vap, sizeof (*vap));
+	vap->va_ctime = node->lxpr_time;
+	vap->va_mtime = node->lxpr_time;
+	vap->va_atime = node->lxpr_time;
+	vap->va_nlink = 1;
+	vap->va_type = vp->v_type;
+	vap->va_mode = node->lxpr_mode;
+	vap->va_fsid = vp->v_vfsp->vfs_dev;
+	vap->va_blksize = DEV_BSIZE;
+	vap->va_uid = node->lxpr_uid;
+	vap->va_gid = node->lxpr_gid;
+	vap->va_nodeid = node->lxpr_ino;
+
+	if (type == LXPR_PROCDIR) {
+		/* one entry per process plus "." ".." and the fixed files */
+		vap->va_nlink = nproc + 2 + PROCDIRFILES;
+		vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+	} else if (type == LXPR_PIDDIR) {
+		vap->va_nlink = PIDDIRFILES;
+		vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+	} else if (type == LXPR_SELF) {
+		/* "self" is reported as owned by the calling process */
+		vap->va_uid = crgetruid(curproc->p_cred);
+		vap->va_gid = crgetrgid(curproc->p_cred);
+	}
+
+	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+	return (0);
+}
+
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ *
+ * Returns 0 if "mode" access is allowed for "cr", EROFS for any write
+ * request, ENOENT if the process behind a per-process node has gone away,
+ * and EACCES (or the underlying vnode's error) otherwise.
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr)
+{
+	lxpr_node_t *lxpnp = VTOLXP(vp);
+	int shift = 0;
+	proc_t *tp;
+
+	/* lx /proc is a read only file system */
+	if (mode & VWRITE)
+		return (EROFS);
+
+	/*
+	 * If this is a restricted file, check access permissions.
+	 */
+	switch (lxpnp->lxpr_type) {
+	case LXPR_PIDDIR:
+		return (0);
+	case LXPR_PID_CURDIR:
+	case LXPR_PID_ENV:
+	case LXPR_PID_EXE:
+	case LXPR_PID_MAPS:
+	case LXPR_PID_MEM:
+	case LXPR_PID_ROOTDIR:
+	case LXPR_PID_FDDIR:
+	case LXPR_PID_FD_FD:
+		if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
+			return (ENOENT);
+		/*
+		 * A process may always look at itself; anyone else needs
+		 * either the proc-access privilege or credentials that are
+		 * permitted to act on the target process.
+		 */
+		if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
+		    priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
+			lxpr_unlock(tp);
+			return (EACCES);
+		}
+		lxpr_unlock(tp);
+		/* passed the per-process check; continue with the mode check */
+		break;
+	default:
+		break;
+	}
+
+	if (lxpnp->lxpr_realvp != NULL) {
+		/*
+		 * For these we use the underlying vnode's accessibility.
+		 */
+		return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr));
+	}
+
+	/*
+	 * Access check is based on only
+	 * one of owner, group, public.
+	 * If not owner, then check group.
+	 * If not a member of the group, then
+	 * check public access.
+	 */
+	if (crgetuid(cr) != lxpnp->lxpr_uid) {
+		shift += 3;
+		if (!groupmember((gid_t)lxpnp->lxpr_gid, cr))
+			shift += 3;
+	}
+
+	/*
+	 * Shifting the node's mode left lines the selected permission
+	 * triplet up with the requested bits; whatever is still set in
+	 * "mode" afterwards is a permission that was asked for but is not
+	 * granted.
+	 */
+	mode &= ~(lxpnp->lxpr_mode << shift);
+
+	if (mode == 0)
+		return (0);
+
+	return (EACCES);
+}
+
+
+
+
+/*
+ * Lookup routine that finds nothing: it ignores both arguments and always
+ * returns NULL, which lxpr_lookup() turns into ENOENT.
+ */
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
+{
+ return (NULL);
+}
+
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ *
+ * Resolves "comp" within directory "dp".  ".." and "." (or an empty name)
+ * are answered here; every other name is handed to the lookup routine
+ * registered for the directory's node type.  On success *vpp is a held
+ * vnode and 0 is returned; ENOENT is returned when nothing matches.
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+    int flags, vnode_t *rdir, cred_t *cr)
+{
+	lxpr_node_t *dirnode = VTOLXP(dp);
+	lxpr_nodetype_t type = dirnode->lxpr_type;
+	vnode_t *found;
+	int error;
+
+	ASSERT(dp->v_type == VDIR);
+	ASSERT(type >= 0 && type < LXPR_NFILES);
+
+	/*
+	 * Lookups under these node types are performed on their realvp,
+	 * so they should never be seen here.
+	 */
+	ASSERT(type != LXPR_PID_FD_FD &&
+	    type != LXPR_PID_CURDIR &&
+	    type != LXPR_PID_ROOTDIR);
+
+	/* searching the directory requires execute permission on it */
+	error = lxpr_access(dp, VEXEC, 0, cr);
+	if (error != 0)
+		return (error);
+
+	/* ".." is simply the parent vnode */
+	if (strcmp(comp, "..") == 0) {
+		found = dirnode->lxpr_parent;
+		VN_HOLD(found);
+		*vpp = found;
+		return (0);
+	}
+
+	/* an empty name and "." both mean the directory being searched */
+	if (dp->v_type == VDIR &&
+	    (comp[0] == '\0' || strcmp(comp, ".") == 0)) {
+		VN_HOLD(dp);
+		*vpp = dp;
+		return (0);
+	}
+
+	/* anything else is up to the per-type lookup routine */
+	found = lxpr_lookup_function[type](dp, comp);
+	*vpp = found;
+	if (found == NULL)
+		return (ENOENT);
+	return (0);
+}
+
+/*
+ * Linear search of a directory table for an entry named "comp".  The first
+ * match gets a node of the entry's type created under "dp" for process "p"
+ * and that node's vnode is returned; NULL is returned when no entry matches.
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+    lxpr_dirent_t *dirtab, int dirtablen)
+{
+	int idx = 0;
+
+	while (idx < dirtablen) {
+		lxpr_dirent_t *ent = &dirtab[idx++];
+		lxpr_node_t *node;
+		vnode_t *vp;
+
+		if (strcmp(ent->d_name, comp) != 0)
+			continue;
+
+		node = lxpr_getnode(dp, ent->d_type, p, 0);
+		vp = LXPTOV(node);
+		ASSERT(vp != NULL);
+		return (vp);
+	}
+
+	return (NULL);
+}
+
+
+/*
+ * Lookup a name in a per-process directory.  The process is locked for the
+ * duration of the table search; NULL is returned if the process cannot be
+ * locked or the name is not in the piddir table.
+ */
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+	lxpr_node_t *dirnode = VTOLXP(dp);
+	vnode_t *vp;
+	proc_t *p;
+
+	ASSERT(dirnode->lxpr_type == LXPR_PIDDIR);
+
+	if ((p = lxpr_lock(dirnode->lxpr_pid)) == NULL)
+		return (NULL);
+
+	vp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+	lxpr_unlock(p);
+
+	return (vp);
+}
+
+
+/*
+ * Lookup one of the process's open files.
+ *
+ * "comp" must be the decimal rendition of a file descriptor.  On success
+ * the returned vnode belongs to a new LXPR_PID_FD_FD node whose lxpr_realvp
+ * holds a reference on the open file's vnode.  NULL is returned if the name
+ * is not a number, the process is gone (or is a zombie or system process),
+ * or the descriptor is not open.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+	lxpr_node_t *dlxpnp = VTOLXP(dp);
+	lxpr_node_t *lxpnp;
+	vnode_t *vp = NULL;
+	proc_t *p;
+	file_t *fp;
+	uint_t fd;
+	int c;
+	uf_entry_t *ufp;
+	uf_info_t *fip;
+
+	ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+	/*
+	 * convert the string rendition of the filename
+	 * to a file descriptor
+	 */
+	fd = 0;
+	while ((c = *comp++) != '\0') {
+		uint_t ofd;
+
+		if (c < '0' || c > '9')
+			return (NULL);
+
+		ofd = fd;
+		fd = 10 * fd + c - '0';
+		/* unsigned wrap-around: the name is too large to be an fd */
+		if (fd / 10 != ofd)
+			return (NULL);
+	}
+
+	/*
+	 * get the proc to work with and lock it
+	 */
+	p = lxpr_lock(dlxpnp->lxpr_pid);
+	if (p == NULL)
+		return (NULL);
+
+	/*
+	 * If the process is a zombie or system process
+	 * it can't have any open files.
+	 */
+	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
+		lxpr_unlock(p);
+		return (NULL);
+	}
+
+	/*
+	 * get us a fresh node/vnode
+	 */
+	lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
+
+	/*
+	 * get open file info
+	 */
+	fip = (&(p)->p_user.u_finfo);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * got the fd data so now done with this proc
+	 *
+	 * NOTE(review): fip points into the proc structure and is still
+	 * used below after the proc has been unlocked -- confirm that
+	 * holding fi_lock is enough to keep it valid.
+	 */
+	lxpr_unlock(p);
+
+	if (fd < fip->fi_nfiles) {
+		UF_ENTER(ufp, fip, fd);
+		/*
+		 * The fd may have been closed between the readdir and the
+		 * lookup, in which case uf_file is NULL.  The hold on the
+		 * vnode is taken here, while the entry is still locked, so
+		 * that a concurrent close cannot release the vnode before
+		 * we have our own reference to it.
+		 */
+		if ((fp = ufp->uf_file) != NULL) {
+			vp = fp->f_vnode;
+			VN_HOLD(vp);
+		}
+		UF_EXIT(ufp);
+	}
+	mutex_exit(&fip->fi_lock);
+
+	if (vp == NULL) {
+		lxpr_freenode(lxpnp);
+		return (NULL);
+	}
+
+	/*
+	 * Fill in the lxpr_node so future references will
+	 * be able to find the underlying vnode.
+	 * The hold taken above is owned by lxpr_realvp.
+	 */
+	lxpnp->lxpr_realvp = vp;
+
+	dp = LXPTOV(lxpnp);
+	ASSERT(dp != NULL);
+
+	return (dp);
+}
+
+
+/*
+ * Lookup a name in the "net" directory: a plain search of the netdir
+ * table, with no process attached to the resulting node.
+ */
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+	return (lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES));
+}
+
+
+/*
+ * Lookup a name in the top-level lx /proc directory.
+ *
+ * A name that starts with a digit is treated as a pid and must consist of
+ * digits only; it resolves to a new LXPR_PIDDIR node if the process exists
+ * and the caller is allowed to see it.  Any other name is searched for in
+ * the table of fixed entries.  Returns NULL if nothing matches.
+ */
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+	/*
+	 * We know all the names of files & dirs in our
+	 * file system structure except those that are pid names.
+	 * These change as pids are created/deleted etc.
+	 * So just look for a number as the first char to see if we
+	 * are doing pid lookups.
+	 *
+	 * Don't need to check for "self" as it is implemented as a symlink
+	 */
+	if (*comp >= '0' && *comp <= '9') {
+		uint_t upid = 0;
+		pid_t pid;
+		lxpr_node_t *lxpnp = NULL;
+		proc_t *p;
+		int c;
+
+		/*
+		 * Convert the name to a pid.  Every character has to be a
+		 * digit: without this check a name such as "12x" would be
+		 * folded into some unrelated number and could resolve to a
+		 * process the caller never named.
+		 */
+		while ((c = *comp++) != '\0') {
+			uint_t opid;
+
+			if (c < '0' || c > '9')
+				return (NULL);
+
+			opid = upid;
+			upid = 10 * upid + c - '0';
+			/* unsigned wrap-around: far too large to be a pid */
+			if (upid / 10 != opid)
+				return (NULL);
+		}
+
+		/* reject values that do not fit in a (signed) pid_t */
+		pid = (pid_t)upid;
+		if (pid < 0)
+			return (NULL);
+
+		/*
+		 * Can't continue if the process is still loading
+		 * or it doesn't really exist yet (or maybe it just died!)
+		 */
+		p = lxpr_lock(pid);
+		if (p == NULL)
+			return (NULL);
+
+		if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+			lxpr_unlock(p);
+			return (NULL);
+		}
+
+		/*
+		 * allocate and fill in a new lx /proc node
+		 */
+		lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+		lxpr_unlock(p);
+
+		dp = LXPTOV(lxpnp);
+		ASSERT(dp != NULL);
+
+		return (dp);
+	}
+
+	/* Lookup fixed names */
+	return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
+}
+
+
+
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ *
+ * Validates the request (read permission, non-negative offset that is a
+ * multiple of LXPR_SDSIZE, positive residual count) and then dispatches to
+ * the readdir routine registered for the directory's node type.
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp)
+{
+	lxpr_node_t *dirnode = VTOLXP(dp);
+	lxpr_nodetype_t type = dirnode->lxpr_type;
+	ssize_t count;
+	off_t start;
+	int error;
+
+	ASSERT(dp->v_type == VDIR);
+	ASSERT(type >= 0 && type < LXPR_NFILES);
+
+	/*
+	 * readdir on these node types is performed on their realvp,
+	 * so they should never be seen here.
+	 */
+	ASSERT(type != LXPR_PID_FD_FD &&
+	    type != LXPR_PID_CURDIR &&
+	    type != LXPR_PID_ROOTDIR);
+
+	/* reading the directory requires read permission on it */
+	error = lxpr_access(dp, VREAD, 0, cr);
+	if (error != 0)
+		return (error);
+
+	start = uiop->uio_offset;
+	count = uiop->uio_resid;
+
+	/* negative offsets and empty or negative requests are invalid */
+	if (start < 0 || count <= 0)
+		return (EINVAL);
+
+	/* offsets that do not fall on an entry boundary name no entry */
+	if (start % LXPR_SDSIZE)
+		return (ENOENT);
+
+	return (lxpr_readdir_function[type](dirnode, uiop, eofp));
+}
+
+
+/*
+ * readdir routine that reads nothing: it ignores its arguments and always
+ * returns ENOTDIR.
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ return (ENOTDIR);
+}
+
+/*
+ * Common logic for returning directory entries.
+ *
+ * Directory offsets advance in units of LXPR_SDSIZE: slot 0 is ".", slot 1
+ * is "..", and slot n+2 is dirtab[n].  Entries are copied out until the
+ * caller's buffer is full or the table is exhausted.  Returns EINVAL if not
+ * even one entry fits, the uiomove() error if the copy fails, and otherwise
+ * 0 with *eofp (if supplied) saying whether the end of the table has been
+ * reached.
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+    lxpr_dirent_t *dirtab, int dirtablen)
+{
+	/* scratch space for exactly one dirent64 structure */
+	longlong_t recbuf[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dent = (dirent64_t *)recbuf;
+	ssize_t resid_at_entry;	/* to tell whether anything was returned */
+	ssize_t resid_now;
+
+	resid_at_entry = uiop->uio_resid;
+
+	/* start with a zeroed record */
+	bzero(recbuf, sizeof (recbuf));
+
+	for (;;) {
+		off_t here;
+		int slot;
+		int reclen;
+		int error;
+
+		/* stop once the caller's buffer has been used up */
+		resid_now = uiop->uio_resid;
+		if (resid_now <= 0)
+			break;
+
+		here = uiop->uio_offset;
+		slot = (here / LXPR_SDSIZE) - 2;
+
+		if (here == 0) {
+			/* first slot: "." */
+			dent->d_ino = lxpnp->lxpr_ino;
+			dent->d_name[0] = '.';
+			dent->d_name[1] = '\0';
+			reclen = DIRENT64_RECLEN(1);
+		} else if (here == LXPR_SDSIZE) {
+			/* second slot: ".." */
+			dent->d_ino = lxpr_parentinode(lxpnp);
+			dent->d_name[0] = '.';
+			dent->d_name[1] = '.';
+			dent->d_name[2] = '\0';
+			reclen = DIRENT64_RECLEN(2);
+		} else if (slot < dirtablen) {
+			/* an entry from the table */
+			int namelen = strlen(dirtab[slot].d_name);
+
+			dent->d_ino = lxpr_inode(dirtab[slot].d_type,
+			    lxpnp->lxpr_pid, 0);
+
+			ASSERT(namelen < LXPNSIZ);
+			(void) strcpy(dent->d_name, dirtab[slot].d_name);
+			reclen = DIRENT64_RECLEN(namelen);
+		} else {
+			/* past the last table entry */
+			if (eofp != NULL)
+				*eofp = 1;
+			return (0);
+		}
+
+		dent->d_off = (off64_t)(here + LXPR_SDSIZE);
+		dent->d_reclen = (ushort_t)reclen;
+
+		/*
+		 * The record does not fit in what is left of the buffer.
+		 * That is an error only if nothing at all has been
+		 * returned so far.
+		 */
+		if (reclen > resid_now) {
+			if (resid_now == resid_at_entry)
+				return (EINVAL);
+			break;
+		}
+
+		/*
+		 * uiomove() advances uio_resid and uio_offset by the number
+		 * of bytes copied, but directory offsets here move in steps
+		 * of LXPR_SDSIZE, which is not the record length.  The
+		 * offset is therefore overwritten after the copy.
+		 */
+		error = uiomove((caddr_t)dent, reclen, UIO_READ, uiop);
+		if (error != 0)
+			return (error);
+
+		uiop->uio_offset = here + LXPR_SDSIZE;
+	}
+
+	/* Out of space, but the last table entry may just have been done */
+	if (eofp != NULL) {
+		if (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE))
+			*eofp = 1;
+		else
+			*eofp = 0;
+	}
+	return (0);
+}
+
+
+/*
+ * readdir for the top-level lx /proc directory.
+ *
+ * The fixed entries are produced first by lxpr_readdir_common(); after
+ * them comes one slot per process table index, of which only the slots
+ * holding a visible process yield a directory entry.  Returns 0 on success
+ * (with *eofp, if supplied, always written), EINVAL if not even one entry
+ * fits in the caller's buffer, or the error from uiomove().
+ */
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+	off_t uoffset;
+	zoneid_t zoneid;
+	pid_t pid;
+	int error;
+	int ceof;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+	oresid = uiop->uio_resid;
+	zoneid = LXPTOZ(lxpnp)->zone_id;
+
+	/*
+	 * We return directory entries in the order:
+	 * "." and ".." then the unique lx procfs files, then the
+	 * directories corresponding to the running processes.
+	 *
+	 * This is a good order because it allows us to more easily
+	 * keep track of where we are between calls to getdents().
+	 * If the number of processes changes between calls then we
+	 * can't lose track of where we are in the lx procfs files.
+	 */
+
+	/* Do the fixed entries */
+	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
+	    PROCDIRFILES);
+	if (error != 0)
+		return (error);
+
+	/* Finished if the buffer filled before the end of the fixed table */
+	if (ceof == 0) {
+		if (eofp)
+			*eofp = 0;
+		return (0);
+	}
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/* Do the process entries */
+	while ((uresid = uiop->uio_resid) > 0) {
+		proc_t *p;
+		zoneid_t pzoneid;
+		int len;
+		int reclen;
+		int i;
+
+		uoffset = uiop->uio_offset;
+
+		/*
+		 * Stop when entire proc table has been examined.
+		 */
+		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+		if (i >= v.v_proc) {
+			/* Run out of table entries */
+			if (eofp) {
+				*eofp = 1;
+			}
+			return (0);
+		}
+		mutex_enter(&pidlock);
+
+		/*
+		 * Skip indices for which there is no pid_entry, PIDs for
+		 * which there is no corresponding process, the zsched process,
+		 * a PID of 0, and anything the security policy doesn't allow
+		 * us to look at.
+		 */
+		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+		    p->p_pid == curproc->p_zone->zone_zsched->p_pid ||
+		    p->p_pid == 0 ||
+		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+			mutex_exit(&pidlock);
+			goto next;
+		}
+
+		ASSERT(p->p_stat != 0);
+
+		/*
+		 * Everything needed from the proc structure is copied out
+		 * while pidlock is still held; p is not touched again once
+		 * the lock has been dropped.
+		 *
+		 * Convert pid to the Linux default of 1 if we're the zone's
+		 * init process, otherwise use the value from the proc
+		 * structure
+		 */
+		pid = ((p->p_pid != curproc->p_zone->zone_proc_initpid) ?
+		    p->p_pid : 1);
+		pzoneid = p->p_zone->zone_id;
+		mutex_exit(&pidlock);
+
+		/*
+		 * If this /proc was mounted in the global zone, view
+		 * all procs; otherwise, only view zone member procs.
+		 */
+		if (zoneid != GLOBAL_ZONEID && pzoneid != zoneid) {
+			goto next;
+		}
+
+		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+		ASSERT(len < LXPNSIZ);
+		reclen = DIRENT64_RECLEN(len);
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		/*
+		 * if the size of the data to transfer is greater
+		 * than that requested then we can't do it this transfer.
+		 */
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid)
+				return (EINVAL);
+			break;
+		}
+
+		/*
+		 * uiomove() updates both uiop->uio_resid and
+		 * uiop->uio_offset by the same amount.  But we want
+		 * uiop->uio_offset to change in increments
+		 * of LXPR_SDSIZE, which is different from the number of bytes
+		 * being returned to the user.
+		 * So we set uiop->uio_offset separately, at the "next"
+		 * label below, ignoring what uiomove() does.
+		 */
+		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+		    uiop)) != 0)
+			return (error);
+
+next:
+		uiop->uio_offset = uoffset + LXPR_SDSIZE;
+	}
+
+	if (eofp)
+		*eofp =
+		    (uiop->uio_offset >=
+		    ((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+
+	return (0);
+}
+
+
+/*
+ * readdir for a per-process directory.  Fails with ENOENT if the process
+ * can no longer be found (or is still being created); otherwise the piddir
+ * table is listed.  A node pid of 1 stands for the zone's init process.
+ */
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	proc_t *p;
+	int gone;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+	/* can't read its contents if it died */
+	mutex_enter(&pidlock);
+	if (lxpnp->lxpr_pid == 1)
+		p = prfind(curproc->p_zone->zone_proc_initpid);
+	else
+		p = prfind(lxpnp->lxpr_pid);
+	gone = (p == NULL || p->p_stat == SIDL);
+	mutex_exit(&pidlock);
+
+	if (gone)
+		return (ENOENT);
+
+	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+
+/*
+ * readdir for the "net" directory: lists ".", ".." and the entries of the
+ * netdir table via lxpr_readdir_common().
+ */
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+ ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+ return (lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES));
+}
+
+
+/*
+ * readdir for a process's fd directory.
+ *
+ * "." and ".." are produced by lxpr_readdir_common(); after them comes one
+ * slot per file descriptor table index, of which only open descriptors
+ * yield an entry (named by the descriptor number).  The process's fi_lock
+ * is held while the table is walked and is released on every return path.
+ * Returns ENOENT if the process is gone, EINVAL if not even one entry fits,
+ * the uiomove() error if the copy fails, and 0 otherwise (with *eofp, if
+ * supplied, written).
+ */
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+	off_t uoffset;
+	int error;
+	int ceof;
+	proc_t *p;
+	int fddirsize;
+	uf_info_t *fip;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+	oresid = uiop->uio_resid;
+
+	/* can't read its contents if it died */
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL)
+		return (ENOENT);
+
+	/* Get open file info */
+	fip = (&(p)->p_user.u_finfo);
+	mutex_enter(&fip->fi_lock);
+
+	/* the table size is sampled with fi_lock held */
+	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
+		fddirsize = 0;
+	else
+		fddirsize = fip->fi_nfiles;
+
+	lxpr_unlock(p);
+
+	/* Do the fixed entries (in this case just "." & "..") */
+	error = lxpr_readdir_common(lxpnp, uiop, &ceof, 0, 0);
+
+	/*
+	 * Finished if we got an error or if we couldn't do all the table.
+	 * Either way we must leave through "out" so that fi_lock is dropped.
+	 */
+	if (error != 0)
+		goto out;
+	if (ceof == 0) {
+		if (eofp)
+			*eofp = 0;
+		goto out;
+	}
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/*
+	 * Loop until user's request is satisfied or until
+	 * all file descriptors have been examined.  The loop increment
+	 * moves the offset on by one slot whether or not the slot produced
+	 * an entry.
+	 */
+	for (; (uresid = uiop->uio_resid) > 0;
+	    uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+		int reclen;
+		int fd;
+		int len;
+
+		uoffset = uiop->uio_offset;
+
+		/*
+		 * Stop at the end of the fd list
+		 */
+		fd = (uoffset / LXPR_SDSIZE) - 2;
+		if (fd >= fddirsize) {
+			if (eofp) {
+				*eofp = 1;
+			}
+			goto out;
+		}
+
+		/* descriptors that are not open produce no entry */
+		if (fip->fi_list[fd].uf_file == NULL)
+			continue;
+
+		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+		ASSERT(len < LXPNSIZ);
+		reclen = DIRENT64_RECLEN(len);
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid) {
+				error = EINVAL;
+				goto out;
+			}
+			/* buffer full; "break" skips the offset increment */
+			break;
+		}
+
+		if ((error = uiomove((caddr_t)dirent, reclen, UIO_READ,
+		    uiop)) != 0)
+			goto out;
+	}
+
+	if (eofp)
+		*eofp =
+		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+
+out:
+	mutex_exit(&fip->fi_lock);
+	return (error);
+}
+
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ *
+ * For a node backed by a real directory or regular file the link text is
+ * that vnode's path; for "self" it is the caller's pid (1 for the zone's
+ * init process).  Every other node fails with EINVAL.
+ */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop)
+{
+	char linkbuf[MAXPATHLEN + 1];
+	lxpr_node_t *node = VTOLXP(vp);
+	vnode_t *realvp;
+	pid_t pid;
+	int error = 0;
+
+	/* only symbolic links can be read this way */
+	if (vp->v_type != VLNK)
+		return (EINVAL);
+
+	realvp = node->lxpr_realvp;
+
+	/*
+	 * A name is produced for things that really are regular files or
+	 * directories, and for nothing else.
+	 */
+	if (realvp != NULL &&
+	    (realvp->v_type == VDIR || realvp->v_type == VREG)) {
+		error = lxpr_access(vp, VREAD, 0, CRED());
+		if (error != 0)
+			return (error);
+
+		error = vnodetopath(NULL, realvp, linkbuf, sizeof (linkbuf),
+		    CRED());
+		if (error != 0)
+			return (error);
+	} else if (node->lxpr_type == LXPR_SELF) {
+		/*
+		 * The zone's init process is presented as pid 1, the
+		 * Linux default.
+		 */
+		pid = curproc->p_pid;
+		if (pid == curproc->p_zone->zone_proc_initpid)
+			pid = 1;
+
+		/* any int fits in the buffer, so the result isn't checked */
+		(void) snprintf(linkbuf, sizeof (linkbuf), "%d", pid);
+	} else {
+		/*
+		 * Fail rather than return an empty link, which would
+		 * be taken to mean ".".
+		 */
+		return (EINVAL);
+	}
+
+	/* copy the link data to user space */
+	return (uiomove(linkbuf, strlen(linkbuf), UIO_READ, uiop));
+}
+
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ *
+ * All of the work is done by lxpr_freenode() on the lx /proc node that
+ * owns the vnode; the credential argument is unused.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr)
+{
+ lxpr_freenode(VTOLXP(vp));
+}
+
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ *
+ * Takes no arguments and always returns 0.
+ */
+static int
+lxpr_sync()
+{
+ /*
+ * nothing to sync but this
+ * function must never fail
+ */
+ return (0);
+}
+
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ *
+ * Each vnode is first resolved through any chain of lx /proc nodes that
+ * shadow a real vnode.  If either result is still an lx /proc vnode the
+ * two are compared by identity, otherwise the comparison is delegated to
+ * VOP_CMP() on the resolved vnodes.
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2)
+{
+	vnode_t *under;
+
+	/* strip lx /proc wrappers from the first vnode */
+	for (;;) {
+		if (!vn_matchops(vp1, lxpr_vnodeops))
+			break;
+		under = VTOLXP(vp1)->lxpr_realvp;
+		if (under == NULL)
+			break;
+		vp1 = under;
+	}
+
+	/* and from the second */
+	for (;;) {
+		if (!vn_matchops(vp2, lxpr_vnodeops))
+			break;
+		under = VTOLXP(vp2)->lxpr_realvp;
+		if (under == NULL)
+			break;
+		vp2 = under;
+	}
+
+	if (!vn_matchops(vp1, lxpr_vnodeops) &&
+	    !vn_matchops(vp2, lxpr_vnodeops))
+		return (VOP_CMP(vp1, vp2));
+
+	return (vp1 == vp2);
+}
+
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ *
+ * Stores in *vpp the vnode that "vp" ultimately stands for: the node's
+ * lxpr_realvp, resolved one step further with VOP_REALVP() when that
+ * succeeds, or "vp" itself when the node shadows nothing.  Always
+ * returns 0.
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp)
+{
+	vnode_t *under = VTOLXP(vp)->lxpr_realvp;
+	vnode_t *deeper;
+
+	if (under == NULL) {
+		*vpp = vp;
+		return (0);
+	}
+
+	if (VOP_REALVP(under, &deeper) == 0)
+		under = deeper;
+
+	*vpp = under;
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/sys/ldlinux.h b/usr/src/uts/common/brand/lx/sys/ldlinux.h
new file mode 100644
index 0000000000..b259c05d97
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/ldlinux.h
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LDLINUX_H
+#define _SYS_LDLINUX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The ldlinux streams module is only intended for use in lx branded zones.
+ * This streams module implements the following ioctls:
+ * TIOCSETLD and TIOCGETLD
+ *
+ * These ioctls are special ioctls supported only by the ldlinux streams
+ * module and invoked only by the lx brand emulation library. These ioctls
+ * do not exist on native Linux systems.
+ *
+ * The TIOCSETLD ioctl is used when emulating the following Linux ioctls:
+ * TCSETS/TCSETSW/TCSETSF
+ * TCSETA/TCSETAW/TCSETAF
+ *
+ * The TIOCGETLD ioctl is used when emulating the following Linux ioctls:
+ * TCGETS/TCGETA
+ *
+ * This module is needed to emulate these ioctls because the following arrays:
+ * termio.c_cc
+ * termios.c_cc
+ * which are parameters for the following ioctls:
+ * TCSETS/TCSETSW/TCSETSF
+ * TCSETA/TCSETAW/TCSETAF
+ * TCGETS/TCGETA
+ *
+ * are defined differently on Solaris and Linux.
+ *
+ * According to the termio(7I) man page on Solaris the following is true of
+ * the members of the c_cc array:
+ * The VMIN element is the same element as the VEOF element.
+ * The VTIME element is the same element as the VEOL element.
+ *
+ * But on Linux the termios(3) man page states:
+ * These symbolic subscript values are all different, except that
+ * VTIME, VMIN may have the same value as VEOL, VEOF, respectively.
+ *
+ * While the man page indicates that these values may be the same, empirical
+ * tests show them to be different. Since these values are different on
+ * Linux systems it's possible that applications could set the members of
+ * the c_cc array to different values and then later expect to be able to
+ * read back those same separate values. The ldlinux module exists to provide
+ * a per-stream storage area where the lx_brand emulation library can save
+ * these values. The values are set and retrieved via the TIOCSETLD and
+ * TIOCGETLD ioctls respectively.
+ */
+
+#include <sys/termios.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LDLINUX_MOD "ldlinux"
+
+#ifdef _KERNEL
+
+/*
+ * LDLINUX_MODID - This should be a unique number associated with
+ * this particular module. Unfortunately there is no authority responsible
+ * for administering this name space, hence there's no real guarantee that
+ * whatever number we choose will be unique. Luckily, this constant
+ * is not really used anywhere by the system. It is used by some
+ * kernel subsystems to check for the presence of certain streams
+ * modules with known id values. Since no other kernel subsystem
+ * checks for the presence of this module we'll just set the id to 0.
+ */
+#define LDLINUX_MODID 0
+
+/*
+ * Per-stream storage kept by the ldlinux module: a state word plus separate
+ * copies of the four c_cc values that the lx brand emulation library saves
+ * and retrieves with the TIOCSETLD and TIOCGETLD ioctls.
+ */
+struct ldlinux {
+ int state; /* state information */
+ /* Linux expects the next four c_cc values */
+ /* to be distinct, whereas solaris (legally) */
+ /* overlaps their storage */
+ unsigned char veof; /* veof value */
+ unsigned char veol; /* veol value */
+ unsigned char vmin; /* vmin value */
+ unsigned char vtime; /* vtime value */
+};
+
+/* NOTE(review): presumably a flag bit for ldlinux.state -- confirm */
+#define ISPTSTTY 0x01
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LDLINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_audio.h b/usr/src/uts/common/brand/lx/sys/lx_audio.h
new file mode 100644
index 0000000000..cbb3431c4b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_audio.h
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUDIO_H
+#define _LX_AUDIO_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zone.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * name for this driver
+ */
+#define LX_AUDIO_DRV "lx_audio"
+
+/*
+ * names for the minor nodes this driver exports
+ */
+#define LXA_MINORNAME_DEVCTL "lx_devctl"
+#define LXA_MINORNAME_DSP "lx_dsp"
+#define LXA_MINORNAME_MIXER "lx_mixer"
+
+/*
+ * minor numbers for the minor nodes this driver exports
+ */
+#define LXA_MINORNUM_DEVCTL 0
+#define LXA_MINORNUM_DSP 1
+#define LXA_MINORNUM_MIXER 2
+#define LXA_MINORNUM_COUNT 3
+
+/*
+ * driver ioctls
+ *
+ * note that we're layering on top of solaris audio devices so we want
+ * to make sure that our ioctls namespace doesn't conflict with theirs.
+ * looking in sys/audioio.h and sys/mixer.h we see that they seem to
+ * use an _IO key of 'A' and 'M', so we'll choose an _IO key of 'a'.
+ */
+
+/*
+ * administrative ioctls.
+ * these ioctls are only supported on the DEVCTL minor node
+ */
+#define LXA_IOC_ZONE_REG (_IOR('a', 0, lxa_zone_reg_t))
+#define LXA_IOC_ZONE_UNREG (_IOR('a', 1, lxa_zone_reg_t))
+
+
+/*
+ * audio and mixer device ioctls
+ * these ioctls are supported on DSP and MIXER minor nodes.
+ */
+#define LXA_IOC_GETMINORNUM (_IOR('a', 20, int))
+
+/*
+ * audio device ioctls.
+ * these ioctls are supported on DSP minor nodes.
+ */
+#define LXA_IOC_MMAP_OUTPUT (_IOR('a', 41, int))
+#define LXA_IOC_MMAP_PTR (_IOR('a', 42, int))
+#define LXA_IOC_GET_FRAG_INFO (_IOR('a', 43, lxa_frag_info_t))
+#define LXA_IOC_SET_FRAG_INFO (_IOR('a', 44, lxa_frag_info_t))
+
+/*
+ * mixer device ioctls.
+ * these ioctls are supported on MIXER minor nodes.
+ */
+#define LXA_IOC_MIXER_GET_VOL (_IOR('a', 60, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_VOL (_IOR('a', 61, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_GET_MIC (_IOR('a', 62, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_MIC (_IOR('a', 63, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_GET_PCM (_IOR('a', 64, lxa_mixer_levels_t))
+#define LXA_IOC_MIXER_SET_PCM (_IOR('a', 65, lxa_mixer_levels_t))
+
+/* command structure for LXA_IOC_ZONE_REG */
+#define LXA_INTSTRLEN 32
+typedef struct lxa_zone_reg {
+ char lxa_zr_zone_name[ZONENAME_MAX];
+ char lxa_zr_inputdev[LXA_INTSTRLEN];
+ char lxa_zr_outputdev[LXA_INTSTRLEN];
+} lxa_zone_reg_t;
+
+/* command structure for LXA_IOC_GET_FRAG_INFO and LXA_IOC_SET_FRAG_INFO */
+typedef struct lxa_frag_info {
+ int lxa_fi_size;
+ int lxa_fi_cnt;
+} lxa_frag_info_t;
+
+/* command structure for LXA_IOC_MIXER_GET_* and LXA_IOC_MIXER_SET_* */
+typedef struct lxa_mixer_levels {
+ int lxa_ml_gain;
+ int lxa_ml_balance;
+} lxa_mixer_levels_t;
+
+/* verify that a Solaris mixer level structure has valid values; (x) is evaluated up to 4x */
+#define LXA_MIXER_LEVELS_OK(x) (((x)->lxa_ml_gain >= AUDIO_MIN_GAIN) && \
+ ((x)->lxa_ml_gain <= AUDIO_MAX_GAIN) && \
+ ((x)->lxa_ml_balance >= AUDIO_LEFT_BALANCE) && \
+ ((x)->lxa_ml_balance <= AUDIO_RIGHT_BALANCE))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUDIO_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
new file mode 100644
index 0000000000..4436226deb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
@@ -0,0 +1,334 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUTOFS_H
+#define _LX_AUTOFS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
+ * and provide support for the Linux "automount" automounter.
+ *
+ *
+ *
+ * +++ Linux automounter background.
+ *
+ * Linux has two automounters: "amd" and "automount"
+ *
+ * 1) "amd" is a userland NFS server. It basically mounts an NFS filesystem
+ * at an automount point, and it acts as the NFS server for the mount. When
+ * an access is done to that NFS filesystem, the access is redirected by the
+ * kernel to the "amd" process via rpc. "amd" then looks up any information
+ * required to resolve the requests, mounts real NFS filesystems if
+ * necessary, and returns. "amd" has its own strange configuration
+ * mechanism that doesn't seem to be very compatible with Solaris's network
+ * based automounter map support.
+ *
+ * 2) "automount" is the other Linux automounter. It utilizes a kernel
+ * filesystem (autofs) to provide its functionality. Basically, it mounts
+ * the autofs filesystem at any automounter controlled mount point. This
+ * filesystem then intercepts and redirects lookup operations (and only
+ * lookup ops) to the userland automounter process via a pipe. (The
+ * pipe to the automounter is established via mount options when the autofs
+ * filesystem is mounted.) When the automounter receives a request via this
+ * pipe, it does lookups to whatever backing store it's configured to use,
+ * does mkdir operations on the autofs filesystem, mounts remote NFS
+ * filesystems on any leaf directories it just created, and signals the
+ * autofs filesystem via an ioctl to let it know that the lookup can
+ * continue.
+ *
+ *
+ *
+ * +++ Linux autofs (and automount daemon) notes
+ *
+ * Since we're mimicking the behavior of the Linux autofs filesystem it's
+ * important to document some of its observed behavior here since there's
+ * no doubt that in the future this behavior will change. These comments
+ * apply to the behavior of the automounter as observed on a system
+ * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
+ *
+ * A) Autofs allows root owned, non-automounter processes to create
+ * directories in the autofs filesystem. The autofs filesystem treats the
+ * automounter's process group as special, but it doesn't prevent root
+ * processes outside of the automounter's process group from creating new
+ * directories in the autofs filesystem.
+ *
+ * B) Autofs doesn't allow creation of any non-directory entries in the
+ * autofs filesystem. No entity can create files (e.g. /bin/touch or
+ * VOP_CREATE/VOP_SYMLINK/etc.) The only entries that can exist within
+ * the autofs filesystem are directories.
+ *
+ * C) Autofs only intercepts vop lookup operations. Notably, it does _not_
+ * intercept and re-direct vop readdir operations. This means that the
+ * observed behavior of the Linux automounter can be considerably different
+ * from that of the Solaris automounter. Specifically, on Solaris if an autofs
+ * mount point is mounted _without_ the -nobrowse option then if a user does
+ * an ls operation (which translates into a vop readdir operation) then the
+ * automounter will intercept that operation and list all the possible
+ * directories and mount points without actually mounting any filesystems.
+ * Essentially, all automounter managed mount points on Linux will behave
+ * like "-nobrowse" mount points on Solaris. Here's an example to
+ * illustrate this. If /ws was mounted on Solaris without the -nobrowse
+ * option and an auto_ws yp map was setup as the backing store for this
+ * mount point, then an "ls /ws" would list all the keys in the map as
+ * valid directories, but an "ls /ws" on Linux would list an empty
+ * directory.
+ *
+ * D) NFS mounts are performed by the automount process. When the automount
+ * process gets a redirected lookup request, it determines _all_ the
+ * possible remote mount points for that request, creates directory paths
+ * via mkdir, and mounts the remote filesystems on the newly created paths.
+ * So for example, if a machine called mcescher exported /var/crash and
+ * /var/core, an "ls /net/mcescher" would result in the following actions
+ * being done by the automounter:
+ * mkdir /net/mcescher
+ * mkdir /net/mcescher/var
+ * mkdir /net/mcescher/var/crash
+ * mkdir /net/mcescher/var/core
+ * mount mcescher:/var/crash /net/mcescher/var/crash
+ * mount mcescher:/var/core /net/mcescher/var/core
+ * once the automounter completed the work above it would signal the autofs
+ * filesystem (via an ioctl) that the lookup could continue.
+ *
+ * E.1) Autofs only redirects vop lookup operations for path entries that
+ * don't already exist in the autofs filesystem. So for the example above,
+ * an initial (after the start of the automounter) "ls /net/mcescher" would
+ * result in a request to the automounter. A subsequent "ls /net/mcescher"
+ * would not result in a request to the automounter. Even if
+ * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
+ * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher"
+ * would not result in a new request to the automounter.
+ *
+ * E.2) Autofs lookup requests that are sent to the automounter only include
+ * the root directory path component. So for example, after starting up
+ * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
+ * lookup request actually sent to the automounter would just be for
+ * "mcescher". (The same request as if the user had done "ls /net/mcescher".)
+ *
+ * E.3) The two statements above aren't entirely true. The Linux
+ * autofs filesystem will also redirect lookup operations for leaf
+ * directories that don't have a filesystem mounted on them. Using the
+ * example above, if a user did a "ls /net/mcescher", then manually
+ * unmounted /net/mcescher/var/crash, and then did an "ls
+ * /net/mcescher/var/crash", this would result in a request for
+ * "mcescher/var/crash" being sent to the automounter. The strange thing
+ * (a Linux bug perhaps) is that the automounter won't do anything with this
+ * request and the lookup will fail.
+ *
+ * F) The autofs filesystem communication protocol (what ioctls it supports
+ * and what data it passes to the automount process) are versioned. The
+ * source for the userland automount daemon (i looked at version v3.1.7)
+ * seemed to support two versions of the Linux kernel autofs implementation.
+ * Both versions supported communication with a pipe and the format of the
+ * structure passed via this pipe was the same. The difference between the
+ * two versions was in the functionality supported. (The v3 version has
+ * additional ioctls to support automount timeouts.)
+ *
+ *
+ *
+ * +++ lx_autofs notes
+ *
+ * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
+ * Linux autofs filesystem with the following exceptions:
+ *
+ * 1.1) We don't bother to implement the E.3 functionality listed above
+ * since it doesn't appear to be of any use.
+ *
+ * 1.2) We only implement v2 of the automounter protocol since
+ * implementing v3 would take a _lot_ more work. If this proves to be a
+ * problem we can re-visit this decision later. (More details about v3
+ * support are included in comments below.)
+ *
+ * 2) In general, the approach taken for lx_autofs is to keep it as simple
+ * as possible and to minimize its memory usage. To do this all information
+ * about the contents of the lx_autofs filesystem are mirrored in the
+ * underlying filesystem that lx_autofs is mounted on and most vop operations
+ * are simply passed onto this underlying filesystem. This means we don't
+ * have to implement most of the complex operations that a full filesystem
+ * normally has to implement. It also means that most of our filesystem state
+ * (wrt the contents of the filesystem) doesn't actually have to be stored
+ * in memory, we can simply go to the underlying filesystem to get it when
+ * it's requested. For the purposes of discussion, we'll call the underlying
+ * filesystem the "backing store."
+ *
+ * The backing store is actually a directory called ".lx_afs" which is created in
+ * the directory where the lx_autofs filesystem is mounted. When the lx_autofs
+ * filesystem is unmounted this backing store directory is deleted. If this
+ * directory exists at mount time (perhaps the system crashed while a previous
+ * lx_autofs instance was mounted at the same location) it will be deleted.
+ * There are a few implications of using a backing store worth mentioning.
+ *
+ * 2.1) lx_autofs can't be mounted on a read only filesystem. If this
+ * proves to be a problem we can probably move the location of the
+ * backing store.
+ *
+ * 2.2) If the backing store filesystem runs out of space then the
+ * automounter process won't be able to create more directories and mount
+ * new filesystems. Of course, strange failures usually happen when
+ * filesystems run out of space.
+ *
+ * 3) Why aren't we using gfs? gfs has two different usage models.
+ *
+ * 3.1) I'm my own filesystem but i'm using gfs to help with managing
+ * readdir operations.
+ *
+ * 3.2) I'm a gfs filesystem and gfs is managing all my vnodes
+ *
+ * We're not using the 3.1 interfaces because we don't implement readdir
+ * ourselves. We pass all readdir operations onto the backing store
+ * filesystem and utilize its readdir implementation.
+ *
+ * We're not using the 3.2 interfaces because they are really designed for
+ * in memory filesystems where all of the filesystem state is stored in
+ * memory. They don't lend themselves to filesystems where part of the
+ * state is in memory and part of the state is on disk.
+ *
+ * For more information on gfs take a look at the block comments in the
+ * top of gfs.c
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Note that the name of the actual Solaris filesystem is lx_afs and not
+ * lx_autofs. This is because filesystem names are stupidly limited to 8
+ * characters.
+ */
+#define LX_AUTOFS_NAME "lx_afs"
+
+/*
+ * Mount options supported.
+ */
+#define LX_MNTOPT_FD "fd"
+#define LX_MNTOPT_PGRP "pgrp"
+#define LX_MNTOPT_MINPROTO "minproto"
+#define LX_MNTOPT_MAXPROTO "maxproto"
+
+/* Version of the Linux kernel automount protocol we support. */
+#define LX_AUTOFS_PROTO_VERSION 2
+
+/*
+ * Command structure sent to automount process from lx_autofs via a pipe.
+ * This structure is the same for v2 and v3 of the automount protocol
+ * (the communication pipe is established at mount time).
+ */
+typedef struct lx_autofs_pkt {
+ int lap_protover; /* protocol version number */
+ int lap_constant; /* always set to 0 */
+ int lap_id; /* every pkt must have a unique id */
+ int lap_name_len; /* excludes newline and NUL terminator */
+ char lap_name[256]; /* path component to look up */
+} lx_autofs_pkt_t;
+
+/*
+ * Ioctls supported (v2 protocol).
+ */
+#define LX_AUTOFS_IOC_READY 0x00009360 /* arg: int */
+#define LX_AUTOFS_IOC_FAIL 0x00009361 /* arg: int */
+#define LX_AUTOFS_IOC_CATATONIC 0x00009362 /* arg: <none> */
+
+/*
+ * Ioctls not supported (v3 protocol).
+ *
+ * Initially we're only going to support v2 of the Linux kernel automount
+ * protocol. This means that we don't support the following ioctls.
+ *
+ * 1) The protocol version ioctl (by not supporting it the automounter
+ * will assume version 2).
+ *
+ * 2) Automounter timeout ioctls. For v3 and later the automounter can
+ * be started with a timeout option. It will notify the filesystem of
+ * this timeout and, if any automounter filesystem root directory entry
+ * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
+ * ioctl. For example, if the timeout is 60 seconds, the Linux
+ * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
+ * timeouts more often than that. (v3.1.7 of the automount daemon would
+ * perform this ioctl every <timeout>/4 seconds.) The autofs
+ * filesystem will then
+ * report top level directories that aren't in use to the automounter
+ * via this ioctl. If /net was managed by the automounter and
+ * there were the following mount points:
+ * /net/jurassic/var/crash
+ * /net/mcescher/var/crash
+ * and no one was looking at any crash dumps on mcescher but someone
+ * was analyzing a crash dump on jurassic, then after <timeout> seconds
+ * had passed the autofs filesystem would let the automounter know that
+ * "mcescher" could be unmounted. (Note the granularity of notification
+ * is directories in the root of the autofs filesystem.) Here's two
+ * ideas for how this functionality could be implemented on Solaris:
+ *
+ * 2.1) The easy incomplete way. Don't do any in-use detection. Simply
+ * tell the automounter it can try to unmount the filesystem every time
+ * the specified timeout passes. If the filesystem is in use then the
+ * unmount will fail. This would break down for remote hosts with multiple
+ * mounts. For example, if the automounter had mounted the following
+ * filesystems:
+ * /net/jurassic/var/crash
+ * /net/jurassic/var/core
+ * and the user was looking at a core file, and the timeout expired, the
+ * automounter would receive notification to unmount "jurassic". Then
+ * it would unmount crash (which would succeed) and then try to unmount
+ * core (which would fail). After that (since the automounter only
+ * performs mounts for failed lookups in the root autofs directory)
+ * future access to /net/jurassic/var/crash would result in access
+ * to an empty autofs directory. We might be able to work around
+ * this by caching which root autofs directories we've timed out,
+ * then any access to paths that contain those directories could be
+ * stalled and we could resend another request to the automounter.
+ * This could work if the automounter ignores mount failures.
+ *
+ * 2.2) The hard correct way. The real difficulty here is detecting
+ * files in use on other filesystems (say NFS) that have been mounted
+ * on top of autofs. (Detecting in use autofs vnodes should be easy.)
+ * to do this we would probably have to create a new brand op to intercept
+ * mount/umount filesystem operations. Then using this entry point we
+ * could detect mounts of other filesystems on top of lx_autofs. When
+ * a successful mount finishes we would use the FEM (file event
+ * monitoring) framework to push a module onto that filesystem and
+ * intercept VOP operations that allocate/free vnodes in that filesystem.
+ * (We would also then have to track mount operations on top of that
+ * filesystem, etc.) this would allow us to properly detect any
+ * usage of subdirectories of an autofs directory.
+ */
+#define LX_AUTOFS_IOC_PROTOVER 0x80049363 /* arg: int */
+#define LX_AUTOFS_IOC_EXPIRE 0x81109365 /* arg: lx_autofs_expire * */
+#define LX_AUTOFS_IOC_SETTIMEOUT 0xc0049364 /* arg: ulong_t */
+
+typedef struct lx_autofs_expire {
+ int lap_protover; /* protocol version number */
+ int lap_constant; /* always set to 1 */
+ int lap_name_len; /* excludes newline and NUL terminator */
+ char lap_name[256]; /* path component that has timed out */
+} lx_autofs_expire_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
new file mode 100644
index 0000000000..9c5517b8d5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_AUTOFS_IMPL_H
+#define _LX_AUTOFS_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/modhash.h>
+#include <sys/vnode.h>
+
+#include <sys/lx_autofs.h>
+
+/*
+ * Space key.
+ * Used to persist data across lx_autofs filesystem module unloads.
+ */
+#define LX_AUTOFS_SPACE_KEY_UDEV LX_AUTOFS_NAME "_udev"
+
+/*
+ * Name of the backing store directory.
+ */
+#define LX_AUTOFS_BS_DIR "." LX_AUTOFS_NAME
+
+#define LX_AUTOFS_VFS_ID_HASH_SIZE 15
+#define LX_AUTOFS_VFS_PATH_HASH_SIZE 15
+#define LX_AUTOFS_VFS_VN_HASH_SIZE 15
+
+/*
+ * VFS data object.
+ */
+typedef struct lx_autofs_vfs {
+ /* Info about the underlying filesystem and backing store. */
+ vnode_t *lav_mvp;
+ char *lav_bs_name;
+ vnode_t *lav_bs_vp;
+
+ /* Info about the automounter process managing this filesystem. */
+ int lav_fd;
+ pid_t lav_pgrp;
+ file_t *lav_fifo_wr;
+ file_t *lav_fifo_rd;
+
+ /* Each automount request needs a unique id. */
+ id_space_t *lav_ids;
+
+ /* All remaining structure members are protected by lav_lock. */
+ kmutex_t lav_lock;
+
+ /* Hashes to keep track of outstanding automounter requests. */
+ mod_hash_t *lav_path_hash;
+ mod_hash_t *lav_id_hash;
+
+ /* We need to keep track of all our vnodes. */
+ vnode_t *lav_root;
+ mod_hash_t *lav_vn_hash;
+} lx_autofs_vfs_t;
+
+/*
+ * Structure to keep track of requests sent to the automounter.
+ */
+typedef struct lx_autofs_lookup_req {
+ /* Packet that gets sent to the automounter. */
+ lx_autofs_pkt_t lalr_pkt;
+
+ /* Reference count. Always updated atomically. */
+ uint_t lalr_ref;
+
+ /*
+ * Fields to track and synchronize threads waiting on a lookup.
+ * These fields are protected by lalr_lock.
+ */
+ kmutex_t lalr_lock;
+ kcondvar_t lalr_cv;
+ int lalr_complete;
+} lx_autofs_lookup_req_t;
+
+/*
+ * Generic stack structure.
+ */
+typedef struct stack_elem {
+ list_node_t se_list;
+ caddr_t se_ptr1;
+ caddr_t se_ptr2;
+ caddr_t se_ptr3;
+} stack_elem_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_AUTOFS_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
new file mode 100644
index 0000000000..4cbcda48bf
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -0,0 +1,210 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_BRAND_H
+#define _LX_BRAND_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/cpuvar.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LX_BRANDNAME "lx"
+
+/*
+ * Brand uname info
+ */
+#define LX_UNAME_SYSNAME "Linux"
+#define LX_UNAME_RELEASE "2.4.21"
+#define LX_UNAME_VERSION "BrandZ fake linux"
+#define LX_UNAME_MACHINE "i686"
+
+#define LX_LINKER_NAME "ld-linux.so.2"
+#define LX_LINKER "/lib/" LX_LINKER_NAME
+#define LX_LIBC_NAME "libc.so.6"
+#define LIB_PATH "/native/usr/lib/"
+#define LX_LIB "lx_brand.so.1"
+#define LX_LIB_PATH LIB_PATH LX_LIB
+
+#define LX_NSYSCALLS 270
+
+/*
+ * brand(2) subcommands
+ *
+ * Everything >= 128 is a brand-specific subcommand.
+ * 192 to 462 are reserved for system calls, although most of that space is
+ * unused.
+ */
+#define B_LPID_TO_SPAIR 128
+#define B_SYSENTRY 129
+#define B_SYSRETURN 130
+#define B_PTRACE_SYSCALL 131
+#define B_SET_AFFINITY_MASK 132
+#define B_GET_AFFINITY_MASK 133
+
+#define B_EMULATE_SYSCALL 192
+
+#define LX_VERSION_1 1
+#define LX_VERSION LX_VERSION_1
+
+#define LX_ATTR_RESTART_INIT ZONE_ATTR_BRAND_ATTRS
+
+#ifndef _ASM
+
+typedef struct lx_brand_registration {
+ uint_t lxbr_version; /* version number */
+ void *lxbr_handler; /* base address of handler */
+ void *lxbr_tracehandler; /* base address of trace handler */
+ void *lxbr_traceflag; /* address of trace flag */
+} lx_brand_registration_t;
+
+#ifdef _SYSCALL32
+typedef struct lx_brand_registration32 {
+ uint32_t lxbr_version; /* version number */
+ caddr32_t lxbr_handler; /* base address of handler */
+ caddr32_t lxbr_tracehandler; /* base address of trace handler */
+ caddr32_t lxbr_traceflag; /* address of trace flag */
+} lx_brand_registration32_t;
+#endif
+
+typedef struct lx_regs {
+ long lxr_gs;
+ long lxr_edi;
+ long lxr_esi;
+ long lxr_ebp;
+ long lxr_esp;
+ long lxr_ebx;
+ long lxr_edx;
+ long lxr_ecx;
+ long lxr_eax;
+ long lxr_eip;
+
+ long lxr_orig_eax;
+} lx_regs_t;
+
+#endif /* _ASM */
+
+/*
+ * GDT usage
+ */
+#define GDT_TLSMIN (GDT_BRANDMIN)
+#define GDT_TLSMAX (GDT_TLSMIN + 2)
+#define LX_TLSNUM (GDT_TLSMAX - GDT_TLSMIN)
+
+#ifndef _ASM
+
+/*
+ * Stores information needed by the lx linker to launch the main
+ * lx executable.
+ */
+typedef struct lx_elf_data {
+ int ed_phdr;
+ int ed_phent;
+ int ed_phnum;
+ int ed_entry;
+ int ed_base;
+ int ed_ldentry;
+} lx_elf_data_t;
+
+#ifdef _KERNEL
+
+typedef struct lx_proc_data {
+ uintptr_t l_handler; /* address of user-space handler */
+ uintptr_t l_tracehandler; /* address of user-space traced handler */
+ uintptr_t l_traceflag; /* address of 32-bit tracing flag */
+ void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */
+ pid_t l_ppid; /* pid of originating parent proc */
+ uint64_t l_ptrace; /* process being observed with ptrace */
+ lx_elf_data_t l_elf_data; /* ELF data for linux executable */
+} lx_proc_data_t;
+
+#ifdef __amd64
+typedef uint64_t lx_affmask_t; /* Tolerates NCPU up to 64 */
+#else
+typedef uint32_t lx_affmask_t; /* Tolerates NCPU up to 32 */
+#endif /* __amd64 */
+
+/*
+ * lx-specific data in the klwp_t
+ */
+typedef struct lx_lwp_data {
+ uint_t br_lwp_flags; /* misc. flags */
+ klwp_t *br_lwp; /* back pointer to container lwp */
+ int br_signal; /* signal to send to parent when */
+ /* clone()'ed child terminates */
+ int br_exitwhy; /* reason for thread (process) exit */
+ int br_exitwhat; /* exit code / killing signal */
+ lx_affmask_t br_affinitymask; /* bitmask of CPU sched affinities */
+ struct user_desc br_tls[LX_TLSNUM];
+ /* descriptors used by libc for TLS */
+ pid_t br_pid; /* converted pid for this thread */
+ pid_t br_tgid; /* thread group ID for this thread */
+ pid_t br_ppid; /* parent pid for this thread */
+ id_t br_ptid; /* parent tid for this thread */
+ void *br_clear_ctidp; /* clone thread id ptr */
+ void *br_set_ctidp; /* clone thread id ptr */
+
+ /*
+ * The following struct is used by lx_clone()
+ * to pass info into fork()
+ */
+ void *br_clone_args;
+
+ /*
+ * Space to save off userland Linux %gs pointer so we can restore it
+ * before calling signal handlers.
+ */
+ greg_t br_ugs;
+
+ uint_t br_ptrace; /* ptrace is active for this LWP */
+} lx_lwp_data_t;
+
+#define BR_CPU_BOUND 0x0001
+
+#define ttolxlwp(t) ((struct lx_lwp_data *)ttolwpbrand(t))
+#define lwptolxlwp(l) ((struct lx_lwp_data *)lwptolwpbrand(l))
+#define ttolxproc(t) ((struct lx_proc_data *)(t)->t_procp->p_brand_data)
+
+void lx_brand_int80_callback(void);
+int64_t lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t);
+
+extern int lx_debug; /* nonzero enables lx_print output */
+#define lx_print if (lx_debug) printf /* NOTE(review): bare `if`; dangling-else hazard at call sites */
+
+#endif /* _KERNEL */
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_BRAND_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h
new file mode 100644
index 0000000000..ac963b015b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_FUTEX_H
+#define _SYS_LX_FUTEX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_FD 2
+#define FUTEX_REQUEUE 3
+#define FUTEX_CMP_REQUEUE 4
+#define FUTEX_MAX_CMD FUTEX_CMP_REQUEUE
+
+extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+ uintptr_t addr2, int val2);
+extern void lx_futex_init(void);
+extern int lx_futex_fini(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_FUTEX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h
new file mode 100644
index 0000000000..12f1aab2b3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_IMPL_H
+#define _LX_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Generic handler pointer: a function returning int64_t whose argument
+ * list is deliberately left unspecified.
+ */
+typedef int64_t (*llfcn_t)();
+
+/*
+ * One entry of the lx_sysent[] table.
+ * NOTE(review): the field meanings below are inferred from the field names;
+ * confirm against the code that populates and walks lx_sysent[].
+ */
+typedef struct lx_sysent {
+ int sy_flags; /* presumably per-call flag bits */
+ char *sy_name; /* presumably the system call name */
+ llfcn_t sy_callc; /* presumably the handler to invoke */
+ char sy_narg; /* presumably the number of arguments */
+} lx_sysent_t;
+
+/* Signature of a systrace hook: seven ulong_t arguments, no return value. */
+typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t,
+ ulong_t, ulong_t);
+
+
+extern lx_sysent_t lx_sysent[];
+
+/*
+ * Hook pointers of type lx_systrace_f.
+ * NOTE(review): presumably called on system call entry and return
+ * respectively, and toggled by the enable/disable routines below - confirm.
+ */
+extern lx_systrace_f *lx_systrace_entry_ptr;
+extern lx_systrace_f *lx_systrace_return_ptr;
+
+extern void lx_brand_systrace_enable(void);
+extern void lx_brand_systrace_disable(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _LX_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
new file mode 100644
index 0000000000..5080c3adae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_LDT_H
+#define _SYS_LINUX_LDT_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/segments.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Segment description in the form exchanged with user space.
+ * LDT_INFO_TO_DESC() and DESC_TO_LDT_INFO() convert between this layout and
+ * a descriptor with usd_* fields; the per-field notes below name the
+ * descriptor field each member maps to in those macros.
+ */
+struct ldt_info {
+ uint_t entry_number; /* descriptor slot; not touched by the macros */
+ uint_t base_addr; /* segment base (USEGD_SETBASE/GETBASE) */
+ uint_t limit; /* segment limit (USEGD_SETLIMIT/GETLIMIT) */
+ uint_t seg_32bit:1, /* <-> usd_def32 */
+ contents:2, /* <-> bits 2-3 of usd_type */
+ read_exec_only:1, /* <-> inverse of bit 1 of usd_type */
+ limit_in_pages:1, /* <-> usd_gran */
+ seg_not_present:1, /* <-> inverse of usd_p */
+ useable:1; /* <-> usd_avl */
+};
+
+/*
+ * True when *info carries the "empty" pattern: base_addr, limit, contents,
+ * seg_32bit, limit_in_pages and useable all 0, with read_exec_only and
+ * seg_not_present both 1. entry_number is not examined.
+ */
+#define LDT_INFO_EMPTY(info) \
+ ((info)->base_addr == 0 && (info)->limit == 0 && \
+ (info)->contents == 0 && (info)->read_exec_only == 1 && \
+ (info)->seg_32bit == 0 && (info)->limit_in_pages == 0 && \
+ (info)->seg_not_present == 1 && (info)->useable == 0)
+
+/*
+ * SETMODE(desc) stores SDP_SHORT in the descriptor's usd_long field on
+ * amd64 and expands to nothing on other platforms.
+ *
+ * The expansion deliberately carries no trailing semicolon: the caller
+ * supplies it, so "SETMODE(desc);" is always exactly one statement and
+ * remains safe inside an unbraced if/else.
+ */
+#if defined(__amd64)
+#define	SETMODE(desc)	(desc)->usd_long = SDP_SHORT
+#else
+#define	SETMODE(desc)
+#endif
+
+/*
+ * Fill the descriptor *desc from the ldt_info *info:
+ * - base and limit are stored with USEGD_SETBASE()/USEGD_SETLIMIT();
+ * - usd_type is built as (contents << 2) | (!read_exec_only << 1) | 0x10
+ * (NOTE(review): 0x10 is presumably the x86 "non-system segment" bit -
+ * confirm against sys/segments.h);
+ * - the privilege level is always SEL_UPL;
+ * - usd_p is the inverse of seg_not_present;
+ * - usd_def32, usd_gran and usd_avl are copied from seg_32bit,
+ * limit_in_pages and useable;
+ * - SETMODE() is applied last.
+ * info->entry_number is not used. Both arguments are evaluated several times.
+ */
+#define LDT_INFO_TO_DESC(info, desc) { \
+ USEGD_SETBASE(desc, (info)->base_addr); \
+ USEGD_SETLIMIT(desc, (info)->limit); \
+ (desc)->usd_type = ((info)->contents << 2) | \
+ ((info)->read_exec_only ^ 1) << 1 | 0x10; \
+ (desc)->usd_dpl = SEL_UPL; \
+ (desc)->usd_p = (info)->seg_not_present ^ 1; \
+ (desc)->usd_def32 = (info)->seg_32bit; \
+ (desc)->usd_gran = (info)->limit_in_pages; \
+ (desc)->usd_avl = (info)->useable; \
+ SETMODE(desc); \
+}
+
+/*
+ * Inverse of LDT_INFO_TO_DESC(): zero *info, then fill it from the
+ * descriptor *desc. entry_number is never assigned, so it is left at 0 by
+ * the bzero() and the caller must set it if it is needed. Both arguments
+ * are evaluated several times.
+ */
+#define DESC_TO_LDT_INFO(desc, info) { \
+ bzero((info), sizeof (*(info))); \
+ (info)->base_addr = USEGD_GETBASE(desc); \
+ (info)->limit = USEGD_GETLIMIT(desc); \
+ (info)->seg_not_present = (desc)->usd_p ^ 1; \
+ (info)->contents = ((desc)->usd_type >> 2) & 3; \
+ (info)->read_exec_only = (((desc)->usd_type >> 1) & 1) ^ 1; \
+ (info)->seg_32bit = (desc)->usd_def32; \
+ (info)->limit_in_pages = (desc)->usd_gran; \
+ (info)->useable = (desc)->usd_avl; \
+}
+
+/*
+ * GDT helpers taking a descriptor slot number.
+ * NOTE(review): presumably lx_set_gdt() installs the given descriptor in
+ * that slot and lx_clear_gdt() removes it again; the definitions are not in
+ * this header - confirm there.
+ */
+extern void lx_set_gdt(int, user_desc_t *);
+extern void lx_clear_gdt(int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_LDT_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_pid.h b/usr/src/uts/common/brand/lx/sys/lx_pid.h
new file mode 100644
index 0000000000..80c8079f0b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_pid.h
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_PID_H
+#define _SYS_LX_PID_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/note.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+/*
+ * Mapping between a Solaris (pid, tid) pair and a Linux pid. Each entry has
+ * two independent links, one for a Solaris-to-Linux hash table and one for
+ * a Linux-to-Solaris hash table.
+ */
+struct lx_pid {
+ pid_t s_pid; /* the solaris pid and ... */
+ id_t s_tid; /* ... tid pair */
+ pid_t l_pid; /* the corresponding linux pid */
+ time_t l_start; /* birthday of this pid */
+ struct pid *l_pidp; /* NOTE(review): presumably backs l_pid */
+ struct lx_pid *stol_next; /* link in stol hash table */
+ struct lx_pid *ltos_next; /* link in ltos hash table */
+};
+
+/*
+ * Entry points of the pid-mapping code.
+ * NOTE(review): the definitions are not in this header, so the summaries
+ * here are inferred from names and signatures only - confirm before relying
+ * on them. lx_lpid_to_spair() presumably translates a Linux pid into the
+ * Solaris pid/tid pair returned through its pointer arguments, and
+ * lx_lwp_ppid() presumably returns the Linux parent pid of the given lwp,
+ * optionally reporting the Solaris pair the same way.
+ */
+extern int lx_pid_assign(kthread_t *);
+extern void lx_pid_reassign(kthread_t *);
+extern void lx_pid_rele(pid_t, id_t);
+extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *);
+extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *);
+extern void lx_pid_init(void);
+extern void lx_pid_fini(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LX_PID_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ptm.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
new file mode 100644
index 0000000000..74bbc939a3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_PTM_LINUX_H
+#define _SYS_PTM_LINUX_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Driver name and minor node name strings for the lx_ptm driver. */
+#define LX_PTM_DRV "lx_ptm"
+#define LX_PTM_MINOR_NODE "lx_ptmajor"
+
+/*
+ * Yields the minor number of dev minus one.
+ * NOTE(review): presumably the index of the matching pts device, with lx_ptm
+ * minor 0 reserved - confirm against the driver.
+ */
+#define LX_PTM_DEV_TO_PTS(dev) (getminor(dev) - 1)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PTM_LINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h
new file mode 100644
index 0000000000..b0ae748f3c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SCHED_H
+#define _SYS_LINUX_SCHED_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux scheduler policies.
+ */
+#define LX_SCHED_OTHER 0
+#define LX_SCHED_FIFO 1
+#define LX_SCHED_RR 2
+
+/* NOTE(review): presumably the highest Linux priority value - confirm. */
+#define LX_PRI_MAX 99
+
+/* Type used for a pid in Linux-facing interfaces. */
+typedef int l_pid_t;
+
+/* Scheduling parameter block: a single priority value. */
+struct lx_sched_param {
+ int lx_sched_prio;
+};
+
+/*
+ * NOTE(review): defined elsewhere; presumably sched_setprocset() fills in a
+ * procset_t for the given Linux pid and do_priocntlsys() issues a priocntl
+ * request against it - confirm against the definitions.
+ */
+extern int sched_setprocset(procset_t *, l_pid_t);
+extern long do_priocntlsys(int, procset_t *, void *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SCHED_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
new file mode 100644
index 0000000000..b4d41d5241
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SYSCALLS_H
+#define _SYS_LINUX_SYSCALLS_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+
+/*
+ * In-kernel handlers for Linux system calls. Every handler returns long and
+ * is declared without a parameter list, so these declarations do not check
+ * arguments at call sites.
+ * NOTE(review): the missing prototypes are presumably intentional so that
+ * handlers with different arities can share one table - confirm before
+ * adding parameter lists.
+ */
+extern long lx_brk();
+extern long lx_getpid();
+extern long lx_getppid();
+extern long lx_clone();
+extern long lx_kill();
+extern long lx_tkill();
+extern long lx_modify_ldt();
+extern long lx_gettid();
+extern long lx_futex();
+extern long lx_get_thread_area();
+extern long lx_sched_getparam();
+extern long lx_sched_getscheduler();
+extern long lx_sched_rr_get_interval();
+extern long lx_sched_setparam();
+extern long lx_sched_setscheduler();
+extern long lx_set_thread_area();
+extern long lx_set_tid_address();
+extern long lx_setresgid();
+extern long lx_setresgid16();
+extern long lx_setresuid();
+extern long lx_setresuid16();
+extern long lx_sysinfo();
+extern long lx_setgroups();
+
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_LINUX_SYSCALLS_H */
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
new file mode 100644
index 0000000000..25a719986e
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+
+/*
+ * The brk() system call needs to be in-kernel because Linux expects a call to
+ * brk(0) to return the current breakpoint. In Solaris, the process breakpoint
+ * is set up and managed by libc. Due to the way we link our libraries and the
+ * need for Linux to manage its own breakpoint, this has to remain in the
+ * kernel.
+ */
+extern int brk(caddr_t);
+
+/*
+ * Linux-style brk(2).
+ *
+ * nva is the requested new break address. A non-NULL nva is handed to the
+ * native brk(); its result is deliberately ignored and the lwp's errno is
+ * cleared afterwards. In every case, including nva == NULL and a failed
+ * resize, the return value is the process's current break
+ * (p_brkbase + p_brksize).
+ */
+long
+lx_brk(caddr_t nva)
+{
+	proc_t *p = curproc;
+	klwp_t *lwp = ttolwp(curthread);
+
+	if (nva != NULL) {
+		(void) brk(nva);
+
+		/*
+		 * Despite claims to the contrary in the manpage, when Linux
+		 * brk() fails, errno is left unchanged.
+		 */
+		lwp->lwp_errno = 0;
+	}
+
+	return ((long)(p->p_brkbase + p->p_brksize));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
new file mode 100644
index 0000000000..2af3c00bae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Bits of the flags argument to lx_clone(). LX_CSIGNAL masks the low byte,
+ * which lx_clone() reads as a signal number; the LX_CLONE_* values are
+ * individual option bits.
+ */
+#define LX_CSIGNAL 0x000000ff
+#define LX_CLONE_VM 0x00000100
+#define LX_CLONE_FS 0x00000200
+#define LX_CLONE_FILES 0x00000400
+#define LX_CLONE_SIGHAND 0x00000800
+#define LX_CLONE_PID 0x00001000
+#define LX_CLONE_PTRACE 0x00002000
+#define LX_CLONE_PARENT 0x00008000
+#define LX_CLONE_THREAD 0x00010000
+#define LX_CLONE_SYSVSEM 0x00040000
+#define LX_CLONE_SETTLS 0x00080000
+#define LX_CLONE_PARENT_SETTID 0x00100000
+#define LX_CLONE_CHILD_CLEARTID 0x00200000
+#define LX_CLONE_DETACH 0x00400000
+#define LX_CLONE_CHILD_SETTID 0x01000000
+
+/*
+ * Our lwp has already been created at this point, so this routine is
+ * responsible for setting up all the state needed to track this as a
+ * linux cloned thread.
+ *
+ * flags - LX_CLONE_* option bits plus a signal number in LX_CSIGNAL
+ * stkp - not used by this routine
+ * ptidp - user address that receives br_pid when LX_CLONE_PARENT_SETTID
+ * is set and the pointer is non-NULL
+ * ldtinfo - user address of a struct ldt_info, read only when
+ * LX_CLONE_SETTLS is set
+ * ctidp - user address that receives br_pid when LX_CLONE_CHILD_SETTID
+ * is set and the pointer is non-NULL; also remembered in
+ * br_clear_ctidp when LX_CLONE_CHILD_CLEARTID is set
+ *
+ * Returns the calling lwp's br_pid on success. Fails with EINVAL for a
+ * signal number above MAXSIG, an empty ldt_info, or an entry number outside
+ * GDT_TLSMIN..GDT_TLSMAX, and with EFAULT if user memory cannot be read or
+ * written.
+ */
+/* ARGSUSED */
+long
+lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp)
+{
+ struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+ struct ldt_info info;
+ struct user_desc descr;
+ int tls_index;
+ int entry = -1;
+ int signo;
+
+ /* The low byte of flags carries a signal number. */
+ signo = flags & LX_CSIGNAL;
+ if (signo < 0 || signo > MAXSIG)
+ return (set_errno(EINVAL));
+
+ if (flags & LX_CLONE_SETTLS) {
+ if (copyin((caddr_t)ldtinfo, &info, sizeof (info)))
+ return (set_errno(EFAULT));
+
+ if (LDT_INFO_EMPTY(&info))
+ return (set_errno(EINVAL));
+
+ entry = info.entry_number;
+ if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+ return (set_errno(EINVAL));
+
+ /* br_tls[] is indexed relative to the first TLS slot. */
+ tls_index = entry - GDT_TLSMIN;
+
+ /*
+ * Convert the user-space structure into a real x86
+ * descriptor and copy it into this LWP's TLS array. We
+ * also load it into the GDT.
+ */
+ LDT_INFO_TO_DESC(&info, &descr);
+ bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr));
+ lx_set_gdt(entry, &lwpd->br_tls[tls_index]);
+ } else {
+ /* No TLS requested; entry stays -1 so nothing is undone below. */
+ tls_index = -1;
+ bzero(&descr, sizeof (descr));
+ }
+
+ lwpd->br_clear_ctidp =
+ (flags & LX_CLONE_CHILD_CLEARTID) ? ctidp : NULL;
+
+ /* LX_CLONE_DETACH overrides the requested signal. */
+ if (signo && ! (flags & LX_CLONE_DETACH))
+ lwpd->br_signal = signo;
+ else
+ lwpd->br_signal = 0;
+
+ if (flags & LX_CLONE_THREAD)
+ lwpd->br_tgid = curthread->t_procp->p_pid;
+
+ /*
+ * NOTE(review): a br_ppid of 0 presumably tells the parent-pid lookup
+ * to resolve the parent some other way - confirm in lx_lwp_ppid().
+ */
+ if (flags & LX_CLONE_PARENT)
+ lwpd->br_ppid = 0;
+
+ /*
+ * Publish the new id to user space. If either store faults, the GDT
+ * entry installed above (if any) is cleared before failing; the other
+ * lwpd fields set above are left as they are.
+ */
+ if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) &&
+ (suword32(ctidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+ if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) &&
+ (suword32(ptidp, lwpd->br_pid) != 0)) {
+ if (entry >= 0)
+ lx_clear_gdt(entry);
+ return (set_errno(EFAULT));
+ }
+
+ return (lwpd->br_pid);
+}
+
+/*
+ * Remember tidp in the calling lwp's br_clear_ctidp field and return that
+ * lwp's br_pid.
+ */
+long
+lx_set_tid_address(int *tidp)
+{
+	struct lx_lwp_data *self;
+
+	self = ttolxlwp(curthread);
+	self->br_clear_ctidp = tidp;
+	return (self->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
new file mode 100644
index 0000000000..ceb6f330aa
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
@@ -0,0 +1,471 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <vm/page.h>
+#include <sys/mman.h>
+#include <sys/timer.h>
+#include <sys/condvar.h>
+#include <sys/inttypes.h>
+#include <sys/lx_futex.h>
+
+/*
+ * Futexes are a Linux-specific implementation of inter-process mutexes.
+ * They are designed to use shared memory for simple, uncontested
+ * operations, and rely on the kernel to resolve any contention issues.
+ *
+ * Most of the information in this section comes from the paper "Futexes
+ * Are Tricky", by Ulrich Drepper. This paper is currently available at:
+ * http://people.redhat.com/~drepper/futex.pdf.
+ *
+ * A futex is itself a 4-byte integer, which must be 4-byte aligned. The
+ * value of this integer is expected to be modified using user-level atomic
+ * operations. The futex(4) design itself does not impose any semantic
+ * constraints on the value stored in the futex; it is up to the
+ * application to define its own protocol.
+ *
+ * When the application decides that kernel intervention is required, it
+ * will use the futex(2) system call. There are 5 different operations
+ * that can be performed on a futex, using this system call. Since this
+ * interface has evolved over time, there are several different prototypes
+ * available to the user. Fortunately, there is only a single kernel-level
+ * interface:
+ *
+ * long sys_futex(void *futex1, int cmd, int val1,
+ * struct timespec *timeout, void *futex2, int val2)
+ *
+ * The kernel-level operations that may be performed on a futex are:
+ *
+ * FUTEX_WAIT
+ *
+ * Atomically verify that futex1 contains the value val1. If it
+ * doesn't, return EWOULDBLOCK. If it does contain the expected
+ * value, the thread will sleep until somebody performs a FUTEX_WAKE
+ * on the futex. The caller may also specify a timeout, indicating
+ * the maximum time the thread should sleep. If the timer expires,
+ * the call returns ETIMEDOUT. If the thread is awoken with a signal,
+ * the call returns EINTR. Otherwise, the call returns 0.
+ *
+ * FUTEX_WAKE
+ *
+ * Wake up val1 processes that are waiting on futex1. The call
+ * returns the number of blocked threads that were woken up.
+ *
+ * FUTEX_CMP_REQUEUE
+ *
+ * If the value stored in futex1 matches that passed in val2, wake
+ * up val1 processes that are waiting on futex1. Otherwise, return
+ * EAGAIN.
+ *
+ * If there are more than val1 threads waiting on the futex, remove
+ * the remaining threads from this futex, and requeue them on futex2.
+ * The caller can limit the number of threads being requeued by
+ * encoding an integral numerical value in the position usually used
+ * for the timeout pointer.
+ *
+ * The call returns the number of blocked threads that were woken up
+ * or requeued.
+ *
+ * FUTEX_REQUEUE
+ *
+ * Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
+ * This command has been declared broken and obsolete, but we still
+ * need to support it.
+ *
+ * FUTEX_FD
+ *
+ * Return a file descriptor, which can be used to refer to the futex.
+ * We don't support this operation.
+ */
+
+/*
+ * This structure is used to track all the threads currently waiting on a
+ * futex. There is one fwaiter_t for each blocked thread. We store all
+ * fwaiter_t's in a hash structure, indexed by the memid_t of the integer
+ * containing the futex's value.
+ *
+ * At the moment, all fwaiter_t's for a single futex are simply dumped into
+ * the hash bucket. If futex contention ever becomes a hot path, we can
+ * chain a single futex's waiters together.
+ */
+typedef struct fwaiter {
+ memid_t fw_memid; /* memid of the user-space futex */
+ kcondvar_t fw_cv; /* cond var the waiter sleeps on */
+ struct fwaiter *fw_next; /* hash queue */
+ struct fwaiter *fw_prev; /* hash queue */
+ volatile int fw_woken; /* set to 1 by the waker before cv_signal() */
+} fwaiter_t;
+
+/*
+ * Copy / compare the two words of a memid_t. Note the argument order of
+ * MEMID_COPY: source first, destination second.
+ */
+#define MEMID_COPY(s, d) \
+ { (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; }
+#define MEMID_EQUAL(s, d) \
+ ((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1])
+
+/*
+ * HASH_FUNC(id) maps a memid_t to a bucket index in 0..HASH_SIZE-1 by
+ * summing several right-shifted copies of val[1] (shifted by PAGESHIFT) and
+ * val[0] (shifted by 3) and masking the sum with HASH_SIZE - 1.
+ */
+/* Borrowed from the page freelist hash code. */
+#define HASH_SHIFT_SZ 7
+#define HASH_SIZE (1 << HASH_SHIFT_SZ)
+#define HASH_FUNC(id) \
+ ((((uintptr_t)((id)->val[1]) >> PAGESHIFT) + \
+ ((uintptr_t)((id)->val[1]) >> (PAGESHIFT + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> 3) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) + \
+ ((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) & \
+ (HASH_SIZE - 1))
+
+/*
+ * futex_hash[i] is the head of the doubly-linked list of waiters in bucket
+ * i; futex_hash_lock[i] is the mutex protecting that list.
+ */
+static fwaiter_t *futex_hash[HASH_SIZE];
+static kmutex_t futex_hash_lock[HASH_SIZE];
+
+/*
+ * Link fwp in at the head of the hash chain selected by its fw_memid.
+ * The caller must already hold the lock for that bucket.
+ */
+static void
+futex_hashin(fwaiter_t *fwp)
+{
+	int bucket = HASH_FUNC(&fwp->fw_memid);
+	fwaiter_t *old_head;
+
+	ASSERT(MUTEX_HELD(&futex_hash_lock[bucket]));
+
+	old_head = futex_hash[bucket];
+	if (old_head != NULL)
+		old_head->fw_prev = fwp;
+
+	fwp->fw_next = old_head;
+	fwp->fw_prev = NULL;
+	futex_hash[bucket] = fwp;
+}
+
+/*
+ * Unlink fwp from the hash chain selected by its fw_memid and clear its
+ * list pointers. The caller must already hold the lock for that bucket.
+ */
+static void
+futex_hashout(fwaiter_t *fwp)
+{
+	int bucket = HASH_FUNC(&fwp->fw_memid);
+	fwaiter_t *before = fwp->fw_prev;
+	fwaiter_t *after = fwp->fw_next;
+
+	ASSERT(MUTEX_HELD(&futex_hash_lock[bucket]));
+
+	/* Splice the neighbours together around fwp. */
+	if (before != NULL)
+		before->fw_next = after;
+	if (after != NULL)
+		after->fw_prev = before;
+
+	/* If fwp was the first entry, its successor becomes the head. */
+	if (futex_hash[bucket] == fwp)
+		futex_hash[bucket] = after;
+
+	fwp->fw_next = NULL;
+	fwp->fw_prev = NULL;
+}
+
+/*
+ * Go to sleep until somebody does a WAKE operation on this futex, we get a
+ * signal, or the timeout expires.
+ *
+ * memid   - identity of the futex, used to pick the hash bucket
+ * addr    - user address of the futex word
+ * val     - value the futex word must still hold for us to sleep
+ * timeout - absolute timeout, or NULL to wait indefinitely
+ *
+ * The futex word is fetched and compared with the bucket lock held, so a
+ * waker that takes the same lock cannot slip in between the check and the
+ * sleep.
+ *
+ * Returns 0 when woken by futex_wake()/futex_requeue(); otherwise the
+ * result of set_errno() with EFAULT (futex word unreadable), EWOULDBLOCK
+ * (word no longer equals val), ETIMEDOUT or EINTR.
+ */
+static int
+futex_wait(memid_t *memid, caddr_t addr, int val, timespec_t *timeout)
+{
+	int err, ret;
+	int32_t curval;
+	fwaiter_t fw;
+	int index;
+
+	fw.fw_woken = 0;
+	fw.fw_next = NULL;
+	fw.fw_prev = NULL;
+	MEMID_COPY(memid, &fw.fw_memid);
+	cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL);
+
+	index = HASH_FUNC(&fw.fw_memid);
+	mutex_enter(&futex_hash_lock[index]);
+
+	if (fuword32(addr, (uint32_t *)&curval)) {
+		err = set_errno(EFAULT);
+		goto out;
+	}
+	if (curval != val) {
+		err = set_errno(EWOULDBLOCK);
+		goto out;
+	}
+
+	futex_hashin(&fw);
+
+	err = 0;
+	while ((fw.fw_woken == 0) && (err == 0)) {
+		/* < 0: timed out, 0: interrupted by a signal, > 0: signalled */
+		ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index],
+		    timeout, timechanged);
+		if (ret < 0)
+			err = set_errno(ETIMEDOUT);
+		else if (ret == 0)
+			err = set_errno(EINTR);
+	}
+
+	/*
+	 * The futex is normally hashed out in wakeup.  If we timed out or
+	 * got a signal, we need to hash it out here instead.
+	 */
+	if (fw.fw_woken == 0)
+		futex_hashout(&fw);
+
+out:
+	mutex_exit(&futex_hash_lock[index]);
+
+	/*
+	 * fw lives on our stack and is no longer reachable through the hash
+	 * table, so nobody else can be using the condition variable.  Pair
+	 * the cv_init() above with its cv_destroy() on every exit path.
+	 */
+	cv_destroy(&fw.fw_cv);
+
+	return (err);
+}
+
+/*
+ * Wake at most wake_threads of the threads blocked on the futex identified
+ * by memid, and return how many were woken.  Each woken waiter is removed
+ * from the hash chain and flagged as woken before its condition variable
+ * is signalled.
+ */
+static int
+futex_wake(memid_t *memid, int wake_threads)
+{
+	int bucket = HASH_FUNC(memid);
+	kmutex_t *lock = &futex_hash_lock[bucket];
+	fwaiter_t *cur, *following;
+	int nwoken = 0;
+
+	mutex_enter(lock);
+
+	cur = futex_hash[bucket];
+	while (cur != NULL && nwoken < wake_threads) {
+		/* Grab the successor first: hashout clears cur's links. */
+		following = cur->fw_next;
+
+		if (MEMID_EQUAL(&cur->fw_memid, memid)) {
+			futex_hashout(cur);
+			cur->fw_woken = 1;
+			cv_signal(&cur->fw_cv);
+			nwoken++;
+		}
+
+		cur = following;
+	}
+
+	mutex_exit(lock);
+
+	return (nwoken);
+}
+
+/*
+ * Wake up to wake_threads waiting on the futex at memid.  If there are
+ * more than that many threads waiting, requeue at most requeue_threads of
+ * the remaining threads on the futex at requeue_memid.
+ *
+ * If cmpval is non-NULL, the 32-bit word at user address addr is fetched
+ * with the bucket locks held and compared with *cmpval before anything is
+ * woken or moved; the call fails with EFAULT if the fetch fails and with
+ * EAGAIN if the values differ.
+ *
+ * Returns the number of waiters woken or requeued, or the result of
+ * set_errno() on failure.
+ */
+static int
+futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads,
+    ulong_t requeue_threads, caddr_t addr, int *cmpval)
+{
+	fwaiter_t *fwp, *next;
+	int index1, index2;
+	int ret = 0;
+	int32_t curval;
+	kmutex_t *l1, *l2;
+
+	/*
+	 * To ensure that we don't miss a wakeup if the value of cmpval
+	 * changes, we need to grab locks on both the original and new hash
+	 * buckets.  To avoid deadlock, we always grab the lower-indexed
+	 * lock first.
+	 */
+	index1 = HASH_FUNC(memid);
+	index2 = HASH_FUNC(requeue_memid);
+
+	if (index1 == index2) {
+		l1 = &futex_hash_lock[index1];
+		l2 = NULL;
+	} else if (index1 < index2) {
+		l1 = &futex_hash_lock[index1];
+		l2 = &futex_hash_lock[index2];
+	} else {
+		l1 = &futex_hash_lock[index2];
+		l2 = &futex_hash_lock[index1];
+	}
+
+	mutex_enter(l1);
+	if (l2 != NULL)
+		mutex_enter(l2);
+
+	if (cmpval != NULL) {
+		if (fuword32(addr, (uint32_t *)&curval)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (curval != *cmpval) {
+			ret = -EAGAIN;
+			goto out;
+		}
+	}
+
+	for (fwp = futex_hash[index1]; fwp; fwp = next) {
+		/* Save the successor: hashout/hashin rewrite fwp's links. */
+		next = fwp->fw_next;
+		if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+			continue;
+
+		/*
+		 * Once the wake quota has been used up, stop as soon as the
+		 * requeue quota is exhausted as well.  This is checked before
+		 * the waiter is touched so that a requeue limit of zero
+		 * really moves nobody.
+		 */
+		if (ret >= wake_threads &&
+		    (ret - wake_threads) >= requeue_threads)
+			break;
+
+		futex_hashout(fwp);
+		if (ret++ < wake_threads) {
+			fwp->fw_woken = 1;
+			cv_signal(&fwp->fw_cv);
+		} else {
+			/* Re-key the waiter and move it to the new bucket. */
+			MEMID_COPY(requeue_memid, &fwp->fw_memid);
+			futex_hashin(fwp);
+		}
+	}
+
+out:
+	if (l2 != NULL)
+		mutex_exit(l2);
+	mutex_exit(l1);
+
+	if (ret < 0)
+		return (set_errno(-ret));
+	return (ret);
+}
+
+/*
+ * Copy in the relative timeout provided by the application and convert it
+ * to an absolute timeout.
+ *
+ * lx_timeout - user address of the relative timeout
+ * timeout - kernel buffer that receives the absolute timeout
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, or EINVAL if
+ * itimerspecfix() rejects the value. The error is returned as a plain
+ * errno value; set_errno() is left to the caller.
+ */
+static int
+get_timeout(void *lx_timeout, timestruc_t *timeout)
+{
+ timestruc_t now;
+
+ if (get_udatamodel() == DATAMODEL_NATIVE) {
+ if (copyin(lx_timeout, timeout, sizeof (timestruc_t)))
+ return (EFAULT);
+ }
+#ifdef _SYSCALL32_IMPL
+ else {
+ /* Non-native caller: read the 32-bit layout and widen it. */
+ timestruc32_t timeout32;
+ if (copyin(lx_timeout, &timeout32, sizeof (timestruc32_t)))
+ return (EFAULT);
+ timeout->tv_sec = (time_t)timeout32.tv_sec;
+ timeout->tv_nsec = timeout32.tv_nsec;
+ }
+#endif
+ gethrestime(&now);
+
+ if (itimerspecfix(timeout))
+ return (EINVAL);
+
+ /* relative + current time = absolute deadline */
+ timespecadd(timeout, &now);
+ return (0);
+}
+
+/*
+ * Entry point for the Linux futex system call.
+ *
+ * addr       - user address of the futex word (futex1); must be 4-byte
+ *              aligned
+ * cmd        - one of the FUTEX_* commands
+ * val        - FUTEX_WAIT: expected value of the futex word;
+ *              FUTEX_WAKE and the requeue commands: number of threads to
+ *              wake
+ * lx_timeout - FUTEX_WAIT: user address of a relative timeout, or 0 for
+ *              none; requeue commands: the maximum number of threads to
+ *              requeue, passed as an integer
+ * addr2      - requeue commands: user address of the futex to requeue onto
+ * val2       - FUTEX_CMP_REQUEUE: value futex1 must still hold
+ *
+ * Returns the command's result (0 for a successful wait, a thread count
+ * for wake/requeue) or the result of set_errno() on failure.
+ */
+long
+lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+    uintptr_t addr2, int val2)
+{
+	struct as *as = curproc->p_as;
+	memid_t memid, requeue_memid;
+	timestruc_t timeout;
+	timestruc_t *tptr = NULL;
+	int requeue_threads = 0;
+	int *requeue_cmp = NULL;
+	int rval = 0;
+
+	/* must be aligned on int boundary */
+	if (addr & 0x3)
+		return (set_errno(EINVAL));
+
+	/* Sanity check the futex command */
+	if (cmd < 0 || cmd > FUTEX_MAX_CMD)
+		return (set_errno(EINVAL));
+
+	/*
+	 * FUTEX_FD is not supported.  Fail explicitly rather than falling
+	 * through and returning 0, which the caller would take for a valid
+	 * file descriptor.
+	 */
+	if (cmd == FUTEX_FD)
+		return (set_errno(ENOSYS));
+
+	/* Copy in the timeout structure from userspace. */
+	if (cmd == FUTEX_WAIT && lx_timeout != 0) {
+		rval = get_timeout((timespec_t *)lx_timeout, &timeout);
+		if (rval != 0)
+			return (set_errno(rval));
+		tptr = &timeout;
+	}
+
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) {
+		if (cmd == FUTEX_CMP_REQUEUE)
+			requeue_cmp = &val2;
+
+		/*
+		 * lx_timeout is nominally a pointer to a userspace
+		 * address.  For these two commands, it actually contains
+		 * an integer which indicates the maximum number of threads
+		 * to requeue.  This is horrible, and I'm sorry.
+		 */
+		requeue_threads = (int)lx_timeout;
+	}
+
+	/*
+	 * Translate the process-specific, user-space futex virtual
+	 * address(es) to universal memid.
+	 */
+	rval = as_getmemid(as, (void *)addr, &memid);
+	if (rval != 0)
+		return (set_errno(rval));
+
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) {
+		rval = as_getmemid(as, (void *)addr2, &requeue_memid);
+		if (rval)
+			return (set_errno(rval));
+	}
+
+	switch (cmd) {
+	case FUTEX_WAIT:
+		rval = futex_wait(&memid, (void *)addr, val, tptr);
+		break;
+
+	case FUTEX_WAKE:
+		rval = futex_wake(&memid, val);
+		break;
+
+	case FUTEX_CMP_REQUEUE:
+	case FUTEX_REQUEUE:
+		/*
+		 * The value compared for FUTEX_CMP_REQUEUE is the one stored
+		 * in futex1, so the word to fetch lives at addr, not at the
+		 * requeue target addr2.
+		 */
+		rval = futex_requeue(&memid, &requeue_memid, val,
+		    requeue_threads, (void *)addr, requeue_cmp);
+
+		break;
+	}
+
+	return (rval);
+}
+
+/*
+ * Prepare the futex hash table for use: empty every bucket and initialize
+ * the mutex that protects each one.
+ */
+void
+lx_futex_init(void)
+{
+	int bucket = 0;
+
+	bzero(futex_hash, sizeof (futex_hash));
+
+	while (bucket < HASH_SIZE) {
+		mutex_init(&futex_hash_lock[bucket], NULL, MUTEX_DEFAULT,
+		    NULL);
+		bucket++;
+	}
+}
+
+/*
+ * Report whether the futex hash table is idle.  Buckets are examined in
+ * order, each under its own lock; the scan stops at the first bucket that
+ * still holds a waiter.  Returns EBUSY in that case and 0 if every bucket
+ * is empty.
+ */
+int
+lx_futex_fini(void)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		int busy;
+
+		mutex_enter(&futex_hash_lock[bucket]);
+		busy = (futex_hash[bucket] != NULL);
+		mutex_exit(&futex_hash_lock[bucket]);
+
+		if (busy)
+			return (EBUSY);
+	}
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
new file mode 100644
index 0000000000..91dc24c6d6
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+/*
+ * Return the process id the calling Linux thread should see.
+ *
+ * The zone's init process is always reported as pid 1.  Every other caller
+ * gets the thread group id (br_tgid) stored in its brand-specific lwp data.
+ */
+long
+lx_getpid(void)
+{
+	lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+	long rv;
+
+	/* Is the caller the init process of its zone? */
+	if (curproc->p_pid == curproc->p_zone->zone_proc_initpid)
+		rv = 1;
+	else
+		rv = lwpd->br_tgid;
+
+	return (rv);
+}
+
+/*
+ * Return the parent pid of the calling lwp, as computed by lx_lwp_ppid()
+ * with NULL passed for its two trailing arguments.
+ */
+long
+lx_getppid(void)
+{
+	long ppid;
+
+	ppid = lx_lwp_ppid(ttolwp(curthread), NULL, NULL);
+	return (ppid);
+}
+
+/*
+ * Return the thread id of the caller: the br_pid field of the current
+ * thread's brand-specific lwp data.
+ */
+long
+lx_gettid(void)
+{
+	return (ttolxlwp(curthread)->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c
new file mode 100644
index 0000000000..077194ee25
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+
+/*
+ * Linux-side id types used by the system calls in this file: the "16"
+ * variants are 16 bits wide, the plain variants are unsigned ints. Because
+ * all four are unsigned, a "no change" request of -1 arrives as the
+ * all-ones value of the type.
+ */
+typedef ushort_t l_uid16_t;
+typedef ushort_t l_gid16_t;
+typedef uint_t l_uid_t;
+typedef uint_t l_gid_t;
+
+/*
+ * Widen a 16-bit uid to the 32-bit type. A 16-bit -1 is mapped to a 32-bit
+ * -1 so that it is still recognized as -1 by lx_setresuid(); any other
+ * value is simply zero-extended by the cast.
+ */
+#define LINUX_UID16_TO_UID32(uid16) \
+ (((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16))
+
+/*
+ * Same conversion as LINUX_UID16_TO_UID32, for group ids.
+ */
+#define LINUX_GID16_TO_GID32(gid16) \
+ (((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16))
+
+/* Upper bound on the group count that lx_setgroups() will clamp. */
+#define LX_NGROUPS_MAX 32
+/* Defined outside this file; lx_setgroups() forwards to it. */
+extern int setgroups(int, gid_t *);
+
+/*
+ * This function is based on setreuid in common/syscall/uid.c and exists
+ * because Solaris does not have a way to explicitly set the saved uid (suid)
+ * from any other system call.
+ *
+ * Sets the real, effective and saved uids of the calling process. An
+ * argument of -1 leaves the corresponding id unchanged. Returns 0 on
+ * success; otherwise returns the result of set_errno() with EINVAL (an id
+ * is above MAXUID) or EPERM (the change was refused).
+ */
+long
+lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid)
+{
+ proc_t *p;
+ int error = 0;
+ int do_nocd = 0;
+ int uidchge = 0;
+ uid_t oldruid = ruid;
+ cred_t *cr, *newcr;
+ zoneid_t zoneid = getzoneid();
+
+ /*
+ * Range-check every id that is not -1. l_uid_t is unsigned, so the
+ * "< 0" comparisons can never be true; only the MAXUID bound matters.
+ */
+ if ((ruid != -1 && (ruid < 0 || ruid > MAXUID)) ||
+ (euid != -1 && (euid < 0 || euid > MAXUID)) ||
+ (suid != -1 && (suid < 0 || suid > MAXUID))) {
+ error = EINVAL;
+ goto done;
+ }
+
+ /*
+ * Need to pre-allocate the new cred structure before grabbing
+ * the p_crlock mutex.
+ */
+ newcr = cralloc();
+
+ p = ttoproc(curthread);
+
+retry:
+ mutex_enter(&p->p_crlock);
+ cr = p->p_cred;
+
+ /*
+ * Each requested id is acceptable without further checks if it equals
+ * one of the caller's current real, effective or saved uids. Otherwise
+ * secpolicy_allow_setid() is consulted, and a non-zero return from it
+ * results in EPERM. The three ids are checked in the order ruid, euid,
+ * suid and checking stops at the first refusal.
+ */
+ if (ruid != -1 &&
+ ruid != cr->cr_ruid && ruid != cr->cr_uid &&
+ ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) {
+ error = EPERM;
+ } else if (euid != -1 &&
+ euid != cr->cr_ruid && euid != cr->cr_uid &&
+ euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) {
+ error = EPERM;
+ } else if (suid != -1 &&
+ suid != cr->cr_ruid && suid != cr->cr_uid &&
+ suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) {
+ error = EPERM;
+ } else {
+ /* uidchge ensures the upcount increment happens at most once. */
+ if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) {
+ /*
+ * The ruid of the process is going to change. In order
+ * to avoid a race condition involving the
+ * process count associated with the newly given ruid,
+ * we increment the count before assigning the
+ * credential to the process.
+ * To do that, we'll have to take pidlock, so we first
+ * release p_crlock.
+ */
+ mutex_exit(&p->p_crlock);
+ uidchge = 1;
+ mutex_enter(&pidlock);
+ upcount_inc(ruid, zoneid);
+ mutex_exit(&pidlock);
+ /*
+ * As we released p_crlock we can't rely on the cr
+ * we read. So retry the whole thing.
+ */
+ goto retry;
+ }
+ /*
+ * Install newcr as the process credential, initialized from the
+ * current one. The hold taken on cr here is dropped by the
+ * crfree(cr) at the end of this branch, after cr has been
+ * compared against the updated newcr.
+ */
+ crhold(cr);
+ crcopy_to(cr, newcr);
+ p->p_cred = newcr;
+
+ if (euid != -1)
+ newcr->cr_uid = euid;
+ if (suid != -1)
+ newcr->cr_suid = suid;
+ if (ruid != -1) {
+ /* Remember the previous ruid for the upcount_dec below. */
+ oldruid = newcr->cr_ruid;
+ newcr->cr_ruid = ruid;
+ ASSERT(ruid != oldruid ? uidchge : 1);
+ }
+
+ /*
+ * A process that gives up its privilege
+ * must be marked to produce no core dump.
+ */
+ if ((cr->cr_uid != newcr->cr_uid ||
+ cr->cr_ruid != newcr->cr_ruid ||
+ cr->cr_suid != newcr->cr_suid))
+ do_nocd = 1;
+
+ crfree(cr);
+ }
+ mutex_exit(&p->p_crlock);
+
+ /*
+ * We decrement the number of processes associated with the oldruid
+ * to match the increment above, even if the ruid of the process
+ * did not change or an error occurred (oldruid == uid).
+ */
+ if (uidchge) {
+ ASSERT(oldruid != -1 && ruid != -1);
+ mutex_enter(&pidlock);
+ upcount_dec(oldruid, zoneid);
+ mutex_exit(&pidlock);
+ }
+
+ if (error == 0) {
+ if (do_nocd) {
+ mutex_enter(&p->p_lock);
+ p->p_flag |= SNOCD;
+ mutex_exit(&p->p_lock);
+ }
+ crset(p, newcr); /* broadcast to process threads */
+ goto done;
+ }
+ /* The change was refused: newcr was never installed, so release it. */
+ crfree(newcr);
+done:
+ if (error)
+ return (set_errno(error));
+ else
+ return (0);
+}
+
+/*
+ * 16-bit flavour of lx_setresuid(): widen each uid with
+ * LINUX_UID16_TO_UID32 (which keeps -1 meaning -1) and hand the result
+ * to the 32-bit implementation.
+ */
+long
+lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16)
+{
+	l_uid_t ruid = LINUX_UID16_TO_UID32(ruid16);
+	l_uid_t euid = LINUX_UID16_TO_UID32(euid16);
+	l_uid_t suid = LINUX_UID16_TO_UID32(suid16);
+
+	return (lx_setresuid(ruid, euid, suid));
+}
+
+/*
+ * Helper for lx_setresgid(): returns non-zero when the caller may not
+ * switch to `gid'.  A gid of -1 (no change) or one that already matches
+ * the real, effective or saved gid in `cr' is always allowed; anything
+ * else is allowed only if secpolicy_allow_setid() returns 0.
+ */
+static int
+lx_setresgid_denied(cred_t *cr, l_gid_t gid)
+{
+	if (gid == -1)
+		return (0);
+	if (gid == cr->cr_rgid || gid == cr->cr_gid || gid == cr->cr_sgid)
+		return (0);
+	return (secpolicy_allow_setid(cr, -1, B_FALSE) != 0);
+}
+
+/*
+ * Set the real, effective and saved gids of the calling process; -1 leaves
+ * the corresponding id untouched.  Modelled on setregid in
+ * common/syscall/gid.c.  Returns 0, or set_errno() of EINVAL (id above
+ * MAXUID) or EPERM (change refused).
+ */
+long
+lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid)
+{
+	proc_t *p = ttoproc(curthread);
+	cred_t *oldcr, *newcr;
+	int nocore = 0;
+	int denied;
+
+	if ((rgid != -1 && (rgid < 0 || rgid > MAXUID)) ||
+	    (egid != -1 && (egid < 0 || egid > MAXUID)) ||
+	    (sgid != -1 && (sgid < 0 || sgid > MAXUID)))
+		return (set_errno(EINVAL));
+
+	/* The new cred must be allocated before p_crlock is taken. */
+	newcr = cralloc();
+
+	mutex_enter(&p->p_crlock);
+	oldcr = p->p_cred;
+
+	/* Checked in the order rgid, egid, sgid; stops at first refusal. */
+	denied = lx_setresgid_denied(oldcr, rgid) ||
+	    lx_setresgid_denied(oldcr, egid) ||
+	    lx_setresgid_denied(oldcr, sgid);
+
+	if (!denied) {
+		crhold(oldcr);
+		crcopy_to(oldcr, newcr);
+		p->p_cred = newcr;
+
+		if (egid != -1)
+			newcr->cr_gid = egid;
+		if (sgid != -1)
+			newcr->cr_sgid = sgid;
+		if (rgid != -1)
+			newcr->cr_rgid = rgid;
+
+		/*
+		 * Any actual change of gid means the process must no
+		 * longer produce a core dump.
+		 */
+		if (oldcr->cr_gid != newcr->cr_gid ||
+		    oldcr->cr_rgid != newcr->cr_rgid ||
+		    oldcr->cr_sgid != newcr->cr_sgid)
+			nocore = 1;
+
+		crfree(oldcr);
+	}
+	mutex_exit(&p->p_crlock);
+
+	if (denied) {
+		crfree(newcr);
+		return (set_errno(EPERM));
+	}
+
+	if (nocore) {
+		mutex_enter(&p->p_lock);
+		p->p_flag |= SNOCD;
+		mutex_exit(&p->p_lock);
+	}
+	crset(p, newcr);	/* broadcast to process threads */
+
+	return (0);
+}
+
+/*
+ * 16-bit flavour of lx_setresgid(): widen each gid with
+ * LINUX_GID16_TO_GID32 (which keeps -1 meaning -1) and hand the result
+ * to the 32-bit implementation.
+ */
+long
+lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16)
+{
+	l_gid_t rgid = LINUX_GID16_TO_GID32(rgid16);
+	l_gid_t egid = LINUX_GID16_TO_GID32(egid16);
+	l_gid_t sgid = LINUX_GID16_TO_GID32(sgid16);
+
+	return (lx_setresgid(rgid, egid, sgid));
+}
+
+/*
+ * Linux allows up to 32 supplementary groups (NGROUPS_MAX) while Solaris
+ * allows only 16.  So that tests can proceed, DEBUG kernels quietly clamp a
+ * count that exceeds ngroups_max but is within the Linux limit; non-DEBUG
+ * kernels pass the count through unchanged.
+ */
+long
+lx_setgroups(int ngroups, gid_t *grouplist)
+{
+	int count = ngroups;
+
+#ifdef DEBUG
+	if (count <= LX_NGROUPS_MAX && count > ngroups_max)
+		count = ngroups_max;
+#endif /* DEBUG */
+
+	return (setgroups(count, grouplist));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
new file mode 100644
index 0000000000..d86d50f4e6
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
@@ -0,0 +1,249 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/thread.h>
+#include <sys/signal.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <lx_signum.h>
+
+extern int kill(pid_t, int);
+
+/*
+ * Decide whether `sig' may be sent to the init process `pid'.  Under Linux
+ * kill(2) semantics pid 1 may only receive signals it handles.
+ *
+ * Returns ESRCH if the process cannot be found (or is still being created),
+ * EPERM if the signal is non-zero and is either unmaskable or has a
+ * disposition of SIG_DFL or SIG_IGN in the target, and 0 otherwise.
+ */
+static int
+init_sig_check(int sig, pid_t pid)
+{
+	proc_t *initp;
+	int error;
+
+	mutex_enter(&pidlock);
+
+	initp = prfind(pid);
+	if (initp == NULL || initp->p_stat == SIDL) {
+		error = ESRCH;
+	} else if (sig == 0) {
+		/* Signal 0 is only an existence probe; always permitted. */
+		error = 0;
+	} else if (sigismember(&cantmask, sig) ||
+	    PTOU(initp)->u_signal[sig - 1] == SIG_DFL ||
+	    PTOU(initp)->u_signal[sig - 1] == SIG_IGN) {
+		error = EPERM;
+	} else {
+		error = 0;
+	}
+
+	mutex_exit(&pidlock);
+
+	return (error);
+}
+
+/*
+ * Send a signal to one specific thread, identified by its Linux pid.
+ *
+ * pid: Linux pid of the target thread; must be > 0.
+ * lx_sig: Linux signal number, translated through ltos_signo[]; 0 only
+ * checks that the target thread exists.
+ *
+ * Returns 0 on success. On failure returns the result of set_errno() with
+ * EINVAL (bad pid or signal number), ESRCH (no such process or thread),
+ * EPERM (not permitted), or whatever init_sig_check() reported.
+ */
+long
+lx_tkill(pid_t pid, int lx_sig)
+{
+ kthread_t *t;
+ proc_t *pp;
+ pid_t initpid;
+ sigqueue_t *sqp;
+ struct lx_lwp_data *br = ttolxlwp(curthread);
+ int tid = 1; /* default tid */
+ int sig, rv;
+
+ /*
+ * Unlike kill(2), Linux tkill(2) doesn't allow signals to
+ * be sent to process IDs <= 0 as it doesn't overlay any special
+ * semantics on the pid.
+ */
+ if ((pid <= 0) || ((lx_sig < 0) || (lx_sig >= LX_NSIG)) ||
+ ((sig = ltos_signo[lx_sig]) < 0))
+ return (set_errno(EINVAL));
+
+ /*
+ * If the Linux pid is 1, translate the pid to the actual init
+ * pid for the zone. Note that Linux dictates that no unhandled
+ * signals may be sent to init, so check for that, too.
+ *
+ * Otherwise, extract the tid and real pid from the Linux pid.
+ */
+ initpid = curproc->p_zone->zone_proc_initpid;
+ if (pid == 1)
+ pid = initpid;
+ /*
+ * Note that the else-if below is also evaluated when pid == initpid
+ * and init_sig_check() returned 0, so lx_lpid_to_spair() is called for
+ * init as well.
+ */
+ if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0))
+ return (set_errno(rv));
+ else if (lx_lpid_to_spair(pid, &pid, &tid) < 0)
+ return (set_errno(ESRCH));
+
+ /*
+ * Allocate the queue entry now, with a sleeping allocation, before any
+ * locks are taken below.
+ */
+ sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+
+ /*
+ * Find the process for the passed pid...
+ */
+ mutex_enter(&pidlock);
+ if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) {
+ mutex_exit(&pidlock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+ /* Take the target's p_lock before dropping pidlock. */
+ mutex_enter(&pp->p_lock);
+ mutex_exit(&pidlock);
+
+ /*
+ * Deny permission to send the signal if either of the following
+ * is true:
+ *
+ * + The signal is SIGCONT and the target pid is not in the same
+ * session as the sender
+ *
+ * + prochasprocperm() shows the user lacks sufficient permission
+ * to send the signal to the target pid
+ */
+ if (((sig == SIGCONT) && (pp->p_sessp != curproc->p_sessp)) ||
+ (!prochasprocperm(pp, curproc, CRED()))) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(EPERM);
+ goto free_and_exit;
+ }
+
+ /* check for the tid */
+ if ((t = idtot(pp, tid)) == NULL) {
+ mutex_exit(&pp->p_lock);
+ rv = set_errno(ESRCH);
+ goto free_and_exit;
+ }
+
+ /* a signal of 0 means just check for the existence of the thread */
+ if (lx_sig == 0) {
+ mutex_exit(&pp->p_lock);
+ rv = 0;
+ goto free_and_exit;
+ }
+
+ /*
+ * Fill in the siginfo: the sender is identified by the caller's
+ * br_pid and real uid.
+ */
+ sqp->sq_info.si_signo = sig;
+ sqp->sq_info.si_code = SI_LWP;
+ sqp->sq_info.si_pid = br->br_pid;
+ sqp->sq_info.si_uid = crgetruid(CRED());
+ /*
+ * sqp is not freed on this path; presumably sigaddqa() takes
+ * ownership of it -- TODO confirm.
+ */
+ sigaddqa(pp, t, sqp);
+
+ mutex_exit(&pp->p_lock);
+
+ return (0);
+
+ /* Common exit for every path that did not queue sqp. */
+free_and_exit:
+ kmem_free(sqp, sizeof (sigqueue_t));
+ return (rv);
+}
+
+/*
+ * Linux kill(2).
+ *
+ * lx_pid: > 0 signals one process, 0 is passed to the native kill() as
+ * pid 0, -1 signals every process in the zone except init and the caller,
+ * and < -1 signals every member of process group -lx_pid except init.
+ * lx_sig: Linux signal number, translated through ltos_signo[].
+ *
+ * Returns 0 on success, otherwise the result of set_errno() with EINVAL
+ * (bad signal number), ESRCH (nothing to signal), EPERM (not permitted) or
+ * an error from init_sig_check()/sigsendproc(); for lx_pid >= 0 the return
+ * value of the native kill() is passed through.
+ */
+long
+lx_kill(pid_t lx_pid, int lx_sig)
+{
+	pid_t s_pid, initpid;
+	sigsend_t v;
+	zone_t *zone = curproc->p_zone;
+	struct proc *p;
+	int err, sig, nfound;
+
+	if ((lx_sig < 0) || (lx_sig >= LX_NSIG) ||
+	    ((sig = ltos_signo[lx_sig]) < 0))
+		return (set_errno(EINVAL));
+
+	/*
+	 * Since some linux apps rely on init(1M) having PID 1, we
+	 * transparently translate 1 to the real init(1M)'s pid.  We then
+	 * check to be sure that it is legal for this process to send this
+	 * signal to init(1M).
+	 *
+	 * NOTE(review): lx_pid == -1 is also mapped to initpid here, so the
+	 * init_sig_check() below can reject a broadcast even though init is
+	 * skipped by the broadcast loop -- confirm whether that is intended.
+	 */
+	initpid = zone->zone_proc_initpid;
+	if (lx_pid == 1 || lx_pid == -1) {
+		s_pid = initpid;
+	} else if (lx_pid == 0) {
+		s_pid = 0;
+	} else {
+		/* For a process group request, look up the positive id. */
+		pid_t lx_target = (lx_pid < 0) ? -lx_pid : lx_pid;
+
+		/*
+		 * If we didn't find this pid in our hash table, it either
+		 * means that the process doesn't exist, that it exists but
+		 * isn't a Linux process, or that it is a zombie process.
+		 * In each of these cases, assuming that the Linux pid is
+		 * the same as the Solaris pid will get us the correct
+		 * behavior.
+		 *
+		 * s_pid must always end up positive: for lx_pid < -1 it is
+		 * handed to pgfind() below, which would never match a
+		 * negative id.
+		 */
+		if (lx_lpid_to_spair(lx_target, &s_pid, NULL) < 0)
+			s_pid = lx_target;
+	}
+
+	if ((s_pid == initpid) && ((err = init_sig_check(sig, s_pid)) != 0))
+		return (set_errno(err));
+
+	/*
+	 * For individual processes, kill() semantics are the same between
+	 * Solaris and Linux.
+	 */
+	if (lx_pid >= 0)
+		return (kill(s_pid, sig));
+
+	/*
+	 * In Solaris, sending a signal to -pid means "send a signal to
+	 * everyone in process group pid."  In Linux it means "send a
+	 * signal to everyone in the group other than init."  Sending a
+	 * signal to -1 means "send a signal to every process except init
+	 * and myself."
+	 */
+
+	bzero(&v, sizeof (v));
+	v.sig = sig;
+	v.checkperm = 1;
+	v.sicode = SI_USER;
+	err = 0;
+
+	mutex_enter(&pidlock);
+
+	/* Walk either all active processes (-1) or one process group. */
+	p = (lx_pid == -1) ? practive : pgfind(s_pid);
+	nfound = 0;
+	while (err == 0 && p != NULL) {
+		/*
+		 * Only processes in the caller's zone that are fully
+		 * created are candidates; init is always skipped, and the
+		 * caller itself is skipped for the -1 broadcast.
+		 */
+		if ((p->p_zone == zone) && (p->p_stat != SIDL) &&
+		    (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) {
+			nfound++;
+			err = sigsendproc(p, &v);
+		}
+
+		p = (lx_pid == -1) ? p->p_next : p->p_pglink;
+	}
+	mutex_exit(&pidlock);
+
+	if (nfound == 0)
+		err = ESRCH;
+	else if (err == 0 && v.perm == 0)
+		err = EPERM;
+	return (err ? set_errno(err) : 0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
new file mode 100644
index 0000000000..aa6e12a7d8
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/segments.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/sysi86.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Install one LDT entry described by a Linux ldt_info structure.
+ *
+ * The structure is copied in from `data' (whose size must be exactly
+ * `count'), converted to a user descriptor and then to an ssd, and passed
+ * to setdscr() while the process's other lwps are held.
+ *
+ * Returns 0 on success, or set_errno() of EINVAL (wrong size or entry
+ * number too large), EFAULT (bad user address), EINTR (holdlwps() failed)
+ * or the error returned by setdscr().
+ */
+static int
+write_ldt(void *data, ulong_t count)
+{
+	proc_t *procp = curthread->t_procp;
+	struct ldt_info info;
+	user_desc_t desc;
+	struct ssd ssd;
+	int error;
+
+	if (count != sizeof (info))
+		return (set_errno(EINVAL));
+
+	if (copyin(data, &info, sizeof (info)) != 0)
+		return (set_errno(EFAULT));
+
+	if (info.entry_number >= MAXNLDT)
+		return (set_errno(EINVAL));
+
+	LDT_INFO_TO_DESC(&info, &desc);
+	usd_to_ssd(&desc, &ssd, SEL_LDT(info.entry_number));
+
+	/* Everyone must be in a safe state before the LDT changes. */
+	if (!holdlwps(SHOLDFORK1))
+		return (set_errno(EINTR));
+
+	error = setdscr(&ssd);
+
+	/* Let the held lwps run again, whether or not setdscr() worked. */
+	mutex_enter(&procp->p_lock);
+	continuelwps(procp);
+	mutex_exit(&procp->p_lock);
+
+	if (error != 0)
+		return (set_errno(error));
+	return (0);
+}
+
+/*
+ * Copy the current process's LDT out to the user buffer `uptr'.
+ *
+ * At most `count' bytes are written.  Returns the number of bytes copied
+ * (0 if the process has no LDT), or set_errno(EFAULT) if the copyout fails.
+ */
+static int
+read_ldt(void *uptr, ulong_t count)
+{
+	proc_t *procp = curproc;
+	int len;
+
+	if (procp->p_ldt == NULL)
+		return (0);
+
+	/* Size of the whole table, truncated to what the caller can hold. */
+	len = (procp->p_ldtlimit + 1) * sizeof (user_desc_t);
+	if (len > count)
+		len = count;
+
+	if (copyout(procp->p_ldt, uptr, len) != 0)
+		return (set_errno(EFAULT));
+
+	return (len);
+}
+
+/*
+ * Linux modify_ldt(2) entry point.  op 0 reads the LDT into `data', op 1
+ * writes a single entry from `data'; any other op fails with ENOSYS.
+ */
+long
+lx_modify_ldt(int op, void *data, ulong_t count)
+{
+	if (op == 0)
+		return (read_ldt(data, count));
+
+	if (op == 1)
+		return (write_ldt(data, count));
+
+	return (set_errno(ENOSYS));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
new file mode 100644
index 0000000000..7b1cd49f37
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
@@ -0,0 +1,513 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+#include <sys/rtpriocntl.h>
+#include <sys/tspriocntl.h>
+#include <sys/processor.h>
+#include <sys/brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_sched.h>
+#include <sys/lx_brand.h>
+
+extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t);
+
+/*
+ * Get or set the affinity mask recorded in a thread's brand-specific lwp
+ * data (br_affinitymask).  This function only stores and retrieves the
+ * mask.
+ *
+ * cmd: B_GET_AFFINITY_MASK or B_SET_AFFINITY_MASK.
+ * pid: Linux pid of the target thread, or 0 for the calling thread.  Only
+ * threads of the calling process are supported.
+ * len: size of the user buffer; for a "get", 0 asks for the required size.
+ * maskp: user address of the mask buffer.
+ * rval: receives the mask size for a "get" and 0 for a "set".
+ *
+ * Returns 0 on success, or set_errno() of EINVAL (bad cmd or buffer too
+ * small), ESRCH (no such thread), EPERM (thread in another process) or
+ * EFAULT (bad user address).
+ *
+ * User memory is never touched while p_lock is held: the mask is staged in
+ * a local copy, so a fault on the user address can neither occur under the
+ * lock nor leave the stored mask partially overwritten.
+ */
+int
+lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp,
+    int64_t *rval)
+{
+	pid_t s_pid;
+	id_t s_tid;
+	kthread_t *t = curthread;
+	lx_lwp_data_t *lx_lwp;
+	lx_affmask_t mask;
+
+	if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK)
+		return (set_errno(EINVAL));
+
+	/*
+	 * The caller wants to know how large the mask should be.
+	 */
+	if (cmd == B_GET_AFFINITY_MASK && len == 0) {
+		*rval = sizeof (lx_affmask_t);
+		return (0);
+	}
+
+	/*
+	 * Otherwise, ensure they have a large enough mask.
+	 */
+	if (cmd == B_GET_AFFINITY_MASK && len < sizeof (lx_affmask_t)) {
+		*rval = -1;
+		return (set_errno(EINVAL));
+	}
+
+	if (pid == 0) {
+		s_pid = curproc->p_pid;
+		s_tid = curthread->t_tid;
+	} else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) {
+		return (set_errno(ESRCH));
+	}
+
+	/*
+	 * For now, we only support manipulating threads in the
+	 * same process.
+	 */
+	if (curproc->p_pid != s_pid)
+		return (set_errno(EPERM));
+
+	/*
+	 * For a "set", fetch the new mask before taking any lock.
+	 */
+	if (cmd == B_SET_AFFINITY_MASK &&
+	    copyin_nowatch((void *)maskp, &mask, sizeof (mask)) != 0)
+		return (set_errno(EFAULT));
+
+	/*
+	 * We must hold the process lock so that the thread list
+	 * doesn't change while we're looking at it.  We'll hold
+	 * the lock until we no longer reference the
+	 * corresponding lwp.
+	 */
+	mutex_enter(&curproc->p_lock);
+
+	do {
+		if (t->t_tid == s_tid)
+			break;
+		t = t->t_forw;
+	} while (t != curthread);
+
+	/*
+	 * The walk ends back at curthread when no thread matched.  Treat
+	 * that as "no such thread" instead of silently operating on the
+	 * caller's own lwp.
+	 */
+	if (t->t_tid != s_tid) {
+		mutex_exit(&curproc->p_lock);
+		return (set_errno(ESRCH));
+	}
+
+	lx_lwp = t->t_lwp->lwp_brand;
+
+	if (cmd == B_SET_AFFINITY_MASK)
+		bcopy(&mask, &lx_lwp->br_affinitymask, sizeof (mask));
+	else
+		bcopy(&lx_lwp->br_affinitymask, &mask, sizeof (mask));
+
+	mutex_exit(&curproc->p_lock);
+
+	if (cmd == B_SET_AFFINITY_MASK) {
+		*rval = 0;
+		return (0);
+	}
+
+	if (copyout_nowatch(&mask, (void *)maskp, sizeof (mask)) != 0)
+		return (set_errno(EFAULT));
+
+	*rval = sizeof (lx_affmask_t);
+	return (0);
+}
+
+/*
+ * Linux sched_setscheduler(2): set the scheduling policy and priority of
+ * the target selected by sched_setprocset().
+ *
+ * pid: Linux pid of the target, or 0 for the caller; negative is ESRCH.
+ * policy: LX_SCHED_FIFO, LX_SCHED_RR or LX_SCHED_OTHER. A negative value
+ * means "keep the policy that matches the target's current class".
+ * param: user pointer to the lx_sched_param holding the new priority.
+ *
+ * Failures detected here go through set_errno() (ESRCH, EFAULT, EINVAL).
+ * Failures of the priocntl calls are detected via lwp->lwp_errno, and that
+ * value is returned as is.
+ */
+long
+lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ /* Build the procset naming the target; a non-zero result is an error. */
+ if (rv = sched_setprocset(&procset, pid))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ /*
+ * No policy was given: derive it from the class the target is
+ * currently in.
+ */
+ if (policy < 0) {
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * "TS" maps to LX_SCHED_OTHER. "RT" maps to LX_SCHED_FIFO when
+ * the time quantum is RT_TQINF and to LX_SCHED_RR otherwise.
+ * Any other class is rejected.
+ */
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * Start from clean structures and build the parameters for the
+ * requested policy. procset_cid is used only for the PC_GETCID class
+ * lookups below.
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within 0 .. rt_maxpri. */
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ /* RR uses the default time quantum, FIFO an infinite one. */
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within -ts_maxupri .. ts_maxupri. */
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ *
+ * NOTE(review): unlike the calls above, neither the return value nor
+ * lwp_errno is checked here and 0 is returned unconditionally --
+ * confirm that the system call return path reports lwp_errno.
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+/*
+ * Linux sched_getscheduler(2): report the Linux scheduling policy that
+ * corresponds to the target's current scheduling class.
+ *
+ * "TS" is reported as LX_SCHED_OTHER.  "RT" is reported as LX_SCHED_FIFO
+ * when the time quantum is RT_TQINF and as LX_SCHED_RR otherwise.  Any
+ * other class fails with EINVAL.  A negative pid fails with ESRCH.
+ * Failures of the priocntl calls are detected through lwp->lwp_errno, whose
+ * value is returned as is.
+ */
+long
+lx_sched_getscheduler(l_pid_t pid)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	procset_t target;
+	pcparms_t parms;
+	pcinfo_t info;
+	rtparms_t *rtp;
+	int err;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	err = sched_setprocset(&target, pid);
+	if (err != 0)
+		return (err);
+
+	/* Fetch the target's class id and class parameters. */
+	parms.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &target, &parms);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/* Look up the name of that class. */
+	bzero(&info, sizeof (info));
+	info.pc_cid = parms.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &target, &info);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	if (strcmp(info.pc_clname, "TS") == 0)
+		return (LX_SCHED_OTHER);
+
+	if (strcmp(info.pc_clname, "RT") != 0)
+		return (set_errno(EINVAL));
+
+	rtp = (rtparms_t *)parms.pc_clparms;
+	return (rtp->rt_tqnsecs == RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR);
+}
+
+/*
+ * Linux sched_setparam(2): change the priority of the target selected by
+ * sched_setprocset() while keeping the policy that matches its current
+ * scheduling class.
+ *
+ * pid: Linux pid of the target, or 0 for the caller; negative is ESRCH.
+ * param: user pointer to the lx_sched_param holding the new priority.
+ *
+ * Failures detected here go through set_errno() (ESRCH, EFAULT, EINVAL).
+ * Failures of the priocntl calls are detected via lwp->lwp_errno, and that
+ * value is returned as is.
+ */
+long
+lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
+{
+ klwp_t *lwp = ttolwp(curthread);
+ procset_t procset;
+ procset_t procset_cid;
+ pcparms_t pcparm;
+ pcinfo_t pcinfo;
+ struct lx_sched_param sched_param;
+ tsparms_t *tsp;
+ int policy;
+ int prio, maxupri;
+ int rv;
+
+ if (pid < 0)
+ return (set_errno(ESRCH));
+
+ /* Build the procset naming the target; a non-zero result is an error. */
+ if (rv = sched_setprocset(&procset, pid))
+ return (rv);
+
+ if (copyin(param, &sched_param, sizeof (sched_param)))
+ return (set_errno(EFAULT));
+
+ prio = sched_param.lx_sched_prio;
+
+ /*
+ * get the class id
+ */
+ pcparm.pc_cid = PC_CLNULL;
+ (void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * get the current policy
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ pcinfo.pc_cid = pcparm.pc_cid;
+ (void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /*
+ * "TS" maps to LX_SCHED_OTHER. "RT" maps to LX_SCHED_FIFO when the
+ * time quantum is RT_TQINF and to LX_SCHED_RR otherwise. Any other
+ * class is rejected.
+ */
+ if (strcmp(pcinfo.pc_clname, "TS") == 0)
+ policy = LX_SCHED_OTHER;
+ else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+ policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+ RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+ else
+ return (set_errno(EINVAL));
+
+ /*
+ * Start from clean structures and rebuild the parameters for the
+ * policy found above. procset_cid is used only for the PC_GETCID
+ * class lookups below.
+ */
+ bzero(&pcinfo, sizeof (pcinfo));
+ bzero(&pcparm, sizeof (pcparm));
+ setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+ switch (policy) {
+ case LX_SCHED_FIFO:
+ case LX_SCHED_RR:
+ (void) strcpy(pcinfo.pc_clname, "RT");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within 0 .. rt_maxpri. */
+ if (prio < 0 ||
+ prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+ return (set_errno(EINVAL));
+ pcparm.pc_cid = pcinfo.pc_cid;
+ ((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+ /* RR uses the default time quantum, FIFO an infinite one. */
+ ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+ policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+ break;
+
+ case LX_SCHED_OTHER:
+ (void) strcpy(pcinfo.pc_clname, "TS");
+ (void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+ if (lwp->lwp_errno)
+ return (lwp->lwp_errno);
+
+ /* The priority must lie within -ts_maxupri .. ts_maxupri. */
+ maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+ if (prio > maxupri || prio < -maxupri)
+ return (set_errno(EINVAL));
+
+ pcparm.pc_cid = pcinfo.pc_cid;
+ tsp = (tsparms_t *)pcparm.pc_clparms;
+ tsp->ts_upri = prio;
+ tsp->ts_uprilim = TS_NOCHANGE;
+ break;
+
+ default:
+ return (set_errno(EINVAL));
+ }
+
+ /*
+ * finally set scheduling policy and parameters
+ *
+ * NOTE(review): unlike the calls above, neither the return value nor
+ * lwp_errno is checked here and 0 is returned unconditionally --
+ * confirm that the system call return path reports lwp_errno.
+ */
+ (void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+ return (0);
+}
+
+/*
+ * Linux sched_getparam(2): copy the target's scheduling priority out to
+ * the user's lx_sched_param.
+ *
+ * For the "TS" class the user priority is rescaled as
+ * -(ts_upri * 20) / ts_maxupri (0 when ts_maxupri is 0); for the "RT"
+ * class rt_pri is reported unchanged.  Any other class fails with EINVAL.
+ * A negative pid fails with ESRCH and a bad user pointer with EFAULT.
+ * Failures of the priocntl calls are detected through lwp->lwp_errno, whose
+ * value is returned as is.
+ */
+long
+lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	struct lx_sched_param out;
+	procset_t target;
+	pcparms_t parms;
+	pcinfo_t info;
+	int rv;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	rv = sched_setprocset(&target, pid);
+	if (rv != 0)
+		return (rv);
+
+	/* Fetch the target's class id and class parameters. */
+	parms.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &target, &parms);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/* Look up the name and limits of that class. */
+	bzero(&info, sizeof (info));
+	info.pc_cid = parms.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &target, &info);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	bzero(&out, sizeof (out));
+	if (strcmp(info.pc_clname, "TS") == 0) {
+		int upri = ((tsparms_t *)parms.pc_clparms)->ts_upri;
+		int maxupri = ((tsinfo_t *)info.pc_clinfo)->ts_maxupri;
+
+		/* Avoid dividing by a zero priority range. */
+		if (maxupri != 0)
+			out.lx_sched_prio = -(upri * 20) / maxupri;
+		else
+			out.lx_sched_prio = 0;
+	} else if (strcmp(info.pc_clname, "RT") == 0) {
+		out.lx_sched_prio = ((rtparms_t *)parms.pc_clparms)->rt_pri;
+	} else {
+		rv = set_errno(EINVAL);
+	}
+
+	if (rv == 0 && copyout(&out, param, sizeof (out)))
+		return (set_errno(EFAULT));
+
+	return (rv);
+}
+
+/*
+ * Linux sched_rr_get_interval(2): copy the target's round-robin time
+ * quantum out to `ival'.
+ *
+ * This succeeds only when the target is in the "RT" class and its quantum
+ * is not RT_TQINF; every other case fails with EINVAL.  A negative pid
+ * fails with ESRCH and a bad user pointer with EFAULT.  Failures of the
+ * priocntl calls are detected through lwp->lwp_errno, whose value is
+ * returned as is.
+ */
+long
+lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	struct timespec quantum;
+	procset_t procset;
+	pcparms_t parms;
+	pcinfo_t rtinfo;
+	rtparms_t *rtp;
+	int err;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	err = sched_setprocset(&procset, pid);
+	if (err != 0)
+		return (err);
+
+	/* Fetch the target's class id and class parameters. */
+	parms.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &procset, &parms);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/* Reuse procset to look up the id of the "RT" class. */
+	setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0);
+	bzero(&rtinfo, sizeof (rtinfo));
+	(void) strcpy(rtinfo.pc_clname, "RT");
+	(void) do_priocntlsys(PC_GETCID, &procset, &rtinfo);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	rtp = (rtparms_t *)parms.pc_clparms;
+	if (parms.pc_cid != rtinfo.pc_cid || rtp->rt_tqnsecs == RT_TQINF)
+		return (set_errno(EINVAL));
+
+	quantum.tv_sec = rtp->rt_tqsecs;
+	quantum.tv_nsec = rtp->rt_tqnsecs;
+
+	if (copyout(&quantum, ival, sizeof (quantum)))
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+/*
+ * Fill in `procset' so that it names the scheduling target for `pid'.
+ *
+ * A pid of 0 selects the caller (P_PID / P_MYID).  Any other pid is
+ * translated with lx_lpid_to_spair() and selects the resulting lwp id
+ * (P_LWPID); it must belong to the calling process.
+ *
+ * Returns 0 on success, or set_errno(ESRCH) if the pid cannot be
+ * translated or belongs to a different process.
+ */
+int
+sched_setprocset(procset_t *procset, l_pid_t pid)
+{
+	idtype_t lidtype;
+	id_t lid;
+
+	if (pid == 0) {
+		lidtype = P_PID;
+		lid = P_MYID;
+	} else {
+		if (lx_lpid_to_spair(pid, &pid, &lid) < 0 ||
+		    pid != curproc->p_pid)
+			return (set_errno(ESRCH));
+		lidtype = P_LWPID;
+	}
+
+	/* The right-hand operand of the set is always P_ALL with id 0. */
+	setprocset(procset, POP_AND, lidtype, lid, P_ALL, 0);
+
+	return (0);
+}
+
+/*
+ * Thin wrapper around priocntl_common() for callers inside the kernel:
+ * `arg' is passed as a kernel-space (UIO_SYSSPACE) argument block and the
+ * trailing pointer argument is always 0.
+ */
+long
+do_priocntlsys(int cmd, procset_t *procset, void *arg)
+{
+	caddr_t parms = (caddr_t)arg;
+
+	return (priocntl_common(PC_VERSION, procset, cmd, parms, 0,
+	    UIO_SYSSPACE));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
new file mode 100644
index 0000000000..9fdb734805
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <vm/anon.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+
+/*
+ * Result buffer that lx_sysinfo() fills in and copies out to the caller.
+ * NOTE(review): presumably laid out to match the 32-bit Linux
+ * struct sysinfo -- confirm against the Linux headers.  si_procs is a
+ * 16-bit field followed by 32-bit fields, so compilers will normally
+ * insert two bytes of padding after it.
+ */
+struct lx_sysinfo {
+ int32_t si_uptime; /* Seconds since boot */
+ uint32_t si_loads[3]; /* 1, 5, and 15 minute avg runq length */
+ uint32_t si_totalram; /* Total memory size */
+ uint32_t si_freeram; /* Available memory */
+ uint32_t si_sharedram; /* Shared memory */
+ uint32_t si_bufferram; /* Buffer memory */
+ uint32_t si_totalswap; /* Total swap space */
+ uint32_t si_freeswap; /* Avail swap space */
+ uint16_t si_procs; /* Process count */
+ uint32_t si_totalhigh; /* High memory size */
+ uint32_t si_freehigh; /* Avail high memory */
+ uint32_t si_mem_unit; /* Unit size of memory fields */
+};
+
+/*
+ * Fill in a struct lx_sysinfo for the zone of the calling process and copy
+ * it out to sip.
+ *
+ * Returns 0 on success, or the result of set_errno(EFAULT) if the
+ * copyout() to sip fails.
+ */
+long
+lx_sysinfo(struct lx_sysinfo *sip)
+{
+	struct lx_sysinfo si;
+	hrtime_t birthtime;
+	zone_t *zone = curthread->t_procp->p_zone;
+	proc_t *init_proc;
+
+	/*
+	 * Start from an all-zero structure.  The 16-bit si_procs field is
+	 * followed by 32-bit fields, so the structure normally contains
+	 * padding; without this, the copyout() below would hand whatever
+	 * happened to be on the kernel stack to user space.
+	 */
+	bzero(&si, sizeof (si));
+
+	/*
+	 * We don't record the time a zone was booted, so we use the
+	 * birthtime of that zone's init process instead.
+	 */
+	mutex_enter(&pidlock);
+	init_proc = prfind(zone->zone_proc_initpid);
+	if (init_proc != NULL)
+		birthtime = init_proc->p_mstart;
+	else
+		birthtime = p0.p_mstart;
+	mutex_exit(&pidlock);
+	si.si_uptime = (gethrtime() - birthtime) / NANOSEC;
+
+	/*
+	 * We scale down the load in avenrun to allow larger load averages
+	 * to fit in 32 bits.  Linux doesn't, so we remove the scaling
+	 * here.
+	 */
+	si.si_loads[0] = avenrun[0] << FSHIFT;
+	si.si_loads[1] = avenrun[1] << FSHIFT;
+	si.si_loads[2] = avenrun[2] << FSHIFT;
+
+	/*
+	 * In linux each thread looks like a process, so we conflate the
+	 * two in this stat as well.  The field is only 16 bits wide, so the
+	 * lwp count is truncated to fit.
+	 */
+	si.si_procs = (uint16_t)zone->zone_nlwps;
+
+	/*
+	 * If the maximum memory stat is less than 2^20 pages (i.e. 4GB),
+	 * then we report the result in bytes.  Otherwise we use pages.
+	 * Once we start supporting >1TB x86 systems, we'll need a third
+	 * option.
+	 */
+	if (MAX(physmem, k_anoninfo.ani_max) < 1024 * 1024) {
+		si.si_totalram = physmem * PAGESIZE;
+		si.si_freeram = freemem * PAGESIZE;
+		si.si_totalswap = k_anoninfo.ani_max * PAGESIZE;
+		si.si_freeswap = k_anoninfo.ani_free * PAGESIZE;
+		si.si_mem_unit = 1;
+	} else {
+		si.si_totalram = physmem;
+		si.si_freeram = freemem;
+		si.si_totalswap = k_anoninfo.ani_max;
+		si.si_freeswap = k_anoninfo.ani_free;
+		si.si_mem_unit = PAGESIZE;
+	}
+	si.si_bufferram = 0;
+	si.si_sharedram = 0;
+
+	/*
+	 * These two stats refer to high physical memory.  If an
+	 * application running in a Linux zone cares about this, then
+	 * either it or we are broken.
+	 */
+	si.si_totalhigh = 0;
+	si.si_freehigh = 0;
+
+	if (copyout(&si, sip, sizeof (si)) != 0)
+		return (set_errno(EFAULT));
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
new file mode 100644
index 0000000000..f9751819f9
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/cpuvar.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Copy one of the current thread's TLS descriptors out to the caller in
+ * Linux ldt_info form.
+ *
+ * The entry number is read from inf->entry_number and must lie in the range
+ * GDT_TLSMIN..GDT_TLSMAX.  Returns 0 on success; otherwise the result of
+ * set_errno() with EFAULT (user address could not be read or written) or
+ * EINVAL (entry number out of range).
+ */
+long
+lx_get_thread_area(struct ldt_info *inf)
+{
+	struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+	struct ldt_info out;
+	user_desc_t *desc;
+	int idx;
+
+	if (fuword32(&inf->entry_number, (uint32_t *)&idx) != 0)
+		return (set_errno(EFAULT));
+
+	if (idx < GDT_TLSMIN || idx > GDT_TLSMAX)
+		return (set_errno(EINVAL));
+
+	/* br_tls is indexed relative to the first TLS slot */
+	desc = &lwpd->br_tls[idx - GDT_TLSMIN];
+
+	/*
+	 * Translate the solaris descriptor into the linux layout the caller
+	 * expects, then record which entry it came from.
+	 */
+	DESC_TO_LDT_INFO(desc, &out);
+	out.entry_number = idx;
+
+	if (copyout(&out, inf, sizeof (struct ldt_info)) != 0)
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+/*
+ * Store a TLS descriptor supplied by the caller (in Linux ldt_info form)
+ * into the current thread's br_tls array and reload the TLS entries of
+ * the GDT from that array.
+ *
+ * An entry_number of -1 asks for the first all-zero slot; the chosen entry
+ * number is written back to inf->entry_number.  Returns 0 on success;
+ * otherwise the result of set_errno() with EFAULT (bad user address),
+ * ESRCH (no free slot) or EINVAL (entry number out of range).
+ */
+long
+lx_set_thread_area(struct ldt_info *inf)
+{
+	struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+	struct ldt_info info;
+	user_desc_t *desc;
+	int slot;
+	int idx;
+
+	if (copyin(inf, &info, sizeof (info)) != 0)
+		return (set_errno(EFAULT));
+
+	slot = info.entry_number;
+	if (slot == -1) {
+		/*
+		 * look for a descriptor whose two words are both zero
+		 */
+		desc = lwpd->br_tls;
+		for (idx = 0; idx < LX_TLSNUM; idx++, desc++) {
+			if (((unsigned long *)desc)[0] == 0 &&
+			    ((unsigned long *)desc)[1] == 0)
+				break;
+		}
+
+		if (idx >= LX_TLSNUM)
+			return (set_errno(ESRCH));
+
+		/*
+		 * tell the caller which entry was picked
+		 */
+		slot = idx + GDT_TLSMIN;
+		if (suword32(&inf->entry_number, slot) != 0)
+			return (set_errno(EFAULT));
+	}
+
+	if (slot < GDT_TLSMIN || slot > GDT_TLSMAX)
+		return (set_errno(EINVAL));
+
+	/*
+	 * convert the linux ldt info to standard intel descriptor; an
+	 * empty request clears the slot instead
+	 */
+	desc = &lwpd->br_tls[slot - GDT_TLSMIN];
+
+	if (LDT_INFO_EMPTY(&info)) {
+		((unsigned long *)desc)[0] = 0;
+		((unsigned long *)desc)[1] = 0;
+	} else {
+		LDT_INFO_TO_DESC(&info, desc);
+	}
+
+	/*
+	 * push every TLS descriptor of this thread into the gdt, with
+	 * preemption disabled for the duration of the update
+	 */
+	kpreempt_disable();
+
+	for (idx = 0; idx < LX_TLSNUM; idx++)
+		lx_set_gdt(GDT_TLSMIN + idx, &lwpd->br_tls[idx]);
+
+	kpreempt_enable();
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
new file mode 100644
index 0000000000..a46ea3c979
--- /dev/null
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -0,0 +1,288 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/errno.h>
+#include <sys/exec.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/model.h>
+#include <sys/proc.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cmn_err.h>
+#include <sys/archsystm.h>
+
+#include <sys/machbrand.h>
+#include <sys/brand.h>
+#include "sn1_brand.h"
+
+char *sn1_emulation_table = NULL;
+
+void sn1_setbrand(proc_t *);
+int sn1_getattr(zone_t *, int, void *, size_t *);
+int sn1_setattr(zone_t *, int, void *, size_t);
+int sn1_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t);
+void sn1_copy_procdata(proc_t *, proc_t *);
+void sn1_proc_exit(struct proc *, klwp_t *);
+void sn1_exec();
+int sn1_initlwp(klwp_t *);
+void sn1_forklwp(klwp_t *, klwp_t *);
+void sn1_freelwp(klwp_t *);
+void sn1_lwpexit(klwp_t *);
+int sn1_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
+ long *, int, caddr_t, cred_t *, int);
+
+/* sn1 brand */
+struct brand_ops sn1_brops = {
+ sn1_brandsys,
+ sn1_setbrand,
+ sn1_getattr,
+ sn1_setattr,
+ sn1_copy_procdata,
+ sn1_proc_exit,
+ sn1_exec,
+ lwp_setrval,
+ sn1_initlwp,
+ sn1_forklwp,
+ sn1_freelwp,
+ sn1_lwpexit,
+ sn1_elfexec
+};
+
+#ifdef sparc
+
+struct brand_mach_ops sn1_mops = {
+ sn1_brand_syscall_callback,
+ sn1_brand_syscall_callback
+};
+
+#else /* sparc */
+
+#ifdef __amd64
+
+struct brand_mach_ops sn1_mops = {
+ sn1_brand_sysenter_callback,
+ NULL,
+ sn1_brand_int91_callback,
+ sn1_brand_syscall_callback,
+ sn1_brand_syscall32_callback,
+ NULL
+};
+
+#else /* ! __amd64 */
+
+struct brand_mach_ops sn1_mops = {
+ sn1_brand_sysenter_callback,
+ NULL,
+ NULL,
+ sn1_brand_syscall_callback,
+ NULL,
+ NULL
+};
+#endif /* __amd64 */
+
+#endif /* sparc */
+
+struct brand sn1_brand = {
+ BRAND_VER_1,
+ "sn1",
+ &sn1_brops,
+ &sn1_mops
+};
+
+static struct modlbrand modlbrand = {
+ &mod_brandops, "Solaris N-1 Brand %I%", &sn1_brand
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlbrand, NULL
+};
+
+/*
+ * Mark process p as belonging to the sn1 brand and clear its per-process
+ * brand data pointer.
+ */
+void
+sn1_setbrand(proc_t *p)
+{
+ p->p_brand_data = NULL;
+ p->p_brand = &sn1_brand;
+}
+
+/*
+ * Attribute query hook for the sn1 brand.  No argument is examined; the
+ * call always fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+sn1_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+ return (EINVAL);
+}
+
+/*
+ * Attribute update hook for the sn1 brand.  No argument is examined; the
+ * call always fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+sn1_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+ return (EINVAL);
+}
+
+/*
+ * Brand system call entry point for sn1.
+ *
+ * B_REGISTER is the only command acted on: it marks the current process
+ * as sn1-branded and stores arg1 (the address of the user-space system
+ * call handler) as the process's brand data.  Every other command returns
+ * EINVAL.  *rval is always set to 0.
+ */
+/*ARGSUSED*/
+int
+sn1_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	proc_t *self = curproc;
+
+	*rval = 0;
+
+	if (cmd != B_REGISTER) {
+		/* anything but registration must come from an sn1 process */
+		ASSERT(self->p_brand == &sn1_brand);
+		return (EINVAL);
+	}
+
+	self->p_brand = &sn1_brand;
+	self->p_brand_data = (void *) arg1;
+
+	return (0);
+}
+
+/*
+ * Copy the per-process brand data from a parent proc to a child. In the
+ * sn1 brand, the only per-process state is the address of the user-space
+ * handler, so only the p_brand_data pointer value is copied.
+ */
+void
+sn1_copy_procdata(proc_t *child, proc_t *parent)
+{
+ child->p_brand_data = parent->p_brand_data;
+}
+
+/*
+ * Process exit hook: clear the brand data pointer of p and point its
+ * brand at native_brand.  The lwp argument is not used.
+ */
+/*ARGSUSED*/
+void
+sn1_proc_exit(struct proc *p, klwp_t *l)
+{
+ p->p_brand_data = NULL;
+ p->p_brand = &native_brand;
+}
+
+/*
+ * Exec hook: clear the current process's brand data pointer.
+ * NOTE(review): presumably because a handler address registered by the
+ * old image is meaningless in the new one -- confirm.
+ */
+void
+sn1_exec()
+{
+ curproc->p_brand_data = NULL;
+}
+
+/*
+ * Lwp initialization hook.  Does nothing with the lwp and always
+ * returns 0.
+ */
+/*ARGSUSED*/
+int
+sn1_initlwp(klwp_t *l)
+{
+ return (0);
+}
+
+/*
+ * Lwp fork hook.  The body is empty: neither lwp is touched.
+ */
+/*ARGSUSED*/
+void
+sn1_forklwp(klwp_t *p, klwp_t *c)
+{
+}
+
+/*
+ * Lwp free hook.  The body is empty: the lwp is not touched.
+ */
+/*ARGSUSED*/
+void
+sn1_freelwp(klwp_t *l)
+{
+}
+
+/*
+ * Lwp exit hook.  The body is empty: the lwp is not touched.
+ */
+/*ARGSUSED*/
+void
+sn1_lwpexit(klwp_t *l)
+{
+}
+
+/*
+ * Exec hook for sn1: record "sn1" as the brand name in args and then hand
+ * the exec on to the exec_func of args->execswp, with level bumped by one
+ * and every other argument passed through unchanged.  Returns whatever
+ * that function returns.
+ */
+int
+sn1_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
+    int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
+    int brand_action)
+{
+	int rv;
+
+	args->brandname = "sn1";
+	rv = (*args->execswp->exec_func)(vp, uap, args, idatap, level + 1,
+	    execsz, setid, exec_file, cred, brand_action);
+
+	return (rv);
+}
+
+
+/*
+ * Module load entry point.
+ *
+ * On sparc non-DEBUG kernels the brand refuses to load and ENOTSUP is
+ * returned.  Otherwise the system call emulation table is allocated and
+ * the module is installed; the return value is that of mod_install().  If
+ * the install fails the table is released again and the global pointer is
+ * reset so that it never refers to freed memory.
+ */
+int
+_init(void)
+{
+#if defined(sparc) && !defined(DEBUG)
+	cmn_err(CE_WARN, "The sn1 brand is only supported on DEBUG kernels.");
+	return (ENOTSUP);
+#else
+	int err;
+
+	/*
+	 * Set up the table indicating which system calls we want to
+	 * interpose on.  We should probably build this automatically from
+	 * a list of system calls that is shared with the user-space
+	 * library.
+	 */
+	sn1_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
+	sn1_emulation_table[SYS_uname] = 1;
+	sn1_emulation_table[SYS_fork1] = 1;
+
+	err = mod_install(&modlinkage);
+	if (err) {
+		cmn_err(CE_WARN, "Couldn't install brand module");
+		kmem_free(sn1_emulation_table, NSYSCALL);
+		sn1_emulation_table = NULL;
+	}
+
+	return (err);
+#endif
+}
+
+/*
+ * Module information entry point: passes modlinkage and modinfop to
+ * mod_info() and returns its result.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Module unload entry point.
+ *
+ * Returns EBUSY while any zone still uses the sn1 brand, otherwise the
+ * result of mod_remove().  The emulation table is released only once the
+ * module has actually been removed: if mod_remove() fails the module stays
+ * loaded and must keep a valid table.
+ */
+int
+_fini(void)
+{
+	int err;
+
+	/*
+	 * If there are any zones using this brand, we can't allow it to be
+	 * unloaded.
+	 */
+	if (brand_zone_count(&sn1_brand))
+		return (EBUSY);
+
+	err = mod_remove(&modlinkage);
+	if (err) {
+		cmn_err(CE_WARN, "Couldn't unload sn1 brand module");
+		return (err);
+	}
+
+	kmem_free(sn1_emulation_table, NSYSCALL);
+	sn1_emulation_table = NULL;
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.h b/usr/src/uts/common/brand/sn1/sn1_brand.h
new file mode 100644
index 0000000000..a4efca189b
--- /dev/null
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SN1_BRAND_H
+#define _SN1_BRAND_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void sn1_brand_syscall_callback(void);
+void sn1_brand_sysenter_callback(void);
+void sn1_brand_int91_callback(void);
+#ifdef __amd64
+void sn1_brand_syscall32_callback(void);
+#endif
+
+extern struct brand *sbrand;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SN1_BRAND_H */
diff --git a/usr/src/uts/common/c2/audit_event.c b/usr/src/uts/common/c2/audit_event.c
index 4ee95e1728..b45da7bf26 100644
--- a/usr/src/uts/common/c2/audit_event.c
+++ b/usr/src/uts/common/c2/audit_event.c
@@ -158,6 +158,7 @@ static void aus_sigqueue(struct t_audit_data *);
static void aus_p_online(struct t_audit_data *);
static void aus_processor_bind(struct t_audit_data *);
static void aus_inst_sync(struct t_audit_data *);
+static void aus_brandsys(struct t_audit_data *);
static void auf_accept(struct t_audit_data *, int, rval_t *);
@@ -270,7 +271,7 @@ aui_null, AUE_FSTATFS, aus_fstatfs, /* 38 fstatfs */
auf_null, S2E_PUB,
aui_null, AUE_SETPGRP, aus_null, /* 39 setpgrp */
auf_null, 0,
-aui_null, AUE_NULL, aus_null, /* 40 (loadable) was cxenix */
+aui_null, AUE_NULL, aus_null, /* 40 uucopystr */
auf_null, 0,
aui_null, AUE_NULL, aus_null, /* 41 dup */
auf_null, 0,
@@ -564,7 +565,7 @@ aui_null, AUE_NULL, aus_null, /* 175 llseek */
aui_null, AUE_INST_SYNC, aus_inst_sync, /* 176 (loadable) */
/* aus_inst_sync */
auf_null, 0,
-aui_null, AUE_NULL, aus_null, /* 177 (loadable) */
+aui_null, AUE_BRANDSYS, aus_brandsys, /* 177 brandsys */
auf_null, 0,
aui_null, AUE_NULL, aus_null, /* 178 (loadable) */
auf_null, 0,
@@ -718,8 +719,7 @@ aui_null, AUE_NULL, aus_null, /* 252 lwp_mutex_init */
auf_null, 0,
aui_null, AUE_NULL, aus_null, /* 253 cladm */
auf_null, 0,
-aui_null, AUE_NULL, aus_null, /* 254 (loadable) */
- /* was lwp_sigtimedwait */
+aui_null, AUE_NULL, aus_null, /* 254 uucopy */
auf_null, 0,
aui_null, AUE_UMOUNT2, aus_umount2, /* 255 umount2 */
auf_null, 0
@@ -4706,6 +4706,40 @@ aus_inst_sync(struct t_audit_data *tad)
/*ARGSUSED*/
static void
+aus_brandsys(struct t_audit_data *tad)
+{
+ /*
+ * Audit the arguments of the brandsys system call, read from the
+ * current lwp's lwp_ap: the command is written as a 32-bit argument
+ * token, followed by the six arguments as 64-bit tokens on _LP64
+ * kernels and as 32-bit tokens otherwise.
+ */
+ klwp_t *clwp = ttolwp(curthread);
+
+ struct a {
+ long cmd;
+ long arg1;
+ long arg2;
+ long arg3;
+ long arg4;
+ long arg5;
+ long arg6;
+ } *uap = (struct a *)clwp->lwp_ap;
+
+ au_uwrite(au_to_arg32(1, "cmd", (uint_t)uap->cmd));
+#ifdef _LP64
+ au_uwrite(au_to_arg64(2, "arg1", (uint64_t)uap->arg1));
+ au_uwrite(au_to_arg64(3, "arg2", (uint64_t)uap->arg2));
+ au_uwrite(au_to_arg64(4, "arg3", (uint64_t)uap->arg3));
+ au_uwrite(au_to_arg64(5, "arg4", (uint64_t)uap->arg4));
+ au_uwrite(au_to_arg64(6, "arg5", (uint64_t)uap->arg5));
+ au_uwrite(au_to_arg64(7, "arg6", (uint64_t)uap->arg6));
+#else
+ au_uwrite(au_to_arg32(2, "arg1", (uint32_t)uap->arg1));
+ au_uwrite(au_to_arg32(3, "arg2", (uint32_t)uap->arg2));
+ au_uwrite(au_to_arg32(4, "arg3", (uint32_t)uap->arg3));
+ au_uwrite(au_to_arg32(5, "arg4", (uint32_t)uap->arg4));
+ au_uwrite(au_to_arg32(6, "arg5", (uint32_t)uap->arg5));
+ au_uwrite(au_to_arg32(7, "arg6", (uint32_t)uap->arg6));
+#endif
+}
+
+/*ARGSUSED*/
+static void
aus_p_online(struct t_audit_data *tad)
{
struct a {
diff --git a/usr/src/uts/common/c2/audit_kevents.h b/usr/src/uts/common/c2/audit_kevents.h
index 942887ae72..4a2e5b27db 100644
--- a/usr/src/uts/common/c2/audit_kevents.h
+++ b/usr/src/uts/common/c2/audit_kevents.h
@@ -330,9 +330,10 @@ extern "C" {
#define AUE_MODADDPRIV 291 /* =ad modctl(2) */
#define AUE_CRYPTOADM 292 /* =as kernel cryptographic framework */
#define AUE_CONFIGKSSL 293 /* =as kernel SSL */
+#define AUE_BRANDSYS 294 /* =ot */
/* NOTE: update MAX_KEVENTS below if events are added. */
-#define MAX_KEVENTS 293
+#define MAX_KEVENTS 294
#ifdef __cplusplus
diff --git a/usr/src/uts/common/disp/class.c b/usr/src/uts/common/disp/class.c
index b5b2674d89..8e83a839ee 100644
--- a/usr/src/uts/common/disp/class.c
+++ b/usr/src/uts/common/disp/class.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -325,7 +324,8 @@ parmsset(pcparms_t *parmsp, kthread_id_t targtp)
* The parameters are specified by a key.
*/
int
-vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp)
+vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp,
+ uio_seg_t seg)
{
char *clname;
@@ -348,7 +348,8 @@ vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp)
return (EINVAL);
clname = sclass[prmsp->pc_cid].cl_name;
- if (copyout(clname, (void *)(uintptr_t)vaparmsp->pc_parms[0].pc_parm,
+ if ((seg == UIO_USERSPACE ? copyout : kcopy)(clname,
+ (void *)(uintptr_t)vaparmsp->pc_parms[0].pc_parm,
MIN(strlen(clname) + 1, PC_CLNMSZ)))
return (EFAULT);
diff --git a/usr/src/uts/common/disp/priocntl.c b/usr/src/uts/common/disp/priocntl.c
index 3c1a271155..3bb90cf1fa 100644
--- a/usr/src/uts/common/disp/priocntl.c
+++ b/usr/src/uts/common/disp/priocntl.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -81,7 +80,7 @@ struct stprmargs {
* between the 64-bit kernel ABI and the 32-bit user ABI.
*/
static int
-copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap)
+copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
{
pc_vaparms32_t vaparms32;
pc_vaparm32_t *src;
@@ -90,7 +89,8 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap)
ASSERT(get_udatamodel() == DATAMODEL_ILP32);
- if (copyin(arg, &vaparms32, sizeof (vaparms32)))
+ if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32,
+ sizeof (vaparms32)))
return (EFAULT);
vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt;
@@ -104,13 +104,13 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap)
return (0);
}
-#define COPYIN_VAPARMS(arg, vap, size) \
+#define COPYIN_VAPARMS(arg, vap, size, seg) \
(get_udatamodel() == DATAMODEL_NATIVE ? \
- copyin(arg, vap, size) : copyin_vaparms32(arg, vap))
+ (*copyinfn)(arg, vap, size) : copyin_vaparms32(arg, vap, seg))
#else
-#define COPYIN_VAPARMS(arg, vap, size) copyin(arg, vap, size)
+#define COPYIN_VAPARMS(arg, vap, size, seg) (*copyinfn)(arg, vap, size)
#endif
@@ -123,7 +123,8 @@ extern int threadcmp(struct pcmpargs *, kthread_id_t);
* The priocntl system call.
*/
long
-priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
+priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
+ caddr_t arg2, uio_seg_t seg)
{
pcinfo_t pcinfo;
pcparms_t pcparms;
@@ -144,6 +145,8 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
int rv = 0;
pid_t saved_pid;
id_t classid;
+ int (*copyinfn)(const void *, void *, size_t);
+ int (*copyoutfn)(const void *, void *, size_t);
/*
* First just check the version number. Right now there is only
@@ -157,6 +160,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
if (pc_version != PC_VERSION)
return (set_errno(EINVAL));
+ if (seg == UIO_USERSPACE) {
+ copyinfn = copyin;
+ copyoutfn = copyout;
+ } else {
+ copyinfn = kcopy;
+ copyoutfn = kcopy;
+ }
+
switch (cmd) {
case PC_GETCID:
/*
@@ -171,7 +182,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
rv = loaded_classes;
break;
} else {
- if (copyin(arg, &pcinfo, sizeof (pcinfo)))
+ if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
return (set_errno(EFAULT));
}
@@ -204,7 +215,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
if (error)
return (set_errno(error));
- if (copyout(&pcinfo, arg, sizeof (pcinfo)))
+ if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
return (set_errno(EFAULT));
rv = loaded_classes;
@@ -221,7 +232,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
rv = loaded_classes;
break;
} else {
- if (copyin(arg, &pcinfo, sizeof (pcinfo)))
+ if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
return (set_errno(EFAULT));
}
@@ -245,7 +256,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
if (error)
return (set_errno(error));
- if (copyout(&pcinfo, arg, sizeof (pcinfo)))
+ if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
return (set_errno(EFAULT));
rv = loaded_classes;
@@ -259,13 +270,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
* because it's done on a per thread basis by parmsset().
*/
if (cmd == PC_SETPARMS) {
- if (copyin(arg, &pcparms, sizeof (pcparms)))
+ if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
return (set_errno(EFAULT));
error = parmsin(&pcparms, NULL);
} else {
- if (copyin(arg, clname, PC_CLNMSZ) ||
- COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms)))
+ if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
+ COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
+ seg))
return (set_errno(EFAULT));
clname[PC_CLNMSZ-1] = '\0';
@@ -281,7 +293,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
/*
* Get the procset from the user.
*/
- if (copyin(psp, &procset, sizeof (procset)))
+ if ((*copyinfn)(psp, &procset, sizeof (procset)))
return (set_errno(EFAULT));
/*
@@ -372,11 +384,11 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
case PC_GETPARMS:
case PC_GETXPARMS:
if (cmd == PC_GETPARMS) {
- if (copyin(arg, &pcparms, sizeof (pcparms)))
+ if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
return (set_errno(EFAULT));
} else {
if (arg != NULL) {
- if (copyin(arg, clname, PC_CLNMSZ))
+ if ((*copyinfn)(arg, clname, PC_CLNMSZ))
return (set_errno(EFAULT));
clname[PC_CLNMSZ-1] = '\0';
@@ -385,7 +397,9 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
return (set_errno(EINVAL));
} else
pcparms.pc_cid = PC_CLNULL;
- if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms)))
+
+ if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
+ seg))
return (set_errno(EFAULT));
}
@@ -393,7 +407,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
(pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
return (set_errno(EINVAL));
- if (copyin(psp, &procset, sizeof (procset)))
+ if ((*copyinfn)(psp, &procset, sizeof (procset)))
return (set_errno(EFAULT));
/*
@@ -590,9 +604,10 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
return (set_errno(error));
if (cmd == PC_GETPARMS) {
- if (copyout(&pcparms, arg, sizeof (pcparms)))
+ if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
return (set_errno(EFAULT));
- } else if ((error = vaparmsout(arg, &pcparms, &vaparms)) != 0)
+ } else if ((error = vaparmsout(arg, &pcparms, &vaparms,
+ seg)) != 0)
return (set_errno(error));
/*
@@ -603,14 +618,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
case PC_ADMIN:
if (get_udatamodel() == DATAMODEL_NATIVE) {
- if (copyin(arg, &pcadmin, sizeof (pcadmin_t)))
+ if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
return (set_errno(EFAULT));
#ifdef _SYSCALL32_IMPL
} else {
/* pcadmin struct from ILP32 callers */
pcadmin32_t pcadmin32;
- if (copyin(arg, &pcadmin32, sizeof (pcadmin32_t)))
+ if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
return (set_errno(EFAULT));
pcadmin.pc_cid = pcadmin32.pc_cid;
pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
@@ -632,7 +647,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
break;
case PC_GETPRIRANGE:
- if (copyin(arg, &pcpri, sizeof (pcpri_t)))
+ if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
return (set_errno(EFAULT));
if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
@@ -640,7 +655,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
if (!error) {
- if (copyout(&pcpri, arg, sizeof (pcpri)))
+ if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
return (set_errno(EFAULT));
}
break;
@@ -649,14 +664,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
/*
* Get pcnice and procset structures from the user.
*/
- if (copyin(arg, &pcnice, sizeof (pcnice)) ||
- copyin(psp, &procset, sizeof (procset)))
+ if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
+ (*copyinfn)(psp, &procset, sizeof (procset)))
return (set_errno(EFAULT));
error = donice(&procset, &pcnice);
if (!error && (pcnice.pc_op == PC_GETNICE)) {
- if (copyout(&pcnice, arg, sizeof (pcnice)))
+ if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
return (set_errno(EFAULT));
}
break;
@@ -684,6 +699,12 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
return (error ? (set_errno(error)) : rv);
}
+/*
+ * System call entry point: forwards all arguments unchanged to
+ * priocntl_common() with the buffers flagged as UIO_USERSPACE.
+ */
+long
+priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
+{
+ return (priocntl_common(pc_version, psp, cmd, arg, arg2,
+ UIO_USERSPACE));
+}
/*
* The proccmp() function is part of the implementation of the
@@ -844,7 +865,7 @@ setparms(proc_t *targpp, struct stprmargs *stprmp)
return (0);
}
-static int
+int
setthreadnice(pcnice_t *pcnice, kthread_t *tp)
{
int error = 0;
@@ -889,7 +910,7 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp)
return (error);
}
-static int
+int
setprocnice(proc_t *pp, pcnice_t *pcnice)
{
kthread_t *tp;
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 91b4db8103..5f352b2203 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -64,6 +64,7 @@
#include <sys/spl.h>
#include <sys/copyops.h>
#include <sys/rctl.h>
+#include <sys/brand.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/tsol/label.h>
@@ -186,6 +187,7 @@ thread_init(void)
rctl_init();
project_init();
+ brand_init();
zone_init();
task_init();
tcache_init();
diff --git a/usr/src/uts/common/disp/ts.c b/usr/src/uts/common/disp/ts.c
index a190297100..738a2e47b4 100644
--- a/usr/src/uts/common/disp/ts.c
+++ b/usr/src/uts/common/disp/ts.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1269,14 +1268,14 @@ ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
tspp->ts_flags |= TSIASET;
thread_unlock(tx);
}
- TTY_HOLD(p->p_sessp);
+ mutex_enter(&p->p_sessp->s_lock);
sess_held = 1;
if ((pid == sid) && (p->p_sessp->s_vp != NULL) &&
((stp = p->p_sessp->s_vp->v_stream) != NULL)) {
if ((stp->sd_pgidp != NULL) && (stp->sd_sidp != NULL)) {
pgid = stp->sd_pgidp->pid_id;
sess_held = 0;
- TTY_RELE(p->p_sessp);
+ mutex_exit(&p->p_sessp->s_lock);
if (iaparmsp->ia_mode ==
IA_SET_INTERACTIVE) {
off = 0;
@@ -1292,7 +1291,7 @@ ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
}
}
if (sess_held)
- TTY_RELE(p->p_sessp);
+ mutex_exit(&p->p_sessp->s_lock);
thread_lock(tx);
@@ -2130,14 +2129,14 @@ ia_set_process_group(pid_t sid, pid_t bg_pgid, pid_t fg_pgid)
* that do not have focus and are changing the process group
* attatched to the tty, e.g. a process that is exiting
*/
- TTY_HOLD(leader->p_sessp);
+ mutex_enter(&leader->p_sessp->s_lock);
if (!(tspp->ts_flags & TSIASET) ||
(leader->p_sessp->s_vp == NULL) ||
(leader->p_sessp->s_vp->v_stream == NULL)) {
- TTY_RELE(leader->p_sessp);
+ mutex_exit(&leader->p_sessp->s_lock);
return;
}
- TTY_RELE(leader->p_sessp);
+ mutex_exit(&leader->p_sessp->s_lock);
/*
* If we're already holding the leader's p_lock, we should use
diff --git a/usr/src/uts/common/exec/aout/aout.c b/usr/src/uts/common/exec/aout/aout.c
index 5c7b6b1773..4e814b339b 100644
--- a/usr/src/uts/common/exec/aout/aout.c
+++ b/usr/src/uts/common/exec/aout/aout.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -56,19 +55,19 @@
static int aoutexec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz, int setid,
- caddr_t exec_file, cred_t *cred);
+ caddr_t exec_file, cred_t *cred, int brand_action);
static int get_aout_head(struct vnode **vpp, struct exdata *edp, long *execsz,
int *isdyn);
static int aoutcore(vnode_t *vp, proc_t *pp, cred_t *credp,
rlim64_t rlimit, int sig, core_content_t content);
#ifdef _LP64
extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- long *, int, caddr_t, cred_t *);
+ long *, int, caddr_t, cred_t *, int);
extern int elf32core(vnode_t *, proc_t *, cred_t *, rlim64_t, int,
core_content_t);
#else /* _LP64 */
extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
- long *, int, caddr_t, cred_t *);
+ long *, int, caddr_t, cred_t *, int);
extern int elfcore(vnode_t *, proc_t *, cred_t *, rlim64_t, int,
core_content_t);
#endif /* _LP64 */
@@ -141,7 +140,7 @@ _info(struct modinfo *modinfop)
static int
aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args,
struct intpdata *idatap, int level, long *execsz, int setid,
- caddr_t exec_file, cred_t *cred)
+ caddr_t exec_file, cred_t *cred, int brand_action)
{
int error;
struct exdata edp, edpout;
@@ -201,10 +200,10 @@ aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args,
}
#ifdef _LP64
if (error = elf32exec(nvp, uap, args, idatap, level, execsz,
- setid, exec_file, cred))
+ setid, exec_file, cred, brand_action))
#else /* _LP64 */
if (error = elfexec(nvp, uap, args, idatap, level, execsz,
- setid, exec_file, cred))
+ setid, exec_file, cred, brand_action))
#endif /* _LP64 */
{
VN_RELE(nvp);
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 33e3cc9b8e..6508cdae85 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -62,8 +62,11 @@
#include <sys/shm_impl.h>
#include <sys/archsystm.h>
#include <sys/fasttrap.h>
+#include <sys/brand.h>
#include "elf_impl.h"
+#include <sys/sdt.h>
+
extern int at_flags;
#define ORIGIN_STR "ORIGIN"
@@ -77,7 +80,7 @@ static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
- caddr_t *, caddr_t *, intptr_t *, size_t, long *, size_t *);
+ caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
typedef enum {
STR_CTF,
@@ -160,10 +163,83 @@ dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
return (0);
}
+/*
+ * Map in the executable pointed to by vp. Returns 0 on success.
+ */
+int
+mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Elf32_Addr *uphdr_vaddr,
+ intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
+ caddr_t *brkbase, size_t *brksize)
+{
+ size_t len;
+ struct vattr vat;
+ caddr_t phdrbase = NULL;
+ ssize_t phdrsize;
+ int nshdrs, shstrndx, nphdrs;
+ int error = 0;
+ Phdr *uphdr = NULL;
+ Phdr *junk = NULL;
+ Phdr *dynphdr = NULL;
+ Phdr *dtrphdr = NULL;
+ uintptr_t lddata;
+ long execsz;
+ intptr_t minaddr;
+
+ if (error = execpermissions(vp, &vat, args)) {
+ uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
+ return (error);
+ }
+
+ if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
+ &nphdrs)) != 0 ||
+ (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
+ &phdrsize)) != 0) {
+ uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
+ return (error);
+ }
+
+ if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
+ uprintf("%s: Nothing to load in %s", exec_file, args->pathname);
+ kmem_free(phdrbase, phdrsize);
+ return (ENOEXEC);
+ }
+
+ if (error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
+ &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
+ len, &execsz, brksize)) {
+ uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
+ kmem_free(phdrbase, phdrsize);
+ return (error);
+ }
+
+ /*
+ * Inform our caller if the executable needs an interpreter.
+ */
+ *interp = (dynphdr == NULL) ? 0 : 1;
+
+ /*
+ * If this is a statically linked executable, voffset should indicate
+ * the address of the executable itself (it normally holds the address
+ * of the interpreter).
+ */
+ if (ehdr->e_type == ET_EXEC && *interp == 0)
+ *voffset = minaddr;
+
+ if (uphdr != NULL) {
+ *uphdr_vaddr = uphdr->p_vaddr;
+ } else {
+ *uphdr_vaddr = (Elf32_Addr)-1;
+ }
+
+ kmem_free(phdrbase, phdrsize);
+ return (error);
+}
+
/*ARGSUSED*/
int
elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
- int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred)
+ int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
+ int brand_action)
{
caddr_t phdrbase = NULL;
caddr_t bssbase = 0;
@@ -175,10 +251,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ssize_t resid;
int fd = -1;
intptr_t voffset;
- Phdr *dyphdr = NULL;
- Phdr *stphdr = NULL;
- Phdr *uphdr = NULL;
- Phdr *junk = NULL;
+ Phdr *dyphdr = NULL;
+ Phdr *stphdr = NULL;
+ Phdr *uphdr = NULL;
+ Phdr *junk = NULL;
size_t len;
ssize_t phdrsize;
int postfixsize = 0;
@@ -189,6 +265,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
int hasu = 0;
int hasauxv = 0;
int hasdy = 0;
+ int branded = 0;
struct proc *p = ttoproc(curthread);
struct user *up = PTOU(p);
@@ -209,6 +286,13 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
+ if ((level < 2) &&
+ (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ return (BROP(p)->b_elfexec(vp, uap, args,
+ idatap, level + 1, execsz, setid, exec_file, cred,
+ brand_action));
+ }
+
bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
ehdrp = &bigwad->ehdr;
dlnp = bigwad->dl_name;
@@ -353,6 +437,22 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
} else
args->auxsize = 0;
+ /*
+ * If this binary is using an emulator, we need to add an
+ * AT_SUN_EMULATOR aux entry.
+ */
+ if (args->emulator != NULL)
+ args->auxsize += sizeof (aux_entry_t);
+
+ if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+ branded = 1;
+ /*
+ * We will be adding 2 entries to the aux vector. One for
+ * the branded binary's phdr and one for the brandname.
+ */
+ args->auxsize += 2 * sizeof (aux_entry_t);
+ }
+
aux = bigwad->elfargs;
/*
* Move args to the user's stack.
@@ -364,6 +464,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
}
goto out;
}
+ /* we're single threaded after this point */
/*
* If this is an ET_DYN executable (shared object),
@@ -377,8 +478,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
dtrphdr = NULL;
if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
- &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, len,
- execsz, &brksize)) != 0)
+ &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
+ len, execsz, &brksize)) != 0)
goto bad;
if (uphdr != NULL && dyphdr == NULL)
@@ -542,8 +643,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
dtrphdr = NULL;
error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
- &junk, &dtrphdr, NULL, NULL, NULL, &voffset, len, execsz,
- NULL);
+ &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
+ execsz, NULL);
if (error || junk != NULL) {
VN_RELE(nvp);
uprintf("%s: Cannot map %s\n", exec_file, dlnp);
@@ -601,6 +702,16 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
#else
ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
#endif
+ if (branded) {
+ /*
+ * Reserve space for the brand-private aux vector entry,
+ * and record the user addr of that space.
+ */
+ args->brand_auxp = (auxv32_t *)((char *)args->stackend +
+ ((char *)&aux->a_type - (char *)bigwad->elfargs));
+ ADDAUX(aux, AT_SUN_BRAND_PHDR, 0)
+ }
+
ADDAUX(aux, AT_NULL, 0)
postfixsize = (char *)aux - (char *)bigwad->elfargs;
ASSERT(postfixsize == args->auxsize);
@@ -639,6 +750,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
/*
* Copy auxv to the process's user structure for use by /proc.
+ * If this is a branded process, the brand's exec routine will
+ * copy its private entries to the user structure later. It
+ * relies on the fact that the blank entries are at the end.
*/
num_auxv = postfixsize / sizeof (aux_entry_t);
ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
@@ -968,6 +1082,7 @@ mapelfexec(
caddr_t *bssbase,
caddr_t *brkbase,
intptr_t *voffset,
+ intptr_t *minaddr,
size_t len,
long *execsz,
size_t *brksize)
@@ -980,6 +1095,7 @@ mapelfexec(
int page;
off_t offset;
int hsize = ehdr->e_phentsize;
+ caddr_t mintmp = (caddr_t)-1;
if (ehdr->e_type == ET_DYN) {
/*
@@ -1010,6 +1126,14 @@ mapelfexec(
prot |= PROT_EXEC;
addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
+
+ /*
+ * Keep track of the segment with the lowest starting
+ * address.
+ */
+ if (addr < mintmp)
+ mintmp = addr;
+
zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
offset = phdr->p_offset;
@@ -1110,6 +1234,12 @@ mapelfexec(
}
phdr = (Phdr *)((caddr_t)phdr + hsize);
}
+
+ if (minaddr != NULL) {
+ ASSERT(mintmp != (caddr_t)-1);
+ *minaddr = (intptr_t)mintmp;
+ }
+
return (0);
bad:
if (error == 0)
@@ -1850,13 +1980,14 @@ static struct execsw esw = {
};
static struct modlexec modlexec = {
- &mod_execops, "exec module for elf", &esw
+ &mod_execops, "exec module for elf %I%", &esw
};
#ifdef _LP64
extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
intpdata_t *idatap, int level, long *execsz,
- int setid, caddr_t exec_file, cred_t *cred);
+ int setid, caddr_t exec_file, cred_t *cred,
+ int brand_action);
extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
rlim64_t rlimit, int sig, core_content_t content);
diff --git a/usr/src/uts/common/exec/elf/elf_impl.h b/usr/src/uts/common/exec/elf/elf_impl.h
index 52094e3794..010d5e6256 100644
--- a/usr/src/uts/common/exec/elf/elf_impl.h
+++ b/usr/src/uts/common/exec/elf/elf_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -80,6 +79,7 @@ typedef struct {
#define elfexec elf32exec
#define elfnote elf32note
#define elfcore elf32core
+#define mapexec_brand mapexec32_brand
#define setup_note_header setup_note_header32
#define write_elfnotes write_elfnotes32
#define setup_old_note_header setup_old_note_header32
diff --git a/usr/src/uts/common/exec/intp/intp.c b/usr/src/uts/common/exec/intp/intp.c
index 6c6c98246d..4d5c04dfd4 100644
--- a/usr/src/uts/common/exec/intp/intp.c
+++ b/usr/src/uts/common/exec/intp/intp.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -45,6 +44,7 @@
#include <sys/disp.h>
#include <sys/exec.h>
#include <sys/kmem.h>
+#include <sys/note.h>
/*
* This is the loadable module wrapper.
@@ -166,8 +166,10 @@ intpexec(
long *execsz,
int setid,
caddr_t exec_file,
- struct cred *cred)
+ struct cred *cred,
+ int brand_action)
{
+ _NOTE(ARGUNUSED(brand_action))
vnode_t *nvp;
int error = 0;
struct intpdata idata;
@@ -223,8 +225,8 @@ intpexec(
args->fname = devfd;
}
- error = gexec(&nvp, uap, args, &idata, ++level,
- execsz, exec_file, cred);
+ error = gexec(&nvp, uap, args, &idata, ++level, execsz, exec_file, cred,
+ EBA_NONE);
done:
VN_RELE(nvp);
args->pathname = opath;
diff --git a/usr/src/uts/common/exec/java/java.c b/usr/src/uts/common/exec/java/java.c
index 0e8c3996e7..bcf61453c9 100644
--- a/usr/src/uts/common/exec/java/java.c
+++ b/usr/src/uts/common/exec/java/java.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -88,7 +87,7 @@ char *jexec_arg = "-jar";
static int
javaexec(vnode_t *vp, struct execa *uap, struct uarg *args,
struct intpdata *idatap, int level, long *execsz, int setid,
- caddr_t execfile, cred_t *cred)
+ caddr_t execfile, cred_t *cred, int brand_action)
{
struct intpdata idata;
int error;
@@ -162,8 +161,8 @@ javaexec(vnode_t *vp, struct execa *uap, struct uarg *args,
args->pathname = resolvepn.pn_path;
/* don't free resolvepn until we are done with args */
pn_free(&lookpn);
- error = gexec(&nvp,
- uap, args, &idata, level + 1, execsz, execfile, cred);
+ error = gexec(&nvp, uap, args, &idata, level + 1, execsz, execfile,
+ cred, EBA_NONE);
VN_RELE(nvp);
args->pathname = opath;
pn_free(&resolvepn);
diff --git a/usr/src/uts/common/fs/fifofs/fifosubr.c b/usr/src/uts/common/fs/fifofs/fifosubr.c
index 3ee72c9124..8767999322 100644
--- a/usr/src/uts/common/fs/fifofs/fifosubr.c
+++ b/usr/src/uts/common/fs/fifofs/fifosubr.c
@@ -304,7 +304,8 @@ static void fifo_reinit_vp(vnode_t *vp)
{
vn_reinit(vp);
vp->v_type = VFIFO;
- vp->v_flag = VNOMAP | VNOSWAP;
+ vp->v_flag &= VROOT;
+ vp->v_flag |= VNOMAP | VNOSWAP;
}
/*
@@ -470,6 +471,7 @@ fifovp(vnode_t *vp, cred_t *crp)
fifo_reinit_vp(newvp);
newvp->v_vfsp = vp->v_vfsp;
newvp->v_rdev = vp->v_rdev;
+ newvp->v_flag |= (vp->v_flag & VROOT);
fifoinsert(fnp);
mutex_exit(&ftable_lock);
diff --git a/usr/src/uts/common/fs/fifofs/fifovnops.c b/usr/src/uts/common/fs/fifofs/fifovnops.c
index cab88019ff..34f731af1e 100644
--- a/usr/src/uts/common/fs/fifofs/fifovnops.c
+++ b/usr/src/uts/common/fs/fifofs/fifovnops.c
@@ -77,6 +77,8 @@ static int fifo_setattr(vnode_t *, vattr_t *, int, cred_t *,
caller_context_t *);
static int fifo_realvp(vnode_t *, vnode_t **);
static int fifo_access(vnode_t *, int, int, cred_t *);
+static int fifo_create(struct vnode *, char *, vattr_t *, enum vcexcl,
+ int, struct vnode **, struct cred *, int);
static int fifo_fid(vnode_t *, fid_t *);
static int fifo_fsync(vnode_t *, int, cred_t *);
static int fifo_seek(vnode_t *, offset_t, offset_t *);
@@ -116,6 +118,7 @@ const fs_operation_def_t fifo_vnodeops_template[] = {
VOPNAME_GETATTR, fifo_getattr,
VOPNAME_SETATTR, fifo_setattr,
VOPNAME_ACCESS, fifo_access,
+ VOPNAME_CREATE, fifo_create,
VOPNAME_FSYNC, fifo_fsync,
VOPNAME_INACTIVE, (fs_generic_func_p) fifo_inactive,
VOPNAME_FID, fifo_fid,
@@ -1542,6 +1545,27 @@ fifo_access(vnode_t *vp, int mode, int flags, cred_t *crp)
}
/*
+ * This can be called if creat or an open with O_CREAT is done on the root
+ * of a lofs mount where the mounted entity is a fifo.
+ */
+/*ARGSUSED*/
+static int
+fifo_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl,
+ int mode, struct vnode **vpp, struct cred *cr, int flag)
+{
+ int error;
+
+ ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
+ if (excl == NONEXCL) {
+ if (mode && (error = fifo_access(dvp, mode, 0, cr)))
+ return (error);
+ VN_HOLD(dvp);
+ return (0);
+ }
+ return (EEXIST);
+}
+
+/*
* If shadowing a vnode, apply the VOP_FSYNC to it.
* Otherwise, return 0.
*/
diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c
index 9278fe03da..2a6505ccf9 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c
@@ -1451,7 +1451,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
} else
mutex_exit(&mi->mi_lock);
- if (*doqueue && curproc->p_sessp->s_vp != NULL) {
+ if (*doqueue && nfs_has_ctty()) {
*doqueue = 0;
if (!(mi->mi_flags & MI4_NOPRINT))
nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
@@ -1481,7 +1481,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
bufp = clnt_sperror(client, svp->sv_hostname);
zprintf(zoneid, "NFS%d %s failed for %s\n",
mi->mi_vers, mi->mi_rfsnames[which], bufp);
- if (curproc->p_sessp->s_vp != NULL) {
+ if (nfs_has_ctty()) {
if (!(mi->mi_flags & MI4_NOPRINT)) {
uprintf("NFS%d %s failed for %s\n",
mi->mi_vers, mi->mi_rfsnames[which],
@@ -1494,7 +1494,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
"NFS %s failed for server %s: error %d (%s)\n",
mi->mi_rfsnames[which], svp->sv_hostname,
status, clnt_sperrno(status));
- if (curproc->p_sessp->s_vp != NULL) {
+ if (nfs_has_ctty()) {
if (!(mi->mi_flags & MI4_NOPRINT)) {
uprintf(
"NFS %s failed for server %s: error %d (%s)\n",
diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c
index 05e70935be..bf1beb1275 100644
--- a/usr/src/uts/common/fs/nfs/nfs_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs_subr.c
@@ -1235,7 +1235,7 @@ failoverretry:
#endif
} else
mutex_exit(&mi->mi_lock);
- if (*douprintf && curproc->p_sessp->s_vp != NULL) {
+ if (*douprintf && nfs_has_ctty()) {
*douprintf = 0;
if (!(mi->mi_flags & MI_NOPRINT))
#ifdef DEBUG
@@ -1292,7 +1292,7 @@ failoverretry:
bufp = clnt_sperror(client, svp->sv_hostname);
zprintf(zoneid, "NFS%d %s failed for %s\n",
mi->mi_vers, mi->mi_rfsnames[which], bufp);
- if (curproc->p_sessp->s_vp != NULL) {
+ if (nfs_has_ctty()) {
if (!(mi->mi_flags & MI_NOPRINT)) {
uprintf("NFS%d %s failed for %s\n",
mi->mi_vers, mi->mi_rfsnames[which],
@@ -1305,7 +1305,7 @@ failoverretry:
"NFS %s failed for server %s: error %d (%s)\n",
mi->mi_rfsnames[which], svp->sv_hostname,
status, clnt_sperrno(status));
- if (curproc->p_sessp->s_vp != NULL) {
+ if (nfs_has_ctty()) {
if (!(mi->mi_flags & MI_NOPRINT)) {
uprintf(
"NFS %s failed for server %s: error %d (%s)\n",
@@ -1821,7 +1821,7 @@ failoverretry:
#endif
} else
mutex_exit(&mi->mi_lock);
- if (*douprintf && curproc->p_sessp->s_vp != NULL) {
+ if (*douprintf && nfs_has_ctty()) {
*douprintf = 0;
if (!(mi->mi_flags & MI_NOPRINT))
#ifdef DEBUG
@@ -1886,7 +1886,7 @@ failoverretry:
bufp = clnt_sperror(client, svp->sv_hostname);
zprintf(zoneid, "NFS_ACL%d %s failed for %s\n",
mi->mi_vers, mi->mi_aclnames[which], bufp);
- if (curproc->p_sessp->s_vp != NULL) {
+ if (nfs_has_ctty()) {
if (!(mi->mi_flags & MI_NOPRINT)) {
uprintf("NFS_ACL%d %s failed for %s\n",
mi->mi_vers, mi->mi_aclnames[which],
@@ -1899,7 +1899,7 @@ failoverretry:
"NFS %s failed for server %s: error %d (%s)\n",
mi->mi_aclnames[which], svp->sv_hostname,
status, clnt_sperrno(status));
- if (curproc->p_sessp->s_vp != NULL) {
+ if (nfs_has_ctty()) {
if (!(mi->mi_flags & MI_NOPRINT))
uprintf(
"NFS %s failed for server %s: error %d (%s)\n",
@@ -5117,3 +5117,13 @@ out:
label_rele(zlabel);
return (retv);
}
+
+boolean_t
+nfs_has_ctty(void)
+{
+ boolean_t rv;
+ mutex_enter(&curproc->p_splock);
+ rv = (curproc->p_sessp->s_vp != NULL);
+ mutex_exit(&curproc->p_splock);
+ return (rv);
+}
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index 6a2d6f73d0..24c7ffedab 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -680,13 +680,16 @@ streams_open:
/* STREAMS devices don't have a size */
sp->s_size = csp->s_size = 0;
- /*
- * try to allocate it as a controlling terminal
- */
- if ((stp->sd_flag & STRISTTY) && !(flag & FNOCTTY))
- stralloctty(stp);
+ if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY))
+ return (0);
- return (0);
+ /* try to allocate it as a controlling terminal */
+ if (strctty(stp) != EINTR)
+ return (0);
+
+ /* strctty() was interrupted by a signal */
+ (void) spec_close(vp, flag, 1, 0, cr);
+ return (EINTR);
}
/*
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 7c64462314..49bde7abeb 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -943,7 +943,7 @@ top:
* Do remaining checks for FNOFOLLOW and FNOLINKS.
*/
if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
- error = EINVAL;
+ error = ELOOP;
goto out;
}
if (filemode & FNOLINKS) {
diff --git a/usr/src/uts/common/io/gentty.c b/usr/src/uts/common/io/gentty.c
index 9cb3e23b87..431e80245d 100644
--- a/usr/src/uts/common/io/gentty.c
+++ b/usr/src/uts/common/io/gentty.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 1990-1992,1996,1998-2003 Sun Microsystems, Inc.
+ * Copyright 2006 Sun Microsystems, Inc.
* All rights reserved.
* Use is subject to license terms.
*/
@@ -198,17 +197,20 @@ syopen(dev_t *devp, int flag, int otyp, struct cred *cr)
{
dev_t ttyd;
vnode_t *ttyvp;
- sess_t *sp = curproc->p_sessp;
+ sess_t *sp;
int error;
- if ((ttyd = sp->s_dev) == NODEV)
+ if ((sp = tty_hold()) == NULL)
+ return (EINTR);
+
+ if (sp->s_dev == NODEV) {
+ tty_rele(sp);
return (ENXIO);
- TTY_HOLD(sp);
- if ((ttyvp = sp->s_vp) == NULL) {
- TTY_RELE(sp);
- return (EIO);
}
+ ttyd = sp->s_dev;
+ ttyvp = sp->s_vp;
+
/*
* Open the control terminal. The control terminal may be
* opened multiple times and it is closed in freectty().
@@ -237,10 +239,12 @@ syopen(dev_t *devp, int flag, int otyp, struct cred *cr)
ASSERT(vn_matchops(ttyvp, spec_getvnodeops()));
csp = VTOS(VTOS(ttyvp)->s_commonvp);
mutex_enter(&csp->s_lock);
+ ASSERT(csp->s_count > 1);
csp->s_count--;
mutex_exit(&csp->s_lock);
}
- TTY_RELE(sp);
+
+ tty_rele(sp);
return (error);
}
@@ -255,41 +259,41 @@ syclose(dev_t dev, int flag, int otyp, struct cred *cr)
int
syread(dev_t dev, struct uio *uiop, struct cred *cr)
{
- vnode_t *ttyvp;
- sess_t *sp = curproc->p_sessp;
+ sess_t *sp;
int error;
- if (sp->s_dev == NODEV)
+ if ((sp = tty_hold()) == NULL)
+ return (EINTR);
+
+ if (sp->s_dev == NODEV) {
+ tty_rele(sp);
return (ENXIO);
- TTY_HOLD(sp);
- if ((ttyvp = sp->s_vp) == NULL) {
- TTY_RELE(sp);
- return (EIO);
}
- error = VOP_READ(ttyvp, uiop, 0, cr, NULL);
- TTY_RELE(sp);
- return (error);
+ error = VOP_READ(sp->s_vp, uiop, 0, cr, NULL);
+
+ tty_rele(sp);
+ return (error);
}
/* ARGSUSED */
int
sywrite(dev_t dev, struct uio *uiop, struct cred *cr)
{
- vnode_t *ttyvp;
- sess_t *sp = curproc->p_sessp;
+ sess_t *sp;
int error;
- if (sp->s_dev == NODEV)
+ if ((sp = tty_hold()) == NULL)
+ return (EINTR);
+
+ if (sp->s_dev == NODEV) {
+ tty_rele(sp);
return (ENXIO);
- TTY_HOLD(sp);
- if ((ttyvp = sp->s_vp) == NULL) {
- TTY_RELE(sp);
- return (EIO);
}
- error = VOP_WRITE(ttyvp, uiop, 0, cr, NULL);
- TTY_RELE(sp);
+ error = VOP_WRITE(sp->s_vp, uiop, 0, cr, NULL);
+
+ tty_rele(sp);
return (error);
}
@@ -299,19 +303,32 @@ int
syioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cr,
int *rvalp)
{
- vnode_t *ttyvp;
- sess_t *sp = curproc->p_sessp;
+ sess_t *sp;
int error;
- if (sp->s_dev == NODEV)
+ if (cmd == TIOCNOTTY) {
+ /*
+ * we can't allow this ioctl. the reason is that it
+ * attempts to remove the ctty for a session. to do
+ * this the ctty can't be in use but we grab a hold on
+ * the current ctty (via tty_hold) to perform this ioctl.
+ * if we were to allow this ioctl to pass through we
+ * would deadlock with ourselves.
+ */
+ return (EINVAL);
+ }
+
+ if ((sp = tty_hold()) == NULL)
+ return (EINTR);
+
+ if (sp->s_dev == NODEV) {
+ tty_rele(sp);
return (ENXIO);
- TTY_HOLD(sp);
- if ((ttyvp = sp->s_vp) == NULL) {
- TTY_RELE(sp);
- return (EIO);
}
- error = VOP_IOCTL(ttyvp, cmd, arg, mode, cr, rvalp);
- TTY_RELE(sp);
+
+ error = VOP_IOCTL(sp->s_vp, cmd, arg, mode, cr, rvalp);
+
+ tty_rele(sp);
return (error);
}
@@ -322,18 +339,19 @@ int
sypoll(dev_t dev, short events, int anyyet, short *reventsp,
struct pollhead **phpp)
{
- vnode_t *ttyvp;
- sess_t *sp = curproc->p_sessp;
+ sess_t *sp;
int error;
- if (sp->s_dev == NODEV)
+ if ((sp = tty_hold()) == NULL)
+ return (EINTR);
+
+ if (sp->s_dev == NODEV) {
+ tty_rele(sp);
return (ENXIO);
- TTY_HOLD(sp);
- if ((ttyvp = sp->s_vp) == NULL) {
- TTY_RELE(sp);
- return (EIO);
}
- error = VOP_POLL(ttyvp, events, anyyet, reventsp, phpp);
- TTY_RELE(sp);
+
+ error = VOP_POLL(sp->s_vp, events, anyyet, reventsp, phpp);
+
+ tty_rele(sp);
return (error);
}
diff --git a/usr/src/uts/common/io/l_strplumb.c b/usr/src/uts/common/io/l_strplumb.c
index 287ad1f08f..3997874684 100644
--- a/usr/src/uts/common/io/l_strplumb.c
+++ b/usr/src/uts/common/io/l_strplumb.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -228,7 +227,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
li = ldi_ident_from_anon();
if (op == SET_AUTOPUSH || op == CLR_AUTOPUSH) {
error = ldi_open_by_name(SAD_ADM, FREAD|FWRITE,
- CRED(), &lh, li);
+ kcred, &lh, li);
if (error) {
printf("kstr_autopush: open failed error %d\n", error);
ldi_ident_release(li);
@@ -236,7 +235,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
}
} else {
error = ldi_open_by_name(SAD_USR, FREAD|FWRITE,
- CRED(), &lh, li);
+ kcred, &lh, li);
if (error) {
printf("kstr_autopush: open failed error %d\n", error);
ldi_ident_release(li);
@@ -253,11 +252,11 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
push.sap_minor = *min;
error = ldi_ioctl(lh, SAD_GAP, (intptr_t)&push,
- FKIOCTL, CRED(), &rval);
+ FKIOCTL, kcred, &rval);
if (error) {
printf("kstr_autopush: ioctl failed, error %d\n",
error);
- (void) ldi_close(lh, FREAD|FWRITE, CRED());
+ (void) ldi_close(lh, FREAD|FWRITE, kcred);
return (error);
}
switch (push.sap_cmd) {
@@ -288,7 +287,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
(void) strcpy(mods[i], push.sap_list[i]);
mods[i] = NULL;
}
- (void) ldi_close(lh, FREAD|FWRITE, CRED());
+ (void) ldi_close(lh, FREAD|FWRITE, kcred);
return (0);
case CLR_AUTOPUSH:
@@ -299,12 +298,12 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
push.sap_major = *maj;
error = ldi_ioctl(lh, SAD_SAP, (intptr_t)&push,
- FKIOCTL, CRED(), &rval);
+ FKIOCTL, kcred, &rval);
if (error) {
printf("kstr_autopush: ioctl failed, error %d\n",
error);
}
- (void) ldi_close(lh, FREAD|FWRITE, CRED());
+ (void) ldi_close(lh, FREAD|FWRITE, kcred);
return (error);
case SET_AUTOPUSH:
@@ -338,16 +337,16 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
push.sap_list[i][0] = '\0';
error = ldi_ioctl(lh, SAD_SAP, (intptr_t)&push,
- FKIOCTL, CRED(), &rval);
+ FKIOCTL, kcred, &rval);
if (error) {
printf("kstr_autopush: ioctl failed, error %d\n",
error);
}
- (void) ldi_close(lh, FREAD|FWRITE, CRED());
+ (void) ldi_close(lh, FREAD|FWRITE, kcred);
return (error);
default:
- (void) ldi_close(lh, FREAD|FWRITE, CRED());
+ (void) ldi_close(lh, FREAD|FWRITE, kcred);
return (EINVAL);
}
}
diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c
index bd4dc10511..7910b58cc8 100644
--- a/usr/src/uts/common/io/ptm.c
+++ b/usr/src/uts/common/io/ptm.c
@@ -449,6 +449,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp)
return (0);
}
+static boolean_t
+ptmptsopencb(ptmptsopencb_arg_t arg)
+{
+ struct pt_ttys *ptmp = (struct pt_ttys *)arg;
+ boolean_t rval;
+
+ PT_ENTER_READ(ptmp);
+ rval = (ptmp->pt_nullmsg != NULL);
+ PT_EXIT_READ(ptmp);
+ return (rval);
+}
+
/*
* The wput procedure will only handle ioctl and flush messages.
*/
@@ -572,6 +584,41 @@ ptmwput(queue_t *qp, mblk_t *mp)
miocack(qp, mp, 0, 0);
break;
}
+ case PTMPTSOPENCB:
+ {
+ mblk_t *dp; /* ioctl reply data */
+ ptmptsopencb_t *ppocb;
+
+ /* only allow the kernel to invoke this ioctl */
+ if (iocp->ioc_cr != kcred) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* we don't support transparent ioctls */
+ ASSERT(iocp->ioc_count != TRANSPARENT);
+ if (iocp->ioc_count == TRANSPARENT) {
+ miocnak(qp, mp, 0, EINVAL);
+ break;
+ }
+
+ /* allocate a response message */
+ dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED);
+ if (dp == NULL) {
+ miocnak(qp, mp, 0, EAGAIN);
+ break;
+ }
+
+ /* initialize the ioctl results */
+ ppocb = (ptmptsopencb_t *)dp->b_rptr;
+ ppocb->ppocb_func = ptmptsopencb;
+ ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp;
+
+ /* send the reply data */
+ mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0);
+ qreply(qp, mp);
+ break;
+ }
}
break;
@@ -643,6 +690,13 @@ ptmwsrv(queue_t *qp)
ASSERT(qp->q_ptr);
ptmp = (struct pt_ttys *)qp->q_ptr;
+
+ if ((mp = getq(qp)) == NULL) {
+ /* If there are no messages there's nothing to do. */
+ DBG(("leaving ptmwsrv (no messages)\n"));
+ return;
+ }
+
PT_ENTER_READ(ptmp);
if ((ptmp->pt_state & PTLOCK) || (ptmp->pts_rdq == NULL)) {
DBG(("in master write srv proc but no slave\n"));
@@ -652,12 +706,12 @@ ptmwsrv(queue_t *qp)
* the user process waiting for ACK/NAK from
* the ioctl invocation
*/
- while ((mp = getq(qp)) != NULL) {
+ do {
if (mp->b_datap->db_type == M_IOCTL)
miocnak(qp, mp, 0, EINVAL);
else
freemsg(mp);
- }
+ } while ((mp = getq(qp)) != NULL);
flushq(qp, FLUSHALL);
mp = mexchange(NULL, NULL, 2, M_ERROR, -1);
@@ -672,7 +726,7 @@ ptmwsrv(queue_t *qp)
/*
* while there are messages on this write queue...
*/
- while ((mp = getq(qp)) != NULL) {
+ do {
/*
* if don't have control message and cannot put
* msg. on slave's read queue, put it back on
@@ -689,7 +743,7 @@ ptmwsrv(queue_t *qp)
*/
DBG(("send message to slave\n"));
putnext(ptmp->pts_rdq, mp);
- }
+ } while ((mp = getq(qp)) != NULL);
DBG(("leaving ptmwsrv\n"));
PT_EXIT_READ(ptmp);
}
diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h
index eda293574e..03c32254b7 100644
--- a/usr/src/uts/common/nfs/nfs.h
+++ b/usr/src/uts/common/nfs/nfs.h
@@ -931,6 +931,7 @@ extern void nfsauth_fini();
extern int nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *args);
extern int nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr,
struct knetconfig *knconf, cred_t *cr);
+extern boolean_t nfs_has_ctty(void);
extern void nfs_srv_stop_all(void);
extern void nfs_srv_quiesce_all(void);
extern void (*nfs_srv_quiesce_func)(void);
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c
new file mode 100644
index 0000000000..15d82871bf
--- /dev/null
+++ b/usr/src/uts/common/os/brand.c
@@ -0,0 +1,323 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/modctl.h>
+#include <sys/rwlock.h>
+#include <sys/zone.h>
+
+/*
+ * The only brand interface version that brand_register() accepts.
+ */
+#define SUPPORTED_BRAND_VERSION BRAND_VER_1
+
+/*
+ * Machine-dependent operations vector for the native brand. Every slot is
+ * NULL; the initializer has two entries on sparcv9 and six on all other
+ * platforms.
+ */
+#if defined(__sparcv9)
+struct brand_mach_ops native_mach_ops = {
+ NULL, NULL
+};
+#else
+struct brand_mach_ops native_mach_ops = {
+ NULL, NULL, NULL, NULL, NULL, NULL
+};
+#endif
+
+/*
+ * The native brand. brand_init() assigns it to p0, and brand_setbrand()
+ * asserts that a process still carries it before switching the process to
+ * the brand of its zone.
+ * NOTE(review): the initializer is positional; presumably version, name,
+ * ops vector and machine ops vector -- confirm against brand_t.
+ */
+brand_t native_brand = {
+ BRAND_VER_1,
+ "native",
+ NULL,
+ &native_mach_ops
+};
+
+/*
+ * Used to maintain a list of all the brands currently loaded into the
+ * kernel.
+ */
+struct brand_list {
+ int bl_refcnt; /* bumped by brand_register_zone(), dropped by brand_unregister_zone() */
+ struct brand_list *bl_next; /* next entry; NULL terminates the list */
+ brand_t *bl_brand; /* the brand handed to brand_register() */
+};
+
+/*
+ * Head of the list of loaded brands. brand_register() inserts new entries
+ * at the front.
+ */
+static struct brand_list *brand_list = NULL;
+
+/*
+ * This lock protects the integrity of the brand list. Every read and
+ * update of bl_refcnt in this file is also made with this lock held.
+ */
+static kmutex_t brand_list_lock;
+
+/*
+ * Set up the brand framework: initialize the lock that guards the list of
+ * loaded brands and give p0 the native brand.
+ */
+void
+brand_init(void)
+{
+	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
+	p0.p_brand = &native_brand;
+}
+
+/*
+ * Add a brand to the list of loaded brands.
+ *
+ * Returns 0 on success.  Returns EINVAL if the brand pointer is NULL, the
+ * system is labeled, the brand's version differs from
+ * SUPPORTED_BRAND_VERSION, the brand lacks a name, an ops vector or a
+ * b_brandsys entry, or a brand of the same name is already on the list.
+ */
+int
+brand_register(brand_t *brand)
+{
+	struct brand_list *entry, *cur;
+
+	if (brand == NULL)
+		return (EINVAL);
+
+	if (is_system_labeled()) {
+		cmn_err(CE_WARN,
+		    "Branded zones are not allowed on labeled systems.");
+		return (EINVAL);
+	}
+
+	/* Only brands built for exactly our interface version are accepted. */
+	if (brand->b_version < SUPPORTED_BRAND_VERSION) {
+		cmn_err(CE_WARN,
+		    "brand '%s' was built to run on older versions "
+		    "of Solaris.",
+		    brand->b_name);
+		return (EINVAL);
+	}
+	if (brand->b_version > SUPPORTED_BRAND_VERSION) {
+		cmn_err(CE_WARN,
+		    "brand '%s' was built to run on a newer version "
+		    "of Solaris.",
+		    brand->b_name);
+		return (EINVAL);
+	}
+
+	/* Sanity checks */
+	if (brand->b_name == NULL || brand->b_ops == NULL ||
+	    brand->b_ops->b_brandsys == NULL) {
+		cmn_err(CE_WARN, "Malformed brand");
+		return (EINVAL);
+	}
+
+	/* Allocate and fill in the new entry before taking the list lock. */
+	entry = kmem_alloc(sizeof (*entry), KM_SLEEP);
+	entry->bl_brand = brand;
+	entry->bl_refcnt = 0;
+
+	mutex_enter(&brand_list_lock);
+
+	/*
+	 * Check to be sure we haven't already registered this brand.
+	 */
+	for (cur = brand_list; cur != NULL; cur = cur->bl_next) {
+		if (strcmp(brand->b_name, cur->bl_brand->b_name) == 0)
+			break;
+	}
+
+	if (cur != NULL) {
+		cmn_err(CE_WARN,
+		    "Invalid attempt to load a second instance of "
+		    "brand %s", brand->b_name);
+		mutex_exit(&brand_list_lock);
+		kmem_free(entry, sizeof (*entry));
+		return (EINVAL);
+	}
+
+	/* Add the brand to the front of the list of loaded brands. */
+	entry->bl_next = brand_list;
+	brand_list = entry;
+	mutex_exit(&brand_list_lock);
+
+	return (0);
+}
+
+/*
+ * The kernel module implementing this brand is being unloaded, so take the
+ * brand off the list of active brands.
+ *
+ * Returns 0 on success, EINVAL if the brand is malformed or is not on the
+ * list, and EBUSY if its reference count is still above zero.
+ */
+int
+brand_unregister(brand_t *brand)
+{
+	struct brand_list *entry;
+	struct brand_list **linkp;
+
+	/* Sanity checks */
+	if (brand == NULL || brand->b_name == NULL) {
+		cmn_err(CE_WARN, "Malformed brand");
+		return (EINVAL);
+	}
+
+	mutex_enter(&brand_list_lock);
+
+	/*
+	 * Walk the list while tracking the link that points at the current
+	 * entry, so the entry can be unlinked without a special case for
+	 * the head of the list.
+	 */
+	linkp = &brand_list;
+	while ((entry = *linkp) != NULL && entry->bl_brand != brand)
+		linkp = &entry->bl_next;
+
+	if (entry == NULL) {
+		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
+		mutex_exit(&brand_list_lock);
+		return (EINVAL);
+	}
+
+	if (entry->bl_refcnt > 0) {
+		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
+		    brand->b_name);
+		mutex_exit(&brand_list_lock);
+		return (EBUSY);
+	}
+
+	/* Remove brand from the list; free it after dropping the lock. */
+	*linkp = entry->bl_next;
+	mutex_exit(&brand_list_lock);
+
+	kmem_free(entry, sizeof (*entry));
+
+	return (0);
+}
+
+/*
+ * Record that a zone of this brand has been instantiated.  If the kernel
+ * module implementing this brand's functionality is not present, this
+ * routine attempts to load the module as a side effect.
+ *
+ * attr->ba_brandname is the name of the brand to look up and
+ * attr->ba_modname is the module, under the "brand/" prefix, that is
+ * expected to provide it.
+ *
+ * Returns the matching brand with its reference count incremented, or NULL
+ * if the system is labeled, the module path does not fit in MAXPATHLEN
+ * bytes, the module could not be opened, or the brand is still not on the
+ * list after the module was opened.
+ */
+brand_t *
+brand_register_zone(struct brand_attr *attr)
+{
+	struct brand_list *l = NULL;
+	ddi_modhandle_t hdl = NULL;
+	char *modname;
+	size_t len;
+	int err = 0;
+
+	if (is_system_labeled()) {
+		cmn_err(CE_WARN,
+		    "Branded zones are not allowed on labeled systems.");
+		return (NULL);
+	}
+
+	/*
+	 * We make at most two passes through this loop.  The first time
+	 * through, we're looking to see if this is a new user of an
+	 * already loaded brand.  If the brand hasn't been loaded, we
+	 * call ddi_modopen() to force it to be loaded and then make a
+	 * second pass through the list of brands.  If we don't find the
+	 * brand the second time through it means that the modname
+	 * specified in the brand_attr structure doesn't provide the brand
+	 * specified in the brandname field.  This would suggest a bug in
+	 * the brand's config.xml file.  We close the module and return
+	 * 'NULL' to the caller.
+	 */
+	for (;;) {
+		/*
+		 * Search list of loaded brands
+		 */
+		mutex_enter(&brand_list_lock);
+		for (l = brand_list; l != NULL; l = l->bl_next)
+			if (strcmp(attr->ba_brandname,
+			    l->bl_brand->b_name) == 0)
+				break;
+		/* Leave the loop with brand_list_lock still held. */
+		if ((l != NULL) || (hdl != NULL))
+			break;
+		mutex_exit(&brand_list_lock);
+
+		/*
+		 * We didn't find that the requested brand has been loaded
+		 * yet, so we trigger the load of the appropriate kernel
+		 * module and search the list again.
+		 *
+		 * Build the module path with a bounded format so that an
+		 * over-long ba_modname cannot write past the end of the
+		 * MAXPATHLEN buffer; a path that would be truncated is
+		 * rejected rather than opened under a different name.
+		 */
+		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+		len = snprintf(modname, MAXPATHLEN, "brand/%s",
+		    attr->ba_modname);
+		if (len >= MAXPATHLEN) {
+			kmem_free(modname, MAXPATHLEN);
+			return (NULL);
+		}
+		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
+		kmem_free(modname, MAXPATHLEN);
+
+		/*
+		 * Treat a NULL handle as a failure as well: the loop only
+		 * terminates on the second pass when hdl is non-NULL.
+		 */
+		if (err != 0 || hdl == NULL)
+			return (NULL);
+	}
+
+	/*
+	 * If we found the matching brand, bump its reference count.
+	 */
+	if (l != NULL)
+		l->bl_refcnt++;
+
+	mutex_exit(&brand_list_lock);
+
+	if (hdl != NULL)
+		(void) ddi_modclose(hdl);
+
+	return ((l != NULL) ? l->bl_brand : NULL);
+}
+
+/*
+ * Return the number of zones currently using this brand.  A brand that is
+ * not on the list of loaded brands yields 0.
+ */
+int
+brand_zone_count(struct brand *bp)
+{
+	struct brand_list *entry;
+	int nzones = 0;
+
+	mutex_enter(&brand_list_lock);
+
+	/* Advance to the entry for bp, or fall off the end of the list. */
+	entry = brand_list;
+	while (entry != NULL && entry->bl_brand != bp)
+		entry = entry->bl_next;
+
+	if (entry != NULL)
+		nzones = entry->bl_refcnt;
+
+	mutex_exit(&brand_list_lock);
+
+	return (nzones);
+}
+
+/*
+ * Drop one reference on the given brand, undoing the increment made by
+ * brand_register_zone().  Nothing happens if the brand is not on the list
+ * of loaded brands.
+ */
+void
+brand_unregister_zone(struct brand *bp)
+{
+	struct brand_list *list;
+
+	mutex_enter(&brand_list_lock);
+
+	/* Advance to the entry for bp, or fall off the end of the list. */
+	list = brand_list;
+	while (list != NULL && list->bl_brand != bp)
+		list = list->bl_next;
+
+	if (list != NULL) {
+		ASSERT(list->bl_refcnt > 0);
+		list->bl_refcnt--;
+	}
+
+	mutex_exit(&brand_list_lock);
+}
+
+/*
+ * Switch a process that still carries the native brand over to the brand
+ * of the zone it runs in.  If that leaves the process branded, invoke the
+ * brand's b_setbrand operation and attach the brand handlers to the lwp of
+ * the process's thread.
+ */
+void
+brand_setbrand(proc_t *p)
+{
+	brand_t *bp = p->p_zone->zone_brand;
+
+	ASSERT(bp != NULL);
+	ASSERT(p->p_brand == &native_brand);
+
+	/*
+	 * We should only be called from exec(), when we know the process
+	 * is single-threaded.
+	 */
+	ASSERT(p->p_tlist == p->p_tlist->t_forw);
+
+	p->p_brand = bp;
+
+	/* Nothing more to do unless the process is now branded. */
+	if (!PROC_IS_BRANDED(p))
+		return;
+
+	BROP(p)->b_setbrand(p);
+	lwp_attach_brand_hdlrs(p->p_tlist->t_lwp);
+}
diff --git a/usr/src/uts/common/os/ddi.c b/usr/src/uts/common/os/ddi.c
index ec12f51f37..6a0b6ace80 100644
--- a/usr/src/uts/common/os/ddi.c
+++ b/usr/src/uts/common/os/ddi.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -293,14 +292,15 @@ WR(queue_t *q)
int
drv_getparm(unsigned int parm, void *valuep)
{
- time_t now;
+ proc_t *p = curproc;
+ time_t now;
switch (parm) {
case UPROCP:
- *(proc_t **)valuep = ttoproc(curthread);
+ *(proc_t **)valuep = p;
break;
case PPGRP:
- *(pid_t *)valuep = ttoproc(curthread)->p_pgrp;
+ *(pid_t *)valuep = p->p_pgrp;
break;
case LBOLT:
*(clock_t *)valuep = lbolt;
@@ -317,10 +317,12 @@ drv_getparm(unsigned int parm, void *valuep)
}
break;
case PPID:
- *(pid_t *)valuep = ttoproc(curthread)->p_pid;
+ *(pid_t *)valuep = p->p_pid;
break;
case PSID:
- *(pid_t *)valuep = ttoproc(curthread)->p_sessp->s_sid;
+ mutex_enter(&p->p_splock);
+ *(pid_t *)valuep = p->p_sessp->s_sid;
+ mutex_exit(&p->p_splock);
break;
case UCRED:
*(cred_t **)valuep = CRED();
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index a3cd19e423..3b01993465 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -65,6 +65,7 @@
#include <sys/lwpchan_impl.h>
#include <sys/pool.h>
#include <sys/sdt.h>
+#include <sys/brand.h>
#include <c2/audit.h>
@@ -89,7 +90,6 @@ uint_t auxv_hwcap32 = 0; /* 32-bit version of auxv_hwcap */
#endif
int exec_lpg_disable = 0;
-
#define PSUIDFLAGS (SNOCD|SUGID)
/*
@@ -109,12 +109,13 @@ exece(const char *fname, const char **argp, const char **envp)
{
int error;
- error = exec_common(fname, argp, envp);
+ error = exec_common(fname, argp, envp, EBA_NONE);
return (error ? (set_errno(error)) : 0);
}
int
-exec_common(const char *fname, const char **argp, const char **envp)
+exec_common(const char *fname, const char **argp, const char **envp,
+ int brand_action)
{
vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
proc_t *p = ttoproc(curthread);
@@ -136,6 +137,7 @@ exec_common(const char *fname, const char **argp, const char **envp)
lwpdir_t **old_tidhash;
uint_t old_tidhash_sz;
lwpent_t *lep;
+ int brandme = 0;
/*
* exec() is not supported for the /proc agent lwp.
@@ -146,6 +148,35 @@ exec_common(const char *fname, const char **argp, const char **envp)
if ((error = secpolicy_basic_exec(CRED())) != 0)
return (error);
+ if (brand_action != EBA_NONE) {
+ /*
+ * Brand actions are not supported for processes that are not
+ * running in a branded zone.
+ */
+ if (!ZONE_IS_BRANDED(p->p_zone))
+ return (ENOTSUP);
+
+ if (brand_action == EBA_NATIVE) {
+ /* Only branded processes can be unbranded */
+ if (!PROC_IS_BRANDED(p))
+ return (ENOTSUP);
+ } else {
+ /* Only unbranded processes can be branded */
+ if (PROC_IS_BRANDED(p))
+ return (ENOTSUP);
+ brandme = 1;
+ }
+ } else {
+ /*
+ * If this is a native zone, or if the process is already
+ * branded, then we don't need to do anything. If this is
+ * a native process in a branded zone, we need to brand the
+ * process as it exec()s the new binary.
+ */
+ if (ZONE_IS_BRANDED(p->p_zone) && !PROC_IS_BRANDED(p))
+ brandme = 1;
+ }
+
/*
* Inform /proc that an exec() has started.
* Hold signals that are ignored by default so that we will
@@ -237,8 +268,14 @@ exec_common(const char *fname, const char **argp, const char **envp)
ua.argp = argp;
ua.envp = envp;
+ /* If necessary, brand this process before we start the exec. */
+ if (brandme != 0)
+ brand_setbrand(p);
+
if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
- exec_file, p->p_cred)) != 0) {
+ exec_file, p->p_cred, brand_action)) != 0) {
+ if (brandme != 0)
+ BROP(p)->b_proc_exit(p, lwp);
VN_RELE(vp);
if (dir != NULL)
VN_RELE(dir);
@@ -351,6 +388,12 @@ exec_common(const char *fname, const char **argp, const char **envp)
*/
close_exec(P_FINFO(p));
TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
+
+ /* Unbrand ourself if requested. */
+ if (brand_action == EBA_NATIVE)
+ BROP(p)->b_proc_exit(p, lwp);
+ ASSERT((brand_action != EBA_NATIVE) || !PROC_IS_BRANDED(p));
+
setregs(&args);
/* Mark this as an executable vnode */
@@ -376,6 +419,9 @@ exec_common(const char *fname, const char **argp, const char **envp)
lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
}
+ if (PROC_IS_BRANDED(p))
+ BROP(p)->b_exec();
+
mutex_enter(&p->p_lock);
prbarrier(p);
@@ -411,6 +457,7 @@ exec_common(const char *fname, const char **argp, const char **envp)
lep->le_start = curthread->t_start;
lwp_hash_in(p, lep);
}
+
/*
* Restore the saved signal mask and
* inform /proc that the exec() has finished.
@@ -422,6 +469,7 @@ exec_common(const char *fname, const char **argp, const char **envp)
kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t));
kmem_free(old_tidhash, old_tidhash_sz * sizeof (lwpdir_t *));
}
+
ASSERT(error == 0);
DTRACE_PROC(exec__success);
return (0);
@@ -451,7 +499,8 @@ gexec(
int level,
long *execsz,
caddr_t exec_file,
- struct cred *cred)
+ struct cred *cred,
+ int brand_action)
{
struct vnode *vp;
proc_t *pp = ttoproc(curthread);
@@ -593,7 +642,7 @@ gexec(
setidfl |= EXECSETID_PRIVS;
error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz,
- setidfl, exec_file, cred);
+ setidfl, exec_file, cred, brand_action);
rw_exit(eswp->exec_lock);
if (error != 0) {
if (newcred != NULL)
@@ -1016,17 +1065,44 @@ execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen,
}
if (zfodlen) {
+ struct as *as = curproc->p_as;
+ struct seg *seg;
+ uint_t zprot = 0;
+
end = (size_t)addr + len;
zfodbase = (caddr_t)roundup(end, PAGESIZE);
zfoddiff = (uintptr_t)zfodbase - end;
if (zfoddiff) {
+ /*
+ * Before we go to zero the remaining space on the last
+ * page, make sure we have write permission.
+ */
+
+ AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+ seg = as_segat(curproc->p_as, (caddr_t)end);
+ if (seg != NULL)
+ SEGOP_GETPROT(seg, (caddr_t)end, zfoddiff - 1,
+ &zprot);
+ AS_LOCK_EXIT(as, &as->a_lock);
+
+ if (seg != NULL && (zprot & PROT_WRITE) == 0) {
+ (void) as_setprot(as, (caddr_t)end,
+ zfoddiff - 1, zprot | PROT_WRITE);
+ }
+
if (on_fault(&ljb)) {
no_fault();
+ if (seg != NULL && (zprot & PROT_WRITE) == 0)
+ (void) as_setprot(as, (caddr_t)end,
+ zfoddiff - 1, zprot);
error = EFAULT;
goto bad;
}
uzero((void *)end, zfoddiff);
no_fault();
+ if (seg != NULL && (zprot & PROT_WRITE) == 0)
+ (void) as_setprot(as, (caddr_t)end,
+ zfoddiff - 1, zprot);
}
if (zfodlen > zfoddiff) {
struct segvn_crargs crargs =
@@ -1326,13 +1402,22 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
args->ne = args->na - argc;
/*
- * Add AT_SUN_PLATFORM and AT_SUN_EXECNAME strings to the stack.
+ * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
+ * AT_SUN_EMULATOR strings to the stack.
*/
if (auxvpp != NULL && *auxvpp != NULL) {
if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
return (error);
if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
return (error);
+ if (args->brandname != NULL &&
+ (error = stk_add(args, args->brandname,
+ UIO_SYSSPACE)) != 0)
+ return (error);
+ if (args->emulator != NULL &&
+ (error = stk_add(args, args->emulator,
+ UIO_SYSSPACE)) != 0)
+ return (error);
}
/*
@@ -1438,19 +1523,32 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
/*
* Fill in the aux vector now that we know the user stack addresses
- * for the AT_SUN_PLATFORM and AT_SUN_EXECNAME strings.
+ * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
+ * AT_SUN_EMULATOR strings.
*/
if (auxvpp != NULL && *auxvpp != NULL) {
if (args->to_model == DATAMODEL_NATIVE) {
auxv_t **a = (auxv_t **)auxvpp;
ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp])
+ if (args->brandname != NULL)
+ ADDAUX(*a,
+ AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
+ if (args->emulator != NULL)
+ ADDAUX(*a,
+ AT_SUN_EMULATOR, (long)&ustrp[*--offp])
} else {
auxv32_t **a = (auxv32_t **)auxvpp;
ADDAUX(*a,
AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
ADDAUX(*a,
- AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]);
+ AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
+ if (args->brandname != NULL)
+ ADDAUX(*a, AT_SUN_BRANDNAME,
+ (int)(uintptr_t)&ustrp[*--offp])
+ if (args->emulator != NULL)
+ ADDAUX(*a, AT_SUN_EMULATOR,
+ (int)(uintptr_t)&ustrp[*--offp])
}
}
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 70061a7d3e..3063e5717f 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -73,6 +73,7 @@
#include <sys/pool.h>
#include <sys/sdt.h>
#include <sys/corectl.h>
+#include <sys/brand.h>
/*
* convert code/data pair into old style wait status
@@ -158,7 +159,6 @@ restart_init(int what, int why)
user_t *up = PTOU(p);
vnode_t *oldcd, *oldrd;
- sess_t *sp;
int i, err;
char reason_buf[64];
@@ -257,17 +257,9 @@ restart_init(int what, int why)
if (oldcd != NULL)
VN_RELE(oldcd);
- /*
- * Free the controlling tty.
- */
- mutex_enter(&pidlock);
- sp = p->p_sessp;
- if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
- mutex_exit(&pidlock);
- freectty(sp);
- } else {
- mutex_exit(&pidlock);
- }
+ /* Free the controlling tty. (freectty() always assumes curproc.) */
+ ASSERT(p == curproc);
+ (void) freectty(B_TRUE);
/*
* Now exec() the new init(1M) on top of the current process. If we
@@ -343,7 +335,6 @@ proc_exit(int why, int what)
timeout_id_t tmp_id;
int rv;
proc_t *q;
- sess_t *sp;
task_t *tk;
vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
sigqueue_t *sqp;
@@ -367,6 +358,14 @@ proc_exit(int why, int what)
DTRACE_PROC1(exit, int, why);
/*
+ * Perform any brand-specific proc exit processing. Since this is
+ * always the last lwp, this will also perform the brand's lwp exit
+ * processing and free the brand data.
+ */
+ if (PROC_IS_BRANDED(p))
+ BROP(p)->b_proc_exit(p, lwp);
+
+ /*
* Don't let init exit unless zone_start_init() failed its exec, or
* we are shutting down the zone or the machine.
*
@@ -377,6 +376,7 @@ proc_exit(int why, int what)
if (z->zone_boot_err == 0 &&
zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN &&
+ z->zone_restart_init == B_TRUE &&
restart_init(what, why) == 0)
return (0);
/*
@@ -523,13 +523,9 @@ proc_exit(int why, int what)
closeall(P_FINFO(p));
- mutex_enter(&pidlock);
- sp = p->p_sessp;
- if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
- mutex_exit(&pidlock);
- freectty(sp);
- } else
- mutex_exit(&pidlock);
+ /* Free the controlling tty. (freectty() always assumes curproc.) */
+ ASSERT(p == curproc);
+ (void) freectty(B_TRUE);
#if defined(__sparc)
if (p->p_utraps != NULL)
diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c
index c7c400246d..fbda5b8c4a 100644
--- a/usr/src/uts/common/os/fork.c
+++ b/usr/src/uts/common/os/fork.c
@@ -80,6 +80,7 @@
#include <sys/sdt.h>
#include <sys/class.h>
#include <sys/corectl.h>
+#include <sys/brand.h>
static int64_t cfork(int, int);
static int getproc(proc_t **, int);
@@ -461,8 +462,10 @@ cfork(int isvfork, int isfork1)
mutex_exit(&p->p_lock);
}
- /* set return values for child */
- lwp_setrval(clone, p->p_pid, 1);
+ if (PROC_IS_BRANDED(p))
+ BROP(p)->b_lwp_setrval(clone, p->p_pid, 1);
+ else
+ lwp_setrval(clone, p->p_pid, 1);
/* set return values for parent */
r.r_val1 = (int)cp->p_pid;
@@ -873,6 +876,7 @@ getproc(proc_t **cpp, int kernel)
/*
* Make proc entry for child process
*/
+ mutex_init(&cp->p_splock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&cp->p_crlock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&cp->p_pflock, NULL, MUTEX_DEFAULT, NULL);
#if defined(__x86)
@@ -882,7 +886,7 @@ getproc(proc_t **cpp, int kernel)
cp->p_stat = SIDL;
cp->p_mstart = gethrtime();
- if ((newpid = pid_assign(cp)) == -1) {
+ if ((newpid = pid_allocate(cp, PID_ALLOC_PROC)) == -1) {
if (nproc == v.v_proc) {
CPU_STATS_ADDQ(CPU, sys, procovf, 1);
cmn_err(CE_WARN, "out of processes");
@@ -926,10 +930,13 @@ getproc(proc_t **cpp, int kernel)
cp->p_siginfo = pp->p_siginfo;
cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD);
cp->p_sessp = pp->p_sessp;
- SESS_HOLD(pp->p_sessp);
+ sess_hold(pp);
cp->p_exec = pp->p_exec;
cp->p_execdir = pp->p_execdir;
cp->p_zone = pp->p_zone;
+ cp->p_brand = pp->p_brand;
+ if (PROC_IS_BRANDED(pp))
+ BROP(pp)->b_copy_procdata(cp, pp);
cp->p_bssbase = pp->p_bssbase;
cp->p_brkbase = pp->p_brkbase;
@@ -1198,6 +1205,7 @@ try_again:
if (p->p_segacct)
shmexit(p);
+
/*
* We grab p_lock for the benefit of /proc
*/
diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c
index dbccf77b9e..26a12c805e 100644
--- a/usr/src/uts/common/os/lwp.c
+++ b/usr/src/uts/common/os/lwp.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -59,6 +58,7 @@
#include <sys/cpc_impl.h>
#include <sys/sdt.h>
#include <sys/cmn_err.h>
+#include <sys/brand.h>
void *segkp_lwp; /* cookie for pool of segkp resources */
@@ -87,6 +87,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
uint_t old_hashsz = 0;
int i;
int rctlfail = 0;
+ boolean_t branded = 0;
mutex_enter(&p->p_lock);
mutex_enter(&p->p_zone->zone_nlwps_lock);
@@ -448,6 +449,19 @@ grow:
break;
} while (lwp_hash_lookup(p, t->t_tid) != NULL);
}
+
+ /*
+ * If this is a branded process, let the brand do any necessary lwp
+ * initialization.
+ */
+ if (PROC_IS_BRANDED(p)) {
+ if (BROP(p)->b_initlwp(lwp)) {
+ err = 1;
+ goto error;
+ }
+ branded = 1;
+ }
+
p->p_lwpcnt++;
t->t_waitfor = -1;
@@ -540,6 +554,9 @@ error:
if (cid != NOCLASS && bufp != NULL)
CL_FREE(cid, bufp);
+ if (branded)
+ BROP(p)->b_freelwp(lwp);
+
mutex_exit(&p->p_lock);
t->t_state = TS_FREE;
thread_rele(t);
@@ -673,6 +690,13 @@ lwp_exit(void)
if (t->t_upimutex != NULL)
upimutex_cleanup();
+ /*
+ * Perform any brand specific exit processing, then release any
+ * brand data associated with the lwp
+ */
+ if (PROC_IS_BRANDED(p))
+ BROP(p)->b_lwpexit(lwp);
+
mutex_enter(&p->p_lock);
lwp_cleanup();
@@ -1565,6 +1589,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
proc_t *p = lwptoproc(lwp);
int cid;
void *bufp;
+ void *brand_data;
int val;
ASSERT(p == curproc);
@@ -1578,6 +1603,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
if (t == curthread)
/* copy args out of registers first */
(void) save_syscall_args();
+
clwp = lwp_create(cp->p_lwpcnt == 0 ? lwp_rtt_initial : lwp_rtt,
NULL, 0, cp, TS_STOPPED, t->t_pri, &t->t_hold, NOCLASS, lwpid);
if (clwp == NULL)
@@ -1591,14 +1617,16 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
ct = clwp->lwp_thread;
tregs = clwp->lwp_regs;
tfpu = clwp->lwp_fpu;
+ brand_data = clwp->lwp_brand;
/* copy parent lwp to child lwp */
*clwp = *lwp;
/* fix up child's lwp */
- clwp->lwp_pcb.pcb_flags = 0;
-#if defined(__sparc)
+#if defined(__i386) || defined(__amd64)
+ clwp->lwp_pcb.pcb_flags = clwp->lwp_pcb.pcb_flags & RUPDATE_PENDING;
+#elif defined(__sparc)
clwp->lwp_pcb.pcb_step = STEP_NONE;
#endif
clwp->lwp_cursig = 0;
@@ -1608,6 +1636,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
ct->t_sysnum = t->t_sysnum;
clwp->lwp_regs = tregs;
clwp->lwp_fpu = tfpu;
+ clwp->lwp_brand = brand_data;
clwp->lwp_ap = clwp->lwp_arg;
clwp->lwp_procp = cp;
bzero(clwp->lwp_timer, sizeof (clwp->lwp_timer));
@@ -1640,6 +1669,10 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
ct->t_proc_flag |= TP_MSACCT;
mutex_exit(&cp->p_lock);
+ /* Allow brand to propagate brand-specific state */
+ if (PROC_IS_BRANDED(p))
+ BROP(p)->b_forklwp(lwp, clwp);
+
retry:
cid = t->t_cid;
diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c
index 958bbf96c8..ec9fc6c3e3 100644
--- a/usr/src/uts/common/os/main.c
+++ b/usr/src/uts/common/os/main.c
@@ -70,6 +70,7 @@
#include <sys/errorq.h>
#include <sys/class.h>
#include <sys/stack.h>
+#include <sys/brand.h>
#include <vm/as.h>
#include <vm/seg_kmem.h>
@@ -124,6 +125,7 @@ cluster_wrapper(void)
char initname[INITNAME_SZ] = "/sbin/init"; /* also referenced by zone0 */
char initargs[BOOTARGS_MAX] = ""; /* also referenced by zone0 */
+extern int64_t lwp_sigmask(int, uint_t, uint_t);
/*
* Construct a stack for init containing the arguments to it, then
@@ -144,6 +146,7 @@ exec_init(const char *initpath, const char *args)
int error = 0, count = 0;
proc_t *p = ttoproc(curthread);
klwp_t *lwp = ttolwp(curthread);
+ int brand_action;
if (args == NULL)
args = "";
@@ -247,9 +250,17 @@ exec_init(const char *initpath, const char *args)
curthread->t_post_sys = 1;
curthread->t_sysnum = SYS_execve;
+ /*
+ * If we are executing init from zsched, we may have inherited its
+ * parent process's signal mask. Clear it now so that we behave in
+ * the same way as when started from the global zone.
+ */
+ (void) lwp_sigmask(SIG_UNBLOCK, 0xffffffff, 0xffffffff);
+
+ brand_action = ZONE_IS_BRANDED(p->p_zone) ? EBA_BRAND : EBA_NONE;
again:
error = exec_common((const char *)(uintptr_t)exec_fnamep,
- (const char **)(uintptr_t)uap, NULL);
+ (const char **)(uintptr_t)uap, NULL, brand_action);
/*
* Normally we would just set lwp_argsaved and t_post_sys and
diff --git a/usr/src/uts/common/os/modconf.c b/usr/src/uts/common/os/modconf.c
index 2992567207..3e662fac7d 100644
--- a/usr/src/uts/common/os/modconf.c
+++ b/usr/src/uts/common/os/modconf.c
@@ -55,6 +55,7 @@
#include <ipp/ipp.h>
#include <sys/strsubr.h>
#include <sys/kcpc.h>
+#include <sys/brand.h>
#include <sys/cpc_pcbe.h>
#include <sys/kstat.h>
#include <sys/fs/sdev_node.h>
@@ -237,6 +238,16 @@ struct mod_ops mod_pcbeops = {
mod_installpcbe, mod_removepcbe, mod_infonull
};
+/*
+ * Brand modules.
+ *
+ * The operations vector for brand modules supplies mod_installbrand,
+ * mod_removebrand and mod_infonull, in that order.
+ */
+static int mod_installbrand(struct modlbrand *, struct modlinkage *);
+static int mod_removebrand(struct modlbrand *, struct modlinkage *);
+
+struct mod_ops mod_brandops = {
+ mod_installbrand, mod_removebrand, mod_infonull
+};
+
static struct sysent *mod_getsysent(struct modlinkage *, struct sysent *);
static char uninstall_err[] = "Cannot uninstall %s; not installed";
@@ -496,6 +507,23 @@ mod_removepcbe(struct modlpcbe *modl, struct modlinkage *modlp)
}
/*
+ * Manage BrandZ modules.
+ */
+/*
+ * Install a brand module by registering the brand definition it carries.
+ * The result of brand_register() is passed straight back to the caller;
+ * modlp is unused.
+ */
+/*ARGSUSED*/
+static int
+mod_installbrand(struct modlbrand *modl, struct modlinkage *modlp)
+{
+	brand_t *branddef = modl->brand_branddef;
+
+	return (brand_register(branddef));
+}
+
+/*
+ * Remove a brand module by unregistering the brand definition it carries.
+ * The result of brand_unregister() is passed straight back to the caller;
+ * modlp is unused.
+ */
+/*ARGSUSED*/
+static int
+mod_removebrand(struct modlbrand *modl, struct modlinkage *modlp)
+{
+	brand_t *branddef = modl->brand_branddef;
+
+	return (brand_unregister(branddef));
+}
+
+/*
* manage /dev fs modules
*/
/*ARGSUSED*/
@@ -1075,8 +1103,10 @@ mod_removefs(struct modlfs *modl, struct modlinkage *modlp)
return (EBUSY);
}
- /* XXX - Shouldn't the refcount be sufficient? */
-
+ /*
+ * A mounted filesystem could still have vsw_count = 0
+ * so we must check whether anyone is actually using our ops
+ */
if (vfs_opsinuse(&vswp->vsw_vfsops)) {
vfs_unrefvfssw(vswp);
WUNLOCK_VFSSW();
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c
index 66cfed74b4..88b0258afe 100644
--- a/usr/src/uts/common/os/pid.c
+++ b/usr/src/uts/common/os/pid.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -115,6 +114,18 @@ pid_lookup(pid_t pid)
return (pidp);
}
+/*
+ * Look up the pid structure for a process id by calling pid_lookup() with
+ * pidlinklock held.  Returns whatever pid_lookup() returns.
+ */
+struct pid *
+pid_find(pid_t pid)
+{
+	struct pid *found;
+
+	mutex_enter(&pidlinklock);
+	found = pid_lookup(pid);
+	mutex_exit(&pidlinklock);
+
+	return (found);
+}
+
void
pid_setmin(void)
{
@@ -154,14 +165,13 @@ pid_getlockslot(int prslot)
}
/*
- * This function assigns a pid for use in a fork request. It allocates
- * a pid structure, tries to find an empty slot in the proc table,
- * and selects the process id.
+ * This function allocates a pid structure, a free pid, and optionally a
+ * slot in the proc table for it.
*
- * pid_assign() returns the new pid on success, -1 on failure.
+ * pid_allocate() returns the new pid on success, -1 on failure.
*/
pid_t
-pid_assign(proc_t *prp)
+pid_allocate(proc_t *prp, int flags)
{
struct pid *pidp;
union procent *pep;
@@ -170,7 +180,7 @@ pid_assign(proc_t *prp)
pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
mutex_enter(&pidlinklock);
- if ((pep = procentfree) == NULL) {
+ if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
/*
* ran out of /proc directory entries
*/
@@ -190,10 +200,6 @@ pid_assign(proc_t *prp)
goto failed;
}
- procentfree = pep->pe_next;
- pep->pe_proc = prp;
- prp->p_pidp = pidp;
-
/*
* Put pid into the pid hash table.
*/
@@ -201,8 +207,17 @@ pid_assign(proc_t *prp)
HASHPID(newpid) = pidp;
pidp->pid_ref = 1;
pidp->pid_id = newpid;
- pidp->pid_prslot = pep - procdir;
- prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
+
+ if (flags & PID_ALLOC_PROC) {
+ procentfree = pep->pe_next;
+ pidp->pid_prslot = pep - procdir;
+ pep->pe_proc = prp;
+ prp->p_pidp = pidp;
+ prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
+ } else {
+ pidp->pid_prslot = 0;
+ }
+
mutex_exit(&pidlinklock);
return (newpid);
@@ -264,7 +279,7 @@ pid_exit(proc_t *prp)
if (prp->p_pgidp != NULL)
pgexit(prp);
- SESS_RELE(prp->p_sessp);
+ sess_rele(prp->p_sessp, B_TRUE);
pidp = prp->p_pidp;
diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c
index 603da31b62..a50bfa0db9 100644
--- a/usr/src/uts/common/os/printf.c
+++ b/usr/src/uts/common/os/printf.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -142,21 +141,15 @@ retry:
if (sl & SL_USER) {
ssize_t resid;
- sess_t *sessp;
-
- mutex_enter(&pidlock);
- sessp = curproc->p_sessp;
- SESS_HOLD(sessp);
- TTY_HOLD(sessp);
- mutex_exit(&pidlock);
- if (sessp->s_vp)
- (void) vn_rdwr(UIO_WRITE, sessp->s_vp,
- body, len, 0LL, UIO_SYSSPACE,
- FAPPEND, (rlim64_t)LOG_HIWAT, kcred, &resid);
- mutex_enter(&pidlock);
- TTY_RELE(sessp);
- SESS_RELE(sessp);
- mutex_exit(&pidlock);
+ sess_t *sp;
+
+ if ((sp = tty_hold()) != NULL) {
+ if (sp->s_vp != NULL)
+ (void) vn_rdwr(UIO_WRITE, sp->s_vp, body,
+ len, 0LL, UIO_SYSSPACE, FAPPEND,
+ (rlim64_t)LOG_HIWAT, kcred, &resid);
+ tty_rele(sp);
+ }
}
if (on_intr && !panicstr) {
diff --git a/usr/src/uts/common/os/procset.c b/usr/src/uts/common/os/procset.c
index 7a675c604e..ae5473847e 100644
--- a/usr/src/uts/common/os/procset.c
+++ b/usr/src/uts/common/os/procset.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -290,8 +289,10 @@ procinset(proc_t *pp, procset_t *psp)
break;
case P_SID:
+ mutex_enter(&pp->p_splock);
if (pp->p_sessp->s_sid == psp->p_lid)
loperand++;
+ mutex_exit(&pp->p_splock);
break;
case P_CID:
@@ -380,8 +381,10 @@ procinset(proc_t *pp, procset_t *psp)
break;
case P_SID:
+ mutex_enter(&pp->p_splock);
if (pp->p_sessp->s_sid == psp->p_rid)
roperand++;
+ mutex_exit(&pp->p_splock);
break;
case P_TASKID:
@@ -533,8 +536,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done)
break;
case P_SID:
+ mutex_enter(&pp->p_splock);
if (pp->p_sessp->s_sid == psp->p_lid)
loperand++;
+ mutex_exit(&pp->p_splock);
break;
case P_TASKID:
@@ -617,8 +622,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done)
break;
case P_SID:
+ mutex_enter(&pp->p_splock);
if (pp->p_sessp->s_sid == psp->p_rid)
roperand++;
+ mutex_exit(&pp->p_splock);
break;
case P_TASKID:
@@ -756,6 +763,7 @@ getmyid(idtype_t idtype)
proc_t *pp;
uid_t uid;
gid_t gid;
+ pid_t sid;
pp = ttoproc(curthread);
@@ -773,7 +781,10 @@ getmyid(idtype_t idtype)
return (pp->p_pgrp);
case P_SID:
- return (pp->p_sessp->s_sid);
+ mutex_enter(&pp->p_splock);
+ sid = pp->p_sessp->s_sid;
+ mutex_exit(&pp->p_splock);
+ return (sid);
case P_TASKID:
return (pp->p_task->tk_tkid);
diff --git a/usr/src/uts/common/os/session.c b/usr/src/uts/common/os/session.c
index 972677f7dc..7790a09094 100644
--- a/usr/src/uts/common/os/session.c
+++ b/usr/src/uts/common/os/session.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -47,102 +46,614 @@
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/strsubr.h>
+#include <sys/fs/snode.h>
sess_t session0 = {
- 1, /* s_ref */
- NODEV, /* s_dev */
- NULL, /* s_vp */
- &pid0, /* s_sidp */
- NULL /* s_cred */
+ &pid0, /* s_sidp */
+ {0}, /* s_lock */
+ 1, /* s_ref */
+ B_FALSE, /* s_sighuped */
+ B_FALSE, /* s_exit */
+ 0, /* s_exit_cv */
+ 0, /* s_cnt */
+ 0, /* s_cnt_cv */
+ NODEV, /* s_dev */
+ NULL, /* s_vp */
+ NULL /* s_cred */
};
void
-sess_rele(sess_t *sp)
+sess_hold(proc_t *p)
{
- ASSERT(MUTEX_HELD(&pidlock));
+ ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&p->p_splock));
+ mutex_enter(&p->p_sessp->s_lock);
+ p->p_sessp->s_ref++;
+ mutex_exit(&p->p_sessp->s_lock);
+}
+
+void
+sess_rele(sess_t *sp, boolean_t pidlock_held)
+{
+ ASSERT(MUTEX_HELD(&pidlock) || !pidlock_held);
+
+ mutex_enter(&sp->s_lock);
ASSERT(sp->s_ref != 0);
- if (--sp->s_ref == 0) {
- if (sp == &session0)
- panic("sp == &session0");
- PID_RELE(sp->s_sidp);
- mutex_destroy(&sp->s_lock);
- cv_destroy(&sp->s_wait_cv);
- kmem_free(sp, sizeof (sess_t));
+ if (--sp->s_ref > 0) {
+ mutex_exit(&sp->s_lock);
+ return;
}
+ ASSERT(sp->s_ref == 0);
+
+ /*
+ * It's ok to free this session structure now because we know
+ * that no one else can have a pointer to it. We know this
+ * to be true because the only time that s_ref can possibly
+ * be incremented is when pidlock or p_splock is held AND there
+ * is a proc_t that points to that session structure. In that
+ * case we are guaranteed that the s_ref is at least 1 since there
+ * is a proc_t that points to it. So when s_ref finally drops to
+ * zero then no one else has a reference (and hence pointer) to
+ * this session structure and there is no valid proc_t pointing
+ * to this session structure anymore so, no one can acquire a
+ * reference (and pointer) to this session structure so it's
+ * ok to free it here.
+ */
+
+ if (sp == &session0)
+ panic("sp == &session0");
+
+ /* make sure there are no outstanding holds */
+ ASSERT(sp->s_cnt == 0);
+
+ /* make sure there is no exit in progress */
+ ASSERT(!sp->s_exit);
+
+ /* make sure someone already freed any ctty */
+ ASSERT(sp->s_vp == NULL);
+ ASSERT(sp->s_dev == NODEV);
+
+ if (!pidlock_held)
+ mutex_enter(&pidlock);
+ PID_RELE(sp->s_sidp);
+ if (!pidlock_held)
+ mutex_exit(&pidlock);
+
+ mutex_destroy(&sp->s_lock);
+ cv_destroy(&sp->s_cnt_cv);
+ kmem_free(sp, sizeof (sess_t));
+}
+
+sess_t *
+tty_hold(void)
+{
+ proc_t *p = curproc;
+ sess_t *sp;
+ boolean_t got_sig = B_FALSE;
+
+ /* make sure the caller isn't holding locks they shouldn't */
+ ASSERT(MUTEX_NOT_HELD(&pidlock));
+
+ for (;;) {
+ mutex_enter(&p->p_splock); /* protect p->p_sessp */
+ sp = p->p_sessp;
+ mutex_enter(&sp->s_lock); /* protect sp->* */
+
+ /* make sure the caller isn't holding locks they shouldn't */
+ ASSERT((sp->s_vp == NULL) ||
+ MUTEX_NOT_HELD(&sp->s_vp->v_stream->sd_lock));
+
+ /*
+ * If the session leader process is not exiting (and hence
+ * not trying to release the session's ctty) then we can
+ * safely grab a hold on the current session structure
+ * and return it. If on the other hand the session leader
+ * process is exiting and clearing the ctty then we'll
+ * wait till it's done before we loop around and grab a
+ * hold on the session structure.
+ */
+ if (!sp->s_exit)
+ break;
+
+ /* need to hold the session so it can't be freed */
+ sp->s_ref++;
+ mutex_exit(&p->p_splock);
+
+ /* Wait till the session leader is done */
+ if (!cv_wait_sig(&sp->s_exit_cv, &sp->s_lock))
+ got_sig = B_TRUE;
+
+ /*
+ * Now we need to drop our hold on the session structure,
+ * but we can't hold any locks when we do this because
+ * sess_rele() may need to acquire pidlock.
+ */
+ mutex_exit(&sp->s_lock);
+ sess_rele(sp, B_FALSE);
+
+ if (got_sig)
+ return (NULL);
+ }
+
+ /* whew, we finally got a hold */
+ sp->s_cnt++;
+ sp->s_ref++;
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ return (sp);
}
void
-sess_create(void)
+tty_rele(sess_t *sp)
{
- proc_t *pp;
- sess_t *sp;
+ /* make sure the caller isn't holding locks they shouldn't */
+ ASSERT(MUTEX_NOT_HELD(&pidlock));
- pp = ttoproc(curthread);
+ mutex_enter(&sp->s_lock);
+ if ((--sp->s_cnt) == 0)
+ cv_broadcast(&sp->s_cnt_cv);
+ mutex_exit(&sp->s_lock);
+
+ sess_rele(sp, B_FALSE);
+}
+
+void
+sess_create(void)
+{
+ proc_t *p = curproc;
+ sess_t *sp, *old_sp;
sp = kmem_zalloc(sizeof (sess_t), KM_SLEEP);
mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&sp->s_wait_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&sp->s_cnt_cv, NULL, CV_DEFAULT, NULL);
+ /*
+ * we need to grab p_lock to protect p_pgidp because
+ * /proc looks at p_pgidp while holding only p_lock.
+ *
+ * we don't need to hold p->p_sessp->s_lock or get a hold on the
+ * session structure since we're not actually updating any of
+ * the contents of the old session structure.
+ */
mutex_enter(&pidlock);
+ mutex_enter(&p->p_lock);
+ mutex_enter(&p->p_splock);
+
+ pgexit(p);
+
+ sp->s_sidp = p->p_pidp;
+ sp->s_ref = 1;
+ sp->s_dev = NODEV;
+
+ old_sp = p->p_sessp;
+ p->p_sessp = sp;
+
+ pgjoin(p, p->p_pidp);
+ PID_HOLD(p->p_pidp);
+
+ mutex_exit(&p->p_splock);
+ mutex_exit(&p->p_lock);
+ mutex_exit(&pidlock);
+ sess_rele(old_sp, B_FALSE);
+}
+
+/*
+ * Note that sess_ctty_clear() resets all the fields in the session
+ * structure but doesn't release any holds or free any objects
+ * that the session structure might currently point to. It is the
+ * caller's responsibility to do this.
+ */
+static void
+sess_ctty_clear(sess_t *sp, stdata_t *stp)
+{
/*
- * We need to protect p_pgidp with p_lock because
- * /proc looks at it while holding only p_lock.
+ * Assert that we hold all the necessary locks. We also need
+ * to be holding proc_t->p_splock for the process associated
+ * with this session, but since we don't have a proc pointer
+ * passed in we can't assert this here.
*/
- mutex_enter(&pp->p_lock);
- pgexit(pp);
- SESS_RELE(pp->p_sessp);
+ ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) &&
+ MUTEX_HELD(&sp->s_lock));
- sp->s_sidp = pp->p_pidp;
- sp->s_ref = 1;
+ /* reset the session structure members to defaults */
+ sp->s_sighuped = B_FALSE;
sp->s_dev = NODEV;
+ sp->s_vp = NULL;
+ sp->s_cred = NULL;
+
+ /* reset the stream session and group pointers */
+ stp->sd_pgidp = NULL;
+ stp->sd_sidp = NULL;
+}
+
+static void
+sess_ctty_set(proc_t *p, sess_t *sp, stdata_t *stp)
+{
+ cred_t *crp;
+
+ /* Assert that we hold all the necessary locks. */
+ ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) &&
+ MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock));
+
+ /* get holds on structures */
+ mutex_enter(&p->p_crlock);
+ crhold(crp = p->p_cred);
+ mutex_exit(&p->p_crlock);
+ PID_HOLD(sp->s_sidp); /* requires pidlock */
+ PID_HOLD(sp->s_sidp); /* requires pidlock */
+
+ /* update the session structure members */
+ sp->s_vp = makectty(stp->sd_vnode);
+ sp->s_dev = sp->s_vp->v_rdev;
+ sp->s_cred = crp;
+
+ /* update the stream members */
+ stp->sd_flag |= STRISTTY; /* just to be sure */
+ stp->sd_sidp = sp->s_sidp;
+ stp->sd_pgidp = sp->s_sidp;
+}
+
+int
+strctty(stdata_t *stp)
+{
+ sess_t *sp;
+ proc_t *p = curproc;
+ boolean_t got_sig = B_FALSE;
+
+ /*
+ * We are going to try to make stp the default ctty for the session
+ * associated with curproc. Not only does this require holding a
+ * bunch of locks but it also requires waiting for any outstanding
+ * holds on the session structure (acquired via tty_hold()) to be
+ * released. Hence, we have the following for(;;) loop that will
+ * acquire our locks, do some sanity checks, and wait for the hold
+ * count on the session structure to hit zero. If we get a signal
+ * while waiting for outstanding holds to be released then we abort
+ * the operation and return.
+ */
+ for (;;) {
+ mutex_enter(&stp->sd_lock); /* protects sd_pgidp/sd_sidp */
+ mutex_enter(&pidlock); /* protects p_pidp */
+ mutex_enter(&p->p_splock); /* protects p_sessp */
+ sp = p->p_sessp;
+ mutex_enter(&sp->s_lock); /* protects sp->* */
+
+ if (((stp->sd_flag & (STRHUP|STRDERR|STWRERR|STPLEX)) != 0) ||
+ (stp->sd_sidp != NULL) || /* stp already ctty? */
+ (p->p_pidp != sp->s_sidp) || /* we're not leader? */
+ (sp->s_vp != NULL)) { /* session has ctty? */
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
+ return (ENOTTY);
+ }
+
+ /* sanity check. we can't be exiting right now */
+ ASSERT(!sp->s_exit);
+
+ /*
+ * If no one else has a hold on this session structure
+ * then we now have exclusive access to it, so break out
+ * of this loop and update the session structure.
+ */
+ if (sp->s_cnt == 0)
+ break;
+
+ /* need to hold the session so it can't be freed */
+ sp->s_ref++;
- pp->p_sessp = sp;
+ /* ain't locking order fun? */
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
- pgjoin(pp, pp->p_pidp);
- mutex_exit(&pp->p_lock);
+ if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock))
+ got_sig = B_TRUE;
+ mutex_exit(&sp->s_lock);
+ sess_rele(sp, B_FALSE);
- PID_HOLD(sp->s_sidp);
+ if (got_sig)
+ return (EINTR);
+ }
+
+ /* set the session ctty bindings */
+ sess_ctty_set(p, sp, stp);
+
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
+ return (0);
}
-void
-freectty(sess_t *sp)
+/*
+ * freectty_lock() attempts to acquire the army of locks required to free
+ * the ctty associated with a given session leader process. If it returns
+ * successfully the following locks will be held:
+ * sd_lock, pidlock, p_splock, s_lock
+ *
+ * as a secondary bit of convenience, freectty_lock() will also return
+ * pointers to the session, ctty, and ctty stream associated with the
+ * specified session leader process.
+ */
+static boolean_t
+freectty_lock(proc_t *p, sess_t **spp, vnode_t **vpp, stdata_t **stpp,
+ boolean_t at_exit)
{
- vnode_t *vp = sp->s_vp;
- cred_t *cred = sp->s_cred;
+ sess_t *sp;
+ vnode_t *vp;
+ stdata_t *stp;
- strfreectty(vp->v_stream);
+ mutex_enter(&pidlock); /* protect p_pidp */
+ mutex_enter(&p->p_splock); /* protect p->p_sessp */
+ sp = p->p_sessp;
+ mutex_enter(&sp->s_lock); /* protect sp->* */
- mutex_enter(&sp->s_lock);
- while (sp->s_cnt > 0) {
- cv_wait(&sp->s_wait_cv, &sp->s_lock);
+ if ((sp->s_sidp != p->p_pidp) || /* we're not leader? */
+ (sp->s_vp == NULL)) { /* no ctty? */
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+ return (B_FALSE);
+ }
+
+ vp = sp->s_vp;
+ stp = sp->s_vp->v_stream;
+
+ if (at_exit) {
+ /* stop anyone else calling tty_hold() */
+ sp->s_exit = B_TRUE;
+ } else {
+ /*
+ * due to locking order we have to grab stp->sd_lock before
+ * grabbing all the other proc/session locks. but after we
+ * drop all our current locks it's possible that someone
+ * could come in and change our current session or close
+ * the current ctty (vp) thereby making sp or stp invalid.
+ * (a VN_HOLD on vp won't protect stp because that only
+ * prevents the vnode from being freed not closed.) so
+ * to prevent this we bump s_ref and s_cnt here.
+ *
+ * of course this doesn't matter if we're the last thread in
+ * an exiting process that is the session leader, since no
+ * one else can change our session or free our ctty.
+ */
+ sp->s_ref++; /* hold the session structure */
+ sp->s_cnt++; /* protect vp and stp */
+ }
+
+ /* drop our session locks */
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+
+ /* grab locks in the right order */
+ mutex_enter(&stp->sd_lock); /* protects sd_pgidp/sd_sidp */
+ mutex_enter(&pidlock); /* protect p_pidp */
+ mutex_enter(&p->p_splock); /* protects p->p_sessp */
+ mutex_enter(&sp->s_lock); /* protects sp->* */
+
+ /* if the session has changed, abort mission */
+ if (sp != p->p_sessp) {
+ /*
+ * this can't happen during process exit since we're the
+ * only thread in the process and we sure didn't change
+ * our own session at this point.
+ */
+ ASSERT(!at_exit);
+
+ /* release our locks and holds */
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
+ tty_rele(sp);
+ return (B_FALSE);
}
- ASSERT(sp->s_cnt == 0);
- ASSERT(vp->v_count >= 1);
- sp->s_vp = NULL;
- sp->s_cred = NULL;
/*
- * It is possible for the VOP_CLOSE below to call stralloctty()
- * and reallocate a new tty vnode. To prevent that the
- * session is marked as closing here.
+ * sanity checks. none of this should have changed since we had
+ * holds on the current ctty.
*/
+ ASSERT(sp->s_sidp == p->p_pidp); /* we're the leader */
+ ASSERT(sp->s_vp != NULL); /* a ctty exists */
+ ASSERT(vp == sp->s_vp);
+ ASSERT(stp == sp->s_vp->v_stream);
+
+ /* release our holds */
+ if (!at_exit) {
+ if ((--(sp)->s_cnt) == 0)
+ cv_broadcast(&sp->s_cnt_cv);
+ sp->s_ref--;
+ ASSERT(sp->s_ref > 0);
+ }
+
+ /* return our pointers */
+ *spp = sp;
+ *vpp = vp;
+ *stpp = stp;
- sp->s_flag = SESS_CLOSE;
+ return (B_TRUE);
+}
+
+/*
+ * Returns B_FALSE if no signal is sent to the process group associated with
+ * this ctty. Returns B_TRUE if a signal is sent to the process group.
+ * If it returns B_TRUE it also means that all the locks we were holding
+ * were dropped so that we could send the signal.
+ */
+static boolean_t
+freectty_signal(proc_t *p, sess_t *sp, stdata_t *stp, boolean_t at_exit)
+{
+ /* Assert that we hold all the necessary locks. */
+ ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) &&
+ MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock));
+
+ /* check if we already signaled this group */
+ if (sp->s_sighuped)
+ return (B_FALSE);
+
+ sp->s_sighuped = B_TRUE;
+
+ if (!at_exit) {
+ /*
+ * once again, we're about to drop our army of locks and we
+ * don't want sp or stp to be freed. (see the comment in
+ * freectty_lock())
+ */
+ sp->s_ref++; /* hold the session structure */
+ sp->s_cnt++; /* protect vp and stp */
+ }
+
+ /* can't hold these locks while calling pgsignal() */
mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+
+ /* signal anyone in the foreground process group */
+ pgsignal(stp->sd_pgidp, SIGHUP);
+
+ /* signal anyone blocked in poll on this stream */
+ if (!(stp->sd_flag & STRHUP))
+ strhup(stp);
+
+ mutex_exit(&stp->sd_lock);
+
+ /* release our holds */
+ if (!at_exit)
+ tty_rele(sp);
+
+ return (B_TRUE);
+}
+
+int
+freectty(boolean_t at_exit)
+{
+ proc_t *p = curproc;
+ stdata_t *stp;
+ vnode_t *vp;
+ cred_t *cred;
+ sess_t *sp;
+ struct pid *pgidp, *sidp;
+ boolean_t got_sig = B_FALSE;
/*
- * This will be the only thread with access to
- * this vnode, from this point on.
+ * If the current process is a session leader we are going to
+ * try to release the ctty associated with our current session. To
+ * do this we need to acquire a bunch of locks, signal any
+ * processes in the foreground that are associated with the ctty,
+ * and make sure no one has any outstanding holds on the current
+ * session structure (acquired via tty_hold()). Hence, we have
+ * the following for(;;) loop that will do all this work for
+ * us and break out when the hold count on the session structure
+ * hits zero.
*/
+ for (;;) {
+ if (!freectty_lock(p, &sp, &vp, &stp, at_exit))
+ return (EIO);
+
+ if (freectty_signal(p, sp, stp, at_exit)) {
+ /* loop around to re-acquire locks */
+ continue;
+ }
+
+ /*
+ * Only a session leader process can free a ctty. So if
+ * we've made it here we know we're a session leader and
+ * if we're not actively exiting it is impossible for another
+ * thread in this process to be exiting. (Because that
+ * thread would have already stopped all other threads
+ * in the current process.)
+ */
+ ASSERT(at_exit || !sp->s_exit);
+
+ /*
+ * If no one else has a hold on this session structure
+ * then we now have exclusive access to it, so break out
+ * of this loop and update the session structure.
+ */
+ if (sp->s_cnt == 0)
+ break;
+
+ if (!at_exit) {
+ /* need to hold the session so it can't be freed */
+ sp->s_ref++;
+ }
+
+ /* ain't locking order fun? */
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
+
+ if (at_exit) {
+ /*
+ * if we're exiting then we can't allow this operation
+ * to fail so we do a cv_wait() instead of a
+ * cv_wait_sig(). if there are threads with active
+ * holds on this ctty that are blocked, then
+ * they should only be blocked in a cv_wait_sig()
+ * and hopefully they were in the foreground process
+ * group and received the SIGHUP we sent above. of
+ * course it's possible that they weren't in the
+ * foreground process group and didn't get our
+ * signal (or they could be stopped by job control
+ * in which case our signal wouldn't matter until
+ * they are restarted). in this case we won't
+ * exit until someone else sends them a signal.
+ */
+ cv_wait(&sp->s_cnt_cv, &sp->s_lock);
+ mutex_exit(&sp->s_lock);
+ continue;
+ }
+
+ if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock)) {
+ got_sig = B_TRUE;
+ }
+
+ mutex_exit(&sp->s_lock);
+ sess_rele(sp, B_FALSE);
+
+ if (got_sig)
+ return (EINTR);
+ }
+ ASSERT(sp->s_cnt == 0);
+ /* save some pointers for later */
+ cred = sp->s_cred;
+ pgidp = stp->sd_pgidp;
+ sidp = stp->sd_sidp;
+
+ /* clear the session ctty bindings */
+ sess_ctty_clear(sp, stp);
+
+ /* wake up anyone blocked in tty_hold() */
+ if (at_exit) {
+ ASSERT(sp->s_exit);
+ sp->s_exit = B_FALSE;
+ cv_broadcast(&sp->s_exit_cv);
+ }
+
+ /* we can drop these locks now */
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+ mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
+
+ /* This is the only remaining thread with access to this vnode */
(void) VOP_CLOSE(vp, 0, 1, (offset_t)0, cred);
VN_RELE(vp);
-
crfree(cred);
+
+ /* release our holds on assorted structures and return */
+ mutex_enter(&pidlock);
+ PID_RELE(pgidp);
+ PID_RELE(sidp);
+ mutex_exit(&pidlock);
+
+ return (1);
}
/*
@@ -169,23 +680,29 @@ vhangup(void)
dev_t
cttydev(proc_t *pp)
{
- sess_t *sp = pp->p_sessp;
+ sess_t *sp;
+ dev_t dev;
+
+ mutex_enter(&pp->p_splock); /* protects p->p_sessp */
+ sp = pp->p_sessp;
+
+#ifdef DEBUG
+ mutex_enter(&sp->s_lock); /* protects sp->* */
if (sp->s_vp == NULL)
- return (NODEV);
- return (sp->s_dev);
+ ASSERT(sp->s_dev == NODEV);
+ else
+ ASSERT(sp->s_dev != NODEV);
+ mutex_exit(&sp->s_lock);
+#endif /* DEBUG */
+
+ dev = sp->s_dev;
+ mutex_exit(&pp->p_splock);
+ return (dev);
}
void
-alloctty(proc_t *pp, vnode_t *vp)
+ctty_clear_sighuped(void)
{
- sess_t *sp = pp->p_sessp;
- cred_t *crp;
-
- sp->s_vp = vp;
- sp->s_dev = vp->v_rdev;
-
- mutex_enter(&pp->p_crlock);
- crhold(crp = pp->p_cred);
- mutex_exit(&pp->p_crlock);
- sp->s_cred = crp;
+ ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&curproc->p_splock));
+ curproc->p_sessp->s_sighuped = B_FALSE;
}
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index ffa676604f..e189a1627d 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -77,6 +77,19 @@
#include <sys/autoconf.h>
#include <sys/policy.h>
+
+/*
+ * This define helps improve the readability of streams code while
+ * still maintaining a very old streams performance enhancement. The
+ * performance enhancement basically involved having all callers
+ * of straccess() perform the first check that straccess() will do
+ * locally before actually calling straccess(). (Thereby reducing
+ * the number of unnecessary calls to straccess().)
+ */
+#define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \
+ (stp->sd_vnode->v_type == VFIFO) ? 0 : \
+ straccess((x), (y)))
+
/*
* what is mblk_pull_len?
*
@@ -1095,11 +1108,13 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
ASSERT(vp->v_stream);
stp = vp->v_stream;
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, JCREAD))
- return (error);
-
mutex_enter(&stp->sd_lock);
+
+ if ((error = i_straccess(stp, JCREAD)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
+
if (stp->sd_flag & (STRDERR|STPLEX)) {
error = strgeterr(stp, STRDERR|STPLEX, 0);
if (error != 0) {
@@ -1161,12 +1176,8 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
}
TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
"strread awakes:%p, %p, %p", vp, uiop, crp);
- if (stp->sd_sidp != NULL &&
- stp->sd_vnode->v_type != VFIFO) {
- mutex_exit(&stp->sd_lock);
- if (error = straccess(stp, JCREAD))
- goto oops1;
- mutex_enter(&stp->sd_lock);
+ if ((error = i_straccess(stp, JCREAD)) != 0) {
+ goto oops;
}
first = 0;
}
@@ -2026,8 +2037,8 @@ strrput_nondata(queue_t *q, mblk_t *bp)
cv_broadcast(&q->q_wait); /* the readers */
cv_broadcast(&_WR(q)->q_wait); /* the writers */
cv_broadcast(&stp->sd_monitor); /* the ioctllers */
- mutex_exit(&stp->sd_lock);
strhup(stp);
+ mutex_exit(&stp->sd_lock);
return (0);
case M_UNHANGUP:
@@ -2665,18 +2676,23 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
ASSERT(vp->v_stream);
stp = vp->v_stream;
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
- if ((error = straccess(stp, JCWRITE)) != 0)
- return (error);
+ mutex_enter(&stp->sd_lock);
+
+ if ((error = i_straccess(stp, JCWRITE)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
- mutex_enter(&stp->sd_lock);
error = strwriteable(stp, B_TRUE, B_TRUE);
- mutex_exit(&stp->sd_lock);
- if (error != 0)
+ if (error != 0) {
+ mutex_exit(&stp->sd_lock);
return (error);
+ }
}
+ mutex_exit(&stp->sd_lock);
+
wqp = stp->sd_wrq;
/* get these values from them cached in the stream head */
@@ -2778,11 +2794,11 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
}
TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
"strwrite wake:q %p awakes", wqp);
+ if ((error = i_straccess(stp, JCWRITE)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ goto out;
+ }
mutex_exit(&stp->sd_lock);
- if (stp->sd_sidp != NULL &&
- stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, JCWRITE))
- goto out;
}
waitflag |= NOINTR;
TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
@@ -3101,6 +3117,7 @@ job_control_type(int cmd)
case JAGENT: /* Obsolete */
case JTRUN: /* Obsolete */
case JXTPROTO: /* Obsolete */
+ case TIOCSETLD:
return (JCSETP);
}
@@ -3162,10 +3179,12 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
return (EINVAL);
- if (access != -1 && stp->sd_sidp != NULL &&
- stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, access))
- return (error);
+ mutex_enter(&stp->sd_lock);
+ if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
+ mutex_exit(&stp->sd_lock);
/*
* Check for sgttyb-related ioctls first, and complain as
@@ -3307,11 +3326,16 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
secpolicy_sti(crp) != 0) {
return (EPERM);
}
- if (stp->sd_sidp !=
- ttoproc(curthread)->p_sessp->s_sidp &&
+ mutex_enter(&stp->sd_lock);
+ mutex_enter(&curproc->p_splock);
+ if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
secpolicy_sti(crp) != 0) {
+ mutex_exit(&curproc->p_splock);
+ mutex_exit(&stp->sd_lock);
return (EACCES);
}
+ mutex_exit(&curproc->p_splock);
+ mutex_exit(&stp->sd_lock);
strioc.ic_len = sizeof (char);
strioc.ic_dp = (char *)arg;
@@ -3445,10 +3469,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
return (EINVAL);
access = job_control_type(strioc.ic_cmd);
- if (access != -1 && stp->sd_sidp != NULL &&
- stp->sd_vnode->v_type != VFIFO &&
- (error = straccess(stp, access)) != 0)
+ mutex_enter(&stp->sd_lock);
+ if ((access != -1) &&
+ ((error = i_straccess(stp, access)) != 0)) {
+ mutex_exit(&stp->sd_lock);
return (error);
+ }
+ mutex_exit(&stp->sd_lock);
/*
* The I_STR facility provides a trap door for malicious
@@ -3699,7 +3726,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
/*
* try to allocate it as a controlling terminal
*/
- stralloctty(stp);
+ (void) strctty(stp);
}
}
@@ -5053,15 +5080,11 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
releasef(STRUCT_FGET(strfdinsert, fildes));
return (error);
}
- if (stp->sd_sidp != NULL &&
- stp->sd_vnode->v_type != VFIFO) {
+ if ((error = i_straccess(stp, access)) != 0) {
mutex_exit(&stp->sd_lock);
- if (error = straccess(stp, access)) {
- releasef(
- STRUCT_FGET(strfdinsert, fildes));
- return (error);
- }
- mutex_enter(&stp->sd_lock);
+ releasef(
+ STRUCT_FGET(strfdinsert, fildes));
+ return (error);
}
}
mutex_exit(&stp->sd_lock);
@@ -5144,12 +5167,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
mutex_exit(&stp->sd_lock);
return (error);
}
- if (stp->sd_sidp != NULL &&
- stp->sd_vnode->v_type != VFIFO) {
+ if ((error = i_straccess(stp, access)) != 0) {
mutex_exit(&stp->sd_lock);
- if (error = straccess(stp, access))
- return (error);
- mutex_enter(&stp->sd_lock);
+ return (error);
}
}
if (mp->b_datap->db_type != M_PASSFP) {
@@ -5446,13 +5466,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
{
pid_t sid;
- mutex_enter(&pidlock);
+ mutex_enter(&stp->sd_lock);
if (stp->sd_sidp == NULL) {
- mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
return (ENOTTY);
}
sid = stp->sd_sidp->pid_id;
- mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
copyflag));
}
@@ -5494,6 +5514,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
bg_pgid = stp->sd_pgidp->pid_id;
CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
PID_RELE(stp->sd_pgidp);
+ ctty_clear_sighuped();
stp->sd_pgidp = q->p_pgidp;
PID_HOLD(stp->sd_pgidp);
mutex_exit(&pidlock);
@@ -5505,17 +5526,30 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
{
pid_t pgrp;
- mutex_enter(&pidlock);
+ mutex_enter(&stp->sd_lock);
if (stp->sd_sidp == NULL) {
- mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
return (ENOTTY);
}
pgrp = stp->sd_pgidp->pid_id;
- mutex_exit(&pidlock);
+ mutex_exit(&stp->sd_lock);
return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
copyflag));
}
+ case TIOCSCTTY:
+ {
+ return (strctty(stp));
+ }
+
+ case TIOCNOTTY:
+ {
+ /* freectty() always assumes curproc. */
+ if (freectty(B_FALSE) != 0)
+ return (0);
+ return (ENOTTY);
+ }
+
case FIONBIO:
case FIOASYNC:
return (0); /* handled by the upper layer */
@@ -6233,18 +6267,21 @@ strgetmsg(
stp = vp->v_stream;
rvp->r_val1 = 0;
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, JCREAD))
- return (error);
+ mutex_enter(&stp->sd_lock);
+
+ if ((error = i_straccess(stp, JCREAD)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
- /* Fast check of flags before acquiring the lock */
if (stp->sd_flag & (STRDERR|STPLEX)) {
- mutex_enter(&stp->sd_lock);
error = strgeterr(stp, STRDERR|STPLEX, 0);
- mutex_exit(&stp->sd_lock);
- if (error != 0)
+ if (error != 0) {
+ mutex_exit(&stp->sd_lock);
return (error);
+ }
}
+ mutex_exit(&stp->sd_lock);
switch (*flagsp) {
case MSG_HIPRI:
@@ -6381,11 +6418,9 @@ strgetmsg(
}
TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
"strgetmsg awakes:%p, %p", vp, uiop);
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
+ if ((error = i_straccess(stp, JCREAD)) != 0) {
mutex_exit(&stp->sd_lock);
- if (error = straccess(stp, JCREAD))
- return (error);
- mutex_enter(&stp->sd_lock);
+ return (error);
}
first = 0;
}
@@ -6797,23 +6832,26 @@ kstrgetmsg(
stp = vp->v_stream;
rvp->r_val1 = 0;
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, JCREAD))
- return (error);
+ mutex_enter(&stp->sd_lock);
+
+ if ((error = i_straccess(stp, JCREAD)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
flags = *flagsp;
- /* Fast check of flags before acquiring the lock */
if (stp->sd_flag & (STRDERR|STPLEX)) {
if ((stp->sd_flag & STPLEX) ||
(flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
- mutex_enter(&stp->sd_lock);
error = strgeterr(stp, STRDERR|STPLEX,
(flags & MSG_IPEEK));
- mutex_exit(&stp->sd_lock);
- if (error != 0)
+ if (error != 0) {
+ mutex_exit(&stp->sd_lock);
return (error);
+ }
}
}
+ mutex_exit(&stp->sd_lock);
switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
case MSG_HIPRI:
@@ -6955,11 +6993,9 @@ retry:
}
TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
"kstrgetmsg awakes:%p, %p", vp, uiop);
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
+ if ((error = i_straccess(stp, JCREAD)) != 0) {
mutex_exit(&stp->sd_lock);
- if (error = straccess(stp, JCREAD))
- return (error);
- mutex_enter(&stp->sd_lock);
+ return (error);
}
first = 0;
}
@@ -7430,18 +7466,23 @@ strputmsg(
audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
#endif
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, JCWRITE))
- return (error);
+ mutex_enter(&stp->sd_lock);
+
+ if ((error = i_straccess(stp, JCWRITE)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
- mutex_enter(&stp->sd_lock);
error = strwriteable(stp, B_FALSE, xpg4);
- mutex_exit(&stp->sd_lock);
- if (error != 0)
+ if (error != 0) {
+ mutex_exit(&stp->sd_lock);
return (error);
+ }
}
+ mutex_exit(&stp->sd_lock);
+
/*
* Check for legal flag value.
*/
@@ -7561,10 +7602,11 @@ strputmsg(
}
TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
"strputmsg wake:stp %p wakes", stp);
+ if ((error = i_straccess(stp, JCWRITE)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ return (error);
+ }
mutex_exit(&stp->sd_lock);
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
- if (error = straccess(stp, JCWRITE))
- return (error);
}
out:
/*
@@ -7617,25 +7659,27 @@ kstrputmsg(
if (mctl == NULL)
return (EINVAL);
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
- if (error = straccess(stp, JCWRITE)) {
- freemsg(mctl);
- return (error);
- }
+ mutex_enter(&stp->sd_lock);
+
+ if ((error = i_straccess(stp, JCWRITE)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ freemsg(mctl);
+ return (error);
}
if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
- mutex_enter(&stp->sd_lock);
error = strwriteable(stp, B_FALSE, B_TRUE);
- mutex_exit(&stp->sd_lock);
if (error != 0) {
+ mutex_exit(&stp->sd_lock);
freemsg(mctl);
return (error);
}
}
}
+ mutex_exit(&stp->sd_lock);
+
/*
* Check for legal flag value.
*/
@@ -7804,13 +7848,12 @@ kstrputmsg(
}
TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
"kstrputmsg wake:stp %p wakes", stp);
- mutex_exit(&stp->sd_lock);
- if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
- if (error = straccess(stp, JCWRITE)) {
- freemsg(mctl);
- return (error);
- }
+ if ((error = i_straccess(stp, JCWRITE)) != 0) {
+ mutex_exit(&stp->sd_lock);
+ freemsg(mctl);
+ return (error);
}
+ mutex_exit(&stp->sd_lock);
}
out:
freemsg(mctl);
diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c
index 57a918a3f0..ae99e5198a 100644
--- a/usr/src/uts/common/os/strsubr.c
+++ b/usr/src/uts/common/os/strsubr.c
@@ -3107,13 +3107,18 @@ straccess(struct stdata *stp, enum jcaccess mode)
proc_t *p = ttoproc(t);
sess_t *sp;
+ ASSERT(mutex_owned(&stp->sd_lock));
+
if (stp->sd_sidp == NULL || stp->sd_vnode->v_type == VFIFO)
return (0);
- mutex_enter(&p->p_lock);
- sp = p->p_sessp;
+ mutex_enter(&p->p_lock); /* protects p_pgidp */
for (;;) {
+ mutex_enter(&p->p_splock); /* protects p->p_sessp */
+ sp = p->p_sessp;
+ mutex_enter(&sp->s_lock); /* protects sp->* */
+
/*
* If this is not the calling process's controlling terminal
* or if the calling process is already in the foreground
@@ -3121,6 +3126,8 @@ straccess(struct stdata *stp, enum jcaccess mode)
*/
if (sp->s_dev != stp->sd_vnode->v_rdev ||
p->p_pgidp == stp->sd_pgidp) {
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
mutex_exit(&p->p_lock);
return (0);
}
@@ -3131,10 +3138,15 @@ straccess(struct stdata *stp, enum jcaccess mode)
if (sp->s_vp == NULL) {
if (!cantsend(p, t, SIGHUP))
sigtoproc(p, t, SIGHUP);
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
mutex_exit(&p->p_lock);
return (EIO);
}
+ mutex_exit(&sp->s_lock);
+ mutex_exit(&p->p_splock);
+
if (mode == JCGETP) {
mutex_exit(&p->p_lock);
return (0);
@@ -3146,7 +3158,9 @@ straccess(struct stdata *stp, enum jcaccess mode)
return (EIO);
}
mutex_exit(&p->p_lock);
+ mutex_exit(&stp->sd_lock);
pgsignal(p->p_pgidp, SIGTTIN);
+ mutex_enter(&stp->sd_lock);
mutex_enter(&p->p_lock);
} else { /* mode == JCWRITE or JCSETP */
if ((mode == JCWRITE && !(stp->sd_flag & STRTOSTOP)) ||
@@ -3159,7 +3173,9 @@ straccess(struct stdata *stp, enum jcaccess mode)
return (EIO);
}
mutex_exit(&p->p_lock);
+ mutex_exit(&stp->sd_lock);
pgsignal(p->p_pgidp, SIGTTOU);
+ mutex_enter(&stp->sd_lock);
mutex_enter(&p->p_lock);
}
@@ -3174,10 +3190,15 @@ straccess(struct stdata *stp, enum jcaccess mode)
* We can't get here if the signal is ignored or
* if the current thread is blocking the signal.
*/
+ mutex_exit(&stp->sd_lock);
if (!cv_wait_sig_swap(&lbolt_cv, &p->p_lock)) {
mutex_exit(&p->p_lock);
+ mutex_enter(&stp->sd_lock);
return (EINTR);
}
+ mutex_exit(&p->p_lock);
+ mutex_enter(&stp->sd_lock);
+ mutex_enter(&p->p_lock);
}
}
@@ -4001,59 +4022,12 @@ strsignal(stdata_t *stp, int sig, int32_t band)
void
strhup(stdata_t *stp)
{
+ ASSERT(mutex_owned(&stp->sd_lock));
pollwakeup(&stp->sd_pollist, POLLHUP);
- mutex_enter(&stp->sd_lock);
if (stp->sd_sigflags & S_HANGUP)
strsendsig(stp->sd_siglist, S_HANGUP, 0, 0);
- mutex_exit(&stp->sd_lock);
-}
-
-void
-stralloctty(stdata_t *stp)
-{
- proc_t *p = curproc;
- sess_t *sp = p->p_sessp;
-
- mutex_enter(&stp->sd_lock);
- /*
- * No need to hold the session lock or do a TTY_HOLD() because
- * this is the only thread that can be the session leader and not
- * have a controlling tty.
- */
- if ((stp->sd_flag &
- (STRHUP|STRDERR|STWRERR|STPLEX|STRISTTY)) == STRISTTY &&
- stp->sd_sidp == NULL && /* not allocated as ctty */
- sp->s_sidp == p->p_pidp && /* session leader */
- sp->s_flag != SESS_CLOSE && /* session is not closing */
- sp->s_vp == NULL) { /* without ctty */
- ASSERT(stp->sd_pgidp == NULL);
- alloctty(p, makectty(stp->sd_vnode));
-
- mutex_enter(&pidlock);
- stp->sd_sidp = sp->s_sidp;
- stp->sd_pgidp = sp->s_sidp;
- PID_HOLD(stp->sd_pgidp);
- PID_HOLD(stp->sd_sidp);
- mutex_exit(&pidlock);
- }
- mutex_exit(&stp->sd_lock);
}
-void
-strfreectty(stdata_t *stp)
-{
- mutex_enter(&stp->sd_lock);
- pgsignal(stp->sd_pgidp, SIGHUP);
- mutex_enter(&pidlock);
- PID_RELE(stp->sd_pgidp);
- PID_RELE(stp->sd_sidp);
- stp->sd_pgidp = NULL;
- stp->sd_sidp = NULL;
- mutex_exit(&pidlock);
- mutex_exit(&stp->sd_lock);
- if (!(stp->sd_flag & STRHUP))
- strhup(stp);
-}
/*
* Backenable the first queue upstream from `q' with a service procedure.
*/
diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c
index 80761e102c..8211e23d01 100644
--- a/usr/src/uts/common/os/sysent.c
+++ b/usr/src/uts/common/os/sysent.c
@@ -51,6 +51,7 @@
int access();
int alarm();
int auditsys();
+int64_t brandsys();
int brk();
int chdir();
int chmod();
@@ -131,6 +132,8 @@ int unlink();
int utime();
int64_t utssys32();
int64_t utssys64();
+int uucopy();
+ssize_t uucopystr();
int64_t wait();
ssize_t write();
ssize_t readv();
@@ -473,7 +476,7 @@ struct sysent sysent[NSYSCALL] =
SYSENT_NOSYS(),
SYSENT_CI("fstatfs", fstatfs32, 4)),
/* 39 */ SYSENT_CI("setpgrp", setpgrp, 3),
- /* 40 */ SYSENT_LOADABLE(), /* (was cxenix) */
+ /* 40 */ SYSENT_CI("uucopystr", uucopystr, 3),
/* 41 */ SYSENT_CI("dup", dup, 1),
/* 42 */ SYSENT_LOADABLE(), /* (was pipe ) */
/* 43 */ SYSENT_CL("times", times, 1),
@@ -658,7 +661,7 @@ struct sysent sysent[NSYSCALL] =
SYSENT_NOSYS(),
SYSENT_C("llseek", llseek32, 4)),
/* 176 */ SYSENT_LOADABLE(), /* inst_sync */
- /* 177 */ SYSENT_LOADABLE(), /* (was srmlimitsys) */
+ /* 177 */ SYSENT_CI("brandsys", brandsys, 6),
/* 178 */ SYSENT_LOADABLE(), /* kaio */
/* 179 */ SYSENT_LOADABLE(), /* cpc */
/* 180 */ SYSENT_CI("lgrpsys", lgrpsys, 3),
@@ -770,7 +773,7 @@ struct sysent sysent[NSYSCALL] =
/* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1),
/* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2),
/* 253 */ SYSENT_CI("cladm", cladm, 3),
- /* 254 */ SYSENT_LOADABLE(), /* (was lwp_sigtimedwait) */
+ /* 254 */ SYSENT_CI("uucopy", uucopy, 3),
/* 255 */ SYSENT_CI("umount2", umount2, 2)
/* ONC_PLUS EXTRACT START */
};
@@ -876,7 +879,7 @@ struct sysent sysent32[NSYSCALL] =
/* 37 */ SYSENT_CI("kill", kill, 2),
/* 38 */ SYSENT_CI("fstatfs", fstatfs32, 4),
/* 39 */ SYSENT_CI("setpgrp", setpgrp, 3),
- /* 40 */ SYSENT_LOADABLE32(), /* (was cxenix) */
+ /* 40 */ SYSENT_CI("uucopystr", uucopystr, 3),
/* 41 */ SYSENT_CI("dup", dup, 1),
/* 42 */ SYSENT_LOADABLE32(), /* (was pipe ) */
/* 43 */ SYSENT_CI("times", times32, 1),
@@ -1036,7 +1039,7 @@ struct sysent sysent32[NSYSCALL] =
/* 174 */ SYSENT_CI("pwrite", pwrite32, 4),
/* 175 */ SYSENT_C("llseek", llseek32, 4),
/* 176 */ SYSENT_LOADABLE32(), /* inst_sync */
- /* 177 */ SYSENT_LOADABLE32(), /* srmlimitsys */
+ /* 177 */ SYSENT_CI("brandsys", brandsys, 6),
/* 178 */ SYSENT_LOADABLE32(), /* kaio */
/* 179 */ SYSENT_LOADABLE32(), /* cpc */
/* 180 */ SYSENT_CI("lgrpsys", lgrpsys, 3),
@@ -1116,7 +1119,7 @@ struct sysent sysent32[NSYSCALL] =
/* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock, 1),
/* 252 */ SYSENT_CI("lwp_mutex_init", lwp_mutex_init, 2),
/* 253 */ SYSENT_CI("cladm", cladm, 3),
- /* 254 */ SYSENT_LOADABLE32(), /* (was lwp_sigtimedwait) */
+ /* 254 */ SYSENT_CI("uucopy", uucopy, 3),
/* 255 */ SYSENT_CI("umount2", umount2, 2)
/* ONC_PLUS EXTRACT START */
};
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 6a5c9243b3..9fd6b423bd 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -228,6 +228,7 @@
#include <sys/nvpair.h>
#include <sys/rctl.h>
#include <sys/fss.h>
+#include <sys/brand.h>
#include <sys/zone.h>
#include <sys/tsol/label.h>
@@ -330,7 +331,6 @@ static kmutex_t mount_lock;
const char * const zone_default_initname = "/sbin/init";
static char * const zone_prefix = "/zone/";
-
static int zone_shutdown(zoneid_t zoneid);
/*
@@ -1223,6 +1223,8 @@ zone_init(void)
zone0.zone_nlwps = p0.p_lwpcnt;
zone0.zone_ntasks = 1;
mutex_exit(&p0.p_lock);
+ zone0.zone_restart_init = B_TRUE;
+ zone0.zone_brand = &native_brand;
rctl_prealloc_destroy(gp);
/*
* pool_default hasn't been initialized yet, so we let pool_init() take
@@ -2330,33 +2332,40 @@ void
zone_start_init(void)
{
proc_t *p = ttoproc(curthread);
+ zone_t *z = p->p_zone;
ASSERT(!INGLOBALZONE(curproc));
/*
+ * For all purposes (ZONE_ATTR_INITPID and restart_init),
+ * storing just the pid of init is sufficient.
+ */
+ z->zone_proc_initpid = p->p_pid;
+
+ /*
* We maintain zone_boot_err so that we can return the cause of the
* failure back to the caller of the zone_boot syscall.
*/
p->p_zone->zone_boot_err = start_init_common();
mutex_enter(&zone_status_lock);
- if (p->p_zone->zone_boot_err != 0) {
+ if (z->zone_boot_err != 0) {
/*
* Make sure we are still in the booting state-- we could have
* raced and already be shutting down, or even further along.
*/
- if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING)
- zone_status_set(p->p_zone, ZONE_IS_SHUTTING_DOWN);
+ if (zone_status_get(z) == ZONE_IS_BOOTING)
+ zone_status_set(z, ZONE_IS_SHUTTING_DOWN);
mutex_exit(&zone_status_lock);
/* It's gone bad, dispose of the process */
- if (proc_exit(CLD_EXITED, p->p_zone->zone_boot_err) != 0) {
+ if (proc_exit(CLD_EXITED, z->zone_boot_err) != 0) {
mutex_enter(&p->p_lock);
ASSERT(p->p_flag & SEXITLWPS);
lwp_exit();
}
} else {
- if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING)
- zone_status_set(p->p_zone, ZONE_IS_RUNNING);
+ if (zone_status_get(z) == ZONE_IS_BOOTING)
+ zone_status_set(z, ZONE_IS_RUNNING);
mutex_exit(&zone_status_lock);
/* cause the process to return to userland. */
lwp_rtt();
@@ -2939,6 +2948,9 @@ zone_create(const char *zone_name, const char *zone_root,
zone->zone_psetid = ZONE_PS_INVAL;
zone->zone_ncpus = 0;
zone->zone_ncpus_online = 0;
+ zone->zone_restart_init = B_TRUE;
+ zone->zone_brand = &native_brand;
+ zone->zone_initname = NULL;
mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
@@ -3464,6 +3476,9 @@ zone_shutdown(zoneid_t zoneid)
zone_rele(zone);
return (set_errno(EINTR));
}
+
+ brand_unregister_zone(zone->zone_brand);
+
zone_rele(zone);
return (0);
}
@@ -3771,6 +3786,18 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
copyout(&initpid, buf, bufsize) != 0)
error = EFAULT;
break;
+ case ZONE_ATTR_BRAND:
+ size = strlen(zone->zone_brand->b_name) + 1;
+
+ if (bufsize > size)
+ bufsize = size;
+ if (buf != NULL) {
+ err = copyoutstr(zone->zone_brand->b_name, buf,
+ bufsize, NULL);
+ if (err != 0 && err != ENAMETOOLONG)
+ error = EFAULT;
+ }
+ break;
case ZONE_ATTR_INITNAME:
size = strlen(zone->zone_initname) + 1;
if (bufsize > size)
@@ -3797,7 +3824,12 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
}
break;
default:
- error = EINVAL;
+ if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
+ size = bufsize;
+ error = ZBROP(zone)->b_getattr(zone, attr, buf, &size);
+ } else {
+ error = EINVAL;
+ }
}
zone_rele(zone);
@@ -3815,6 +3847,7 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
{
zone_t *zone;
zone_status_t zone_status;
+ struct brand_attr *attrp;
int err;
if (secpolicy_zone_config(CRED()) != 0)
@@ -3847,8 +3880,33 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
case ZONE_ATTR_BOOTARGS:
err = zone_set_bootargs(zone, (const char *)buf);
break;
+ case ZONE_ATTR_BRAND:
+ ASSERT(!ZONE_IS_BRANDED(zone));
+ err = 0;
+ attrp = kmem_alloc(sizeof (struct brand_attr), KM_SLEEP);
+ if ((buf == NULL) ||
+ (copyin(buf, attrp, sizeof (struct brand_attr)) != 0)) {
+ err = EFAULT;
+ } else if (is_system_labeled() &&
+ strncmp(attrp->ba_brandname, NATIVE_BRAND_NAME,
+ MAXNAMELEN) != 0) {
+ /* Only the native brand is allowed on a labeled system. */
+ err = EPERM;
+ } else {
+ brand_t *bp = brand_register_zone(attrp);
+
+ /*
+ * Leave zone_brand untouched on failure: a NULL brand
+ * would pass ZONE_IS_BRANDED() and be dereferenced.
+ */
+ if (bp == NULL)
+ err = EINVAL;
+ else
+ zone->zone_brand = bp;
+ }
+ /* attrp is only a staging copy; free it on every exit path. */
+ kmem_free(attrp, sizeof (struct brand_attr));
+ break;
default:
- err = EINVAL;
+ if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone))
+ err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize);
+ else
+ err = EINVAL;
}
done:
@@ -4145,10 +4203,10 @@ zone_enter(zoneid_t zoneid)
*/
mutex_enter(&pidlock);
sp = zone->zone_zsched->p_sessp;
- SESS_HOLD(sp);
+ sess_hold(zone->zone_zsched);
mutex_enter(&pp->p_lock);
pgexit(pp);
- SESS_RELE(pp->p_sessp);
+ sess_rele(pp->p_sessp, B_TRUE);
pp->p_sessp = sp;
pgjoin(pp, zone->zone_zsched->p_pidp);
mutex_exit(&pp->p_lock);
diff --git a/usr/src/uts/common/rpc/clnt_gen.c b/usr/src/uts/common/rpc/clnt_gen.c
index 0093210bd5..4c557b563f 100644
--- a/usr/src/uts/common/rpc/clnt_gen.c
+++ b/usr/src/uts/common/rpc/clnt_gen.c
@@ -346,6 +346,11 @@ bindresvport_again:
}
if (!error && bound_addr) {
+ if (bound_addr->maxlen < ret->addr.len) {
+ kmem_free(bound_addr->buf, bound_addr->maxlen);
+ bound_addr->buf = kmem_zalloc(ret->addr.len, KM_SLEEP);
+ bound_addr->maxlen = ret->addr.len;
+ }
bcopy(ret->addr.buf, bound_addr->buf, ret->addr.len);
bound_addr->len = ret->addr.len;
}
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index f6bcef9c5c..2754405b01 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -102,6 +102,7 @@ CHKHDRS= \
bofi_impl.h \
bpp_io.h \
bootstat.h \
+ brand.h \
buf.h \
bufmod.h \
bustypes.h \
diff --git a/usr/src/uts/common/sys/audioio.h b/usr/src/uts/common/sys/audioio.h
index 5b8152cfc5..2814eb7040 100644
--- a/usr/src/uts/common/sys/audioio.h
+++ b/usr/src/uts/common/sys/audioio.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,8 +19,8 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 1995-2001 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
*/
#ifndef _SYS_AUDIOIO_H
@@ -209,10 +208,10 @@ typedef struct audio_info audio_info_t;
* a signed int.
*/
#define AUDIO_INITINFO(i) { \
- uint_t *__x__; \
- for (__x__ = (uint_t *)(i); \
+ uint_t *__x__; \
+ for (__x__ = (uint_t *)(i); \
(char *)__x__ < (((char *)(i)) + sizeof (audio_info_t)); \
- *__x__++ = ~0); \
+ *__x__++ = (uint_t)~0); \
}
diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h
index 025d7a18e9..b9cf07f269 100644
--- a/usr/src/uts/common/sys/auxv.h
+++ b/usr/src/uts/common/sys/auxv.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -171,6 +170,15 @@ extern uint_t getisax(uint32_t *, uint_t);
#define AT_SUN_AUXFLAGS 2017 /* AF_SUN_ flags passed from the kernel */
/*
+ * Used to indicate to the runtime linker the name of the emulation binary,
+ * if one is being used. For brands, this is the name of the brand library.
+ */
+#define AT_SUN_EMULATOR 2018
+
+#define AT_SUN_BRANDNAME 2019
+#define AT_SUN_BRAND_PHDR 2020 /* Brand executable's phdr */
+
+/*
* The kernel is in a better position to determine whether a process needs to
* ignore dangerous LD environment variables. If set, this flags tells
* ld.so.1 to run "secure" and ignore the the environment.
@@ -183,7 +191,6 @@ extern uint_t getisax(uint32_t *, uint_t);
*/
#define AF_SUN_HWCAPVERIFY 0x00000002
-
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/bitmap.h b/usr/src/uts/common/sys/bitmap.h
index 8476ba9563..d0dd12b683 100644
--- a/usr/src/uts/common/sys/bitmap.h
+++ b/usr/src/uts/common/sys/bitmap.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -124,6 +124,14 @@ extern "C" {
#endif /* _LP64 */
+/*
+ * BIT_ONLYONESET is a private macro not designed for bitmaps of
+ * arbitrary size. u must be an unsigned integer/long. It returns
+ * true if one and only one bit is set in u.
+ */
+#define BIT_ONLYONESET(u) \
+ ((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0))
+
#if defined(_KERNEL) && !defined(_ASM)
#include <sys/atomic.h>
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
new file mode 100644
index 0000000000..c4595e9641
--- /dev/null
+++ b/usr/src/uts/common/sys/brand.h
@@ -0,0 +1,134 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_BRAND_H
+#define _SYS_BRAND_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/proc.h>
+#include <sys/exec.h>
+
+/*
+ * All Brands supported by this kernel must use BRAND_VER_1.
+ */
+#define BRAND_VER_1 1
+
+/*
+ * sub-commands to brandsys.
+ * 1 - 128 are for common commands
+ * 128+ are available for brand-specific commands.
+ */
+#define B_REGISTER 1
+#define B_TTYMODES 2
+#define B_ELFDATA 3
+#define B_EXEC_NATIVE 4
+#define B_EXEC_BRAND 5
+
+/*
+ * Structure used by zoneadmd to communicate the name of a brand and the
+ * supporting brand module into the kernel.
+ */
+struct brand_attr {
+ char ba_brandname[MAXNAMELEN];
+ char ba_modname[MAXPATHLEN];
+};
+
+/* What we call the native brand. */
+#define NATIVE_BRAND_NAME "native"
+
+#ifdef _KERNEL
+
+/* Root for branded zone's native binaries */
+#define NATIVE_ROOT "/native/"
+
+struct proc;
+struct uarg;
+struct brand_mach_ops;
+struct intpdata;
+struct execa;
+
+struct brand_ops {
+ int (*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+ uintptr_t, uintptr_t, uintptr_t);
+ void (*b_setbrand)(struct proc *);
+ int (*b_getattr)(zone_t *, int, void *, size_t *);
+ int (*b_setattr)(zone_t *, int, void *, size_t);
+ void (*b_copy_procdata)(struct proc *, struct proc *);
+ void (*b_proc_exit)(struct proc *, klwp_t *);
+ void (*b_exec)();
+ void (*b_lwp_setrval)(klwp_t *, int, int);
+ int (*b_initlwp)(klwp_t *);
+ void (*b_forklwp)(klwp_t *, klwp_t *);
+ void (*b_freelwp)(klwp_t *);
+ void (*b_lwpexit)(klwp_t *);
+ int (*b_elfexec)(struct vnode *vp, struct execa *uap,
+ struct uarg *args, struct intpdata *idata, int level,
+ long *execsz, int setid, caddr_t exec_file,
+ struct cred *cred, int brand_action);
+};
+
+/*
+ * The b_version field must always be the first entry in this struct.
+ */
+typedef struct brand {
+ int b_version;
+ char *b_name;
+ struct brand_ops *b_ops;
+ struct brand_mach_ops *b_machops;
+} brand_t;
+
+extern brand_t native_brand;
+
+/*
+ * Convenience macros
+ */
+#define lwptolwpbrand(l) ((l)->lwp_brand)
+#define ttolwpbrand(t) (lwptolwpbrand(ttolwp(t)))
+#define PROC_IS_BRANDED(p) ((p)->p_brand != &native_brand)
+#define ZONE_IS_BRANDED(z) ((z)->zone_brand != &native_brand)
+#define BROP(p) ((p)->p_brand->b_ops)
+#define ZBROP(z) ((z)->zone_brand->b_ops)
+#define BRMOP(p) ((p)->p_brand->b_machops)
+
+extern void brand_init();
+extern int brand_register(brand_t *);
+extern int brand_unregister(brand_t *);
+extern brand_t *brand_register_zone(struct brand_attr *);
+extern brand_t *brand_find_name(char *);
+extern void brand_unregister_zone(brand_t *);
+extern int brand_zone_count(brand_t *);
+extern void brand_setbrand(proc_t *);
+#endif /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BRAND_H */
diff --git a/usr/src/uts/common/sys/class.h b/usr/src/uts/common/sys/class.h
index fbfbcc6080..9988ca3190 100644
--- a/usr/src/uts/common/sys/class.h
+++ b/usr/src/uts/common/sys/class.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -38,6 +37,7 @@
#include <sys/thread.h>
#include <sys/priocntl.h>
#include <sys/mutex.h>
+#include <sys/uio.h>
#ifdef __cplusplus
extern "C" {
@@ -128,15 +128,15 @@ extern pri_t minclsyspri;
extern id_t syscid; /* system scheduling class ID */
extern id_t defaultcid; /* "default" class id; see dispadmin(1M) */
-extern int alloc_cid(char *, id_t *);
-extern int scheduler_load(char *, sclass_t *);
-extern int getcid(char *, id_t *);
-extern int getcidbyname(char *, id_t *);
-extern int parmsin(pcparms_t *, pc_vaparms_t *);
-extern int parmsout(pcparms_t *, pc_vaparms_t *);
-extern int parmsset(pcparms_t *, kthread_id_t);
-extern void parmsget(kthread_id_t, pcparms_t *);
-extern int vaparmsout(char *, pcparms_t *, pc_vaparms_t *);
+extern int alloc_cid(char *, id_t *);
+extern int scheduler_load(char *, sclass_t *);
+extern int getcid(char *, id_t *);
+extern int getcidbyname(char *, id_t *);
+extern int parmsin(pcparms_t *, pc_vaparms_t *);
+extern int parmsout(pcparms_t *, pc_vaparms_t *);
+extern int parmsset(pcparms_t *, kthread_id_t);
+extern void parmsget(kthread_id_t, pcparms_t *);
+extern int vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t);
#endif
diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h
index e9a34eacfe..a5eaf18edd 100644
--- a/usr/src/uts/common/sys/exec.h
+++ b/usr/src/uts/common/sys/exec.h
@@ -105,9 +105,19 @@ typedef struct uarg {
uint_t brkpageszc;
uintptr_t entry;
uintptr_t thrptr;
+ char *emulator;
+ char *brandname;
+ auxv32_t *brand_auxp; /* starting user addr of brand auxvs on stack */
} uarg_t;
/*
+ * Possible brand actions for exec.
+ */
+#define EBA_NONE 0
+#define EBA_NATIVE 1
+#define EBA_BRAND 2
+
+/*
* The following macro is a machine dependent encapsulation of
* postfix processing to hide the stack direction from elf.c
* thereby making the elf.c code machine independent.
@@ -166,7 +176,7 @@ struct execsw {
int (*exec_func)(struct vnode *vp, struct execa *uap,
struct uarg *args, struct intpdata *idata, int level,
long *execsz, int setid, caddr_t exec_file,
- struct cred *cred);
+ struct cred *cred, int brand_action);
int (*exec_core)(struct vnode *vp, struct proc *p,
struct cred *cred, rlim64_t rlimit, int sig,
core_content_t content);
@@ -198,10 +208,10 @@ extern int exec_args(execa_t *, uarg_t *, intpdata_t *, void **);
extern int exec(const char *fname, const char **argp);
extern int exece(const char *fname, const char **argp, const char **envp);
extern int exec_common(const char *fname, const char **argp,
- const char **envp);
+ const char **envp, int brand_action);
extern int gexec(vnode_t **vp, struct execa *uap, struct uarg *args,
struct intpdata *idata, int level, long *execsz, caddr_t exec_file,
- struct cred *cred);
+ struct cred *cred, int brand_action);
extern struct execsw *allocate_execsw(char *name, char *magic,
size_t magic_size);
extern struct execsw *findexecsw(char *magic);
diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h
index ade26b4f82..7dea5b4941 100644
--- a/usr/src/uts/common/sys/klwp.h
+++ b/usr/src/uts/common/sys/klwp.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -211,6 +210,8 @@ typedef struct _klwp {
*/
struct ct_template *lwp_ct_active[CTT_MAXTYPE]; /* active templates */
struct contract *lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */
+
+ void *lwp_brand; /* per-lwp brand data */
} klwp_t;
/* lwp states */
diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h
index 1093eddef6..5e9450dde5 100644
--- a/usr/src/uts/common/sys/modctl.h
+++ b/usr/src/uts/common/sys/modctl.h
@@ -61,6 +61,7 @@ struct mod_ops {
* The defined set of mod_ops structures for each loadable module type
* Defined in modctl.c
*/
+extern struct mod_ops mod_brandops;
#if defined(__i386) || defined(__amd64)
extern struct mod_ops mod_cpuops;
#endif
@@ -175,6 +176,13 @@ struct modlpcbe {
struct __pcbe_ops *pcbe_ops;
};
+/* For Brand modules */
+struct modlbrand {
+ struct mod_ops *brand_modops;
+ char *brand_linkinfo;
+ struct brand *brand_branddef;
+};
+
/* for devname fs */
struct modldev {
struct mod_ops *dev_modops;
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index fadcbf4a6d..13a3605e66 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -115,6 +115,7 @@ typedef struct lwpdir {
struct pool;
struct task;
struct zone;
+struct brand;
struct corectl_path;
struct corectl_content;
@@ -336,6 +337,11 @@ typedef struct proc {
uintptr_t p_portcnt; /* event ports counter */
struct zone *p_zone; /* zone in which process lives */
struct vnode *p_execdir; /* directory that p_exec came from */
+ struct brand *p_brand; /* process's brand */
+ void *p_brand_data; /* per-process brand state */
+
+ /* additional lock to protect p_sessp (but not its contents) */
+ kmutex_t p_splock;
} proc_t;
#define PROC_T /* headers relying on proc_t are OK */
@@ -408,6 +414,10 @@ struct plock {
extern proc_t p0; /* process 0 */
extern struct plock p0lock; /* p0's plock */
extern struct pid pid0; /* p0's pid */
+
+/* pid_allocate() flags */
+#define PID_ALLOC_PROC 0x0001 /* assign a /proc slot as well */
+
#endif /* _KERNEL */
/* stat codes */
@@ -588,7 +598,8 @@ extern int sigcheck(proc_t *, kthread_t *);
extern void sigdefault(proc_t *);
extern void pid_setmin(void);
-extern pid_t pid_assign(proc_t *);
+extern pid_t pid_allocate(proc_t *, int);
+extern struct pid *pid_find(pid_t);
extern int pid_rele(struct pid *);
extern void pid_exit(proc_t *);
extern void proc_entry_free(struct pid *);
@@ -724,6 +735,7 @@ extern void lwp_rtt(void);
extern void lwp_rtt_initial(void);
extern int lwp_setprivate(klwp_t *, int, uintptr_t);
extern void lwp_stat_update(lwp_stat_id_t, long);
+extern void lwp_attach_brand_hdlrs(klwp_t *);
/*
* Signal queue function prototypes. Must be here due to header ordering
diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h
index 9aa6493956..120503539b 100644
--- a/usr/src/uts/common/sys/ptms.h
+++ b/usr/src/uts/common/sys/ptms.h
@@ -35,6 +35,8 @@
extern "C" {
#endif
+#ifdef _KERNEL
+
/*
* Structures and definitions supporting the pseudo terminal
* drivers. This structure is private and should not be used by any
@@ -63,8 +65,6 @@ struct pt_ttys {
#define PTSOPEN 0x04 /* slave side is open */
#define PTSTTY 0x08 /* slave side is tty */
-#ifdef _KERNEL
-
/*
* Multi-threading primitives.
* Values of pt_refcnt: -1 if a writer is accessing the struct
@@ -129,18 +129,29 @@ extern void ptms_logp(char *, uintptr_t);
#define DDBGP(a, b)
#endif
+typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t;
+typedef struct ptmptsopencb {
+ boolean_t (*ppocb_func)(ptmptsopencb_arg_t);
+ ptmptsopencb_arg_t ppocb_arg;
+} ptmptsopencb_t;
+
#endif /* _KERNEL */
+typedef struct pt_own {
+ uid_t pto_ruid;
+ gid_t pto_rgid;
+} pt_own_t;
+
/*
* ioctl commands
*
- * ISPTM: Determines whether the file descriptor is that of an open master
- * device. Return code of zero indicates that the file descriptor
- * represents master device.
+ * ISPTM: Determines whether the file descriptor is that of an open master
+ * device. Return code of zero indicates that the file descriptor
+ * represents master device.
*
- * UNLKPT: Unlocks the master and slave devices. It returns 0 on success. On
- * failure, the errno is set to EINVAL indicating that the master
- * device is not open.
+ * UNLKPT: Unlocks the master and slave devices. It returns 0 on success. On
+ * failure, the errno is set to EINVAL indicating that the master
+ * device is not open.
*
* ZONEPT: Sets the zoneid of the pair of master and slave devices. It
* returns 0 upon success. Used to force a pty 'into' a zone upon
@@ -149,16 +160,24 @@ extern void ptms_logp(char *, uintptr_t);
* PT_OWNER: Sets uid and gid for slave device. It returns 0 on success.
*
*/
-#define ISPTM (('P'<<8)|1) /* query for master */
-#define UNLKPT (('P'<<8)|2) /* unlock master/slave pair */
-#define PTSSTTY (('P'<<8)|3) /* set tty flag */
-#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */
-#define PT_OWNER (('P'<<8)|5) /* set owner and group for slave device */
+#define ISPTM (('P'<<8)|1) /* query for master */
+#define UNLKPT (('P'<<8)|2) /* unlock master/slave pair */
+#define PTSSTTY (('P'<<8)|3) /* set tty flag */
+#define ZONEPT (('P'<<8)|4) /* set zone of master/slave pair */
+#define PT_OWNER (('P'<<8)|5) /* set owner/group for slave device */
-typedef struct pt_own {
- uid_t pto_ruid;
- gid_t pto_rgid;
-} pt_own_t;
+#ifdef _KERNEL
+/*
+ * kernel ioctl commands
+ *
+ * PTMPTSOPENCB: Returns a callback function pointer and opaque argument.
+ * The return value of the callback function when it's invoked
+ * with the opaque argument passed to it will indicate if the
+ * pts slave device is currently open.
+ */
+#define PTMPTSOPENCB (('P'<<8)|6) /* check if the slave is open */
+
+#endif /* _KERNEL */
#ifdef __cplusplus
}
diff --git a/usr/src/uts/common/sys/session.h b/usr/src/uts/common/sys/session.h
index 639d6bf69d..8db8a8a5bb 100644
--- a/usr/src/uts/common/sys/session.h
+++ b/usr/src/uts/common/sys/session.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -37,54 +36,96 @@
extern "C" {
#endif
+/*
+ * Session structure overview.
+ *
+ * Currently, the only structure in the kernel which has a pointer to a
+ * session structure is the proc_t, via its p_sessp pointer. To access
+ * the proc_t->p_sessp pointer a caller must hold either
+ * pidlock or p_splock. These locks only protect the p_sessp pointer
+ * itself and do not protect any of the contents of the session structure.
+ * To prevent the contents of the session structure from changing, the
+ * caller must grab s_lock.
+ *
+ * No callers should ever update the contents of the session structure
+ * directly. Only the session management code should ever modify the
+ * contents of the session structure. When the session code attempts
+ * to modify the contents of a session structure it must hold multiple
+ * locks. The locking order for all the locks that may need to be
+ * acquired is:
+ * sd_lock -> pidlock -> p_splock -> s_lock
+ *
+ * If a caller requires access to a session structure for long
+ * periods of time or across operations that may block it should
+ * use the tty_hold() and sess_hold() interfaces.
+ *
+ * sess_hold() places a hold on the session structure associated
+ * with the proc_t that was passed in; it does not return a value.
+ * It increments the reference count associated with that session
+ * structure to ensure that it can't be freed until after the caller
+ * is done with it and calls sess_rele(). This hold doesn't actually
+ * protect any of the contents of the session structure.
+ *
+ * tty_hold() returns a pointer to a session structure associated
+ * with the curproc. It also "locks" the contents of the session
+ * structure. This hold should be used when the caller will be
+ * doing operations on a controlling tty associated with the session.
+ * This operation does an implicit sess_hold() so that the session
+ * structure can't be free'd until after the caller is done with it
+ * and invokes tty_rele().
+ *
+ * NOTE: Neither of these functions (sess_hold() or tty_hold())
+ * prevent a process from changing its session. Once these functions
+ * return a session pointer, that session pointer may no longer be
+ * associated with the current process. If a caller wants to prevent
+ * a process from changing its session then it must hold pidlock or
+ * p_splock.
+ */
+
typedef struct sess {
- uint_t s_ref; /* reference count */
- dev_t s_dev; /* tty's device number */
- struct vnode *s_vp; /* tty's vnode */
- struct pid *s_sidp; /* session ID info */
- struct cred *s_cred; /* allocation credentials */
- kmutex_t s_lock; /* sync s_vp use with freectty */
- kcondvar_t s_wait_cv; /* Condvar for sleeping */
- int s_cnt; /* # of active users of this session */
- int s_flag; /* session state flag see below */
-} sess_t;
+ struct pid *s_sidp; /* session ID info, never changes */
-#define SESS_CLOSE 1 /* session about to close */
-#define s_sid s_sidp->pid_id
+ kmutex_t s_lock; /* protects everything below */
+ uint_t s_ref; /* reference count */
+ boolean_t s_sighuped; /* ctty had sighup sent to it */
-#if defined(_KERNEL)
+ boolean_t s_exit; /* session leader is exiting */
+ kcondvar_t s_exit_cv; /* Condvar for s_exit */
-extern sess_t session0;
+ int s_cnt; /* active users of this ctty */
+ kcondvar_t s_cnt_cv; /* Condvar for s_cnt */
-#define SESS_HOLD(sp) (++(sp)->s_ref)
-#define SESS_RELE(sp) sess_rele(sp)
+ /*
+ * The following fields can only be updated while s_lock is held
+ * and s_cnt is 0. (ie, no one has a tty_hold() on this session.)
+ */
+ dev_t s_dev; /* tty's device number */
+ struct vnode *s_vp; /* tty's vnode */
+ struct cred *s_cred; /* allocation credentials */
+} sess_t;
-/*
- * Used to synchronize session vnode users with freectty()
- */
+#define s_sid s_sidp->pid_id
-#define TTY_HOLD(sp) { \
- mutex_enter(&(sp)->s_lock); \
- (++(sp)->s_cnt); \
- mutex_exit(&(sp)->s_lock); \
-}
+#if defined(_KERNEL)
-#define TTY_RELE(sp) { \
- mutex_enter(&(sp)->s_lock); \
- if ((--(sp)->s_cnt) == 0) \
- cv_signal(&(sp)->s_wait_cv); \
- mutex_exit(&(sp)->s_lock); \
-}
+extern sess_t session0;
/* forward referenced structure tags */
struct vnode;
struct proc;
+struct stdata;
+
+extern void sess_hold(proc_t *p);
+extern void sess_rele(sess_t *, boolean_t);
+extern sess_t *tty_hold(void);
+extern void tty_rele(sess_t *sp);
+
-extern void sess_rele(sess_t *);
extern void sess_create(void);
-extern void freectty(sess_t *);
-extern void alloctty(struct proc *, struct vnode *);
+extern int strctty(struct stdata *);
+extern int freectty(boolean_t);
extern dev_t cttydev(struct proc *);
+extern void ctty_clear_sighuped(void);
#endif /* defined(_KERNEL) */
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index d00220f2a9..39112e6c97 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -544,11 +544,21 @@ struct sonodeops {
(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))
/*
- * Used in parsing msg_control
+ * Macros that operate on struct cmsghdr.
+ * Used in parsing msg_control.
+ * The CMSG_VALID macro does not assume that the last option buffer is padded.
*/
#define CMSG_NEXT(cmsg) \
(struct cmsghdr *)((uintptr_t)(cmsg) + \
ROUNDUP_cmsglen((cmsg)->cmsg_len))
+#define CMSG_CONTENT(cmsg) (&((cmsg)[1]))
+#define CMSG_CONTENTLEN(cmsg) ((cmsg)->cmsg_len - sizeof (struct cmsghdr))
+#define CMSG_VALID(cmsg, start, end) \
+ (ISALIGNED_cmsghdr(cmsg) && \
+ ((uintptr_t)(cmsg) >= (uintptr_t)(start)) && \
+ ((uintptr_t)(cmsg) < (uintptr_t)(end)) && \
+ ((ssize_t)(cmsg)->cmsg_len >= sizeof (struct cmsghdr)) && \
+ ((uintptr_t)(cmsg) + (cmsg)->cmsg_len <= (uintptr_t)(end)))
/*
* Maximum size of any argument that is copied in (addresses, options,
diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h
index 27403d72cc..4f424e96e1 100644
--- a/usr/src/uts/common/sys/strsubr.h
+++ b/usr/src/uts/common/sys/strsubr.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -1113,8 +1112,6 @@ extern mblk_t *strrput_misc(vnode_t *, mblk_t *,
extern int getiocseqno(void);
extern int strwaitbuf(size_t, int);
extern int strwaitq(stdata_t *, int, ssize_t, int, clock_t, int *);
-extern void stralloctty(struct stdata *);
-extern void strfreectty(struct stdata *);
extern struct stdata *shalloc(queue_t *);
extern void shfree(struct stdata *s);
extern queue_t *allocq(void);
diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h
index 43dee30f0b..1a6412b70b 100644
--- a/usr/src/uts/common/sys/syscall.h
+++ b/usr/src/uts/common/sys/syscall.h
@@ -99,7 +99,7 @@ extern "C" {
* getpgid(pid) :: syscall(39,4,pid)
* setpgid(pid,pgid) :: syscall(39,5,pid,pgid)
*/
-#define SYS_reserved_40 40 /* 40 not used, was xenix */
+#define SYS_uucopystr 40
#define SYS_dup 41
#define SYS_pipe 42
#define SYS_times 43
@@ -355,7 +355,7 @@ extern "C" {
#define SYS_pwrite 174
#define SYS_llseek 175
#define SYS_inst_sync 176
-#define SYS_reserved_177 177 /* 177 reserved */
+#define SYS_brand 177
#define SYS_kaio 178
/*
* subcodes:
@@ -464,6 +464,8 @@ extern "C" {
* zone_list(...) :: zone(ZONE_LIST, ...)
* zone_shutdown(...) :: zone(ZONE_SHUTDOWN, ...)
* zone_lookup(...) :: zone(ZONE_LOOKUP, ...)
+ * zone_setattr(...) :: zone(ZONE_SETATTR, ...)
+ * zone_getattr(...) :: zone(ZONE_GETATTR, ...)
*/
#define SYS_autofssys 228
#define SYS_getcwd 229
@@ -494,7 +496,7 @@ extern "C" {
#define SYS_lwp_mutex_trylock 251
#define SYS_lwp_mutex_init 252
#define SYS_cladm 253
-#define SYS_reserved_254 254 /* 254 reserved */
+#define SYS_uucopy 254
#define SYS_umount2 255
diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h
index c96ea5b4ac..ac465ad49f 100644
--- a/usr/src/uts/common/sys/systm.h
+++ b/usr/src/uts/common/sys/systm.h
@@ -246,6 +246,7 @@ int copyoutstr_noerr(const char *, char *, size_t, size_t *);
int copystr(const char *, char *, size_t, size_t *);
void bcopy(const void *, void *, size_t);
void ucopy(const void *, void *, size_t);
+void ucopystr(const char *, char *, size_t, size_t *);
void pgcopy(const void *, void *, size_t);
void ovbcopy(const void *, void *, size_t);
void bzero(void *, size_t);
diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h
index 8bd020e5c1..2d99f70bc2 100644
--- a/usr/src/uts/common/sys/termios.h
+++ b/usr/src/uts/common/sys/termios.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -382,6 +381,24 @@ extern pid_t tcgetsid();
#define TCSETSF (_TIOC|16)
/*
+ * linux terminal ioctls we need to be aware of
+ */
+#define TIOCSETLD (_TIOC|123) /* set line discipline parms */
+#define TIOCGETLD (_TIOC|124) /* get line discipline parms */
+
+/*
+ * On Solaris, VMIN and VTIME overlap with VEOF and VEOL. This is
+ * perfectly legal, but Linux expects them to be separate, so we keep
+ * them separately.
+ */
+struct lx_cc {
+ unsigned char veof; /* veof value */
+ unsigned char veol; /* veol value */
+ unsigned char vmin; /* vmin value */
+ unsigned char vtime; /* vtime value */
+};
+
+/*
* NTP PPS ioctls
*/
#define TIOCGPPS (_TIOC|125)
@@ -457,6 +474,7 @@ struct ppsclockev32 {
#define TIOCGLTC (tIOC|116) /* get local special chars */
#define TIOCOUTQ (tIOC|115) /* driver output queue size */
#define TIOCNOTTY (tIOC|113) /* void tty association */
+#define TIOCSCTTY (tIOC|132) /* acquire a controlling tty */
#define TIOCSTOP (tIOC|111) /* stop output, like ^S */
#define TIOCSTART (tIOC|110) /* start output, like ^Q */
#define TIOCSILOOP (tIOC|109) /* private to Sun; do not use */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 56c23d00ad..636b8acc0f 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -87,6 +87,10 @@ extern "C" {
#define ZONE_ATTR_SLBL 8
#define ZONE_ATTR_INITNAME 9
#define ZONE_ATTR_BOOTARGS 10
+#define ZONE_ATTR_BRAND 11
+
+/* Start of the brand-specific attribute namespace */
+#define ZONE_ATTR_BRAND_ATTRS 32768
#define ZONE_EVENT_CHANNEL "com.sun:zones:status"
#define ZONE_EVENT_STATUS_CLASS "status"
@@ -103,6 +107,49 @@ extern "C" {
#define ZONE_CB_TIMESTAMP "when"
#define ZONE_CB_ZONEID "zoneid"
+/*
+ * Exit values that may be returned by scripts or programs invoked by various
+ * zone commands.
+ *
+ * These are defined as:
+ *
+ * ZONE_SUBPROC_OK
+ * ===============
+ * The subprocess completed successfully.
+ *
+ * ZONE_SUBPROC_USAGE
+ * ==================
+ * The subprocess failed with a usage message, or a usage message should
+ * be output on its behalf.
+ *
+ * ZONE_SUBPROC_NOTCOMPLETE
+ * ========================
+ * The subprocess did not complete, but the actions performed by the
+ * subprocess require no recovery actions by the user.
+ *
+ * For example, if the subprocess were called by "zoneadm install," the
+ * installation of the zone did not succeed but the user need not perform
+ * a "zoneadm uninstall" before attempting another install.
+ *
+ * ZONE_SUBPROC_FATAL
+ * ==================
+ * The subprocess failed in a fatal manner, usually one that will require
+ * some type of recovery action by the user.
+ *
+ * For example, if the subprocess were called by "zoneadm install," the
+ * installation of the zone did not succeed and the user will need to
+ * perform a "zoneadm uninstall" before another install attempt is
+ * possible.
+ *
+ * The non-success exit values are large to avoid accidental collision
+ * with values used internally by some commands (e.g. "Z_ERR" and
+ * "Z_USAGE" as used by zoneadm.)
+ */
+#define ZONE_SUBPROC_OK 0
+#define ZONE_SUBPROC_USAGE 253
+#define ZONE_SUBPROC_NOTCOMPLETE 254
+#define ZONE_SUBPROC_FATAL 255
+
#ifdef _SYSCALL32
typedef struct {
caddr32_t zone_name;
@@ -159,8 +206,8 @@ typedef enum {
* communicates with zoneadmd, but only uses Z_REBOOT and Z_HALT.
*/
typedef enum zone_cmd {
- Z_READY, Z_BOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
- Z_MOUNT, Z_UNMOUNT
+ Z_READY, Z_BOOT, Z_FORCEBOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
+ Z_MOUNT, Z_FORCEMOUNT, Z_UNMOUNT
} zone_cmd_t;
/*
@@ -223,6 +270,7 @@ typedef struct zone_cmd_rval {
#define ZF_IS_SCRATCH 0x4 /* scratch zone */
struct pool;
+struct brand;
/*
* Structure to record list of ZFS datasets exported to a zone.
@@ -318,6 +366,8 @@ typedef struct zone {
int zone_match; /* require label match for packets */
tsol_mlp_list_t zone_mlps; /* MLPs on zone-private addresses */
+ boolean_t zone_restart_init; /* Restart init if it dies? */
+ struct brand *zone_brand; /* zone's brand */
} zone_t;
/*
@@ -330,8 +380,6 @@ extern zone_t *global_zone;
extern uint_t maxzones;
extern rctl_hndl_t rc_zone_nlwps;
-extern const char * const zone_initname;
-
extern long zone(int, void *, void *, void *, void *);
extern void zone_zsd_init(void);
extern void zone_init(void);
diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c
new file mode 100644
index 0000000000..9b4bd38baa
--- /dev/null
+++ b/usr/src/uts/common/syscall/brandsys.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/brand.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+
+/*
+ * brand(2) system call.
+ *
+ * Forwards cmd and the six opaque arguments to the b_brandsys operation
+ * of the calling process's zone brand (ZBROP(p->p_zone)).
+ *
+ * On success, returns whatever the brand handler stored through the rval
+ * pointer it was given (rval starts out as 0).  If the handler returns a
+ * non-zero error, that error is handed to set_errno() and the result of
+ * set_errno() is returned.  A caller in the global zone, or in a zone
+ * that is not branded, gets set_errno(ENOSYS) and the handler is never
+ * invoked.
+ */
+int64_t
+brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+ uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+ struct proc *p = curthread->t_procp;
+ int64_t rval = 0; /* passed by address to the brand handler */
+ int err;
+
+ /*
+ * The brandsys system call can only be executed from inside a
+ * branded zone.
+ */
+ if (INGLOBALZONE(p) || !ZONE_IS_BRANDED(p->p_zone))
+ return (set_errno(ENOSYS));
+
+ if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3,
+ arg4, arg5, arg6)) != 0)
+ return (set_errno(err));
+
+ return (rval);
+}
diff --git a/usr/src/uts/common/syscall/pgrpsys.c b/usr/src/uts/common/syscall/pgrpsys.c
index e8be876537..8f60747663 100644
--- a/usr/src/uts/common/syscall/pgrpsys.c
+++ b/usr/src/uts/common/syscall/pgrpsys.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,11 +18,16 @@
*
* CDDL HEADER END
*/
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
-#ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.78 */
+#pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.78 */
#include <sys/param.h>
#include <sys/types.h>
@@ -39,8 +43,9 @@
int
setpgrp(int flag, int pid, int pgid)
{
- register proc_t *p = ttoproc(curthread);
- register int retval = 0;
+ proc_t *p = curproc;
+ int retval = 0;
+ int sid;
switch (flag) {
@@ -51,7 +56,10 @@ setpgrp(int flag, int pid, int pgid)
sess_create();
} else
mutex_exit(&pidlock);
- return (p->p_sessp->s_sid);
+ mutex_enter(&p->p_splock);
+ sid = p->p_sessp->s_sid;
+ mutex_exit(&p->p_splock);
+ return (sid);
case 3: /* setsid() */
mutex_enter(&pidlock);
@@ -61,7 +69,10 @@ setpgrp(int flag, int pid, int pgid)
}
mutex_exit(&pidlock);
sess_create();
- return (p->p_sessp->s_sid);
+ mutex_enter(&p->p_splock);
+ sid = p->p_sessp->s_sid;
+ mutex_exit(&p->p_splock);
+ return (sid);
case 5: /* setpgid() */
{
diff --git a/usr/src/uts/common/syscall/uucopy.c b/usr/src/uts/common/syscall/uucopy.c
new file mode 100644
index 0000000000..c301599e2f
--- /dev/null
+++ b/usr/src/uts/common/syscall/uucopy.c
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/systm.h>
+/*
+ * uucopy: hand from, to and size to ucopy() under on_fault() protection.
+ *
+ * The ucopy() call runs between on_fault() and no_fault().  If on_fault()
+ * returns non-zero the function returns set_errno(EFAULT); otherwise it
+ * returns 0 once ucopy() and no_fault() have completed.
+ *
+ * NOTE(review): presumably on_fault() returns non-zero when ucopy() takes
+ * a fault on a bad address, and both 'from' and 'to' are user addresses
+ * (this looks like the SYS_uucopy handler) -- confirm against on_fault()
+ * and ucopy().
+ */
+int
+uucopy(const void *from, void *to, size_t size)
+{
+ label_t ljb;
+
+ if (on_fault(&ljb))
+ return (set_errno(EFAULT));
+
+ ucopy(from, to, size);
+
+ no_fault();
+
+ return (0);
+}
+/*
+ * uucopystr: string counterpart of uucopy(), built on ucopystr().
+ *
+ * Passes from, to and size to ucopystr() and returns the length that
+ * ucopystr() stores through its final argument, cast to ssize_t.  If
+ * on_fault() returns non-zero, returns set_errno(EFAULT) instead.
+ *
+ * NOTE(review): whether the reported length counts the terminating NUL,
+ * and what happens when the source string is longer than size, depends
+ * on ucopystr() -- confirm there.
+ */
+ssize_t
+uucopystr(const char *from, char *to, size_t size)
+{
+ label_t ljb;
+ size_t len; /* set by ucopystr() through &len */
+
+ if (on_fault(&ljb))
+ return (set_errno(EFAULT));
+
+ ucopystr(from, to, size, &len);
+
+ no_fault();
+
+ return ((ssize_t)len);
+}