PSARC/2005/471 BrandZ: Support for non-native zones

6374606 ::nm -D without an object may not work on processes in zones 6409350 BrandZ project integration into Solaris 6455289 pthread_setschedparam() should return EPERM rather than panic libc 6455591 setpriority(3C) gets errno wrong for deficient privileges failure 6458178 fifofs doesn't support lofs mounts of fifos 6460380 Attempted open() of a symlink with the O_NOFOLLOW flag set returns EINVAL, not ELOOP 6463857 renice(1) errors erroneously --HG-- rename : usr/src/lib/libzonecfg/zones/SUNWblank.xml => usr/src/lib/brand/native/zone/SUNWblank.xml rename : usr/src/lib/libzonecfg/zones/SUNWdefault.xml => usr/src/lib/brand/native/zone/SUNWdefault.xml
author: nn35248 <none@none> 2006-09-11 22:51:59 -0700
committer: nn35248 <none@none> 2006-09-11 22:51:59 -0700
commit: 9acbbeaf2a1ffe5c14b244867d427714fab43c5c (patch)
tree: d1ecd54896325c19a463220e9cbc50864874fc82 /usr/src/uts/common
parent: da51466dc253d7c98dda4956059042bd0c476328 (diff)
download: illumos-gate-9acbbeaf2a1ffe5c14b244867d427714fab43c5c.tar.gz
101 files changed, 17861 insertions, 610 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index b022fcd0c9..f0203dfeb9 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -41,6 +41,7 @@ sparc_CORE_OBJS +=
 COMMON_CORE_OBJS +=		\
 		atomic.o	\
 		bp_map.o	\
+		brand.o		\
 		chip.o		\
 		cpu.o		\
 		cpu_intr.o	\
@@ -78,6 +79,7 @@ GENUNIX_OBJS +=	\
 		bio.o		\
 		bitmap.o	\
 		blabel.o	\
+		brandsys.o	\
 		callb.o		\
 		callout.o	\
 		chdir.o		\
@@ -318,6 +320,7 @@ GENUNIX_OBJS +=	\
 		urw.o		\
 		utime.o		\
 		utssys.o	\
+		uucopy.o	\
 		vfs.o		\
 		vfs_conf.o	\
 		vmem.o		\
@@ -360,6 +363,8 @@ PROFILE_OBJS += profile.o
 
 SYSTRACE_OBJS += systrace.o
 
+LX_SYSTRACE_OBJS += lx_systrace.o
+
 LOCKSTAT_OBJS += lockstat.o
 
 FASTTRAP_OBJS += fasttrap.o fasttrap_isa.o
@@ -397,6 +402,10 @@ PTSL_OBJS +=	tty_pts.o
 
 PTM_OBJS +=	ptm.o
 
+LX_PTM_OBJS +=	lx_ptm.o
+
+LX_AUDIO_OBJS += lx_audio.o
+
 PTS_OBJS +=	pts.o
 
 PTY_OBJS +=	ptms_conf.o
@@ -937,6 +946,8 @@ DEDUMP_OBJS +=	dedump.o
 
 DRCOMPAT_OBJS +=	drcompat.o
 
+LDLINUX_OBJS +=	ldlinux.o
+
 LDTERM_OBJS +=	ldterm.o uwidth.o
 
 PCKT_OBJS +=	pckt.o
diff --git a/usr/src/uts/common/Makefile.rules b/usr/src/uts/common/Makefile.rules
index 69e32b7ee5..27b347c937 100644
--- a/usr/src/uts/common/Makefile.rules
+++ b/usr/src/uts/common/Makefile.rules
@@ -70,6 +70,10 @@ $(OBJS_DIR)/%.o:		$(COMMONBASE)/avl/%.c
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
 
+$(OBJS_DIR)/%.o:		$(UTSBASE)/common/brand/sn1/%.c
+	$(COMPILE.c) -o $@ $<
+	$(CTFCONVERT_O)
+
 $(OBJS_DIR)/%.o:		$(UTSBASE)/common/c2/%.c
 	$(COMPILE.c) -o $@ $<
 	$(CTFCONVERT_O)
@@ -911,6 +915,9 @@ $(LINTS_DIR)/%.ln:		$(COMMONBASE)/acl/%.c
 $(LINTS_DIR)/%.ln:		$(COMMONBASE)/avl/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
+$(LINTS_DIR)/%.ln:		$(UTSBASE)/common/brand/sn1/%.c
+	@($(LHEAD) $(LINT.c) $< $(LTAIL))
+
 $(LINTS_DIR)/%.ln:		$(UTSBASE)/common/c2/%.c
 	@($(LHEAD) $(LINT.c) $< $(LTAIL))
 
diff --git a/usr/src/uts/common/brand/lx/autofs/lx_autofs.c b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
new file mode 100644
index 0000000000..ecd4e8e44d
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/autofs/lx_autofs.c
@@ -0,0 +1,1558 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <fs/fs_subr.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/dirent.h>
+#include <sys/fs/fifonode.h>
+#include <sys/modctl.h>
+#include <sys/mount.h>
+#include <sys/policy.h>
+#include <sys/sunddi.h>
+
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+
+#include <sys/lx_autofs_impl.h>
+
+/*
+ * External functions
+ */
+/* Declared here but not referenced in the portion of the file reviewed. */
+extern uintptr_t		space_fetch(char *key);
+extern int			space_store(char *key, uintptr_t ptr);
+
+/*
+ * Globals
+ */
+static vfsops_t			*lx_autofs_vfsops;
+/* Vnode ops attached to every vnode created by i_vn_alloc(). */
+static vnodeops_t		*lx_autofs_vn_ops = NULL;
+/* File system type index stored in vfs_fstype and the fsid at mount time. */
+static int			lx_autofs_fstype;
+/* Major number used when inventing a dev_t for each mount. */
+static major_t			lx_autofs_major;
+/* Last minor number handed out; bumped atomically for each mount. */
+static minor_t			lx_autofs_minor = 0;
+
+/*
+ * Support functions
+ */
+/*
+ * Free a string that was allocated by i_strdup().  The size of the
+ * allocation is recomputed from the string's current length, so the
+ * string must still be terminated where it originally was.
+ */
+static void
+i_strfree(char *str)
+{
+	size_t	size = strlen(str) + 1;
+
+	kmem_free(str, size);
+}
+
+/*
+ * Return a kmem-allocated copy of str, including its terminating NUL.
+ * The allocation sleeps (KM_SLEEP).  Release the copy with i_strfree().
+ */
+static char *
+i_strdup(char *str)
+{
+	int	size = strlen(str) + 1;
+	char	*copy;
+
+	copy = kmem_alloc(size, KM_SLEEP);
+	bcopy(str, copy, size);
+	return (copy);
+}
+
+/*
+ * Convert the base 10 string str into an int stored in *val.
+ *
+ * Returns 0 on success.  Returns -1, leaving *val untouched, if str is
+ * NULL, if ddi_strtol() reports an error, or if the value does not fit
+ * in an int.
+ */
+static int
+i_str_to_int(char *str, int *val)
+{
+	long	parsed;
+
+	if (str == NULL)
+		return (-1);
+
+	if (ddi_strtol(str, NULL, 10, &parsed) != 0)
+		return (-1);
+
+	if ((parsed < INT_MIN) || (parsed > INT_MAX))
+		return (-1);
+
+	*val = (int)parsed;
+	return (0);
+}
+
+/*
+ * Initialize lp as an empty stack of stack_elem_t elements, linked
+ * through their se_list member.
+ */
+static void
+i_stack_init(list_t *lp)
+{
+	size_t	elem_size = sizeof (stack_elem_t);
+	size_t	link_offset = offsetof(stack_elem_t, se_list);
+
+	list_create(lp, elem_size, link_offset);
+}
+
+/*
+ * Tear down a stack created by i_stack_init().  The stack must already
+ * have been drained with i_stack_pop(); this is only asserted, so any
+ * elements left behind are not freed here.
+ */
+static void
+i_stack_fini(list_t *lp)
+{
+	ASSERT(list_head(lp) == NULL);
+	list_destroy(lp);
+}
+
+/*
+ * Push a new element carrying the three opaque pointers onto the top of
+ * the stack lp.  The element is allocated with KM_SLEEP and is freed by
+ * the matching i_stack_pop().
+ */
+static void
+i_stack_push(list_t *lp, caddr_t ptr1, caddr_t ptr2, caddr_t ptr3)
+{
+	stack_elem_t	*elem = kmem_alloc(sizeof (*elem), KM_SLEEP);
+
+	elem->se_ptr1 = ptr1;
+	elem->se_ptr2 = ptr2;
+	elem->se_ptr3 = ptr3;
+
+	list_insert_head(lp, elem);
+}
+
+/*
+ * Pop the top element off the stack lp.
+ *
+ * Each of ptr1, ptr2 and ptr3 may be NULL if the caller is not
+ * interested in the corresponding stored pointer.  Returns 0 if an
+ * element was popped and -1 if the stack was empty.
+ */
+static int
+i_stack_pop(list_t *lp, caddr_t *ptr1, caddr_t *ptr2, caddr_t *ptr3)
+{
+	stack_elem_t	*top = list_head(lp);
+
+	if (top == NULL)
+		return (-1);
+
+	list_remove(lp, top);
+
+	/* Hand back only the values the caller asked for. */
+	if (ptr1 != NULL)
+		*ptr1 = top->se_ptr1;
+	if (ptr2 != NULL)
+		*ptr2 = top->se_ptr2;
+	if (ptr3 != NULL)
+		*ptr3 = top->se_ptr3;
+
+	kmem_free(top, sizeof (*top));
+	return (0);
+}
+
+/*
+ * Given the vnode for one end of a fifo, return the vnode of the
+ * fifonode recorded as its destination (fn_dest).  No hold is taken on
+ * the returned vnode.
+ */
+static vnode_t *
+fifo_peer_vp(vnode_t *vp)
+{
+	fifonode_t *peer = VTOF(vp)->fn_dest;
+
+	return (FTOV(peer));
+}
+
+/*
+ * Return the lx_autofs vnode that shadows the underlying vnode uvp on
+ * the file system vfsp, creating one if none exists yet.
+ *
+ * The caller's hold on uvp is consumed: it is either stashed in the new
+ * vnode's v_data (and dropped later by i_vn_free()) or released here
+ * when an existing shadow vnode is found.  The returned vnode is held.
+ */
+static vnode_t *
+i_vn_alloc(vfs_t *vfsp, vnode_t *uvp)
+{
+	lx_autofs_vfs_t	*data = vfsp->vfs_data;
+	vnode_t		*vp, *vp_old;
+
+	/*
+	 * Allocate a new vnode structure in case we need it.  This is done
+	 * up front because the allocation may sleep and we don't want to
+	 * do that while holding lav_lock.
+	 */
+	vp = vn_alloc(KM_SLEEP);
+	vn_setops(vp, lx_autofs_vn_ops);
+	VN_SET_VFS_TYPE_DEV(vp, vfsp, uvp->v_type, uvp->v_rdev);
+	vp->v_data = uvp;
+	ASSERT(vp->v_count == 1);
+
+	/*
+	 * Take a hold on the vfs structure.  This is how unmount will
+	 * determine if there are any active vnodes in the file system.
+	 */
+	VFS_HOLD(vfsp);
+
+	/*
+	 * Check if we already have a vnode allocated for this underlying
+	 * vnode_t.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_vn_hash,
+	    (mod_hash_key_t)uvp, (mod_hash_val_t *)&vp_old) != 0) {
+
+		/*
+		 * Didn't find an existing node.
+		 * Add this node to the hash and return.
+		 */
+		VERIFY(mod_hash_insert(data->lav_vn_hash,
+		    (mod_hash_key_t)uvp,
+		    (mod_hash_val_t)vp) == 0);
+		mutex_exit(&data->lav_lock);
+		return (vp);
+	}
+
+	/* Get a hold on the existing vnode and free up the one we allocated. */
+	VN_HOLD(vp_old);
+	mutex_exit(&data->lav_lock);
+
+	/*
+	 * Free up the new vnode we allocated, undoing the vfs hold taken
+	 * above and dropping the caller's hold on the underlying vnode
+	 * (the existing shadow vnode already has its own).
+	 */
+	VN_RELE(uvp);
+	VFS_RELE(vfsp);
+	vn_invalid(vp);
+	vn_free(vp);
+
+	return (vp_old);
+}
+
+/*
+ * Destroy an lx_autofs vnode whose reference count has dropped to zero.
+ *
+ * Must be called with both the per-mount lav_lock and vp->v_lock held;
+ * both locks are dropped before this function returns.  The holds that
+ * i_vn_alloc() took on the underlying vnode and on the vfs are released.
+ */
+static void
+i_vn_free(vnode_t *vp)
+{
+	vfs_t		*vfsp = vp->v_vfsp;
+	lx_autofs_vfs_t	*data = vfsp->vfs_data;
+	vnode_t		*uvp = vp->v_data;
+	vnode_t	*vp_tmp;
+
+	ASSERT(MUTEX_HELD((&data->lav_lock)));
+	ASSERT(MUTEX_HELD((&vp->v_lock)));
+
+	ASSERT(vp->v_count == 0);
+
+	/*
+	 * We're about to free this vnode so take it out of the hash.
+	 * NOTE(review): the out-parameter is cast to mod_hash_val_t here
+	 * but to mod_hash_val_t * in the mod_hash_find() call in
+	 * i_vn_alloc() -- confirm against the mod_hash_remove() prototype.
+	 */
+	(void) mod_hash_remove(data->lav_vn_hash,
+	    (mod_hash_key_t)uvp, (mod_hash_val_t)&vp_tmp);
+
+	/*
+	 * No one else can lookup this vnode any more so there's no need
+	 * to hold locks.
+	 */
+	mutex_exit(&data->lav_lock);
+	mutex_exit(&vp->v_lock);
+
+	/* Release the underlying vnode. */
+	VN_RELE(uvp);
+	VFS_RELE(vfsp);
+	vn_invalid(vp);
+	vn_free(vp);
+}
+
+/*
+ * Obtain an automounter lookup request for the directory entry nm.
+ *
+ * If a request for the same name is already outstanding, that request is
+ * returned with an extra reference and *dup_request is set to 1.
+ * Otherwise a new request (reference count 1) is entered into the id and
+ * path hashes and *dup_request is set to 0.
+ *
+ * Returns NULL, without touching *dup_request, if nm does not fit in the
+ * packet's name buffer.
+ */
+static lx_autofs_lookup_req_t *
+i_lalr_alloc(lx_autofs_vfs_t *data, int *dup_request, char *nm)
+{
+	lx_autofs_lookup_req_t	*lalr, *lalr_dup;
+
+	/* Pre-allocate a new automounter request before grabbing locks. */
+	lalr = kmem_zalloc(sizeof (*lalr), KM_SLEEP);
+	mutex_init(&lalr->lalr_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&lalr->lalr_cv, NULL, CV_DEFAULT, NULL);
+	lalr->lalr_ref = 1;
+	lalr->lalr_pkt.lap_protover = LX_AUTOFS_PROTO_VERSION;
+
+	/* Assign a unique id for this request. */
+	lalr->lalr_pkt.lap_id = id_alloc(data->lav_ids);
+
+	/*
+	 * The token expected by the linux automount is the name of
+	 * the directory entry to look up.  (And not the entire
+	 * path that is being accessed.)
+	 */
+	lalr->lalr_pkt.lap_name_len = strlen(nm);
+	if (lalr->lalr_pkt.lap_name_len >
+	    (sizeof (lalr->lalr_pkt.lap_name) - 1)) {
+		zcmn_err(getzoneid(), CE_NOTE,
+		    "invalid autofs lookup: \"%s\"", nm);
+		id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+		/* Tear down the lock and cv before freeing their memory. */
+		cv_destroy(&lalr->lalr_cv);
+		mutex_destroy(&lalr->lalr_lock);
+		kmem_free(lalr, sizeof (*lalr));
+		return (NULL);
+	}
+	(void) strlcpy(lalr->lalr_pkt.lap_name, nm,
+	    sizeof (lalr->lalr_pkt.lap_name));
+
+	/* Check for an outstanding request for this path. */
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_path_hash,
+	    (mod_hash_key_t)nm, (mod_hash_val_t *)&lalr_dup) == 0) {
+		/*
+		 * There's already an outstanding request for this
+		 * path so we don't need a new one.  The pre-allocated
+		 * request was never published, so nobody else can be
+		 * using its lock or cv.
+		 */
+		id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+		cv_destroy(&lalr->lalr_cv);
+		mutex_destroy(&lalr->lalr_lock);
+		kmem_free(lalr, sizeof (*lalr));
+		lalr = lalr_dup;
+
+		/* Bump the ref count on the old request. */
+		atomic_add_int(&lalr->lalr_ref, 1);
+
+		*dup_request = 1;
+	} else {
+		/*
+		 * Add it to the hashes.  The path hash gets its own
+		 * copy of the name to use as its key.
+		 */
+		VERIFY(mod_hash_insert(data->lav_id_hash,
+		    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+		    (mod_hash_val_t)lalr) == 0);
+		VERIFY(mod_hash_insert(data->lav_path_hash,
+		    (mod_hash_key_t)i_strdup(nm),
+		    (mod_hash_val_t)lalr) == 0);
+
+		*dup_request = 0;
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (lalr);
+}
+
+/*
+ * Look up the outstanding automounter request with the given id.
+ *
+ * Returns the request with its reference count bumped (drop it with
+ * i_lalr_release()), or NULL if no request with that id is in the hash.
+ */
+static lx_autofs_lookup_req_t *
+i_lalr_find(lx_autofs_vfs_t *data, int id)
+{
+	lx_autofs_lookup_req_t	*found;
+
+	mutex_enter(&data->lav_lock);
+	if (mod_hash_find(data->lav_id_hash, (mod_hash_key_t)(uintptr_t)id,
+	    (mod_hash_val_t *)&found) == 0) {
+		/* Take our reference before anyone can remove it. */
+		atomic_add_int(&found->lalr_ref, 1);
+	} else {
+		found = NULL;
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (found);
+}
+
+/*
+ * Finish an automounter request: unpublish it from both hashes, flag it
+ * as complete and wake every thread sleeping on it.
+ *
+ * The caller must hold a reference on lalr; this function does not drop
+ * it (callers follow up with i_lalr_release()).
+ */
+static void
+i_lalr_complete(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	lx_autofs_lookup_req_t	*lalr_tmp;
+
+	/*
+	 * Remove this request from the hashes so no one can look it up.
+	 * The results are ignored since the request may already have been
+	 * removed by another thread.
+	 */
+	mutex_enter(&data->lav_lock);
+	(void) mod_hash_remove(data->lav_id_hash,
+		    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+		    (mod_hash_val_t)&lalr_tmp);
+	(void) mod_hash_remove(data->lav_path_hash,
+		    (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+		    (mod_hash_val_t)&lalr_tmp);
+	mutex_exit(&data->lav_lock);
+
+	/* Mark this request as complete and wake up anyone waiting on it. */
+	mutex_enter(&lalr->lalr_lock);
+	lalr->lalr_complete = 1;
+	cv_broadcast(&lalr->lalr_cv);
+	mutex_exit(&lalr->lalr_lock);
+}
+
+/*
+ * Drop one reference on an automounter request.  When the last reference
+ * goes away the request's id is returned to the id space and the request
+ * is destroyed.  The caller must not hold lalr_lock.
+ */
+static void
+i_lalr_release(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	ASSERT(!MUTEX_HELD(&lalr->lalr_lock));
+	if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0)
+		return;
+	ASSERT(lalr->lalr_ref == 0);
+	id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+
+	/*
+	 * We held the last reference, so nobody can be using the lock or
+	 * the cv any more.  Tear them down before freeing their memory.
+	 */
+	cv_destroy(&lalr->lalr_cv);
+	mutex_destroy(&lalr->lalr_lock);
+	kmem_free(lalr, sizeof (*lalr));
+}
+
+/*
+ * Give up waiting on an automounter request (the waiter was
+ * interrupted).  The caller's reference is dropped; if it was the last
+ * one the request is unpublished from the hashes and destroyed.
+ */
+static void
+i_lalr_abort(lx_autofs_vfs_t *data, lx_autofs_lookup_req_t *lalr)
+{
+	lx_autofs_lookup_req_t	*lalr_tmp;
+
+	/*
+	 * This is a little tricky.  We're aborting the wait for this
+	 * request.  So if anyone else is waiting for this request we
+	 * can't free it, but if no one else is waiting for the request
+	 * we should free it.  The reference is dropped under lav_lock so
+	 * that no new reference can be taken via the hashes in between.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (atomic_add_int_nv(&lalr->lalr_ref, -1) > 0) {
+		mutex_exit(&data->lav_lock);
+		return;
+	}
+	ASSERT(lalr->lalr_ref == 0);
+
+	/* Remove this request from the hashes so no one can look it up. */
+	(void) mod_hash_remove(data->lav_id_hash,
+		    (mod_hash_key_t)(uintptr_t)lalr->lalr_pkt.lap_id,
+		    (mod_hash_val_t)&lalr_tmp);
+	(void) mod_hash_remove(data->lav_path_hash,
+		    (mod_hash_key_t)lalr->lalr_pkt.lap_name,
+		    (mod_hash_val_t)&lalr_tmp);
+	mutex_exit(&data->lav_lock);
+
+	/*
+	 * It's ok to free this now because the ref count was zero.
+	 * Tear down the lock and cv before freeing their memory.
+	 */
+	id_free(data->lav_ids, lalr->lalr_pkt.lap_id);
+	cv_destroy(&lalr->lalr_cv);
+	mutex_destroy(&lalr->lalr_lock);
+	kmem_free(lalr, sizeof (*lalr));
+}
+
+/*
+ * Find the fifo that the automounter process handed us at mount time
+ * and take a hold on both of its ends.
+ *
+ * pgrp is the automounter's process group (and the pid of its leader);
+ * fd is the descriptor, in that process, of the write end of the fifo.
+ * The read end is located by scanning the same process's descriptor
+ * table for the peer vnode.
+ *
+ * Returns 0 and stores held file pointers in *fpp_wr and *fpp_rd on
+ * success (each must eventually be passed to closef()).  Returns -1 if
+ * the process can't be locked, fd is out of range or is not a fifo, or
+ * the read end is not open in that process.
+ */
+static int
+i_fifo_lookup(pid_t pgrp, int fd, file_t **fpp_wr, file_t **fpp_rd)
+{
+	proc_t		*prp;
+	uf_info_t	*fip;
+	uf_entry_t	*ufp_wr, *ufp_rd;
+	file_t		*fp_wr, *fp_rd;
+	vnode_t		*vp_wr, *vp_rd;
+	int		i;
+
+	/*
+	 * sprlock() is zone aware, so assuming this mount call was
+	 * initiated by a process in a zone, if it tries to specify
+	 * a pgrp outside of its zone this call will fail.
+	 *
+	 * Also, we want to grab hold of the main automounter process
+	 * and it's going to be the group leader for pgrp, so its
+	 * pid will be equal to pgrp.
+	 */
+	prp = sprlock(pgrp);
+	if (prp == NULL)
+		return (-1);
+	mutex_exit(&prp->p_lock);
+
+	/* Now we want to access the process's open file descriptors. */
+	fip = P_FINFO(prp);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * Sanity check fifo write fd.  The value comes straight from a
+	 * mount option, which i_str_to_int() will happily parse as a
+	 * negative number, so both bounds have to be checked before the
+	 * fd is used to index the descriptor table.
+	 */
+	if ((fd < 0) || (fd >= fip->fi_nfiles)) {
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/* Get a pointer to the write fifo. */
+	UF_ENTER(ufp_wr, fip, fd);
+	if (((fp_wr = ufp_wr->uf_file) == NULL) ||
+	    ((vp_wr = fp_wr->f_vnode) == NULL) || (vp_wr->v_type != VFIFO)) {
+		/* Invalid fifo fd. */
+		UF_EXIT(ufp_wr);
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/*
+	 * Now we need to find the read end of the fifo (for reasons
+	 * explained below.)  We assume that the read end of the fifo
+	 * is in the same process as the write end.
+	 *
+	 * NOTE(review): this scan re-enters the entry for fd while it is
+	 * still held from above.  Presumably the peer vnode always differs
+	 * from vp_wr so the scan stops first -- confirm for named fifos.
+	 */
+	vp_rd = fifo_peer_vp(fp_wr->f_vnode);
+	for (i = 0; i < fip->fi_nfiles; i++) {
+		UF_ENTER(ufp_rd, fip, i);
+		if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+		    (fp_rd->f_vnode == vp_rd))
+			break;
+		UF_EXIT(ufp_rd);
+	}
+	if (i == fip->fi_nfiles) {
+		/* Didn't find it. */
+		UF_EXIT(ufp_wr);
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/*
+	 * We need to drop fi_lock before we can try to acquire f_tlock;
+	 * the good news is that the file pointers are protected because
+	 * we're still holding uf_lock.
+	 */
+	mutex_exit(&fip->fi_lock);
+
+	/*
+	 * Here we bump the open counts on the fifos.  The reason
+	 * that we do this is because when we go to write to the
+	 * fifo we want to ensure that they are actually open (and
+	 * not in the process of being closed) without having to
+	 * stop the automounter.  (If the write end of the fifo
+	 * were closed and we tried to write to it we would panic.
+	 * If the read end of the fifo was closed and we tried to
+	 * write to the other end, the process that invoked the
+	 * lookup operation would get an unexpected SIGPIPE.)
+	 */
+	mutex_enter(&fp_wr->f_tlock);
+	fp_wr->f_count++;
+	ASSERT(fp_wr->f_count >= 2);
+	mutex_exit(&fp_wr->f_tlock);
+
+	mutex_enter(&fp_rd->f_tlock);
+	fp_rd->f_count++;
+	ASSERT(fp_rd->f_count >= 2);
+	mutex_exit(&fp_rd->f_tlock);
+
+	/* Release all our locks. */
+	UF_EXIT(ufp_wr);
+	UF_EXIT(ufp_rd);
+	mutex_enter(&prp->p_lock);
+	sprunlock(prp);
+
+	/* Return the file pointers. */
+	*fpp_rd = fp_rd;
+	*fpp_wr = fp_wr;
+	return (0);
+}
+
+/*
+ * mod_hash_walk() callback used by i_fifo_close() to fetch a single
+ * entry from the id hash: it stores the visited entry's key (a request
+ * id) in the int that arg points to and stops the walk immediately.
+ */
+static uint_t
+/*ARGSUSED*/
+i_fifo_close_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+	int	*id = (int *)arg;
+	/* Return the key and terminate the walk. */
+	*id = (uintptr_t)key;
+	return (MH_WALK_TERMINATE);
+}
+
+/*
+ * Shut down communication with the automounter: drop our holds on both
+ * ends of the fifo (if we still have them) and complete every request
+ * that is still outstanding so that its waiters wake up.
+ */
+static void
+i_fifo_close(lx_autofs_vfs_t *data)
+{
+	/*
+	 * Close the fifo to prevent any future requests from
+	 * getting sent to the automounter.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (data->lav_fifo_wr != NULL) {
+		(void) closef(data->lav_fifo_wr);
+		data->lav_fifo_wr = NULL;
+	}
+	if (data->lav_fifo_rd != NULL) {
+		(void) closef(data->lav_fifo_rd);
+		data->lav_fifo_rd = NULL;
+	}
+	mutex_exit(&data->lav_lock);
+
+	/*
+	 * Wake up any threads currently waiting for the automounter.
+	 * Note that it's possible for multiple threads to have entered
+	 * this function and to be doing the work below simultaneously.
+	 */
+	for (;;) {
+		lx_autofs_lookup_req_t	*lalr;
+		int			id;
+
+		/*
+		 * Lookup the first entry in the hash.  The callback
+		 * overwrites id if (and only if) the hash is non-empty.
+		 */
+		id = -1;
+		mod_hash_walk(data->lav_id_hash,
+		    i_fifo_close_cb, &id);
+		if (id == -1) {
+			/* No more id's in the hash. */
+			break;
+		}
+		if ((lalr = i_lalr_find(data, id)) == NULL) {
+			/* Someone else beat us to it. */
+			continue;
+		}
+
+		/* Mark the request as complete and release it. */
+		i_lalr_complete(data, lalr);
+		i_lalr_release(data, lalr);
+	}
+}
+
+/*
+ * Check whether the automounter process still has the read end of our
+ * fifo open.  Must be called with lav_lock held.
+ *
+ * Returns 0 if some descriptor in the process whose pid is lav_pgrp
+ * still refers to the fifo's read-side vnode.  Returns -1 if the fifo
+ * has already been shut down, the process can't be locked, or no such
+ * descriptor exists.
+ */
+static int
+i_fifo_verify_rd(lx_autofs_vfs_t *data)
+{
+	proc_t		*prp;
+	uf_info_t	*fip;
+	uf_entry_t	*ufp_rd;
+	file_t		*fp_rd;
+	vnode_t		*vp_rd;
+	int		i;
+
+	ASSERT(MUTEX_HELD((&data->lav_lock)));
+
+	/* Check if we've already been shut down. */
+	if (data->lav_fifo_wr == NULL) {
+		ASSERT(data->lav_fifo_rd == NULL);
+		return (-1);
+	}
+	vp_rd = fifo_peer_vp(data->lav_fifo_wr->f_vnode);
+
+	/*
+	 * sprlock() is zone aware, so a pgrp outside of the caller's
+	 * zone will cause this call to fail.
+	 *
+	 * Also, we want to grab hold of the main automounter process
+	 * and it's going to be the group leader for pgrp, so its
+	 * pid will be equal to pgrp.
+	 */
+	prp = sprlock(data->lav_pgrp);
+	if (prp == NULL)
+		return (-1);
+	mutex_exit(&prp->p_lock);
+
+	/* Now we want to access the process's open file descriptors. */
+	fip = P_FINFO(prp);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * Scan the automounter's descriptors for the read end of the
+	 * fifo.  We assume that the read end of the fifo is in the same
+	 * process as the write end.  If the loop breaks out early the
+	 * matching entry is still entered and is exited further down.
+	 */
+	for (i = 0; i < fip->fi_nfiles; i++) {
+		UF_ENTER(ufp_rd, fip, i);
+		if (((fp_rd = ufp_rd->uf_file) != NULL) &&
+		    (fp_rd->f_vnode == vp_rd))
+			break;
+		UF_EXIT(ufp_rd);
+	}
+	if (i == fip->fi_nfiles) {
+		/* Didn't find it. */
+		mutex_exit(&fip->fi_lock);
+		mutex_enter(&prp->p_lock);
+		sprunlock(prp);
+		return (-1);
+	}
+
+	/*
+	 * Seems the automounter still has the read end of the fifo
+	 * open, we're done here.  Release all our locks and exit.
+	 */
+	mutex_exit(&fip->fi_lock);
+	UF_EXIT(ufp_rd);
+	mutex_enter(&prp->p_lock);
+	sprunlock(prp);
+
+	return (0);
+}
+
+/*
+ * Send the request packet lap to the automounter over the fifo.
+ *
+ * Returns 0 on success.  Returns ENOENT if the fifo has already been
+ * shut down.  If, after the write, the automounter no longer has the
+ * read end open, the fifo is shut down via i_fifo_close() and the write
+ * error (or ENOENT if the write itself succeeded) is returned.
+ * Otherwise the result of the write is returned.
+ */
+static int
+i_fifo_write(lx_autofs_vfs_t *data, lx_autofs_pkt_t *lap)
+{
+	struct uio	uio;
+	struct iovec	iov;
+	file_t		*fp_wr, *fp_rd;
+	int		error;
+
+	/*
+	 * The catch here is we need to make sure _we_ don't close
+	 * the fifo while writing to it.  (Another thread could come
+	 * along and realize the automounter process is gone and close
+	 * the fifo.)  To do this we bump the open count before we
+	 * write to the fifo.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (data->lav_fifo_wr == NULL) {
+		ASSERT(data->lav_fifo_rd == NULL);
+		mutex_exit(&data->lav_lock);
+		return (ENOENT);
+	}
+	fp_wr = data->lav_fifo_wr;
+	fp_rd = data->lav_fifo_rd;
+
+	/* Bump the open count on the write fifo. */
+	mutex_enter(&fp_wr->f_tlock);
+	fp_wr->f_count++;
+	mutex_exit(&fp_wr->f_tlock);
+
+	/* Bump the open count on the read fifo. */
+	mutex_enter(&fp_rd->f_tlock);
+	fp_rd->f_count++;
+	mutex_exit(&fp_rd->f_tlock);
+
+	mutex_exit(&data->lav_lock);
+
+	/*
+	 * Start from a zeroed uio so that members we don't explicitly
+	 * set below (e.g. uio_extflg) don't carry stack garbage into
+	 * the write path.
+	 */
+	bzero(&uio, sizeof (uio));
+	iov.iov_base = (caddr_t)lap;
+	iov.iov_len = sizeof (*lap);
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_loffset = 0;
+	uio.uio_segflg = (short)UIO_SYSSPACE;
+	uio.uio_resid = sizeof (*lap);
+	uio.uio_llimit = 0;
+	uio.uio_fmode = FWRITE | FNDELAY | FNONBLOCK;
+
+	error = VOP_WRITE(fp_wr->f_vnode, &uio, 0, kcred, NULL);
+
+	/* Drop the temporary holds taken above. */
+	(void) closef(fp_wr);
+	(void) closef(fp_rd);
+
+	/*
+	 * After every write we verify that the automounter still has
+	 * these files open.
+	 */
+	mutex_enter(&data->lav_lock);
+	if (i_fifo_verify_rd(data) != 0) {
+		/*
+		 * Something happened to the automounter.
+		 * Close down the communication pipe we setup.
+		 */
+		mutex_exit(&data->lav_lock);
+		i_fifo_close(data);
+		if (error != 0)
+			return (error);
+		return (ENOENT);
+	}
+	mutex_exit(&data->lav_lock);
+
+	return (error);
+}
+
+/*
+ * Read the directory dvp and sort its entries (other than "." and "..")
+ * onto two stacks: subdirectories onto dir_stack and everything else
+ * onto file_stack.  Either stack may be NULL, in which case entries of
+ * that kind are skipped.
+ *
+ * Each pushed element holds (dvp, vp, name) where vp carries the hold
+ * returned by VOP_LOOKUP() and name is an i_strdup() copy; the consumer
+ * must VN_RELE() and i_strfree() them.  No hold is taken on dvp.
+ *
+ * Returns 0 on success and -1 if reading the directory or looking up an
+ * entry fails.  On failure, elements that were already pushed are left
+ * on the stacks for the caller to clean up.
+ */
+static int
+i_bs_readdir(vnode_t *dvp, list_t *dir_stack, list_t *file_stack)
+{
+	struct iovec	iov;
+	struct uio	uio;
+	dirent64_t	*dp, *dbuf;
+	vnode_t		*vp;
+	size_t		dlen, dbuflen;
+	int		eof, error, ndirents = 64;
+	char		*nm;
+
+	/* Scratch buffer sized as ndirents dirent64_t structures. */
+	dlen = ndirents * (sizeof (*dbuf));
+	dbuf = kmem_alloc(dlen, KM_SLEEP);
+
+	uio.uio_iov = &iov;
+	uio.uio_iovcnt = 1;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = UIO_COPY_CACHED;
+	uio.uio_loffset = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+
+	eof = 0;
+	error = 0;
+	/* error is never set after this point; the loop ends on eof. */
+	while (!error && !eof) {
+		/* Reset the transfer; uio_loffset carries over. */
+		uio.uio_resid = dlen;
+		iov.iov_base = (char *)dbuf;
+		iov.iov_len = dlen;
+
+		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+		if (VOP_READDIR(dvp, &uio, kcred, &eof) != 0) {
+			VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+			kmem_free(dbuf, dlen);
+			return (-1);
+		}
+		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
+
+		if ((dbuflen = dlen - uio.uio_resid) == 0) {
+			/* We're done. */
+			break;
+		}
+
+		/* Step through the variable-length records via d_reclen. */
+		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
+			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
+
+			nm = dp->d_name;
+
+			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
+				continue;
+
+			if (VOP_LOOKUP(dvp,
+			    nm, &vp, NULL, 0, NULL, kcred) != 0) {
+				kmem_free(dbuf, dlen);
+				return (-1);
+			}
+			if (vp->v_type == VDIR) {
+				if (dir_stack != NULL) {
+					i_stack_push(dir_stack, (caddr_t)dvp,
+					    (caddr_t)vp, i_strdup(nm));
+				} else {
+					VN_RELE(vp);
+				}
+			} else {
+				if (file_stack != NULL) {
+					i_stack_push(file_stack, (caddr_t)dvp,
+					    (caddr_t)vp, i_strdup(nm));
+				} else {
+					VN_RELE(vp);
+				}
+			}
+		}
+	}
+	kmem_free(dbuf, dlen);
+	return (0);
+}
+
+/*
+ * Remove the entry named path from the directory dvp.  If the entry is
+ * a directory, everything beneath it is removed first.
+ *
+ * This is best effort: the function returns nothing, and the first
+ * failure abandons the remaining work after releasing every vnode hold
+ * and name string that is still queued.
+ */
+static void
+i_bs_destroy(vnode_t *dvp, char *path)
+{
+	list_t	search_stack;
+	list_t	dir_stack;
+	list_t	file_stack;
+	vnode_t	*pdvp, *vp;
+	char	*dpath, *fpath;
+	int	ret;
+
+	if (VOP_LOOKUP(dvp, path, &vp, NULL, 0, NULL, kcred) != 0) {
+		/* A directory entry with this name doesn't actually exist. */
+		return;
+	}
+
+	/*
+	 * Compare v_type for equality, as the rest of this file does.
+	 * Masking it with VDIR would also match other vnode types whose
+	 * numeric value happens to share that bit.
+	 */
+	if (vp->v_type != VDIR) {
+		/* Easy, the directory entry is a file so delete it. */
+		VN_RELE(vp);
+		(void) VOP_REMOVE(dvp, path, kcred);
+		return;
+	}
+
+	/*
+	 * The directory entry is a subdirectory, now we have a bit more
+	 * work to do.  (We'll have to recurse into the sub directory.)
+	 * It would have been much easier to do this recursively but kernel
+	 * stacks are notoriously small.
+	 */
+	i_stack_init(&search_stack);
+	i_stack_init(&dir_stack);
+	i_stack_init(&file_stack);
+
+	/* Save our newfound subdirectory into a list. */
+	i_stack_push(&search_stack, (caddr_t)dvp, (caddr_t)vp, i_strdup(path));
+
+	/* Do a recursive depth first search into the subdirectories. */
+	while (i_stack_pop(&search_stack,
+	    (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+		/* Get a list of the subdirectories in this directory. */
+		if (i_bs_readdir(dvp, &search_stack, NULL) != 0) {
+			/*
+			 * The element we just popped is no longer on any
+			 * stack, so release it here before bailing out.
+			 */
+			VN_RELE(dvp);
+			i_strfree(dpath);
+			goto exit;
+		}
+
+		/* Save the current directory on a separate stack. */
+		i_stack_push(&dir_stack, (caddr_t)pdvp, (caddr_t)dvp, dpath);
+	}
+
+	/*
+	 * Now dir_stack contains a list of directories, the deepest paths
+	 * are at the top of the list.  So let's go through and process them.
+	 */
+	while (i_stack_pop(&dir_stack,
+	    (caddr_t *)&pdvp, (caddr_t *)&dvp, &dpath) == 0) {
+
+		/* Get a list of the files in this directory. */
+		if (i_bs_readdir(dvp, NULL, &file_stack) != 0) {
+			VN_RELE(dvp);
+			i_strfree(dpath);
+			goto exit;
+		}
+
+		/* Delete all the files in this directory. */
+		while (i_stack_pop(&file_stack,
+		    NULL, (caddr_t *)&vp, &fpath) == 0) {
+			VN_RELE(vp);
+			ret = VOP_REMOVE(dvp, fpath, kcred);
+			i_strfree(fpath);
+			if (ret != 0) {
+				/* Drop the popped directory's hold too. */
+				VN_RELE(dvp);
+				i_strfree(dpath);
+				goto exit;
+			}
+		}
+
+		/* Delete this directory. */
+		VN_RELE(dvp);
+		ret = VOP_RMDIR(pdvp, dpath, pdvp, kcred);
+		i_strfree(dpath);
+		if (ret != 0)
+			goto exit;
+	}
+
+exit:
+	/*
+	 * Drain whatever is left on the stacks.  Only the second pointer
+	 * (the looked-up vnode) is held; the first is a borrowed parent.
+	 */
+	while (
+	    (i_stack_pop(&search_stack, NULL, (caddr_t *)&vp, &path) == 0) ||
+	    (i_stack_pop(&dir_stack, NULL, (caddr_t *)&vp, &path) == 0) ||
+	    (i_stack_pop(&file_stack, NULL, (caddr_t *)&vp, &path) == 0)) {
+		VN_RELE(vp);
+		i_strfree(path);
+	}
+	i_stack_fini(&search_stack);
+	i_stack_fini(&dir_stack);
+	i_stack_fini(&file_stack);
+}
+
+/*
+ * Create the directory bs_name, mode 0755, inside dvp.
+ *
+ * Returns the vnode handed back by VOP_MKDIR() on success, or NULL if
+ * the directory could not be created.
+ */
+static vnode_t *
+i_bs_create(vnode_t *dvp, char *bs_name)
+{
+	vattr_t	attrs;
+	vnode_t	*new_dir;
+
+	/*
+	 * Judging by the mkdir syscall path, only the type and mode
+	 * need to be filled in; everything else stays zeroed.
+	 */
+	bzero(&attrs, sizeof (attrs));
+	attrs.va_mask = AT_TYPE|AT_MODE;
+	attrs.va_type = VDIR;
+	attrs.va_mode = 0755; /* u+rwx,og=rx */
+
+	if (VOP_MKDIR(dvp, bs_name, &attrs, &new_dir, kcred) == 0)
+		return (new_dir);
+	return (NULL);
+}
+
+/*
+ * Ask the userland automounter to service a lookup of nm in directory
+ * dvp, and wait until it answers.
+ *
+ * Returns:
+ *	0	the request was completed.  This says nothing about
+ *		whether the automounter succeeded; requests are completed
+ *		the same way for success, failure and fifo shutdown.
+ *	ENOENT	dvp is not the root of the mount, the caller belongs to
+ *		the automounter's process group, the fifo is closed, or
+ *		the request could not be allocated.
+ *	EINTR	the wait was interrupted by a signal.
+ *	other	error returned by i_fifo_write().
+ */
+static int
+i_automounter_call(vnode_t *dvp, char *nm)
+{
+	lx_autofs_lookup_req_t	*lalr;
+	lx_autofs_vfs_t		*data;
+	int			error, dup_request;
+
+	/* Get a pointer to the vfs mount data. */
+	data = dvp->v_vfsp->vfs_data;
+
+	/* The automounter only supports queries in the root directory. */
+	if (dvp != data->lav_root)
+		return (ENOENT);
+
+	/*
+	 * Check if the current process is in the automounter's process
+	 * group.  (If it is, the current process is either the automounter
+	 * itself or one of its forked child processes.)  If so, don't
+	 * redirect this lookup back into the automounter because we'll
+	 * hang.
+	 */
+	mutex_enter(&pidlock);
+	if (data->lav_pgrp == curproc->p_pgrp) {
+		mutex_exit(&pidlock);
+		return (ENOENT);
+	}
+	mutex_exit(&pidlock);
+
+	/* Verify that the automount process pipe still exists. */
+	mutex_enter(&data->lav_lock);
+	if (data->lav_fifo_wr == NULL) {
+		ASSERT(data->lav_fifo_rd == NULL);
+		mutex_exit(&data->lav_lock);
+		return (ENOENT);
+	}
+	mutex_exit(&data->lav_lock);
+
+	/* Allocate an automounter request structure. */
+	if ((lalr = i_lalr_alloc(data, &dup_request, nm)) == NULL)
+		return (ENOENT);
+
+	/*
+	 * If we were the first one to allocate this request then we
+	 * need to send it to the automounter.
+	 */
+	if ((!dup_request) &&
+	    ((error = i_fifo_write(data, &lalr->lalr_pkt)) != 0)) {
+		/*
+		 * Unable to send the request to the automounter.
+		 * Unblock any other threads waiting on the request
+		 * and release the request.
+		 */
+		i_lalr_complete(data, lalr);
+		i_lalr_release(data, lalr);
+		return (error);
+	}
+
+	/* Wait for someone to signal us that this request has completed. */
+	mutex_enter(&lalr->lalr_lock);
+	while (!lalr->lalr_complete) {
+		if (cv_wait_sig(&lalr->lalr_cv, &lalr->lalr_lock) == 0) {
+			/* We got a signal, abort this lookup. */
+			mutex_exit(&lalr->lalr_lock);
+			i_lalr_abort(data, lalr);
+			return (EINTR);
+		}
+	}
+	mutex_exit(&lalr->lalr_lock);
+	i_lalr_release(data, lalr);
+
+	return (0);
+}
+
+/*
+ * Handle an ioctl issued by the automounter on a vnode of this mount.
+ *
+ * LX_AUTOFS_IOC_READY and LX_AUTOFS_IOC_FAIL complete the request whose
+ * id is passed in arg (ENXIO if there is no such request).
+ * LX_AUTOFS_IOC_CATATONIC shuts the fifo down.  Anything else yields
+ * ENOTSUP.  Callers outside the automounter's process group get ENOENT.
+ */
+static int
+i_automounter_ioctl(vnode_t *vp, int cmd, intptr_t arg)
+{
+	lx_autofs_vfs_t		*data = (lx_autofs_vfs_t *)vp->v_vfsp->vfs_data;
+	lx_autofs_lookup_req_t	*req;
+	int			from_automounter;
+	int			id;
+
+	/*
+	 * Be strict.
+	 * We only accept ioctls from the automounter process group.
+	 */
+	mutex_enter(&pidlock);
+	from_automounter = (data->lav_pgrp == curproc->p_pgrp);
+	mutex_exit(&pidlock);
+	if (!from_automounter)
+		return (ENOENT);
+
+	if ((cmd != LX_AUTOFS_IOC_READY) && (cmd != LX_AUTOFS_IOC_FAIL)) {
+		if (cmd != LX_AUTOFS_IOC_CATATONIC)
+			return (ENOTSUP);
+
+		/* The automounter is shutting down. */
+		i_fifo_close(data);
+		return (0);
+	}
+
+	/*
+	 * We don't actually care if the request failed or succeeded.
+	 * We do the same thing either way.
+	 */
+	id = arg;
+	req = i_lalr_find(data, id);
+	if (req == NULL)
+		return (ENXIO);
+
+	/* Mark the request as complete and release it. */
+	i_lalr_complete(data, req);
+	i_lalr_release(data, req);
+	return (0);
+}
+
+/*
+ * Parse and validate the mount options for an lx_autofs mount and
+ * record the results in data.
+ *
+ * All four options (fd, pgrp, minproto, maxproto) are required and must
+ * be integers; the protocol range must include version 2; and fd/pgrp
+ * must identify a fifo that i_fifo_lookup() can locate.  On success the
+ * held fifo file pointers are stored in data and 0 is returned.  Any
+ * failure returns EINVAL with data left untouched.
+ */
+static int
+i_parse_mntopt(vfs_t *vfsp, lx_autofs_vfs_t *data)
+{
+	char		*opt_fd, *opt_pgrp, *opt_min, *opt_max;
+	int		fd, pgrp, minproto, maxproto;
+	file_t		*wr, *rd;
+
+	/* Require all options to be present. */
+	if (vfs_optionisset(vfsp, LX_MNTOPT_FD, &opt_fd) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_PGRP, &opt_pgrp) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_MINPROTO, &opt_min) != 1)
+		return (EINVAL);
+	if (vfs_optionisset(vfsp, LX_MNTOPT_MAXPROTO, &opt_max) != 1)
+		return (EINVAL);
+
+	/* Get the values for each parameter. */
+	if (i_str_to_int(opt_fd, &fd) != 0)
+		return (EINVAL);
+	if (i_str_to_int(opt_pgrp, &pgrp) != 0)
+		return (EINVAL);
+	if (i_str_to_int(opt_min, &minproto) != 0)
+		return (EINVAL);
+	if (i_str_to_int(opt_max, &maxproto) != 0)
+		return (EINVAL);
+
+	/*
+	 * We support v2 of the linux kernel automounter protocol.
+	 * Make sure the mount request we got indicates support
+	 * for this version of the protocol.
+	 */
+	if (!((minproto <= 2) && (maxproto >= 2)))
+		return (EINVAL);
+
+	/*
+	 * Now we need to lookup the fifos we'll be using
+	 * to talk to the userland automounter process.
+	 */
+	if (i_fifo_lookup(pgrp, fd, &wr, &rd) != 0)
+		return (EINVAL);
+
+	/* Save the mount options and fifo pointers. */
+	data->lav_fd = fd;
+	data->lav_pgrp = pgrp;
+	data->lav_fifo_wr = wr;
+	data->lav_fifo_rd = rd;
+	return (0);
+}
+
+/*
+ * VFS entry points
+ */
+/*
+ * VFS mount entry point.
+ *
+ * Validates the request, parses the mount options (which takes holds on
+ * both ends of the automounter's fifo), recreates the backing store
+ * directory under the mount point, and sets up the per-mount state: the
+ * dev_t, the request id space, the request and vnode hashes and the
+ * root vnode.
+ *
+ * Returns 0 on success or an errno value: EPERM, ENOTDIR, EBUSY, or
+ * whatever i_parse_mntopt() returns.
+ */
+static int
+lx_autofs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+	lx_autofs_vfs_t	*data;
+	dev_t		dev;
+	char		name[40];
+	int		error;
+
+	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+		return (EPERM);
+
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT)))
+		return (EBUSY);
+
+	/* We don't support mounts in the global zone. */
+	if (getzoneid() == GLOBAL_ZONEID)
+		return (EPERM);
+
+	/* We don't support mounting on top of ourselves. */
+	if (vn_matchops(mvp, lx_autofs_vn_ops))
+		return (EPERM);
+
+	/* Allocate a vfs struct. */
+	data = kmem_zalloc(sizeof (lx_autofs_vfs_t), KM_SLEEP);
+
+	/* Parse mount options. */
+	if ((error = i_parse_mntopt(vfsp, data)) != 0) {
+		kmem_free(data, sizeof (lx_autofs_vfs_t));
+		return (error);
+	}
+
+	/* Initialize the backing store. */
+	i_bs_destroy(mvp, LX_AUTOFS_BS_DIR);
+	if ((data->lav_bs_vp = i_bs_create(mvp, LX_AUTOFS_BS_DIR)) == NULL) {
+		/*
+		 * i_parse_mntopt() succeeded, so we hold both ends of the
+		 * automounter's fifo.  Drop those holds before bailing
+		 * out or they would never be released.
+		 */
+		(void) closef(data->lav_fifo_wr);
+		(void) closef(data->lav_fifo_rd);
+		kmem_free(data, sizeof (lx_autofs_vfs_t));
+		return (EBUSY);
+	}
+	data->lav_bs_name = LX_AUTOFS_BS_DIR;
+
+	/* We have to hold the underlying vnode we're mounted on. */
+	data->lav_mvp = mvp;
+	VN_HOLD(mvp);
+
+	/* Initialize vfs fields */
+	vfsp->vfs_bsize = DEV_BSIZE;
+	vfsp->vfs_fstype = lx_autofs_fstype;
+	vfsp->vfs_data = data;
+
+	/* Invent a dev_t (sigh) */
+	do {
+		dev = makedevice(lx_autofs_major,
+		    atomic_add_32_nv(&lx_autofs_minor, 1) & L_MAXMIN32);
+	} while (vfs_devismounted(dev));
+	vfsp->vfs_dev = dev;
+	vfs_make_fsid(&vfsp->vfs_fsid, dev, lx_autofs_fstype);
+
+	/* Create an id space arena for automounter requests. */
+	(void) snprintf(name, sizeof (name), "lx_autofs_id_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_ids = id_space_create(name, 1, INT_MAX);
+
+	/* Create hashes to keep track of automounter requests. */
+	mutex_init(&data->lav_lock, NULL, MUTEX_DEFAULT, NULL);
+	(void) snprintf(name, sizeof (name), "lx_autofs_path_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_path_hash = mod_hash_create_strhash(name,
+	    LX_AUTOFS_VFS_PATH_HASH_SIZE, mod_hash_null_valdtor);
+	(void) snprintf(name, sizeof (name), "lx_autofs_id_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_id_hash = mod_hash_create_idhash(name,
+	    LX_AUTOFS_VFS_ID_HASH_SIZE, mod_hash_null_valdtor);
+
+	/* Create a hash to keep track of vnodes. */
+	(void) snprintf(name, sizeof (name), "lx_autofs_vn_hash_%d",
+	    getminor(vfsp->vfs_dev));
+	data->lav_vn_hash = mod_hash_create_ptrhash(name,
+	    LX_AUTOFS_VFS_VN_HASH_SIZE, mod_hash_null_valdtor,
+	    sizeof (vnode_t));
+
+	/*
+	 * Create root vnode.  It shadows the backing store directory,
+	 * and takes over the hold that i_bs_create() returned.
+	 */
+	data->lav_root = i_vn_alloc(vfsp, data->lav_bs_vp);
+	data->lav_root->v_flag |=
+	    VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT;
+
+	return (0);
+}
+
+/*
+ * lx_autofs_unmount - VFS unmount entry point.
+ *
+ * Refuses forced unmounts and unmounts while any vnode other than the
+ * root (or any extra hold on the root) is outstanding.  Otherwise closes
+ * the automounter fifos, releases the root vnode, removes the backing
+ * store and tears down every structure created by lx_autofs_mount().
+ *
+ * Returns 0 on success or an errno value on failure.
+ */
+static int
+lx_autofs_unmount(vfs_t *vfsp, int flag, struct cred *cr)
+{
+	lx_autofs_vfs_t *data;
+
+	if (secpolicy_fs_unmount(cr, vfsp) != 0)
+		return (EPERM);
+
+	/* We do not currently support forced unmounts. */
+	if (flag & MS_FORCE)
+		return (ENOTSUP);
+
+	/*
+	 * We should never have a reference count of less than 2: one for the
+	 * caller, one for the root vnode.
+	 */
+	ASSERT(vfsp->vfs_count >= 2);
+
+	/* If there are any outstanding vnodes, we can't unmount. */
+	if (vfsp->vfs_count > 2)
+		return (EBUSY);
+
+	/* Check for any remaining holds on the root vnode. */
+	data = vfsp->vfs_data;
+	ASSERT(data->lav_root->v_vfsp == vfsp);
+	if (data->lav_root->v_count > 1)
+		return (EBUSY);
+
+	/* Close the fifo to the automount process. */
+	if (data->lav_fifo_wr != NULL)
+		(void) closef(data->lav_fifo_wr);
+	if (data->lav_fifo_rd != NULL)
+		(void) closef(data->lav_fifo_rd);
+
+	/*
+	 * We have to release our hold on our root vnode before we can
+	 * delete the backing store.  (Since the root vnode is linked
+	 * to the backing store.)
+	 */
+	VN_RELE(data->lav_root);
+
+	/* Cleanup the backing store. */
+	i_bs_destroy(data->lav_mvp, data->lav_bs_name);
+	VN_RELE(data->lav_mvp);
+
+	/* Cleanup our remaining data structures. */
+	mod_hash_destroy_strhash(data->lav_path_hash);
+	mod_hash_destroy_idhash(data->lav_id_hash);
+	mod_hash_destroy_ptrhash(data->lav_vn_hash);
+	id_space_destroy(data->lav_ids);
+
+	/* Pair the mutex_init() done at mount time before freeing the lock. */
+	mutex_destroy(&data->lav_lock);
+	kmem_free(data, sizeof (lx_autofs_vfs_t));
+
+	return (0);
+}
+
+/*
+ * lx_autofs_root - hand back the root vnode of this mount with a new hold.
+ */
+static int
+lx_autofs_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	lx_autofs_vfs_t	*mnt = vfsp->vfs_data;
+	vnode_t		*rootvp = mnt->lav_root;
+
+	/* The caller receives its own reference on the root. */
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * lx_autofs_statvfs - report filesystem statistics.
+ *
+ * The numbers come from the filesystem of the vnode underlying our root;
+ * the identity fields (fsid, basetype, flags, fstr) are then overwritten
+ * with values describing this mount.
+ */
+static int
+lx_autofs_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+	lx_autofs_vfs_t	*mnt = vfsp->vfs_data;
+	vnode_t		*under_root = mnt->lav_root->v_data;
+	dev32_t		fsid32;
+	int		rv;
+
+	rv = VFS_STATVFS(under_root->v_vfsp, sp);
+	if (rv != 0)
+		return (rv);
+
+	/* Replace the underlying filesystem's identity with our own. */
+	(void) cmpldev(&fsid32, vfsp->vfs_dev);
+	sp->f_fsid = fsid32;
+	(void) strlcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name,
+	    sizeof (sp->f_basetype));
+	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	bzero(sp->f_fstr, sizeof (sp->f_fstr));
+
+	return (0);
+}
+
+/*
+ * VFS operations template: maps each VFS operation name to its lx_autofs
+ * implementation.  Passed to vfs_setfsops() by lx_autofs_init().  The
+ * list is terminated by a NULL entry.
+ */
+static const fs_operation_def_t lx_autofs_vfstops[] = {
+	{ VFSNAME_MOUNT, lx_autofs_mount },
+	{ VFSNAME_UNMOUNT, lx_autofs_unmount },
+	{ VFSNAME_ROOT, lx_autofs_root },
+	{ VFSNAME_STATVFS, lx_autofs_statvfs },
+	{ NULL, NULL }
+};
+
+/*
+ * VOP entry points - simple passthrough
+ *
+ * For most VOP entry points we can simply pass the request on to
+ * the underlying filesystem we're mounted on.
+ */
+/* Pass a close straight through to the underlying vnode. */
+static int
+lx_autofs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
+{
+	vnode_t *under = vp->v_data;
+
+	return (VOP_CLOSE(under, flag, count, offset, cr));
+}
+
+/* Pass a directory read straight through to the underlying vnode. */
+static int
+lx_autofs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
+{
+	vnode_t *under = vp->v_data;
+
+	return (VOP_READDIR(under, uiop, cr, eofp));
+}
+
+/* Pass an access check straight through to the underlying vnode. */
+static int
+lx_autofs_access(vnode_t *vp, int mode, int flags, cred_t *cr)
+{
+	vnode_t *under = vp->v_data;
+
+	return (VOP_ACCESS(under, mode, flags, cr));
+}
+
+/* Take the read/write lock of the underlying vnode. */
+static int
+lx_autofs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+	vnode_t *under = vp->v_data;
+
+	return (VOP_RWLOCK(under, write_lock, ctp));
+}
+
+/* Drop the read/write lock of the underlying vnode. */
+static void
+lx_autofs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
+{
+	vnode_t *under = vp->v_data;
+
+	VOP_RWUNLOCK(under, write_lock, ctp);
+}
+
+/*
+ * Remove a directory by passing the request to the underlying directory
+ * vnode.
+ */
+/*ARGSUSED*/
+static int
+lx_autofs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr)
+{
+	vnode_t *under_dir = dvp->v_data;
+	vnode_t *under_cdir = cdir;
+
+	/*
+	 * cdir is the calling process's current directory.  When it is one
+	 * of our own vnodes, hand the underlying filesystem the vnode it
+	 * actually knows about.  (cdir only appears to be used to stop
+	 * users from deleting their current directory.)
+	 */
+	if (vn_matchops(cdir, lx_autofs_vn_ops))
+		under_cdir = cdir->v_data;
+
+	return (VOP_RMDIR(under_dir, nm, under_cdir, cr));
+}
+
+/*
+ * VOP entry points - special passthrough
+ *
+ * For some VOP entry points we will first pass the request on to
+ * the underlying filesystem we're mounted on.  If there's an error
+ * then we immediately return the error, but if the request succeeds
+ * we have to do some extra work before returning.
+ */
+/*
+ * Open the underlying vnode.  If the underlying open substituted a
+ * different vnode (a clone open), wrap the new vnode in a fresh lx_autofs
+ * vnode and return that in place of the one we were given.
+ */
+static int
+lx_autofs_open(vnode_t **vpp, int flag, cred_t *cr)
+{
+	vnode_t		*our_vp = *vpp;
+	vnode_t		*under = our_vp->v_data;
+	int		rv;
+
+	rv = VOP_OPEN(&under, flag, cr);
+	if (rv != 0)
+		return (rv);
+
+	/* A changed underlying vnode means this was a clone open. */
+	if (under != our_vp->v_data) {
+		*vpp = i_vn_alloc(our_vp->v_vfsp, under);
+		VN_RELE(our_vp);
+	}
+
+	return (0);
+}
+
+/*
+ * Fetch attributes from the underlying vnode, then report our own mount's
+ * device as the filesystem id.
+ */
+static int
+lx_autofs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
+{
+	vnode_t		*under = vp->v_data;
+	int		rv;
+
+	rv = VOP_GETATTR(under, vap, flags, cr);
+	if (rv == 0)
+		vap->va_fsid = vp->v_vfsp->vfs_dev;
+
+	return (rv);
+}
+
+/*
+ * Create a directory in the underlying filesystem and return an lx_autofs
+ * vnode that wraps the newly created directory.
+ */
+static int
+lx_autofs_mkdir(vnode_t *dvp, char *nm, struct vattr *vap, vnode_t **vpp,
+    cred_t *cr)
+{
+	vnode_t		*under_dir = dvp->v_data;
+	vnode_t		*under_new = NULL;
+	int		rv;
+
+	rv = VOP_MKDIR(under_dir, nm, vap, &under_new, cr);
+	if (rv != 0)
+		return (rv);
+
+	/* Report our own filesystem id in the returned attributes. */
+	vap->va_fsid = dvp->v_vfsp->vfs_dev;
+
+	/* Wrap the new underlying directory in one of our vnodes. */
+	*vpp = i_vn_alloc(dvp->v_vfsp, under_new);
+
+	return (0);
+}
+
+/*
+ * VOP entry points - custom
+ */
+/*
+ * lx_autofs_inactive - drop a hold on a vnode and free the vnode once the
+ * count reaches zero.
+ *
+ * Lock order is the per-mount lav_lock first, then the vnode's v_lock.
+ * On the free path both locks are still held when i_vn_free() is called;
+ * presumably i_vn_free() is responsible for dropping them -- confirm
+ * against its definition.
+ */
+/*ARGSUSED*/
+static void
+lx_autofs_inactive(struct vnode *vp, struct cred *cr)
+{
+	lx_autofs_vfs_t	*data = vp->v_vfsp->vfs_data;
+
+	/*
+	 * We need to hold the vfs lock because if we're going to free
+	 * this vnode we have to prevent anyone from looking it up
+	 * in the vnode hash.
+	 */
+	mutex_enter(&data->lav_lock);
+	mutex_enter(&vp->v_lock);
+
+	/* A count below one here means the hold accounting is broken. */
+	if (vp->v_count < 1) {
+		panic("lx_autofs_inactive: bad v_count");
+		/*NOTREACHED*/
+	}
+
+	/* Drop the temporary hold by vn_rele now. */
+	if (--vp->v_count > 0) {
+		/* Someone else still holds the vnode; nothing to free. */
+		mutex_exit(&vp->v_lock);
+		mutex_exit(&data->lav_lock);
+		return;
+	}
+
+	/*
+	 * No one should have been blocked on this lock because we're
+	 * about to free this vnode.
+	 */
+	i_vn_free(vp);
+}
+
+/*
+ * Look up a name in the underlying directory.  If it does not exist
+ * (ENOENT), ask the automounter to deal with it and then retry the
+ * lookup once.  On success the underlying vnode is returned wrapped in an
+ * lx_autofs vnode.
+ */
+static int
+lx_autofs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, cred_t *cr)
+{
+	vnode_t			*under_dir = dvp->v_data;
+	vnode_t			*under = NULL;
+	int			rv;
+
+	/* First see whether this path component already exists. */
+	rv = VOP_LOOKUP(under_dir, nm, &under, pnp, flags, rdir, cr);
+
+	/* Only a missing entry is referred to the automounter. */
+	if (rv == ENOENT) {
+		rv = i_automounter_call(dvp, nm);
+		if (rv != 0)
+			return (rv);
+
+		/* The automounter reported success; retry the lookup. */
+		rv = VOP_LOOKUP(under_dir, nm, &under, pnp, flags, rdir, cr);
+	}
+
+	if (rv != 0)
+		return (rv);
+
+	*vpp = i_vn_alloc(dvp->v_vfsp, under);
+	return (0);
+}
+
+/*
+ * Handle the automounter control ioctls ourselves and forward every other
+ * ioctl to the underlying vnode.
+ */
+/*ARGSUSED*/
+static int
+lx_autofs_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode, cred_t *cr,
+    int *rvalp)
+{
+	vnode_t			*under = vp->v_data;
+
+	switch ((uint_t)cmd) {
+	case LX_AUTOFS_IOC_READY:
+	case LX_AUTOFS_IOC_FAIL:
+	case LX_AUTOFS_IOC_CATATONIC:
+	case LX_AUTOFS_IOC_EXPIRE:
+	case LX_AUTOFS_IOC_PROTOVER:
+	case LX_AUTOFS_IOC_SETTIMEOUT:
+		/* One of ours: intercept it. */
+		return (i_automounter_ioctl(vp, cmd, arg));
+	default:
+		break;
+	}
+
+	/* Anything else belongs to the underlying filesystem. */
+	return (VOP_IOCTL(under, cmd, arg, mode, cr, rvalp));
+}
+
+/*
+ * VOP entry points definitions
+ */
+/*
+ * Vnode operations template: maps each vnode operation name to its
+ * lx_autofs implementation.  Passed to vn_make_ops() by lx_autofs_init().
+ * The two void-returning handlers are cast to fs_generic_func_p so they
+ * fit the table's function pointer type.  The list ends with a NULL entry.
+ */
+static const fs_operation_def_t lx_autofs_tops_root[] = {
+	{ VOPNAME_OPEN,		lx_autofs_open },
+	{ VOPNAME_CLOSE,	lx_autofs_close },
+	{ VOPNAME_IOCTL,	lx_autofs_ioctl },
+	{ VOPNAME_RWLOCK,	lx_autofs_rwlock },
+	{ VOPNAME_RWUNLOCK,	(fs_generic_func_p)lx_autofs_rwunlock },
+	{ VOPNAME_GETATTR,	lx_autofs_getattr },
+	{ VOPNAME_ACCESS,	lx_autofs_access },
+	{ VOPNAME_READDIR,	lx_autofs_readdir },
+	{ VOPNAME_LOOKUP,	lx_autofs_lookup },
+	{ VOPNAME_INACTIVE,	(fs_generic_func_p)lx_autofs_inactive },
+	{ VOPNAME_MKDIR,	lx_autofs_mkdir },
+	{ VOPNAME_RMDIR,	lx_autofs_rmdir },
+	{ NULL }
+};
+
+/*
+ * lx_autofs_init() gets invoked via the mod_install() call in
+ * this module's _init() routine.  Therefore, the code that cleans
+ * up the structures we allocate below is actually found in
+ * our _fini() routine.
+ */
+/*
+ * Filesystem initialization: obtain a major number for the devices we
+ * invent, then register the VFS and vnode operation templates.
+ *
+ * Returns 0 on success or an errno value on failure.
+ */
+/* ARGSUSED */
+static int
+lx_autofs_init(int fstype, char *name)
+{
+	int		rv;
+
+	/*
+	 * Look for a major number stored under our space key (presumably by
+	 * an earlier load of this module); allocate and store one if there
+	 * is none.
+	 */
+	lx_autofs_major = (major_t)space_fetch(LX_AUTOFS_SPACE_KEY_UDEV);
+	if (lx_autofs_major == 0) {
+		lx_autofs_major = getudev();
+		if (lx_autofs_major == (major_t)-1) {
+			cmn_err(CE_WARN, "lx_autofs_init: "
+			    "can't get unique device number");
+			return (EAGAIN);
+		}
+
+		rv = space_store(LX_AUTOFS_SPACE_KEY_UDEV,
+		    (uintptr_t)lx_autofs_major);
+		if (rv != 0) {
+			cmn_err(CE_WARN, "lx_autofs_init: "
+			    "can't save unique device number");
+			return (EAGAIN);
+		}
+	}
+
+	/* Register the VFS operations. */
+	lx_autofs_fstype = fstype;
+	rv = vfs_setfsops(fstype, lx_autofs_vfstops, &lx_autofs_vfsops);
+	if (rv != 0) {
+		cmn_err(CE_WARN, "lx_autofs_init: bad vfs ops template");
+		return (rv);
+	}
+
+	/* Register the vnode operations; undo the VFS ops on failure. */
+	rv = vn_make_ops("lx_autofs vnode ops", lx_autofs_tops_root,
+	    &lx_autofs_vn_ops);
+	if (rv != 0) {
+		VERIFY(vfs_freevfsops_by_type(fstype) == 0);
+		lx_autofs_vn_ops = NULL;
+		return (rv);
+	}
+
+	return (0);
+}
+
+
+/*
+ * Module linkage
+ */
+/*
+ * Mount options understood by lx_autofs.  Every option is flagged
+ * MO_HASVALUE, i.e. it carries a value.
+ */
+static mntopt_t lx_autofs_mntopt[] = {
+	{ LX_MNTOPT_FD,		NULL,	0,	MO_HASVALUE },
+	{ LX_MNTOPT_PGRP,	NULL,	0,	MO_HASVALUE },
+	{ LX_MNTOPT_MINPROTO,	NULL,	0,	MO_HASVALUE },
+	{ LX_MNTOPT_MAXPROTO,	NULL,	0,	MO_HASVALUE }
+};
+
+/* Option table descriptor: entry count followed by the table itself. */
+static mntopts_t lx_autofs_mntopts = {
+	sizeof (lx_autofs_mntopt) / sizeof (mntopt_t),
+	lx_autofs_mntopt
+};
+
+/*
+ * Filesystem definition: names the filesystem, its init routine
+ * (lx_autofs_init), its flags and its mount option prototype.
+ */
+static vfsdef_t vfw = {
+	VFSDEF_VERSION,
+	LX_AUTOFS_NAME,
+	lx_autofs_init,
+	VSW_HASPROTO | VSW_VOLATILEDEV,
+	&lx_autofs_mntopts
+};
+
+extern struct mod_ops mod_fsops;
+
+/* Filesystem module linkage, referencing the definition above. */
+static struct modlfs modlfs = {
+	&mod_fsops, "linux autofs filesystem", &vfw
+};
+
+/* Top-level linkage handed to mod_install()/mod_info()/mod_remove(). */
+static struct modlinkage modlinkage = {
+	MODREV_1, (void *)&modlfs, NULL
+};
+
+/* Module load entry point: install the filesystem module. */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/* Module information entry point. */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/*
+ * Module unload entry point: remove the module and, if that succeeds,
+ * free the vnode operations created by lx_autofs_init().
+ */
+int
+_fini(void)
+{
+	int		rv = mod_remove(&modlinkage);
+
+	if (rv != 0)
+		return (rv);
+
+	if (lx_autofs_vn_ops != NULL) {
+		vn_freevnodeops(lx_autofs_vn_ops);
+		lx_autofs_vn_ops = NULL;
+	}
+
+	/*
+	 * There is no vfs_freevfsops_by_type() call here, unlike the error
+	 * path of lx_autofs_init(): the fs framework already freed the vfs
+	 * ops for us as part of the mod_remove() call above.
+	 */
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
new file mode 100644
index 0000000000..ae049e2792
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.c
@@ -0,0 +1,395 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/modctl.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/stat.h>
+#include <sys/conf.h>
+#include <sys/frame.h>
+#include <sys/dtrace.h>
+#include <sys/dtrace_impl.h>
+
+#include <sys/lx_impl.h>
+
+/*
+ * Encoding of the per-probe argument (parg) handed to
+ * dtrace_probe_create(): the low LX_SYSTRACE_SHIFT bits hold the system
+ * call number, and the bit at position LX_SYSTRACE_SHIFT is set for
+ * "entry" probes and clear for "return" probes.
+ */
+#define	LX_SYSTRACE_SHIFT	16
+#define	LX_SYSTRACE_ISENTRY(x)	((int)(x) >> LX_SYSTRACE_SHIFT)
+#define	LX_SYSTRACE_SYSNUM(x)	((int)(x) & ((1 << LX_SYSTRACE_SHIFT) - 1))
+#define	LX_SYSTRACE_ENTRY(id)	((1 << LX_SYSTRACE_SHIFT) | (id))
+#define	LX_SYSTRACE_RETURN(id)	(id)
+
+/* aframes values passed to dtrace_probe_create() for each probe kind. */
+#define	LX_SYSTRACE_ENTRY_AFRAMES	2
+#define	LX_SYSTRACE_RETURN_AFRAMES	4
+
+/* Per-system-call state: name plus the ids of its enabled probes. */
+typedef struct lx_systrace_sysent {
+	const char *lss_name;		/* system call name */
+	dtrace_id_t lss_entry;		/* entry probe id or DTRACE_IDNONE */
+	dtrace_id_t lss_return;		/* return probe id or DTRACE_IDNONE */
+} lx_systrace_sysent_t;
+
+static dev_info_t *lx_systrace_devi;		/* set in attach */
+static dtrace_provider_id_t lx_systrace_id;	/* from dtrace_register() */
+static kmutex_t lx_systrace_lock;		/* guards lx_systrace_nenabled */
+static uint_t lx_systrace_nenabled;		/* count of enabled probes */
+
+static int lx_systrace_nsysent;			/* entries in table below */
+static lx_systrace_sysent_t *lx_systrace_sysent; /* allocated in attach */
+
+/*
+ * System call entry hook: fire the entry probe for sysnum if one is
+ * enabled.  Only arg0-arg4 are passed to dtrace_probe() directly.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_entry(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+    ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+	dtrace_id_t probe;
+
+	/* Ignore system call numbers outside our table. */
+	if (sysnum >= lx_systrace_nsysent)
+		return;
+
+	probe = lx_systrace_sysent[sysnum].lss_entry;
+	if (probe == DTRACE_IDNONE)
+		return;
+
+	dtrace_probe(probe, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*
+ * System call return hook: fire the return probe for sysnum if one is
+ * enabled.  Only arg0-arg4 are passed to dtrace_probe() directly.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_return(ulong_t sysnum, ulong_t arg0, ulong_t arg1, ulong_t arg2,
+    ulong_t arg3, ulong_t arg4, ulong_t arg5)
+{
+	dtrace_id_t probe;
+
+	/* Ignore system call numbers outside our table. */
+	if (sysnum >= lx_systrace_nsysent)
+		return;
+
+	probe = lx_systrace_sysent[sysnum].lss_return;
+	if (probe == DTRACE_IDNONE)
+		return;
+
+	dtrace_probe(probe, arg0, arg1, arg2, arg3, arg4);
+}
+
+/*
+ * Provider "provide" callback: create an entry and a return probe for
+ * every system call in our table that does not have them yet.  Requests
+ * for a specific probe description are ignored.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_provide(void *arg, const dtrace_probedesc_t *desc)
+{
+	int sysnum;
+
+	if (desc != NULL)
+		return;
+
+	for (sysnum = 0; sysnum < lx_systrace_nsysent; sysnum++) {
+		lx_systrace_sysent_t *ent = &lx_systrace_sysent[sysnum];
+
+		/* An existing entry probe means this call is already done. */
+		if (dtrace_probe_lookup(lx_systrace_id, NULL,
+		    ent->lss_name, "entry") != 0)
+			continue;
+
+		(void) dtrace_probe_create(lx_systrace_id, NULL,
+		    ent->lss_name, "entry", LX_SYSTRACE_ENTRY_AFRAMES,
+		    (void *)((uintptr_t)LX_SYSTRACE_ENTRY(sysnum)));
+
+		(void) dtrace_probe_create(lx_systrace_id, NULL,
+		    ent->lss_name, "return", LX_SYSTRACE_RETURN_AFRAMES,
+		    (void *)((uintptr_t)LX_SYSTRACE_RETURN(sysnum)));
+
+		/* Freshly created probes start out disabled. */
+		ent->lss_entry = DTRACE_IDNONE;
+		ent->lss_return = DTRACE_IDNONE;
+	}
+}
+
+/*
+ * Provider "enable" callback: turn on brand system call tracing when the
+ * first probe is enabled, then record the probe id in the slot selected
+ * by parg (entry or return) so the hooks start firing it.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_enable(void *arg, dtrace_id_t id, void *parg)
+{
+	uintptr_t cookie = (uintptr_t)parg;
+	int sysnum = LX_SYSTRACE_SYSNUM(cookie);
+	dtrace_id_t *slot;
+
+	ASSERT(sysnum < lx_systrace_nsysent);
+
+	mutex_enter(&lx_systrace_lock);
+	lx_systrace_nenabled++;
+	if (lx_systrace_nenabled == 1)
+		lx_brand_systrace_enable();
+	mutex_exit(&lx_systrace_lock);
+
+	slot = LX_SYSTRACE_ISENTRY(cookie) ?
+	    &lx_systrace_sysent[sysnum].lss_entry :
+	    &lx_systrace_sysent[sysnum].lss_return;
+	*slot = id;
+}
+
+/*
+ * Provider "disable" callback: clear the probe id from the slot selected
+ * by parg, then turn brand system call tracing off when the last enabled
+ * probe goes away.
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_disable(void *arg, dtrace_id_t id, void *parg)
+{
+	uintptr_t cookie = (uintptr_t)parg;
+	int sysnum = LX_SYSTRACE_SYSNUM(cookie);
+	dtrace_id_t *slot;
+
+	ASSERT(sysnum < lx_systrace_nsysent);
+
+	slot = LX_SYSTRACE_ISENTRY(cookie) ?
+	    &lx_systrace_sysent[sysnum].lss_entry :
+	    &lx_systrace_sysent[sysnum].lss_return;
+	*slot = DTRACE_IDNONE;
+
+	mutex_enter(&lx_systrace_lock);
+	lx_systrace_nenabled--;
+	if (lx_systrace_nenabled == 0)
+		lx_brand_systrace_disable();
+	mutex_exit(&lx_systrace_lock);
+}
+
+/*
+ * Provider "destroy" callback.  Intentionally empty: this provider keeps
+ * no per-probe allocations (parg is just an encoded integer).
+ */
+/*ARGSUSED*/
+static void
+lx_systrace_destroy(void *arg, dtrace_id_t id, void *parg)
+{
+}
+
+/*
+ * Provider "getarg" callback: fetch a probe argument that was not passed
+ * to dtrace_probe() directly, by reading it from the stack frame of the
+ * entry/return callback.
+ *
+ * Returns 0 for argno >= 6; otherwise the value read from the callback's
+ * stack arguments.
+ *
+ * NOTE(review): on amd64 the index becomes (argno + 1 - 6), which is only
+ * non-negative for argno == 5; this presumably relies on the framework
+ * calling here only for arguments beyond those handed to dtrace_probe()
+ * -- confirm.
+ */
+/*ARGSUSED*/
+static uint64_t
+lx_systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
+    int aframes)
+{
+	struct frame *fp = (struct frame *)dtrace_getfp();
+	uintptr_t *stack;
+	uint64_t val = 0;
+	int i;
+
+	if (argno >= 6)
+		return (0);
+
+	/*
+	 * Walk the four frames down the stack to the entry or return callback.
+	 * Our callback calls dtrace_probe() which calls dtrace_dif_variable()
+	 * which invokes this function to get the extended arguments. We get
+	 * the frame pointer in via call to dtrace_getfp() above which makes for
+	 * four frames.
+	 */
+	for (i = 0; i < 4; i++) {
+		fp = (struct frame *)fp->fr_savfp;
+	}
+
+	/* The callback's stack arguments start right after its frame. */
+	stack = (uintptr_t *)&fp[1];
+
+	/*
+	 * Skip the first argument to the callback -- the system call number.
+	 */
+	argno++;
+
+#ifdef __amd64
+	/*
+	 * On amd64, the first 6 arguments are passed in registers while
+	 * subsequent arguments are on the stack.
+	 */
+	argno -= 6;
+#endif
+
+	/* Read the stack with fault protection enabled around the access. */
+	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
+	val = stack[argno];
+	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
+
+	return (val);
+}
+
+
+/*
+ * Stability attributes for the provider, passed to dtrace_register().
+ * Each row is a { name stability, data stability, dependency class }
+ * triple; the row order is dictated by dtrace_pattr_t.
+ */
+static const dtrace_pattr_t lx_systrace_attr = {
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
+{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
+};
+
+/*
+ * Provider operations vector, passed to dtrace_register().  Only the
+ * provide, enable, disable, getarg and destroy callbacks are supplied;
+ * the remaining slots are NULL.  Slot order is dictated by dtrace_pops_t.
+ */
+static dtrace_pops_t lx_systrace_pops = {
+	lx_systrace_provide,
+	NULL,
+	lx_systrace_enable,
+	lx_systrace_disable,
+	NULL,
+	NULL,
+	NULL,
+	lx_systrace_getarg,
+	NULL,
+	lx_systrace_destroy
+};
+
+/*
+ * Driver attach: create the minor node, register the "lx-syscall"
+ * provider, build our per-system-call table from lx_sysent and install
+ * the entry/return hooks.
+ */
+static int
+lx_systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+{
+	int nsys = 0;
+	int i;
+
+	/* Resume needs no work; anything but attach is unsupported. */
+	if (cmd == DDI_RESUME)
+		return (DDI_SUCCESS);
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(devi, "lx_systrace", S_IFCHR,
+	    0, DDI_PSEUDO, NULL) == DDI_FAILURE) {
+		ddi_remove_minor_node(devi, NULL);
+		return (DDI_FAILURE);
+	}
+
+	if (dtrace_register("lx-syscall", &lx_systrace_attr,
+	    DTRACE_PRIV_KERNEL, 0, &lx_systrace_pops, NULL,
+	    &lx_systrace_id) != 0) {
+		ddi_remove_minor_node(devi, NULL);
+		return (DDI_FAILURE);
+	}
+
+	ddi_report_dev(devi);
+	lx_systrace_devi = devi;
+
+	/* Count the lx_brand system calls; a NULL sy_callc ends the table. */
+	while (lx_sysent[nsys].sy_callc != NULL)
+		nsys++;
+
+	/* Build our parallel table with every probe initially disabled. */
+	lx_systrace_sysent = kmem_zalloc(nsys * sizeof (lx_systrace_sysent_t),
+	    KM_SLEEP);
+	lx_systrace_nsysent = nsys;
+
+	for (i = 0; i < lx_systrace_nsysent; i++) {
+		lx_systrace_sysent_t *ent = &lx_systrace_sysent[i];
+
+		ent->lss_name = lx_sysent[i].sy_name;
+		ent->lss_entry = DTRACE_IDNONE;
+		ent->lss_return = DTRACE_IDNONE;
+	}
+
+	/* Install probe triggers. */
+	lx_systrace_entry_ptr = lx_systrace_entry;
+	lx_systrace_return_ptr = lx_systrace_return;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * Driver detach: unregister the provider, free the per-system-call table
+ * and remove the entry/return hooks.  Fails if the provider cannot be
+ * unregistered.
+ */
+/*ARGSUSED*/
+static int
+lx_systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
+{
+	size_t tblsz;
+
+	/* Suspend needs no work; anything but detach is unsupported. */
+	if (cmd == DDI_SUSPEND)
+		return (DDI_SUCCESS);
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	if (dtrace_unregister(lx_systrace_id) != 0)
+		return (DDI_FAILURE);
+
+	/* Free table. */
+	tblsz = lx_systrace_nsysent * sizeof (lx_systrace_sysent_t);
+	kmem_free(lx_systrace_sysent, tblsz);
+	lx_systrace_sysent = NULL;
+	lx_systrace_nsysent = 0;
+
+	/* Reset probe triggers. */
+	lx_systrace_entry_ptr = NULL;
+	lx_systrace_return_ptr = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/* Character device open: always succeeds and keeps no per-open state. */
+/*ARGSUSED*/
+static int
+lx_systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
+{
+	return (0);
+}
+
+/*
+ * Character/block entry points.  Only open is implemented; every other
+ * entry point is a nodev/nulldev stub.
+ * NOTE(review): close is nodev although open succeeds -- confirm this is
+ * intended.
+ */
+static struct cb_ops lx_systrace_cb_ops = {
+	lx_systrace_open,	/* open */
+	nodev,			/* close */
+	nulldev,		/* strategy */
+	nulldev,		/* print */
+	nodev,			/* dump */
+	nodev,			/* read */
+	nodev,			/* write */
+	nodev,			/* ioctl */
+	nodev,			/* devmap */
+	nodev,			/* mmap */
+	nodev,			/* segmap */
+	nochpoll,		/* poll */
+	ddi_prop_op,		/* cb_prop_op */
+	0,			/* streamtab */
+	D_NEW | D_MP		/* Driver compatibility flag */
+};
+
+/* Device operations: attach/detach plus the cb_ops table above. */
+static struct dev_ops lx_systrace_ops = {
+	DEVO_REV,		/* devo_rev */
+	0,			/* refcnt */
+	ddi_getinfo_1to1,	/* get_dev_info */
+	nulldev,		/* identify */
+	nulldev,		/* probe */
+	lx_systrace_attach,	/* attach */
+	lx_systrace_detach,	/* detach */
+	nodev,			/* reset */
+	&lx_systrace_cb_ops,	/* driver operations */
+	NULL,			/* bus operations */
+	nodev			/* dev power */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+	&mod_driverops,		/* module type (this is a pseudo driver) */
+	"Linux Brand System Call Tracing", /* name of module */
+	&lx_systrace_ops	/* driver ops */
+};
+
+/* Top-level linkage handed to mod_install()/mod_info()/mod_remove(). */
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	(void *)&modldrv,
+	NULL
+};
+
+/* Module load entry point: install the driver module. */
+int
+_init(void)
+{
+	int rv = mod_install(&modlinkage);
+
+	return (rv);
+}
+
+/* Module information entry point. */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
+
+/* Module unload entry point: remove the driver module. */
+int
+_fini(void)
+{
+	int rv = mod_remove(&modlinkage);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
new file mode 100644
index 0000000000..e4499c8a5b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/dtrace/lx_systrace.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+name="lx_systrace" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/ldlinux.c b/usr/src/uts/common/brand/lx/io/ldlinux.c
new file mode 100644
index 0000000000..76c5e1d255
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/ldlinux.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/ddi.h>
+#include <sys/cmn_err.h>
+#include <sys/modctl.h>
+#include <sys/ptms.h>
+#include <sys/stropts.h>
+#include <sys/strsun.h>
+#include <sys/sunddi.h>
+
+#include <sys/ldlinux.h>
+
+
+/*
+ * ldlinuxopen - open routine gets called when the module gets pushed onto the
+ * stream.
+ */
+/*
+ * Allocates the M_SETOPTS message that marks the stream as a tty and the
+ * per-stream ldlinux state, seeds that state from the system "ttymodes"
+ * property (or leaves it zeroed if the property is missing or has an
+ * unexpected size), then enables the queues and sends the message to the
+ * stream head.
+ *
+ * Returns 0 on success (including when already attached), EINVAL if not
+ * opened as a module, or ENOSR if the message cannot be allocated.
+ */
+/* ARGSUSED */
+static int
+ldlinuxopen(
+	queue_t    *q,		/* pointer to the read side queue */
+	dev_t   *devp,		/* pointer to stream tail's dev */
+	int	oflag,		/* the user open(2) supplied flags */
+	int	sflag,		/* open state flag */
+	cred_t *credp)		/* credentials */
+{
+	struct ldlinux *tp;	/* ldlinux entry for this module */
+	mblk_t *mop;
+	struct stroptions *sop;
+	struct termios *termiosp;
+	int len;
+	int have_modes = 0;	/* set once usable ttymodes were found */
+
+	if (sflag != MODOPEN)
+		return (EINVAL);
+
+	if (q->q_ptr != NULL) {
+		/* It's already attached. */
+		return (0);
+	}
+
+	mop = allocb(sizeof (struct stroptions), BPRI_MED);
+	if (mop == NULL)
+		return (ENOSR);
+	mop->b_datap->db_type = M_SETOPTS;
+	mop->b_wptr += sizeof (struct stroptions);
+	sop = (struct stroptions *)mop->b_rptr;
+	sop->so_flags = SO_ISTTY;
+
+	/*
+	 * Allocate state structure.  Zero it up front so that every field
+	 * has a defined value no matter which path is taken below.
+	 */
+	tp = kmem_alloc(sizeof (*tp), KM_SLEEP);
+	bzero(tp, sizeof (*tp));
+
+	/* Stash a pointer to our private data in q_ptr. */
+	q->q_ptr = WR(q)->q_ptr = tp;
+
+	/*
+	 * Get termios defaults.  These are stored as
+	 * a property in the "options" node.
+	 */
+	if (ddi_getlongprop(DDI_DEV_T_ANY, ddi_root_node(), 0, "ttymodes",
+	    (caddr_t)&termiosp, &len) == DDI_PROP_SUCCESS) {
+		if (len == sizeof (struct termios)) {
+			have_modes = 1;
+			if (termiosp->c_lflag & ICANON) {
+				tp->veof = termiosp->c_cc[VEOF];
+				tp->veol = termiosp->c_cc[VEOL];
+				tp->vmin = 1;
+				tp->vtime = 0;
+			} else {
+				tp->veof = 0;
+				tp->veol = 0;
+				tp->vmin = termiosp->c_cc[VMIN];
+				tp->vtime = termiosp->c_cc[VTIME];
+			}
+		}
+
+		/*
+		 * The property buffer was allocated for us and must be
+		 * freed whenever the lookup succeeded, even when its size
+		 * is not what we expected.
+		 */
+		kmem_free(termiosp, len);
+	}
+
+	if (!have_modes) {
+		/* Fall back to the all-zero defaults set above. */
+		cmn_err(CE_WARN,
+		    "ldlinuxopen: Couldn't get ttymodes property!");
+	}
+
+	tp->state = 0;
+
+	/*
+	 * Commit to the open and send the M_SETOPTS off to the stream head.
+	 */
+	qprocson(q);
+	putnext(q, mop);
+
+	return (0);
+}
+
+
+/*
+ * ldlinuxclose - This routine gets called when the module gets
+ * popped off of the stream.
+ */
+/* ARGSUSED */
+static int
+ldlinuxclose(queue_t *q, int flag, cred_t *credp)
+{
+	struct ldlinux *state;
+
+	/* Stop put procedures from running before tearing down state. */
+	qprocsoff(q);
+
+	/* Free the per-stream state and clear both queues' pointers. */
+	state = q->q_ptr;
+	kmem_free(state, sizeof (struct ldlinux));
+	WR(q)->q_ptr = NULL;
+	q->q_ptr = NULL;
+
+	return (0);
+}
+
+
+/*
+ * Handle an M_IOCTL message.  TIOCSETLD and TIOCGETLD are answered here
+ * from the per-stream cache of veof/veol/vmin/vtime.  PTSSTTY is noted in
+ * the state flags and, like every other ioctl, passed on down the stream.
+ */
+static void
+do_ioctl(queue_t *q, mblk_t *mp)
+{
+	struct ldlinux	*state = q->q_ptr;
+	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
+	struct lx_cc	*cc;
+	mblk_t		*reply;
+	int		cmd = iocp->ioc_cmd;
+	int		err;
+
+	if (cmd == TIOCSETLD) {
+		/* Make the caller supplied data contiguous and long enough. */
+		err = miocpullup(mp, sizeof (struct lx_cc));
+		if (err != 0) {
+			miocnak(q, mp, 0, err);
+			return;
+		}
+
+		/* Copy the caller supplied data into our per-stream cache. */
+		cc = (struct lx_cc *)mp->b_cont->b_rptr;
+		state->veof = cc->veof;
+		state->veol = cc->veol;
+		state->vmin = cc->vmin;
+		state->vtime = cc->vtime;
+
+		/* Acknowledge: we're done. */
+		miocack(q, mp, 0, 0);
+		return;
+	}
+
+	if (cmd == TIOCGETLD) {
+		/* Allocate a block to carry the reply data. */
+		reply = allocb(sizeof (struct lx_cc), BPRI_MED);
+		if (reply == NULL) {
+			miocnak(q, mp, 0, ENOSR);
+			return;
+		}
+
+		/* Turn the request into an ack carrying that block. */
+		mioc2ack(mp, reply, sizeof (struct lx_cc), 0);
+
+		/* Fill the reply from our per-stream cache. */
+		cc = (struct lx_cc *)mp->b_cont->b_rptr;
+		cc->veof = state->veof;
+		cc->veol = state->veol;
+		cc->vmin = state->vmin;
+		cc->vtime = state->vtime;
+
+		qreply(q, mp);
+		return;
+	}
+
+	/* Remember PTSSTTY, but still let it travel down the stream. */
+	if (cmd == PTSSTTY)
+		state->state |= ISPTSTTY;
+
+	putnext(q, mp);
+}
+
+
+/*
+ * ldlinuxput - Module read and write queue put procedure.
+ */
+static void
+ldlinuxput(queue_t *q, mblk_t *mp)
+{
+	struct ldlinux *state = q->q_ptr;
+	int on_read_side = ((q->q_flag & QREADR) != 0);
+
+	if (DB_TYPE(mp) == M_IOCTL) {
+		/* ioctls are only handled on the write side. */
+		if (!on_read_side) {
+			do_ioctl(q, mp);
+			return;
+		}
+	} else if (DB_TYPE(mp) == M_FLUSH) {
+		/*
+		 * Flush our queue when the request covers the side this
+		 * queue is on: FLUSHR for the read side, FLUSHW for the
+		 * write side.
+		 */
+		if ((on_read_side && (*mp->b_rptr & FLUSHR)) ||
+		    (!on_read_side && (*mp->b_rptr & FLUSHW))) {
+			if ((state->state & ISPTSTTY) &&
+			    (*mp->b_rptr & FLUSHBAND))
+				flushband(q, *(mp->b_rptr + 1), FLUSHDATA);
+			else
+				flushq(q, FLUSHDATA);
+		}
+	}
+
+	/* Everything not consumed above continues along the stream. */
+	putnext(q, mp);
+}
+
+
+/* STREAMS module identification and queue limits. */
+static struct module_info ldlinux_info = {
+	LDLINUX_MODID,
+	LDLINUX_MOD,
+	0,
+	INFPSZ,
+	0,
+	0
+};
+
+/*
+ * Queue initialization: ldlinuxput is the put procedure, there is no
+ * service procedure, and ldlinuxopen/ldlinuxclose handle push and pop.
+ */
+static struct qinit ldlinuxinit = {
+	(int (*)()) ldlinuxput,
+	NULL,
+	ldlinuxopen,
+	ldlinuxclose,
+	NULL,
+	&ldlinux_info
+};
+
+/* The same qinit serves both the read and the write queue. */
+static struct streamtab ldlinuxinfo = {
+	&ldlinuxinit,
+	&ldlinuxinit
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct fmodsw fsw = {
+	LDLINUX_MOD,
+	&ldlinuxinfo,
+	D_MTQPAIR | D_MP
+};
+
+/* STREAMS module linkage, referencing the fmodsw entry above. */
+static struct modlstrmod modlstrmod = {
+	&mod_strmodops, "termios extensions for lx brand", &fsw
+};
+
+/* Top-level linkage handed to mod_install()/mod_info()/mod_remove(). */
+static struct modlinkage modlinkage = {
+	MODREV_1, &modlstrmod, NULL
+};
+
+/*
+ * Module load entry point: install the STREAMS module.  Declared with an
+ * explicit (void) parameter list so the definition is a full prototype.
+ */
+int
+_init(void)
+{
+	return (mod_install(&modlinkage));
+}
+
+/*
+ * Module unload entry point: remove the STREAMS module.  Declared with an
+ * explicit (void) parameter list so the definition is a full prototype.
+ */
+int
+_fini(void)
+{
+	return (mod_remove(&modlinkage));
+}
+
+/* Module information entry point. */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv = mod_info(&modlinkage, modinfop);
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c
new file mode 100644
index 0000000000..07c3bd0949
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.c
@@ -0,0 +1,2026 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/audio.h>
+#include <sys/conf.h>
+#include <sys/debug.h>
+#include <sys/disp.h>
+#include <sys/ddi.h>
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/kmem.h>
+#include <sys/lx_audio.h>
+#include <sys/mixer.h>
+#include <sys/modhash.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/stropts.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+
+/* Properties used by the lx_audio driver */
+#define	LXA_PROP_INPUTDEV		"inputdev"
+#define	LXA_PROP_OUTPUTDEV		"outputdev"
+
+/* default device paths used by this driver */
+#define	LXA_DEV_DEFAULT			"/dev/audio"
+#define	LXA_DEV_CUSTOM_DIR		"/dev/sound/"
+
+/* maximum possible number of concurrent opens of this driver */
+#define	LX_AUDIO_MAX_OPENS		1024
+
+/*
+ * these are default fragment size and fragment count values.
+ * these values were chosen to make quake work well on my
+ * laptop: 2Ghz Pentium M + NVIDIA GeForce Go 6400.
+ *
+ * for reference:
+ * - 1 sec of stereo output at 44Khz is about 171 Kb of data
+ * - 1 sec of mono output at 8Khz is about 8Kb of data
+ */
+#define	LXA_OSS_FRAG_SIZE		(1024)	/* 1/8 sec at 8Khz mono */
+#define	LXA_OSS_FRAG_CNT		(1024 * 2)
+
+/* maximum amount of fragment memory we'll allow a process to mmap */
+#define	LXA_OSS_FRAG_MEM		(1024 * 1024 * 2) /* 2Mb */
+
+/* forward declarations */
+typedef struct lxa_state lxa_state_t;
+typedef struct lxa_zstate lxa_zstate_t;
+
+/*
+ * Structure and enum declarations
+ */
+/*
+ * The kind of device node an open refers to.  Zero is reserved as an
+ * invalid value.
+ */
+typedef enum {
+	LXA_TYPE_INVALID	= 0,
+	LXA_TYPE_AUDIO		= 1,	/* audio device */
+	LXA_TYPE_AUDIOCTL	= 2	/* audio control/mixer device */
+} lxa_dev_type_t;
+
+/*
+ * Per-zone audio state.  Each open's lxa_state_t points back at one of
+ * these through lxas_zs.
+ */
+struct lxa_zstate {
+	char			*lxa_zs_zonename;
+
+	/*
+	 * we could store the input/output audio device setting here,
+	 * but instead we're keeping them as device node properties
+	 * so that a user can easily see the audio configuration for
+	 * a zone via prtconf.
+	 */
+
+	/*
+	 * OSS doesn't support multiple opens of the audio device.
+	 * (multiple opens of the mixer device are supported.)
+	 * so here we'll keep a pointer to any open input/output
+	 * streams.  (OSS does support two opens if one is for input
+	 * and the other is for output.)
+	 */
+	lxa_state_t		*lxa_zs_istate;
+	lxa_state_t		*lxa_zs_ostate;
+
+	/*
+	 * we need to cache channel gain and balance.  channel gain and
+	 * balance map to PCM volume in OSS, which are supposedly a property
+	 * of the underlying hardware.  but in solaris, channels are
+	 * implemented in software and only exist when an audio device
+	 * is actually open.  (each open returns a unique channel.)  OSS
+	 * apps will expect consistent PCM volume set/get operations to
+	 * work even if no audio device is open.  hence, if no underlying
+	 * device is open we need to cache the gain and balance setting.
+	 */
+	lxa_mixer_levels_t	lxa_zs_pcm_levels;
+};
+
+/*
+ * Per-open state.  One of these exists for every open of the lx_audio
+ * driver and is released by lxa_state_close().
+ */
+struct lxa_state {
+	lxa_zstate_t	*lxas_zs;	/* zone state pointer */
+
+	dev_t		lxas_dev_old;	/* dev_t used to open the device */
+	dev_t		lxas_dev_new;	/* new dev_t assigned to an open */
+	int		lxas_flags;	/* original flags passed to open */
+	lxa_dev_type_t	lxas_type;	/* type of device that was opened */
+
+	int		lxas_devs_same;	/* input and output device the same? */
+
+	/* input device variables */
+	ldi_handle_t	lxas_idev_lh;		/* ldi handle for access */
+	int		lxas_idev_flags;	/* flags used for open */
+
+	/* output device variables */
+	ldi_handle_t	lxas_odev_lh;		/* ldi handle for access */
+	int		lxas_odev_flags;	/* flags used for open */
+
+	/*
+	 * since we support multiplexing of devices we need to remember
+	 * certain parameters about the devices
+	 */
+	uint_t		lxas_hw_features;
+	uint_t		lxas_sw_features;
+
+	uint_t		lxas_frag_size;
+	uint_t		lxas_frag_cnt;
+
+	/*
+	 * members needed to support mmap device access.  note that to
+	 * simplify things we only support one mmap access per open.
+	 * the lxas_mmap_thread* members are updated while holding lxa_lock.
+	 */
+	ddi_umem_cookie_t	lxas_umem_cookie;
+	char			*lxas_umem_ptr;
+	size_t			lxas_umem_len;
+	kthread_t		*lxas_mmap_thread;
+	int			lxas_mmap_thread_running;
+	int			lxas_mmap_thread_exit;
+	int			lxas_mmap_thread_frag;
+};
+
+/*
+ * Global variables
+ *
+ * lxa_dip		device node on which the per-zone audio properties
+ *			are looked up
+ * lxa_lock		protects the per-zone istate/ostate links and the
+ *			mmap output thread state of each open
+ * lxa_minor_id		id space from which the minor number of each open
+ *			is taken (released in lxa_state_close())
+ * lxa_state_hash	per-open state, keyed by minor number
+ * lxa_zstate_hash	presumably the per-zone state table -- its users are
+ *			not visible here, confirm against the rest of the file
+ */
+dev_info_t	*lxa_dip = NULL;
+kmutex_t	lxa_lock;
+id_space_t	*lxa_minor_id = NULL;
+mod_hash_t	*lxa_state_hash = NULL;
+mod_hash_t	*lxa_zstate_hash = NULL;
+size_t		lxa_state_hash_size = 15;
+size_t		lxa_zstate_hash_size = 15;
+size_t		lxa_registered_zones = 0;
+
+/*
+ * function declarations
+ */
+static void lxa_mmap_output_disable(lxa_state_t *);
+
+/*
+ * functions
+ */
+/*
+ * lxa_state_close - tear down the state associated with one open.
+ *
+ * Stops any mmap output thread, unlinks the state from the per-zone
+ * input/output slots, removes it from lxa_state_hash, closes the layered
+ * audio device handles, frees any mmap memory, releases the minor number
+ * and finally frees the state structure itself.  lxa_state must not be
+ * used by the caller after this function returns.
+ */
+static void
+lxa_state_close(lxa_state_t *lxa_state)
+{
+	lxa_zstate_t		*lxa_zs = lxa_state->lxas_zs;
+	minor_t			minor = getminor(lxa_state->lxas_dev_new);
+
+	/* disable any mmap output that might still be going on */
+	lxa_mmap_output_disable(lxa_state);
+
+	/*
+	 * if this was the active input/output device, unlink it from
+	 * the global zone state so that other opens of the audio device
+	 * can now succeed.
+	 */
+	mutex_enter(&lxa_lock);
+	if (lxa_zs->lxa_zs_istate == lxa_state)
+		lxa_zs->lxa_zs_istate = NULL;
+	if (lxa_zs->lxa_zs_ostate == lxa_state) {
+		lxa_zs->lxa_zs_ostate = NULL;
+	}
+	mutex_exit(&lxa_lock);
+
+	/* remove this state structure from the hash (if it's there) */
+	(void) mod_hash_remove(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)minor, (mod_hash_val_t *)&lxa_state);
+
+	/* close any audio device that we have open */
+	if (lxa_state->lxas_idev_lh != NULL)
+		(void) ldi_close(lxa_state->lxas_idev_lh,
+		    lxa_state->lxas_idev_flags, kcred);
+	if (lxa_state->lxas_odev_lh != NULL)
+		(void) ldi_close(lxa_state->lxas_odev_lh,
+		    lxa_state->lxas_odev_flags, kcred);
+
+	/* free up any memory allocated by mmaps */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		ddi_umem_free(lxa_state->lxas_umem_cookie);
+
+	/* release the id associated with this state structure */
+	id_free(lxa_minor_id, minor);
+
+	/* nothing references the state any more, so it can be freed */
+	kmem_free(lxa_state, sizeof (*lxa_state));
+}
+
+/* return the name of the zone the calling process is running in */
+static char *
+getzonename(void)
+{
+	zone_t	*zp = curproc->p_zone;
+
+	return (zp->zone_name);
+}
+
+/*
+ * free a string whose allocation is exactly strlen(str) + 1 bytes, such
+ * as one returned by strdup() below.
+ */
+static void
+strfree(char *str)
+{
+	size_t	size = strlen(str) + 1;
+
+	kmem_free(str, size);
+}
+
+/*
+ * return a kmem allocated copy of str.  the allocation sleeps until
+ * memory is available; release the copy with strfree().
+ */
+static char *
+strdup(char *str)
+{
+	int	size = strlen(str) + 1;		/* include the terminator */
+	char	*copy;
+
+	copy = kmem_alloc(size, KM_SLEEP);
+	bcopy(str, copy, size);
+	return (copy);
+}
+
+/*
+ * lxa_devprop_name - build the per-zone property name "<zname>_<pname>".
+ * The result is kmem allocated and is released with strfree().
+ */
+static char *
+lxa_devprop_name(char *zname, char *pname)
+{
+	char	*name;
+	int	size;
+
+	ASSERT((pname != NULL) && (zname != NULL));
+
+	/* zone name + '_' + property name + terminating NUL */
+	size = strlen(zname) + 1 + strlen(pname) + 1;
+
+	name = kmem_alloc(size, KM_SLEEP);
+	(void) snprintf(name, size, "%s_%s", zname, pname);
+	return (name);
+}
+
+/*
+ * lxa_devprop_verify - sanity check the value of an audio device property.
+ *
+ * A value is valid if it is one of the keywords "default" or "none" (both
+ * of which lxa_devprop_lookup() knows how to interpret) or if it consists
+ * solely of decimal digits.
+ *
+ * Returns 0 if the value is acceptable and -1 if it is not.
+ */
+static int
+lxa_devprop_verify(char *pval)
+{
+	int	n;
+
+	ASSERT(pval != NULL);
+
+	if ((strcmp(pval, "default") == 0) || (strcmp(pval, "none") == 0))
+		return (0);
+
+	/*
+	 * make sure the value is an integer.  a character is rejected if
+	 * it falls on either side of the '0'..'9' range, hence the "||".
+	 */
+	for (n = 0; pval[n] != '\0'; n++) {
+		if ((pval[n] < '0') || (pval[n] > '9')) {
+			return (-1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * lxa_devprop_lookup - translate a zone's audio property into a device path.
+ *
+ * Looks up the string property "<zname>_<pname>" on the lx_audio device
+ * node and maps its value as follows:
+ *	"none"		no audio device is configured (NULL is returned)
+ *	"default"	LXA_DEV_DEFAULT
+ *	anything else	LXA_DEV_CUSTOM_DIR followed by the value
+ * If lxa_type is LXA_TYPE_AUDIOCTL then "ctl" is appended to the path.
+ *
+ * Returns NULL if the property doesn't exist, is invalid, or is "none".
+ * Otherwise returns a kmem allocated path that the caller must release
+ * with strfree().
+ */
+static char *
+lxa_devprop_lookup(char *zname, char *pname, lxa_dev_type_t lxa_type)
+{
+	char		*zprop_name, *pval;
+	char		*dev_path;
+	int		n, rv;
+
+	ASSERT((pname != NULL) && (zname != NULL));
+	ASSERT((lxa_type == LXA_TYPE_AUDIO) || (lxa_type == LXA_TYPE_AUDIOCTL));
+
+	zprop_name = lxa_devprop_name(zname, pname);
+
+	/* attempt to lookup the property */
+	rv = ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, zprop_name, &pval);
+	strfree(zprop_name);
+
+	if (rv != DDI_PROP_SUCCESS)
+		return (NULL);
+
+	/*
+	 * an invalid value and the keyword "none" both mean that there is
+	 * no usable audio device.  either way the property value has to
+	 * be released before we return.
+	 */
+	if ((lxa_devprop_verify(pval) != 0) || (strcmp(pval, "none") == 0)) {
+		ddi_prop_free(pval);
+		return (NULL);
+	}
+
+	if (strcmp(pval, "default") == 0) {
+		/* use the default audio device on the system */
+		dev_path = strdup(LXA_DEV_DEFAULT);
+	} else {
+		/* a custom audio device was specified, generate a path */
+		n = snprintf(NULL, 0, "%s%s", LXA_DEV_CUSTOM_DIR, pval) + 1;
+		dev_path = kmem_alloc(n, KM_SLEEP);
+		(void) snprintf(dev_path, n, "%s%s", LXA_DEV_CUSTOM_DIR, pval);
+	}
+	ddi_prop_free(pval);
+
+	/*
+	 * if this is an audio control device then we need to append
+	 * "ctl" to the path
+	 */
+	if (lxa_type == LXA_TYPE_AUDIOCTL) {
+		char	*tmp;
+		n = snprintf(NULL, 0, "%s%s", dev_path, "ctl") + 1;
+		tmp = kmem_alloc(n, KM_SLEEP);
+		(void) snprintf(tmp, n, "%s%s", dev_path, "ctl");
+		strfree(dev_path);
+		dev_path = tmp;
+	}
+
+	return (dev_path);
+}
+
+/*
+ * lxa_dev_getfeatures - work out the feature set reported for this open.
+ *
+ * Resets the fragment size and count to their defaults, queries the open
+ * input and/or output devices with AUDIO_GETINFO, and stores the resulting
+ * software and hardware feature masks in lxa_state.
+ *
+ * Returns 0 on success, the error from ldi_ioctl() if a device can't be
+ * queried, or EIO if a single device opened for both input and output
+ * reports inconsistent features.
+ */
+static int
+lxa_dev_getfeatures(lxa_state_t *lxa_state)
+{
+	audio_info_t	ai_idev, ai_odev;
+	int		n, rv;
+
+	/* set a default fragment size */
+	lxa_state->lxas_frag_size = LXA_OSS_FRAG_SIZE;
+	lxa_state->lxas_frag_cnt = LXA_OSS_FRAG_CNT;
+
+	/* get info for the currently open audio devices */
+	if ((lxa_state->lxas_idev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_idev_lh,
+	    AUDIO_GETINFO, (intptr_t)&ai_idev, FKIOCTL, kcred, &n)) != 0))
+		return (rv);
+	if ((lxa_state->lxas_odev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_GETINFO, (intptr_t)&ai_odev, FKIOCTL, kcred, &n)) != 0))
+		return (rv);
+
+	/* if we're only open for reading or writing then it's easy */
+	if (lxa_state->lxas_idev_lh == NULL) {
+		lxa_state->lxas_sw_features = ai_odev.sw_features;
+		lxa_state->lxas_hw_features = ai_odev.hw_features;
+		return (0);
+	} else if (lxa_state->lxas_odev_lh == NULL) {
+		lxa_state->lxas_sw_features = ai_idev.sw_features;
+		lxa_state->lxas_hw_features = ai_idev.hw_features;
+		return (0);
+	}
+
+	/*
+	 * well if we're open for reading and writing but the underlying
+	 * device is the same then it's also pretty easy
+	 */
+	if (lxa_state->lxas_devs_same) {
+		if ((ai_odev.sw_features != ai_idev.sw_features) ||
+		    (ai_odev.hw_features != ai_idev.hw_features)) {
+			zcmn_err(getzoneid(), CE_WARN, "lx_audio error: "
+			    "audio device reported inconsistent features");
+			return (EIO);
+		}
+		lxa_state->lxas_sw_features = ai_odev.sw_features;
+		lxa_state->lxas_hw_features = ai_odev.hw_features;
+		return (0);
+	}
+
+	/*
+	 * figure out which software features we're going to support.
+	 * we will report a feature as supported if both the input
+	 * and output device support it.
+	 */
+	lxa_state->lxas_sw_features = 0;
+	n = ai_idev.sw_features & ai_odev.sw_features;
+	if (n & AUDIO_SWFEATURE_MIXER)
+		lxa_state->lxas_sw_features |= AUDIO_SWFEATURE_MIXER;
+
+	/*
+	 * figure out which hardware features we're going to support.
+	 * for a first pass we will report a feature as supported if
+	 * both the input and output device support it.
+	 */
+	lxa_state->lxas_hw_features = 0;
+	n = ai_idev.hw_features & ai_odev.hw_features;
+	if (n & AUDIO_HWFEATURE_MSCODEC)
+		lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_MSCODEC;
+
+	/*
+	 * if we made it here then we have different audio input and output
+	 * devices.  this will allow us to report support for additional
+	 * hardware features that may not be supported by just the input or
+	 * output device alone.  note that these features are added to
+	 * (not substituted for) the features accumulated above.
+	 */
+
+	/* always report that we support both playback and recording */
+	lxa_state->lxas_hw_features |=
+	    AUDIO_HWFEATURE_PLAY | AUDIO_HWFEATURE_RECORD;
+
+	/* always report full duplex support */
+	lxa_state->lxas_hw_features |= AUDIO_HWFEATURE_DUPLEX;
+
+	/* never report that we have input to output loopback support */
+	ASSERT((lxa_state->lxas_hw_features & AUDIO_HWFEATURE_IN2OUT) == 0);
+	return (0);
+}
+
+/*
+ * lxa_dev_open - open the underlying audio device(s) for this open.
+ *
+ * The input and output device paths are taken from the calling zone's
+ * device properties.  On success the layered handles are stored in
+ * lxas_idev_lh and lxas_odev_lh (either may be NULL if that direction
+ * wasn't opened) along with the flags used to open them.
+ *
+ * Returns 0 on success, ENODEV if the zone has no suitable device
+ * configured or an ldi ident can't be obtained, or the error returned by
+ * ldi_open_by_name().  On failure no device handle is left open.
+ */
+static int
+lxa_dev_open(lxa_state_t *lxa_state)
+{
+	char		*idev, *odev;
+	int		flags;
+	int		rv = 0;	/* stays 0 if there is nothing to open */
+	ldi_handle_t	lh;
+	ldi_ident_t	li = NULL;
+
+	ASSERT((lxa_state->lxas_type == LXA_TYPE_AUDIO) ||
+	    (lxa_state->lxas_type == LXA_TYPE_AUDIOCTL));
+
+	/*
+	 * check if we have configuration properties for this zone.
+	 * if we don't then audio isn't supported in this zone.
+	 */
+	idev = lxa_devprop_lookup(getzonename(), LXA_PROP_INPUTDEV,
+	    lxa_state->lxas_type);
+	odev = lxa_devprop_lookup(getzonename(), LXA_PROP_OUTPUTDEV,
+	    lxa_state->lxas_type);
+
+	/* make sure there is at least one device to read from or write to */
+	if ((idev == NULL) && (odev == NULL))
+		return (ENODEV);
+
+	/* see if the input and output devices are actually the same device */
+	lxa_state->lxas_devs_same = 0;
+	if (((idev != NULL) && (odev != NULL)) &&
+	    (strcmp(idev, odev) == 0))
+		lxa_state->lxas_devs_same = 1;
+
+	/* we don't respect FEXCL */
+	flags = lxa_state->lxas_flags & ~FEXCL;
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		/*
+		 * if we're opening audio devices then we need to muck
+		 * with the FREAD/FWRITE flags.
+		 *
+		 * certain audio device may only support input or output
+		 * (but not both.)  so if we're multiplexing input/output
+		 * to different devices we need to make sure we don't try
+		 * and open the output device for reading and the input
+		 * device for writing.
+		 *
+		 * if we're using the same device for input/output we still
+		 * need to do this because some audio devices won't let
+		 * themselves be opened multiple times for read access.
+		 */
+		lxa_state->lxas_idev_flags = flags & ~FWRITE;
+		lxa_state->lxas_odev_flags = flags & ~FREAD;
+
+		/* make sure we have devices to read from and write to */
+		if (((flags & FREAD) && (idev == NULL)) ||
+		    ((flags & FWRITE) && (odev == NULL))) {
+			rv = ENODEV;
+			goto out;
+		}
+	} else {
+		lxa_state->lxas_idev_flags = lxa_state->lxas_odev_flags = flags;
+	}
+
+	/* get an ident to open the devices */
+	if (ldi_ident_from_dev(lxa_state->lxas_dev_new, &li) != 0) {
+		rv = ENODEV;
+		goto out;
+	}
+
+	/* open the input device */
+	lxa_state->lxas_idev_lh = NULL;
+	if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+	    (lxa_state->lxas_idev_flags & FREAD)) &&
+	    (idev != NULL)) {
+		rv = ldi_open_by_name(idev, lxa_state->lxas_idev_flags,
+		    kcred, &lh, li);
+		if (rv != 0) {
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "unable to open audio device: %s", idev);
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "possible zone audio configuration error");
+			goto out;
+		}
+		lxa_state->lxas_idev_lh = lh;
+	}
+
+	/* open the output device */
+	lxa_state->lxas_odev_lh = NULL;
+	if (((lxa_state->lxas_type == LXA_TYPE_AUDIOCTL) ||
+	    (lxa_state->lxas_odev_flags & FWRITE)) &&
+	    (odev != NULL)) {
+		rv = ldi_open_by_name(odev, lxa_state->lxas_odev_flags,
+		    kcred, &lh, li);
+		if (rv != 0) {
+			/*
+			 * if we opened an input device, close it now and
+			 * forget the handle so that it can't be closed a
+			 * second time when the state is torn down.
+			 */
+			if (lxa_state->lxas_idev_lh != NULL) {
+				(void) ldi_close(lxa_state->lxas_idev_lh,
+				    lxa_state->lxas_idev_flags, kcred);
+				lxa_state->lxas_idev_lh = NULL;
+			}
+
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "unable to open audio device: %s", odev);
+			zcmn_err(getzoneid(), CE_WARN, "lxa_open_dev: "
+			    "possible zone audio configuration error");
+			goto out;
+		}
+		lxa_state->lxas_odev_lh = lh;
+	}
+
+	/* free up stuff */
+out:
+	if (li != NULL)
+		ldi_ident_release(li);
+	if (idev != NULL)
+		strfree(idev);
+	if (odev != NULL)
+		strfree(odev);
+
+	return (rv);
+}
+
+/*
+ * lxa_mmap_thread_exit - terminate the calling mmap output thread.
+ *
+ * Resets the mmap thread bookkeeping in lxa_state under lxa_lock and then
+ * exits the current thread, so this function never returns.  It must be
+ * called without lxa_lock held since it acquires the lock itself.
+ */
+void
+lxa_mmap_thread_exit(lxa_state_t *lxa_state)
+{
+	mutex_enter(&lxa_lock);
+	lxa_state->lxas_mmap_thread = NULL;
+	lxa_state->lxas_mmap_thread_frag = 0;
+	lxa_state->lxas_mmap_thread_running = 0;
+	lxa_state->lxas_mmap_thread_exit = 0;
+	mutex_exit(&lxa_lock);
+	thread_exit();
+	/*NOTREACHED*/
+}
+
+/*
+ * lxa_mmap_thread - kernel thread that plays the mmaped output buffer.
+ *
+ * The mmaped buffer is treated as a ring of fragments, each
+ * lxas_frag_size bytes long.  Each fragment is written to the output
+ * device followed by a zero length write, which makes the device bump
+ * its play.eof counter once the fragment has been processed.  The thread
+ * starts with three fragments queued and then polls play.eof, queueing
+ * as many new fragments as the device reports finished.
+ *
+ * The thread runs until lxas_mmap_thread_exit is set (see
+ * lxa_mmap_output_disable()) or until the output device can't be set up,
+ * and always leaves via lxa_mmap_thread_exit().  Write and polling
+ * errors restart output from fragment 0.
+ */
+void
+lxa_mmap_thread(lxa_state_t *lxa_state)
+{
+	struct uio	uio, uio_null;
+	iovec_t		iovec, iovec_null;
+	uint_t		bytes_per_sec, usec_per_frag, ticks_per_frag;
+	int		rv, junk, eof, retry;
+	audio_info_t	ai;
+
+	/* we better be setup for writing to the output device */
+	ASSERT((lxa_state->lxas_flags & FWRITE) != 0);
+	ASSERT(lxa_state->lxas_odev_lh != NULL);
+
+	/* setup a uio to output one fragment */
+	uio.uio_iov = &iovec;
+	uio.uio_iovcnt = 1;
+	uio.uio_offset = 0;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_fmode = 0;
+	uio.uio_extflg = 0;
+	uio.uio_llimit = MAXOFFSET_T;
+
+	/* setup a uio to output a eof (a fragment with a length of 0) */
+	uio_null.uio_iov = &iovec_null;
+	uio_null.uio_iov->iov_len = 0;
+	uio_null.uio_iov->iov_base = NULL;
+	uio_null.uio_iovcnt = 1;
+	uio_null.uio_offset = 0;
+	uio_null.uio_segflg = UIO_SYSSPACE;
+	uio_null.uio_fmode = 0;
+	uio_null.uio_extflg = 0;
+	uio_null.uio_llimit = MAXOFFSET_T;
+	uio_null.uio_resid = 0;
+
+lxa_mmap_thread_top:
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+
+	/* first drain any pending audio output */
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_DRAIN, NULL, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_DRAIN failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * we depend on the ai.play.eof value to keep track of
+	 * audio output progress so reset it here.
+	 */
+	AUDIO_INITINFO(&ai);
+	ai.play.eof = 0;
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+	    AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_SETINFO failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/*
+	 * we're going to need to know the sampling rate and number
+	 * of output channels to estimate how long we can sleep between
+	 * requests.
+	 */
+	if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+	    (intptr_t)&ai, FKIOCTL, kcred, &junk)) != 0) {
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "AUDIO_GETINFO failed, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/* estimate how many ticks it takes to output a fragment of data */
+	bytes_per_sec = (ai.play.sample_rate * ai.play.channels *
+	    ai.play.precision) / 8;
+	if (bytes_per_sec == 0) {
+		/* a zero data rate would make us divide by zero below */
+		cmn_err(CE_WARN, "lxa_mmap_thread: "
+		    "invalid audio output format, aborting audio output");
+		lxa_mmap_thread_exit(lxa_state);
+		/*NOTREACHED*/
+	}
+
+	/* do the multiplication in 64 bits so large fragments can't overflow */
+	usec_per_frag = (uint_t)(((uint64_t)MICROSEC *
+	    lxa_state->lxas_frag_size) / bytes_per_sec);
+	ticks_per_frag = drv_usectohz(usec_per_frag);
+
+	/* queue up three fragments of data into the output stream */
+	eof = 3;
+
+	/* sanity check the eof value */
+	ASSERT(ai.play.eof == 0);
+	ai.play.eof = 0;
+
+	/* we always start audio output at fragment 0 */
+	mutex_enter(&lxa_lock);
+	lxa_state->lxas_mmap_thread_frag = 0;
+
+	/*
+	 * we shouldn't have allowed the mapping if it isn't a multiple
+	 * of the fragment size
+	 */
+	ASSERT((lxa_state->lxas_umem_len % lxa_state->lxas_frag_size) == 0);
+
+	while (!lxa_state->lxas_mmap_thread_exit) {
+		size_t start, end;
+
+		/*
+		 * calculate the start and ending offsets of the next
+		 * fragment to output
+		 */
+		start = lxa_state->lxas_mmap_thread_frag *
+		    lxa_state->lxas_frag_size;
+		end = start + lxa_state->lxas_frag_size;
+
+		ASSERT(start < lxa_state->lxas_umem_len);
+		ASSERT(end <= lxa_state->lxas_umem_len);
+
+		/* setup the uio to output one fragment of audio */
+		uio.uio_resid = end - start;
+		uio.uio_iov->iov_len = end - start;
+		uio.uio_iov->iov_base = &lxa_state->lxas_umem_ptr[start];
+
+		/* increment the current fragment index */
+		lxa_state->lxas_mmap_thread_frag =
+		    (lxa_state->lxas_mmap_thread_frag + 1) %
+		    (lxa_state->lxas_umem_len / lxa_state->lxas_frag_size);
+
+		/* drop the audio lock before actually outputting data */
+		mutex_exit(&lxa_lock);
+
+		/*
+		 * write the fragment of audio data to the device stream
+		 * then write a eof to the stream to tell the device to
+		 * increment ai.play.eof when it's done processing the
+		 * fragment we just wrote
+		 */
+		if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+		    &uio, kcred)) != 0) {
+			cmn_err(CE_WARN, "lxa_mmap_thread: "
+			    "ldi_write() failed (%d), "
+			    "resetting audio output", rv);
+			goto lxa_mmap_thread_top;
+		}
+		if ((rv = ldi_write(lxa_state->lxas_odev_lh,
+		    &uio_null, kcred)) != 0) {
+			cmn_err(CE_WARN, "lxa_mmap_thread: "
+			    "ldi_write(eof) failed (%d), "
+			    "resetting audio output", rv);
+			goto lxa_mmap_thread_top;
+		}
+
+		/*
+		 * we want to avoid buffer underrun so ensure that
+		 * there is always at least one fragment of data in the
+		 * output stream.
+		 */
+		mutex_enter(&lxa_lock);
+		if (--eof > 0) {
+			continue;
+		}
+
+		/*
+		 * now we wait until the audio device has finished outputting
+		 * at least one fragment of data.
+		 */
+		retry = 0;
+		while (!lxa_state->lxas_mmap_thread_exit && (eof == 0)) {
+			uint_t ai_eof_old = ai.play.eof;
+
+			mutex_exit(&lxa_lock);
+
+			/*
+			 * delay for the number of ticks it takes
+			 * to output one fragment of data
+			 */
+			if (ticks_per_frag > 0)
+				delay(ticks_per_frag);
+
+			/* check if we've managed to output any fragments */
+			if ((rv = ldi_ioctl(lxa_state->lxas_odev_lh,
+			    AUDIO_GETINFO, (intptr_t)&ai,
+			    FKIOCTL, kcred, &junk)) != 0) {
+				cmn_err(CE_WARN, "lxa_mmap_thread: "
+				    "AUDIO_GETINFO failed (%d), "
+				    "resetting audio output", rv);
+				/* re-start mmap audio output */
+				goto lxa_mmap_thread_top;
+			}
+
+			if (ai_eof_old == ai.play.eof) {
+				/* institute a random retry limit */
+				if (retry++ < 100) {
+					mutex_enter(&lxa_lock);
+					continue;
+				}
+				cmn_err(CE_WARN, "lxa_mmap_thread: "
+				    "output stalled, "
+				    "resetting audio output");
+				/* re-start mmap audio output */
+				goto lxa_mmap_thread_top;
+			}
+
+			/*
+			 * the eof counter is unsigned, so an unsigned
+			 * subtraction yields the number of fragments
+			 * finished since the last poll even if the
+			 * counter wrapped around in the meantime.
+			 */
+			eof = (int)(ai.play.eof - ai_eof_old);
+
+			/* we're done with this loop so re-acquire the lock */
+			ASSERT(eof != 0);
+			mutex_enter(&lxa_lock);
+		}
+	}
+	mutex_exit(&lxa_lock);
+	lxa_mmap_thread_exit(lxa_state);
+	/*NOTREACHED*/
+}
+
+/*
+ * lxa_mmap_output_disable - stop the mmap output thread, if any.
+ *
+ * Asks the output thread to exit and waits for it to do so.  Returns
+ * immediately if no output thread is running.
+ */
+static void
+lxa_mmap_output_disable(lxa_state_t *lxa_state)
+{
+	kt_did_t tid;
+
+	mutex_enter(&lxa_lock);
+
+	/* if the output thread isn't running there's nothing to do */
+	if (lxa_state->lxas_mmap_thread_running == 0) {
+		mutex_exit(&lxa_lock);
+		return;
+	}
+
+	/* tell the pcm mmap output thread to exit */
+	lxa_state->lxas_mmap_thread_exit = 1;
+
+	/*
+	 * wait for the mmap output thread to exit.  the thread id is
+	 * saved while the lock is held and the lock is dropped before
+	 * joining, since the exiting thread takes lxa_lock itself in
+	 * lxa_mmap_thread_exit().
+	 */
+	tid = lxa_state->lxas_mmap_thread->t_did;
+	mutex_exit(&lxa_lock);
+	thread_join(tid);
+}
+
+/*
+ * lxa_mmap_output_enable - start the mmap output thread.
+ *
+ * Creates a kernel thread running lxa_mmap_thread() for this open unless
+ * one is already running.  The thread state is initialized and the thread
+ * is created while holding lxa_lock.
+ */
+static void
+lxa_mmap_output_enable(lxa_state_t *lxa_state)
+{
+	mutex_enter(&lxa_lock);
+
+	/* if the output thread is already running there's nothing to do */
+	if (lxa_state->lxas_mmap_thread_running != 0) {
+		mutex_exit(&lxa_lock);
+		return;
+	}
+
+	/* setup output state */
+	lxa_state->lxas_mmap_thread_running = 1;
+	lxa_state->lxas_mmap_thread_exit = 0;
+	lxa_state->lxas_mmap_thread_frag = 0;
+
+	/* kick off a thread to do the mmap pcm output */
+	lxa_state->lxas_mmap_thread = thread_create(NULL, 0,
+	    (void (*)())lxa_mmap_thread, lxa_state,
+	    0, &p0, TS_RUN, minclsyspri);
+	ASSERT(lxa_state->lxas_mmap_thread != NULL);
+
+	mutex_exit(&lxa_lock);
+}
+
+/*
+ * lxa_ioc_mmap_output - enable or disable output from the mmaped buffer.
+ *
+ * arg points at a uint_t trigger value: zero stops mmap output and any
+ * other value starts it.  Returns EINVAL if the device wasn't opened for
+ * writing or hasn't been mmaped, EFAULT if the trigger can't be copied
+ * in, and 0 otherwise.
+ */
+static int
+lxa_ioc_mmap_output(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	uint_t	enable;
+
+	/* mmap access is output only and requires an existing mapping */
+	if (((lxa_state->lxas_flags & FWRITE) == 0) ||
+	    (lxa_state->lxas_umem_cookie == NULL))
+		return (EINVAL);
+
+	/* fetch the trigger value from the caller */
+	if (ddi_copyin((void *)arg, &enable, sizeof (enable), mode) != 0)
+		return (EFAULT);
+
+	if (enable != 0)
+		lxa_mmap_output_enable(lxa_state);
+	else
+		lxa_mmap_output_disable(lxa_state);
+
+	return (0);
+}
+
+/*
+ * lxa_ioc_mmap_ptr - report the current position of mmap output.
+ *
+ * Copies out, as an int, the byte offset into the mmaped buffer of the
+ * fragment index currently recorded by the output thread.  Returns EINVAL
+ * if the device wasn't opened for writing, hasn't been mmaped, or has no
+ * output thread running, EFAULT if the copyout fails, and 0 otherwise.
+ */
+static int
+lxa_ioc_mmap_ptr(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	int	offset;
+
+	/*
+	 * there is nothing to report unless we're open for output, the
+	 * device has been mmaped, and the output thread is running.
+	 */
+	if (((lxa_state->lxas_flags & FWRITE) == 0) ||
+	    (lxa_state->lxas_umem_cookie == NULL) ||
+	    (lxa_state->lxas_mmap_thread_running == 0))
+		return (EINVAL);
+
+	/* the fragment index is updated by the output thread under the lock */
+	mutex_enter(&lxa_lock);
+	offset = lxa_state->lxas_mmap_thread_frag * lxa_state->lxas_frag_size;
+	mutex_exit(&lxa_lock);
+
+	if (ddi_copyout(&offset, (void *)arg, sizeof (offset), mode) == 0)
+		return (0);
+
+	return (EFAULT);
+}
+
+/*
+ * lxa_ioc_get_frag_info - copy the current fragment size and count out to
+ * the lxa_frag_info_t that arg points at.  Returns 0, or EFAULT if the
+ * copyout fails.
+ */
+static int
+lxa_ioc_get_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_frag_info_t	info;
+
+	info.lxa_fi_size = lxa_state->lxas_frag_size;
+	info.lxa_fi_cnt = lxa_state->lxas_frag_cnt;
+
+	if (ddi_copyout(&info, (void *)arg, sizeof (info), mode) == 0)
+		return (0);
+
+	return (EFAULT);
+}
+
+/*
+ * lxa_ioc_set_frag_info - set the fragment size and count for this open
+ * from the lxa_frag_info_t that arg points at.
+ *
+ * Returns EINVAL if the device is currently mmaped, if the fragment count
+ * is zero, or if the fragment size is less than 16.  Returns EFAULT if
+ * the request can't be copied in, and 0 on success.
+ */
+static int
+lxa_ioc_set_frag_info(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_frag_info_t	req;
+
+	/* the fragment settings are frozen once the device is mmaped */
+	if (lxa_state->lxas_umem_cookie != NULL)
+		return (EINVAL);
+
+	if (ddi_copyin((void *)arg, &req, sizeof (req), mode) != 0)
+		return (EFAULT);
+
+	/* we need at least one fragment, of at least 16 bytes */
+	if (req.lxa_fi_cnt == 0)
+		return (EINVAL);
+	if (req.lxa_fi_size < 16)
+		return (EINVAL);
+
+	lxa_state->lxas_frag_cnt = req.lxa_fi_cnt;
+	lxa_state->lxas_frag_size = req.lxa_fi_size;
+
+	return (0);
+}
+
+/*
+ * lxa_audio_drain - issue AUDIO_DRAIN to the output device.  Returns
+ * EINVAL if no output device is open; otherwise returns 0 regardless of
+ * the result of the ioctl.
+ */
+static int
+lxa_audio_drain(lxa_state_t *lxa_state)
+{
+	ldi_handle_t	olh = lxa_state->lxas_odev_lh;
+	int		rval;
+
+	/* draining only makes sense for output */
+	if (olh == NULL)
+		return (EINVAL);
+
+	/* the result of the drain request is deliberately ignored */
+	(void) ldi_ioctl(olh, AUDIO_DRAIN, NULL, FKIOCTL, kcred, &rval);
+
+	return (0);
+}
+
+/*
+ * lxa_audio_info_merge() - combine the audio_info_t structures obtained
+ * from the input device and the output device into a single structure.
+ *
+ * usage notes:
+ *
+ * - it's important to make sure NOT to get the ai_idev and ai_odev
+ *   parameters mixed up when calling lxa_audio_info_merge().
+ *
+ * - it's important for the caller to make sure that AUDIO_GETINFO
+ *   was called for the input device BEFORE the output device.  (see
+ *   the comments for merging the monitor_gain setting to see why.)
+ *
+ * - if only one of the two devices is open, the structure for that
+ *   device is copied to ai_merged unchanged and the other is not read.
+ */
+static void
+lxa_audio_info_merge(lxa_state_t *lxa_state,
+    audio_info_t *ai_idev, audio_info_t *ai_odev, audio_info_t *ai_merged)
+{
+	/* if we're not setup for output return the input device info */
+	if (lxa_state->lxas_odev_lh == NULL) {
+		*ai_merged = *ai_idev;
+		return;
+	}
+
+	/* if we're not setup for input return the output device info */
+	if (lxa_state->lxas_idev_lh == NULL) {
+		*ai_merged = *ai_odev;
+		return;
+	}
+
+	/* get record values from the input device */
+	ai_merged->record = ai_idev->record;
+
+	/* get play values from the output device */
+	ai_merged->play = ai_odev->play;
+
+	/* muting status only matters for the output device */
+	ai_merged->output_muted = ai_odev->output_muted;
+
+	/* we don't support device reference counts, always return 1 */
+	ai_merged->ref_cnt = 1;
+
+	/*
+	 * for supported hw/sw features report the combined feature
+	 * set we calculated earlier.
+	 */
+	ai_merged->hw_features = lxa_state->lxas_hw_features;
+	ai_merged->sw_features = lxa_state->lxas_sw_features;
+
+	if (!lxa_state->lxas_devs_same) {
+		/*
+		 * if the input and output devices are different
+		 * physical devices then we don't support input to
+		 * output loopback so we always report the input
+		 * to output loopback gain to be zero.
+		 */
+		ai_merged->monitor_gain = 0;
+	} else {
+		/*
+		 * the input and output devices are actually the
+		 * same physical device.  hence it probably supports
+		 * input to output loopback.  regardless we should
+		 * pass back the input to output gain reported by
+		 * the device.  when we pick a value to pass back we
+		 * use the output device value since that was
+		 * the most recently queried.  (we base this
+		 * decision on the assumption that io gain is
+		 * actually a hardware setting in the device and
+		 * hence if it is changed on one open instance of
+		 * the device the change will be visible to all
+		 * other instances of the device.)
+		 */
+		ai_merged->monitor_gain = ai_odev->monitor_gain;
+	}
+
+	/*
+	 * for currently enabled software features always return the
+	 * merger of the two.  (of course the enabled software features
+	 * for the input and output devices should always be the same,
+	 * so if they aren't, complain.)
+	 */
+	if (ai_idev->sw_features_enabled != ai_odev->sw_features_enabled)
+		zcmn_err(getzoneid(), CE_WARN, "lx_audio: "
+		    "unexpected sofware feature state");
+	ai_merged->sw_features_enabled =
+	    ai_idev->sw_features_enabled & ai_odev->sw_features_enabled;
+}
+
+/*
+ * lxa_audio_setinfo - apply an audio_info_t request from the caller.
+ *
+ * The request is split into an input device request (with the play and
+ * output_muted settings cleared) and an output device request (with the
+ * record settings cleared).  Each is issued with cmd to the corresponding
+ * open device, and the values the devices hand back are merged and copied
+ * out to the caller's buffer.
+ *
+ * Returns EFAULT if the request can't be copied in or the result can't be
+ * copied out, EINVAL if an unsupported software feature is requested, EIO
+ * if play.eof is changed while the device is mmaped, the error from
+ * ldi_ioctl() if a device rejects the request, and 0 on success.
+ */
+static int
+lxa_audio_setinfo(lxa_state_t *lxa_state, int cmd, intptr_t arg,
+    int mode)
+{
+	audio_info_t	ai, ai_null, ai_idev, ai_odev;
+	int		rv, junk;
+
+	/* copy in the request */
+	if (ddi_copyin((void *)arg, &ai, sizeof (ai), mode) != 0)
+		return (EFAULT);
+
+	/*
+	 * if the caller is attempting to enable a software feature that
+	 * we didn't report as supported then return an error.  (a value
+	 * of -1 is skipped by this check.)
+	 */
+	if ((ai.sw_features_enabled != -1) &&
+	    (ai.sw_features_enabled & ~lxa_state->lxas_sw_features))
+		return (EINVAL);
+
+	/*
+	 * if a process has mmaped this device then we don't allow
+	 * changes to the play.eof field (since mmap output depends
+	 * on this field.)
+	 */
+	if ((lxa_state->lxas_umem_cookie != NULL) &&
+	    (ai.play.eof != -1))
+		return (EIO);
+
+	/* initialize the new requests */
+	AUDIO_INITINFO(&ai_null);
+	ai_idev = ai_odev = ai;
+
+	/* remove audio input settings from the output device request */
+	ai_odev.record = ai_null.record;
+
+	/* remove audio output settings from the input device request */
+	ai_idev.play = ai_null.play;
+	ai_idev.output_muted = ai_null.output_muted;
+
+	/* apply settings to the input device */
+	if ((lxa_state->lxas_idev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_idev_lh, cmd,
+	    (intptr_t)&ai_idev, FKIOCTL, kcred, &junk)) != 0))
+		return (rv);
+
+	/* apply settings to the output device */
+	if ((lxa_state->lxas_odev_lh != NULL) &&
+	    ((rv = ldi_ioctl(lxa_state->lxas_odev_lh, cmd,
+	    (intptr_t)&ai_odev, FKIOCTL, kcred, &junk)) != 0))
+		return (rv);
+
+	/*
+	 * a AUDIO_SETINFO call performs an implicit AUDIO_GETINFO to
+	 * return values (see the comments in audioio.h.) so we need
+	 * to combine the values returned from the input and output
+	 * device back into the user's buffer.
+	 */
+	lxa_audio_info_merge(lxa_state, &ai_idev, &ai_odev, &ai);
+
+	/* copyout the results */
+	if (ddi_copyout(&ai, (void *)arg, sizeof (ai), mode) != 0) {
+		return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * lxa_audio_getinfo - return the current audio settings to the caller.
+ *
+ * Queries each open device with AUDIO_GETINFO (input first, then output),
+ * merges the results and copies the merged audio_info_t out to arg.
+ * Returns the error from ldi_ioctl() if a query fails, EFAULT if the
+ * copyout fails, and 0 on success.
+ */
+static int
+lxa_audio_getinfo(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	audio_info_t	merged, in_info, out_info;
+	int		err, rval;
+
+	/* the input device has to be queried before the output device */
+	if (lxa_state->lxas_idev_lh != NULL) {
+		err = ldi_ioctl(lxa_state->lxas_idev_lh, AUDIO_GETINFO,
+		    (intptr_t)&in_info, FKIOCTL, kcred, &rval);
+		if (err != 0)
+			return (err);
+	}
+
+	if (lxa_state->lxas_odev_lh != NULL) {
+		err = ldi_ioctl(lxa_state->lxas_odev_lh, AUDIO_GETINFO,
+		    (intptr_t)&out_info, FKIOCTL, kcred, &rval);
+		if (err != 0)
+			return (err);
+	}
+
+	/* fold the two sets of values into one structure for the caller */
+	lxa_audio_info_merge(lxa_state, &in_info, &out_info, &merged);
+
+	if (ddi_copyout(&merged, (void *)arg, sizeof (merged), mode) == 0)
+		return (0);
+
+	return (EFAULT);
+}
+
+/*
+ * Fetch the audio_info_t for the device behind the layered handle lh by
+ * issuing AUDIO_MIXERCTL_GETINFO and extracting the dev_info member of
+ * the returned am_control_t.
+ *
+ * Returns 0 and fills in *ai on success, otherwise the ldi_ioctl() error.
+ */
+static int
+lxa_mixer_ai_from_lh(ldi_handle_t lh, audio_info_t *ai)
+{
+	am_control_t	*ctl;
+	size_t		ctl_size;
+	int		err, nchans, rval;
+
+	ASSERT((lh != NULL) && (ai != NULL));
+
+	/* the am_control_t is sized by the device's channel count */
+	err = ldi_ioctl(lh, AUDIO_GET_NUM_CHS,
+	    (intptr_t)&nchans, FKIOCTL, kcred, &rval);
+	if (err != 0)
+		return (err);
+
+	ctl_size = AUDIO_MIXER_CTL_STRUCT_SIZE(nchans);
+	ctl = kmem_alloc(ctl_size, KM_SLEEP);
+
+	/* get the device state and channel state */
+	err = ldi_ioctl(lh, AUDIO_MIXERCTL_GETINFO,
+	    (intptr_t)ctl, FKIOCTL, kcred, &rval);
+
+	/* only the embedded audio_info structure is of interest */
+	if (err == 0)
+		*ai = ctl->dev_info;
+
+	kmem_free(ctl, ctl_size);
+	return (err);
+}
+
+/*
+ * Build the audio_info_t describing the mixer state for this open.
+ * When only one of the input/output devices is present its settings are
+ * returned directly; when both are present each is queried and the two
+ * results are merged with lxa_audio_info_merge().
+ *
+ * Returns 0 on success or the error from lxa_mixer_ai_from_lh().
+ */
+static int
+lxa_mixer_get_ai(lxa_state_t *lxa_state, audio_info_t *ai)
+{
+	audio_info_t	in_info, out_info;
+	int		err;
+
+	/* no input device: the output device alone supplies the answer */
+	if (lxa_state->lxas_idev_lh == NULL)
+		return (lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, ai));
+
+	/* no output device: the input device alone supplies the answer */
+	if (lxa_state->lxas_odev_lh == NULL)
+		return (lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, ai));
+
+	/* both devices exist: query the input side first ... */
+	err = lxa_mixer_ai_from_lh(lxa_state->lxas_idev_lh, &in_info);
+	if (err != 0)
+		return (err);
+
+	/* ... then the output side */
+	err = lxa_mixer_ai_from_lh(lxa_state->lxas_odev_lh, &out_info);
+	if (err != 0)
+		return (err);
+
+	/* combine the two views into the caller's structure */
+	lxa_audio_info_merge(lxa_state, &in_info, &out_info, ai);
+	return (0);
+}
+
+/*
+ * Common handler for the LXA_IOC_MIXER_GET_VOL and LXA_IOC_MIXER_GET_MIC
+ * ioctls.  Looks up the current audio_info_t for this mixer open and
+ * copies the play (VOL) or record (MIC) gain and balance out to arg as
+ * an lxa_mixer_levels_t.
+ *
+ * Returns 0 on success, the error from lxa_mixer_get_ai(), EINVAL for
+ * any other cmd, or EFAULT if the copyout fails.
+ */
+static int
+lxa_mixer_get_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	lxa_ml;
+	audio_info_t		ai;
+	int			rv;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	if ((rv = lxa_mixer_get_ai(lxa_state, &ai)) != 0)
+		return (rv);
+
+	switch (cmd) {
+	case LXA_IOC_MIXER_GET_VOL:
+		lxa_ml.lxa_ml_gain = ai.play.gain;
+		lxa_ml.lxa_ml_balance = ai.play.balance;
+		break;
+	case LXA_IOC_MIXER_GET_MIC:
+		lxa_ml.lxa_ml_gain = ai.record.gain;
+		lxa_ml.lxa_ml_balance = ai.record.balance;
+		break;
+	default:
+		/*
+		 * never copy out lxa_ml unless it was filled in above;
+		 * otherwise uninitialized kernel stack would be exposed.
+		 */
+		return (EINVAL);
+	}
+
+	if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * Common handler for the LXA_IOC_MIXER_SET_VOL and LXA_IOC_MIXER_SET_MIC
+ * ioctls.  Copies an lxa_mixer_levels_t in from arg, validates it, and
+ * applies its gain and balance to the play (VOL) or record (MIC) side
+ * through lxa_audio_setinfo().
+ *
+ * Returns EFAULT if the copyin fails, EINVAL if the levels fail
+ * LXA_MIXER_LEVELS_OK(), otherwise the result of lxa_audio_setinfo().
+ */
+static int
+lxa_mixer_set_common(lxa_state_t *lxa_state, int cmd, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	levels;
+	audio_info_t		new_ai;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* fetch the requested levels from the caller */
+	if (ddi_copyin((void *)arg, &levels, sizeof (levels), mode) != 0)
+		return (EFAULT);
+
+	/* reject levels that fail the sanity check */
+	if (!LXA_MIXER_LEVELS_OK(&levels))
+		return (EINVAL);
+
+	/* start from an initialized request and fill in just one side */
+	AUDIO_INITINFO(&new_ai);
+	if (cmd == LXA_IOC_MIXER_SET_VOL) {
+		new_ai.play.gain = levels.lxa_ml_gain;
+		new_ai.play.balance = levels.lxa_ml_balance;
+	} else if (cmd == LXA_IOC_MIXER_SET_MIC) {
+		new_ai.record.gain = levels.lxa_ml_gain;
+		new_ai.record.balance = levels.lxa_ml_balance;
+	}
+
+	/*
+	 * this is a deliberate shortcut.  AUDIO_MIXERCTL_SETINFO
+	 * normally takes an am_control_t while AUDIO_SETINFO takes an
+	 * audio_info_t.  the first member of an am_control_t is an
+	 * audio_info_t, and the remainder of the am_control_t is
+	 * normally ignored for AUDIO_MIXERCTL_SETINFO, so the request
+	 * is simply routed through the AUDIO_SETINFO handling code.
+	 * new_ai lives in the kernel, hence FKIOCTL rather than mode.
+	 */
+	return (lxa_audio_setinfo(lxa_state, AUDIO_MIXERCTL_SETINFO,
+	    (intptr_t)&new_ai, FKIOCTL));
+}
+
+/*
+ * Handle LXA_IOC_MIXER_GET_PCM: return the zone's cached pcm mixer
+ * levels to the caller.
+ *
+ * The cached levels are snapshotted while holding lxa_lock and copied
+ * out only after the lock has been dropped, so that a fault on the
+ * caller's buffer cannot stall other users of lxa_lock.
+ *
+ * Returns 0 on success or EFAULT if the copyout fails.
+ */
+static int
+lxa_mixer_get_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	lxa_ml;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* take a consistent snapshot of the cached pcm mixer settings */
+	mutex_enter(&lxa_lock);
+	lxa_ml = lxa_state->lxas_zs->lxa_zs_pcm_levels;
+	mutex_exit(&lxa_lock);
+
+	if (ddi_copyout(&lxa_ml, (void *)arg, sizeof (lxa_ml), mode) != 0)
+		return (EFAULT);
+	return (0);
+}
+
+/*
+ * Handle LXA_IOC_MIXER_SET_PCM: validate the new pcm levels, push them
+ * to the zone's active output channel (if there is one), and cache them
+ * in the zone state.  The cache is only updated when the device update
+ * succeeded or was not needed.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, EINVAL if the
+ * levels fail LXA_MIXER_LEVELS_OK(), or the lxa_audio_setinfo() error.
+ */
+static int
+lxa_mixer_set_pcm(lxa_state_t *lxa_state, intptr_t arg, int mode)
+{
+	lxa_mixer_levels_t	levels;
+	lxa_zstate_t		*zs;
+	int			err = 0;
+
+	ASSERT(lxa_state->lxas_type == LXA_TYPE_AUDIOCTL);
+
+	/* fetch and sanity check the requested levels */
+	if (ddi_copyin((void *)arg, &levels, sizeof (levels), mode) != 0)
+		return (EFAULT);
+	if (!LXA_MIXER_LEVELS_OK(&levels))
+		return (EINVAL);
+
+	mutex_enter(&lxa_lock);
+	zs = lxa_state->lxas_zs;
+
+	/* an active output channel must pick up the new levels right away */
+	if (zs->lxa_zs_ostate != NULL) {
+		audio_info_t	new_ai;
+
+		AUDIO_INITINFO(&new_ai);
+		new_ai.play.gain = levels.lxa_ml_gain;
+		new_ai.play.balance = levels.lxa_ml_balance;
+
+		err = lxa_audio_setinfo(zs->lxa_zs_ostate,
+		    AUDIO_SETINFO, (intptr_t)&new_ai, FKIOCTL);
+	}
+
+	/* remember the levels so that future opens start out with them */
+	if (err == 0)
+		zs->lxa_zs_pcm_levels = levels;
+
+	mutex_exit(&lxa_lock);
+	return (err);
+}
+
+/*
+ * Handle LXA_IOC_ZONE_REG: register a zone with this driver.
+ *
+ * Copies an lxa_zone_reg_t in from arg, checks that its zone name,
+ * input device and output device strings are NUL-terminated and that
+ * the device values pass lxa_devprop_verify(), then, under lxa_lock,
+ * creates the per-zone device properties on lxa_dip and inserts a new
+ * zone state structure into lxa_zstate_hash.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, EINVAL for a
+ * malformed request, and EIO if the zone is already registered or the
+ * properties/hash entry could not be created.
+ */
+static int
+lxa_zone_reg(intptr_t arg, int mode)
+{
+	lxa_zone_reg_t	lxa_zr;
+	lxa_zstate_t	*lxa_zs = NULL;
+	mod_hash_val_t	hval;
+	char		*idev_name = NULL, *odev_name = NULL, *pval = NULL;
+	int		i;
+
+	if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+		return (EFAULT);
+
+	/* make sure that zone_name is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+		if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+		return (EINVAL);
+
+	/* make sure that inputdev is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_inputdev); i++)
+		if (lxa_zr.lxa_zr_inputdev[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_inputdev))
+		return (EINVAL);
+
+	/* make sure it's a valid inputdev property value */
+	if (lxa_devprop_verify(lxa_zr.lxa_zr_inputdev) != 0)
+		return (EINVAL);
+
+	/* make sure that outputdev is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_outputdev); i++)
+		if (lxa_zr.lxa_zr_outputdev[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_outputdev))
+		return (EINVAL);
+
+	/* make sure it's a valid outputdev property value */
+	if (lxa_devprop_verify(lxa_zr.lxa_zr_outputdev) != 0)
+		return (EINVAL);
+
+	/* get the property names */
+	idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_INPUTDEV);
+	odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_OUTPUTDEV);
+
+	/*
+	 * allocate and initialize a zone state structure.
+	 * since the audio device can't possibly be opened yet
+	 * (we're setting it up now and the zone isn't booted yet)
+	 * assign some reasonable default pcm channel settings.
+	 */
+	lxa_zs = kmem_zalloc(sizeof (*lxa_zs), KM_SLEEP);
+	lxa_zs->lxa_zs_zonename = strdup(lxa_zr.lxa_zr_zone_name);
+	lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain = AUDIO_MID_GAIN;
+	lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance = AUDIO_MID_BALANCE;
+
+	mutex_enter(&lxa_lock);
+
+	/*
+	 * make sure this zone isn't already registered.
+	 * a zone is registered if properties for that zone exist
+	 * or there is a zone state structure for that zone.
+	 */
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    idev_name, &pval) == DDI_PROP_SUCCESS) {
+		goto err_unlock;
+	}
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    odev_name, &pval) == DDI_PROP_SUCCESS) {
+		goto err_unlock;
+	}
+
+	/*
+	 * the value found (if any) is not needed, but the destination
+	 * must be a full mod_hash_val_t: on a hit mod_hash_find()
+	 * stores a pointer-sized value through it, which would
+	 * overrun a plain int.
+	 */
+	if (mod_hash_find(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zs->lxa_zs_zonename, &hval) == 0)
+		goto err_unlock;
+
+	/*
+	 * create the new properties and insert the zone state structure
+	 * into the global hash
+	 */
+	if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+	    idev_name, lxa_zr.lxa_zr_inputdev) != DDI_PROP_SUCCESS)
+		goto err_prop_remove;
+	if (ddi_prop_update_string(DDI_DEV_T_NONE, lxa_dip,
+	    odev_name, lxa_zr.lxa_zr_outputdev) != DDI_PROP_SUCCESS)
+		goto err_prop_remove;
+	if (mod_hash_insert(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zs->lxa_zs_zonename,
+	    (mod_hash_val_t)lxa_zs) != 0)
+		goto err_prop_remove;
+
+	/* success! */
+	lxa_registered_zones++;
+	mutex_exit(&lxa_lock);
+
+	/* cleanup (lxa_zs and its zonename now belong to the hash) */
+	strfree(idev_name);
+	strfree(odev_name);
+	return (0);
+
+err_prop_remove:
+	/* undo whichever of the two properties were created above */
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+err_unlock:
+	mutex_exit(&lxa_lock);
+
+	if (lxa_zs != NULL) {
+		strfree(lxa_zs->lxa_zs_zonename);
+		kmem_free(lxa_zs, sizeof (*lxa_zs));
+	}
+	if (pval != NULL)
+		ddi_prop_free(pval);
+	if (idev_name != NULL)
+		strfree(idev_name);
+	if (odev_name != NULL)
+		strfree(odev_name);
+	return (EIO);
+}
+
+/*
+ * Handle LXA_IOC_ZONE_UNREG: unregister a zone from this driver.
+ *
+ * Copies an lxa_zone_reg_t in from arg (only the zone name is used),
+ * and, under lxa_lock, verifies that the zone is registered and that
+ * neither its input nor its output device is currently open before
+ * removing the zone's properties and its zone state structure.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, EINVAL if the zone
+ * name is not NUL-terminated, ENOENT if the zone is not registered, and
+ * EBUSY if the zone's audio device is in use.
+ */
+static int
+lxa_zone_unreg(intptr_t arg, int mode)
+{
+	lxa_zone_reg_t	lxa_zr;
+	lxa_zstate_t	*lxa_zs = NULL;
+	char		*idev_name = NULL, *odev_name = NULL, *pval = NULL;
+	int		rv, i;
+
+	if (ddi_copyin((void *)arg, &lxa_zr, sizeof (lxa_zr), mode) != 0)
+		return (EFAULT);
+
+	/* make sure that zone_name is a valid string */
+	for (i = 0; i < sizeof (lxa_zr.lxa_zr_zone_name); i++)
+		if (lxa_zr.lxa_zr_zone_name[i] == '\0')
+			break;
+	if (i == sizeof (lxa_zr.lxa_zr_zone_name))
+		return (EINVAL);
+
+	/* get the property names */
+	idev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_INPUTDEV);
+	odev_name = lxa_devprop_name(lxa_zr.lxa_zr_zone_name,
+	    LXA_PROP_OUTPUTDEV);
+
+	mutex_enter(&lxa_lock);
+
+	if (lxa_registered_zones <= 0) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+
+	/* make sure this zone is actually registered */
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    idev_name, &pval) != DDI_PROP_SUCCESS) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ddi_prop_free(pval);
+	pval = NULL;
+	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, lxa_dip,
+	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+	    odev_name, &pval) != DDI_PROP_SUCCESS) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ddi_prop_free(pval);
+	pval = NULL;
+	if (mod_hash_find(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+	    (mod_hash_val_t *)&lxa_zs) != 0) {
+		rv = ENOENT;
+		goto err_unlock;
+	}
+	ASSERT(strcmp(lxa_zr.lxa_zr_zone_name, lxa_zs->lxa_zs_zonename) == 0);
+
+	/*
+	 * if the audio device is currently in use, for either input or
+	 * output, then refuse to unregister the zone
+	 */
+	if ((lxa_zs->lxa_zs_istate != NULL) ||
+	    (lxa_zs->lxa_zs_ostate != NULL)) {
+		rv = EBUSY;
+		goto err_unlock;
+	}
+
+	/* success! cleanup zone config state */
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, idev_name);
+	(void) ddi_prop_remove(DDI_DEV_T_NONE, lxa_dip, odev_name);
+
+	/*
+	 * note, the action of removing the zone state structure from the
+	 * hash will automatically free lxa_zs->lxa_zs_zonename.
+	 *
+	 * the reason for this is that we used lxa_zs->lxa_zs_zonename
+	 * as the hash key and by default mod_hash_create_strhash() uses
+	 * mod_hash_strkey_dtor() as the hash key destructor (which
+	 * frees the key for us).
+	 */
+	(void) mod_hash_remove(lxa_zstate_hash,
+	    (mod_hash_key_t)lxa_zr.lxa_zr_zone_name,
+	    (mod_hash_val_t *)&lxa_zs);
+	lxa_registered_zones--;
+	mutex_exit(&lxa_lock);
+
+	/* cleanup */
+	kmem_free(lxa_zs, sizeof (*lxa_zs));
+	strfree(idev_name);
+	strfree(odev_name);
+	return (0);
+
+err_unlock:
+	mutex_exit(&lxa_lock);
+
+	if (pval != NULL)
+		ddi_prop_free(pval);
+	if (idev_name != NULL)
+		strfree(idev_name);
+	if (odev_name != NULL)
+		strfree(odev_name);
+	return (rv);
+}
+
+/*
+ * Dispatch an ioctl issued on the devctl minor node.  These requests
+ * administer the driver itself and are refused (EINVAL) unless the
+ * caller's zone id is 0.  Unrecognized commands also yield EINVAL.
+ */
+static int
+lxa_ioctl_devctl(int cmd, intptr_t arg, int mode)
+{
+	int	err;
+
+	/* administration is restricted to the global zone */
+	ASSERT(getzoneid() == 0);
+	if (getzoneid() != 0)
+		return (EINVAL);
+
+	switch (cmd) {
+	case LXA_IOC_ZONE_REG:
+		err = lxa_zone_reg(arg, mode);
+		break;
+	case LXA_IOC_ZONE_UNREG:
+		err = lxa_zone_unreg(arg, mode);
+		break;
+	default:
+		err = EINVAL;
+		break;
+	}
+
+	return (err);
+}
+
+/*
+ * open(9E) entry point.
+ *
+ * Opens of the devctl minor succeed immediately (from zone id 0 only)
+ * and allocate no state.  Opens of the dsp and mixer minors are clone
+ * opens: a new minor number and lxa_state_t are allocated, the
+ * underlying devices are opened via lxa_dev_open(), the state is
+ * inserted into lxa_state_hash keyed by the new minor, and *devp is
+ * rewritten to the newly made dev_t.
+ *
+ * Returns 0 on success; EINVAL for an unknown minor or a devctl open
+ * from a zone other than zone id 0; EIO if the caller's zone has no
+ * entry in lxa_zstate_hash or the hash insert fails; EBUSY if the
+ * zone already has an active open of the same input or output device;
+ * or the error from lxa_dev_open(), lxa_dev_getfeatures() or
+ * lxa_audio_setinfo().
+ */
+static int
+/*ARGSUSED*/
+lxa_open(dev_t *devp, int flags, int otyp, cred_t *credp)
+{
+	lxa_dev_type_t	open_type = LXA_TYPE_INVALID;
+	lxa_zstate_t	*lxa_zs;
+	lxa_state_t	*lxa_state;
+	minor_t		minor;
+	int		rv;
+
+	if (getminor(*devp) == LXA_MINORNUM_DEVCTL) {
+		/*
+		 * this is a devctl node, it exists to administer this
+		 * pseudo driver so it doesn't actually need access to
+		 * any underlying audio devices.  hence there is nothing
+		 * really to do here.  of course, this driver should
+		 * only be administered from the global zone.
+		 */
+		ASSERT(getzoneid() == 0);
+		if (getzoneid() != 0)
+			return (EINVAL);
+		return (0);
+	}
+
+	/* lookup the zone state structure (keyed by the zone's name) */
+	if (mod_hash_find(lxa_zstate_hash, (mod_hash_key_t)getzonename(),
+		(mod_hash_val_t *)&lxa_zs) != 0) {
+		return (EIO);
+	}
+
+	/* determine what type of device was opened */
+	switch (getminor(*devp)) {
+	case LXA_MINORNUM_DSP:
+		open_type = LXA_TYPE_AUDIO;
+		break;
+	case LXA_MINORNUM_MIXER:
+		open_type = LXA_TYPE_AUDIOCTL;
+		break;
+	default:
+		return (EINVAL);
+	}
+	ASSERT(open_type != LXA_TYPE_INVALID);
+
+	/* all other opens are clone opens so get a new minor node */
+	minor = id_alloc(lxa_minor_id);
+
+	/* allocate and initialize the new lxa_state structure */
+	lxa_state = kmem_zalloc(sizeof (*lxa_state), KM_SLEEP);
+	lxa_state->lxas_zs = lxa_zs;
+	lxa_state->lxas_dev_old = *devp;
+	lxa_state->lxas_dev_new = makedevice(getmajor(*devp), minor);
+	lxa_state->lxas_flags = flags;
+	lxa_state->lxas_type = open_type;
+
+	/*
+	 * initialize the input and output device.
+	 * NOTE(review): every failure path from here on relies on
+	 * lxa_state_close() to undo whatever has been set up so far;
+	 * that function is not visible here -- confirm it releases
+	 * the minor number and copes with partially set up state.
+	 */
+	if (((rv = lxa_dev_open(lxa_state)) != 0) ||
+	    ((rv = lxa_dev_getfeatures(lxa_state)) != 0)) {
+		lxa_state_close(lxa_state);
+		return (rv);
+	}
+
+	/*
+	 * save this audio state structure into a hash indexed
+	 * by its minor device number.  (this will provide a convenient
+	 * way to lookup the state structure on future operations.)
+	 */
+	if (mod_hash_insert(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t)lxa_state) != 0) {
+		lxa_state_close(lxa_state);
+		return (EIO);
+	}
+
+	mutex_enter(&lxa_lock);
+
+	/* apply the currently cached zone PCM mixer levels */
+	if ((lxa_state->lxas_type == LXA_TYPE_AUDIO) &&
+	    (lxa_state->lxas_odev_lh != NULL)) {
+		audio_info_t ai;
+
+		AUDIO_INITINFO(&ai);
+		ai.play.gain = lxa_zs->lxa_zs_pcm_levels.lxa_ml_gain;
+		ai.play.balance = lxa_zs->lxa_zs_pcm_levels.lxa_ml_balance;
+
+		/* ai is a kernel buffer, hence FKIOCTL */
+		if ((rv = lxa_audio_setinfo(lxa_state,
+		    AUDIO_SETINFO, (intptr_t)&ai, FKIOCTL)) != 0) {
+			mutex_exit(&lxa_lock);
+			lxa_state_close(lxa_state);
+			return (rv);
+		}
+	}
+
+	/*
+	 * we only allow one active open of the input or output device.
+	 * check here for duplicate opens
+	 */
+	if (lxa_state->lxas_type == LXA_TYPE_AUDIO) {
+		if ((lxa_state->lxas_idev_lh != NULL) &&
+		    (lxa_zs->lxa_zs_istate != NULL)) {
+			mutex_exit(&lxa_lock);
+			lxa_state_close(lxa_state);
+			return (EBUSY);
+		}
+		if ((lxa_state->lxas_odev_lh != NULL) &&
+		    (lxa_zs->lxa_zs_ostate != NULL)) {
+			mutex_exit(&lxa_lock);
+			lxa_state_close(lxa_state);
+			return (EBUSY);
+		}
+
+		/* not a duplicate open, update the global zone state */
+		if (lxa_state->lxas_idev_lh != NULL)
+			lxa_zs->lxa_zs_istate = lxa_state;
+		if (lxa_state->lxas_odev_lh != NULL)
+			lxa_zs->lxa_zs_ostate = lxa_state;
+	}
+	mutex_exit(&lxa_lock);
+
+	/* make sure to return our newly allocated dev_t */
+	*devp = lxa_state->lxas_dev_new;
+	return (0);
+}
+
+/*
+ * close(9E) entry point.  The devctl minor has no per-open state, so
+ * closing it is a no-op.  For every other minor the state structure is
+ * looked up in lxa_state_hash and handed to lxa_state_close().
+ *
+ * Returns 0 on success or EINVAL if no state exists for the minor.
+ */
+static int
+/*ARGSUSED*/
+lxa_close(dev_t dev, int flags, int otyp, cred_t *credp)
+{
+	minor_t		minor = getminor(dev);
+	lxa_state_t	*sp;
+
+	/* devctl opens never allocated a handle; nothing to tear down */
+	if (minor == LXA_MINORNUM_DEVCTL)
+		return (0);
+
+	/* find the handle that lxa_open() stored for this minor */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&sp) != 0)
+		return (EINVAL);
+
+	lxa_state_close(sp);
+	return (0);
+}
+
+/*
+ * read(9E) entry point: forward the read to the underlying input
+ * device.
+ *
+ * Returns EINVAL if there is no state for this minor, EIO once the
+ * device has been mmaped, EBADF if this open has no input device,
+ * otherwise the result of ldi_read().
+ */
+static int
+/*ARGSUSED*/
+lxa_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	lxa_state_t	*sp;
+
+	/* find the handle that lxa_open() stored for this minor */
+	if (mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)getminor(dev),
+	    (mod_hash_val_t *)&sp) != 0)
+		return (EINVAL);
+
+	/* once the device is mmaped, reads and writes are refused */
+	if (sp->lxas_umem_cookie != NULL)
+		return (EIO);
+
+	/* reading requires an input device */
+	if (sp->lxas_idev_lh == NULL)
+		return (EBADF);
+
+	/* let the underlying input device service the request */
+	return (ldi_read(sp->lxas_idev_lh, uiop, kcred));
+}
+
+/*
+ * write(9E) entry point: forward the write to the underlying output
+ * device.
+ *
+ * Returns EINVAL if there is no state for this minor, EIO once the
+ * device has been mmaped, EBADF if this open has no output device,
+ * otherwise the result of ldi_write().
+ */
+static int
+/*ARGSUSED*/
+lxa_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	lxa_state_t	*sp;
+
+	/* find the handle that lxa_open() stored for this minor */
+	if (mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)getminor(dev),
+	    (mod_hash_val_t *)&sp) != 0)
+		return (EINVAL);
+
+	/* once the device is mmaped, reads and writes are refused */
+	if (sp->lxas_umem_cookie != NULL)
+		return (EIO);
+
+	/* writing requires an output device */
+	if (sp->lxas_odev_lh == NULL)
+		return (EBADF);
+
+	/* let the underlying output device service the request */
+	return (ldi_write(sp->lxas_odev_lh, uiop, kcred));
+}
+
+/*
+ * ioctl(9E) entry point.  Requests on the devctl minor are handed to
+ * lxa_ioctl_devctl().  For all other minors the per-open state is
+ * looked up and the request is dispatched according to whether the
+ * open is an audio (dsp) or audioctl (mixer) open.
+ *
+ * Returns EINVAL if there is no state for the minor or the command is
+ * not supported for this type of open, EFAULT if the copyout for
+ * LXA_IOC_GETMINORNUM fails, otherwise the handler's result.
+ */
+static int
+/*ARGSUSED*/
+lxa_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	lxa_state_t	*sp;
+
+	/* devctl nodes have no handle; dispatch them directly */
+	if (getminor(dev) == LXA_MINORNUM_DEVCTL)
+		return (lxa_ioctl_devctl(cmd, arg, mode));
+
+	/* find the handle that lxa_open() stored for this minor */
+	if (mod_hash_find(lxa_state_hash,
+	    (mod_hash_key_t)(uintptr_t)getminor(dev),
+	    (mod_hash_val_t *)&sp) != 0)
+		return (EINVAL);
+
+	ASSERT((sp->lxas_type == LXA_TYPE_AUDIO) ||
+	    (sp->lxas_type == LXA_TYPE_AUDIOCTL));
+
+	/* report the minor that was originally opened (either type) */
+	if (cmd == LXA_IOC_GETMINORNUM) {
+		int minornum = getminor(sp->lxas_dev_old);
+
+		if (ddi_copyout(&minornum, (void *)arg,
+		    sizeof (minornum), mode) != 0)
+			return (EFAULT);
+		return (0);
+	}
+
+	switch (sp->lxas_type) {
+	case LXA_TYPE_AUDIO:
+		/* requests implemented natively by this driver */
+		switch (cmd) {
+		case LXA_IOC_MMAP_OUTPUT:
+			return (lxa_ioc_mmap_output(sp, arg, mode));
+		case LXA_IOC_MMAP_PTR:
+			return (lxa_ioc_mmap_ptr(sp, arg, mode));
+		case LXA_IOC_GET_FRAG_INFO:
+			return (lxa_ioc_get_frag_info(sp, arg, mode));
+		case LXA_IOC_SET_FRAG_INFO:
+			return (lxa_ioc_set_frag_info(sp, arg, mode));
+		}
+
+		/* requests layered over the underlying device(s) */
+		switch (cmd) {
+		case AUDIO_DRAIN:
+			return (lxa_audio_drain(sp));
+		case AUDIO_SETINFO:
+			return (lxa_audio_setinfo(sp, AUDIO_SETINFO,
+			    arg, mode));
+		case AUDIO_GETINFO:
+			return (lxa_audio_getinfo(sp, arg, mode));
+		}
+		break;
+
+	case LXA_TYPE_AUDIOCTL:
+		/* mixer requests implemented natively by this driver */
+		switch (cmd) {
+		case LXA_IOC_MIXER_GET_VOL:
+		case LXA_IOC_MIXER_GET_MIC:
+			return (lxa_mixer_get_common(sp, cmd, arg, mode));
+		case LXA_IOC_MIXER_SET_VOL:
+		case LXA_IOC_MIXER_SET_MIC:
+			return (lxa_mixer_set_common(sp, cmd, arg, mode));
+		case LXA_IOC_MIXER_GET_PCM:
+			return (lxa_mixer_get_pcm(sp, arg, mode));
+		case LXA_IOC_MIXER_SET_PCM:
+			return (lxa_mixer_set_pcm(sp, arg, mode));
+		}
+		break;
+
+	default:
+		break;
+	}
+
+	/* nothing above recognized the request */
+	return (EINVAL);
+}
+
+/*
+ * devmap(9E) entry point.  Allocates len bytes of zeroed umem, exports
+ * it to the caller through devmap_umem_setup(), and records the
+ * buffer, its length and its cookie in the per-open state.
+ *
+ * Only audio (dsp) opens that were opened with FWRITE may be mapped,
+ * and only one mapping per open is allowed.
+ *
+ * Returns 0 on success; EINVAL if there is no state for the minor, the
+ * open is of the wrong type or mode, or len is zero, larger than
+ * LXA_OSS_FRAG_MEM, or not a multiple of the fragment size; ENOMEM if
+ * the allocation fails; EIO if devmap_umem_setup() fails; EBUSY if
+ * this open already has a mapping.
+ */
+static int
+/*ARGSUSED*/
+lxa_devmap(dev_t dev, devmap_cookie_t dhp,
+    offset_t off, size_t len, size_t *maplen, uint_t model)
+{
+	lxa_state_t		*lxa_state;
+	minor_t			minor = getminor(dev);
+	ddi_umem_cookie_t	umem_cookie;
+	void			*umem_ptr;
+	int			rv;
+
+	/* get the handle for this device */
+	if (mod_hash_find(lxa_state_hash, (mod_hash_key_t)(uintptr_t)minor,
+	    (mod_hash_val_t *)&lxa_state) != 0)
+		return (EINVAL);
+
+	/* we only support mmaping of audio devices */
+	if (lxa_state->lxas_type != LXA_TYPE_AUDIO)
+		return (EINVAL);
+
+	/* we only support output via mmap */
+	if ((lxa_state->lxas_flags & FWRITE) == 0)
+		return (EINVAL);
+
+	/*
+	 * sanity check the amount of memory the user is allocating.
+	 * NOTE(review): this assumes lxas_frag_size is non-zero here;
+	 * where it gets set is not visible in this function -- confirm.
+	 */
+	if ((len == 0) ||
+	    (len > LXA_OSS_FRAG_MEM) ||
+	    ((len % lxa_state->lxas_frag_size) != 0))
+		return (EINVAL);
+
+	/* allocate and clear memory to mmap (without sleeping) */
+	umem_ptr = ddi_umem_alloc(len, DDI_UMEM_NOSLEEP, &umem_cookie);
+	if (umem_ptr == NULL)
+		return (ENOMEM);
+	bzero(umem_ptr, len);
+
+	/* setup the memory mappings */
+	rv = devmap_umem_setup(dhp, lxa_dip, NULL, umem_cookie, 0, len,
+	    PROT_USER | PROT_READ | PROT_WRITE, 0, NULL);
+	if (rv != 0) {
+		ddi_umem_free(umem_cookie);
+		return (EIO);
+	}
+
+	mutex_enter(&lxa_lock);
+
+	/*
+	 * we only support one mmap per open.  the check is made under
+	 * lxa_lock, after the new buffer has been allocated, so a losing
+	 * caller must release its freshly allocated buffer.
+	 */
+	if (lxa_state->lxas_umem_cookie != NULL) {
+		ASSERT(lxa_state->lxas_umem_ptr != NULL);
+		mutex_exit(&lxa_lock);
+		ddi_umem_free(umem_cookie);
+		return (EBUSY);
+	}
+	ASSERT(lxa_state->lxas_umem_ptr == NULL);
+
+	/* publish the mapping in the per-open state */
+	*maplen = len;
+	lxa_state->lxas_umem_len = len;
+	lxa_state->lxas_umem_ptr = umem_ptr;
+	lxa_state->lxas_umem_cookie = umem_cookie;
+	mutex_exit(&lxa_lock);
+	return (0);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH of instance 0 is supported.
+ * Records the dev_info pointer, initializes lxa_lock, creates the
+ * devctl, dsp and mixer minor nodes, and allocates the minor number
+ * space and the two global hashes.
+ *
+ * If any minor node cannot be created, everything set up so far (minor
+ * nodes, lxa_lock, lxa_dip) is torn down again before failing, so a
+ * failed attach leaves nothing behind.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+/*ARGSUSED*/
+lxa_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int	instance = ddi_get_instance(dip);
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	ASSERT(instance == 0);
+	if (instance != 0)
+		return (DDI_FAILURE);
+
+	lxa_dip = dip;
+	mutex_init(&lxa_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/* create our minor nodes, stopping at the first failure */
+	if ((ddi_create_minor_node(dip, LXA_MINORNAME_DEVCTL, S_IFCHR,
+	    LXA_MINORNUM_DEVCTL, DDI_PSEUDO, 0) != DDI_SUCCESS) ||
+	    (ddi_create_minor_node(dip, LXA_MINORNAME_DSP, S_IFCHR,
+	    LXA_MINORNUM_DSP, DDI_PSEUDO, 0) != DDI_SUCCESS) ||
+	    (ddi_create_minor_node(dip, LXA_MINORNAME_MIXER, S_IFCHR,
+	    LXA_MINORNUM_MIXER, DDI_PSEUDO, 0) != DDI_SUCCESS)) {
+		/* a NULL name removes every minor node created for dip */
+		ddi_remove_minor_node(dip, NULL);
+		mutex_destroy(&lxa_lock);
+		lxa_dip = NULL;
+		return (DDI_FAILURE);
+	}
+
+	/* allocate our data structures */
+	lxa_minor_id = id_space_create("lxa_minor_id",
+	    LXA_MINORNUM_COUNT, LX_AUDIO_MAX_OPENS);
+	lxa_state_hash = mod_hash_create_idhash("lxa_state_hash",
+	    lxa_state_hash_size, mod_hash_null_valdtor);
+	lxa_zstate_hash = mod_hash_create_strhash("lxa_zstate_hash",
+	    lxa_zstate_hash_size, mod_hash_null_valdtor);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported, and it is
+ * refused while any zone is still registered with the driver.
+ * Otherwise everything lxa_attach() set up is torn down: the minor
+ * nodes, both hashes, the minor number space and lxa_lock.
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+/*ARGSUSED*/
+lxa_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	ASSERT(!MUTEX_HELD(&lxa_lock));
+	if (lxa_registered_zones > 0)
+		return (DDI_FAILURE);
+
+	/* a NULL name removes every minor node created for dip */
+	ddi_remove_minor_node(dip, NULL);
+
+	/*
+	 * each hash must be destroyed with the routine matching the way
+	 * it was created: lxa_state_hash is an id hash, while
+	 * lxa_zstate_hash was created by mod_hash_create_strhash().
+	 */
+	mod_hash_destroy_idhash(lxa_state_hash);
+	mod_hash_destroy_strhash(lxa_zstate_hash);
+	id_space_destroy(lxa_minor_id);
+	mutex_destroy(&lxa_lock);
+
+	/* don't leave stale pointers behind for a later attach */
+	lxa_state_hash = NULL;
+	lxa_zstate_hash = NULL;
+	lxa_minor_id = NULL;
+	lxa_dip = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * getinfo(9E) entry point.  DDI_INFO_DEVT2DEVINFO yields the saved
+ * dev_info pointer and DDI_INFO_DEVT2INSTANCE yields instance 0; any
+ * other request fails with DDI_FAILURE and leaves *resultp untouched.
+ */
+static int
+/*ARGSUSED*/
+lxa_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **resultp)
+{
+	int	rv = DDI_FAILURE;
+
+	switch (infocmd) {
+	case DDI_INFO_DEVT2DEVINFO:
+		*resultp = lxa_dip;
+		rv = DDI_SUCCESS;
+		break;
+
+	case DDI_INFO_DEVT2INSTANCE:
+		/* this driver only ever has instance 0 */
+		*resultp = (void *)0;
+		rv = DDI_SUCCESS;
+		break;
+
+	default:
+		break;
+	}
+
+	return (rv);
+}
+
+/*
+ * Character device entry points (cb_ops(9S)) for the lx_audio driver.
+ * Entries this driver does not implement are filled with nodev, or
+ * with nochpoll for chpoll.
+ */
+static struct cb_ops lxa_cb_ops = {
+	lxa_open,		/* open */
+	lxa_close,		/* close */
+	nodev,			/* strategy */
+	nodev,			/* print */
+	nodev,			/* dump */
+	lxa_read,		/* read */
+	lxa_write,		/* write */
+	lxa_ioctl,		/* ioctl */
+	lxa_devmap,		/* devmap */
+	nodev,			/* mmap */
+	ddi_devmap_segmap,	/* segmap */
+	nochpoll,		/* chpoll */
+	ddi_prop_op,		/* prop_op */
+	NULL,			/* cb_str */
+	D_NEW | D_MP | D_DEVMAP,	/* cb_flag */
+	CB_REV,			/* cb_rev */
+	NULL,			/* cb_aread */
+	NULL			/* cb_awrite */
+};
+
+/*
+ * Device operations (dev_ops(9S)) for the lx_audio driver.
+ */
+static struct dev_ops lxa_ops = {
+	DEVO_REV,		/* devo_rev */
+	0,			/* devo_refcnt */
+	lxa_getinfo,		/* devo_getinfo */
+	nulldev,		/* devo_identify */
+	nulldev,		/* devo_probe */
+	lxa_attach,		/* devo_attach */
+	lxa_detach,		/* devo_detach */
+	nodev,			/* devo_reset */
+	&lxa_cb_ops,		/* devo_cb_ops */
+	NULL,			/* devo_bus_ops */
+	NULL			/* devo_power */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+	&mod_driverops,		/* type of module */
+	"linux audio driver 'lx_audio' %I%",
+	&lxa_ops		/* driver ops */
+};
+
+/*
+ * Linkage structure handed to mod_install(), mod_remove() and
+ * mod_info() by the module entry points; it names the single driver
+ * linkage (modldrv) this module provides.
+ */
+static struct modlinkage modlinkage = {
+	MODREV_1,
+	&modldrv,
+	NULL			/* terminates the list of linkage structures */
+};
+
+/*
+ * standard module entry points
+ */
+/* Module load entry point: install the module via mod_install(). */
+int
+_init(void)
+{
+	return (mod_install(&modlinkage));
+}
+
+/* Module unload entry point: remove the module via mod_remove(). */
+int
+_fini(void)
+{
+	return (mod_remove(&modlinkage));
+}
+
+/* Module information entry point: report on the module via mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.conf b/usr/src/uts/common/brand/lx/io/lx_audio.conf
new file mode 100644
index 0000000000..2eeb5eb7ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+name="lx_audio" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.c b/usr/src/uts/common/brand/lx/io/lx_ptm.c
new file mode 100644
index 0000000000..e4079df133
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.c
@@ -0,0 +1,1137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * This driver attempts to emulate some of the behaviors of
+ * Linux terminal devices (/dev/ptmx and /dev/pts/[0-9][0-9]*) on Solaris.
+ *
+ * It does this by layering over the /dev/ptmx device and intercepting
+ * opens to it.
+ *
+ * This driver makes the following assumptions about the way the ptm/pts
+ * drivers on Solaris work:
+ *
+ *    - all opens of the /dev/ptmx device node return a unique dev_t.
+ *
+ *    - the dev_t minor node value for each open ptm instance corresponds
+ *      to its associated slave terminal device number.  ie. the path to
+ *      the slave terminal device associated with an open ptm instance
+ *      whose dev_t minor node value is 5, is /dev/pts/5.
+ *
+ *    - the ptm driver always allocates the lowest numbered slave terminal
+ *      device possible.
+ */
+
+#include <sys/conf.h>
+#include <sys/ddi.h>
+#include <sys/devops.h>
+#include <sys/file.h>
+#include <sys/filio.h>
+#include <sys/kstr.h>
+#include <sys/ldlinux.h>
+#include <sys/lx_ptm.h>
+#include <sys/modctl.h>
+#include <sys/pathname.h>
+#include <sys/ptms.h>
+#include <sys/ptyvar.h>
+#include <sys/stat.h>
+#include <sys/stropts.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+/*
+ * Names of the native terminal devices and driver that this driver is
+ * layered over, followed by slave device delay values (in microseconds).
+ */
+#define	LP_PTM_PATH		"/dev/ptmx"
+#define	LP_PTS_PATH		"/dev/pts/"
+#define	LP_PTS_DRV_NAME		"pts"
+#define	LP_PTS_USEC_DELAY	(5 * 1000)	/* 5 ms */
+#define	LP_PTS_USEC_DELAY_MAX	(5 * MILLISEC)	/* 5 ms */
+
+/*
+ * this driver is layered on top of the ptm driver.  we'd like to
+ * make this driver's minor name space a mirror of the ptm driver's
+ * namespace, but we can't actually do this.  the reason is that the
+ * ptm driver is opened via the clone driver.  therefore no minor nodes
+ * of the ptm driver are actually accessible via the filesystem.
+ * since we're not a streams device we can't be opened by the clone
+ * driver.  therefore we need to have at least one minor node accessible
+ * via the filesystem so that consumers can open it.  we use the device
+ * node with a minor number of 0 for this purpose.  what this means is
+ * that minor node 0 can't be used to map ptm minor node 0.  since this
+ * minor node is now reserved we need to shift our ptm minor node
+ * mappings by one.  ie. a ptm minor node with a value of 0 will
+ * correspond to our minor node with a value of 1.  these mappings are
+ * managed with the following macros.
+ *
+ * DEVT_TO_INDEX() maps one of our dev_t values to an index into the
+ * layered handle array; INDEX_TO_MINOR() maps such an index (which is
+ * the ptm minor number, see lx_ptm_open()) to our minor number.
+ */
+#define	DEVT_TO_INDEX(x)	LX_PTM_DEV_TO_PTS(x)
+#define	INDEX_TO_MINOR(x)	((x) + 1)
+
+/*
+ * grow our layered handle array by the same size increment that the ptm
+ * driver uses to grow the pty device space - PTY_MAXDELTA.  this is the
+ * minimum number of entries lx_ptm_lh_grow() adds on each resize.
+ */
+#define	LP_PTY_INC	128
+
+/*
+ * lx_ptm_ops contains state information about outstanding operations on the
+ * underlying master terminal device.  Currently we only track information
+ * for read operations.
+ *
+ * Note that this data has not been rolled directly into the lx_ptm_handle
+ * structure because we can't put mutexes or condition variables into the
+ * lx_ptm_handle structure.  The reason is that the array of lx_ptm_handle
+ * structures linked to from the global lx_ptm state can be resized
+ * dynamically, and when it's resized, the new array is at a different
+ * memory location and the old array memory is discarded.  Mutexes and cvs
+ * are accessed based off their address, so if this array was re-sized while
+ * there were outstanding operations on any mutexes or cvs in the array
+ * then the system would tip over.  In the future the lx_ptm_handle structure
+ * array should probably be replaced with either an array of pointers to
+ * lx_ptm_handle structures or some other kind of data structure containing
+ * pointers to lx_ptm_handle structures.  Then the lx_ptm_ops structure
+ * could be folded directly into the lx_ptm_handle structures.  (This will
+ * also require the definition of a new locking mechanism to protect the
+ * contents of lx_ptm_handle structures.)
+ */
+typedef struct lx_ptm_ops {
+	/* Number of read operations in progress (0 or 1). */
+	int			lpo_rops;
+
+	/* Signalled when a read operation ends. */
+	kcondvar_t		lpo_rops_cv;
+
+	/* Protects lpo_rops and is the mutex used with lpo_rops_cv. */
+	kmutex_t		lpo_rops_lock;
+} lx_ptm_ops_t;
+
+/*
+ * Every open of the master terminal device in a zone results in a new
+ * lx_ptm_handle handle allocation.  These handles are stored in an array
+ * hanging off the lx_ptm_state structure.  A slot whose lph_handle is
+ * NULL is unused.
+ */
+typedef struct lx_ptm_handle {
+	/* Device handle to the underlying real /dev/ptmx master terminal. */
+	ldi_handle_t		lph_handle;
+
+	/* Flag to indicate if TIOCPKT mode has been enabled. */
+	int			lph_pktio;
+
+	/*
+	 * Number of EOFs (slave device closes) seen on the master stream.
+	 * Non-zero means the slave has been opened and closed at least once.
+	 */
+	int			lph_eofed;
+
+	/* Callback handler in the ptm driver to check if slave is open. */
+	ptmptsopencb_t		lph_ppocb;
+
+	/* Pointer to state for operations on underlying device. */
+	lx_ptm_ops_t		*lph_lpo;
+} lx_ptm_handle_t;
+
+/*
+ * Global state for the lx_ptm driver.  The scalar fields are set up in
+ * lx_ptm_attach(); the handle array is grown on demand by lx_ptm_lh_grow().
+ */
+typedef struct lx_ptm_state {
+	/* lx_ptm device devinfo pointer */
+	dev_info_t		*lps_dip;
+
+	/* LDI ident used to open underlying real /dev/ptmx master terminals. */
+	ldi_ident_t		lps_li;
+
+	/* pts driver's major number */
+	major_t			lps_pts_major;
+
+	/* rw lock used to manage access and growth of lps_lh_array */
+	krwlock_t		lps_lh_rwlock;
+
+	/* number of elements in lps_lh_array */
+	uint_t			lps_lh_count;
+
+	/* Array of handles to underlying real /dev/ptmx master terminals. */
+	lx_ptm_handle_t		*lps_lh_array;
+} lx_ptm_state_t;
+
+/* The lx_ptm global state structure (a single static instance). */
+static lx_ptm_state_t	lps;
+
+/*
+ * List of modules to be autopushed onto slave terminal devices when they
+ * are opened in an lx branded zone.  The list is NULL terminated and is
+ * handed to kstr_autopush() by lx_ptm_open().
+ */
+static char *lx_pts_mods[] = {
+	"ptem",
+	"ldterm",
+	"ttcompat",
+	LDLINUX_MOD,
+	NULL
+};
+
+/*
+ * Grow the layered handle array so that "index" becomes a valid slot.
+ *
+ * Must be called without lps_lh_rwlock held: the lock is taken as a
+ * writer below.  If another thread has already grown the array far
+ * enough by the time the lock is acquired, the new allocation is
+ * discarded and the array is left untouched.
+ */
+static void
+lx_ptm_lh_grow(uint_t index)
+{
+	uint_t			new_lh_count, old_lh_count;
+	lx_ptm_handle_t		*new_lh_array, *old_lh_array;
+
+	/*
+	 * allocate a new array.  this is done before taking the rw lock
+	 * on the array so that readers can still access devices in case
+	 * our memory allocation blocks.  (lps_lh_count is read unlocked
+	 * here; it is only used as a sizing hint and is re-checked under
+	 * the lock below.)
+	 */
+	new_lh_count = MAX(lps.lps_lh_count + LP_PTY_INC, index + 1);
+	new_lh_array =
+	    kmem_zalloc(sizeof (lx_ptm_handle_t) * new_lh_count, KM_SLEEP);
+
+	/*
+	 * double check that we still actually need to increase the size
+	 * of the array
+	 */
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	if (index < lps.lps_lh_count) {
+		/* someone beat us to it so there's nothing more to do */
+		rw_exit(&lps.lps_lh_rwlock);
+		kmem_free(new_lh_array,
+		    sizeof (lx_ptm_handle_t) * new_lh_count);
+		return;
+	}
+
+	/* copy the existing data into the new array */
+	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+	if (lps.lps_lh_count != 0) {
+		bcopy(lps.lps_lh_array, new_lh_array,
+		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+	}
+
+	/* save info on the old array */
+	old_lh_array = lps.lps_lh_array;
+	old_lh_count = lps.lps_lh_count;
+
+	/* install the new array */
+	lps.lps_lh_array = new_lh_array;
+	lps.lps_lh_count = new_lh_count;
+
+	rw_exit(&lps.lps_lh_rwlock);
+
+	/* free the old array (nothing to free on the very first grow) */
+	if (old_lh_array != NULL) {
+		kmem_free(old_lh_array,
+		    sizeof (lx_ptm_handle_t) * old_lh_count);
+	}
+}
+
+/*
+ * Store the layered handle "lh" in slot "index" of the handle array,
+ * growing the array first when the slot does not exist yet.  A new
+ * lx_ptm_ops_t is allocated and attached to the slot as well.
+ */
+static void
+lx_ptm_lh_insert(uint_t index, ldi_handle_t lh)
+{
+	lx_ptm_ops_t	*ops;
+	lx_ptm_handle_t	*slot;
+
+	ASSERT(lh != NULL);
+
+	/* Set up the ops state before the array lock is taken. */
+	ops = kmem_zalloc(sizeof (lx_ptm_ops_t), KM_SLEEP);
+	mutex_init(&ops->lpo_rops_lock, NULL, MUTEX_DEFAULT, NULL);
+	cv_init(&ops->lpo_rops_cv, NULL, CV_DEFAULT, NULL);
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	/*
+	 * lx_ptm_lh_grow() acquires the array lock itself, so release it
+	 * around the call and take it again afterwards.
+	 */
+	if (index >= lps.lps_lh_count) {
+		rw_exit(&lps.lps_lh_rwlock);
+		lx_ptm_lh_grow(index);
+		rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	}
+
+	ASSERT(index < lps.lps_lh_count);
+	slot = &lps.lps_lh_array[index];
+
+	/* The slot has to be unused at this point. */
+	ASSERT(slot->lph_handle == NULL);
+	ASSERT(slot->lph_pktio == 0);
+	ASSERT(slot->lph_eofed == 0);
+	ASSERT(slot->lph_lpo == NULL);
+
+	/* Fill in the slot. */
+	slot->lph_lpo = ops;
+	slot->lph_eofed = 0;
+	slot->lph_pktio = 0;
+	slot->lph_handle = lh;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Release slot "index" of the handle array and return the layered handle
+ * that was stored there so that the caller can close it.
+ *
+ * The lx_ptm_ops_t attached to the slot is torn down and freed.  No read
+ * operation may be in progress on the slot when this is called.
+ */
+static ldi_handle_t
+lx_ptm_lh_remove(uint_t index)
+{
+	lx_ptm_ops_t	*lpo;
+	ldi_handle_t	lh;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+	ASSERT(lps.lps_lh_array[index].lph_lpo->lpo_rops == 0);
+	ASSERT(!MUTEX_HELD(&lps.lps_lh_array[index].lph_lpo->lpo_rops_lock));
+
+	/*
+	 * free the ops structure.  the mutex and cv were initialized in
+	 * lx_ptm_lh_insert() and must be destroyed before the memory that
+	 * contains them is handed back to the allocator.
+	 */
+	lpo = lps.lps_lh_array[index].lph_lpo;
+	cv_destroy(&lpo->lpo_rops_cv);
+	mutex_destroy(&lpo->lpo_rops_lock);
+	kmem_free(lpo, sizeof (lx_ptm_ops_t));
+	lps.lps_lh_array[index].lph_lpo = NULL;
+
+	/* remove the handle and return it */
+	lh = lps.lps_lh_array[index].lph_handle;
+	lps.lps_lh_array[index].lph_handle = NULL;
+	lps.lps_lh_array[index].lph_pktio = 0;
+	lps.lps_lh_array[index].lph_eofed = 0;
+	rw_exit(&lps.lps_lh_rwlock);
+	return (lh);
+}
+
+/*
+ * Copy the "is the slave open" callback saved for slot "index" into
+ * *ppocb.  The slot must be in use.
+ */
+static void
+lx_ptm_lh_get_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	/*
+	 * Nothing in the array is modified here, so a reader hold is
+	 * enough (as in the other lookup routines) and concurrent
+	 * lookups are not serialized against each other.
+	 */
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+
+	ASSERT(index < lps.lps_lh_count);
+	ASSERT(lps.lps_lh_array[index].lph_handle != NULL);
+
+	*ppocb = lps.lps_lh_array[index].lph_ppocb;
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Save a copy of the "is the slave open" callback *ppocb in slot "index".
+ * The slot must be in use.
+ */
+static void
+lx_ptm_lh_set_ppocb(uint_t index, ptmptsopencb_t *ppocb)
+{
+	lx_ptm_handle_t	*slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	slot->lph_ppocb = *ppocb;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the layered handle stored in slot "index".  The slot must be
+ * in use.
+ */
+static ldi_handle_t
+lx_ptm_lh_lookup(uint_t index)
+{
+	ldi_handle_t	handle;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	/* fetch the handle while the array can't be replaced under us */
+	handle = lps.lps_lh_array[index].lph_handle;
+	ASSERT(handle != NULL);
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (handle);
+}
+
+/*
+ * Return the operations state (lx_ptm_ops_t) attached to slot "index".
+ * The slot must have one attached.
+ */
+static lx_ptm_ops_t *
+lx_ptm_lpo_lookup(uint_t index)
+{
+	lx_ptm_ops_t	*ops;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	/* fetch the ops pointer while the array can't be replaced under us */
+	ops = lps.lps_lh_array[index].lph_lpo;
+	ASSERT(ops != NULL);
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (ops);
+}
+
+/*
+ * Return the TIOCPKT (packet mode) flag of slot "index".  The slot must
+ * be in use.
+ */
+static int
+lx_ptm_lh_pktio_get(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+	int		enabled;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	enabled = slot->lph_pktio;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (enabled);
+}
+
+/*
+ * Set the TIOCPKT (packet mode) flag of slot "index" to "pktio".  The
+ * slot must be in use.
+ */
+static void
+lx_ptm_lh_pktio_set(uint_t index, int pktio)
+{
+	lx_ptm_handle_t	*slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	slot->lph_pktio = pktio;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Return the EOF counter of slot "index".  A non-zero value means at
+ * least one EOF has been recorded for it.  The slot must be in use.
+ */
+static int
+lx_ptm_lh_eofed_get(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+	int		count;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_READER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	count = slot->lph_eofed;
+
+	rw_exit(&lps.lps_lh_rwlock);
+	return (count);
+}
+
+/*
+ * Record one more EOF for slot "index" by bumping its EOF counter.  The
+ * slot must be in use.
+ */
+static void
+lx_ptm_lh_eofed_set(uint_t index)
+{
+	lx_ptm_handle_t	*slot;
+
+	rw_enter(&lps.lps_lh_rwlock, RW_WRITER);
+	ASSERT(index < lps.lps_lh_count);
+
+	slot = &lps.lps_lh_array[index];
+	ASSERT(slot->lph_handle != NULL);
+	slot->lph_eofed += 1;
+
+	rw_exit(&lps.lps_lh_rwlock);
+}
+
+/*
+ * Begin a read operation on "dev", waiting until any read operation
+ * already in progress has ended.
+ *
+ * Returns 0 once the operation has been started, or -1 if the wait was
+ * interrupted (cv_wait_sig() returned 0).
+ */
+static int
+lx_ptm_read_start(dev_t dev)
+{
+	lx_ptm_ops_t	*ops = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+	mutex_enter(&ops->lpo_rops_lock);
+	ASSERT(ops->lpo_rops >= 0);
+
+	for (;;) {
+		/* No other read operation outstanding: we can proceed. */
+		if (ops->lpo_rops == 0)
+			break;
+
+		/* Woken up normally: go and re-check the counter. */
+		if (cv_wait_sig(&ops->lpo_rops_cv, &ops->lpo_rops_lock) != 0)
+			continue;
+
+		/* The wait was interrupted. */
+		mutex_exit(&ops->lpo_rops_lock);
+		return (-1);
+	}
+
+	/* Mark the read operation as started. */
+	ops->lpo_rops++;
+	VERIFY(ops->lpo_rops == 1);
+	mutex_exit(&ops->lpo_rops_lock);
+	return (0);
+}
+
+/*
+ * End the read operation on "dev" that was begun with
+ * lx_ptm_read_start() and wake up one thread waiting to start its own.
+ */
+static void
+lx_ptm_read_end(dev_t dev)
+{
+	lx_ptm_ops_t	*ops = lx_ptm_lpo_lookup(DEVT_TO_INDEX(dev));
+
+	mutex_enter(&ops->lpo_rops_lock);
+	ASSERT(ops->lpo_rops >= 0);
+
+	/* Mark the read operation as finished. */
+	ops->lpo_rops--;
+	VERIFY(ops->lpo_rops == 0);
+
+	cv_signal(&ops->lpo_rops_cv);
+	mutex_exit(&ops->lpo_rops_lock);
+}
+
+/*
+ * Ask the ptm driver, through the callback saved at open time, whether
+ * the slave device associated with "dev" is open.  Returns the
+ * callback's result.
+ */
+static int
+lx_ptm_pts_isopen(dev_t dev)
+{
+	ptmptsopencb_t	cb;
+
+	lx_ptm_lh_get_ppocb(DEVT_TO_INDEX(dev), &cb);
+
+	return ((*cb.ppocb_func)(cb.ppocb_arg));
+}
+
+/*
+ * Consume an EOF message sitting at the head of the stream behind "lh".
+ * The result of the read is deliberately ignored.
+ */
+static void
+lx_ptm_eof_read(ldi_handle_t lh)
+{
+	char		scratch[1];
+	iovec_t		vec;
+	struct uio	req;
+
+	/*
+	 * A zero byte read from the stream is all it takes to remove an
+	 * EOF message from the head of the stream.
+	 */
+	vec.iov_base = scratch;
+	vec.iov_len = 0;
+
+	req.uio_iov = &vec;
+	req.uio_iovcnt = 1;
+	req.uio_offset = 0;
+	req.uio_resid = vec.iov_len;
+	req.uio_segflg = UIO_SYSSPACE;
+	req.uio_fmode = 0;
+	req.uio_extflg = 0;
+	req.uio_llimit = MAXOFFSET_T;
+
+	(void) ldi_read(lh, &req, kcred);
+}
+
+/*
+ * Drop a single EOF message from the head of the stream for "dev", if
+ * there is one.
+ *
+ * *rvalp is set to 1 when an EOF was dropped and to 0 otherwise.
+ * Returns 0 on success or the error from the I_NREAD ioctl.
+ */
+static int
+lx_ptm_eof_drop_1(dev_t dev, int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		head_size, nmsgs, err;
+
+	*rvalp = 0;
+
+	/*
+	 * I_NREAD reports the size of the first message and the number
+	 * of messages on the stream.  It is a streams framework ioctl so
+	 * it will succeed even if there have been previous write errors
+	 * on this stream.
+	 */
+	err = ldi_ioctl(lh, I_NREAD, (intptr_t)&head_size,
+	    FKIOCTL, kcred, &nmsgs);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * An EOF is represented by a zero length data message, so one is
+	 * present only if there is a message and its size is zero.
+	 */
+	if ((nmsgs != 0) && (head_size == 0)) {
+		/* Record the fact that the slave device has been closed. */
+		lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+
+		/* drop the EOF */
+		lx_ptm_eof_read(lh);
+		*rvalp = 1;
+	}
+	return (0);
+}
+
+/*
+ * Drop every EOF message at the head of the stream for "dev".
+ *
+ * If rvalp is not NULL, *rvalp is set to 1 when at least one EOF was
+ * dropped and to 0 otherwise.  Returns 0 on success or the first error
+ * reported by lx_ptm_eof_drop_1().
+ */
+static int
+lx_ptm_eof_drop(dev_t dev, int *rvalp)
+{
+	int	dropped, err;
+
+	if (rvalp != NULL)
+		*rvalp = 0;
+
+	/* Keep going for as long as each pass succeeds and drops an EOF. */
+	while (((err = lx_ptm_eof_drop_1(dev, &dropped)) == 0) &&
+	    (dropped != 0)) {
+		if (rvalp != NULL)
+			*rvalp = 1;
+	}
+	return (err);
+}
+
+/*
+ * Check whether anything is waiting to be read from the stream for "dev".
+ *
+ * With ignore_eof set, only actual data bytes count (FIONREAD); without
+ * it any message counts, zero length ones included (I_NREAD).  *rvalp is
+ * set to 1 if something is waiting and to 0 otherwise.  Returns 0 on
+ * success or the error from the ioctl.
+ */
+static int
+lx_ptm_data_check(dev_t dev, int ignore_eof, int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		nbytes, nmsgs, err;
+
+	*rvalp = 0;
+
+	if (!ignore_eof) {
+		/* Any message on the stream counts. */
+		err = ldi_ioctl(lh, I_NREAD, (intptr_t)&nbytes,
+		    FKIOCTL, kcred, &nmsgs);
+		if (err != 0)
+			return (err);
+		*rvalp = (nmsgs != 0) ? 1 : 0;
+		return (0);
+	}
+
+	/* Only real data bytes count. */
+	err = ldi_ioctl(lh, FIONREAD, (intptr_t)&nbytes,
+	    FKIOCTL, kcred, &nmsgs);
+	if (err != 0)
+		return (err);
+	*rvalp = (nbytes != 0) ? 1 : 0;
+	return (0);
+}
+
+/*
+ * attach(9E) entry point.  Only DDI_ATTACH is supported.
+ *
+ * Creates the minor node that consumers open, obtains the LDI ident
+ * used for layered opens of /dev/ptmx, and initializes the global
+ * driver state.  Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+static int
+lx_ptm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+	int err;
+
+	if (cmd != DDI_ATTACH)
+		return (DDI_FAILURE);
+
+	if (ddi_create_minor_node(dip, LX_PTM_MINOR_NODE, S_IFCHR,
+	    ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS)
+		return (DDI_FAILURE);
+
+	err = ldi_ident_from_dip(dip, &lps.lps_li);
+	if (err != 0) {
+		/*
+		 * undo the minor node creation.  the node has to be
+		 * removed under the name it was created with above.
+		 */
+		ddi_remove_minor_node(dip, LX_PTM_MINOR_NODE);
+		return (DDI_FAILURE);
+	}
+
+	lps.lps_dip = dip;
+	lps.lps_pts_major = ddi_name_to_major(LP_PTS_DRV_NAME);
+
+	/* the handle array starts out empty and is grown on demand */
+	rw_init(&lps.lps_lh_rwlock, NULL, RW_DRIVER, NULL);
+	lps.lps_lh_count = 0;
+	lps.lps_lh_array = NULL;
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * detach(9E) entry point.  Only DDI_DETACH is supported.
+ *
+ * Undoes everything lx_ptm_attach() set up: the minor node, the LDI
+ * ident, the handle array and the rw lock that protects the array.
+ * Returns DDI_SUCCESS or DDI_FAILURE.
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+	if (cmd != DDI_DETACH)
+		return (DDI_FAILURE);
+
+	/* remove all minor nodes created for this dev_info */
+	ddi_remove_minor_node(dip, NULL);
+
+	ldi_ident_release(lps.lps_li);
+	lps.lps_dip = NULL;
+
+	ASSERT((lps.lps_lh_count != 0) || (lps.lps_lh_array == NULL));
+	ASSERT((lps.lps_lh_count == 0) || (lps.lps_lh_array != NULL));
+	if (lps.lps_lh_array != NULL) {
+		kmem_free(lps.lps_lh_array,
+		    sizeof (lx_ptm_handle_t) * lps.lps_lh_count);
+		lps.lps_lh_array = NULL;
+		lps.lps_lh_count = 0;
+	}
+
+	/* release the lock that was initialized in lx_ptm_attach() */
+	rw_destroy(&lps.lps_lh_rwlock);
+
+	return (DDI_SUCCESS);
+}
+
+/*
+ * open(9E) entry point.
+ *
+ * Opens the real /dev/ptmx through the LDI, rewrites *devp so that our
+ * minor number mirrors the ptm minor number (shifted by one), fetches the
+ * "is the slave open" callback from the ptm driver, configures autopush
+ * for the matching slave device and finally saves the layered handle.
+ *
+ * Returns 0 on success, ENOTSUP for non-blocking opens, the error from
+ * the layered open / ldi_get_dev(), or EIO if the callback ioctl or the
+ * autopush setup fails.  The layered handle is closed on every failure
+ * after it has been opened.
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+{
+	struct strioctl	iocb;
+	ptmptsopencb_t	ppocb = { NULL, NULL };
+	ldi_handle_t	lh;
+	major_t		maj, our_major = getmajor(*devp);
+	minor_t		min, lastmin;
+	uint_t		index, anchor = 1;
+	dev_t		ptm_dev;
+	int		err, rval = 0;
+
+	/*
+	 * Don't support the FNDELAY flag and FNONBLOCK until we either
+	 * find a Linux app that opens /dev/ptmx with the O_NDELAY
+	 * or O_NONBLOCK flags explicitly, or until we create test cases
+	 * to determine how reads of master terminal devices opened with
+	 * these flags behave in different situations on Linux.  Supporting
+	 * these flags will involve enhancing our read implementation
+	 * and changing the way it deals with EOF notifications.
+	 */
+	if (flag & (FNDELAY | FNONBLOCK))
+		return (ENOTSUP);
+
+	/*
+	 * we're layered on top of the ptm driver so open that driver
+	 * first.  (note that we're opening /dev/ptmx in the global
+	 * zone, not ourselves in the Linux zone.)
+	 */
+	err = ldi_open_by_name(LP_PTM_PATH, flag, credp, &lh, lps.lps_li);
+	if (err != 0)
+		return (err);
+
+	/* get the devt returned by the ptmx open */
+	err = ldi_get_dev(lh, &ptm_dev);
+	if (err != 0) {
+		(void) ldi_close(lh, flag, credp);
+		return (err);
+	}
+
+	/*
+	 * we're a cloning driver so here we'll change the devt that we
+	 * return.  the ptmx is also a cloning driver so we'll just use
+	 * its minor number as our minor number (it already manages its
+	 * minor name space so no reason to duplicate the effort.)
+	 */
+	index = getminor(ptm_dev);
+	*devp = makedevice(our_major, INDEX_TO_MINOR(index));
+
+	/* Get a callback function to query if the pts device is open. */
+	iocb.ic_cmd = PTMPTSOPENCB;
+	iocb.ic_timout = 0;
+	iocb.ic_len = sizeof (ppocb);
+	iocb.ic_dp = (char *)&ppocb;
+
+	err = ldi_ioctl(lh, I_STR, (intptr_t)&iocb, FKIOCTL, kcred, &rval);
+	if ((err != 0) || (rval != 0)) {
+		(void) ldi_close(lh, flag, credp);
+		return (EIO); /* XXX return something else here? */
+	}
+	ASSERT(ppocb.ppocb_func != NULL);
+
+	/*
+	 * now setup autopush for the terminal slave device.  this is
+	 * necessary so that when a Linux program opens the device we
+	 * can push required strmod modules onto the stream.  in Solaris
+	 * this is normally done by the application that actually
+	 * allocates the terminal.
+	 */
+	maj = lps.lps_pts_major;
+	min = index;
+	lastmin = 0;
+	err = kstr_autopush(SET_AUTOPUSH, &maj, &min, &lastmin,
+	    &anchor, lx_pts_mods);
+	if (err != 0) {
+		(void) ldi_close(lh, flag, credp);
+		return (EIO); /* XXX return something else here? */
+	}
+
+	/*
+	 * save off this layered handle for future accesses.  the callback
+	 * can only be stored once the slot has been populated.
+	 */
+	lx_ptm_lh_insert(index, lh);
+	lx_ptm_lh_set_ppocb(index, &ppocb);
+	return (0);
+}
+
+/*
+ * close(9E) entry point.
+ *
+ * Releases our saved state for the terminal, clears autopush on the
+ * associated slave device and then closes the underlying ptm device.
+ * Returns the result of the layered close.
+ */
+/*ARGSUSED*/
+static int
+lx_ptm_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+	ldi_handle_t	lh;
+	major_t		maj;
+	minor_t		min, lastmin;
+	uint_t		index;
+	int		err;
+
+	index = DEVT_TO_INDEX(dev);
+
+	/*
+	 * we must cleanup all the state associated with this major/minor
+	 * terminal pair before actually closing the ptm master device.
+	 * this is required because once the close of the ptm device is
+	 * complete the major/minor terminal pair is immediately available
+	 * for re-use in any zone.
+	 */
+
+	/* free up our saved reference for this layered handle */
+	lh = lx_ptm_lh_remove(index);
+
+	/* unconfigure autopush for the associated terminal slave device */
+	maj = lps.lps_pts_major;
+	min = index;
+	lastmin = 0;
+	do {
+		/*
+		 * we loop here because we don't want to release this ptm
+		 * node if autopush can't be disabled on the associated
+		 * slave device because then bad things could happen if
+		 * another brand were to get this terminal allocated
+		 * to them.
+		 *
+		 * XXX should we ever give up?
+		 * (as written, this retries for as long as kstr_autopush()
+		 * keeps failing.)
+		 */
+		err = kstr_autopush(CLR_AUTOPUSH, &maj, &min, &lastmin,
+		    0, NULL);
+	} while (err != 0);
+
+	err = ldi_close(lh, flag, credp);
+
+	/*
+	 * note that we don't have to bother with changing the permissions
+	 * on the associated slave device here.  the reason is that no one
+	 * can actually open the device until its associated master
+	 * device is re-opened, which will result in the permissions on
+	 * it being reset.
+	 */
+	return (err);
+}
+
+/*
+ * Perform one pass of a read on the master terminal "dev".
+ *
+ * Returns 0 with *loop == 0 when data was read, 0 with *loop == 1 when
+ * the read returned no data (an EOF) and should be retried, EIO when the
+ * slave has been closed and nothing is left to read, or the error from
+ * the underlying operations.
+ */
+static int
+lx_ptm_read_loop(dev_t dev, struct uio *uiop, cred_t *credp, int *loop)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		err, rval;
+	/* snapshot of the request, used below to detect a zero byte read */
+	struct uio	uio = *uiop;
+
+	*loop = 0;
+
+	/*
+	 * Here's another way that Linux master terminals behave differently
+	 * from Solaris master terminals.  If you do a read on a Linux
+	 * master terminal (that was opened without NDELAY and NONBLOCK)
+	 * whose corresponding slave terminal is currently closed and
+	 * has been opened and closed at least once, Linux returns -1 and
+	 * sets errno to EIO whereas Solaris blocks.
+	 */
+	if (lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev))) {
+		/* Slave has been opened and closed at least once. */
+		if (lx_ptm_pts_isopen(dev) == 0) {
+			/*
+			 * Slave is closed.  Make sure that data is available
+			 * before attempting a read.
+			 */
+			if ((err = lx_ptm_data_check(dev, 0, &rval)) != 0)
+				return (err);
+
+			/* If there is no data available then return. */
+			if (rval == 0)
+				return (EIO);
+		}
+	}
+
+	/* Actually do the read operation. */
+	if ((err = ldi_read(lh, uiop, credp)) != 0)
+		return (err);
+
+	/* If read returned actual data then return. */
+	if (uio.uio_resid != uiop->uio_resid)
+		return (0);
+
+	/*
+	 * This was a zero byte read (ie, an EOF).  This indicates
+	 * that the slave terminal device has been closed.  Record
+	 * the fact that the slave device has been closed and retry
+	 * the read operation.
+	 */
+	lx_ptm_lh_eofed_set(DEVT_TO_INDEX(dev));
+	*loop = 1;
+	return (0);
+}
+
+/*
+ * read(9E) entry point.
+ *
+ * Reads from the underlying master terminal, retrying for as long as
+ * lx_ptm_read_loop() asks for it.  When packet mode (TIOCPKT) is enabled
+ * the first byte of the caller's buffer is reserved up front and filled
+ * in with a status byte once the read has succeeded.
+ *
+ * Returns 0 on success, EINTR if waiting for another reader was
+ * interrupted, EFAULT if the status byte can't be reserved or stored, or
+ * the error from lx_ptm_read_loop().
+ */
+static int
+lx_ptm_read(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	int		pktio = lx_ptm_lh_pktio_get(DEVT_TO_INDEX(dev));
+	int		err, loop;
+	struct uio	uio;
+	struct iovec	iovp;
+
+	ASSERT(uiop->uio_iovcnt > 0);
+
+	/*
+	 * If packet mode has been enabled (via TIOCPKT) we need to pad
+	 * all read requests with a leading byte that indicates any
+	 * relevant control status information.
+	 */
+	if (pktio != 0) {
+		/*
+		 * We'd like to write the control information into
+		 * the current buffer but we can't yet.  We don't
+		 * want to modify userspace memory here only to have
+		 * the read operation fail later.  So instead
+		 * what we'll do here is read one character from the
+		 * beginning of the memory pointed to by the uio
+		 * structure.  This will advance the output pointer
+		 * by one.  Then when the read completes successfully
+		 * we can update the byte that we passed over.  Before
+		 * we do the read make a copy of the current uiop and
+		 * iovec structs so we can write to them later.
+		 */
+		uio = *uiop;
+		iovp = *uiop->uio_iov;
+		uio.uio_iov = &iovp;
+
+		if (uwritec(uiop) == -1)
+			return (EFAULT);
+	}
+
+	do {
+		/*
+		 * Serialize all reads.  We need to do this so that we can
+		 * properly emulate the behavior of master terminals on Linux.
+		 * In reality this serialization should not pose any kind of
+		 * performance problem since it would be very strange to have
+		 * multiple threads trying to read from the same master
+		 * terminal device concurrently.
+		 */
+		if (lx_ptm_read_start(dev) != 0)
+			return (EINTR);
+
+		err = lx_ptm_read_loop(dev, uiop, credp, &loop);
+		lx_ptm_read_end(dev);
+		if (err != 0)
+			return (err);
+	} while (loop != 0);
+
+	if (pktio != 0) {
+		uint8_t		pktio_data = TIOCPKT_DATA;
+
+		/*
+		 * Note that the control status information we
+		 * pass back is faked up in the sense that we
+		 * don't actually report any events, we always
+		 * report a status of 0.  The byte is stored through
+		 * the saved copy of the uio, ie. at the position that
+		 * was skipped over before the read.
+		 */
+		if (uiomove(&pktio_data, 1, UIO_READ, &uio) != 0)
+			return (EFAULT);
+	}
+
+	return (0);
+}
+
+/*
+ * write(9E) entry point.  Writes are passed straight through to the
+ * underlying master terminal; the result of the layered write is
+ * returned.
+ */
+static int
+lx_ptm_write(dev_t dev, struct uio *uiop, cred_t *credp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+
+	return (ldi_write(lh, uiop, credp));
+}
+
+/*
+ * ioctl(9E) entry point.
+ *
+ * I_SETSIG and I_ESETSIG are rejected with EINVAL, TIOCPKT is emulated
+ * locally, and every other command is passed through to the underlying
+ * master terminal.
+ */
+static int
+lx_ptm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+    int *rvalp)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	int		enable;
+
+	switch (cmd) {
+	case I_SETSIG:
+	case I_ESETSIG:
+		/*
+		 * here we need to make sure that we never allow the
+		 * I_SETSIG and I_ESETSIG ioctls to pass through.  we
+		 * do this because we can't support them.
+		 *
+		 * the native Solaris ptm device supports these ioctls
+		 * because they are streams framework ioctls and all
+		 * streams devices support them by default.  these ioctls
+		 * cause the current process to be registered with a
+		 * stream and receive signals when certain stream events
+		 * occur.
+		 *
+		 * a problem arises with cleanup of these registrations
+		 * for layered drivers.
+		 *
+		 * normally the streams framework is notified whenever a
+		 * process closes any reference to a stream and it goes
+		 * ahead and cleans up these registrations.  but actual
+		 * device drivers are not notified when a process performs
+		 * a close operation unless the process is closing the
+		 * last opened reference to the device on the entire
+		 * system.
+		 *
+		 * so while we could pass these ioctls on and allow
+		 * processes to register for signal delivery, we would
+		 * never receive any notification when those processes
+		 * exit (or close a stream) and we wouldn't be able to
+		 * unregister them.
+		 *
+		 * luckily these operations are streams specific and Linux
+		 * doesn't support streams devices.  so it doesn't actually
+		 * seem like we need to support these ioctls.  if it turns
+		 * out that we do need to support them for some reason in
+		 * the future, the current driver model will have to be
+		 * enhanced to better support streams device layering.
+		 */
+		return (EINVAL);
+
+	case TIOCPKT:
+		/*
+		 * here we fake up support for TIOCPKT.  Linux applications
+		 * expect /dev/ptmx to support this ioctl, but on Solaris
+		 * it doesn't.  (it is supported on older bsd style ptys.)
+		 *
+		 * the reason that this ioctl is emulated here instead of
+		 * in userland is that this ioctl affects the results
+		 * returned from read() operations.  if this ioctl was
+		 * emulated in userland the brand library would need to
+		 * intercept all read operations and check to see if pktio
+		 * was enabled for the fd being read from.  since this
+		 * ioctl only needs to be supported on the ptmx device it
+		 * makes more sense to support it here where we can easily
+		 * update the results returned for read() operations
+		 * performed on ourselves.
+		 */
+		if (ddi_copyin((void *)arg, &enable, sizeof (enable),
+		    mode) != DDI_SUCCESS)
+			return (EFAULT);
+
+		/* the stored flag is normalized to 0 or 1 */
+		lx_ptm_lh_pktio_set(DEVT_TO_INDEX(dev),
+		    (enable != 0) ? 1 : 0);
+		return (0);
+
+	default:
+		break;
+	}
+
+	/* everything else goes to the underlying master terminal */
+	return (ldi_ioctl(lh, cmd, arg, mode, credp, rvalp));
+}
+
+/*
+ * Perform one pass of a poll on the master terminal "dev".
+ *
+ * Returns 0 with *loop == 0 when *reventsp holds the final result,
+ * 0 with *loop == 1 when an EOF was dropped from the stream and the poll
+ * has to be retried from the top, or the error from the underlying
+ * operations.
+ */
+static int
+lx_ptm_poll_loop(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp, int *loop)
+{
+	ldi_handle_t	lh = lx_ptm_lh_lookup(DEVT_TO_INDEX(dev));
+	short		reventsp2;
+	int		err, rval;
+
+	*loop = 0;
+
+	/*
+	 * If the slave device has been opened and closed at least
+	 * once and the slave device is currently closed, then poll
+	 * always needs to return immediately.
+	 */
+	if ((lx_ptm_lh_eofed_get(DEVT_TO_INDEX(dev)) != 0) &&
+	    (lx_ptm_pts_isopen(dev) == 0)) {
+		/* In this case always return POLLHUP */
+		*reventsp = POLLHUP;
+
+		/*
+		 * Check if there really is data on the stream.
+		 * If so set the correct return flags.
+		 */
+		if ((err = lx_ptm_data_check(dev, 1, &rval)) != 0) {
+			/* Something went wrong. */
+			return (err);
+		}
+		if (rval != 0)
+			*reventsp |= (events & (POLLIN | POLLRDNORM));
+
+		/*
+		 * Is the user checking for writability?  Note that for ptm
+		 * devices Linux seems to ignore the POLLWRBAND write flag.
+		 */
+		if ((events & POLLWRNORM) == 0)
+			return (0);
+
+		/*
+		 * To check if the stream is writable we have to actually
+		 * call poll, but make sure to set anyyet to 1 to prevent
+		 * the streams framework from setting up callbacks.
+		 */
+		if ((err = ldi_poll(lh, POLLWRNORM, 1, &reventsp2, NULL)) != 0)
+			return (err);
+
+		*reventsp |= (reventsp2 & POLLWRNORM);
+	} else {
+		int lockstate;
+
+		/*
+		 * Either the slave device is open or no EOF has been
+		 * recorded for it yet: do the poll.
+		 */
+		if ((err = ldi_poll(lh, events, anyyet, reventsp, phpp)) != 0)
+			return (err);
+
+		/*
+		 * Drop any leading EOFs on the stream.
+		 *
+		 * Note that we have to use pollunlock() here to avoid
+		 * recursive mutex enters in the poll framework.  The
+		 * reason is that if there is an EOF message on the stream
+		 * then the act of reading from the queue to remove the
+		 * message can cause the ptm driver's event service
+		 * routine to be invoked, and if there is no open
+		 * slave device then the ptm driver may generate
+		 * error messages and put them on the stream.  This
+		 * in turn will generate a poll event and the poll
+		 * framework will try to invoke any poll callbacks
+		 * associated with the stream.  In the process of
+		 * doing that the poll framework will try to acquire
+		 * locks that we are already holding.  So we need to
+		 * drop those locks here before we do our read.
+		 */
+		lockstate = pollunlock();
+		err = lx_ptm_eof_drop(dev, &rval);
+		pollrelock(lockstate);
+		if (err)
+			return (err);
+
+		/* If no EOF was dropped then return */
+		if (rval == 0)
+			return (0);
+
+		/*
+		 * An EOF was removed from the stream.  Retry the entire
+		 * poll operation from the top because polls on the ptm
+		 * device should behave differently now.
+		 */
+		*loop = 1;
+	}
+	return (0);
+}
+
+/*
+ * chpoll(9E) entry point.
+ *
+ * Runs lx_ptm_poll_loop() until it no longer asks for a retry.  Each
+ * pass is serialized against read operations.  Returns 0 on success,
+ * EINTR if waiting for a read operation to end was interrupted, or the
+ * error from lx_ptm_poll_loop().
+ */
+static int
+lx_ptm_poll(dev_t dev, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp)
+{
+	int	again, err;
+
+	for (;;) {
+		/* Serialize ourself wrt read operations. */
+		if (lx_ptm_read_start(dev) != 0)
+			return (EINTR);
+
+		err = lx_ptm_poll_loop(dev,
+		    events, anyyet, reventsp, phpp, &again);
+		lx_ptm_read_end(dev);
+
+		if (err != 0)
+			return (err);
+		if (again == 0)
+			return (0);
+	}
+}
+
+/*
+ * Character device entry points.  Only open, close, read, write, ioctl
+ * and chpoll are implemented by this driver.
+ */
+static struct cb_ops lx_ptm_cb_ops = {
+	lx_ptm_open,		/* open */
+	lx_ptm_close,		/* close */
+	nodev,			/* strategy */
+	nodev,			/* print */
+	nodev,			/* dump */
+	lx_ptm_read,		/* read */
+	lx_ptm_write,		/* write */
+	lx_ptm_ioctl,		/* ioctl */
+	nodev,			/* devmap */
+	nodev,			/* mmap */
+	nodev,			/* segmap */
+	lx_ptm_poll,		/* chpoll */
+	ddi_prop_op,		/* prop_op */
+	NULL,			/* cb_str */
+	D_NEW | D_MP,		/* cb_flag */
+	CB_REV,			/* cb_rev */
+	NULL,			/* cb_aread */
+	NULL			/* cb_awrite */
+};
+
+/* Device operations for the lx_ptm driver. */
+static struct dev_ops lx_ptm_ops = {
+	DEVO_REV,		/* devo_rev */
+	0,			/* devo_refcnt */
+	ddi_getinfo_1to1,	/* devo_getinfo */
+	nulldev,		/* devo_identify */
+	nulldev,		/* devo_probe */
+	lx_ptm_attach,		/* devo_attach */
+	lx_ptm_detach,		/* devo_detach */
+	nodev,			/* devo_reset */
+	&lx_ptm_cb_ops,		/* devo_cb_ops */
+	NULL,			/* devo_bus_ops */
+	NULL			/* devo_power */
+};
+
+/* Module linkage information for the kernel. */
+static struct modldrv modldrv = {
+	&mod_driverops,		/* type of module: device driver */
+	/* human-readable description of this module */
+	"Linux master terminal driver 'lx_ptm' %I%",
+	&lx_ptm_ops		/* driver ops */
+};
+
+/* Linkage passed to mod_install(), mod_info() and mod_remove() below. */
+static struct modlinkage modlinkage = {
+	MODREV_1,		/* revision of the modlinkage layout */
+	&modldrv,		/* the only linkage structure: the driver */
+	NULL			/* list terminator */
+};
+
+/* Module load entry point: install the lx_ptm driver module. */
+int
+_init(void)
+{
+	int	rv;
+
+	rv = mod_install(&modlinkage);
+	return (rv);
+}
+
+/* Module information entry point: report on this module via *modinfop. */
+int
+_info(struct modinfo *modinfop)
+{
+	int	rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/* Module unload entry point: remove the lx_ptm driver module. */
+int
+_fini(void)
+{
+	int	rv;
+
+	rv = mod_remove(&modlinkage);
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/io/lx_ptm.conf b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
new file mode 100644
index 0000000000..481b4e3c74
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/io/lx_ptm.conf
@@ -0,0 +1,27 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+#ident	"%Z%%M%	%I%	%E% SMI"
+
+name="lx_ptm" parent="pseudo" instance=0;
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
new file mode 100644
index 0000000000..d993c1eefc
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -0,0 +1,836 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/exec.h>
+#include <sys/lx_impl.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+#include <sys/lx_brand.h>
+#include <sys/termios.h>
+#include <sys/sunddi.h>
+#include <sys/ddi.h>
+#include <sys/exec.h>
+#include <sys/vnode.h>
+#include <sys/pathname.h>
+#include <sys/machelf.h>
+#include <sys/auxv.h>
+#include <sys/priv.h>
+#include <sys/regset.h>
+#include <sys/privregs.h>
+#include <sys/archsystm.h>
+#include <sys/zone.h>
+#include <sys/brand.h>
+
+/* Debug switch; presumably consulted by lx_print() -- TODO confirm. */
+int	lx_debug = 0;
+
+/* Brand operations implemented in this file. */
+void	lx_setbrand(proc_t *);
+int	lx_getattr(zone_t *, int, void *, size_t *);
+int	lx_setattr(zone_t *, int, void *, size_t);
+int	lx_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+		uintptr_t, uintptr_t, uintptr_t);
+void	lx_copy_procdata(proc_t *, proc_t *);
+
+/* Brand operations and helpers implemented in other lx brand files. */
+extern void lx_setrval(klwp_t *, int, int);
+extern void lx_proc_exit(proc_t *, klwp_t *);
+extern void lx_exec();
+extern int lx_initlwp(klwp_t *);
+extern void lx_forklwp(klwp_t *, klwp_t *);
+extern void lx_exitlwp(klwp_t *);
+extern void lx_freelwp(klwp_t *);
+extern greg_t lx_fixsegreg(greg_t, model_t);
+extern int lx_sched_affinity(int, uintptr_t, int, uintptr_t, int64_t *);
+
+/* Not referenced in this file; presumably set by lx_systrace -- confirm. */
+int lx_systrace_brand_enabled;
+
+/*
+ * Hooks called from lx_brandsys() for B_SYSENTRY and B_SYSRETURN while
+ * lx_systrace_enabled is set.
+ */
+lx_systrace_f *lx_systrace_entry_ptr;
+lx_systrace_f *lx_systrace_return_ptr;
+
+/* Toggled by lx_brand_systrace_enable() and lx_brand_systrace_disable(). */
+static int lx_systrace_enabled;
+
+static int lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+    struct intpdata *idata, int level, long *execsz, int setid,
+    caddr_t exec_file, struct cred *cred, int brand_action);
+
+/*
+ * lx brand operations vector.  Slots are positional; the notes describe
+ * what each routine does, not the structure's field names.
+ */
+struct brand_ops lx_brops = {
+	lx_brandsys,		/* brand system call handler */
+	lx_setbrand,		/* allocate brand data for a process */
+	lx_getattr,		/* read a brand-specific zone attribute */
+	lx_setattr,		/* set a brand-specific zone attribute */
+	lx_copy_procdata,	/* copy brand data from parent to child */
+	lx_proc_exit,		/* process exit */
+	lx_exec,		/* reset brand state on exec */
+	lx_setrval,		/* set the forked child's return value */
+	lx_initlwp,		/* allocate per-lwp brand data */
+	lx_forklwp,		/* copy per-lwp brand data to a new lwp */
+	lx_freelwp,		/* free per-lwp brand data */
+	lx_exitlwp,		/* lwp exit */
+	lx_elfexec		/* load a Linux ELF executable */
+};
+
+/*
+ * Machine-dependent brand hooks.  Only two slots are populated; NULL slots
+ * are unused by this brand.
+ */
+struct brand_mach_ops lx_mops = {
+	NULL,
+	lx_brand_int80_callback,	/* defined elsewhere; by name, int80 */
+	NULL,
+	NULL,
+	NULL,
+	lx_fixsegreg,			/* segment register fixup */
+};
+
+/* The "lx" brand: name plus its common and machine-dependent ops. */
+struct brand lx_brand = {
+	BRAND_VER_1,
+	"lx",
+	&lx_brops,
+	&lx_mops
+};
+
+/* Brand module linkage wrapping lx_brand. */
+static struct modlbrand modlbrand = {
+	&mod_brandops, "lx brand %I%", &lx_brand
+};
+
+/* Module linkage handed to mod_install(), mod_info() and mod_remove(). */
+static struct modlinkage modlinkage = {
+	MODREV_1, (void *)&modlbrand, NULL
+};
+
+/*
+ * Brand hook run when a branded process exits.
+ *
+ * For any process other than the zone's init, tear down the exiting lwp's
+ * brand state, free the per-process brand data and hand the process back
+ * to the native brand.
+ *
+ * For the zone's init the brand data is left alone.  Solaris would normally
+ * restart init, but the Linux init is not designed to handle a restart,
+ * which it interprets as a reboot.  So unless the zone failed to boot,
+ * restart is disabled, or the zone or the system is already shutting down,
+ * the whole zone is rebooted to give init a sane environment.
+ */
+void
+lx_proc_exit(proc_t *p, klwp_t *lwp)
+{
+	zone_t *zonep = p->p_zone;
+
+	ASSERT(p->p_brand != NULL);
+	ASSERT(p->p_brand_data != NULL);
+
+	if (p->p_pid != zonep->zone_proc_initpid) {
+		lx_exitlwp(lwp);
+		kmem_free(p->p_brand_data, sizeof (struct lx_proc_data));
+		p->p_brand_data = NULL;
+		p->p_brand = &native_brand;
+		return;
+	}
+
+	/* init is dying */
+	if (zonep->zone_boot_err == 0 &&
+	    zonep->zone_restart_init &&
+	    zone_status_get(zonep) < ZONE_IS_SHUTTING_DOWN &&
+	    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN)
+		(void) zone_kadmin(A_REBOOT, 0, NULL, CRED());
+}
+
+/*
+ * Brand a process: allocate zeroed per-process brand data and set up brand
+ * data for the first lwp on the process's thread list.
+ *
+ * This is only ever called for single-threaded processes.  lx_initlwp()
+ * can only fail when it runs out of PIDs for multithreaded processes, so
+ * it cannot fail here.
+ */
+void
+lx_setbrand(proc_t *p)
+{
+	klwp_t *first_lwp;
+	int rv;
+
+	ASSERT(p->p_brand_data == NULL);
+	ASSERT(ttolxlwp(curthread) == NULL);
+
+	p->p_brand_data = kmem_zalloc(sizeof (struct lx_proc_data), KM_SLEEP);
+
+	first_lwp = p->p_tlist->t_lwp;
+	rv = lx_initlwp(first_lwp);
+	ASSERT(rv == 0);
+}
+
+/*
+ * Set a brand-specific zone attribute.  Only LX_ATTR_RESTART_INIT is
+ * recognised; its value is a boolean_t copied in from buf.
+ *
+ * Returns 0 on success, EINVAL for an unknown attribute or a value that is
+ * neither B_TRUE nor B_FALSE, ERANGE if bufsize exceeds sizeof (boolean_t),
+ * and EFAULT if the copyin fails.
+ */
+/* ARGSUSED */
+int
+lx_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+	boolean_t flag;
+
+	if (attr != LX_ATTR_RESTART_INIT)
+		return (EINVAL);
+
+	if (bufsize > sizeof (boolean_t))
+		return (ERANGE);
+
+	if (copyin(buf, &flag, sizeof (flag)) != 0)
+		return (EFAULT);
+
+	if (flag != B_TRUE && flag != B_FALSE)
+		return (EINVAL);
+
+	zone->zone_restart_init = flag;
+	return (0);
+}
+
+/*
+ * Read a brand-specific zone attribute.  Only LX_ATTR_RESTART_INIT is
+ * recognised; its value is copied out to buf as a boolean_t and *bufsize
+ * is updated to the number of bytes written.
+ *
+ * Returns 0 on success, ERANGE if *bufsize is smaller than a boolean_t,
+ * EFAULT if the copyout fails, and EINVAL for an unknown attribute.  All
+ * error returns are positive errno values, matching lx_setattr().
+ */
+/* ARGSUSED */
+int
+lx_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+	if (attr == LX_ATTR_RESTART_INIT) {
+		if (*bufsize < sizeof (boolean_t))
+			return (ERANGE);
+		if (copyout(&zone->zone_restart_init, buf,
+		    sizeof (boolean_t)) != 0)
+			return (EFAULT);
+		*bufsize = sizeof (boolean_t);
+		return (0);
+	}
+	return (EINVAL);
+}
+
+/*
+ * Enable ptrace system call tracing for the given LWP. This is done by
+ * both setting the flag in that LWP's brand data (in the kernel) and setting
+ * the process-wide trace flag (in the brand library of the traced process).
+ *
+ * pid and lwpid name the target; a non-zero 'set' enables tracing and zero
+ * disables it for that LWP only.
+ *
+ * Returns 0 on success, ESRCH if the process or LWP cannot be found or has
+ * no brand data, EPERM if the caller may not write to the target, and EIO
+ * if the trace flag could not be written into the target's address space.
+ */
+static int
+lx_ptrace_syscall_set(pid_t pid, id_t lwpid, int set)
+{
+	proc_t *p;
+	kthread_t *t;
+	klwp_t *lwp;
+	lx_proc_data_t *lpdp;
+	lx_lwp_data_t *lldp;
+	uintptr_t addr;
+	int ret, flag = 1;
+
+	if ((p = sprlock(pid)) == NULL)
+		return (ESRCH);
+
+	if (priv_proc_cred_perm(curproc->p_cred, p, NULL, VWRITE) != 0) {
+		sprunlock(p);
+		return (EPERM);
+	}
+
+	if ((t = idtot(p, lwpid)) == NULL || (lwp = ttolwp(t)) == NULL) {
+		sprunlock(p);
+		return (ESRCH);
+	}
+
+	if ((lpdp = p->p_brand_data) == NULL ||
+	    (lldp = lwp->lwp_brand) == NULL) {
+		sprunlock(p);
+		return (ESRCH);
+	}
+
+	if (set) {
+		/*
+		 * Enable the ptrace flag for this LWP and this process. Note
+		 * that we will turn off the LWP's ptrace flag, but we don't
+		 * turn off the process's ptrace flag.
+		 */
+		lldp->br_ptrace = 1;
+		lpdp->l_ptrace = 1;
+
+		/* user address registered via B_REGISTER (lxbr_traceflag) */
+		addr = lpdp->l_traceflag;
+
+		/* p_lock is dropped around uwrite() and retaken afterwards */
+		mutex_exit(&p->p_lock);
+
+		/*
+		 * This can fail only in some rare corner cases where the
+		 * process is exiting or we're completely out of memory. In
+		 * these cases, it's sufficient to return an error to the ptrace
+		 * consumer and leave the process-wide flag set.
+		 */
+		ret = uwrite(p, &flag, sizeof (flag), addr);
+
+		mutex_enter(&p->p_lock);
+
+		/*
+		 * If we couldn't set the trace flag, unset the LWP's ptrace
+		 * flag as the ptrace consumer won't expect this LWP to stop.
+		 */
+		if (ret != 0)
+			lldp->br_ptrace = 0;
+	} else {
+		lldp->br_ptrace = 0;
+		ret = 0;
+	}
+
+	sprunlock(p);
+
+	/* any uwrite() failure is reported to the caller as EIO */
+	if (ret != 0)
+		ret = EIO;
+
+	return (ret);
+}
+
+/*
+ * Deliver a pending ptrace syscall stop for the current LWP.
+ *
+ * The per-LWP ptrace flag is one-shot: it applies only until the next
+ * event for that LWP.  If it is set, clear it and raise SIGTRAP on the
+ * current thread to poke the controlling process.
+ */
+static void
+lx_ptrace_fire(void)
+{
+	lx_lwp_data_t *lwpd = ttolwp(curthread)->lwp_brand;
+
+	if (!lwpd->br_ptrace)
+		return;
+
+	lwpd->br_ptrace = 0;
+	tsignal(curthread, SIGTRAP);
+}
+
+/*
+ * Turn on system call tracing for the brand.  Must not already be enabled.
+ * lx_brand_int80_enable() is called before the flag tested by lx_brandsys()
+ * is set.
+ */
+void
+lx_brand_systrace_enable(void)
+{
+	extern void lx_brand_int80_enable(void);
+
+	ASSERT(!lx_systrace_enabled);
+
+	lx_brand_int80_enable();
+
+	lx_systrace_enabled = 1;
+}
+
+/*
+ * Turn off system call tracing for the brand.  Must currently be enabled.
+ * lx_brand_int80_disable() is called before the flag tested by
+ * lx_brandsys() is cleared.
+ */
+void
+lx_brand_systrace_disable(void)
+{
+	extern void lx_brand_int80_disable(void);
+
+	ASSERT(lx_systrace_enabled);
+
+	lx_brand_int80_disable();
+
+	lx_systrace_enabled = 0;
+}
+
+/*
+ * Brand system call handler for the lx brand.  'cmd' selects the operation
+ * and arg1..arg6 are its operation-specific arguments:
+ *
+ *	B_EXEC_BRAND		exec a new branded process
+ *	B_REGISTER		record the brand library's handler addresses
+ *	B_TTYMODES		copy the default "ttymodes" termios to arg1
+ *	B_ELFDATA		copy the saved lx ELF data to arg1
+ *	B_EXEC_NATIVE		exec a native process
+ *	B_LPID_TO_SPAIR		translate a Linux pid to a Solaris pid/tid
+ *	B_PTRACE_SYSCALL	enable/disable ptrace syscall tracing
+ *	B_SYSENTRY/B_SYSRETURN	syscall tracing entry and return hooks
+ *	B_{SET,GET}_AFFINITY_MASK  store/retrieve a CPU affinity mask
+ *	B_EMULATE_SYSCALL + n	emulate Linux system call n in the kernel
+ *
+ * Returning 0 indicates success; the value returned by the system call is
+ * the value stored in rval. Returning a non-zero value indicates a
+ * failure; the value returned is used to set errno, -1 is returned from
+ * the syscall and the contents of rval are ignored. To set errno and have
+ * the syscall return a value other than -1 we can manually set errno and
+ * rval and return 0.
+ *
+ * Every failure path therefore returns a positive errno value; none of
+ * them return -1 or the result of set_errno().
+ */
+int
+lx_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	kthread_t *t = curthread;
+	proc_t *p = ttoproc(t);
+	lx_proc_data_t *pd;
+	int linux_call;
+	struct termios *termios;
+	uint_t termios_len;
+	int error;
+	lx_brand_registration_t reg;
+
+	/*
+	 * There is one operation that is supported for a non-branded
+	 * process: B_EXEC_BRAND.  This is the equivalent of an
+	 * exec call, but the new process that is created will be
+	 * a branded process.
+	 */
+	if (cmd == B_EXEC_BRAND) {
+		ASSERT(p->p_zone != NULL);
+		ASSERT(p->p_zone->zone_brand == &lx_brand);
+		return (exec_common(
+		    (char *)arg1, (const char **)arg2, (const char **)arg3,
+		    EBA_BRAND));
+	}
+
+	/*
+	 * For all other operations this must be an lx branded process.
+	 * Testing against &lx_brand covers both a NULL p_brand and a
+	 * process that lx_proc_exit() has handed back to &native_brand.
+	 */
+	if (p->p_brand != &lx_brand)
+		return (ENOSYS);
+
+	ASSERT(p->p_brand_data != NULL);
+
+	switch (cmd) {
+	case B_REGISTER:
+		if (p->p_model == DATAMODEL_NATIVE) {
+			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0) {
+				lx_print("Failed to copyin brand registration "
+				    "at 0x%p\n", (void *)arg1);
+				return (EFAULT);
+			}
+#ifdef _LP64
+		} else {
+			/* 32-bit caller on a 64-bit kernel: widen fields */
+			lx_brand_registration32_t reg32;
+
+			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0) {
+				lx_print("Failed to copyin brand registration "
+				    "at 0x%p\n", (void *)arg1);
+				return (EFAULT);
+			}
+
+			reg.lxbr_version = (uint_t)reg32.lxbr_version;
+			reg.lxbr_handler =
+			    (void *)(uintptr_t)reg32.lxbr_handler;
+			reg.lxbr_tracehandler =
+			    (void *)(uintptr_t)reg32.lxbr_tracehandler;
+			reg.lxbr_traceflag =
+			    (void *)(uintptr_t)reg32.lxbr_traceflag;
+#endif
+		}
+
+		if (reg.lxbr_version != LX_VERSION_1) {
+			lx_print("Invalid brand library version (%u)\n",
+			    reg.lxbr_version);
+			return (EINVAL);
+		}
+
+		lx_print("Assigning brand 0x%p and handler 0x%p to proc 0x%p\n",
+		    (void *)&lx_brand, (void *)reg.lxbr_handler, (void *)p);
+		pd = p->p_brand_data;
+		pd->l_handler = (uintptr_t)reg.lxbr_handler;
+		pd->l_tracehandler = (uintptr_t)reg.lxbr_tracehandler;
+		pd->l_traceflag = (uintptr_t)reg.lxbr_traceflag;
+		*rval = 0;
+		return (0);
+
+	case B_TTYMODES:
+		/* This is necessary for emulating TCGETS ioctls. */
+		if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, ddi_root_node(),
+		    DDI_PROP_NOTPROM, "ttymodes", (uchar_t **)&termios,
+		    &termios_len) != DDI_SUCCESS)
+			return (EIO);
+
+		ASSERT(termios_len == sizeof (*termios));
+
+		/*
+		 * Copy out the termios structure the property lookup
+		 * returned, not the local pointer that refers to it.
+		 */
+		if (copyout(termios, (void *)arg1, sizeof (*termios)) != 0) {
+			ddi_prop_free(termios);
+			return (EFAULT);
+		}
+
+		ddi_prop_free(termios);
+		*rval = 0;
+		return (0);
+
+	case B_ELFDATA:
+		pd = p->p_brand_data;
+		if (copyout(&pd->l_elf_data, (void *)arg1,
+		    sizeof (lx_elf_data_t)) != 0)
+			return (EFAULT);
+		*rval = 0;
+		return (0);
+
+	case B_EXEC_NATIVE:
+		error = exec_common(
+		    (char *)arg1, (const char **)arg2, (const char **)arg3,
+		    EBA_NATIVE);
+		if (error != 0)
+			return (error);
+		*rval = 0;
+		return (0);
+
+	case B_LPID_TO_SPAIR:
+		/*
+		 * Given a Linux pid as arg1, return the Solaris pid in arg2 and
+		 * the Solaris LWP in arg3.  We also translate pid 1 (which is
+		 * hardcoded in many applications) to the zone's init process.
+		 */
+		{
+			pid_t s_pid;
+			id_t s_tid;
+
+			if ((pid_t)arg1 == 1) {
+				s_pid = p->p_zone->zone_proc_initpid;
+				/* handle the dead/missing init(1M) case */
+				if (s_pid == -1)
+					s_pid = 1;
+				s_tid = 1;
+			} else if (lx_lpid_to_spair((pid_t)arg1, &s_pid,
+			    &s_tid) < 0)
+				return (ESRCH);
+
+			if (copyout(&s_pid, (void *)arg2,
+			    sizeof (s_pid)) != 0 ||
+			    copyout(&s_tid, (void *)arg3, sizeof (s_tid)) != 0)
+				return (EFAULT);
+
+			*rval = 0;
+			return (0);
+		}
+
+	case B_PTRACE_SYSCALL:
+		/*
+		 * The errno from lx_ptrace_syscall_set() is handed back as
+		 * the syscall's return value rather than through errno.
+		 */
+		*rval = lx_ptrace_syscall_set((pid_t)arg1, (id_t)arg2,
+		    (int)arg3);
+		return (0);
+
+	case B_SYSENTRY:
+		if (lx_systrace_enabled) {
+			uint32_t args[6];
+
+			ASSERT(lx_systrace_entry_ptr != NULL);
+
+			if (copyin((void *)arg2, args, sizeof (args)) != 0)
+				return (EFAULT);
+
+			(*lx_systrace_entry_ptr)(arg1, args[0], args[1],
+			    args[2], args[3], args[4], args[5]);
+		}
+
+		lx_ptrace_fire();
+
+		pd = p->p_brand_data;
+
+		/*
+		 * If neither DTrace nor ptrace are interested in tracing
+		 * this process any more, turn off the trace flag.
+		 */
+		if (!lx_systrace_enabled && !pd->l_ptrace)
+			(void) suword32((void *)pd->l_traceflag, 0);
+
+		*rval = 0;
+		return (0);
+
+	case B_SYSRETURN:
+		if (lx_systrace_enabled) {
+			ASSERT(lx_systrace_return_ptr != NULL);
+
+			/* arg2 is passed as both of the first two probe args */
+			(*lx_systrace_return_ptr)(arg1, arg2, arg2, 0, 0, 0, 0);
+		}
+
+		lx_ptrace_fire();
+
+		pd = p->p_brand_data;
+
+		/*
+		 * If neither DTrace nor ptrace are interested in tracing
+		 * this process any more, turn off the trace flag.
+		 */
+		if (!lx_systrace_enabled && !pd->l_ptrace)
+			(void) suword32((void *)pd->l_traceflag, 0);
+
+		*rval = 0;
+		return (0);
+
+	case B_SET_AFFINITY_MASK:
+	case B_GET_AFFINITY_MASK:
+		/*
+		 * Retrieve or store the CPU affinity mask for the
+		 * requested linux pid.
+		 *
+		 * arg1 is a linux PID (0 means curthread).
+		 * arg2 is the size of the given mask.
+		 * arg3 is the address of the affinity mask.
+		 */
+		return (lx_sched_affinity(cmd, arg1, arg2, arg3, rval));
+
+	default:
+		/* commands at or above B_EMULATE_SYSCALL name a Linux syscall */
+		linux_call = cmd - B_EMULATE_SYSCALL;
+		if (linux_call >= 0 && linux_call < LX_NSYSCALLS) {
+			*rval = lx_emulate_syscall(linux_call, arg1, arg2,
+			    arg3, arg4, arg5, arg6);
+			return (0);
+		}
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Give a child process its own copy of the parent's per-process brand
+ * data.  The parent must already have brand data.
+ */
+void
+lx_copy_procdata(proc_t *child, proc_t *parent)
+{
+	lx_proc_data_t *src = parent->p_brand_data;
+	lx_proc_data_t *dst;
+
+	ASSERT(src != NULL);
+
+	dst = kmem_alloc(sizeof (*dst), KM_SLEEP);
+	bcopy(src, dst, sizeof (*dst));
+
+	child->p_brand_data = dst;
+}
+
+#if defined(_ELF32_COMPAT)
+/*
+ * Currently, only 32-bit branded ELF executables are supported.
+ */
+#define	elfexec elf32exec
+#define	mapexec_brand mapexec32_brand
+#endif /* _ELF32_COMPAT */
+
+/* ELF exec and mapping routines provided outside this file. */
+extern int elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
+    intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
+    cred_t *cred, int brand_action);
+extern int mapexec_brand(vnode_t *, uarg_t *, Ehdr *ehdr, Elf32_Addr *,
+    intptr_t *, caddr_t, int *, caddr_t *, caddr_t *, size_t *);
+
+/*
+ * Exec routine called by elfexec() to load 32-bit Linux binaries.
+ *
+ * The work is done in this order:
+ *   1. exec the brand library (LX_LIB_PATH) through elfexec();
+ *   2. map the Linux executable 'vp' with mapexec_brand() and record its
+ *      program header address, entry point and phdr geometry in the
+ *      process's lx_elf_data_t;
+ *   3. if the executable asks for an interpreter, map LX_LINKER as well and
+ *      record its base and entry point;
+ *   4. install the exec environment and patch the aux vectors.
+ *
+ * Returns 0 on success or the errno from whichever step failed.
+ */
+static int
+lx_elfexec(struct vnode *vp, struct execa *uap, struct uarg *args,
+    struct intpdata *idata, int level, long *execsz, int setid,
+    caddr_t exec_file, struct cred *cred, int brand_action)
+{
+	int		error;
+	vnode_t		*nvp;
+	auxv32_t	phdr_auxv = { AT_SUN_BRAND_PHDR, 0 };
+	Ehdr		ehdr;
+	Elf32_Addr	uphdr_vaddr;
+	intptr_t	voffset;
+	int		interp;
+	int		i;
+	struct execenv	env;
+	struct user	*up = PTOU(ttoproc(curthread));
+	lx_elf_data_t	*edp =
+	    &((lx_proc_data_t *)ttoproc(curthread)->p_brand_data)->l_elf_data;
+
+	ASSERT(ttoproc(curthread)->p_brand == &lx_brand);
+	ASSERT(ttoproc(curthread)->p_brand_data != NULL);
+
+	/*
+	 * Set the brandname and library name for the new process so that
+	 * elfexec() puts them onto the stack.
+	 */
+	args->brandname = LX_BRANDNAME;
+	args->emulator = LX_LIB_PATH;
+
+	/*
+	 * We will exec the brand library, and map in the linux linker and the
+	 * linux executable.
+	 */
+	if (error = lookupname(LX_LIB_PATH, UIO_SYSSPACE, FOLLOW, NULLVPP,
+	    &nvp)) {
+		uprintf("%s: not found.", LX_LIB);
+		return (error);
+	}
+
+	if (error = elfexec(nvp, uap, args, idata, level + 1, execsz, setid,
+	    exec_file, cred, brand_action)) {
+		VN_RELE(nvp);
+		return (error);
+	}
+	VN_RELE(nvp);
+
+	bzero(&env, sizeof (env));
+
+	if (error = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
+	    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
+	    &env.ex_brksize))
+		return (error);
+
+	/*
+	 * Save off the important properties of the lx executable. The brand
+	 * library will ask us for this data later, when it is ready to set
+	 * things up for the lx executable.
+	 */
+	/* uphdr_vaddr == -1 selects the e_phoff-based address instead */
+	edp->ed_phdr = (uphdr_vaddr == -1) ? voffset + ehdr.e_phoff :
+	    voffset + uphdr_vaddr;
+	edp->ed_entry = voffset + ehdr.e_entry;
+	edp->ed_phent = ehdr.e_phentsize;
+	edp->ed_phnum = ehdr.e_phnum;
+
+	if (interp) {
+		if (ehdr.e_type == ET_DYN) {
+			/*
+			 * This is a shared object executable, so we need to
+			 * pick a reasonable place to put the heap. Just don't
+			 * use the first page.
+			 */
+			env.ex_brkbase = (caddr_t)PAGESIZE;
+			env.ex_bssbase = (caddr_t)PAGESIZE;
+		}
+
+		/*
+		 * If the program needs an interpreter (most do), map it in and
+		 * store relevant information about it in the aux vector, where
+		 * the brand library can find it.
+		 */
+		if (error = lookupname(LX_LINKER, UIO_SYSSPACE, FOLLOW, NULLVPP,
+		    &nvp)) {
+			uprintf("%s: not found.", LX_LINKER);
+			return (error);
+		}
+		/* ehdr, uphdr_vaddr, voffset and interp are reused here */
+		if (error = mapexec_brand(nvp, args, &ehdr, &uphdr_vaddr,
+		    &voffset, exec_file, &interp, NULL, NULL, NULL)) {
+			VN_RELE(nvp);
+			return (error);
+		}
+		VN_RELE(nvp);
+
+		/*
+		 * Now that we know the base address of the brand's linker,
+		 * place it in the aux vector.
+		 */
+		edp->ed_base = voffset;
+		edp->ed_ldentry = voffset + ehdr.e_entry;
+	} else {
+		/*
+		 * This program has no interpreter. The lx brand library will
+		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
+		 * so in this case, put the entry point of the main executable
+		 * there.
+		 */
+		if (ehdr.e_type == ET_EXEC) {
+			/*
+			 * An executable with no interpreter, this must be a
+			 * statically linked executable, which means we loaded
+			 * it at the address specified in the elf header, in
+			 * which case the e_entry field of the elf header is an
+			 * absolute address.
+			 */
+			edp->ed_ldentry = ehdr.e_entry;
+			edp->ed_entry = ehdr.e_entry;
+		} else {
+			/*
+			 * A shared object with no interpreter, we use the
+			 * calculated address from above.
+			 */
+			edp->ed_ldentry = edp->ed_entry;
+		}
+
+		/*
+		 * Delay setting the brkbase until the first call to brk();
+		 * see elfexec() for details.
+		 */
+		env.ex_bssbase = (caddr_t)0;
+		env.ex_brkbase = (caddr_t)0;
+		env.ex_brksize = 0;
+	}
+
+	env.ex_vp = vp;
+	setexecenv(&env);
+
+	/*
+	 * We don't need to copy this stuff out. It is only used by our
+	 * tools to locate the lx linker's debug section. But we should at
+	 * least try to keep /proc's view of the aux vector consistent with
+	 * what's on the process stack.
+	 */
+	phdr_auxv.a_un.a_val = edp->ed_phdr;
+	if (copyout(&phdr_auxv, args->brand_auxp, sizeof (phdr_auxv)) == -1)
+		return (EFAULT);
+
+	/*
+	 * /proc uses the AT_ENTRY aux vector entry to deduce
+	 * the location of the executable in the address space. The user
+	 * structure contains a copy of the aux vector that needs to have those
+	 * entries patched with the values of the real lx executable (they
+	 * currently contain the values from the lx brand library that was
+	 * elfexec'd, above).
+	 *
+	 * For live processes, AT_BASE is used to locate the linker segment,
+	 * which /proc and friends will later use to find Solaris symbols
+	 * (such as rtld_db_preinit). However, for core files, /proc uses
+	 * AT_ENTRY to find the right segment to label as the executable.
+	 * So we set AT_ENTRY to be the entry point of the linux executable,
+	 * but leave AT_BASE to be the address of the Solaris linker.
+	 */
+	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
+		if (up->u_auxv[i].a_type == AT_ENTRY)
+			up->u_auxv[i].a_un.a_val = edp->ed_entry;
+		if (up->u_auxv[i].a_type == AT_SUN_BRAND_PHDR)
+			up->u_auxv[i].a_un.a_val = edp->ed_phdr;
+	}
+
+	return (0);
+}
+
+/*
+ * Module load entry point.  Sets up the pid/tid conversion hash tables and
+ * the futex state, then installs the brand module.  If installation fails,
+ * both are torn down again so that a failed load leaves nothing allocated.
+ *
+ * Returns the result of mod_install().
+ */
+int
+_init(void)
+{
+	int err = 0;
+
+	/* pid/tid conversion hash tables */
+	lx_pid_init();
+
+	/* for lx_futex() */
+	lx_futex_init();
+
+	err = mod_install(&modlinkage);
+	if (err != 0) {
+		cmn_err(CE_WARN, "Couldn't install lx brand module");
+
+		/*
+		 * This looks drastic, but it should never happen.  These
+		 * two data structures should be completely free-able until
+		 * they are used by Linux processes.  Since the brand
+		 * wasn't loaded there should be no Linux processes, and
+		 * thus no way for these data structures to be modified.
+		 */
+		lx_pid_fini();
+		if (lx_futex_fini())
+			panic("lx brand module cannot be loaded or unloaded.");
+	}
+	return (err);
+}
+
+/* Module query: report on the lx brand linkage via mod_info(). */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Module unload entry point.
+ *
+ * Unloading is refused with EBUSY while any zone uses this brand.
+ * Otherwise the pid tables and futex state are torn down and the module
+ * is removed.  If a later step fails, whatever was already torn down is
+ * set up again so the still-loaded module is left in a sane state.
+ *
+ * Returns 0 on success, EBUSY, or the error from lx_futex_fini() or
+ * mod_remove().
+ */
+int
+_fini(void)
+{
+	int rv;
+
+	if (brand_zone_count(&lx_brand))
+		return (EBUSY);
+
+	lx_pid_fini();
+
+	rv = lx_futex_fini();
+	if (rv != 0) {
+		/* futex state is still intact; only the pid tables are gone */
+		lx_pid_init();
+		return (rv);
+	}
+
+	rv = mod_remove(&modlinkage);
+	if (rv != 0) {
+		/* couldn't unload: rebuild both structures */
+		lx_pid_init();
+		lx_futex_init();
+	}
+
+	return (rv);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_misc.c b/usr/src/uts/common/brand/lx/os/lx_misc.c
new file mode 100644
index 0000000000..375b99fa46
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_misc.c
@@ -0,0 +1,383 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/archsystm.h>
+#include <sys/privregs.h>
+#include <sys/exec.h>
+#include <sys/lwp.h>
+#include <sys/sem.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_futex.h>
+
+/*
+ * Linux specific functions and definitions.  lx_save() and lx_restore()
+ * are the context handlers installed with installctx() below;
+ * lx_ptrace_free() is defined outside this file.
+ */
+void lx_setrval(klwp_t *, int, int);
+void lx_exec();
+int lx_initlwp(klwp_t *);
+void lx_forklwp(klwp_t *, klwp_t *);
+void lx_exitlwp(klwp_t *);
+void lx_freelwp(klwp_t *);
+static void lx_save(klwp_t *);
+static void lx_restore(klwp_t *);
+extern void lx_ptrace_free(proc_t *);
+
+/*
+ * Set the return code for the forked child, always zero.  The v1 and v2
+ * arguments are ignored; only the lwp's r_r0 register is written.
+ */
+/*ARGSUSED*/
+void
+lx_setrval(klwp_t *lwp, int v1, int v2)
+{
+	lwptoregs(lwp)->r_r0 = 0;
+}
+
+/*
+ * Reset per-lwp brand state on exec(2).
+ *
+ * Two mutually exclusive special cases are handled first:
+ *
+ *  - The process was native before this exec(), so the lwp has no brand
+ *    data and no Linux PID yet; create them with lx_initlwp().  The only
+ *    way that can fail is an inability to allocate a new PID, which a
+ *    single-threaded process never needs, so failure is not expected.
+ *
+ *  - The process was a multi-threaded Linux process and this lwp was not
+ *    the main lwp; its Solaris and Linux PIDs must be made to match.
+ *
+ * After that the context handlers are installed, the lwp's TLS array is
+ * cleared and, with preemption disabled, the TLS entries in the GDT are
+ * reloaded from it.
+ */
+void
+lx_exec()
+{
+	kthread_t *self = curthread;
+	klwp_t *lwp = ttolwp(self);
+	struct lx_lwp_data *lxd = lwptolxlwp(lwp);
+
+	if (lxd != NULL) {
+		if (self->t_tid != 1)
+			lx_pid_reassign(self);
+	} else {
+		int rv = lx_initlwp(lwp);
+
+		ASSERT(rv == 0);
+		lxd = lwptolxlwp(lwp);
+	}
+
+	installctx(lwptot(lwp), lwp, lx_save, lx_restore, NULL, NULL, lx_save,
+	    NULL);
+
+	/* clear out the tls array */
+	bzero(lxd->br_tls, sizeof (lxd->br_tls));
+
+	/* reset the tls entries in the gdt */
+	kpreempt_disable();
+	lx_restore(lwp);
+	kpreempt_enable();
+}
+
+/*
+ * Brand hook run when an lwp exits.
+ *
+ * If the lwp registered a clear-child-tid address, zero that word and wake
+ * one futex waiter on it.  If an exit signal was requested (br_signal),
+ * queue it on the recorded parent.  Finally release the lwp's brand data
+ * through lx_freelwp().  A second call for the same lwp finds no brand
+ * data and returns immediately.
+ */
+void
+lx_exitlwp(klwp_t *lwp)
+{
+	struct lx_lwp_data *lwpd = lwptolxlwp(lwp);
+	proc_t *p;
+	kthread_t *t;
+	sigqueue_t *sqp = NULL;
+	pid_t ppid;
+	id_t ptid;
+
+	if (lwpd == NULL)
+		return;		/* second time thru' */
+
+	if (lwpd->br_clear_ctidp != NULL) {
+		(void) suword32(lwpd->br_clear_ctidp, 0);
+		(void) lx_futex((uintptr_t)lwpd->br_clear_ctidp, FUTEX_WAKE, 1,
+		    NULL, NULL, 0);
+	}
+
+	if (lwpd->br_signal != 0) {
+		/*
+		 * The first thread in a process doesn't cause a signal to
+		 * be sent when it exits.  It was created by a fork(), not
+		 * a clone(), so the parent should get signalled when the
+		 * process exits.
+		 */
+		if (lwpd->br_ptid == -1)
+			goto free;
+
+		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+		/*
+		 * If br_ppid is 0, it means this is a CLONE_PARENT thread,
+		 * so the signal goes to the parent process - not to a
+		 * specific thread in this process.
+		 */
+		p = lwptoproc(lwp);
+		if (lwpd->br_ppid == 0) {
+			/*
+			 * NOTE(review): ppid is assigned here but never read
+			 * again on this path, and p still refers to this
+			 * lwp's own process when sigaddqa() runs below.
+			 * Confirm that this matches the comment above.
+			 */
+			mutex_enter(&p->p_lock);
+			ppid = p->p_ppid;
+			t = NULL;
+		} else {
+			/*
+			 * If we have been reparented to init or if our
+			 * parent thread is gone, then nobody gets
+			 * signaled.
+			 */
+			if ((lx_lwp_ppid(lwp, &ppid, &ptid) == 1) ||
+			    (ptid == -1))
+				goto free;
+
+			/* pidlock is held until the target's p_lock is taken */
+			mutex_enter(&pidlock);
+			if ((p = prfind(ppid)) == NULL || p->p_stat == SIDL) {
+				mutex_exit(&pidlock);
+				goto free;
+			}
+			mutex_enter(&p->p_lock);
+			mutex_exit(&pidlock);
+
+			if ((t = idtot(p, ptid)) == NULL) {
+				mutex_exit(&p->p_lock);
+				goto free;
+			}
+		}
+
+		/* both branches reach here holding p->p_lock */
+		sqp->sq_info.si_signo = lwpd->br_signal;
+		sqp->sq_info.si_code = lwpd->br_exitwhy;
+		sqp->sq_info.si_status = lwpd->br_exitwhat;
+		sqp->sq_info.si_pid = lwpd->br_pid;
+		sqp->sq_info.si_uid = crgetruid(CRED());
+		sigaddqa(p, t, sqp);
+		mutex_exit(&p->p_lock);
+		/* sqp was handed to sigaddqa(); don't free it below */
+		sqp = NULL;
+	}
+
+free:
+	if (sqp)
+		kmem_free(sqp, sizeof (sigqueue_t));
+
+	lx_freelwp(lwp);
+}
+
+/*
+ * Release an lwp's brand data: remove the context handlers, drop the
+ * lwp's Linux pid mapping if one was assigned, detach the data from the
+ * lwp and free it.  Does nothing if the lwp has no brand data.
+ */
+void
+lx_freelwp(klwp_t *lwp)
+{
+	struct lx_lwp_data *lxd = lwptolxlwp(lwp);
+	kthread_t *t;
+
+	if (lxd == NULL)
+		return;
+
+	t = lwptot(lwp);
+	(void) removectx(t, lwp, lx_save, lx_restore, NULL, NULL, lx_save,
+	    NULL);
+
+	if (lxd->br_pid != 0)
+		lx_pid_rele(lwptoproc(lwp)->p_pid, t->t_tid);
+
+	lwp->lwp_brand = NULL;
+	kmem_free(lxd, sizeof (*lxd));
+}
+
+/*
+ * Allocate and attach brand data for a new lwp, assign it a Linux pid and
+ * install the context handlers.
+ *
+ * Parentage is recorded as follows:
+ *  - the first thread in a process gets ppid set to the parent process's
+ *    pid and ptid set to -1;
+ *  - a later thread created by a Linux thread gets ppid set to the
+ *    creator's Linux pid, ptid set to the creator's tid, and a copy of the
+ *    creator's TLS descriptors;
+ *  - if the creating thread is not a Linux thread, ppid is 0 and ptid -1.
+ *
+ * Returns 0 on success.  Returns -1, with the brand data freed again, if
+ * lx_pid_assign() fails.
+ */
+int
+lx_initlwp(klwp_t *lwp)
+{
+	kthread_t *t = lwptot(lwp);
+	struct lx_lwp_data *newd;
+	struct lx_lwp_data *creator;
+
+	newd = kmem_zalloc(sizeof (*newd), KM_SLEEP);
+	newd->br_lwp = lwp;
+	newd->br_exitwhy = CLD_EXITED;
+	newd->br_signal = 0;
+	newd->br_affinitymask = 0;
+	newd->br_set_ctidp = NULL;
+	newd->br_clear_ctidp = NULL;
+
+	if (t->t_next == t) {
+		/* only thread on the list: first thread in the process */
+		newd->br_ptid = -1;
+		newd->br_ppid = t->t_procp->p_ppid;
+	} else if ((creator = ttolxlwp(curthread)) != NULL) {
+		newd->br_ptid = curthread->t_tid;
+		newd->br_ppid = creator->br_pid;
+		bcopy(creator->br_tls, newd->br_tls, sizeof (newd->br_tls));
+	} else {
+		/* oddball case: the parent thread isn't a Linux process */
+		newd->br_ptid = -1;
+		newd->br_ppid = 0;
+	}
+	lwp->lwp_brand = newd;
+
+	if (lx_pid_assign(t)) {
+		kmem_free(newd, sizeof (*newd));
+		lwp->lwp_brand = NULL;
+		return (-1);
+	}
+	newd->br_tgid = newd->br_pid;
+
+	installctx(t, lwp, lx_save, lx_restore, NULL, NULL, lx_save, NULL);
+
+	return (0);
+}
+
+/*
+ * Seed a newly created lwp's brand data from the lwp that created it: the
+ * parent pid/tid, the TLS descriptors, and the BR_CPU_BOUND flag only.
+ *
+ * No locking is needed for either lx_lwp_data.  This always runs in the
+ * context of the source thread, and the destination is newly created and
+ * not yet referenced from anywhere else.
+ */
+void
+lx_forklwp(klwp_t *srclwp, klwp_t *dstlwp)
+{
+	struct lx_lwp_data *from = srclwp->lwp_brand;
+	struct lx_lwp_data *to = dstlwp->lwp_brand;
+
+	bcopy(from->br_tls, to->br_tls, sizeof (to->br_tls));
+
+	to->br_ptid = lwptot(srclwp)->t_tid;
+	to->br_ppid = from->br_pid;
+
+	/* BR_CPU_BOUND is the only flag that is inherited */
+	to->br_lwp_flags = from->br_lwp_flags & BR_CPU_BOUND;
+	to->br_clone_args = NULL;
+}
+
+/*
+ * When switching a Linux process off the CPU, clear its GDT entries.
+ *
+ * Each of the LX_TLSNUM slots starting at GDT_TLSMIN in the GDT returned
+ * by cpu_get_gdt() is overwritten with an all-zero descriptor.  The
+ * template is a static object, which C guarantees is zero-initialised, so
+ * it needs no run-time initialisation and nothing is written to shared
+ * static state here.
+ */
+/* ARGSUSED */
+static void
+lx_save(klwp_t *t)
+{
+	static user_desc_t null_desc;	/* static storage: all zeroes */
+	user_desc_t *gdt;
+	int i;
+
+	gdt = cpu_get_gdt();
+	for (i = 0; i < LX_TLSNUM; i++)
+		gdt[GDT_TLSMIN + i] = null_desc;
+}
+
+/*
+ * When switching a Linux process on the CPU, set its GDT entries: copy
+ * the lwp's LX_TLSNUM saved TLS descriptors into the slots starting at
+ * GDT_TLSMIN in the GDT returned by cpu_get_gdt().
+ */
+static void
+lx_restore(klwp_t *t)
+{
+	struct lx_lwp_data *lxd = lwptolxlwp(t);
+	user_desc_t *slots;
+	int n;
+
+	ASSERT(lxd);
+
+	slots = cpu_get_gdt() + GDT_TLSMIN;
+	for (n = 0; n < LX_TLSNUM; n++)
+		slots[n] = lxd->br_tls[n];
+}
+
+/*
+ * Store *descrp into slot 'entry' of the GDT returned by cpu_get_gdt().
+ */
+void
+lx_set_gdt(int entry, user_desc_t *descrp)
+{
+	user_desc_t *slot = cpu_get_gdt() + entry;
+
+	*slot = *descrp;
+}
+
+/*
+ * Zero slot 'entry' of the GDT returned by cpu_get_gdt().
+ */
+void
+lx_clear_gdt(int entry)
+{
+	user_desc_t *slot = cpu_get_gdt() + entry;
+
+	bzero(slot, sizeof (*slot));
+}
+
+/* Stub handler: sets errno to ENOSYS and returns set_errno()'s result. */
+longlong_t
+lx_nosys()
+{
+	return (set_errno(ENOSYS));
+}
+
+/* Stub handler: sets errno to EOPNOTSUPP and returns set_errno()'s result. */
+longlong_t
+lx_opnotsupp()
+{
+	return (set_errno(EOPNOTSUPP));
+}
+
+/*
+ * Brand-specific routine to check if given non-Solaris standard segment
+ * register values should be used as-is or if they should be modified to other
+ * values.
+ *
+ * sr is the candidate selector (only the low 16 bits may be set) and
+ * datamodel is the process's data model.  Returns the selector to use.
+ */
+/*ARGSUSED*/
+greg_t
+lx_fixsegreg(greg_t sr, model_t datamodel)
+{
+	struct lx_lwp_data *lxlwp = ttolxlwp(curthread);
+
+	/*
+	 * If the segreg is the same as the %gs the brand callback was last
+	 * entered with, allow it to be used unmodified.
+	 */
+	ASSERT(sr == (sr & 0xffff));
+
+	if (sr == (lxlwp->br_ugs & 0xffff))
+		return (sr);
+
+	/*
+	 * Force the SR into the LDT in ring 3 for 32-bit processes.
+	 *
+	 * 64-bit processes get the null GDT selector since they are not
+	 * allowed to have a private LDT.
+	 */
+#if defined(__amd64)
+	return (datamodel == DATAMODEL_ILP32 ? (sr | SEL_TI_LDT | SEL_UPL) : 0);
+#elif defined(__i386)
+	datamodel = datamodel;	/* datamodel currently unused for 32-bit */
+	return (sr | SEL_TI_LDT | SEL_UPL);
+#endif	/* __amd64 */
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_pid.c b/usr/src/uts/common/brand/lx/os/lx_pid.c
new file mode 100644
index 0000000000..4f22efd1ee
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_pid.c
@@ -0,0 +1,348 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/bitmap.h>
+#include <sys/var.h>
+#include <sys/thread.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/zone.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+#define	LINUX_PROC_FACTOR	8	/* factor down the hash table by this */
+static int hash_len = 4;		/* desired average hash chain length */
+static int hash_size;			/* number of buckets; a power of two */
+
+static struct lx_pid **stol_pid_hash;	/* solaris (pid, tid) -> lx_pid */
+static struct lx_pid **ltos_pid_hash;	/* linux pid -> lx_pid */
+
+#define	LTOS_HASH(pid)		((pid) & (hash_size - 1))
+#define	STOL_HASH(pid, tid)	(((pid) + (tid)) & (hash_size - 1))
+
+static kmutex_t hash_lock;		/* held while walking/changing tables */
+
+static void
+lx_pid_insert_hash(struct lx_pid *lpidp)
+{
+	struct lx_pid **sheadp, **lheadp;
+
+	ASSERT(MUTEX_HELD(&hash_lock));
+
+	sheadp = &stol_pid_hash[STOL_HASH(lpidp->s_pid, lpidp->s_tid)];
+	lpidp->stol_next = *sheadp;
+	*sheadp = lpidp;	/* new head of the solaris->linux chain */
+	lheadp = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)];
+	lpidp->ltos_next = *lheadp;
+	*lheadp = lpidp;	/* new head of the linux->solaris chain */
+}
+
+/*
+ * Unlink the lx_pid entry for the Solaris (pid, tid) pair from both hash
+ * tables and return it, or return NULL if the pair is not in the
+ * solaris-to-linux table.  The caller must hold hash_lock.
+ */
+static struct lx_pid *
+lx_pid_remove_hash(pid_t pid, id_t tid)
+{
+	struct lx_pid **hpp;
+	struct lx_pid *lpidp = NULL;	/* stays NULL if no entry matches */
+
+	ASSERT(MUTEX_HELD(&hash_lock));
+
+	hpp = &stol_pid_hash[STOL_HASH(pid, tid)];
+	for (; *hpp != NULL; hpp = &(*hpp)->stol_next) {
+		if ((*hpp)->s_pid == pid && (*hpp)->s_tid == tid) {
+			lpidp = *hpp;
+			*hpp = lpidp->stol_next;
+			break;
+		}
+	}
+
+	/* During error recovery the pid may already have been released. */
+	if (lpidp == NULL)
+		return (NULL);
+
+	hpp = &ltos_pid_hash[LTOS_HASH(lpidp->l_pid)];
+	for (; *hpp != NULL; hpp = &(*hpp)->ltos_next) {
+		if (*hpp == lpidp) {
+			*hpp = lpidp->ltos_next;
+			break;
+		}
+	}
+
+	return (lpidp);
+}
+
+/*
+ * Give thread t a linux pid and hash it; 0, or -1 if pid_allocate() fails.
+ */
+int
+lx_pid_assign(kthread_t *t)
+{
+	proc_t *p = ttoproc(t);
+	pid_t s_pid = p->p_pid;
+	id_t s_tid = t->t_tid;
+	struct pid *pidp;
+	struct lx_pid *lpidp;
+	lx_lwp_data_t *lwpd = ttolxlwp(t);
+	pid_t newpid;
+
+	if (p->p_lwpcnt > 0) {
+		/*
+		 * Allocate a pid for any thread other than the first
+		 */
+		if ((newpid = pid_allocate(p, 0)) < 0)
+			return (-1);
+
+		pidp = pid_find(newpid);
+	} else {
+		pidp = NULL;
+		newpid = s_pid;	/* first thread reuses the process pid */
+	}
+
+	lpidp = kmem_alloc(sizeof (struct lx_pid), KM_SLEEP);
+	lpidp->l_pid = newpid;
+	lpidp->s_pid = s_pid;
+	lpidp->s_tid = s_tid;
+	lpidp->l_pidp = pidp;
+	lpidp->l_start = t->t_start;
+
+	/*
+	 * now put the pid into the linux-solaris and solaris-linux
+	 * conversion hash tables
+	 */
+	mutex_enter(&hash_lock);
+	lx_pid_insert_hash(lpidp);
+	mutex_exit(&hash_lock);
+
+	lwpd->br_pid = newpid;
+
+	return (0);
+}
+
+/*
+ * If we are exec()ing the process, this thread's tid is about to be reset
+ * to 1.  Re-hash its lx_pid entry as (p_pid, 1) with l_pid equal to p_pid.
+ */
+void
+lx_pid_reassign(kthread_t *t)
+{
+	proc_t *p = ttoproc(t);
+	struct pid *old_pidp;
+	struct lx_pid *lpidp;
+
+	ASSERT(p->p_lwpcnt == 1);
+
+	mutex_enter(&hash_lock);
+
+	/*
+	 * Clean up all the traces of this thread's 'fake' Linux PID.
+	 */
+	lpidp = lx_pid_remove_hash(p->p_pid, t->t_tid);
+	ASSERT(lpidp != NULL);
+	old_pidp = lpidp->l_pidp;
+	lpidp->l_pidp = NULL;	/* released below, after dropping hash_lock */
+
+	/*
+	 * Now register this thread as (pid, 1).
+	 */
+	lpidp->l_pid = p->p_pid;
+	lpidp->s_pid = p->p_pid;
+	lpidp->s_tid = 1;
+	lx_pid_insert_hash(lpidp);
+
+	mutex_exit(&hash_lock);
+
+	if (old_pidp)
+		(void) pid_rele(old_pidp);
+}
+
+/*
+ * Drop the hash entry for a solaris pid/tid pair and free it, if present.
+ */
+void
+lx_pid_rele(pid_t pid, id_t tid)
+{
+	struct lx_pid *entry;
+
+	mutex_enter(&hash_lock);
+	entry = lx_pid_remove_hash(pid, tid);
+	mutex_exit(&hash_lock);
+
+	if (entry == NULL)
+		return;
+
+	if (entry->l_pidp != NULL)
+		(void) pid_rele(entry->l_pidp);
+	kmem_free(entry, sizeof (struct lx_pid));
+}
+
+/*
+ * Map a linux pid to its solaris pid/tid pair; returns 0 if found, else -1.
+ */
+int
+lx_lpid_to_spair(pid_t l_pid, pid_t *s_pid, id_t *s_tid)
+{
+	struct lx_pid *ent;
+
+	mutex_enter(&hash_lock);
+	ent = ltos_pid_hash[LTOS_HASH(l_pid)];
+	while (ent != NULL && ent->l_pid != l_pid)
+		ent = ent->ltos_next;
+	if (ent != NULL) {
+		if (s_pid != NULL)
+			*s_pid = ent->s_pid;
+		if (s_tid != NULL)
+			*s_tid = ent->s_tid;
+	}
+	mutex_exit(&hash_lock);
+	return (ent != NULL ? 0 : -1);
+}
+
+/*
+ * Return the Linux pid of the given lwp's parent.  If ppidp and/or ptidp are
+ * non-NULL, a matching (pid, tid) is stored there; tid is -1 if unknown.
+ */
+pid_t
+lx_lwp_ppid(klwp_t *lwp, pid_t *ppidp, id_t *ptidp)
+{
+	lx_lwp_data_t *lwpd = lwptolxlwp(lwp);
+	proc_t *p = lwptoproc(lwp);
+	struct lx_pid *hp;
+	pid_t zoneinit = curproc->p_zone->zone_proc_initpid;
+	pid_t lppid, ppid;
+
+	/*
+	 * Be sure not to return a parent pid that should be invisible
+	 * within this zone.
+	 */
+	ppid = ((p->p_flag & SZONETOP)
+	    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+	/*
+	 * If the parent process's pid is the zone's init process, force it
+	 * to the Linux init pid value of 1.
+	 */
+	if (ppid == zoneinit)
+		ppid = 1;
+
+	/*
+	 * There are two cases in which the Linux definition of a 'parent'
+	 * matches that of Solaris:
+	 *
+	 * - if our tgid is the same as our PID, then we are either the
+	 *   first thread in the process or a CLONE_THREAD thread.
+	 *
+	 * - if the brand lwp value for ppid is 0, then we are either the
+	 *   child of a differently-branded process or a CLONE_PARENT thread.
+	 */
+	if (p->p_pid == lwpd->br_tgid || lwpd->br_ppid == 0) {
+		if (ppidp != NULL)
+			*ppidp = ppid;
+		if (ptidp != NULL)
+			*ptidp = -1;
+		return (ppid);
+	}
+
+	/*
+	 * Set the default Linux parent pid to be the pid of the zone's init
+	 * process; this will get converted back to the Linux default of 1
+	 * later.
+	 */
+	lppid = zoneinit;
+
+	/*
+	 * If the process's parent isn't init, try and look up the Linux "pid"
+	 * corresponding to the process's parent.
+	 */
+	if (ppid != 1) {
+		/*
+		 * In all other cases, we are looking for the parent of this
+		 * specific thread, which in Linux refers to the thread that
+		 * clone()d it.   We stashed that thread's PID away when this
+		 * thread was created.
+		 */
+		mutex_enter(&hash_lock);
+		for (hp = ltos_pid_hash[LTOS_HASH(lwpd->br_ppid)]; hp;
+		    hp = hp->ltos_next) {
+			if (lwpd->br_ppid == hp->l_pid) {
+				/*
+				 * The cached PID is in the table.  If that
+				 * entry is newer than this LWP, the parent
+				 * has exited and its PID was reused by an
+				 * unrelated thread, so fall back to init.
+				 */
+				if (hp->l_start > lwptot(lwp)->t_start)
+					break;
+
+				lppid = lwpd->br_ppid;
+				if (ppidp != NULL)
+					*ppidp = hp->s_pid;
+				if (ptidp != NULL)
+					*ptidp = hp->s_tid;
+
+				break;
+			}
+		}
+		mutex_exit(&hash_lock);
+	}
+
+	if (lppid == zoneinit) {
+		lppid = 1;
+
+		if (ppidp != NULL)
+			*ppidp = lppid;
+		if (ptidp != NULL)
+			*ptidp = -1;
+	}
+
+	return (lppid);
+}
+
+void
+lx_pid_init(void)
+{
+	size_t tblsz;
+
+	/* Bucket count is a power of two so the hash macros can mask. */
+	hash_size = 1 << highbit(v.v_proc / (hash_len * LINUX_PROC_FACTOR));
+	tblsz = sizeof (struct lx_pid *) * hash_size;
+	stol_pid_hash = kmem_zalloc(tblsz, KM_SLEEP);
+	ltos_pid_hash = kmem_zalloc(tblsz, KM_SLEEP);
+	mutex_init(&hash_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+void
+lx_pid_fini(void)	/* NOTE(review): no mutex_destroy(&hash_lock) */
+{
+	kmem_free(stol_pid_hash, sizeof (struct lx_pid *) * hash_size);
+	kmem_free(ltos_pid_hash, sizeof (struct lx_pid *) * hash_size);
+}
diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c
new file mode 100644
index 0000000000..686afea458
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c
@@ -0,0 +1,409 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/thread.h>
+#include <sys/systm.h>
+#include <sys/syscall.h>
+#include <sys/proc.h>
+#include <sys/modctl.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/lx_syscalls.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_impl.h>
+
+/*
+ * V_RVAL is the return-value flag for calls whose result is a native 'long':
+ * SE_64RVAL when the kernel is built _LP64, SE_32RVAL1 otherwise.
+ */
+#ifdef	_LP64
+#define	V_RVAL	SE_64RVAL
+#else
+#define	V_RVAL	SE_32RVAL1
+#endif
+
+/*
+ * Define system calls that return a native 'long' quantity i.e. a 32-bit
+ * or 64-bit integer - depending on how the kernel is itself compiled
+ * e.g. read(2) returns 'ssize_t' in the kernel and in userland.
+ */
+#define	LX_CL(name, call, narg)      \
+	{ V_RVAL, (name), (llfcn_t)(call), (narg) }
+
+/*
+ * Returns a 32 bit quantity regardless of datamodel
+ */
+#define	LX_CI(name, call, narg)      \
+	{ SE_32RVAL1, (name), (llfcn_t)(call), (narg) }
+
+extern longlong_t lx_nosys(void);	/* handler used by LX_NOSYS entries */
+#define	LX_NOSYS(name)			\
+	{SE_64RVAL, (name), (llfcn_t)lx_nosys, 0}
+
+lx_sysent_t lx_sysent[] =
+{
+	LX_NOSYS("lx_nosys"),					/* 0 */
+	LX_NOSYS("exit"),					/* 1 */
+	LX_NOSYS("lx_fork"),
+	LX_NOSYS("read"),
+	LX_NOSYS("write"),
+	LX_NOSYS("open"),
+	LX_NOSYS("close"),
+	LX_NOSYS("waitpid"),
+	LX_NOSYS("creat"),
+	LX_NOSYS("link"),
+	LX_NOSYS("unlink"),					/* 10 */
+	LX_NOSYS("exec"),
+	LX_NOSYS("chdir"),
+	LX_NOSYS("gtime"),
+	LX_NOSYS("mknod"),
+	LX_NOSYS("chmod"),
+	LX_NOSYS("lchown16"),
+	LX_NOSYS("break"),
+	LX_NOSYS("stat"),
+	LX_NOSYS("lseek"),
+	LX_CL("getpid",	lx_getpid,	0),			/* 20 */
+	LX_NOSYS("mount"),
+	LX_NOSYS("umount"),
+	LX_NOSYS("setuid16"),
+	LX_NOSYS("getuid16"),
+	LX_NOSYS("stime"),
+	LX_NOSYS("ptrace"),
+	LX_NOSYS("alarm"),
+	LX_NOSYS("fstat"),
+	LX_NOSYS("pause"),
+	LX_NOSYS("utime"),					/* 30 */
+	LX_NOSYS("stty"),
+	LX_NOSYS("gtty"),
+	LX_NOSYS("access"),
+	LX_NOSYS("nice"),
+	LX_NOSYS("ftime"),
+	LX_NOSYS("sync"),
+	LX_CL("kill",		lx_kill,		2),
+	LX_NOSYS("rename"),
+	LX_NOSYS("mkdir"),
+	LX_NOSYS("rmdir"),					/* 40 */
+	LX_NOSYS("dup"),
+	LX_NOSYS("pipe"),
+	LX_NOSYS("times"),
+	LX_NOSYS("prof"),
+	LX_CL("brk",	lx_brk,		1),
+	LX_NOSYS("setgid16"),
+	LX_NOSYS("getgid16"),
+	LX_NOSYS("signal"),
+	LX_NOSYS("geteuid16"),
+	LX_NOSYS("getegid16"),					/* 50 */
+	LX_NOSYS("sysacct"),
+	LX_NOSYS("umount2"),
+	LX_NOSYS("lock"),
+	LX_NOSYS("ioctl"),
+	LX_NOSYS("fcntl"),
+	LX_NOSYS("mpx"),
+	LX_NOSYS("setpgid"),
+	LX_NOSYS("ulimit"),
+	LX_NOSYS("olduname"),
+	LX_NOSYS("umask"),					/* 60 */
+	LX_NOSYS("chroot"),
+	LX_NOSYS("ustat"),
+	LX_NOSYS("dup2"),
+	LX_CL("getppid",	lx_getppid,	0),
+	LX_NOSYS("pgrp"),
+	LX_NOSYS("setsid"),
+	LX_NOSYS("sigaction"),
+	LX_NOSYS("sgetmask"),
+	LX_NOSYS("ssetmask"),
+	LX_NOSYS("setreuid16"),					/* 70 */
+	LX_NOSYS("setregid16"),
+	LX_NOSYS("sigsuspend"),
+	LX_NOSYS("sigpending"),
+	LX_NOSYS("sethostname"),
+	LX_NOSYS("setrlimit"),
+	LX_NOSYS("old_getrlimit"),
+	LX_NOSYS("getrusage"),
+	LX_NOSYS("gettimeofday"),
+	LX_NOSYS("settimeofday"),
+	LX_NOSYS("getgroups16"),				/* 80 */
+	LX_NOSYS("setgroups16"),
+	LX_NOSYS("old_select"),
+	LX_NOSYS("symlink"),
+	LX_NOSYS("oldlstat"),
+	LX_NOSYS("readlink"),
+	LX_NOSYS("uselib"),
+	LX_NOSYS("swapon"),
+	LX_NOSYS("reboot"),
+	LX_NOSYS("old_readdir"),
+	LX_NOSYS("old_mmap"),					/* 90 */
+	LX_NOSYS("munmap"),
+	LX_NOSYS("truncate"),
+	LX_NOSYS("ftruncate"),
+	LX_NOSYS("fchmod"),
+	LX_NOSYS("fchown16"),
+	LX_NOSYS("getpriority"),
+	LX_NOSYS("setpriority"),
+	LX_NOSYS("profil"),
+	LX_NOSYS("statfs"),
+	LX_NOSYS("fstatfs"),					/* 100 */
+	LX_NOSYS("ioperm"),
+	LX_NOSYS("socketcall"),
+	LX_NOSYS("syslog"),
+	LX_NOSYS("setitimer"),
+	LX_NOSYS("getitimer"),
+	LX_NOSYS("newstat"),
+	LX_NOSYS("newsltat"),	/* NOTE(review): typo for "newlstat"? */
+	LX_NOSYS("newsftat"),	/* NOTE(review): typo for "newfstat"? */
+	LX_NOSYS("uname"),
+	LX_NOSYS("oldiopl"),					/* 110 */
+	LX_NOSYS("oldvhangup"),
+	LX_NOSYS("idle"),
+	LX_NOSYS("vm86old"),
+	LX_NOSYS("wait4"),
+	LX_NOSYS("swapoff"),
+	LX_CL("sysinfo", lx_sysinfo,	1),
+	LX_NOSYS("ipc"),
+	LX_NOSYS("fsync"),
+	LX_NOSYS("sigreturn"),
+	LX_CL("clone",	lx_clone,	5),			/* 120 */
+	LX_NOSYS("setdomainname"),
+	LX_NOSYS("newuname"),
+	LX_CL("modify_ldt",	lx_modify_ldt,	3),
+	LX_NOSYS("adjtimex"),
+	LX_NOSYS("mprotect"),
+	LX_NOSYS("sigprocmask"),
+	LX_NOSYS("create_module"),
+	LX_NOSYS("init_module"),
+	LX_NOSYS("delete_module"),
+	LX_NOSYS("get_kernel_syms"),				/* 130 */
+	LX_NOSYS("quotactl"),
+	LX_NOSYS("getpgid"),
+	LX_NOSYS("fchdir"),
+	LX_NOSYS("bdflush"),
+	LX_NOSYS("sysfs"),
+	LX_NOSYS("personality"),
+	LX_NOSYS("afs_syscall"),
+	LX_NOSYS("setfsuid16"),
+	LX_NOSYS("setfsgid16"),
+	LX_NOSYS("llseek"),					/* 140 */
+	LX_NOSYS("getdents"),
+	LX_NOSYS("select"),
+	LX_NOSYS("flock"),
+	LX_NOSYS("msync"),
+	LX_NOSYS("readv"),
+	LX_NOSYS("writev"),
+	LX_NOSYS("getsid"),
+	LX_NOSYS("fdatasync"),
+	LX_NOSYS("sysctl"),
+	LX_NOSYS("mlock"),					/* 150 */
+	LX_NOSYS("munlock"),
+	LX_NOSYS("mlockall"),
+	LX_NOSYS("munlockall"),
+	LX_CL("sched_setparam",	lx_sched_setparam, 2),
+	LX_CL("sched_getparam",	lx_sched_getparam, 2),
+	LX_NOSYS("sched_setscheduler"),
+	LX_NOSYS("sched_getscheduler"),
+	LX_NOSYS("yield"),
+	LX_NOSYS("sched_get_priority_max"),
+	LX_NOSYS("sched_get_priority_min"),			/* 160 */
+	LX_CL("sched_rr_get_interval", lx_sched_rr_get_interval, 2),
+	LX_NOSYS("nanosleep"),
+	LX_NOSYS("mremap"),
+	LX_CL("setresuid16",		lx_setresuid16,	3),
+	LX_NOSYS("getresuid16"),
+	LX_NOSYS("vm86"),
+	LX_NOSYS("query_module"),
+	LX_NOSYS("poll"),
+	LX_NOSYS("nfsserctl"),
+	LX_CL("setresgid16",		lx_setresgid16, 3),	/* 170 */
+	LX_NOSYS("getresgid16"),
+	LX_NOSYS("prctl"),
+	LX_NOSYS("rt_sigreturn"),
+	LX_NOSYS("rt_sigaction"),
+	LX_NOSYS("rt_sigprocmask"),
+	LX_NOSYS("rt_sigpending"),
+	LX_NOSYS("rt_sigtimedwait"),
+	LX_NOSYS("rt_sigqueueinfo"),
+	LX_NOSYS("rt_sigsuspend"),
+	LX_NOSYS("pread64"),					/* 180 */
+	LX_NOSYS("pwrite64"),
+	LX_NOSYS("chown16"),
+	LX_NOSYS("getcwd"),
+	LX_NOSYS("capget"),
+	LX_NOSYS("capset"),
+	LX_NOSYS("sigaltstack"),
+	LX_NOSYS("sendfile"),
+	LX_NOSYS("getpmsg"),
+	LX_NOSYS("putpmsg"),
+	LX_NOSYS("vfork"),					/* 190 */
+	LX_NOSYS("getrlimit"),
+	LX_NOSYS("mmap2"),
+	LX_NOSYS("truncate64"),
+	LX_NOSYS("ftruncate64"),
+	LX_NOSYS("stat64"),
+	LX_NOSYS("lstat64"),
+	LX_NOSYS("fstat64"),
+	LX_NOSYS("lchown"),
+	LX_NOSYS("getuid"),
+	LX_NOSYS("getgid"),					/* 200 */
+	LX_NOSYS("geteuid"),
+	LX_NOSYS("getegid"),
+	LX_NOSYS("setreuid"),
+	LX_NOSYS("setregid"),
+	LX_NOSYS("getgroups"),
+	LX_CL("setgroups",	lx_setgroups,	2),
+	LX_NOSYS("fchown"),
+	LX_CL("setresuid",	lx_setresuid,	3),
+	LX_NOSYS("getresuid"),
+	LX_CL("setresgid",	lx_setresgid,	3),		/* 210 */
+	LX_NOSYS("getresgid"),
+	LX_NOSYS("chown"),
+	LX_NOSYS("setuid"),
+	LX_NOSYS("setgid"),
+	LX_NOSYS("setfsuid"),
+	LX_NOSYS("setfsgid"),
+	LX_NOSYS("pivot_root"),
+	LX_NOSYS("mincore"),
+	LX_NOSYS("madvise"),
+	LX_NOSYS("getdents64"),					/* 220 */
+	LX_NOSYS("fcntl64"),
+	LX_NOSYS("lx_nosys"),
+	LX_NOSYS("security"),
+	LX_CL("gettid",	lx_gettid,	0),
+	LX_NOSYS("readahead"),
+	LX_NOSYS("setxattr"),
+	LX_NOSYS("lsetxattr"),
+	LX_NOSYS("fsetxattr"),
+	LX_NOSYS("getxattr"),
+	LX_NOSYS("lgetxattr"),					/* 230 */
+	LX_NOSYS("fgetxattr"),
+	LX_NOSYS("listxattr"),
+	LX_NOSYS("llistxattr"),
+	LX_NOSYS("flistxattr"),
+	LX_NOSYS("removexattr"),
+	LX_NOSYS("lremovexattr"),
+	LX_NOSYS("fremovexattr"),
+	LX_CL("tkill",		lx_tkill,		2),
+	LX_NOSYS("sendfile64"),
+	LX_CL("futex",		lx_futex,		6), 	/* 240 */
+	LX_NOSYS("sched_setaffinity"),
+	LX_NOSYS("sched_getaffinity"),
+	LX_CL("set_thread_area",	lx_set_thread_area,	1),
+	LX_CL("get_thread_area",	lx_get_thread_area,	1),
+	LX_NOSYS("io_setup"),
+	LX_NOSYS("io_destroy"),
+	LX_NOSYS("io_getevents"),
+	LX_NOSYS("io_submit"),
+	LX_NOSYS("io_cancel"),
+	LX_NOSYS("fadvise64"),					/* 250 */
+	LX_NOSYS("lx_nosys"),
+	LX_NOSYS("exit_group"),
+	LX_NOSYS("lookup_dcookie"),
+	LX_NOSYS("epoll_create"),
+	LX_NOSYS("epoll_ctl"),
+	LX_NOSYS("epoll_wait"),
+	LX_NOSYS("remap_file_pages"),
+	LX_CL("set_tid_address",	lx_set_tid_address,	1),
+	LX_NOSYS("timer_create"),
+	LX_NOSYS("timer_settime"),				/* 260 */
+	LX_NOSYS("timer_gettime"),
+	LX_NOSYS("timer_getoverrun"),
+	LX_NOSYS("timer_delete"),
+	LX_NOSYS("clock_settime"),
+	LX_NOSYS("clock_gettime"),
+	LX_NOSYS("clock_getres"),
+	LX_NOSYS("clock_nanosleep"),
+	LX_NOSYS("statfs64"),
+	LX_NOSYS("fstatfs64"),
+	LX_NOSYS("tgkill"),					/* 270 */
+	LX_NOSYS("utimes"),
+	LX_NOSYS("fadvise64_64"),
+	LX_NOSYS("vserver"),
+	NULL	/* NULL-termination is required for lx_systrace */
+};
+
+/*
+ * Invoke the handler registered in lx_sysent[num], handing it the first
+ * sy_narg of arg1..arg6, and return the handler's result as an int64_t.
+ * Both the call and its return value are traced through lx_print().
+ *
+ * A table entry whose sy_narg is outside the range 0-6 causes a panic.
+ *
+ * NOTE(review): num is used as an array index without a bounds check here;
+ * presumably the caller validates it -- confirm.
+ */
+int64_t
+lx_emulate_syscall(int num, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	struct lx_sysent *ent = &lx_sysent[num];
+	int64_t ret = 0;
+
+	switch (ent->sy_narg) {
+	case 0:
+		lx_print("--> %s()\n", ent->sy_name);
+		ret = (int64_t)ent->sy_callc();
+		break;
+	case 1:
+		lx_print("--> %s(0x%lx)\n", ent->sy_name, arg1);
+		ret = (int64_t)ent->sy_callc(arg1);
+		break;
+	case 2:
+		lx_print("--> %s(0x%lx, 0x%lx)\n", ent->sy_name, arg1, arg2);
+		ret = (int64_t)ent->sy_callc(arg1, arg2);
+		break;
+	case 3:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx)\n",
+		    ent->sy_name, arg1, arg2, arg3);
+		ret = (int64_t)ent->sy_callc(arg1, arg2, arg3);
+		break;
+	case 4:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		    ent->sy_name, arg1, arg2, arg3, arg4);
+		ret = (int64_t)ent->sy_callc(arg1, arg2, arg3, arg4);
+		break;
+	case 5:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx)\n",
+		    ent->sy_name, arg1, arg2, arg3, arg4, arg5);
+		ret = (int64_t)ent->sy_callc(arg1, arg2, arg3, arg4, arg5);
+		break;
+	case 6:
+		lx_print("--> %s(0x%lx, 0x%lx, 0x%lx, 0x%lx,"
+		    " 0x%lx, 0x%lx)\n",
+		    ent->sy_name, arg1, arg2, arg3, arg4, arg5, arg6);
+		ret = (int64_t)ent->sy_callc(arg1, arg2, arg3, arg4, arg5,
+		    arg6);
+		break;
+	default:
+		panic("Invalid syscall entry: #%d at 0x%p\n", num, ent);
+	}
+
+	lx_print("----------> return  (0x%llx)\n", (long long)ret);
+	return (ret);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_proc.h b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
new file mode 100644
index 0000000000..c79e3fa590
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_proc.h
@@ -0,0 +1,233 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LXPROC_H
+#define	_LXPROC_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * lx_proc.h: declarations, data structures and macros for lxprocfs
+ */
+
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/debug.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/file.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/user.h>
+#include <sys/t_lock.h>
+#include <sys/sysmacros.h>
+#include <sys/cred_impl.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/dnlc.h>
+#include <sys/atomic.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <vm/as.h>
+#include <vm/anon.h>
+
+/*
+ * Convert a vnode into an lxpr_mnt_t
+ */
+#define	VTOLXPM(vp)	((lxpr_mnt_t *)(vp)->v_vfsp->vfs_data)
+
+/*
+ * convert a vnode into an lxpr_node
+ */
+#define	VTOLXP(vp)	((lxpr_node_t *)(vp)->v_data)
+
+/*
+ * convert a lxprnode into a vnode
+ */
+#define	LXPTOV(lxpnp)	((lxpnp)->lxpr_vnode)
+
+/*
+ * convert a lxpr_node into zone for fs
+ */
+#define	LXPTOZ(lxpnp) \
+	(((lxpr_mnt_t *)(lxpnp)->lxpr_vnode->v_vfsp->vfs_data)->lxprm_zone)
+
+#define	LXPNSIZ		256	/* max size of lx /proc file name entries */
+
+/*
+ * Pretend that a directory entry takes 16 bytes
+ */
+#define	LXPR_SDSIZE	16
+
+/*
+ * Node/file types for lx /proc files
+ * (directories and files contained therein).
+ */
+typedef enum lxpr_nodetype {
+	LXPR_PROCDIR,		/* /proc		*/
+	LXPR_PIDDIR,		/* /proc/<pid>		*/
+	LXPR_PID_CMDLINE,	/* /proc/<pid>/cmdline	*/
+	LXPR_PID_CPU,		/* /proc/<pid>/cpu	*/
+	LXPR_PID_CURDIR,	/* /proc/<pid>/cwd	*/
+	LXPR_PID_ENV,		/* /proc/<pid>/environ	*/
+	LXPR_PID_EXE,		/* /proc/<pid>/exe	*/
+	LXPR_PID_MAPS,		/* /proc/<pid>/maps	*/
+	LXPR_PID_MEM,		/* /proc/<pid>/mem	*/
+	LXPR_PID_ROOTDIR,	/* /proc/<pid>/root	*/
+	LXPR_PID_STAT,		/* /proc/<pid>/stat	*/
+	LXPR_PID_STATM,		/* /proc/<pid>/statm	*/
+	LXPR_PID_STATUS,	/* /proc/<pid>/status	*/
+	LXPR_PID_FDDIR,		/* /proc/<pid>/fd	*/
+	LXPR_PID_FD_FD,		/* /proc/<pid>/fd/nn	*/
+	LXPR_CMDLINE,		/* /proc/cmdline	*/
+	LXPR_CPUINFO,		/* /proc/cpuinfo	*/
+	LXPR_DEVICES,		/* /proc/devices	*/
+	LXPR_DMA,		/* /proc/dma		*/
+	LXPR_FILESYSTEMS,	/* /proc/filesystems	*/
+	LXPR_INTERRUPTS,	/* /proc/interrupts	*/
+	LXPR_IOPORTS,		/* /proc/ioports	*/
+	LXPR_KCORE,		/* /proc/kcore		*/
+	LXPR_KMSG,		/* /proc/kmsg		*/
+	LXPR_LOADAVG,		/* /proc/loadavg	*/
+	LXPR_MEMINFO,		/* /proc/meminfo	*/
+	LXPR_MOUNTS,		/* /proc/mounts		*/
+	LXPR_NETDIR,		/* /proc/net		*/
+	LXPR_NET_ARP,		/* /proc/net/arp	*/
+	LXPR_NET_DEV,		/* /proc/net/dev	*/
+	LXPR_NET_DEV_MCAST,	/* /proc/net/dev_mcast	*/
+	LXPR_NET_IGMP,		/* /proc/net/igmp	*/
+	LXPR_NET_IP_MR_CACHE,	/* /proc/net/ip_mr_cache */
+	LXPR_NET_IP_MR_VIF,	/* /proc/net/ip_mr_vif	*/
+	LXPR_NET_MCFILTER,	/* /proc/net/mcfilter	*/
+	LXPR_NET_NETSTAT,	/* /proc/net/netstat	*/
+	LXPR_NET_RAW,		/* /proc/net/raw	*/
+	LXPR_NET_ROUTE,		/* /proc/net/route	*/
+	LXPR_NET_RPC,		/* /proc/net/rpc	*/
+	LXPR_NET_RT_CACHE,	/* /proc/net/rt_cache	*/
+	LXPR_NET_SOCKSTAT,	/* /proc/net/sockstat	*/
+	LXPR_NET_SNMP,		/* /proc/net/snmp	*/
+	LXPR_NET_STAT,		/* /proc/net/stat	*/
+	LXPR_NET_TCP,		/* /proc/net/tcp	*/
+	LXPR_NET_UDP,		/* /proc/net/udp	*/
+	LXPR_NET_UNIX,		/* /proc/net/unix	*/
+	LXPR_PARTITIONS,	/* /proc/partitions	*/
+	LXPR_SELF,		/* /proc/self		*/
+	LXPR_STAT,		/* /proc/stat		*/
+	LXPR_UPTIME,		/* /proc/uptime		*/
+	LXPR_VERSION,		/* /proc/version	*/
+	LXPR_NFILES		/* number of lx /proc file types */
+} lxpr_nodetype_t;
+
+
+/*
+ * Number of fds allowed for in the inode number calculation
+ * per process (if a process has more fds then inode numbers
+ * may be duplicated)
+ */
+#define	LXPR_FD_PERPROC 2000
+
+/*
+ * external dirent characteristics
+ */
+#define	LXPRMAXNAMELEN	14
+typedef struct {
+	lxpr_nodetype_t	d_type;
+	char		d_name[LXPRMAXNAMELEN];
+} lxpr_dirent_t;
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to v_data in the vnode structure
+ */
+typedef struct lxpr_node {
+	lxpr_nodetype_t	lxpr_type;	/* type of this node 		*/
+	vnode_t		*lxpr_vnode;	/* vnode for the node		*/
+	vnode_t		*lxpr_parent;	/* parent directory		*/
+	vnode_t		*lxpr_realvp;	/* real vnode, file in dirs	*/
+	timestruc_t	lxpr_time;	/* creation etc time for file	*/
+	mode_t		lxpr_mode;	/* file mode bits		*/
+	uid_t		lxpr_uid;	/* file owner			*/
+	gid_t		lxpr_gid;	/* file group owner		*/
+	pid_t		lxpr_pid;	/* pid of proc referred to	*/
+	ino_t		lxpr_ino;	/* node id 			*/
+	ldi_handle_t	lxpr_cons_ldih; /* ldi handle for console device */
+} lxpr_node_t;
+
+struct zone;    /* forward declaration */
+
+/*
+ * This is the lxprocfs private data object
+ * which is attached to vfs_data in the vfs structure
+ */
+typedef struct lxpr_mnt {
+	lxpr_node_t	*lxprm_node;	/* node at root of proc mount */
+	struct zone	*lxprm_zone;	/* zone for this mount */
+	ldi_ident_t	lxprm_li;	/* ident for ldi */
+} lxpr_mnt_t;
+
+extern vnodeops_t	*lxpr_vnodeops;
+extern int		nproc_highbit;	/* highbit(v.v_nproc)		*/
+
+typedef struct mounta	mounta_t;
+/* lxpr_node_t cache setup/teardown, inode numbering and node management */
+extern void lxpr_initnodecache(void);
+extern void lxpr_fininodecache(void);
+extern void lxpr_initrootnode(lxpr_node_t **, vfs_t *);
+extern ino_t lxpr_inode(lxpr_nodetype_t, pid_t, int);
+extern ino_t lxpr_parentinode(lxpr_node_t *);
+extern lxpr_node_t *lxpr_getnode(vnode_t *, lxpr_nodetype_t, proc_t *, int);
+extern void lxpr_freenode(lxpr_node_t *);
+/* lxpr_uiobuf_t: opaque output buffer created from a uio_t */
+typedef struct lxpr_uiobuf lxpr_uiobuf_t;
+extern lxpr_uiobuf_t *lxpr_uiobuf_new(uio_t *);
+extern void lxpr_uiobuf_free(lxpr_uiobuf_t *);
+extern int lxpr_uiobuf_flush(lxpr_uiobuf_t *);
+extern void lxpr_uiobuf_seek(lxpr_uiobuf_t *, offset_t);
+extern void lxpr_uiobuf_write(lxpr_uiobuf_t *, const char *, size_t);
+extern void lxpr_uiobuf_printf(lxpr_uiobuf_t *, const char *, ...);
+extern void lxpr_uiobuf_seterr(lxpr_uiobuf_t *, int);
+
+proc_t *lxpr_lock(pid_t);
+void lxpr_unlock(proc_t *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _LXPROC_H */
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
new file mode 100644
index 0000000000..5d252207fb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prsubr.c
@@ -0,0 +1,494 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * lx_prsubr.c: Various support functions for the lx /proc vnodeops.
+ */
+
+#include <sys/varargs.h>
+
+#include <sys/cpuvar.h>
+#include <sys/mman.h>
+#include <sys/vmsystm.h>
+#include <sys/prsystm.h>
+
+#include "lx_proc.h"
+
+#define	LXPRCACHE_NAME "lxpr_cache"
+
+static int lxpr_node_constructor(void*, void*, int);
+static void lxpr_node_destructor(void*, void*);
+
+static kmem_cache_t *lxpr_node_cache;
+
+/*
+ * Staging buffer used to generate the contents of an lx /proc file and
+ * hand them to the reader's uio in pieces.
+ */
+struct lxpr_uiobuf {
+	uio_t *uiop;		/* uio that output is eventually moved to */
+	char *buffer;		/* start of the staging buffer */
+	uint32_t buffsize;	/* size of the staging buffer in bytes */
+	char *pos;		/* next free byte in the staging buffer */
+	size_t beg;		/* file offset that buffer[0] corresponds to */
+	int error;		/* first error recorded, 0 if none */
+};
+
+#define	BUFSIZE 4000
+
+/*
+ * lxpr_uiobuf_new(): create an empty output buffer bound to 'uiop'.
+ *
+ * The descriptor and its BUFSIZE-byte staging area come from a single
+ * allocation; the staging area starts immediately after the descriptor.
+ */
+struct lxpr_uiobuf *
+lxpr_uiobuf_new(uio_t *uiop)
+{
+	struct lxpr_uiobuf *ub;
+
+	ub = kmem_alloc(sizeof (struct lxpr_uiobuf) + BUFSIZE, KM_SLEEP);
+
+	/* The data area lives directly behind the descriptor */
+	ub->buffer = (char *)(ub + 1);
+	ub->pos = ub->buffer;
+	ub->buffsize = BUFSIZE;
+
+	ub->uiop = uiop;
+	ub->beg = 0;
+	ub->error = 0;
+
+	return (ub);
+}
+
+/*
+ * lxpr_uiobuf_free(): release a buffer obtained from lxpr_uiobuf_new().
+ *
+ * The buffer is expected to be empty (already flushed) at this point.
+ */
+void
+lxpr_uiobuf_free(struct lxpr_uiobuf *uiobuf)
+{
+	size_t total;
+
+	ASSERT(uiobuf != NULL);
+	ASSERT(uiobuf->pos == uiobuf->buffer);
+
+	/* Descriptor and staging area were allocated as one chunk */
+	total = sizeof (struct lxpr_uiobuf) + uiobuf->buffsize;
+	kmem_free(uiobuf, total);
+}
+
+/*
+ * lxpr_uiobuf_seek(): set the offset of the uio behind this buffer.
+ */
+void
+lxpr_uiobuf_seek(struct lxpr_uiobuf *uiobuf, offset_t offset)
+{
+	uio_t *uio = uiobuf->uiop;
+
+	uio->uio_offset = offset;
+}
+
+/*
+ * lxpr_uiobuf_seterr(): record an error against the buffer.
+ *
+ * Only one error may be recorded; setting a second one trips the ASSERT.
+ */
+void
+lxpr_uiobuf_seterr(struct lxpr_uiobuf *uiobuf, int err)
+{
+	/* An earlier error must not be overwritten */
+	ASSERT(uiobuf->error == 0);
+	uiobuf->error = err;
+}
+
+/*
+ * lxpr_uiobuf_flush(): push buffered output to the uio and empty the buffer.
+ *
+ * The buffer holds the bytes of the generated file starting at file
+ * offset uiobuf->beg.  Only the part of the buffer at or beyond the uio's
+ * current offset is copied out with uiomove().  Nothing is copied once
+ * an error has been recorded or the uio has no room left.  The buffer
+ * position is always reset to the start.
+ *
+ * Returns the error recorded in the uiobuf (0 if none).
+ */
+int
+lxpr_uiobuf_flush(struct lxpr_uiobuf *uiobuf)
+{
+	off_t off = uiobuf->uiop->uio_offset;
+	caddr_t uaddr = uiobuf->buffer;
+	size_t beg = uiobuf->beg;
+
+	/* Number of bytes currently staged in the buffer */
+	size_t size = uiobuf->pos - uaddr;
+
+	if (uiobuf->error == 0 && uiobuf->uiop->uio_resid != 0) {
+		/*
+		 * NOTE(review): 'off' is signed and 'beg' is unsigned, so
+		 * this comparison is done after the usual arithmetic
+		 * conversions -- confirm that is what is intended.
+		 */
+		ASSERT(off >= beg);
+
+		/* Copy out only if the buffer reaches past the uio offset */
+		if (beg+size > off && off >= 0)
+			uiobuf->error =
+			    uiomove(uaddr+(off-beg), size-(off-beg),
+				UIO_READ, uiobuf->uiop);
+
+		/* The next buffer-full starts where this one ended */
+		uiobuf->beg += size;
+	}
+
+	/* Buffer is now empty */
+	uiobuf->pos = uaddr;
+
+	return (uiobuf->error);
+}
+
+/*
+ * lxpr_uiobuf_write(): append 'size' bytes from 'buf' to the output buffer.
+ *
+ * When the data does not fit, the buffer is filled, flushed to the uio and
+ * the remainder is tried again.  Output is silently dropped once an error
+ * has been recorded or the uio has no space left.
+ */
+void
+lxpr_uiobuf_write(struct lxpr_uiobuf *uiobuf, const char *buf, size_t size)
+{
+	for (;;) {
+		uint_t room;
+
+		/* Stop once an error is latched or the reader is satisfied */
+		if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+			return;
+
+		room = uiobuf->buffsize - (uiobuf->pos - uiobuf->buffer);
+
+		if (room < size) {
+			/* Fill what is left, push it out, retry the rest */
+			bcopy(buf, uiobuf->pos, room);
+			uiobuf->pos += room;
+			(void) lxpr_uiobuf_flush(uiobuf);
+			buf += room;
+			size -= room;
+			continue;
+		}
+
+		/* Everything that remains fits in the buffer */
+		bcopy(buf, uiobuf->pos, size);
+		uiobuf->pos += size;
+		return;
+	}
+}
+
+#define	TYPBUFFSIZE 256
+/*
+ * lxpr_uiobuf_printf(): printf-style formatted output into the buffer.
+ *
+ * The text is first formatted into a TYPBUFFSIZE-byte stack buffer.  If
+ * it does not fit, a heap buffer of exactly the required size is
+ * allocated and the text is formatted a second time.
+ *
+ * Nothing is written once an error has been recorded or the uio has no
+ * space left.
+ */
+void
+lxpr_uiobuf_printf(struct lxpr_uiobuf *uiobuf, const char *fmt, ...)
+{
+	va_list args;
+	char buff[TYPBUFFSIZE];
+	int len;
+	char *buffer;
+
+	/* Can we still do any output */
+	if (uiobuf->error != 0 || uiobuf->uiop->uio_resid == 0)
+		return;
+
+	/* Try using stack allocated buffer */
+	va_start(args, fmt);
+	len = vsnprintf(buff, TYPBUFFSIZE, fmt, args);
+	va_end(args);
+	if (len < TYPBUFFSIZE) {
+		lxpr_uiobuf_write(uiobuf, buff, len);
+		return;
+	}
+
+	/* Not enough space in pre-allocated buffer */
+	buffer = kmem_alloc(len+1, KM_SLEEP);
+
+	/*
+	 * A va_list is indeterminate after it has been handed to
+	 * vsnprintf(), so the argument list must be restarted before it is
+	 * walked a second time.
+	 *
+	 * We know we allocated the correct amount of space
+	 * so no check on the return value
+	 */
+	va_start(args, fmt);
+	(void) vsnprintf(buffer, len+1, fmt, args);
+	va_end(args);
+	lxpr_uiobuf_write(uiobuf, buffer, len);
+	kmem_free(buffer, len+1);
+}
+
+/*
+ * lxpr_lock():
+ *
+ * Lookup process from pid and return with p_lock and P_PR_LOCK held.
+ *
+ * A pid of 1 is translated to the pid of the calling zone's init
+ * process.  Returns NULL if the process does not exist or is still being
+ * created (SIDL).  A successful call must be paired with lxpr_unlock().
+ */
+proc_t *
+lxpr_lock(pid_t pid)
+{
+	proc_t *p;
+	kmutex_t *mp;
+
+	ASSERT(!MUTEX_HELD(&pidlock));
+
+	for (;;) {
+		mutex_enter(&pidlock);
+
+		/*
+		 * If the pid is 1, we really want the zone's init process
+		 */
+		p = prfind((pid == 1) ?
+		    curproc->p_zone->zone_proc_initpid : pid);
+
+		if (p == NULL || p->p_stat == SIDL) {
+			mutex_exit(&pidlock);
+			return (NULL);
+		}
+		/*
+		 * p_lock is persistent, but p itself is not -- it could
+		 * vanish during cv_wait().  Load p->p_lock now so we can
+		 * drop it after cv_wait() without referencing p.
+		 */
+		mp = &p->p_lock;
+		mutex_enter(mp);
+
+		/* p_lock is taken before pidlock is dropped */
+		mutex_exit(&pidlock);
+
+		/* Nobody else holds P_PR_LOCK: the process is ours */
+		if (!(p->p_proc_flag & P_PR_LOCK))
+			break;
+
+		/* Wait for the current holder, then look the pid up again */
+		cv_wait(&pr_pid_cv[p->p_slot], mp);
+		mutex_exit(mp);
+	}
+	p->p_proc_flag |= P_PR_LOCK;
+	THREAD_KPRI_REQUEST();
+	return (p);
+}
+
+/*
+ * lxpr_unlock()
+ *
+ * Unlock a process locked by lxpr_lock().
+ *
+ * The caller must hold p->p_lock and P_PR_LOCK and must not hold pidlock.
+ * A waiter on the process' pr_pid_cv slot is signalled, P_PR_LOCK is
+ * cleared and p_lock is dropped.
+ */
+void
+lxpr_unlock(proc_t *p)
+{
+	ASSERT(p->p_proc_flag & P_PR_LOCK);
+	ASSERT(MUTEX_HELD(&p->p_lock));
+	ASSERT(!MUTEX_HELD(&pidlock));
+
+	/* Signal while p_lock is still held; the flag is cleared before exit */
+	cv_signal(&pr_pid_cv[p->p_slot]);
+	p->p_proc_flag &= ~P_PR_LOCK;
+	mutex_exit(&p->p_lock);
+	/* Pairs with THREAD_KPRI_REQUEST() in lxpr_lock() */
+	THREAD_KPRI_RELEASE();
+}
+
+/*
+ * lxpr_initnodecache(): create the kmem cache that lxpr_node_t's are
+ * allocated from.
+ */
+void
+lxpr_initnodecache()
+{
+	kmem_cache_t *cache;
+
+	cache = kmem_cache_create(LXPRCACHE_NAME, sizeof (lxpr_node_t), 0,
+	    lxpr_node_constructor, lxpr_node_destructor, NULL,
+	    NULL, NULL, 0);
+
+	lxpr_node_cache = cache;
+}
+
+/*
+ * lxpr_fininodecache(): destroy the lxpr_node_t kmem cache.
+ */
+void
+lxpr_fininodecache()
+{
+	kmem_cache_t *cache = lxpr_node_cache;
+
+	kmem_cache_destroy(cache);
+}
+
+/*
+ * kmem cache constructor for lxpr_node_t: allocates the vnode that goes
+ * with the node and links the two together.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+lxpr_node_constructor(void *buf, void *un, int kmflags)
+{
+	lxpr_node_t	*node = buf;
+	vnode_t		*vnp = vn_alloc(KM_SLEEP);
+
+	/* node -> vnode and vnode -> node back pointer */
+	node->lxpr_vnode = vnp;
+	vnp->v_data = (caddr_t)node;
+
+	(void) vn_setops(vnp, lxpr_vnodeops);
+
+	return (0);
+}
+
+/*
+ * kmem cache destructor for lxpr_node_t: frees the vnode that the
+ * constructor attached to the node.
+ */
+/* ARGSUSED */
+static void
+lxpr_node_destructor(void *buf, void *un)
+{
+	lxpr_node_t	*node = buf;
+	vnode_t		*vnp = LXPTOV(node);
+
+	vn_free(vnp);
+}
+
+/*
+ * Calculate an inode number
+ *
+ * The number is derived from the node type, the pid and (for entries in
+ * an fd directory) the file descriptor.  A pid of 1 is first replaced by
+ * the pid of the calling zone's init process.
+ */
+ino_t
+lxpr_inode(lxpr_nodetype_t type, pid_t pid, int fd)
+{
+	if (pid == 1)
+		pid = curproc->p_zone->zone_proc_initpid;
+
+	/* A /proc/<pid> directory */
+	if (type == LXPR_PIDDIR)
+		return (pid + 1);
+
+	/* The root of the file system */
+	if (type == LXPR_PROCDIR)
+		return (maxpid + 2);
+
+	/* An entry in /proc/<pid>/fd: placed after the per-type slots */
+	if (type == LXPR_PID_FD_FD) {
+		return (maxpid + 2 +
+		    (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+		    LXPR_NFILES + fd);
+	}
+
+	/* Everything else: one slot per node type within the pid's range */
+	return (maxpid + 2 +
+	    (pid * (LXPR_FD_PERPROC + LXPR_NFILES)) +
+	    type);
+}
+
+/*
+ * Return inode number of parent (directory)
+ */
+ino_t
+lxpr_parentinode(lxpr_node_t *lxpnp)
+{
+	/*
+	 * The parent of the root node is the mounted-on vnode, which is
+	 * not one of ours, so the root reports its own inode number.
+	 */
+	if (lxpnp->lxpr_type == LXPR_PROCDIR)
+		return (lxpnp->lxpr_ino);
+
+	return (VTOLXP(lxpnp->lxpr_parent)->lxpr_ino);
+}
+
+/*
+ * Allocate a new lxproc node
+ *
+ * This also allocates the vnode associated with it
+ *
+ *	dp	parent directory vnode; a hold is taken on it
+ *	type	kind of node to create
+ *	p	process the node refers to, or NULL for nodes that are
+ *		not tied to a process
+ *	fd	file descriptor number, used only for the inode number
+ *
+ * For the cwd, root and exe node types a hold is also taken on the
+ * underlying "real" vnode.  The node is released with lxpr_freenode().
+ */
+lxpr_node_t *
+lxpr_getnode(vnode_t *dp, lxpr_nodetype_t type, proc_t *p, int fd)
+{
+	lxpr_node_t *lxpnp;
+	vnode_t *vp;
+	user_t *up;
+	timestruc_t now;
+
+	/*
+	 * Allocate a new node. It is deallocated in vop_inactive
+	 */
+	lxpnp = kmem_cache_alloc(lxpr_node_cache, KM_SLEEP);
+
+	/*
+	 * Set defaults (may be overridden below)
+	 */
+	gethrestime(&now);
+	lxpnp->lxpr_type = type;
+	lxpnp->lxpr_realvp = NULL;
+	lxpnp->lxpr_parent = dp;
+	VN_HOLD(dp);
+	if (p != NULL) {
+		/* The zone's init process is always presented as pid 1 */
+		lxpnp->lxpr_pid = ((p->p_pid ==
+		    curproc->p_zone->zone_proc_initpid) ? 1 : p->p_pid);
+
+		/* Time and ownership come from the process itself */
+		lxpnp->lxpr_time = PTOU(p)->u_start;
+		lxpnp->lxpr_uid = crgetruid(p->p_cred);
+		lxpnp->lxpr_gid = crgetrgid(p->p_cred);
+		lxpnp->lxpr_ino = lxpr_inode(type, p->p_pid, fd);
+	} else {
+		/* Pretend files without a proc belong to sched */
+		lxpnp->lxpr_pid = 0;
+		lxpnp->lxpr_time = now;
+		lxpnp->lxpr_uid = lxpnp->lxpr_gid = 0;
+		lxpnp->lxpr_ino = lxpr_inode(type, 0, 0);
+	}
+
+	/* initialize the vnode data */
+	vp = lxpnp->lxpr_vnode;
+	vn_reinit(vp);
+	vp->v_flag = VNOCACHE|VNOMAP|VNOSWAP|VNOMOUNT;
+	vp->v_vfsp = dp->v_vfsp;
+
+	/*
+	 * Do node specific stuff
+	 */
+	switch (type) {
+	case LXPR_PROCDIR:
+		vp->v_flag |= VROOT;
+		vp->v_type = VDIR;
+		lxpnp->lxpr_mode = 0555;	/* read-search by everyone */
+		break;
+
+	case LXPR_PID_CURDIR:
+		/* Symlink to the process' current directory */
+		ASSERT(p != NULL);
+		up = PTOU(p);
+		lxpnp->lxpr_realvp = up->u_cdir;
+		ASSERT(lxpnp->lxpr_realvp != NULL);
+		VN_HOLD(lxpnp->lxpr_realvp);
+		vp->v_type = VLNK;
+		lxpnp->lxpr_mode = 0777;	/* anyone does anything ! */
+		break;
+
+	case LXPR_PID_ROOTDIR:
+		/* Symlink to the process' root, or rootdir if it has none */
+		ASSERT(p != NULL);
+		up = PTOU(p);
+		lxpnp->lxpr_realvp = up->u_rdir != NULL ? up->u_rdir : rootdir;
+		ASSERT(lxpnp->lxpr_realvp != NULL);
+		VN_HOLD(lxpnp->lxpr_realvp);
+		vp->v_type = VLNK;
+		lxpnp->lxpr_mode = 0777;	/* anyone does anything ! */
+		break;
+
+	case LXPR_PID_EXE:
+		/* Symlink to the executable; p_exec may be NULL */
+		ASSERT(p != NULL);
+		lxpnp->lxpr_realvp = p->p_exec;
+		if (lxpnp->lxpr_realvp != NULL) {
+			VN_HOLD(lxpnp->lxpr_realvp);
+		}
+		vp->v_type = VLNK;
+		lxpnp->lxpr_mode = 0777;
+		break;
+
+	case LXPR_SELF:
+		vp->v_type = VLNK;
+		lxpnp->lxpr_mode = 0777;	/* anyone does anything ! */
+		break;
+
+	case LXPR_PID_FD_FD:
+		ASSERT(p != NULL);
+		/* lxpr_realvp is set after we return */
+		vp->v_type = VLNK;
+		lxpnp->lxpr_mode = 0700;	/* read-write-exe owner only */
+		break;
+
+	case LXPR_PID_FDDIR:
+		ASSERT(p != NULL);
+		vp->v_type = VDIR;
+		lxpnp->lxpr_mode = 0500;	/* read-search by owner only */
+		break;
+
+	case LXPR_PIDDIR:
+		ASSERT(p != NULL);
+		vp->v_type = VDIR;
+		lxpnp->lxpr_mode = 0511;	/* owner r-x, others search only */
+		break;
+
+	case LXPR_NETDIR:
+		vp->v_type = VDIR;
+		lxpnp->lxpr_mode = 0555;	/* read-search by all */
+		break;
+
+	case LXPR_PID_ENV:
+	case LXPR_PID_MEM:
+		ASSERT(p != NULL);
+		/*FALLTHRU*/
+	case LXPR_KCORE:
+		vp->v_type = VREG;
+		lxpnp->lxpr_mode = 0400;	/* read-only by owner only */
+		break;
+
+	default:
+		/* All remaining node types are world-readable regular files */
+		vp->v_type = VREG;
+		lxpnp->lxpr_mode = 0444;	/* read-only by all */
+		break;
+	}
+
+	return (lxpnp);
+}
+
+
+/*
+ * Free the storage obtained from lxpr_getnode().
+ *
+ * Drops the holds on the real vnode and on the parent vnode (when
+ * present) and returns the node to the node cache.
+ */
+void
+lxpr_freenode(lxpr_node_t *lxpnp)
+{
+	vnode_t *realvp;
+	vnode_t *parentvp;
+
+	ASSERT(lxpnp != NULL);
+	ASSERT(LXPTOV(lxpnp) != NULL);
+
+	realvp = lxpnp->lxpr_realvp;
+	parentvp = lxpnp->lxpr_parent;
+
+	/* Break the association with the underlying vnode, if any */
+	if (realvp != NULL)
+		VN_RELE(realvp);
+
+	/* Break the association with the parent directory, if any */
+	if (parentvp != NULL)
+		VN_RELE(parentvp);
+
+	/* Hand the lxpr_node back to the cache */
+	kmem_cache_free(lxpr_node_cache, lxpnp);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
new file mode 100644
index 0000000000..44891dc612
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvfsops.c
@@ -0,0 +1,373 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * lx_prvfsops.c: vfs operations for the lx /proc file system (lxprocfs).
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/debug.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/mode.h>
+#include <sys/signal.h>
+#include <sys/user.h>
+#include <sys/mount.h>
+#include <sys/bitmap.h>
+#include <sys/kmem.h>
+#include <sys/policy.h>
+#include <sys/modctl.h>
+#include <sys/sunddi.h>
+#include <sys/sunldi.h>
+#include <sys/lx_impl.h>
+
+#include "lx_proc.h"
+
+/* Module level parameters */
+static int	lxprocfstype;		/* fstype index, set in lxpr_init() */
+static dev_t	lxprocdev;		/* pseudo device, set in lxpr_init() */
+static kmutex_t	lxpr_mount_lock;	/* held across mount and unmount */
+
+int nproc_highbit;	/* highbit(v.v_nproc) */
+
+static int lxpr_mount(vfs_t *, vnode_t *, mounta_t *, cred_t *);
+static int lxpr_unmount(vfs_t *, int, cred_t *);
+static int lxpr_root(vfs_t *, vnode_t **);
+static int lxpr_statvfs(vfs_t *, statvfs64_t *);
+static int lxpr_init(int, char *);
+
+/*
+ * File system definition: registers the "lx_proc" file system type with
+ * lxpr_init() as its initialisation routine.
+ */
+static vfsdef_t vfw = {
+	VFSDEF_VERSION,
+	"lx_proc",
+	lxpr_init,
+	0,
+	NULL
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+extern struct mod_ops mod_fsops;
+
+static struct modlfs modlfs = {
+	&mod_fsops, "generic linux procfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+	MODREV_1, (void *)&modlfs, NULL
+};
+
+/*
+ * Module load entry point: install the file system module.
+ */
+int
+_init(void)
+{
+	int rv;
+
+	rv = mod_install(&modlinkage);
+	return (rv);
+}
+
+/*
+ * Module information entry point.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * Module unload entry point.
+ *
+ * Module-wide state is torn down only after mod_remove() has succeeded;
+ * if it fails, its error is returned and nothing is touched.
+ */
+int
+_fini(void)
+{
+	int retval = mod_remove(&modlinkage);
+
+	/* Could not unload: leave everything in place */
+	if (retval != 0)
+		return (retval);
+
+	/* destroy lxpr_node cache */
+	lxpr_fininodecache();
+
+	/* clean out the vfsops and vnodeops */
+	(void) vfs_freevfsops_by_type(lxprocfstype);
+	vn_freevnodeops(lxpr_vnodeops);
+
+	mutex_destroy(&lxpr_mount_lock);
+
+	return (retval);
+}
+
+/*
+ * lxpr_init(): file system initialisation routine named in 'vfw'.
+ *
+ *	fstype	file system type index assigned to this file system
+ *	name	file system name, used to label the vnode ops vector
+ *
+ * Registers the vfs and vnode operation vectors, picks a device number
+ * for stat(2) and creates the lxpr_node cache.  Returns 0 on success or
+ * the error from vfs_setfsops()/vn_make_ops(); on failure everything
+ * set up so far, including the mount lock, is torn down again.
+ */
+static int
+lxpr_init(int fstype, char *name)
+{
+	static const fs_operation_def_t lxpr_vfsops_template[] = {
+		VFSNAME_MOUNT, lxpr_mount,
+		VFSNAME_UNMOUNT, lxpr_unmount,
+		VFSNAME_ROOT, lxpr_root,
+		VFSNAME_STATVFS, lxpr_statvfs,
+		NULL, NULL
+	};
+	extern const fs_operation_def_t lxpr_vnodeops_template[];
+	int error;
+	major_t dev;
+
+	nproc_highbit = highbit(v.v_proc);
+	lxprocfstype = fstype;
+	ASSERT(lxprocfstype != 0);
+
+	mutex_init(&lxpr_mount_lock, NULL, MUTEX_DEFAULT, NULL);
+
+	/*
+	 * Associate VFS ops vector with this fstype.
+	 */
+	error = vfs_setfsops(fstype, lxpr_vfsops_template, NULL);
+	if (error != 0) {
+		cmn_err(CE_WARN, "lxpr_init: bad vfs ops template");
+		mutex_destroy(&lxpr_mount_lock);
+		return (error);
+	}
+
+	/*
+	 * Set up vnode ops vector too.
+	 */
+	error = vn_make_ops(name, lxpr_vnodeops_template, &lxpr_vnodeops);
+	if (error != 0) {
+		(void) vfs_freevfsops_by_type(fstype);
+		cmn_err(CE_WARN, "lxpr_init: bad vnode ops template");
+		mutex_destroy(&lxpr_mount_lock);
+		return (error);
+	}
+
+	/*
+	 * Assign a unique "device" number (used by stat(2)).
+	 * Failure is not fatal: fall back to major number 0.
+	 */
+	if ((dev = getudev()) == (major_t)-1) {
+		cmn_err(CE_WARN, "lxpr_init: can't get unique device number");
+		dev = 0;
+	}
+
+	/*
+	 * Make the pseudo device
+	 */
+	lxprocdev = makedevice(dev, 0);
+
+	/*
+	 * Initialise cache for lxpr_nodes
+	 */
+	lxpr_initnodecache();
+
+	return (0);
+}
+
+/*
+ * lxpr_mount(): VFS_MOUNT() entry point.
+ *
+ *	vfsp	vfs being mounted
+ *	mvp	vnode being mounted on; must be a directory
+ *	uap	mount arguments (only the flags are examined)
+ *	cr	caller's credentials
+ *
+ * Returns 0 on success, EPERM if the caller may not mount, ENOTDIR if
+ * the mount point is not a directory, EBUSY for a mount attempted from
+ * the global zone onto a path belonging to another zone or for a
+ * disallowed overlay mount, or the error from ldi_ident_from_mod().
+ */
+static int
+lxpr_mount(vfs_t *vfsp, vnode_t *mvp, mounta_t *uap, cred_t *cr)
+{
+	lxpr_mnt_t *lxpr_mnt;
+	zone_t *zone = curproc->p_zone;
+	ldi_ident_t li;
+	int err;
+
+	/*
+	 * must be root to mount
+	 */
+	if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
+		return (EPERM);
+
+	/*
+	 * mount point must be a directory
+	 */
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/*
+	 * From the global zone, only allow mounts on paths that belong to
+	 * the global zone itself.  mntzone is only compared, never
+	 * dereferenced, after the hold is dropped.
+	 */
+	if (zone == global_zone) {
+		zone_t *mntzone;
+
+		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
+		zone_rele(mntzone);
+		if (zone != mntzone)
+			return (EBUSY);
+	}
+
+	/*
+	 * Having the resource be anything but "lxproc" doesn't make sense
+	 */
+	vfs_setresource(vfsp, "lxproc");
+
+	lxpr_mnt = kmem_alloc(sizeof (*lxpr_mnt), KM_SLEEP);
+
+	if ((err = ldi_ident_from_mod(&modlinkage, &li)) != 0) {
+		kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+		return (err);
+	}
+
+	lxpr_mnt->lxprm_li = li;
+
+	mutex_enter(&lxpr_mount_lock);
+
+	/*
+	 * Ensure we don't allow overlaying mounts
+	 */
+	mutex_enter(&mvp->v_lock);
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+		mutex_exit(&mvp->v_lock);
+		mutex_exit(&lxpr_mount_lock);
+		/* Give back the ldi ident as well as the mount data */
+		ldi_ident_release(li);
+		kmem_free(lxpr_mnt, sizeof ((*lxpr_mnt)));
+		return (EBUSY);
+	}
+	mutex_exit(&mvp->v_lock);
+
+	/*
+	 * allocate the first vnode
+	 */
+	zone_hold(lxpr_mnt->lxprm_zone = zone);
+
+	/* Arbitrarily set the parent vnode to the mounted over directory */
+	lxpr_mnt->lxprm_node = lxpr_getnode(mvp, LXPR_PROCDIR, NULL, 0);
+
+	/* Correctly set the fs for the root node */
+	lxpr_mnt->lxprm_node->lxpr_vnode->v_vfsp = vfsp;
+
+	vfs_make_fsid(&vfsp->vfs_fsid, lxprocdev, lxprocfstype);
+	vfsp->vfs_bsize = DEV_BSIZE;
+	vfsp->vfs_fstype = lxprocfstype;
+	vfsp->vfs_data = (caddr_t)lxpr_mnt;
+	vfsp->vfs_dev = lxprocdev;
+
+	mutex_exit(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * lxpr_unmount(): VFS_UNMOUNT() entry point.
+ *
+ *	vfsp	vfs being unmounted
+ *	flag	unmount flags; MS_FORCE is rejected
+ *	cr	caller's credentials
+ *
+ * Returns 0 on success, EPERM if the caller may not unmount, ENOTSUP for
+ * a forced unmount and EBUSY if the root vnode has holds other than the
+ * mount's own.  On success the root node, the zone hold, the ldi ident
+ * and the per-mount data are all released.
+ */
+static int
+lxpr_unmount(vfs_t *vfsp, int flag, cred_t *cr)
+{
+	lxpr_mnt_t *lxpr_mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+	vnode_t *vp;
+	int count;
+
+	ASSERT(lxpr_mnt != NULL);
+	vp = LXPTOV(lxpr_mnt->lxprm_node);
+
+	mutex_enter(&lxpr_mount_lock);
+
+	/*
+	 * must be root to unmount
+	 */
+	if (secpolicy_fs_unmount(cr, vfsp) != 0) {
+		mutex_exit(&lxpr_mount_lock);
+		return (EPERM);
+	}
+
+	/*
+	 * forced unmount is not supported by this file system
+	 */
+	if (flag & MS_FORCE) {
+		mutex_exit(&lxpr_mount_lock);
+		return (ENOTSUP);
+	}
+
+	/*
+	 * Ensure that no vnodes are in use on this mount point.
+	 * Every child node holds its parent, so any node still in use
+	 * shows up as an extra hold on the root vnode.
+	 */
+	mutex_enter(&vp->v_lock);
+	count = vp->v_count;
+	mutex_exit(&vp->v_lock);
+	if (count > 1) {
+		mutex_exit(&lxpr_mount_lock);
+		return (EBUSY);
+	}
+
+	/*
+	 * purge the dnlc cache for vnode entries
+	 * associated with this file system
+	 */
+	(void) dnlc_purge_vfsp(vfsp, 0);
+
+	/*
+	 * free up the lxprnode, the zone hold, the ldi ident obtained at
+	 * mount time and finally the per-mount data itself
+	 */
+	lxpr_freenode(lxpr_mnt->lxprm_node);
+	zone_rele(lxpr_mnt->lxprm_zone);
+	ldi_ident_release(lxpr_mnt->lxprm_li);
+	kmem_free(lxpr_mnt, sizeof (*lxpr_mnt));
+
+	mutex_exit(&lxpr_mount_lock);
+
+	return (0);
+}
+
+/*
+ * lxpr_root(): VFS_ROOT() entry point.
+ *
+ * Hands back the root vnode of the mount in *vpp with a hold taken on it.
+ * Always returns 0.
+ */
+static int
+lxpr_root(vfs_t *vfsp, vnode_t **vpp)
+{
+	lxpr_mnt_t *mnt = (lxpr_mnt_t *)vfsp->vfs_data;
+	lxpr_node_t *rootnode = mnt->lxprm_node;
+	vnode_t *rootvp = LXPTOV(rootnode);
+
+	VN_HOLD(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * lxpr_statvfs(): VFS_STATVFS() entry point.
+ *
+ * The file system has no blocks.  The file counts are derived from the
+ * process table: v.v_proc + 2 files in total, v.v_proc - nproc of them
+ * free.  Always returns 0.
+ */
+static int
+lxpr_statvfs(vfs_t *vfsp, statvfs64_t *sp)
+{
+	extern uint_t nproc;
+	dev32_t dev32;
+	int nfree;
+
+	/* Unused process slots */
+	nfree = v.v_proc - nproc;
+
+	bzero((caddr_t)sp, sizeof (*sp));
+
+	/* Block information: there is no backing store */
+	sp->f_bsize	= DEV_BSIZE;
+	sp->f_frsize	= DEV_BSIZE;
+	sp->f_blocks	= (fsblkcnt64_t)0;
+	sp->f_bfree	= (fsblkcnt64_t)0;
+	sp->f_bavail	= (fsblkcnt64_t)0;
+
+	/* File information */
+	sp->f_files	= (fsfilcnt64_t)v.v_proc + 2;
+	sp->f_ffree	= (fsfilcnt64_t)nfree;
+	sp->f_favail	= (fsfilcnt64_t)nfree;
+
+	/* File system identity */
+	(void) cmpldev(&dev32, vfsp->vfs_dev);
+	sp->f_fsid	= dev32;
+	/* It is guaranteed that vsw_name will fit in f_basetype */
+	(void) strcpy(sp->f_basetype, vfssw[lxprocfstype].vsw_name);
+	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	sp->f_namemax = 64;		/* quite arbitrary */
+
+	/*
+	 * f_fstr carries two NUL-terminated "/proc" strings back to back.
+	 * We know f_fstr is 32 chars.
+	 */
+	bzero(sp->f_fstr, sizeof (sp->f_fstr));
+	(void) strcpy(sp->f_fstr, "/proc");
+	(void) strcpy(&sp->f_fstr[6], "/proc");
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
new file mode 100644
index 0000000000..45bff38e16
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/procfs/lx_prvnops.c
@@ -0,0 +1,2951 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * lx_prvnops.c:  Vnode operations for the lx /proc file system
+ *
+ * Assumptions and Gotchas:
+ *
+ * In order to preserve Solaris' security policy, this file system's
+ * functionality does not override Solaris' security policies even if
+ * that means breaking Linux compatibility.
+ *
+ * Linux has no concept of lwps so we only implement procs here as in the
+ * old /proc interface.
+ */
+
+#include <sys/cpupart.h>
+#include <sys/cpuvar.h>
+#include <sys/session.h>
+#include <sys/vmparam.h>
+#include <sys/mman.h>
+#include <vm/rm.h>
+#include <vm/seg_vn.h>
+#include <sys/sdt.h>
+#include <lx_signum.h>
+#include <sys/strlog.h>
+#include <sys/stropts.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_brand.h>
+#include <sys/x86_archext.h>
+#include <sys/archsystm.h>
+#include <sys/fp.h>
+#include <sys/pool_pset.h>
+#include <sys/pset.h>
+#include <sys/zone.h>
+
+/* Dependent on the Solaris procfs */
+extern kthread_t *prchoose(proc_t *);
+
+#include "lx_proc.h"
+
+extern pgcnt_t swapfs_minfree;
+extern volatile clock_t lbolt;
+extern time_t boot_time;
+
+/*
+ * Pointer to the vnode ops vector for this fs.
+ * This is instantiated in lxpr_init() in lx_prvfsops.c
+ */
+vnodeops_t *lxpr_vnodeops;
+
+static int lxpr_open(vnode_t **, int, cred_t *);
+static int lxpr_close(vnode_t *, int, int, offset_t, cred_t *);
+static int lxpr_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
+static int lxpr_getattr(vnode_t *, vattr_t *, int, cred_t *);
+static int lxpr_access(vnode_t *, int, int, cred_t *);
+static int lxpr_lookup(vnode_t *, char *, vnode_t **,
+    pathname_t *, int, vnode_t *, cred_t *);
+static int lxpr_readdir(vnode_t *, uio_t *, cred_t *, int *);
+static int lxpr_readlink(vnode_t *, uio_t *);
+static int lxpr_cmp(vnode_t *, vnode_t *);
+static int lxpr_realvp(vnode_t *, vnode_t **);
+static int lxpr_sync(void);
+static void lxpr_inactive(vnode_t *, cred_t *);
+
+static vnode_t *lxpr_lookup_procdir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_piddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_not_a_dir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_fddir(vnode_t *, char *);
+static vnode_t *lxpr_lookup_netdir(vnode_t *, char *);
+
+static int lxpr_readdir_procdir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_piddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_not_a_dir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_fddir(lxpr_node_t *, uio_t *, int *);
+static int lxpr_readdir_netdir(lxpr_node_t *, uio_t *, int *);
+
+static void lxpr_read_invalid(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_empty(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_cpuinfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_isdir(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_fd(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_kmsg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_loadavg(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_meminfo(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_mounts(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_partitions(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_uptime(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_version(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_pid_cmdline(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_maps(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_statm(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_pid_status(lxpr_node_t *, lxpr_uiobuf_t *);
+
+static void lxpr_read_net_arp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_dev_mcast(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_igmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_ip_mr_vif(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_mcfilter(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_netstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_raw(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_route(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rpc(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_rt_cache(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_sockstat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_snmp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_stat(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_tcp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_udp(lxpr_node_t *, lxpr_uiobuf_t *);
+static void lxpr_read_net_unix(lxpr_node_t *, lxpr_uiobuf_t *);
+
+/*
+ * Simple conversion to kilobytes.  Both macros only shift, so btok()
+ * rounds down to a whole number of kilobytes.
+ */
+#define	btok(x)	((x) >> 10)			/* bytes to kbytes */
+#define	ptok(x)	((x) << (PAGESHIFT - 10))	/* pages to kbytes */
+
+/*
+ * The lx /proc vnode operations vector
+ *
+ * This template is handed to vn_make_ops() by lxpr_init() to build
+ * lxpr_vnodeops.  Only the operations listed here are supplied by this
+ * file system; the list is terminated by a NULL, NULL pair.
+ */
+const fs_operation_def_t lxpr_vnodeops_template[] = {
+	VOPNAME_OPEN, lxpr_open,
+	VOPNAME_CLOSE, lxpr_close,
+	VOPNAME_READ, lxpr_read,
+	VOPNAME_GETATTR, lxpr_getattr,
+	VOPNAME_ACCESS, lxpr_access,
+	VOPNAME_LOOKUP, lxpr_lookup,
+	VOPNAME_READDIR, lxpr_readdir,
+	VOPNAME_READLINK, lxpr_readlink,
+	VOPNAME_FSYNC, lxpr_sync,
+	/*
+	 * NOTE(review): seek is routed to lxpr_sync(), which is declared
+	 * as taking no arguments -- confirm this sharing is intentional.
+	 */
+	VOPNAME_SEEK, lxpr_sync,
+	VOPNAME_INACTIVE, (fs_generic_func_p) lxpr_inactive,
+	VOPNAME_CMP, lxpr_cmp,
+	VOPNAME_REALVP, lxpr_realvp,
+	NULL, NULL
+};
+
+
+/*
+ * file contents of an lx /proc directory.
+ *
+ * Each entry pairs an LXPR_* node type with the name of the file.  The
+ * per-process <pid> directories are not part of this table.
+ */
+static lxpr_dirent_t lx_procdir[] = {
+	{ LXPR_CMDLINE,		"cmdline" },
+	{ LXPR_CPUINFO,		"cpuinfo" },
+	{ LXPR_DEVICES,		"devices" },
+	{ LXPR_DMA,		"dma" },
+	{ LXPR_FILESYSTEMS,	"filesystems" },
+	{ LXPR_INTERRUPTS,	"interrupts" },
+	{ LXPR_IOPORTS,		"ioports" },
+	{ LXPR_KCORE,		"kcore" },
+	{ LXPR_KMSG,		"kmsg" },
+	{ LXPR_LOADAVG,		"loadavg" },
+	{ LXPR_MEMINFO,		"meminfo" },
+	{ LXPR_MOUNTS,		"mounts" },
+	{ LXPR_NETDIR,		"net" },
+	{ LXPR_PARTITIONS,	"partitions" },
+	{ LXPR_SELF,		"self" },
+	{ LXPR_STAT,		"stat" },
+	{ LXPR_UPTIME,		"uptime" },
+	{ LXPR_VERSION,		"version" }
+};
+
+/* Number of entries in lx_procdir[] */
+#define	PROCDIRFILES	(sizeof (lx_procdir) / sizeof (lx_procdir[0]))
+
+/*
+ * Contents of an lx /proc/<pid> directory.
+ *
+ * Each entry pairs an LXPR_PID_* node type with the name of the file.
+ */
+static lxpr_dirent_t piddir[] = {
+	{ LXPR_PID_CMDLINE,	"cmdline" },
+	{ LXPR_PID_CPU,		"cpu" },
+	{ LXPR_PID_CURDIR,	"cwd" },
+	{ LXPR_PID_ENV,		"environ" },
+	{ LXPR_PID_EXE,		"exe" },
+	{ LXPR_PID_MAPS,	"maps" },
+	{ LXPR_PID_MEM,		"mem" },
+	{ LXPR_PID_ROOTDIR,	"root" },
+	{ LXPR_PID_STAT,	"stat" },
+	{ LXPR_PID_STATM,	"statm" },
+	{ LXPR_PID_STATUS,	"status" },
+	{ LXPR_PID_FDDIR,	"fd" }
+};
+
+/* Number of entries in piddir[] */
+#define	PIDDIRFILES	(sizeof (piddir) / sizeof (piddir[0]))
+
+/*
+ * contents of lx /proc/net directory
+ *
+ * Each entry pairs an LXPR_NET_* node type with the name of the file.
+ */
+static lxpr_dirent_t netdir[] = {
+	{ LXPR_NET_ARP,		"arp" },
+	{ LXPR_NET_DEV,		"dev" },
+	{ LXPR_NET_DEV_MCAST,	"dev_mcast" },
+	{ LXPR_NET_IGMP,	"igmp" },
+	{ LXPR_NET_IP_MR_CACHE,	"ip_mr_cache" },
+	{ LXPR_NET_IP_MR_VIF,	"ip_mr_vif" },
+	{ LXPR_NET_MCFILTER,	"mcfilter" },
+	{ LXPR_NET_NETSTAT,	"netstat" },
+	{ LXPR_NET_RAW,		"raw" },
+	{ LXPR_NET_ROUTE,	"route" },
+	{ LXPR_NET_RPC,		"rpc" },
+	{ LXPR_NET_RT_CACHE,	"rt_cache" },
+	{ LXPR_NET_SOCKSTAT,	"sockstat" },
+	{ LXPR_NET_SNMP,	"snmp" },
+	{ LXPR_NET_STAT,	"stat" },
+	{ LXPR_NET_TCP,		"tcp" },
+	{ LXPR_NET_UDP,		"udp" },
+	{ LXPR_NET_UNIX,	"unix" }
+};
+
+/* Number of entries in netdir[] */
+#define	NETDIRFILES	(sizeof (netdir) / sizeof (netdir[0]))
+
+/*
+ * lxpr_open(): Vnode operation for VOP_OPEN()
+ *
+ *	vpp	in: the lx /proc vnode being opened
+ *		out: replaced by the underlying vnode when the node has one
+ *		     and its open succeeds
+ *	flag	open flags; FWRITE is refused with EROFS
+ *	cr	caller's credentials
+ *
+ * For /proc/kmsg the log device is opened through the layered driver
+ * interface and switched into console-log mode; the resulting handle is
+ * kept in the node until lxpr_close().
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+lxpr_open(vnode_t **vpp, int flag, cred_t *cr)
+{
+	vnode_t		*vp = *vpp;
+	lxpr_node_t	*lxpnp = VTOLXP(vp);
+	lxpr_nodetype_t	type = lxpnp->lxpr_type;
+	vnode_t		*rvp;
+	int		error = 0;
+
+	/*
+	 * We only allow reading in this file system
+	 */
+	if (flag & FWRITE)
+		return (EROFS);
+
+	/*
+	 * If we are opening an underlying file only allow regular files
+	 * reject the open for anything but a regular file.
+	 * Just do it if we are opening the current or root directory.
+	 */
+	if (lxpnp->lxpr_realvp != NULL) {
+		rvp = lxpnp->lxpr_realvp;
+
+		if (type == LXPR_PID_FD_FD && rvp->v_type != VREG)
+			error = EACCES;
+		else {
+			/*
+			 * Need to hold rvp since VOP_OPEN() may release it.
+			 */
+			VN_HOLD(rvp);
+			error = VOP_OPEN(&rvp, flag, cr);
+			if (error) {
+				VN_RELE(rvp);
+			} else {
+				/* Caller continues with the real vnode */
+				*vpp = rvp;
+				VN_RELE(vp);
+			}
+		}
+	}
+
+	if (type == LXPR_KMSG) {
+		ldi_ident_t	li = VTOLXPM(vp)->lxprm_li;
+		struct strioctl	str;
+		int		rv;
+
+		/*
+		 * Open the zone's console device using the layered driver
+		 * interface.
+		 */
+		if ((error = ldi_open_by_name("/dev/log", FREAD, cr,
+		    &lxpnp->lxpr_cons_ldih, li)) != 0)
+			return (error);
+
+		/*
+		 * Send an ioctl to the underlying console device, letting it
+		 * know we're interested in getting console messages.
+		 */
+		str.ic_cmd = I_CONSLOG;
+		str.ic_timout = 0;
+		str.ic_len = 0;
+		str.ic_dp = NULL;
+		if ((error = ldi_ioctl(lxpnp->lxpr_cons_ldih, I_STR,
+		    (intptr_t)&str, FKIOCTL, cr, &rv)) != 0) {
+			/*
+			 * The open is failing, so lxpr_close() will never
+			 * run for it; close the handle here.
+			 */
+			(void) ldi_close(lxpnp->lxpr_cons_ldih, FREAD, cr);
+			return (error);
+		}
+	}
+
+	return (error);
+}
+
+
+/*
+ * lxpr_close(): Vnode operation for VOP_CLOSE()
+ *
+ * Only /proc/kmsg has anything to undo: the ldi handle taken at open
+ * time is closed and the result of that close is returned.  All other
+ * node types return 0.
+ */
+/* ARGSUSED */
+static int
+lxpr_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
+{
+	lxpr_node_t	*node = VTOLXP(vp);
+	lxpr_nodetype_t	type = node->lxpr_type;
+
+	/*
+	 * Nodes backed by a real vnode are closed on that vnode, so they
+	 * must never show up here.
+	 */
+	ASSERT(type != LXPR_PID_FD_FD &&
+		type != LXPR_PID_CURDIR &&
+		type != LXPR_PID_ROOTDIR &&
+		type != LXPR_PID_EXE);
+
+	if (type != LXPR_KMSG)
+		return (0);
+
+	return (ldi_close(node->lxpr_cons_ldih, 0, cr));
+}
+
+/*
+ * Array of read functions, one slot per LXPR_NFILES node type.
+ * Presumably indexed by lx /proc file type in the same way as
+ * lxpr_lookup_function[] -- the entries must stay in the order of the
+ * node type enumeration (TODO confirm against lx_proc.h).
+ */
+static void (*lxpr_read_function[LXPR_NFILES])() = {
+	lxpr_read_isdir,		/* /proc		*/
+	lxpr_read_isdir,		/* /proc/<pid>		*/
+	lxpr_read_pid_cmdline,		/* /proc/<pid>/cmdline	*/
+	lxpr_read_empty,		/* /proc/<pid>/cpu	*/
+	lxpr_read_invalid,		/* /proc/<pid>/cwd	*/
+	lxpr_read_empty,		/* /proc/<pid>/environ	*/
+	lxpr_read_invalid,		/* /proc/<pid>/exe	*/
+	lxpr_read_pid_maps,		/* /proc/<pid>/maps	*/
+	lxpr_read_empty,		/* /proc/<pid>/mem	*/
+	lxpr_read_invalid,		/* /proc/<pid>/root	*/
+	lxpr_read_pid_stat,		/* /proc/<pid>/stat	*/
+	lxpr_read_pid_statm,		/* /proc/<pid>/statm	*/
+	lxpr_read_pid_status,		/* /proc/<pid>/status	*/
+	lxpr_read_isdir,		/* /proc/<pid>/fd	*/
+	lxpr_read_fd,			/* /proc/<pid>/fd/nn	*/
+	lxpr_read_empty,		/* /proc/cmdline	*/
+	lxpr_read_cpuinfo,		/* /proc/cpuinfo	*/
+	lxpr_read_empty,		/* /proc/devices	*/
+	lxpr_read_empty,		/* /proc/dma		*/
+	lxpr_read_empty,		/* /proc/filesystems	*/
+	lxpr_read_empty,		/* /proc/interrupts	*/
+	lxpr_read_empty,		/* /proc/ioports	*/
+	lxpr_read_empty,		/* /proc/kcore		*/
+	lxpr_read_kmsg,			/* /proc/kmsg		*/
+	lxpr_read_loadavg,		/* /proc/loadavg	*/
+	lxpr_read_meminfo,		/* /proc/meminfo	*/
+	lxpr_read_mounts,		/* /proc/mounts		*/
+	lxpr_read_isdir,		/* /proc/net		*/
+	lxpr_read_net_arp,		/* /proc/net/arp	*/
+	lxpr_read_net_dev,		/* /proc/net/dev	*/
+	lxpr_read_net_dev_mcast,	/* /proc/net/dev_mcast	*/
+	lxpr_read_net_igmp,		/* /proc/net/igmp	*/
+	lxpr_read_net_ip_mr_cache,	/* /proc/net/ip_mr_cache */
+	lxpr_read_net_ip_mr_vif,	/* /proc/net/ip_mr_vif	*/
+	lxpr_read_net_mcfilter,		/* /proc/net/mcfilter	*/
+	lxpr_read_net_netstat,		/* /proc/net/netstat	*/
+	lxpr_read_net_raw,		/* /proc/net/raw	*/
+	lxpr_read_net_route,		/* /proc/net/route	*/
+	lxpr_read_net_rpc,		/* /proc/net/rpc	*/
+	lxpr_read_net_rt_cache,		/* /proc/net/rt_cache	*/
+	lxpr_read_net_sockstat,		/* /proc/net/sockstat	*/
+	lxpr_read_net_snmp,		/* /proc/net/snmp	*/
+	lxpr_read_net_stat,		/* /proc/net/stat	*/
+	lxpr_read_net_tcp,		/* /proc/net/tcp	*/
+	lxpr_read_net_udp,		/* /proc/net/udp	*/
+	lxpr_read_net_unix,		/* /proc/net/unix	*/
+	lxpr_read_partitions,		/* /proc/partitions	*/
+	lxpr_read_invalid,		/* /proc/self		*/
+	lxpr_read_stat,			/* /proc/stat		*/
+	lxpr_read_uptime,		/* /proc/uptime		*/
+	lxpr_read_version,		/* /proc/version	*/
+};
+
+/*
+ * Array of lookup functions, indexed by lx /proc file type.
+ *
+ * Only the four directory types (/proc, /proc/<pid>, /proc/<pid>/fd and
+ * /proc/net) have a real lookup routine; every other slot holds
+ * lxpr_lookup_not_a_dir.  The entries are positional, so they must stay in
+ * the same order as the other per-type tables in this file.
+ */
+static vnode_t *(*lxpr_lookup_function[LXPR_NFILES])() = {
+	lxpr_lookup_procdir,		/* /proc		*/
+	lxpr_lookup_piddir,		/* /proc/<pid>		*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cmdline	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cpu	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/cwd	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/environ	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/exe	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/maps	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/mem	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/root	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/stat	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/statm	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/status	*/
+	lxpr_lookup_fddir,		/* /proc/<pid>/fd	*/
+	lxpr_lookup_not_a_dir,		/* /proc/<pid>/fd/nn	*/
+	lxpr_lookup_not_a_dir,		/* /proc/cmdline	*/
+	lxpr_lookup_not_a_dir,		/* /proc/cpuinfo	*/
+	lxpr_lookup_not_a_dir,		/* /proc/devices	*/
+	lxpr_lookup_not_a_dir,		/* /proc/dma		*/
+	lxpr_lookup_not_a_dir,		/* /proc/filesystems	*/
+	lxpr_lookup_not_a_dir,		/* /proc/interrupts	*/
+	lxpr_lookup_not_a_dir,		/* /proc/ioports	*/
+	lxpr_lookup_not_a_dir,		/* /proc/kcore		*/
+	lxpr_lookup_not_a_dir,		/* /proc/kmsg		*/
+	lxpr_lookup_not_a_dir,		/* /proc/loadavg	*/
+	lxpr_lookup_not_a_dir,		/* /proc/meminfo	*/
+	lxpr_lookup_not_a_dir,		/* /proc/mounts		*/
+	lxpr_lookup_netdir,		/* /proc/net		*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/arp	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/dev	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/dev_mcast	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/igmp	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_cache */
+	lxpr_lookup_not_a_dir,		/* /proc/net/ip_mr_vif	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/mcfilter	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/netstat	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/raw	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/route	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/rpc	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/rt_cache	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/sockstat	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/snmp	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/stat	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/tcp	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/udp	*/
+	lxpr_lookup_not_a_dir,		/* /proc/net/unix	*/
+	lxpr_lookup_not_a_dir,		/* /proc/partitions	*/
+	lxpr_lookup_not_a_dir,		/* /proc/self		*/
+	lxpr_lookup_not_a_dir,		/* /proc/stat		*/
+	lxpr_lookup_not_a_dir,		/* /proc/uptime		*/
+	lxpr_lookup_not_a_dir,		/* /proc/version	*/
+};
+
+/*
+ * Array of readdir functions, indexed by lx /proc file type.
+ *
+ * Only the four directory types (/proc, /proc/<pid>, /proc/<pid>/fd and
+ * /proc/net) have a real readdir routine; every other slot holds
+ * lxpr_readdir_not_a_dir.  The entries are positional, so they must stay in
+ * the same order as the other per-type tables in this file.
+ */
+static int (*lxpr_readdir_function[LXPR_NFILES])() = {
+	lxpr_readdir_procdir,		/* /proc		*/
+	lxpr_readdir_piddir,		/* /proc/<pid>		*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cmdline	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cpu	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/cwd	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/environ	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/exe	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/maps	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/mem	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/root	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/stat	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/statm	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/status	*/
+	lxpr_readdir_fddir,		/* /proc/<pid>/fd	*/
+	lxpr_readdir_not_a_dir,		/* /proc/<pid>/fd/nn	*/
+	lxpr_readdir_not_a_dir,		/* /proc/cmdline	*/
+	lxpr_readdir_not_a_dir,		/* /proc/cpuinfo	*/
+	lxpr_readdir_not_a_dir,		/* /proc/devices	*/
+	lxpr_readdir_not_a_dir,		/* /proc/dma		*/
+	lxpr_readdir_not_a_dir,		/* /proc/filesystems	*/
+	lxpr_readdir_not_a_dir,		/* /proc/interrupts	*/
+	lxpr_readdir_not_a_dir,		/* /proc/ioports	*/
+	lxpr_readdir_not_a_dir,		/* /proc/kcore		*/
+	lxpr_readdir_not_a_dir,		/* /proc/kmsg		*/
+	lxpr_readdir_not_a_dir,		/* /proc/loadavg	*/
+	lxpr_readdir_not_a_dir,		/* /proc/meminfo	*/
+	lxpr_readdir_not_a_dir,		/* /proc/mounts		*/
+	lxpr_readdir_netdir,		/* /proc/net		*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/arp	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/dev	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/dev_mcast	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/igmp	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_cache */
+	lxpr_readdir_not_a_dir,		/* /proc/net/ip_mr_vif	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/mcfilter	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/netstat	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/raw	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/route	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/rpc	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/rt_cache	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/sockstat	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/snmp	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/stat	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/tcp	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/udp	*/
+	lxpr_readdir_not_a_dir,		/* /proc/net/unix	*/
+	lxpr_readdir_not_a_dir,		/* /proc/partitions	*/
+	lxpr_readdir_not_a_dir,		/* /proc/self		*/
+	lxpr_readdir_not_a_dir,		/* /proc/stat		*/
+	lxpr_readdir_not_a_dir,		/* /proc/uptime		*/
+	lxpr_readdir_not_a_dir,		/* /proc/version	*/
+};
+
+
+/*
+ * lxpr_read(): Vnode operation for VOP_READ()
+ *
+ * Every file in the lx procfs is rendered as human readable text rather
+ * than as a binary structure, so a single set of read routines serves both
+ * 32-bit and 64-bit readers (at least in general; any difference is
+ * unlikely to be large enough to justify separate routines per data model).
+ *
+ * The node's type selects a generator from lxpr_read_function[].  The
+ * generator writes into a temporary uiobuf wrapped around the caller's uio,
+ * which is then flushed and freed.  The return value is whatever
+ * lxpr_uiobuf_flush() reports.
+ */
+/* ARGSUSED */
+static int
+lxpr_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
+    caller_context_t *ct)
+{
+	lxpr_node_t	*node;
+	lxpr_nodetype_t	ntype;
+	lxpr_uiobuf_t	*out;
+	int		rv;
+
+	node = VTOLXP(vp);
+	ntype = node->lxpr_type;
+	out = lxpr_uiobuf_new(uiop);
+
+	ASSERT(ntype >= 0 && ntype < LXPR_NFILES);
+
+	/* let the per-type generator produce the file's contents */
+	(*lxpr_read_function[ntype])(node, out);
+
+	rv = lxpr_uiobuf_flush(out);
+	lxpr_uiobuf_free(out);
+
+	return (rv);
+}
+
+
+/*
+ * lxpr_read_invalid(), lxpr_read_isdir(), lxpr_read_empty()
+ *
+ * Various special case reads:
+ * - trying to read a directory
+ * - invalid file (used to mean a file that should be implemented,
+ *   but isn't yet)
+ * - empty file
+ * - wait to be able to read a file that will never have anything to read
+ */
+
+/*
+ * lxpr_read_isdir(): read handler installed for directory nodes; records
+ * EISDIR on the uiobuf and produces no data.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_isdir(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	lxpr_uiobuf_seterr(uiobuf, EISDIR);
+}
+
+/*
+ * lxpr_read_invalid(): read handler for nodes that cannot be read through
+ * this path; records EINVAL on the uiobuf and produces no data.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_invalid(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	lxpr_uiobuf_seterr(uiobuf, EINVAL);
+}
+
+/*
+ * lxpr_read_empty(): read handler for files that exist but have no content;
+ * it writes nothing to the uiobuf and sets no error.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_empty(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_pid_cmdline():
+ *
+ * This is not precisely compatible with linux:
+ *
+ * The linux cmdline returns argv with the correct separation
+ * using \0 between the arguments, we cannot do that without
+ * copying the real argv from the correct process context.
+ * This is too difficult to attempt so we pretend that the
+ * entire cmdline is just argv[0]. This is good enough for
+ * ps to display correctly, but might cause some other things
+ * not to work correctly.
+ *
+ * The saved argument string lives in the target's user area, so it is
+ * copied into a local buffer while the process is still locked; nothing
+ * belonging to the process is referenced after lxpr_unlock().
+ *
+ * If the process cannot be locked, EINVAL is recorded on the uiobuf.  If
+ * the process has no argv recorded, nothing is written.
+ */
+static void
+lxpr_read_pid_cmdline(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_CMDLINE);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	if (PTOU(p)->u_argv != 0) {
+		char psargs[sizeof (PTOU(p)->u_psargs)];
+
+		/* snapshot the string before the process can go away */
+		(void) strncpy(psargs, PTOU(p)->u_psargs, sizeof (psargs));
+		psargs[sizeof (psargs) - 1] = '\0';
+		lxpr_unlock(p);
+
+		/* the terminating NUL is part of what gets written */
+		lxpr_uiobuf_write(uiobuf, psargs, strlen(psargs) + 1);
+	} else {
+		lxpr_unlock(p);
+	}
+}
+
+
+/*
+ * lxpr_read_pid_maps(): memory map file
+ *
+ * Works in two phases:
+ *  1. With the address space read-locked, walk every segment and record
+ *     its bounds, protections, offset and (for segvn segments backed by a
+ *     regular file) a held vnode in a private linked list.
+ *  2. After the address-space lock and the process have been released,
+ *     walk that list, look up fsid/nodeid and path for each vnode, emit
+ *     one line per segment, and free the list entries.
+ *
+ * If the process cannot be locked, EINVAL is recorded on the uiobuf.  A
+ * process whose address space is the kernel's (kas) yields no output.
+ *
+ * NOTE(review): addresses are printed with %08x and the offset is truncated
+ * to uint32_t; confirm this is adequate for 64-bit address spaces.
+ */
+static void
+lxpr_read_pid_maps(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	struct as *as;
+	struct seg *seg;
+	char *buf;
+	int buflen = MAXPATHLEN;
+	/* one record per segment, captured while the as lock is held */
+	struct print_data {
+		caddr_t saddr;
+		caddr_t eaddr;
+		int type;
+		char prot[5];
+		uint32_t offset;
+		vnode_t *vp;
+		struct print_data *next;
+	} *print_head = NULL;
+	struct print_data **print_tail = &print_head;
+	struct print_data *pbuf;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_MAPS);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	as = p->p_as;
+
+	if (as == &kas) {
+		lxpr_unlock(p);
+		return;
+	}
+
+	/*
+	 * p_lock is dropped across the address space walk and re-taken
+	 * before lxpr_unlock(); lxpr_lock() evidently returns with it held.
+	 */
+	mutex_exit(&p->p_lock);
+
+	/* Iterate over all segments in the address space */
+	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
+		vnode_t *vp;
+		uint_t protbits;
+
+		pbuf = kmem_alloc(sizeof (*pbuf), KM_SLEEP);
+
+		pbuf->saddr = seg->s_base;
+		pbuf->eaddr = seg->s_base+seg->s_size;
+		pbuf->type = SEGOP_GETTYPE(seg, seg->s_base);
+
+		/*
+		 * Cheat and only use the protection bits of the first page
+		 * in the segment
+		 */
+		(void) strncpy(pbuf->prot, "----", sizeof (pbuf->prot));
+		(void) SEGOP_GETPROT(seg, seg->s_base, 0, &protbits);
+
+		if (protbits & PROT_READ)	   pbuf->prot[0] = 'r';
+		if (protbits & PROT_WRITE)	   pbuf->prot[1] = 'w';
+		if (protbits & PROT_EXEC)	   pbuf->prot[2] = 'x';
+		if (pbuf->type & MAP_SHARED)	   pbuf->prot[3] = 's';
+		else if (pbuf->type & MAP_PRIVATE) pbuf->prot[3] = 'p';
+
+		/*
+		 * Only segvn segments backed by a regular file get a vnode;
+		 * the hold taken here is dropped in the print loop below.
+		 */
+		if (seg->s_ops == &segvn_ops &&
+		    SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
+		    vp != NULL && vp->v_type == VREG) {
+			VN_HOLD(vp);
+			pbuf->vp = vp;
+		} else {
+			pbuf->vp = NULL;
+		}
+
+		pbuf->offset = (uint32_t)SEGOP_GETOFFSET(seg, pbuf->saddr);
+
+		/* append, so the list keeps address-space order */
+		pbuf->next = NULL;
+		*print_tail = pbuf;
+		print_tail = &pbuf->next;
+	}
+	AS_LOCK_EXIT(as, &as->a_lock);
+	mutex_enter(&p->p_lock);
+	lxpr_unlock(p);
+
+	/* scratch buffer for the path of each mapped file */
+	buf = kmem_alloc(buflen, KM_SLEEP);
+
+	/* print the data we've extracted */
+	pbuf = print_head;
+	while (pbuf != NULL) {
+		struct print_data *pbuf_next;
+		vattr_t vattr;
+
+		/* these stay zero for segments with no vnode */
+		int maj = 0;
+		int min = 0;
+		int inode = 0;
+
+		*buf = '\0';
+		if (pbuf->vp != NULL) {
+			vattr.va_mask = AT_FSID | AT_NODEID;
+			if (VOP_GETATTR(pbuf->vp, &vattr, 0, CRED()) == 0) {
+				maj = getmajor(vattr.va_fsid);
+				min = getminor(vattr.va_fsid);
+				inode = vattr.va_nodeid;
+			}
+			/* result ignored; buf is tested for content below */
+			(void) vnodetopath(NULL, pbuf->vp, buf, buflen, CRED());
+			VN_RELE(pbuf->vp);
+		}
+
+		/* the path column is omitted when no path was produced */
+		if (*buf != '\0') {
+			lxpr_uiobuf_printf(uiobuf,
+			    "%08x-%08x %s %08x %02d:%03d %d %s\n",
+			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+			    maj, min, inode, buf);
+		} else {
+			lxpr_uiobuf_printf(uiobuf,
+			    "%08x-%08x %s %08x %02d:%03d %d\n",
+			    pbuf->saddr, pbuf->eaddr, pbuf->prot, pbuf->offset,
+			    maj, min, inode);
+		}
+
+		pbuf_next = pbuf->next;
+		kmem_free(pbuf, sizeof (*pbuf));
+		pbuf = pbuf_next;
+	}
+
+	kmem_free(buf, buflen);
+}
+
+/*
+ * lxpr_read_pid_statm(): memory status file
+ *
+ * Emits a single line of seven numbers.  The first is the address space
+ * size converted with btopr(); the second and fourth are the value
+ * returned by rm_asrss(); the remaining fields are always zero.
+ *
+ * If the process cannot be locked, EINVAL is recorded on the uiobuf.
+ */
+static void
+lxpr_read_pid_statm(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t		*procp;
+	struct as	*asp;
+	size_t		size_pages;
+	size_t		resident;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATM);
+
+	if ((procp = lxpr_lock(lxpnp->lxpr_pid)) == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	asp = procp->p_as;
+
+	/* p_lock is not held while the address space lock is taken */
+	mutex_exit(&procp->p_lock);
+	AS_LOCK_ENTER(asp, &asp->a_lock, RW_READER);
+
+	size_pages = btopr(rm_assize(asp));
+	resident = rm_asrss(asp);
+
+	AS_LOCK_EXIT(asp, &asp->a_lock);
+	mutex_enter(&procp->p_lock);
+
+	lxpr_unlock(procp);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%lu %lu %lu %lu %lu %lu %lu\n",
+	    size_pages, resident, 0l, resident, 0l, 0l, 0l);
+}
+
+/*
+ * lxpr_read_pid_status(): status file
+ *
+ * Emits the "Name:/State:/Pid:/..." report for a process:
+ *  - identity (name, state, pid, ppid) with the zone's init process
+ *    presented as pid 1 and its parent as 0;
+ *  - real/effective/saved uids and gids plus the supplementary groups;
+ *  - memory sizes, skipped for zombies, system processes and processes
+ *    running on the kernel address space;
+ *  - pending, ignored and caught signal sets, translated to Linux signal
+ *    numbers through stol_signo[];
+ *  - all-zero blocked-signal and capability fields.
+ *
+ * If the process cannot be locked, EINVAL is recorded on the uiobuf.
+ */
+static void
+lxpr_read_pid_status(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	kthread_t *t;
+	user_t *up;
+	cred_t *cr;
+	const gid_t *groups;
+	int ngroups;
+	struct as *as;
+	char *status;
+	pid_t pid, ppid;
+	size_t vsize;
+	size_t rss;
+	k_sigset_t current, ignore, handle;
+	int i, lx_sig;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_STATUS);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	pid = p->p_pid;
+
+	/*
+	 * Convert pid to the Linux default of 1 if we're the zone's init
+	 * process
+	 */
+	if (pid == curproc->p_zone->zone_proc_initpid) {
+		pid = 1;
+		ppid = 0;	/* parent pid for init is 0 */
+	} else {
+		/*
+		 * Make sure not to reference parent PIDs that reside outside
+		 * the zone
+		 */
+		ppid = ((p->p_flag & SZONETOP)
+		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+		/*
+		 * Convert ppid to the Linux default of 1 if our parent is the
+		 * zone's init process
+		 */
+		if (ppid == curproc->p_zone->zone_proc_initpid)
+			ppid = 1;
+	}
+
+	/* derive the state string from a representative thread, if any */
+	t = prchoose(p);
+	if (t != NULL) {
+		switch (t->t_state) {
+		case TS_SLEEP:
+			status = "S (sleeping)";
+			break;
+		case TS_RUN:
+		case TS_ONPROC:
+			status = "R (running)";
+			break;
+		case TS_ZOMB:
+			status = "Z (zombie)";
+			break;
+		case TS_STOPPED:
+			status = "T (stopped)";
+			break;
+		default:
+			status = "! (unknown)";
+			break;
+		}
+		thread_unlock(t);
+	} else {
+		/*
+		 * there is a hole in the exit code, where a proc can have
+		 * no threads but it is yet to be flagged SZOMB. We will
+		 * assume we are about to become a zombie
+		 */
+		status = "Z (zombie)";
+	}
+
+	up = PTOU(p);
+
+	/* take our own hold on the credential so it can be read unlocked */
+	mutex_enter(&p->p_crlock);
+	crhold(cr = p->p_cred);
+	mutex_exit(&p->p_crlock);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "Name:\t%s\n"
+	    "State:\t%s\n"
+	    "Tgid:\t%d\n"
+	    "Pid:\t%d\n"
+	    "PPid:\t%d\n"
+	    "TracerPid:\t%d\n"
+	    "Uid:\t%d\t%d\t%d\t%d\n"
+	    "Gid:\t%d\t%d\t%d\t%d\n"
+	    "FDSize:\t%d\n"
+	    "Groups:\t",
+	    up->u_comm,
+	    status,
+	    pid, /* thread group id - same as pid until we map lwps to procs */
+	    pid,
+	    ppid,
+	    0,
+	    crgetruid(cr), crgetuid(cr), crgetsuid(cr), crgetuid(cr),
+	    crgetrgid(cr), crgetgid(cr), crgetsgid(cr), crgetgid(cr),
+	    p->p_fno_ctl);
+
+	ngroups = crgetngroups(cr);
+	groups = crgetgroups(cr);
+	for (i = 0; i < ngroups; i++) {
+		lxpr_uiobuf_printf(uiobuf,
+		    "%d ",
+		    groups[i]);
+	}
+	crfree(cr);
+
+	as = p->p_as;
+	if ((p->p_stat != SZOMB) && !(p->p_flag & SSYS) && (as != &kas)) {
+		/* p_lock is not held while the address space lock is taken */
+		mutex_exit(&p->p_lock);
+		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+		vsize = rm_assize(as);
+		rss = rm_asrss(as);
+		AS_LOCK_EXIT(as, &as->a_lock);
+		mutex_enter(&p->p_lock);
+
+		lxpr_uiobuf_printf(uiobuf,
+		    "\n"
+		    "VmSize:\t%8lu kB\n"
+		    "VmLck:\t%8lu kB\n"
+		    "VmRSS:\t%8lu kB\n"
+		    "VmData:\t%8lu kB\n"
+		    "VmStk:\t%8lu kB\n"
+		    "VmExe:\t%8lu kB\n"
+		    "VmLib:\t%8lu kB",
+		    btok(vsize),
+		    0l,
+		    ptok(rss),
+		    0l,
+		    btok(p->p_stksize),
+		    ptok(rss),
+		    0l);
+	}
+
+	sigemptyset(&current);
+	sigemptyset(&ignore);
+	sigemptyset(&handle);
+
+	for (i = 1; i < MAXSIG; i++) {
+		lx_sig = stol_signo[i];
+
+		if ((lx_sig > 0) && (lx_sig < MAXSIG)) {
+			if (sigismember(&p->p_sig, i))
+				sigaddset(&current, lx_sig);
+
+			/*
+			 * u_signal[] is indexed by (signal number - 1), so
+			 * the disposition of signal i lives in slot i - 1.
+			 */
+			if (up->u_signal[i - 1] == SIG_IGN)
+				sigaddset(&ignore, lx_sig);
+			else if (up->u_signal[i - 1] != SIG_DFL)
+				sigaddset(&handle, lx_sig);
+		}
+	}
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "\n"
+	    "SigPnd:\t%08x%08x\n"
+	    "SigBlk:\t%08x%08x\n"
+	    "SigIgn:\t%08x%08x\n"
+	    "SigCgt:\t%08x%08x\n"
+	    "CapInh:\t%016x\n"
+	    "CapPrm:\t%016x\n"
+	    "CapEff:\t%016x\n",
+	    current.__sigbits[1], current.__sigbits[0],
+	    0, 0, /* signals blocked on per thread basis */
+	    ignore.__sigbits[1], ignore.__sigbits[0],
+	    handle.__sigbits[1], handle.__sigbits[0],
+	    /* Can't do anything with linux capabilities */
+	    0,
+	    0,
+	    0);
+
+	lxpr_unlock(p);
+}
+
+
+/*
+ * lxpr_read_pid_stat(): pid stat file
+ *
+ * Emits the single-line, space-separated per-process record.  Fields this
+ * implementation has no value for are written as literal zeros; the inline
+ * comments in the argument list name each group of fields.
+ *
+ * The zone's init process is reported with fixed values (pid 1, parent 0,
+ * process group 0, session 0, session device 0, gid -1).  For any other
+ * process the parent pid is kept inside the zone and mapped to 1 when the
+ * parent is the zone's init.
+ *
+ * If the process cannot be locked, EINVAL is recorded on the uiobuf.
+ *
+ * NOTE(review): psdev (dev_t) is printed with %d, USRSTACK with %u and
+ * wchan (caddr_t) with %lu; confirm these widths on every supported data
+ * model.
+ */
+static void
+lxpr_read_pid_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	proc_t *p;
+	kthread_t *t;
+	struct as *as;
+	char stat;
+	pid_t pid, ppid, pgpid, spid;
+	gid_t psgid;
+	dev_t psdev;
+	size_t rss, vsize;
+	int nice, pri;
+	caddr_t wchan;
+	processorid_t cpu;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_STAT);
+
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL) {
+		lxpr_uiobuf_seterr(uiobuf, EINVAL);
+		return;
+	}
+
+	pid = p->p_pid;
+
+	/*
+	 * Set Linux defaults if we're the zone's init process
+	 */
+	if (pid == curproc->p_zone->zone_proc_initpid) {
+		pid = 1;	/* PID for init */
+		ppid = 0;	/* parent PID for init is 0 */
+		pgpid = 0;	/* process group for init is 0 */
+		psgid = -1;	/* credential GID for init is -1 */
+		spid = 0;	/* session id for init is 0 */
+		psdev = 0;	/* session device for init is 0 */
+	} else {
+		/*
+		 * Make sure not to reference parent PIDs that reside outside
+		 * the zone
+		 */
+		ppid = ((p->p_flag & SZONETOP)
+		    ? curproc->p_zone->zone_zsched->p_pid : p->p_ppid);
+
+		/*
+		 * Convert ppid to the Linux default of 1 if our parent is the
+		 * zone's init process
+		 */
+		if (ppid == curproc->p_zone->zone_proc_initpid)
+			ppid = 1;
+
+		pgpid = p->p_pgrp;
+
+		/* session fields are read under p_splock and the s_lock */
+		mutex_enter(&p->p_splock);
+		mutex_enter(&p->p_sessp->s_lock);
+		spid = p->p_sessp->s_sid;
+		/* XXBRAND psdev = DEV_TO_LXDEV(p->p_sessp->s_dev, VCHR); */
+		psdev = p->p_sessp->s_dev;
+		/* prefer the session's credential, else the process's own */
+		if (p->p_sessp->s_cred)
+			psgid = crgetgid(p->p_sessp->s_cred);
+		else
+			psgid = crgetgid(p->p_cred);
+
+		mutex_exit(&p->p_sessp->s_lock);
+		mutex_exit(&p->p_splock);
+	}
+
+	/* state, priority, nice, wchan and cpu come from one chosen thread */
+	t = prchoose(p);
+	if (t != NULL) {
+		switch (t->t_state) {
+		case TS_SLEEP:
+			stat = 'S'; break;
+		case TS_RUN:
+		case TS_ONPROC:
+			stat = 'R'; break;
+		case TS_ZOMB:
+			stat = 'Z'; break;
+		case TS_STOPPED:
+			stat = 'T'; break;
+		default:
+			stat = '!'; break;
+		}
+
+		/* fall back to a nice value of 0 if the class can't say */
+		if (CL_DONICE(t, NULL, 0, &nice) != 0)
+			nice = 0;
+
+		pri = v.v_maxsyspri - t->t_pri;
+		wchan = t->t_wchan;
+		cpu = t->t_cpu->cpu_seqid;
+		thread_unlock(t);
+	} else {
+		/* Only zombies have no threads */
+		stat = 'Z';
+		nice = 0;
+		pri = 0;
+		wchan = 0;
+		cpu = 0;
+	}
+	as = p->p_as;
+	/* p_lock is not held while the address space lock is taken */
+	mutex_exit(&p->p_lock);
+	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+	vsize = rm_assize(as);
+	rss = rm_asrss(as);
+	AS_LOCK_EXIT(as, &as->a_lock);
+	mutex_enter(&p->p_lock);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%d (%s) %c %d %d %d %d %d "
+	    "%lu %lu %lu %lu %lu "
+	    "%lu %lu %ld %ld "
+	    "%d %d "
+	    "0 "
+	    "%ld %lu "
+	    "%lu %ld %llu "
+	    "%lu %lu %u "
+	    "%lu %lu "
+	    "%lu %lu %lu %lu "
+	    "%lu "
+	    "%lu %lu "
+	    "%d "
+	    "%d"
+	    "\n",
+	    pid,
+	    PTOU(p)->u_comm,
+	    stat,
+	    ppid, pgpid,
+	    spid, psdev, psgid,
+	    0l, 0l, 0l, 0l, 0l, /* flags, minflt, cminflt, majflt, cmajflt */
+	    p->p_utime, p->p_stime, p->p_cutime, p->p_cstime,
+	    pri, nice,
+	    0l, PTOU(p)->u_ticks, /* ticks till next SIGALARM, start time */
+	    vsize, rss, p->p_vmem_ctl,
+	    0l, 0l, USRSTACK, /* startcode, endcode, startstack */
+	    0l, 0l, /* kstkesp, kstkeip */
+	    0l, 0l, 0l, 0l, /* signal, blocked, sigignore, sigcatch */
+	    wchan,
+	    0l, 0l, /* nswap, cnswap */
+	    0, /* exit_signal */
+	    cpu);
+
+	lxpr_unlock(p);
+}
+
+/*
+ * lxpr_read_net_arp(): /proc/net/arp is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_arp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_dev(): /proc/net/dev.  Only the two column-header lines
+ * are produced; no per-interface rows follow them.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_dev(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	/* first header line: the Receive / Transmit group titles */
+	lxpr_uiobuf_printf(uiobuf, "Inter-|   Receive                   "
+	    "                             |  Transmit\n");
+	/* second header line: the individual counter names */
+	lxpr_uiobuf_printf(uiobuf, " face |bytes    packets errs drop fifo"
+	    " frame compressed multicast|bytes    packets errs drop fifo"
+	    " colls carrier compressed\n");
+
+	/*
+	 * XXX: data about each interface should go here, but we'll wait to
+	 * see if anybody wants to use it.
+	 */
+}
+
+/*
+ * lxpr_read_net_dev_mcast(): /proc/net/dev_mcast is not implemented;
+ * nothing is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_dev_mcast(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_igmp(): /proc/net/igmp is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_igmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_ip_mr_cache(): /proc/net/ip_mr_cache is not implemented;
+ * nothing is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_ip_mr_vif(): /proc/net/ip_mr_vif is not implemented;
+ * nothing is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_ip_mr_vif(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_mcfilter(): /proc/net/mcfilter is not implemented; nothing
+ * is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_mcfilter(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_netstat(): /proc/net/netstat is not implemented; nothing
+ * is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_netstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_raw(): /proc/net/raw is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_raw(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_route(): /proc/net/route is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_route(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_rpc(): /proc/net/rpc is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_rpc(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_rt_cache(): /proc/net/rt_cache is not implemented; nothing
+ * is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_rt_cache(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_sockstat(): /proc/net/sockstat is not implemented; nothing
+ * is written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_sockstat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_snmp(): /proc/net/snmp is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_snmp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_stat(): /proc/net/stat is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_tcp(): /proc/net/tcp is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_tcp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_udp(): /proc/net/udp is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_udp(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_net_unix(): /proc/net/unix is not implemented; nothing is
+ * written to the uiobuf.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_net_unix(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+}
+
+/*
+ * lxpr_read_kmsg(): read the contents of the kernel message queue. We
+ * translate this into the reception of console messages for this lx zone; each
+ * read copies out a single zone console message, or blocks until the next one
+ * is produced.
+ *
+ * The message is fetched through the LDI handle stored in the node and is
+ * emitted with the LX_KMSG_PRI priority tag in front of it.  If no message
+ * is obtained, the uiobuf is left untouched.
+ */
+
+#define	LX_KMSG_PRI	"<0>"
+
+static void
+lxpr_read_kmsg(lxpr_node_t *lxpnp, struct lxpr_uiobuf *uiobuf)
+{
+	mblk_t	*msg;
+
+	if (ldi_getmsg(lxpnp->lxpr_cons_ldih, &msg, NULL) != 0)
+		return;
+
+	/*
+	 * lx procfs doesn't like successive reads to the same file
+	 * descriptor unless we do an explicit rewind each time.
+	 */
+	lxpr_uiobuf_seek(uiobuf, 0);
+
+	/* the text is taken from the message's continuation block */
+	lxpr_uiobuf_printf(uiobuf, "%s%s", LX_KMSG_PRI, msg->b_cont->b_rptr);
+
+	freemsg(msg);
+}
+
+/*
+ * lxpr_read_loadavg(): read the contents of the "loadavg" file.
+ *
+ * Just enough for uptime to work
+ *
+ * Output is "<1min> <5min> <15min> <runnable>/<nlwps> 0".  The load
+ * averages are fixed-point values scaled by FSCALE; each is split into an
+ * integer part and a two-digit fraction.
+ *
+ * Every argument handed to the formatter has exactly the type its
+ * conversion expects; in particular the 64-bit lwp count is narrowed to
+ * int for "%d" so it cannot displace the arguments that follow it.
+ */
+extern int nthread;
+
+static void
+lxpr_read_loadavg(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	long avenrun1;
+	long avenrun5;
+	long avenrun15;
+	int avenrun1_cs;
+	int avenrun5_cs;
+	int avenrun15_cs;
+	int loadavg[3];
+	int *loadbuf;
+	cpupart_t *cp;
+
+	uint_t nrunnable = 0;
+	rctl_qty_t nlwps;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_LOADAVG);
+
+	mutex_enter(&cpu_lock);
+
+	/*
+	 * Need to add up values over all CPU partitions. If pools are active,
+	 * only report the values of the zone's partition, which by definition
+	 * includes the current CPU.
+	 */
+	if (pool_pset_enabled()) {
+		psetid_t psetid = zone_pset_get(curproc->p_zone);
+
+		ASSERT(curproc->p_zone != &zone0);
+		cp = CPU->cpu_part;
+
+		nrunnable = cp->cp_nrunning + cp->cp_nrunnable;
+		(void) cpupart_get_loadavg(psetid, &loadavg[0], 3);
+		loadbuf = &loadavg[0];
+
+		/*
+		 * We'll report the total number of lwps in the zone for the
+		 * "nproc" parameter of /proc/loadavg; good enough for lx.
+		 */
+		nlwps = curproc->p_zone->zone_nlwps;
+	} else {
+		cp = cp_list_head;
+		do {
+			nrunnable += cp->cp_nrunning + cp->cp_nrunnable;
+		} while ((cp = cp->cp_next) != cp_list_head);
+
+		loadbuf = &avenrun[0];
+
+		/*
+		 * This will report kernel threads as well as user lwps, but it
+		 * should be good enough for lx consumers.
+		 */
+		nlwps = nthread;
+	}
+
+	mutex_exit(&cpu_lock);
+
+	/* integer part, then hundredths taken from the fractional bits */
+	avenrun1 = loadbuf[0] >> FSHIFT;
+	avenrun1_cs = ((loadbuf[0] & (FSCALE-1)) * 100) >> FSHIFT;
+	avenrun5 = loadbuf[1] >> FSHIFT;
+	avenrun5_cs = ((loadbuf[1] & (FSCALE-1)) * 100) >> FSHIFT;
+	avenrun15 = loadbuf[2] >> FSHIFT;
+	avenrun15_cs = ((loadbuf[2] & (FSCALE-1)) * 100) >> FSHIFT;
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "%ld.%02d %ld.%02d %ld.%02d %d/%d %d\n",
+	    avenrun1, avenrun1_cs,
+	    avenrun5, avenrun5_cs,
+	    avenrun15, avenrun15_cs,
+	    (int)nrunnable, (int)nlwps, 0);
+}
+
+/*
+ * lxpr_read_meminfo(): read the contents of the "meminfo" file.
+ *
+ * Memory figures are derived from physmem and freemem, swap figures from
+ * k_anoninfo, all scaled by PAGESIZE.  The summary table at the top is in
+ * bytes; the labelled lines below it are in kB.  Fields with no
+ * counterpart here (shared, buffers, cached, active, inactive, high
+ * memory) are reported as zero.
+ */
+static void
+lxpr_read_meminfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	long mem_total, mem_free, mem_used;
+	long swap_total, swap_used, swap_free;
+
+	mem_total = physmem * PAGESIZE;
+	mem_free = freemem * PAGESIZE;
+	swap_total = k_anoninfo.ani_max * PAGESIZE;
+	swap_used = k_anoninfo.ani_phys_resv * PAGESIZE;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_MEMINFO);
+
+	mem_used = mem_total - mem_free;
+	swap_free = swap_total - swap_used;
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "        total:     used:    free:  shared: buffers:  cached:\n"
+	    "Mem:  %8lu %8lu %8lu %8u %8u %8u\n"
+	    "Swap: %8lu %8lu %8lu\n"
+	    "MemTotal:  %8lu kB\n"
+	    "MemFree:   %8lu kB\n"
+	    "MemShared: %8u kB\n"
+	    "Buffers:   %8u kB\n"
+	    "Cached:    %8u kB\n"
+	    "SwapCached:%8u kB\n"
+	    "Active:    %8u kB\n"
+	    "Inactive:  %8u kB\n"
+	    "HighTotal: %8u kB\n"
+	    "HighFree:  %8u kB\n"
+	    "LowTotal:  %8u kB\n"
+	    "LowFree:   %8u kB\n"
+	    "SwapTotal: %8lu kB\n"
+	    "SwapFree:  %8lu kB\n",
+	    /* "Mem:" row, in bytes */
+	    mem_total, mem_used, mem_free, 0, 0, 0,
+	    /* "Swap:" row, in bytes */
+	    swap_total, swap_used, swap_free,
+	    /* labelled lines, in kB */
+	    btok(mem_total),		/* MemTotal */
+	    btok(mem_free),		/* MemFree */
+	    0,				/* MemShared */
+	    0,				/* Buffers */
+	    0,				/* Cached */
+	    0,				/* SwapCached */
+	    0,				/* Active */
+	    0,				/* Inactive */
+	    0,				/* HighTotal */
+	    0,				/* HighFree */
+	    btok(mem_total),		/* LowTotal */
+	    btok(mem_free),		/* LowFree */
+	    btok(swap_total),		/* SwapTotal */
+	    btok(swap_free));		/* SwapFree */
+}
+
+/*
+ * lxpr_read_mounts(): read the contents of the "mounts" file.
+ *
+ * Emits one "<resource> <mountpoint> <fstype> <ro|rw> 0 0" line per
+ * visible mount.  The global zone walks the list starting at rootvfs; any
+ * other zone walks its own zone_vfslist, and if that list does not begin
+ * with the zone's root a "/ /" entry is synthesized from the vfs of the
+ * zone's root vnode.
+ *
+ * The mount data is first copied into a private list under the vfs list
+ * lock and printed only after that lock has been dropped (see the comment
+ * ahead of the copy loop for the reason).
+ */
+/* ARGSUSED */
+static void
+lxpr_read_mounts(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	struct vfs *vfsp;
+	struct vfs *vfslist;
+	zone_t *zone = LXPTOZ(lxpnp);
+	/* snapshot of one mount, taken while the vfs list lock is held */
+	struct print_data {
+		refstr_t *vfs_mntpt;
+		refstr_t *vfs_resource;
+		uint_t vfs_flag;
+		int vfs_fstype;
+		struct print_data *next;
+	} *print_head = NULL;
+	struct print_data **print_tail = &print_head;
+	struct print_data *printp;
+
+	vfs_list_read_lock();
+
+	if (zone == global_zone) {
+		vfsp = vfslist = rootvfs;
+	} else {
+		vfsp = vfslist = zone->zone_vfslist;
+		/*
+		 * If the zone has a root entry, it will be the first in
+		 * the list.  If it doesn't, we conjure one up.
+		 */
+		if (vfslist == NULL ||
+		    strcmp(refstr_value(vfsp->vfs_mntpt),
+			zone->zone_rootpath) != 0) {
+			struct vfs *tvfsp;
+			/*
+			 * The root of the zone is not a mount point.  The vfs
+			 * we want to report is that of the zone's root vnode.
+			 */
+			tvfsp = zone->zone_rootvp->v_vfsp;
+
+			lxpr_uiobuf_printf(uiobuf,
+			    "/ / %s %s 0 0\n",
+			    vfssw[tvfsp->vfs_fstype].vsw_name,
+			    tvfsp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+		}
+		/* an empty zone list: the synthesized root is all there is */
+		if (vfslist == NULL) {
+			vfs_list_unlock();
+			return;
+		}
+	}
+
+	/*
+	 * Later on we have to do a lookupname, which can end up causing
+	 * another vfs_list_read_lock() to be called. Which can lead to a
+	 * deadlock. To avoid this, we extract the data we need into a local
+	 * list, then we can run this list without holding vfs_list_read_lock()
+	 * We keep the list in the same order as the vfs_list
+	 */
+	do {
+		/* Skip mounts we shouldn't show */
+		if (vfsp->vfs_flag & VFS_NOMNTTAB) {
+			goto nextfs;
+		}
+
+		/* the refstr holds taken here are released after printing */
+		printp = kmem_alloc(sizeof (*printp), KM_SLEEP);
+		refstr_hold(vfsp->vfs_mntpt);
+		printp->vfs_mntpt = vfsp->vfs_mntpt;
+		refstr_hold(vfsp->vfs_resource);
+		printp->vfs_resource = vfsp->vfs_resource;
+		printp->vfs_flag = vfsp->vfs_flag;
+		printp->vfs_fstype = vfsp->vfs_fstype;
+		printp->next = NULL;
+
+		*print_tail = printp;
+		print_tail = &printp->next;
+
+nextfs:
+		/* the list is circular; stop once we are back at the start */
+		vfsp = (zone == global_zone) ?
+		    vfsp->vfs_next : vfsp->vfs_zone_next;
+
+	} while (vfsp != vfslist);
+
+	vfs_list_unlock();
+
+	/*
+	 * now we can run through what we've extracted without holding
+	 * vfs_list_read_lock()
+	 */
+	printp = print_head;
+	while (printp != NULL) {
+		struct print_data *printp_next;
+		const char *resource;
+		char *mntpt;
+		struct vnode *vp;
+		int error;
+
+		mntpt = (char *)refstr_value(printp->vfs_mntpt);
+		resource = refstr_value(printp->vfs_resource);
+
+		if (mntpt != NULL && mntpt[0] != '\0')
+			mntpt = ZONE_PATH_TRANSLATE(mntpt, zone);
+		else
+			mntpt = "-";
+
+		error = lookupname(mntpt, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
+
+		/* entries whose mount point cannot be looked up are skipped */
+		if (error != 0)
+			goto nextp;
+
+		/* so are entries where the lookup is not a filesystem root */
+		if (!(vp->v_flag & VROOT)) {
+			VN_RELE(vp);
+			goto nextp;
+		}
+		VN_RELE(vp);
+
+		if (resource != NULL && resource[0] != '\0') {
+			/*
+			 * A path-like resource is translated when the zone
+			 * can see it; otherwise the mount point is shown in
+			 * its place.
+			 */
+			if (resource[0] == '/') {
+				resource = ZONE_PATH_VISIBLE(resource, zone) ?
+				    ZONE_PATH_TRANSLATE(resource, zone) :
+				    mntpt;
+			}
+		} else {
+			resource = "-";
+		}
+
+		lxpr_uiobuf_printf(uiobuf,
+		    "%s %s %s %s 0 0\n",
+		    resource, mntpt, vfssw[printp->vfs_fstype].vsw_name,
+		    printp->vfs_flag & VFS_RDONLY ? "ro" : "rw");
+
+nextp:
+		/* release the snapshot entry whether or not it was printed */
+		printp_next = printp->next;
+		refstr_rele(printp->vfs_mntpt);
+		refstr_rele(printp->vfs_resource);
+		kmem_free(printp, sizeof (*printp));
+		printp = printp_next;
+
+	}
+}
+
+/*
+ * lxpr_read_partitions():
+ *
+ * We don't support partitions in a local zone because it requires access to
+ * physical devices.  But we need to fake up enough of the file to show that we
+ * have no partitions.
+ *
+ * The output is the column-header line followed by one blank line, with no
+ * partition rows.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_partitions(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	lxpr_uiobuf_printf(uiobuf,
+	    "major minor  #blocks  name     rio rmerge rsect ruse "
+	    "wio wmerge wsect wuse running use aveq\n\n");
+}
+
+/*
+ * lxpr_read_version(): read the contents of the "version" file.
+ *
+ * Produces one line of the form
+ *   "<sysname> version <release> (<compiler> version X.Y.Z) #<version> SMP
+ *   <build time and date>"
+ * The sysname, release and version strings come from the LX_UNAME_*
+ * constants.  The compiler name and version describe whichever compiler
+ * built this file (gcc when __GNUC__ is defined, otherwise Sun C, whose
+ * version is decoded from the hex digits of __SUNPRO_C), and the trailing
+ * timestamp is this file's compile time.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_version(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	lxpr_uiobuf_printf(uiobuf,
+	    "%s version %s (%s version %d.%d.%d) "
+	    "#%s SMP %s\n",
+	    LX_UNAME_SYSNAME, LX_UNAME_RELEASE,
+#if defined(__GNUC__)
+	    "gcc",
+	    __GNUC__,
+	    __GNUC_MINOR__,
+	    __GNUC_PATCHLEVEL__,
+#else
+	    "Sun C",
+	    __SUNPRO_C / 0x100,
+	    (__SUNPRO_C & 0xff) / 0x10,
+	    __SUNPRO_C & 0xf,
+#endif
+	    LX_UNAME_VERSION,
+	    __TIME__ " " __DATE__);
+}
+
+
+/*
+ * lxpr_read_stat(): read the contents of the "stat" file.
+ *
+ * Output is:
+ *   - a "cpu" line with cumulative user/nice/system/idle ticks (nice is
+ *     always reported as 0);
+ *   - one "cpuN" line per CPU with the same four fields;
+ *   - "page", "swap", "intr", "ctxt", "btime" and "processes" lines built
+ *     from the per-CPU statistics and boot_time.
+ *
+ * Both passes walk the circular CPU list starting at the current CPU,
+ * following cpu_next_part when pools are enabled and cpu_next otherwise.
+ * The walk always advances to the next CPU, including past one that lacks
+ * CPU_EXISTS, so it is guaranteed to get back to its starting point.
+ * cpu_lock is held across both passes.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_stat(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	ulong_t idle_cum = 0;
+	ulong_t sys_cum  = 0;
+	ulong_t user_cum = 0;
+	ulong_t pgpgin_cum    = 0;
+	ulong_t pgpgout_cum   = 0;
+	ulong_t pgswapout_cum = 0;
+	ulong_t pgswapin_cum  = 0;
+	ulong_t intr_cum = 0;
+	ulong_t pswitch_cum = 0;
+	ulong_t forks_cum = 0;
+	hrtime_t msnsecs[NCMSTATES];
+
+	ASSERT(lxpnp->lxpr_type == LXPR_STAT);
+
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	/* Calculate cumulative stats */
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system
+		 * or aren't up yet.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			int i;
+
+			get_cpu_mstate(cp, msnsecs);
+
+			idle_cum += NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+			sys_cum  += NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+			user_cum += NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+			pgpgin_cum += CPU_STATS(cp, vm.pgpgin);
+			pgpgout_cum += CPU_STATS(cp, vm.pgpgout);
+			pgswapin_cum += CPU_STATS(cp, vm.pgswapin);
+			pgswapout_cum += CPU_STATS(cp, vm.pgswapout);
+
+			for (i = 0; i < PIL_MAX; i++)
+				intr_cum += CPU_STATS(cp, sys.intr[i]);
+
+			pswitch_cum += CPU_STATS(cp, sys.pswitch);
+			forks_cum += CPU_STATS(cp, sys.sysfork);
+			forks_cum += CPU_STATS(cp, sys.sysvfork);
+		}
+
+		/* always step to the next CPU, even after skipping one */
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "cpu %ld %ld %ld %ld\n",
+	    user_cum, 0L, sys_cum, idle_cum);
+
+	/* Do per processor stats; cp is back at cpstart here */
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system
+		 * or aren't up yet.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			ulong_t idle_ticks;
+			ulong_t sys_ticks;
+			ulong_t user_ticks;
+
+			get_cpu_mstate(cp, msnsecs);
+
+			idle_ticks = NSEC_TO_TICK(msnsecs[CMS_IDLE]);
+			sys_ticks  = NSEC_TO_TICK(msnsecs[CMS_SYSTEM]);
+			user_ticks = NSEC_TO_TICK(msnsecs[CMS_USER]);
+
+			lxpr_uiobuf_printf(uiobuf,
+			    "cpu%d %ld %ld %ld %ld\n",
+			    cp->cpu_id,
+			    user_ticks, 0L, sys_ticks, idle_ticks);
+		}
+
+		/* always step to the next CPU, even after skipping one */
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	mutex_exit(&cpu_lock);
+
+	lxpr_uiobuf_printf(uiobuf,
+	    "page %lu %lu\n"
+	    "swap %lu %lu\n"
+	    "intr %lu\n"
+	    "ctxt %lu\n"
+	    "btime %lu\n"
+	    "processes %lu\n",
+	    pgpgin_cum, pgpgout_cum,
+	    pgswapin_cum, pgswapout_cum,
+	    intr_cum,
+	    pswitch_cum,
+	    boot_time,
+	    forks_cum);
+}
+
+
+/*
+ * lxpr_read_uptime(): read the contents of the "uptime" file.
+ *
+ * Output is one line, "<up_s>.<up_cs> <idle_s>.<idle_cs>": the time since
+ * boot (from lbolt) and the idle+wait time averaged over the CPUs
+ * visited, each as whole seconds plus two digits of hundredths.
+ * Fixed point arithmetic is used to get the 2 decimal places.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_uptime(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	ulong_t idle_cum = 0;
+	ulong_t cpu_count = 0;
+	ulong_t idle_s;
+	ulong_t idle_cs;
+	ulong_t up_s;
+	ulong_t up_cs;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_UPTIME);
+
+	/* Calculate cumulative stats */
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * Don't count CPUs that aren't even in the system
+		 * or aren't up yet.  The body is guarded instead of
+		 * using "continue", which in a do/while loop would jump
+		 * to the loop test and skip the list advance below.
+		 */
+		if ((cp->cpu_flags & CPU_EXISTS) != 0) {
+			idle_cum += CPU_STATS(cp, sys.cpu_ticks_idle);
+			idle_cum += CPU_STATS(cp, sys.cpu_ticks_wait);
+			cpu_count += 1;
+		}
+
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+	mutex_exit(&cpu_lock);
+
+	/* Capture lbolt in case it changes */
+	up_cs = lbolt;
+	up_s = up_cs / hz;
+	up_cs %= hz;
+	up_cs *= 100;
+	up_cs /= hz;
+
+	/*
+	 * Average the idle time over the CPUs counted.  The ASSERT is
+	 * compiled out of non-DEBUG kernels, so the division is guarded
+	 * explicitly as well.
+	 */
+	ASSERT(cpu_count > 0);
+	if (cpu_count != 0)
+		idle_cum /= cpu_count;
+	idle_s = idle_cum / hz;
+	idle_cs = idle_cum % hz;
+	idle_cs *= 100;
+	idle_cs /= hz;
+
+	/*
+	 * The hundredths are always below 100; they are narrowed to int
+	 * so that the arguments match the %02d directives.
+	 */
+	lxpr_uiobuf_printf(uiobuf,
+	    "%ld.%02d %ld.%02d\n", up_s, (int)up_cs, idle_s, (int)idle_cs);
+}
+
+/*
+ * Feature-name tables used by lxpr_read_cpuinfo() to build the "flags"
+ * line.  In every table the array index is the bit number within the
+ * cpuid register being decoded; a NULL entry means no name is printed
+ * for that bit.
+ */
+
+/* AMD: %edx of cpuid function 0x80000001 */
+static const char *amd_x_edx[] = {
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	NULL,	NULL,	"syscall",
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	NULL,	NULL,	"mp",
+	"nx",	NULL,	"mmxext", NULL,
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	"lm",	"3dnowext", "3dnow"
+};
+
+/* AMD: %ecx of cpuid function 0x80000001 (only bits 0-4 are named) */
+static const char *amd_x_ecx[] = {
+	"lahf_lm", NULL, "svm", NULL,
+	"altmovcr8"
+};
+
+/* X86_VENDOR_TM: %edx of cpuid function 0x80000001 (bits 0-3) */
+static const char *tm_x_edx[] = {
+	"recovery", "longrun", NULL, "lrti"
+};
+
+/*
+ * Intel: %edx of cpuid function 0x80000001.
+ * Intel calls no-execute "xd" in its docs, but Linux still reports it as "nx."
+ */
+static const char *intc_x_edx[] = {
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	NULL,	NULL,	"syscall",
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	NULL,	NULL,	NULL,
+	"nx",	NULL,	NULL,   NULL,
+	NULL,	NULL,	NULL,	NULL,
+	NULL,	"lm",   NULL,   NULL
+};
+
+/* All vendors: %edx of cpuid function 1 (standard feature flags) */
+static const char *intc_edx[] = {
+	"fpu",	"vme",	"de",	"pse",
+	"tsc",	"msr",	"pae",	"mce",
+	"cx8",	"apic",	 NULL,	"sep",
+	"mtrr",	"pge",	"mca",	"cmov",
+	"pat",	"pse36", "pn",	"clflush",
+	NULL,	"dts",	"acpi",	"mmx",
+	"fxsr",	"sse",	"sse2",	"ss",
+	"ht",	"tm",	"ia64",	"pbe"
+};
+
+/*
+ * All vendors: %ecx of cpuid function 1 (only bits 0-14 are named).
+ * "sse3" on linux is called "pni" (Prescott New Instructions).
+ */
+static const char *intc_ecx[] = {
+	"pni",	NULL,	NULL, "monitor",
+	"ds_cpl", NULL,	NULL, "est",
+	"tm2",	NULL,	"cid", NULL,
+	NULL,	"cx16",	"xtpr"
+};
+
+/*
+ * Print " <name>" for every bit that is set in "bits" and has a non-NULL
+ * entry in "names", where names[i] describes bit i.  "nnames" is the
+ * number of entries in the table and never exceeds 32.
+ */
+static void
+lxpr_cpuinfo_flags(lxpr_uiobuf_t *uiobuf, uint32_t bits, const char **names,
+    uint_t nnames)
+{
+	uint_t i;
+
+	for (i = 0; i < nnames; i++) {
+		/*
+		 * The mask is built from an unsigned constant so that
+		 * testing bit 31 does not shift into the sign bit.
+		 */
+		if ((bits & (1U << i)) != 0 && names[i] != NULL)
+			lxpr_uiobuf_printf(uiobuf, " %s", names[i]);
+	}
+}
+
+/*
+ * lxpr_read_cpuinfo(): read the contents of the "cpuinfo" file.
+ *
+ * Emits one blank-line-terminated stanza per CPU on the ring walked
+ * (cpu_next_part when pools are enabled, cpu_next otherwise), with
+ * cpu_lock held for the whole walk.  The "flags" line is assembled from
+ * the cpuid feature registers using the name tables above.
+ *
+ * NOTE(review): the extended function 0x80000001 is queried without first
+ * checking that the processor reports it as supported -- confirm that
+ * cpuid_insn() tolerates this.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_cpuinfo(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	cpu_t *cp, *cpstart;
+	int pools_enabled;
+	char brandstr[CPU_IDSTRLEN];
+	struct cpuid_regs cpr;
+	int maxeax;
+	uint32_t std_ecx, std_edx, ext_ecx, ext_edx;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_CPUINFO);
+
+	mutex_enter(&cpu_lock);
+	pools_enabled = pool_pset_enabled();
+
+	cp = cpstart = CPU;
+	do {
+		/*
+		 * This returns the maximum eax value for standard cpuid
+		 * functions in eax.
+		 */
+		cpr.cp_eax = 0;
+		(void) cpuid_insn(cp, &cpr);
+		maxeax = cpr.cp_eax;
+
+		/*
+		 * Get standard x86 feature flags.
+		 */
+		cpr.cp_eax = 1;
+		(void) cpuid_insn(cp, &cpr);
+		std_ecx = cpr.cp_ecx;
+		std_edx = cpr.cp_edx;
+
+		/*
+		 * Now get extended feature flags.
+		 */
+		cpr.cp_eax = 0x80000001;
+		(void) cpuid_insn(cp, &cpr);
+		ext_ecx = cpr.cp_ecx;
+		ext_edx = cpr.cp_edx;
+
+		(void) cpuid_getbrandstr(cp, brandstr, CPU_IDSTRLEN);
+
+		lxpr_uiobuf_printf(uiobuf,
+		    "processor\t: %d\n"
+		    "vendor_id\t: %s\n"
+		    "cpu family\t: %d\n"
+		    "model\t\t: %d\n"
+		    "model name\t: %s\n"
+		    "stepping\t: %d\n"
+		    "cpu MHz\t\t: %u.%03u\n",
+		    cp->cpu_id, cpuid_getvendorstr(cp), cpuid_getfamily(cp),
+		    cpuid_getmodel(cp), brandstr, cpuid_getstep(cp),
+		    (uint32_t)(cpu_freq_hz / 1000000),
+		    ((uint32_t)(cpu_freq_hz / 1000)) % 1000);
+
+		lxpr_uiobuf_printf(uiobuf, "cache size\t: %u KB\n",
+		    getl2cacheinfo(cp, NULL, NULL, NULL) / 1024);
+
+		if (x86_feature & X86_HTT) {
+			/*
+			 * 'siblings' is used for HT-style threads
+			 */
+			lxpr_uiobuf_printf(uiobuf,
+			    "physical id\t: %lu\n"
+			    "siblings\t: %u\n",  chip_plat_get_chipid(cp),
+			    cpuid_get_ncpu_per_chip(cp));
+		}
+
+		/*
+		 * Since we're relatively picky about running on older hardware,
+		 * we can be somewhat cavalier about the answers to these ones.
+		 *
+		 * In fact, given the hardware we support, we just say:
+		 *
+		 *	fdiv_bug	: no	(if we're on a 64-bit kernel)
+		 *	hlt_bug		: no
+		 *	f00f_bug	: no
+		 *	coma_bug	: no
+		 *	wp		: yes	(write protect in supervisor
+		 *				mode)
+		 *
+		 * Note that no "wp" line is actually emitted by the format
+		 * string below.
+		 */
+		lxpr_uiobuf_printf(uiobuf,
+		    "fdiv_bug\t: %s\n"
+		    "hlt_bug \t: no\n"
+		    "f00f_bug\t: no\n"
+		    "coma_bug\t: no\n"
+		    "fpu\t\t: %s\n"
+		    "fpu_exception\t: %s\n"
+		    "cpuid level\t: %d\n"
+		    "flags\t\t:",
+#if defined(__i386)
+		    fpu_pentium_fdivbug ? "yes" : "no",
+#else
+		    "no",
+#endif /* __i386 */
+		    fpu_exists ? "yes" : "no", fpu_exists ? "yes" : "no",
+		    maxeax);
+
+		/* standard %edx features, named the same for all vendors */
+		lxpr_cpuinfo_flags(uiobuf, std_edx, intc_edx,
+		    sizeof (intc_edx) / sizeof (intc_edx[0]));
+
+		/*
+		 * name additional features where appropriate
+		 */
+		switch (x86_vendor) {
+		case X86_VENDOR_Intel:
+			lxpr_cpuinfo_flags(uiobuf, ext_edx, intc_x_edx,
+			    sizeof (intc_x_edx) / sizeof (intc_x_edx[0]));
+			break;
+
+		case X86_VENDOR_AMD:
+			lxpr_cpuinfo_flags(uiobuf, ext_edx, amd_x_edx,
+			    sizeof (amd_x_edx) / sizeof (amd_x_edx[0]));
+			lxpr_cpuinfo_flags(uiobuf, ext_ecx, amd_x_ecx,
+			    sizeof (amd_x_ecx) / sizeof (amd_x_ecx[0]));
+			break;
+
+		case X86_VENDOR_TM:
+			lxpr_cpuinfo_flags(uiobuf, ext_edx, tm_x_edx,
+			    sizeof (tm_x_edx) / sizeof (tm_x_edx[0]));
+			break;
+		default:
+			break;
+		}
+
+		/* standard %ecx features come last on the line */
+		lxpr_cpuinfo_flags(uiobuf, std_ecx, intc_ecx,
+		    sizeof (intc_ecx) / sizeof (intc_ecx[0]));
+
+		lxpr_uiobuf_printf(uiobuf, "\n\n");
+
+		if (pools_enabled)
+			cp = cp->cpu_next_part;
+		else
+			cp = cp->cpu_next;
+	} while (cp != cpstart);
+
+	mutex_exit(&cpu_lock);
+}
+
+/*
+ * lxpr_read_fd(): read handler for LXPR_PID_FD_FD nodes.
+ *
+ * Produces no data; it only records EFAULT as the error on the output
+ * buffer.
+ */
+/* ARGSUSED */
+static void
+lxpr_read_fd(lxpr_node_t *lxpnp, lxpr_uiobuf_t *uiobuf)
+{
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_FD_FD);
+	lxpr_uiobuf_seterr(uiobuf, EFAULT);
+}
+
+
+
+/*
+ * lxpr_getattr(): Vnode operation for VOP_GETATTR()
+ *
+ * With ATTR_REAL set and an underlying vnode present, the attributes come
+ * from that vnode (after an access check on it); fd entries additionally
+ * keep their own mode and type so that they still look like symlinks.
+ * In all other cases the attributes are synthesized from the lx /proc node.
+ */
+static int
+lxpr_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr)
+{
+	extern uint_t nproc;
+	lxpr_node_t *node = VTOLXP(vp);
+	lxpr_nodetype_t ntype = node->lxpr_type;
+	int err;
+
+	if (node->lxpr_realvp != NULL && (flags & ATTR_REAL) != 0) {
+		vnode_t *realvp = node->lxpr_realvp;
+
+		/*
+		 * withhold attribute information unless the caller passes
+		 * the access check on the underlying vnode, then fetch
+		 * that vnode's attributes
+		 */
+		err = VOP_ACCESS(realvp, 0, 0, cr);
+		if (err == 0)
+			err = VOP_GETATTR(realvp, vap, flags, cr);
+		if (err != 0)
+			return (err);
+
+		/*
+		 * a file in lx /proc/pid/fd/xx gets our mode and keeps
+		 * looking like a symlink
+		 */
+		if (ntype == LXPR_PID_FD_FD) {
+			vap->va_mode = node->lxpr_mode;
+			vap->va_type = vp->v_type;
+			vap->va_size = 0;
+			vap->va_nlink = 1;
+		}
+		return (0);
+	}
+
+	/* Start from defaults; some node types adjust them below */
+	bzero(vap, sizeof (*vap));
+	vap->va_atime = vap->va_mtime = vap->va_ctime = node->lxpr_time;
+	vap->va_nlink = 1;
+	vap->va_type = vp->v_type;
+	vap->va_mode = node->lxpr_mode;
+	vap->va_fsid = vp->v_vfsp->vfs_dev;
+	vap->va_blksize = DEV_BSIZE;
+	vap->va_uid = node->lxpr_uid;
+	vap->va_gid = node->lxpr_gid;
+	vap->va_nodeid = node->lxpr_ino;
+
+	if (ntype == LXPR_PROCDIR) {
+		/* ".", "..", the fixed entries and one entry per process */
+		vap->va_nlink = nproc + 2 + PROCDIRFILES;
+		vap->va_size = (nproc + 2 + PROCDIRFILES) * LXPR_SDSIZE;
+	} else if (ntype == LXPR_PIDDIR) {
+		vap->va_nlink = PIDDIRFILES;
+		vap->va_size = PIDDIRFILES * LXPR_SDSIZE;
+	} else if (ntype == LXPR_SELF) {
+		/* "self" is reported as owned by the calling process */
+		vap->va_uid = crgetruid(curproc->p_cred);
+		vap->va_gid = crgetrgid(curproc->p_cred);
+	}
+
+	vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
+	return (0);
+}
+
+
+/*
+ * lxpr_access(): Vnode operation for VOP_ACCESS()
+ *
+ * Returns EROFS for any write request, ENOENT if a per-process node's
+ * process can no longer be locked, EACCES when permission is denied and
+ * 0 otherwise.  Nodes backed by an underlying vnode defer the final
+ * decision to that vnode's VOP_ACCESS().
+ */
+static int
+lxpr_access(vnode_t *vp, int mode, int flags, cred_t *cr)
+{
+	lxpr_node_t *lxpnp = VTOLXP(vp);
+	int shift = 0;
+	proc_t *tp;
+
+	/* lx /proc is a read only file system */
+	if (mode & VWRITE)
+		return (EROFS);
+
+	/*
+	 * If this is a restricted file, check access permissions.
+	 */
+	switch (lxpnp->lxpr_type) {
+	case LXPR_PIDDIR:
+		return (0);
+	case LXPR_PID_CURDIR:
+	case LXPR_PID_ENV:
+	case LXPR_PID_EXE:
+	case LXPR_PID_MAPS:
+	case LXPR_PID_MEM:
+	case LXPR_PID_ROOTDIR:
+	case LXPR_PID_FDDIR:
+	case LXPR_PID_FD_FD:
+		if ((tp = lxpr_lock(lxpnp->lxpr_pid)) == NULL)
+			return (ENOENT);
+		/*
+		 * Denied only if the target is not the caller itself and
+		 * both the policy check and the credential check fail.
+		 */
+		if (tp != curproc && secpolicy_proc_access(cr) != 0 &&
+		    priv_proc_cred_perm(cr, tp, NULL, mode) != 0) {
+			lxpr_unlock(tp);
+			return (EACCES);
+		}
+		lxpr_unlock(tp);
+		/* FALLTHROUGH */
+	default:
+		break;
+	}
+
+	if (lxpnp->lxpr_realvp != NULL) {
+		/*
+		 * For these we use the underlying vnode's accessibility.
+		 */
+		return (VOP_ACCESS(lxpnp->lxpr_realvp, mode, flags, cr));
+	}
+
+	/*
+	 * Access check is based on only
+	 * one of owner, group, public.
+	 * If not owner, then check group.
+	 * If not a member of the group, then
+	 * check public access.
+	 */
+	if (crgetuid(cr) != lxpnp->lxpr_uid) {
+		shift += 3;
+		if (!groupmember((gid_t)lxpnp->lxpr_gid, cr))
+			shift += 3;
+	}
+
+	/*
+	 * Shift the selected permission class up into the owner bit
+	 * positions and clear every requested bit that it grants.
+	 */
+	mode &= ~(lxpnp->lxpr_mode << shift);
+
+	if (mode == 0)
+		return (0);
+
+	return (EACCES);
+}
+
+
+
+
+/*
+ * Lookup routine that never finds anything: it ignores both arguments
+ * and always returns NULL.
+ */
+/* ARGSUSED */
+static vnode_t *
+lxpr_lookup_not_a_dir(vnode_t *dp, char *comp)
+{
+	return (NULL);
+}
+
+
+/*
+ * lxpr_lookup(): Vnode operation for VOP_LOOKUP()
+ *
+ * After an execute-permission check on the directory, ".." resolves to
+ * the parent, "." and the empty name resolve to the directory itself, and
+ * anything else is handed to the per-node-type routine in
+ * lxpr_lookup_function[].  The vnode returned in *vpp is held.
+ */
+/* ARGSUSED */
+static int
+lxpr_lookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pathp,
+	int flags, vnode_t *rdir, cred_t *cr)
+{
+	lxpr_node_t *dnode = VTOLXP(dp);
+	lxpr_nodetype_t dtype = dnode->lxpr_type;
+	vnode_t *found;
+	int err;
+
+	ASSERT(dp->v_type == VDIR);
+	ASSERT(dtype >= 0 && dtype < LXPR_NFILES);
+
+	/*
+	 * these node types are never seen here because their
+	 * lookups are done on the realvp instead
+	 */
+	ASSERT(dtype != LXPR_PID_FD_FD &&
+	    dtype != LXPR_PID_CURDIR &&
+	    dtype != LXPR_PID_ROOTDIR);
+
+	/*
+	 * restrict lookup permission to owner or root
+	 */
+	err = lxpr_access(dp, VEXEC, 0, cr);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * ".." is simply the parent vnode
+	 */
+	if (strcmp(comp, "..") == 0) {
+		VN_HOLD(dnode->lxpr_parent);
+		*vpp = dnode->lxpr_parent;
+		return (0);
+	}
+
+	/*
+	 * Both "." and the null component name mean the
+	 * directory that is being searched.
+	 */
+	if (dp->v_type == VDIR &&
+	    (comp[0] == '\0' || strcmp(comp, ".") == 0)) {
+		VN_HOLD(dp);
+		*vpp = dp;
+		return (0);
+	}
+
+	found = lxpr_lookup_function[dtype](dp, comp);
+	*vpp = found;
+	if (found == NULL)
+		return (ENOENT);
+	return (0);
+}
+
+/*
+ * Linear search of a directory table for an entry named "comp".  On a
+ * match, a node of the entry's type is obtained from lxpr_getnode() and
+ * its vnode returned; NULL means the name is not in the table.
+ */
+static vnode_t *
+lxpr_lookup_common(vnode_t *dp, char *comp, proc_t *p,
+    lxpr_dirent_t *dirtab, int dirtablen)
+{
+	int idx;
+
+	for (idx = 0; idx < dirtablen; idx++) {
+		lxpr_node_t *node;
+		vnode_t *vp;
+
+		if (strcmp(dirtab[idx].d_name, comp) != 0)
+			continue;
+
+		node = lxpr_getnode(dp, dirtab[idx].d_type, p, 0);
+		vp = LXPTOV(node);
+		ASSERT(vp != NULL);
+		return (vp);
+	}
+
+	return (NULL);
+}
+
+
+/*
+ * Look up a name in a /proc/<pid> directory.  The process is kept
+ * locked while the piddir table is searched; NULL is returned if the
+ * process cannot be locked or the name is not in the table.
+ */
+static vnode_t *
+lxpr_lookup_piddir(vnode_t *dp, char *comp)
+{
+	vnode_t *vp;
+	proc_t *p;
+
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PIDDIR);
+
+	if ((p = lxpr_lock(VTOLXP(dp)->lxpr_pid)) == NULL)
+		return (NULL);
+
+	vp = lxpr_lookup_common(dp, comp, p, piddir, PIDDIRFILES);
+	lxpr_unlock(p);
+
+	return (vp);
+}
+
+
+/*
+ * Lookup one of the process's open files.
+ *
+ * "comp" must be the decimal rendition of a file descriptor that is
+ * currently open in the process.  On success the returned node's
+ * lxpr_realvp refers to the open file's vnode and carries a hold on it;
+ * NULL is returned for malformed names, processes that cannot have open
+ * files, and descriptors that are not open.
+ */
+static vnode_t *
+lxpr_lookup_fddir(vnode_t *dp, char *comp)
+{
+	lxpr_node_t *dlxpnp = VTOLXP(dp);
+	lxpr_node_t *lxpnp;
+	vnode_t *vp = NULL;
+	proc_t *p;
+	file_t *fp;
+	uint_t fd;
+	int c;
+	uf_entry_t *ufp;
+	uf_info_t *fip;
+
+	ASSERT(dlxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+	/*
+	 * convert the string rendition of the filename
+	 * to a file descriptor
+	 */
+	fd = 0;
+	while ((c = *comp++) != '\0') {
+		uint_t ofd;
+
+		if (c < '0' || c > '9')
+			return (NULL);
+
+		ofd = fd;
+		fd = 10*fd + c - '0';
+		/* integer overflow */
+		if (fd / 10 != ofd)
+			return (NULL);
+	}
+
+	/*
+	 * get the proc to work with and lock it
+	 */
+	p = lxpr_lock(dlxpnp->lxpr_pid);
+	if (p == NULL)
+		return (NULL);
+
+	/*
+	 * If the process is a zombie or system process
+	 * it can't have any open files.
+	 */
+	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas)) {
+		lxpr_unlock(p);
+		return (NULL);
+	}
+
+	/*
+	 * get us a fresh node/vnode
+	 */
+	lxpnp = lxpr_getnode(dp, LXPR_PID_FD_FD, p, fd);
+
+	/*
+	 * get open file info
+	 */
+	fip = (&(p)->p_user.u_finfo);
+	mutex_enter(&fip->fi_lock);
+
+	/*
+	 * got the fd data so now done with this proc
+	 */
+	lxpr_unlock(p);
+
+	if (fd < fip->fi_nfiles) {
+		UF_ENTER(ufp, fip, fd);
+		/*
+		 * The fd may have been closed between the readdir and
+		 * the lookup, so check that it is still open.  The hold
+		 * on the vnode is taken while the fd entry is still
+		 * locked so that a concurrent close cannot release the
+		 * vnode before we have our own reference to it.
+		 */
+		if ((fp = ufp->uf_file) != NULL) {
+			vp = fp->f_vnode;
+			VN_HOLD(vp);
+		}
+		UF_EXIT(ufp);
+	}
+	mutex_exit(&fip->fi_lock);
+
+	if (vp == NULL) {
+		lxpr_freenode(lxpnp);
+		return (NULL);
+	}
+
+	/*
+	 * Fill in the lxpr_node so future references will
+	 * be able to find the underlying vnode.
+	 * The hold taken above is owned by lxpr_realvp from here on.
+	 */
+	lxpnp->lxpr_realvp = vp;
+
+	dp = LXPTOV(lxpnp);
+	ASSERT(dp != NULL);
+
+	return (dp);
+}
+
+
+/*
+ * Look up a name in the "net" directory by searching the netdir table.
+ * No process is associated with these entries.
+ */
+static vnode_t *
+lxpr_lookup_netdir(vnode_t *dp, char *comp)
+{
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_NETDIR);
+
+	return (lxpr_lookup_common(dp, comp, NULL, netdir, NETDIRFILES));
+}
+
+
+/*
+ * Look up a name in the top-level lx /proc directory.
+ *
+ * A name that starts with a digit is treated as a process id and must
+ * consist of decimal digits only; it resolves to that process's
+ * LXPR_PIDDIR node if the process can be locked and the caller is allowed
+ * to see it.  Every other name is searched for in the lx_procdir table.
+ * NULL is returned when nothing matches.
+ */
+static vnode_t *
+lxpr_lookup_procdir(vnode_t *dp, char *comp)
+{
+	ASSERT(VTOLXP(dp)->lxpr_type == LXPR_PROCDIR);
+
+	/*
+	 * We know all the names of files & dirs in our
+	 * file system structure except those that are pid names.
+	 * These change as pids are created/deleted etc.
+	 * So just look for a number as the first char to see if we
+	 * are doing a pid lookup.
+	 *
+	 * Don't need to check for "self" as it is implemented as a symlink
+	 */
+	if (*comp >= '0' && *comp <= '9') {
+		/* largest value that fits in a 32-bit signed pid */
+		const pid_t pid_limit = 0x7fffffff;
+		pid_t pid = 0;
+		lxpr_node_t *lxpnp = NULL;
+		proc_t *p;
+		int c;
+
+		while ((c = *comp++) != '\0') {
+			int digit;
+
+			/*
+			 * Every character has to be a digit: a name
+			 * such as "1x" must not silently turn into
+			 * some other process id.
+			 */
+			if (c < '0' || c > '9')
+				return (NULL);
+
+			/* reject names too large to be a pid */
+			digit = c - '0';
+			if (pid > (pid_limit - digit) / 10)
+				return (NULL);
+
+			pid = 10*pid + digit;
+		}
+
+		/*
+		 * Can't continue if the process is still loading
+		 * or it doesn't really exist yet (or maybe it just died!)
+		 */
+		p = lxpr_lock(pid);
+		if (p == NULL)
+			return (NULL);
+
+		if (secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+			lxpr_unlock(p);
+			return (NULL);
+		}
+
+		/*
+		 * allocate and fill in a new lx /proc node
+		 */
+		lxpnp = lxpr_getnode(dp, LXPR_PIDDIR, p, 0);
+
+		lxpr_unlock(p);
+
+		dp = LXPTOV(lxpnp);
+		ASSERT(dp != NULL);
+
+		return (dp);
+
+	}
+
+	/* Lookup fixed names */
+	return (lxpr_lookup_common(dp, comp, NULL, lx_procdir, PROCDIRFILES));
+}
+
+
+
+
+/*
+ * lxpr_readdir(): Vnode operation for VOP_READDIR()
+ *
+ * Validates the request (read permission, non-negative offset, positive
+ * residual count, offset on an LXPR_SDSIZE boundary) and then passes it
+ * to the per-node-type routine in lxpr_readdir_function[].
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir(vnode_t *dp, uio_t *uiop, cred_t *cr, int *eofp)
+{
+	lxpr_node_t *dnode = VTOLXP(dp);
+	lxpr_nodetype_t dtype = dnode->lxpr_type;
+	ssize_t resid;
+	off_t offset;
+	int err;
+
+	ASSERT(dp->v_type == VDIR);
+	ASSERT(dtype >= 0 && dtype < LXPR_NFILES);
+
+	/*
+	 * these node types are never seen here because their
+	 * readdirs are done on the realvp instead
+	 */
+	ASSERT(dtype != LXPR_PID_FD_FD &&
+		dtype != LXPR_PID_CURDIR &&
+		dtype != LXPR_PID_ROOTDIR);
+
+	/*
+	 * restrict readdir permission to owner or root
+	 */
+	err = lxpr_access(dp, VREAD, 0, cr);
+	if (err != 0)
+		return (err);
+
+	offset = uiop->uio_offset;
+	resid = uiop->uio_resid;
+
+	/* negative offsets and empty or negative reads are invalid */
+	if (offset < 0 || resid <= 0)
+		return (EINVAL);
+
+	/* offsets always fall on a directory slot boundary */
+	if ((offset % LXPR_SDSIZE) != 0)
+		return (ENOENT);
+
+	return (lxpr_readdir_function[dnode->lxpr_type](dnode, uiop, eofp));
+}
+
+
+/*
+ * Readdir routine that always fails with ENOTDIR; it ignores all of its
+ * arguments.
+ */
+/* ARGSUSED */
+static int
+lxpr_readdir_not_a_dir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	return (ENOTDIR);
+}
+
+/*
+ * This has the common logic for returning directory entries
+ *
+ * Directory offsets advance in units of LXPR_SDSIZE: slot 0 is ".",
+ * slot 1 is ".." and slot n + 2 is dirtab[n].  Entries are copied out one
+ * dirent64 at a time until the caller's buffer is full or the table is
+ * exhausted.
+ *
+ * Returns 0 on success, EINVAL if the buffer cannot hold even the first
+ * entry, or the error from uiomove().  On the success paths *eofp (if
+ * eofp is not NULL) is set to 1 once the offset is past the last table
+ * entry and to 0 otherwise; it is not touched on the error paths.
+ */
+static int
+lxpr_readdir_common(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp,
+    lxpr_dirent_t *dirtab, int dirtablen)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+
+	oresid = uiop->uio_resid;
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/*
+	 * Satisfy user request
+	 */
+	while ((uresid = uiop->uio_resid) > 0) {
+		int dirindex;
+		off_t uoffset;
+		int reclen;
+		int error;
+
+		uoffset = uiop->uio_offset;
+		/* table index for this slot; negative for "." and ".." */
+		dirindex  = (uoffset / LXPR_SDSIZE) - 2;
+
+		if (uoffset == 0) {
+
+			dirent->d_ino = lxpnp->lxpr_ino;
+			dirent->d_name[0] = '.';
+			dirent->d_name[1] = '\0';
+			reclen = DIRENT64_RECLEN(1);
+
+		} else if (uoffset == LXPR_SDSIZE) {
+
+			dirent->d_ino = lxpr_parentinode(lxpnp);
+			dirent->d_name[0] = '.';
+			dirent->d_name[1] = '.';
+			dirent->d_name[2] = '\0';
+			reclen = DIRENT64_RECLEN(2);
+
+		} else if (dirindex < dirtablen) {
+			int slen = strlen(dirtab[dirindex].d_name);
+
+			dirent->d_ino = lxpr_inode(dirtab[dirindex].d_type,
+			    lxpnp->lxpr_pid, 0);
+
+			ASSERT(slen < LXPNSIZ);
+			(void) strcpy(dirent->d_name, dirtab[dirindex].d_name);
+			reclen = DIRENT64_RECLEN(slen);
+
+		} else {
+			/* Run out of table entries */
+			if (eofp) {
+				*eofp = 1;
+			}
+			return (0);
+		}
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		/*
+		 * if the size of the data to transfer is greater
+		 * than that requested then we can't do it this transfer.
+		 */
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid) {
+				return (EINVAL);
+			}
+			break;
+		}
+
+		/*
+		 * uiomove() updates both uiop->uio_resid and
+		 * uiop->uio_offset by the same amount.  But we want
+		 * uiop->uio_offset to change in increments
+		 * of LXPR_SDSIZE, which is different from the number of bytes
+		 * being returned to the user.
+		 * So we set uiop->uio_offset separately, ignoring what
+		 * uiomove() does.
+		 */
+		if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) {
+			return (error);
+		}
+
+		uiop->uio_offset = uoffset + LXPR_SDSIZE;
+	}
+
+	/* Have run out of space, but could have just done last table entry */
+	if (eofp) {
+		*eofp =
+		    (uiop->uio_offset >= ((dirtablen+2) * LXPR_SDSIZE)) ? 1 : 0;
+	}
+	return (0);
+}
+
+
+/*
+ * Read entries from the top-level lx /proc directory: ".", "..", the
+ * fixed lx_procdir entries, and then one directory per visible process.
+ *
+ * Returns 0 on success, EINVAL if the buffer cannot hold even one entry,
+ * or the error from uiomove().  On success *eofp (if eofp is not NULL)
+ * reports whether the end of the directory has been reached.
+ */
+static int
+lxpr_readdir_procdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+	off_t uoffset;
+	zoneid_t zoneid;
+	pid_t pid;
+	int error;
+	int ceof = 0;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PROCDIR);
+
+	oresid = uiop->uio_resid;
+	zoneid = LXPTOZ(lxpnp)->zone_id;
+
+	/*
+	 * We return directory entries in the order:
+	 * "." and ".." then the unique lx procfs files, then the
+	 * directories corresponding to the running processes.
+	 *
+	 * This is a good order because it allows us to more easily
+	 * keep track of where we are between calls to getdents().
+	 * If the number of processes changes between calls then we
+	 * can't lose track of where we are in the lx procfs files.
+	 */
+
+	/* Do the fixed entries */
+	error = lxpr_readdir_common(lxpnp, uiop, &ceof, lx_procdir,
+	    PROCDIRFILES);
+
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Finished if we couldn't do all the table; there is more
+	 * to read, so make sure the caller is told this isn't the end.
+	 */
+	if (ceof == 0) {
+		if (eofp)
+			*eofp = 0;
+		return (0);
+	}
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/* Do the process entries */
+	while ((uresid = uiop->uio_resid) > 0) {
+		proc_t *p;
+		zoneid_t pzoneid;
+		int len;
+		int reclen;
+		int i;
+
+		uoffset = uiop->uio_offset;
+
+		/*
+		 * Stop when entire proc table has been examined.
+		 */
+		i = (uoffset / LXPR_SDSIZE) - 2 - PROCDIRFILES;
+		if (i >= v.v_proc) {
+			/* Run out of table entries */
+			if (eofp) {
+				*eofp = 1;
+			}
+			return (0);
+		}
+		mutex_enter(&pidlock);
+
+		/*
+		 * Skip indices for which there is no pid_entry, PIDs for
+		 * which there is no corresponding process, the zched process,
+		 * a PID of 0, and anything the security policy doesn't allow
+		 * us to look at.
+		 */
+		if ((p = pid_entry(i)) == NULL || p->p_stat == SIDL ||
+		    p->p_pid == curproc->p_zone->zone_zsched->p_pid ||
+		    p->p_pid == 0 ||
+		    secpolicy_basic_procinfo(CRED(), p, curproc) != 0) {
+			mutex_exit(&pidlock);
+			goto next;
+		}
+
+		ASSERT(p->p_stat != 0);
+
+		/*
+		 * Take copies of everything we need from the process
+		 * while pidlock is still held; p must not be
+		 * dereferenced once the lock has been dropped.
+		 */
+		pid = p->p_pid;
+		pzoneid = p->p_zone->zone_id;
+		mutex_exit(&pidlock);
+
+		/*
+		 * Convert pid to the Linux default of 1 if we're the zone's
+		 * init process, otherwise use the value from the proc
+		 * structure
+		 */
+		if (pid == curproc->p_zone->zone_proc_initpid)
+			pid = 1;
+
+		/*
+		 * If this /proc was mounted in the global zone, view
+		 * all procs; otherwise, only view zone member procs.
+		 */
+		if (zoneid != GLOBAL_ZONEID && pzoneid != zoneid) {
+			goto next;
+		}
+
+		dirent->d_ino = lxpr_inode(LXPR_PIDDIR, pid, 0);
+		len = snprintf(dirent->d_name, LXPNSIZ, "%d", pid);
+		ASSERT(len < LXPNSIZ);
+		reclen = DIRENT64_RECLEN(len);
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		/*
+		 * if the size of the data to transfer is greater
+		 * than that requested then we can't do it this transfer.
+		 */
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid)
+				return (EINVAL);
+			break;
+		}
+
+		/*
+		 * uiomove() updates both uiop->uio_resid and
+		 * uiop->uio_offset by the same amount.  But we want
+		 * uiop->uio_offset to change in increments
+		 * of LXPR_SDSIZE, which is different from the number of bytes
+		 * being returned to the user.
+		 * So we set uiop->uio_offset separately, at the bottom
+		 * of this loop, ignoring what uiomove() does.
+		 */
+		if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
+			return (error);
+
+next:
+		uiop->uio_offset = uoffset + LXPR_SDSIZE;
+	}
+
+	if (eofp)
+		*eofp =
+		    (uiop->uio_offset >=
+			((v.v_proc + PROCDIRFILES + 2) * LXPR_SDSIZE)) ? 1 : 0;
+
+	return (0);
+}
+
+
+/*
+ * Read entries from a /proc/<pid> directory.  Fails with ENOENT if the
+ * process no longer exists or is still being created (SIDL); otherwise
+ * the piddir table is returned by lxpr_readdir_common().
+ */
+static int
+lxpr_readdir_piddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	proc_t *p;
+	pid_t target;
+	int gone;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PIDDIR);
+
+	/* can't read its contents if it died */
+	mutex_enter(&pidlock);
+
+	/* pid 1 stands for the zone's init process */
+	if (lxpnp->lxpr_pid == 1)
+		target = curproc->p_zone->zone_proc_initpid;
+	else
+		target = lxpnp->lxpr_pid;
+
+	p = prfind(target);
+	gone = (p == NULL || p->p_stat == SIDL);
+
+	mutex_exit(&pidlock);
+
+	if (gone)
+		return (ENOENT);
+
+	return (lxpr_readdir_common(lxpnp, uiop, eofp, piddir, PIDDIRFILES));
+}
+
+
+/*
+ * Read entries from the "net" directory; everything in it comes from
+ * the netdir table.
+ */
+static int
+lxpr_readdir_netdir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	int err;
+
+	ASSERT(lxpnp->lxpr_type == LXPR_NETDIR);
+
+	err = lxpr_readdir_common(lxpnp, uiop, eofp, netdir, NETDIRFILES);
+	return (err);
+}
+
+
+/*
+ * Read entries from a /proc/<pid>/fd directory: "." and ".." followed by
+ * one entry, named by its decimal number, for every open file descriptor.
+ *
+ * Returns ENOENT if the process cannot be locked, EINVAL if the buffer
+ * cannot hold even one entry, the error from uiomove(), or 0.  On success
+ * *eofp (if eofp is not NULL) reports whether the end of the directory has
+ * been reached.  The process's fi_lock is held while the descriptor table
+ * is scanned and is released on every return path.
+ */
+static int
+lxpr_readdir_fddir(lxpr_node_t *lxpnp, uio_t *uiop, int *eofp)
+{
+	/* bp holds one dirent64 structure */
+	longlong_t bp[DIRENT64_RECLEN(LXPNSIZ) / sizeof (longlong_t)];
+	dirent64_t *dirent = (dirent64_t *)bp;
+	ssize_t oresid;	/* save a copy for testing later */
+	ssize_t uresid;
+	off_t uoffset;
+	int error;
+	int ceof = 0;
+	proc_t *p;
+	int fddirsize;
+	uf_info_t *fip;
+
+
+	ASSERT(lxpnp->lxpr_type == LXPR_PID_FDDIR);
+
+	oresid = uiop->uio_resid;
+
+	/* can't read its contents if it died */
+	p = lxpr_lock(lxpnp->lxpr_pid);
+	if (p == NULL)
+		return (ENOENT);
+
+	/* Get open file info */
+	fip = (&(p)->p_user.u_finfo);
+	mutex_enter(&fip->fi_lock);
+
+	/* the table size is sampled with fi_lock held */
+	if ((p->p_stat == SZOMB) || (p->p_flag & SSYS) || (p->p_as == &kas))
+		fddirsize = 0;
+	else
+		fddirsize = fip->fi_nfiles;
+
+	lxpr_unlock(p);
+
+	/* Do the fixed entries (in this case just "." & "..") */
+	error = lxpr_readdir_common(lxpnp, uiop, &ceof, NULL, 0);
+
+	/*
+	 * Finished if we got an error or if we couldn't do all the table.
+	 * Either way leave through "out" so that fi_lock is dropped.
+	 */
+	if (error != 0)
+		goto out;
+	if (ceof == 0) {
+		if (eofp)
+			*eofp = 0;
+		goto out;
+	}
+
+	/* clear out the dirent buffer */
+	bzero(bp, sizeof (bp));
+
+	/*
+	 * Loop until user's request is satisfied or until
+	 * all file descriptors have been examined.
+	 */
+	for (; (uresid = uiop->uio_resid) > 0;
+		uiop->uio_offset = uoffset + LXPR_SDSIZE) {
+		int reclen;
+		int fd;
+		int len;
+
+		uoffset = uiop->uio_offset;
+
+		/*
+		 * Stop at the end of the fd list
+		 */
+		fd = (uoffset / LXPR_SDSIZE) - 2;
+		if (fd >= fddirsize) {
+			if (eofp) {
+				*eofp = 1;
+			}
+			goto out;
+		}
+
+		/* descriptors that aren't open have no entry */
+		if (fip->fi_list[fd].uf_file == NULL)
+			continue;
+
+		dirent->d_ino = lxpr_inode(LXPR_PID_FD_FD, lxpnp->lxpr_pid, fd);
+		len = snprintf(dirent->d_name, LXPNSIZ, "%d", fd);
+		ASSERT(len < LXPNSIZ);
+		reclen = DIRENT64_RECLEN(len);
+
+		dirent->d_off = (off64_t)(uoffset + LXPR_SDSIZE);
+		dirent->d_reclen = (ushort_t)reclen;
+
+		if (reclen > uresid) {
+			/*
+			 * Error if no entries have been returned yet.
+			 */
+			if (uresid == oresid)
+				error = EINVAL;
+			goto out;
+		}
+
+		if (error = uiomove((caddr_t)dirent, reclen, UIO_READ, uiop))
+			goto out;
+	}
+
+	if (eofp)
+		*eofp =
+		    (uiop->uio_offset >= ((fddirsize+2) * LXPR_SDSIZE)) ? 1 : 0;
+
+out:
+	mutex_exit(&fip->fi_lock);
+	return (error);
+}
+
+
+/*
+ * lxpr_readlink(): Vnode operation for VOP_READLINK()
+ *
+ * Nodes backed by a regular file or directory report that vnode's path
+ * (after a read-access check); the "self" node reports the caller's pid.
+ * Every other node, and any vnode that is not a symlink, yields EINVAL.
+ */
+static int
+lxpr_readlink(vnode_t *vp, uio_t *uiop)
+{
+	char target[MAXPATHLEN + 1];
+	lxpr_node_t *node = VTOLXP(vp);
+	vnode_t *realvp;
+	int err;
+
+	/* must be a symbolic link file */
+	if (vp->v_type != VLNK)
+		return (EINVAL);
+
+	realvp = node->lxpr_realvp;
+
+	/*
+	 * A path is only produced for something that is really a
+	 * regular file or directory (but not for anything else)
+	 */
+	if (realvp != NULL &&
+	    (realvp->v_type == VDIR || realvp->v_type == VREG)) {
+		err = lxpr_access(vp, VREAD, 0, CRED());
+		if (err == 0)
+			err = vnodetopath(NULL, realvp, target,
+			    sizeof (target), CRED());
+		if (err != 0)
+			return (err);
+	} else if (node->lxpr_type == LXPR_SELF) {
+		/*
+		 * The zone's init process is presented as the Linux
+		 * default of pid 1
+		 */
+		pid_t pid = curproc->p_pid;
+
+		if (pid == curproc->p_zone->zone_proc_initpid)
+			pid = 1;
+
+		/*
+		 * No need to check the result: every possible int
+		 * fits within MAXPATHLEN bytes
+		 */
+		(void) snprintf(target, sizeof (target), "%d", pid);
+	} else {
+		/*
+		 * An error must be returned here, otherwise the
+		 * symlink would look empty and hence be taken for "."
+		 */
+		return (EINVAL);
+	}
+
+	/* copy the link data to user space */
+	return (uiomove(target, strlen(target), UIO_READ, uiop));
+}
+
+
+/*
+ * lxpr_inactive(): Vnode operation for VOP_INACTIVE()
+ * Vnode is no longer referenced, deallocate the file
+ * and all its resources.
+ *
+ * All of the work is done by lxpr_freenode() on the lx /proc node behind
+ * the vnode; the credential argument is unused.
+ */
+/* ARGSUSED */
+static void
+lxpr_inactive(vnode_t *vp, cred_t *cr)
+{
+	lxpr_freenode(VTOLXP(vp));
+}
+
+
+/*
+ * lxpr_sync(): Vnode operation for VOP_SYNC()
+ *
+ * Takes no arguments and always returns 0.
+ */
+static int
+lxpr_sync()
+{
+	/*
+	 * nothing to sync but this
+	 * function must never fail
+	 */
+	return (0);
+}
+
+
+/*
+ * lxpr_cmp(): Vnode operation for VOP_CMP()
+ *
+ * Each vnode is first replaced by its underlying vnode for as long as it
+ * is an lx /proc vnode that has one.  If either result is still an lx
+ * /proc vnode the two are compared by identity; otherwise the comparison
+ * is passed on to VOP_CMP().
+ */
+static int
+lxpr_cmp(vnode_t *vp1, vnode_t *vp2)
+{
+	vnode_t *under;
+
+	for (;;) {
+		if (!vn_matchops(vp1, lxpr_vnodeops))
+			break;
+		under = VTOLXP(vp1)->lxpr_realvp;
+		if (under == NULL)
+			break;
+		vp1 = under;
+	}
+
+	for (;;) {
+		if (!vn_matchops(vp2, lxpr_vnodeops))
+			break;
+		under = VTOLXP(vp2)->lxpr_realvp;
+		if (under == NULL)
+			break;
+		vp2 = under;
+	}
+
+	if (vn_matchops(vp1, lxpr_vnodeops) || vn_matchops(vp2, lxpr_vnodeops))
+		return (vp1 == vp2);
+
+	return (VOP_CMP(vp1, vp2));
+}
+
+
+/*
+ * lxpr_realvp(): Vnode operation for VOP_REALVP()
+ *
+ * Stores in *vpp the vnode underlying vp, further resolved through that
+ * vnode's own VOP_REALVP() when that succeeds, or vp itself if it has no
+ * underlying vnode.  Always returns 0.
+ */
+static int
+lxpr_realvp(vnode_t *vp, vnode_t **vpp)
+{
+	vnode_t *under = VTOLXP(vp)->lxpr_realvp;
+	vnode_t *deeper;
+
+	if (under == NULL) {
+		*vpp = vp;
+		return (0);
+	}
+
+	deeper = under;
+	if (VOP_REALVP(under, &deeper) == 0)
+		under = deeper;
+
+	*vpp = under;
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/sys/ldlinux.h b/usr/src/uts/common/brand/lx/sys/ldlinux.h
new file mode 100644
index 0000000000..b259c05d97
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/ldlinux.h
@@ -0,0 +1,117 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LDLINUX_H
+#define	_SYS_LDLINUX_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The ldlinux streams module is only intended for use in lx branded zones.
+ * This streams module implements the following ioctls:
+ * 	TIOCSETLD and TIOCGETLD
+ *
+ * These ioctls are special ioctls supported only by the ldlinux streams
+ * module and invoked only by the lx brand emulation library.  These ioctls
+ * do not exist on native Linux systems.
+ *
+ * The TIOCSETLD ioctl is used when emulating the following Linux ioctls:
+ *	TCSETS/TCSETSW/TCSETSF
+ *	TCSETA/TCSETAW/TCSETAF
+ *
+ * The TIOCGETLD ioctl is used when emulating the following Linux ioctls:
+ *	TCGETS/TCGETA
+ *
+ * This module is needed to emulate these ioctls because the following arrays:
+ *	termio.c_cc
+ *	termios.c_cc
+ * which are parameters for the following ioctls:
+ *	TCSETS/TCSETSW/TCSETSF
+ *	TCSETA/TCSETAW/TCSETAF
+ *	TCGETS/TCGETA
+ *
+ * are defined differently on Solaris and Linux.
+ *
+ * According to the termio(7I) man page on Solaris the following is true of
+ * the members of the c_cc array:
+ *	The VMIN element is the same element as the VEOF element.
+ *	The VTIME element is the same element as the VEOL element.
+ *
+ * But on Linux the termios(3) man page states:
+ *	These symbolic subscript values are all different, except that
+ *	VTIME, VMIN may have the same value as VEOL, VEOF, respectively.
+ *
+ * While the man page indicates that these values may be the same, empirical
+ * tests show them to be different.  Since these values are different on
+ * Linux systems it's possible that applications could set the members of
+ * the c_cc array to different values and then later expect to be able to
+ * read back those same separate values.  The ldlinux module exists to provide
+ * a per-stream storage area where the lx_brand emulation library can save
+ * these values.  The values are set and retrieved via the TIOCSETLD and
+ * TIOCGETLD ioctls respectively.
+ */
+
+#include <sys/termios.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	LDLINUX_MOD	"ldlinux"
+
+#ifdef _KERNEL
+
+/*
+ * LDLINUX_MODID - This should be a unique number associated with
+ * this particular module.  Unfortunately there is no authority responsible
+ * for administering this name space, hence there's no real guarantee that
+ * whatever number we choose will be unique.  Luckily, this constant
+ * is not really used anywhere by the system.  It is used by some
+ * kernel subsystems to check for the presence of certain streams
+ * modules with known id values.  Since no other kernel subsystem
+ * checks for the presence of this module we'll just set the id to 0.
+ */
+#define	LDLINUX_MODID	0
+
+struct ldlinux {
+	int	state;		/* state information */
+				/* Linux expects the next four c_cc values */
+				/* to be distinct, whereas Solaris (legally) */
+				/* overlaps their storage */
+	unsigned char veof;	/* saved Linux VEOF value */
+	unsigned char veol;	/* saved Linux VEOL value */
+	unsigned char vmin;	/* saved Linux VMIN value */
+	unsigned char vtime;	/* saved Linux VTIME value */
+};
+
+#define	ISPTSTTY	0x01	/* presumably a "state" flag -- TODO confirm */
+
+#endif /* _KERNEL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _SYS_LDLINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_audio.h b/usr/src/uts/common/brand/lx/sys/lx_audio.h
new file mode 100644
index 0000000000..cbb3431c4b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_audio.h
@@ -0,0 +1,130 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LX_AUDIO_H
+#define	_LX_AUDIO_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/zone.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * name for this driver
+ */
+#define	LX_AUDIO_DRV			"lx_audio"
+
+/*
+ * names for the minor nodes this driver exports
+ */
+#define	LXA_MINORNAME_DEVCTL		"lx_devctl"
+#define	LXA_MINORNAME_DSP		"lx_dsp"
+#define	LXA_MINORNAME_MIXER		"lx_mixer"
+
+/*
+ * minor numbers for the minor nodes this driver exports
+ */
+#define	LXA_MINORNUM_DEVCTL		0
+#define	LXA_MINORNUM_DSP		1
+#define	LXA_MINORNUM_MIXER		2
+#define	LXA_MINORNUM_COUNT		3	/* number of minors above */
+
+/*
+ * driver ioctls
+ *
+ * note that we're layering on top of solaris audio devices so we want
+ * to make sure that our ioctl namespace doesn't conflict with theirs.
+ * looking in sys/audioio.h and sys/mixer.h we see that they seem to
+ * use an _IO key of 'A' and 'M', so we'll choose an _IO key of 'a'.
+ */
+
+/*
+ * administrative ioctls.
+ * these ioctls are only supported on the DEVCTL minor node
+ */
+#define	LXA_IOC_ZONE_REG		(_IOR('a', 0, lxa_zone_reg_t))
+#define	LXA_IOC_ZONE_UNREG		(_IOR('a', 1, lxa_zone_reg_t))
+
+
+/*
+ * audio and mixer device ioctls
+ * these ioctls are supported on DSP and MIXER minor nodes.
+ */
+#define	LXA_IOC_GETMINORNUM		(_IOR('a', 20, int))
+
+/*
+ * audio device ioctls.
+ * these ioctls are supported on DSP minor nodes.
+ */
+#define	LXA_IOC_MMAP_OUTPUT		(_IOR('a', 41, int))
+#define	LXA_IOC_MMAP_PTR		(_IOR('a', 42, int))
+#define	LXA_IOC_GET_FRAG_INFO		(_IOR('a', 43, lxa_frag_info_t))
+#define	LXA_IOC_SET_FRAG_INFO		(_IOR('a', 44, lxa_frag_info_t))
+
+/*
+ * mixer device ioctls.
+ * these ioctls are supported on MIXER minor nodes.
+ */
+#define	LXA_IOC_MIXER_GET_VOL		(_IOR('a', 60, lxa_mixer_levels_t))
+#define	LXA_IOC_MIXER_SET_VOL		(_IOR('a', 61, lxa_mixer_levels_t))
+#define	LXA_IOC_MIXER_GET_MIC		(_IOR('a', 62, lxa_mixer_levels_t))
+#define	LXA_IOC_MIXER_SET_MIC		(_IOR('a', 63, lxa_mixer_levels_t))
+#define	LXA_IOC_MIXER_GET_PCM		(_IOR('a', 64, lxa_mixer_levels_t))
+#define	LXA_IOC_MIXER_SET_PCM		(_IOR('a', 65, lxa_mixer_levels_t))
+
+/* command structure for LXA_IOC_ZONE_REG and LXA_IOC_ZONE_UNREG */
+#define	LXA_INTSTRLEN 32
+typedef struct lxa_zone_reg {
+	char	lxa_zr_zone_name[ZONENAME_MAX];
+	char	lxa_zr_inputdev[LXA_INTSTRLEN];
+	char	lxa_zr_outputdev[LXA_INTSTRLEN];
+} lxa_zone_reg_t;
+
+/* command structure for LXA_IOC_GET_FRAG_INFO and LXA_IOC_SET_FRAG_INFO */
+typedef struct lxa_frag_info {
+	int	lxa_fi_size;	/* presumably fragment size -- TODO confirm */
+	int	lxa_fi_cnt;	/* presumably fragment count -- TODO confirm */
+} lxa_frag_info_t;
+
+/* command structure for LXA_IOC_MIXER_GET_* and LXA_IOC_MIXER_SET_* */
+typedef struct lxa_mixer_levels {
+	int	lxa_ml_gain;	/* AUDIO_MIN_GAIN .. AUDIO_MAX_GAIN */
+	int	lxa_ml_balance;	/* AUDIO_LEFT_BALANCE .. AUDIO_RIGHT_BALANCE */
+} lxa_mixer_levels_t;
+
+/* true if both mixer level fields are in range; evaluates x four times */
+#define	LXA_MIXER_LEVELS_OK(x) (((x)->lxa_ml_gain >= AUDIO_MIN_GAIN) && \
+				((x)->lxa_ml_gain <= AUDIO_MAX_GAIN) && \
+				((x)->lxa_ml_balance >= AUDIO_LEFT_BALANCE) && \
+				((x)->lxa_ml_balance <= AUDIO_RIGHT_BALANCE))
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LX_AUDIO_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs.h b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
new file mode 100644
index 0000000000..4436226deb
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs.h
@@ -0,0 +1,334 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LX_AUTOFS_H
+#define	_LX_AUTOFS_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+/*
+ * The lx_autofs filesystem exists to emulate the Linux autofs filesystem
+ * and provide support for the Linux "automount" automounter.
+ *
+ *
+ *
+ * +++ Linux automounter background.
+ *
+ * Linux has two automounters: "amd" and "automount"
+ *
+ * 1) "amd" is a userland NFS server.  It basically mounts an NFS filesystem
+ * at an automount point, and it acts as the NFS server for the mount.  When
+ * an access is done to that NFS filesystem, the access is redirected by the
+ * kernel to the "amd" process via rpc.  "amd" then looks up any information
+ * required to resolve the requests, mounts real NFS filesystems if
+ * necessary, and returns.  "amd" has its own strange configuration
+ * mechanism that doesn't seem to be very compatible with Solaris's network
+ * based automounter map support.
+ *
+ * 2) "automount" is the other Linux automounter.  It utilizes a kernel
+ * filesystem (autofs) to provide its functionality.  Basically, it mounts
+ * the autofs filesystem at any automounter controlled mount point.  This
+ * filesystem then intercepts and redirects lookup operations (and only
+ * lookup ops) to the userland automounter process via a pipe.  (The
+ * pipe to the automounter is established via mount options when the autofs
+ * filesystem is mounted.)  When the automounter receives a request via this
+ * pipe, it does lookups to whatever backing store it's configured to use,
+ * does mkdir operations on the autofs filesystem, mounts remote NFS
+ * filesystems on any leaf directories it just created, and signals the
+ * autofs filesystem via an ioctl to let it know that the lookup can
+ * continue.
+ *
+ *
+ *
+ * +++ Linux autofs (and automount daemon) notes
+ *
+ * Since we're mimicking the behavior of the Linux autofs filesystem it's
+ * important to document some of its observed behavior here since there's
+ * no doubt that in the future this behavior will change.  These comments
+ * apply to the behavior of the automounter as observed on a system
+ * running Linux v2.4.21 (autofs is bundled with the Linux kernel).
+ *
+ * A) Autofs allows root owned, non-automounter processes to create
+ * directories in the autofs filesystem.  The autofs filesystem treats the
+ * automounter's process group as special, but it doesn't prevent root
+ * processes outside of the automounter's process group from creating new
+ * directories in the autofs filesystem.
+ *
+ * B) Autofs doesn't allow creation of any non-directory entries in the
+ * autofs filesystem.  No entity can create files (e.g. /bin/touch or
+ * VOP_CREATE/VOP_SYMLINK/etc.)  The only entries that can exist within
+ * the autofs filesystem are directories.
+ *
+ * C) Autofs only intercepts vop lookup operations.  Notably, it does _not_
+ * intercept and re-direct vop readdir operations.  This means that the
+ * observed behavior of the Linux automounter can be considerably different
+ * from that of the Solaris automounter.  Specifically, on Solaris if autofs
+ * mount point is mounted _without_ the -nobrowse option then if a user does
+ * an ls operation (which translates into a vop readdir operation) then the
+ * automounter will intercept that operation and list all the possible
+ * directories and mount points without actually mounting any filesystems.
+ * Essentially, all automounter managed mount points on Linux will behave
+ * like "-nobrowse" mount points on Solaris.  Here's an example to
+ * illustrate this.  If /ws was mounted on Solaris without the -nobrowse
+ * option and an auto_ws yp map was setup as the backing store for this
+ * mount point, then an "ls /ws" would list all the keys in the map as
+ * valid directories, but an "ls /ws" on Linux would list an empty
+ * directory.
+ *
+ * D) NFS mounts are performed by the automount process.  When the automount
+ * process gets a redirected lookup request, it determines _all_ the
+ * possible remote mount points for that request, creates directory paths
+ * via mkdir, and mounts the remote filesystems on the newly created paths.
+ * So for example, if a machine called mcescher exported /var/crash and
+ * /var/core, an "ls /net/mcescher" would result in the following actions
+ * being done by the automounter:
+ * 	mkdir /net/mcescher
+ * 	mkdir /net/mcescher/var
+ * 	mkdir /net/mcescher/var/crash
+ * 	mkdir /net/mcescher/var/core
+ * 	mount mcescher:/var/crash /net/mcescher/var/crash
+ * 	mount mcescher:/var/core /net/mcescher/var/core
+ * once the automounter completed the work above it would signal the autofs
+ * filesystem (via an ioctl) that the lookup could continue.
+ *
+ * E.1) Autofs only redirects vop lookup operations for path entries that
+ * don't already exist in the autofs filesystem.  So for the example above,
+ * an initial (after the start of the automounter) "ls /net/mcescher" would
+ * result in a request to the automounter.  A subsequent "ls /net/mcescher"
+ * would not result in a request to the automounter.  Even if
+ * /net/mcescher/var/crash and /net/mcescher/var/core were manually unmounted
+ * after the initial "ls /net/mcescher", a subsequent "ls /net/mcescher"
+ * would not result in a new request to the automounter.
+ *
+ * E.2) Autofs lookup requests that are sent to the automounter only include
+ * the root directory path component.  So for example, after starting up
+ * the automounter if a user were to do a "ls /net/mcescher/var/crash", the
+ * lookup request actually sent to the automounter would just be for
+ * "mcescher".  (The same request as if the user had done "ls /net/mcescher".)
+ *
+ * E.3) The two statements above aren't entirely true.  The Linux
+ * autofs filesystem will also redirect lookup operations for leaf
+ * directories that don't have a filesystem mounted on them.  Using the
+ * example above, if a user did a "ls /net/mcescher", then manually
+ * unmounted /net/mcescher/var/crash, and then did an "ls
+ * /net/mcescher/var/crash", this would result in a request for
+ * "mcescher/var/crash" being sent to the automounter.  The strange thing
+ * (a Linux bug perhaps) is that the automounter won't do anything with this
+ * request and the lookup will fail.
+ *
+ * F) The autofs filesystem communication protocol (what ioctls it supports
+ * and what data it passes to the automount process) are versioned.  The
+ * source for the userland automount daemon (i looked at version v3.1.7)
+ * seemed to support two versions of the Linux kernel autofs implementation.
+ * Both versions supported communication with a pipe and the format of the
+ * structure passed via this pipe was the same.  The difference between the
+ * two versions was in the functionality supported.  (The v3 version has
+ * additional ioctls to support automount timeouts.)
+ *
+ *
+ *
+ * +++ lx_autofs notes
+ *
+ * 1) In general, the lx_autofs filesystem tries to mimic the behavior of the
+ * Linux autofs filesystem with the following exceptions:
+ *
+ * 	1.1) We don't bother to implement the E.3 functionality listed above
+ * 	since it doesn't appear to be of any use.
+ *
+ * 	1.2) We only implement v2 of the automounter protocol since
+ * 	implementing v3 would take a _lot_ more work.  If this proves to be a
+ * 	problem we can re-visit this decision later.  (More details about v3
+ * 	support are included in comments below.)
+ *
+ * 2) In general, the approach taken for lx_autofs is to keep it as simple
+ * as possible and to minimize its memory usage.  To do this all information
+ * about the contents of the lx_autofs filesystem are mirrored in the
+ * underlying filesystem that lx_autofs is mounted on and most vop operations
+ * are simply passed onto this underlying filesystem.  This means we don't
+ * have to implement most of the complex operations that a full filesystem
+ * normally has to implement.  It also means that most of our filesystem state
+ * (wrt the contents of the filesystem) doesn't actually have to be stored
+ * in memory, we can simply go to the underlying filesystem to get it when
+ * it's requested.  For the purposes of discussion, we'll call the underlying
+ * filesystem the "backing store."
+ *
+ * The backing store is a directory called ".lx_afs" which is created in
+ * the directory where the lx_autofs filesystem is mounted.  When the lx_autofs
+ * filesystem is unmounted this backing store directory is deleted.  If this
+ * directory exists at mount time (perhaps the system crashed while a previous
+ * lx_autofs instance was mounted at the same location) it will be deleted.
+ * There are a few implications of using a backing store worth mentioning.
+ *
+ * 	2.1) lx_autofs can't be mounted on a read only filesystem.  If this
+ * 	proves to be a problem we can probably move the location of the
+ * 	backing store.
+ *
+ * 	2.2) If the backing store filesystem runs out of space then the
+ * 	automounter process won't be able to create more directories and mount
+ * 	new filesystems.  Of course, strange failures usually happen when
+ * 	filesystems run out of space.
+ *
+ * 3) Why aren't we using gfs?  gfs has two different usage models.
+ *
+ * 	3.1) I'm my own filesystem but i'm using gfs to help with managing
+ * 	readdir operations.
+ *
+ * 	3.2) I'm a gfs filesystem and gfs is managing all my vnodes
+ *
+ * We're not using the 3.1 interfaces because we don't implement readdir
+ * ourselves.  We pass all readdir operations onto the backing store
+ * filesystem and utilize its readdir implementation.
+ *
+ * We're not using the 3.2 interfaces because they are really designed for
+ * in memory filesystems where all of the filesystem state is stored in
+ * memory.  They don't lend themselves to filesystems where part of the
+ * state is in memory and part of the state is on disk.
+ *
+ * For more information on gfs take a look at the block comments in the
+ * top of gfs.c
+ */
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Note that the name of the actual Solaris filesystem is lx_afs and not
+ * lx_autofs.  This is because filesystem names are stupidly limited to 8
+ * characters.
+ */
+#define	LX_AUTOFS_NAME			"lx_afs"
+
+/*
+ * Mount options supported.
+ */
+#define	LX_MNTOPT_FD			"fd"
+#define	LX_MNTOPT_PGRP			"pgrp"
+#define	LX_MNTOPT_MINPROTO		"minproto"
+#define	LX_MNTOPT_MAXPROTO		"maxproto"
+
+/* Version of the Linux kernel automount protocol we support. */
+#define	LX_AUTOFS_PROTO_VERSION		2
+
+/*
+ * Command structure sent to automount process from lx_autofs via a pipe.
+ * This structure is the same for v2 and v3 of the automount protocol
+ * (the communication pipe is established at mount time).
+ */
+typedef struct lx_autofs_pkt {
+	int	lap_protover;	/* protocol version number */
+	int	lap_constant;	/* always set to 0 */
+	int	lap_id;		/* every pkt must have a unique id */
+	int	lap_name_len;	/* doesn't count newline or NUL */
+	char	lap_name[256];	/* path component to look up */
+} lx_autofs_pkt_t;
+
+/*
+ * Ioctls supported (v2 protocol).
+ */
+#define	LX_AUTOFS_IOC_READY		0x00009360 /* arg: int */
+#define	LX_AUTOFS_IOC_FAIL		0x00009361 /* arg: int */
+#define	LX_AUTOFS_IOC_CATATONIC		0x00009362 /* arg: <none> */
+
+/*
+ * Ioctls not supported (v3 protocol).
+ *
+ * Initially we're only going to support v2 of the Linux kernel automount
+ * protocol.  This means that we don't support the following ioctls.
+ *
+ * 1) The protocol version ioctl (by not supporting it the automounter
+ * will assume version 2).
+ *
+ * 2) Automounter timeout ioctls.  For v3 and later the automounter can
+ * be started with a timeout option.  It will notify the filesystem of
+ * this timeout and, if any automounter filesystem root directory entry
+ * is not in use, it will notify the automounter via the LX_AUTOFS_IOC_EXPIRE
+ * ioctl.  For example, if the timeout is 60 seconds, the Linux
+ * automounter will use the LX_AUTOFS_IOC_EXPIRE ioctl to query for
+ * timeouts more often than that.  (v3.1.7 of the automount daemon would
+ * perform this ioctl every <timeout>/4 seconds.)  Then the autofs
+ * filesystem will
+ * report top level directories that aren't in use to the automounter
+ * via this ioctl.  If /net was managed by the automounter and
+ * there were the following mount points:
+ *	/net/jurassic/var/crash
+ *	/net/mcescher/var/crash
+ * and no one was looking at any crash dumps on mcescher but someone
+ * was analyzing a crash dump on jurassic, then after <timeout> seconds
+ * had passed the autofs filesystem would let the automounter know that
+ * "mcescher" could be unmounted.  (Note the granularity of notification
+ * is directories in the root of the autofs filesystem.)  Here's two
+ * ideas for how this functionality could be implemented on Solaris:
+ *
+ * 2.1) The easy incomplete way.  Don't do any in-use detection.  Simply
+ * tell the automounter it can try to unmount the filesystem every time
+ * the specified timeout passes.  If the filesystem is in use then the
+ * unmount will fail.  This would break down for remote hosts with multiple
+ * mounts.  For example, if the automounter had mounted the following
+ * filesystems:
+ *	/net/jurassic/var/crash
+ *	/net/jurassic/var/core
+ * and the user was looking at a core file, and the timeout expired, the
+ * automounter would receive notification to unmount "jurassic".  Then
+ * it would unmount crash (which would succeed) and then try to unmount
+ * core (which would fail).  After that (since the automounter only
+ * performs mounts for failed lookups in the root autofs directory)
+ * future access to /net/jurassic/var/crash would result in access
+ * to an empty autofs directory.  We might be able to work around
+ * this by caching which root autofs directories we've timed out,
+ * then any access to paths that contain those directories could be
+ * stalled and we could resend another request to the automounter.
+ * This could work if the automounter ignores mount failures.
+ *
+ * 2.2) The hard correct way.  The real difficulty here is detecting
+ * files in use on other filesystems (say NFS) that have been mounted
+ * on top of autofs.  (Detecting in use autofs vnodes should be easy.)
+ * to do this we would probably have to create a new brand op to intercept
+ * mount/umount filesystem operations.  Then using this entry point we
+ * could detect mounts of other filesystems on top of lx_autofs.  When
+ * a successful mount finishes we would use the FEM (file event
+ * monitoring) framework to push a module onto that filesystem and
+ * intercept VOP operations that allocate/free vnodes in that filesystem.
+ * (We would also then have to track mount operations on top of that
+ * filesystem, etc.)  this would allow us to properly detect any
+ * usage of subdirectories of an autofs directory.
+ */
+#define	LX_AUTOFS_IOC_PROTOVER		0x80049363 /* arg: int */
+#define	LX_AUTOFS_IOC_EXPIRE		0x81109365 /* arg: lx_autofs_expire * */
+#define	LX_AUTOFS_IOC_SETTIMEOUT	0xc0049364 /* arg: ulong_t */
+
+typedef struct lx_autofs_expire {
+	int	lap_protover;	/* protocol version number */
+	int	lap_constant;	/* always set to 1 */
+	int	lap_name_len;	/* doesn't count newline or NUL */
+	char	lap_name[256];	/* path component that has timed out */
+} lx_autofs_expire_t;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LX_AUTOFS_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
new file mode 100644
index 0000000000..9c5517b8d5
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_autofs_impl.h
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LX_AUTOFS_IMPL_H
+#define	_LX_AUTOFS_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include <sys/file.h>
+#include <sys/id_space.h>
+#include <sys/modhash.h>
+#include <sys/vnode.h>
+
+#include <sys/lx_autofs.h>
+
+/*
+ * Space key (expands to LX_AUTOFS_NAME followed by "_udev").
+ * Used to persist data across lx_autofs filesystem module unloads.
+ */
+#define	LX_AUTOFS_SPACE_KEY_UDEV	LX_AUTOFS_NAME "_udev"
+
+/*
+ * Name of the backing store directory ("." followed by LX_AUTOFS_NAME).
+ */
+#define	LX_AUTOFS_BS_DIR		"." LX_AUTOFS_NAME
+
+#define	LX_AUTOFS_VFS_ID_HASH_SIZE	15
+#define	LX_AUTOFS_VFS_PATH_HASH_SIZE	15
+#define	LX_AUTOFS_VFS_VN_HASH_SIZE	15
+
+/*
+ * VFS data object.
+ */
+typedef struct lx_autofs_vfs {
+	/* Info about the underlying filesystem and backing store. */
+	vnode_t		*lav_mvp;
+	char		*lav_bs_name;
+	vnode_t		*lav_bs_vp;
+
+	/* Info about the automounter process managing this filesystem. */
+	int		lav_fd;
+	pid_t		lav_pgrp;
+	file_t		*lav_fifo_wr;
+	file_t		*lav_fifo_rd;
+
+	/* Each automount request needs a unique id. */
+	id_space_t	*lav_ids;
+
+	/* All remaining structure members are protected by lav_lock. */
+	kmutex_t	lav_lock;
+
+	/* Hashes to keep track of outstanding automounter requests. */
+	mod_hash_t	*lav_path_hash;
+	mod_hash_t	*lav_id_hash;
+
+	/* We need to keep track of all our vnodes. */
+	vnode_t		*lav_root;
+	mod_hash_t	*lav_vn_hash;
+} lx_autofs_vfs_t;
+
+/*
+ * Structure to keep track of requests sent to the automounter.
+ */
+typedef struct lx_autofs_lookup_req {
+	/* Packet that gets sent to the automounter. */
+	lx_autofs_pkt_t	lalr_pkt;
+
+	/* Reference count.  Always updated atomically. */
+	uint_t		lalr_ref;
+
+	/*
+	 * Fields to keep track of and synchronize threads waiting on a
+	 * lookup.  Fields are protected by lalr_lock.
+	 */
+	kmutex_t	lalr_lock;
+	kcondvar_t	lalr_cv;
+	int		lalr_complete;
+} lx_autofs_lookup_req_t;
+
+/*
+ * Generic stack element: a list node plus three untyped pointers.
+ */
+typedef struct stack_elem {
+	list_node_t	se_list;
+	caddr_t		se_ptr1;
+	caddr_t		se_ptr2;
+	caddr_t		se_ptr3;
+} stack_elem_t;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LX_AUTOFS_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
new file mode 100644
index 0000000000..4cbcda48bf
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -0,0 +1,210 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _LX_BRAND_H
+#define	_LX_BRAND_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifndef _ASM
+#include <sys/types.h>
+#include <sys/cpuvar.h>
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#define	LX_BRANDNAME	"lx"
+
+/*
+ * Brand uname info
+ */
+#define	LX_UNAME_SYSNAME	"Linux"
+#define	LX_UNAME_RELEASE	"2.4.21"
+#define	LX_UNAME_VERSION	"BrandZ fake linux"
+#define	LX_UNAME_MACHINE	"i686"
+
+#define	LX_LINKER_NAME "ld-linux.so.2"
+#define	LX_LINKER	"/lib/" LX_LINKER_NAME
+#define	LX_LIBC_NAME	"libc.so.6"
+#define	LIB_PATH	"/native/usr/lib/"
+#define	LX_LIB		"lx_brand.so.1"
+#define	LX_LIB_PATH	LIB_PATH LX_LIB
+
+#define	LX_NSYSCALLS	270
+
+/*
+ * brand(2) subcommands
+ *
+ * Everything >= 128 is a brand-specific subcommand.
+ * 192 to 462 (B_EMULATE_SYSCALL + LX_NSYSCALLS) are reserved for system
+ * calls, although most of that space is unused.
+ */
+#define	B_LPID_TO_SPAIR		128
+#define	B_SYSENTRY		129
+#define	B_SYSRETURN		130
+#define	B_PTRACE_SYSCALL	131
+#define	B_SET_AFFINITY_MASK	132
+#define	B_GET_AFFINITY_MASK	133
+
+#define	B_EMULATE_SYSCALL	192
+
+#define	LX_VERSION_1		1
+#define	LX_VERSION		LX_VERSION_1
+
+#define	LX_ATTR_RESTART_INIT	ZONE_ATTR_BRAND_ATTRS
+
+#ifndef	_ASM
+
+typedef struct lx_brand_registration {
+	uint_t lxbr_version;		/* version number */
+	void *lxbr_handler;		/* base address of handler */
+	void *lxbr_tracehandler;	/* base address of trace handler */
+	void *lxbr_traceflag;		/* address of trace flag */
+} lx_brand_registration_t;
+
+#ifdef	_SYSCALL32
+typedef struct lx_brand_registration32 {
+	uint32_t lxbr_version;		/* version number */
+	caddr32_t lxbr_handler;		/* base address of handler */
+	caddr32_t lxbr_tracehandler;	/* base address of trace handler */
+	caddr32_t lxbr_traceflag;	/* address of trace flag */
+} lx_brand_registration32_t;
+#endif	/* _SYSCALL32 */
+
+typedef struct lx_regs {
+	long lxr_gs;
+	long lxr_edi;
+	long lxr_esi;
+	long lxr_ebp;
+	long lxr_esp;
+	long lxr_ebx;
+	long lxr_edx;
+	long lxr_ecx;
+	long lxr_eax;
+	long lxr_eip;
+
+	long lxr_orig_eax;
+} lx_regs_t;
+
+#endif /* _ASM */
+
+/*
+ * GDT usage (GDT_TLSMAX is GDT_TLSMIN + 2, so LX_TLSNUM evaluates to 2)
+ */
+#define	GDT_TLSMIN	(GDT_BRANDMIN)
+#define	GDT_TLSMAX	(GDT_TLSMIN + 2)
+#define	LX_TLSNUM	(GDT_TLSMAX - GDT_TLSMIN)
+
+#ifndef _ASM
+
+/*
+ * Stores information needed by the lx linker to launch the main
+ * lx executable.
+ */
+typedef struct lx_elf_data {
+	int	ed_phdr;
+	int	ed_phent;
+	int	ed_phnum;
+	int	ed_entry;
+	int	ed_base;
+	int	ed_ldentry;
+} lx_elf_data_t;
+
+#ifdef	_KERNEL
+
+typedef struct lx_proc_data {
+	uintptr_t l_handler;	/* address of user-space handler */
+	uintptr_t l_tracehandler; /* address of user-space traced handler */
+	uintptr_t l_traceflag;	/* address of 32-bit tracing flag */
+	void (*l_sigrestorer[MAXSIG])(void); /* array of sigrestorer fns */
+	pid_t l_ppid;		/* pid of originating parent proc */
+	uint64_t l_ptrace;	/* process being observed with ptrace */
+	lx_elf_data_t l_elf_data; /* ELF data for Linux executable */
+} lx_proc_data_t;
+
+#ifdef __amd64
+typedef uint64_t lx_affmask_t;	/* Tolerates NCPU up to 64 */
+#else
+typedef uint32_t lx_affmask_t;	/* Tolerates NCPU up to 32 */
+#endif /* __amd64 */
+
+/*
+ * lx-specific data in the klwp_t
+ */
+typedef struct lx_lwp_data {
+	uint_t	br_lwp_flags;		/* misc. flags */
+	klwp_t	*br_lwp;		/* back pointer to container lwp */
+	int	br_signal;		/* signal to send to parent when */
+					/* clone()'ed child terminates */
+	int	br_exitwhy;		/* reason for thread (process) exit */
+	int	br_exitwhat;		/* exit code / killing signal */
+	lx_affmask_t br_affinitymask;	/* bitmask of CPU sched affinities */
+	struct user_desc br_tls[LX_TLSNUM];
+			/* descriptors used by libc for TLS */
+	pid_t	br_pid;			/* converted pid for this thread */
+	pid_t	br_tgid;		/* thread group ID for this thread */
+	pid_t	br_ppid;		/* parent pid for this thread */
+	id_t	br_ptid;		/* parent tid for this thread */
+	void	*br_clear_ctidp;	/* clone thread id ptr */
+	void	*br_set_ctidp;		/* clone thread id ptr */
+
+	/*
+	 * The following pointer is used by lx_clone()
+	 * to pass info into fork()
+	 */
+	void	 *br_clone_args;
+
+	/*
+	 * Space to save off userland Linux %gs pointer so we can restore it
+	 * before calling signal handlers.
+	 */
+	greg_t	br_ugs;
+
+	uint_t	br_ptrace;		/* ptrace is active for this LWP */
+} lx_lwp_data_t;
+
+#define	BR_CPU_BOUND	0x0001	/* presumably a br_lwp_flags bit -- confirm */
+
+#define	ttolxlwp(t)	((struct lx_lwp_data *)ttolwpbrand(t))
+#define	lwptolxlwp(l)	((struct lx_lwp_data *)lwptolwpbrand(l))
+#define	ttolxproc(t)	((struct lx_proc_data *)(t)->t_procp->p_brand_data)
+
+void	lx_brand_int80_callback(void);
+int64_t	lx_emulate_syscall(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t,
+	uintptr_t, uintptr_t);
+
+extern int lx_debug;	/* nonzero enables lx_print() output */
+/* an expression, so an unbraced "if (x) lx_print(...); else" stays correct */
+#define	lx_print	(!lx_debug) ? (void)0 : (void) printf
+#endif	/* _KERNEL */
+#endif /* _ASM */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LX_BRAND_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_futex.h b/usr/src/uts/common/brand/lx/sys/lx_futex.h
new file mode 100644
index 0000000000..ac963b015b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_futex.h
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_FUTEX_H
+#define	_SYS_LX_FUTEX_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Command codes accepted by lx_futex().  FUTEX_MAX_CMD is the highest
+ * valid code and is used to range-check the cmd argument.
+ */
+#define	FUTEX_WAIT		0
+#define	FUTEX_WAKE		1
+#define	FUTEX_FD		2
+#define	FUTEX_REQUEUE		3
+#define	FUTEX_CMP_REQUEUE	4
+#define	FUTEX_MAX_CMD		FUTEX_CMP_REQUEUE
+
+/*
+ * lx_futex() is the system call entry point.  lx_futex_init() prepares the
+ * waiter hash; lx_futex_fini() returns non-zero (EBUSY) while any waiter is
+ * still hashed and 0 otherwise.
+ */
+extern long lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+	uintptr_t addr2, int val2);
+extern void lx_futex_init(void);
+extern int lx_futex_fini(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LX_FUTEX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_impl.h b/usr/src/uts/common/brand/lx/sys/lx_impl.h
new file mode 100644
index 0000000000..12f1aab2b3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_impl.h
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_LX_IMPL_H
+#define	_LX_IMPL_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Handler pointer stored in the lx_sysent table.  It is declared with an
+ * empty parameter list, so the compiler does not check the arguments passed
+ * through it.
+ */
+typedef int64_t (*llfcn_t)();
+
+/*
+ * One entry of the lx_sysent[] table.
+ * NOTE(review): sy_narg is presumably the handler's argument count and
+ * sy_name the system call's name; confirm against the table definition.
+ */
+typedef struct lx_sysent {
+	int	sy_flags;
+	char	*sy_name;
+	llfcn_t	sy_callc;
+	char	sy_narg;
+} lx_sysent_t;
+
+/* Signature shared by the systrace entry and return hooks below. */
+typedef void (lx_systrace_f)(ulong_t, ulong_t, ulong_t, ulong_t, ulong_t,
+    ulong_t, ulong_t);
+
+
+extern lx_sysent_t lx_sysent[];
+
+extern lx_systrace_f *lx_systrace_entry_ptr;
+extern lx_systrace_f *lx_systrace_return_ptr;
+
+extern void lx_brand_systrace_enable(void);
+extern void lx_brand_systrace_disable(void);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _LX_IMPL_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ldt.h b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
new file mode 100644
index 0000000000..5080c3adae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ldt.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_LDT_H
+#define	_SYS_LINUX_LDT_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/segments.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Segment description as supplied by user space; lx_clone() copies one in
+ * when LX_CLONE_SETTLS is set.  LDT_INFO_TO_DESC() and DESC_TO_LDT_INFO()
+ * convert between this layout and a hardware descriptor.
+ */
+struct ldt_info {
+	uint_t	entry_number;		/* descriptor table slot */
+	uint_t	base_addr;		/* segment base */
+	uint_t	limit;			/* segment limit */
+	uint_t	seg_32bit:1,		/* copied to usd_def32 */
+		contents:2,		/* bits 2-3 of usd_type */
+		read_exec_only:1,	/* inverted into bit 1 of usd_type */
+		limit_in_pages:1,	/* copied to usd_gran */
+		seg_not_present:1,	/* inverted into usd_p */
+		useable:1;		/* copied to usd_avl */
+};
+
+/*
+ * True when info holds the "empty" pattern: base_addr, limit and contents
+ * are zero, read_exec_only and seg_not_present are set, and seg_32bit,
+ * limit_in_pages and useable are clear.  entry_number is not examined.
+ * The argument is evaluated several times.
+ */
+#define	LDT_INFO_EMPTY(info)						\
+	((info)->base_addr == 0 && (info)->limit == 0 &&		\
+	(info)->contents == 0 && (info)->read_exec_only == 1 &&		\
+	(info)->seg_32bit == 0 && (info)->limit_in_pages == 0 &&	\
+	(info)->seg_not_present == 1 && (info)->useable == 0)
+
+/*
+ * SETMODE() sets usd_long to SDP_SHORT on amd64 and expands to nothing
+ * elsewhere.  The amd64 expansion already ends in a semicolon.
+ */
+#if defined(__amd64)
+#define	SETMODE(desc)	(desc)->usd_long = SDP_SHORT;
+#else
+#define	SETMODE(desc)
+#endif
+
+/*
+ * Fill the descriptor *desc from the ldt_info *info.  Only the fields
+ * assigned below are written; anything else in *desc keeps its previous
+ * contents, so callers should start from a zeroed descriptor.
+ *
+ * The expansion is a bare { } block rather than do { } while (0): using it
+ * as the unbraced body of an "if" that has an "else" will not compile.
+ * Both arguments are evaluated several times.
+ */
+#define	LDT_INFO_TO_DESC(info, desc)	{				\
+	USEGD_SETBASE(desc, (info)->base_addr);				\
+	USEGD_SETLIMIT(desc, (info)->limit);				\
+	(desc)->usd_type = ((info)->contents << 2) | 			\
+	    ((info)->read_exec_only ^ 1) << 1 | 0x10;			\
+	(desc)->usd_dpl = SEL_UPL;					\
+	(desc)->usd_p = (info)->seg_not_present ^ 1;			\
+	(desc)->usd_def32 = (info)->seg_32bit;				\
+	(desc)->usd_gran = (info)->limit_in_pages;			\
+	(desc)->usd_avl = (info)->useable;				\
+	SETMODE(desc);							\
+}
+
+/*
+ * Inverse of LDT_INFO_TO_DESC(): zero *info and then fill it from the
+ * descriptor *desc.  entry_number is not derived from the descriptor and is
+ * therefore left at 0; the caller must set it.
+ *
+ * Like LDT_INFO_TO_DESC(), this expands to a bare { } block and evaluates
+ * its arguments several times.
+ */
+#define	DESC_TO_LDT_INFO(desc, info)	{				\
+	bzero((info), sizeof (*(info)));				\
+	(info)->base_addr = USEGD_GETBASE(desc);			\
+	(info)->limit = USEGD_GETLIMIT(desc);				\
+	(info)->seg_not_present = (desc)->usd_p ^ 1;			\
+	(info)->contents = ((desc)->usd_type >> 2) & 3;			\
+	(info)->read_exec_only = (((desc)->usd_type >> 1) & 1) ^ 1;	\
+	(info)->seg_32bit = (desc)->usd_def32;				\
+	(info)->limit_in_pages = (desc)->usd_gran;			\
+	(info)->useable = (desc)->usd_avl;				\
+}
+
+/*
+ * GDT helpers.  lx_clone() passes the user-requested entry number
+ * (GDT_TLSMIN..GDT_TLSMAX) as the int argument.
+ * NOTE(review): presumably they install / remove a descriptor in that GDT
+ * slot; confirm against the implementation.
+ */
+extern void lx_set_gdt(int, user_desc_t *);
+extern void lx_clear_gdt(int);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LINUX_LDT_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_pid.h b/usr/src/uts/common/brand/lx/sys/lx_pid.h
new file mode 100644
index 0000000000..80c8079f0b
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_pid.h
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LX_PID_H
+#define	_SYS_LX_PID_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/note.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+/*
+ * Association between a Solaris (pid, tid) pair and a Linux pid.  Each
+ * entry carries two independent chain links, one per hash table.
+ */
+struct lx_pid {
+	pid_t	s_pid;			/* the solaris pid and ... */
+	id_t	s_tid;			/* ... tid pair */
+	pid_t	l_pid;			/* the corresponding linux pid */
+	time_t	l_start;		/* birthday of this pid */
+	/* NOTE(review): presumably the struct pid backing l_pid; confirm. */
+	struct pid *l_pidp;
+	struct lx_pid *stol_next;	/* link in stol hash table */
+	struct lx_pid *ltos_next;	/* link in ltos hash table */
+};
+
+/*
+ * Pid translation interfaces.  lx_lwp_ppid() is what lx_getppid() returns;
+ * it is called there with NULL for both pointer arguments.
+ */
+extern int lx_pid_assign(kthread_t *);
+extern void lx_pid_reassign(kthread_t *);
+extern void lx_pid_rele(pid_t, id_t);
+extern pid_t lx_lpid_to_spair(pid_t, pid_t *, id_t *);
+extern pid_t lx_lwp_ppid(klwp_t *, pid_t *, id_t *);
+extern void lx_pid_init(void);
+extern void lx_pid_fini(void);
+#endif
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LX_PID_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_ptm.h b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
new file mode 100644
index 0000000000..74bbc939a3
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_ptm.h
@@ -0,0 +1,44 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef	_SYS_PTM_LINUX_H
+#define	_SYS_PTM_LINUX_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* Driver name and minor node name used by the lx_ptm driver. */
+#define	LX_PTM_DRV		"lx_ptm"
+#define	LX_PTM_MINOR_NODE	"lx_ptmajor"
+
+/*
+ * Map an lx_ptm dev_t to a pts index: the device's minor number minus one.
+ */
+#define	LX_PTM_DEV_TO_PTS(dev)	(getminor(dev) - 1)
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_PTM_LINUX_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_sched.h b/usr/src/uts/common/brand/lx/sys/lx_sched.h
new file mode 100644
index 0000000000..b0ae748f3c
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_sched.h
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SCHED_H
+#define	_SYS_LINUX_SCHED_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/procset.h>
+#include <sys/priocntl.h>
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * Linux scheduler policies.
+ */
+#define	LX_SCHED_OTHER		0
+#define	LX_SCHED_FIFO		1
+#define	LX_SCHED_RR		2
+
+/* NOTE(review): presumably the highest Linux priority value; confirm. */
+#define	LX_PRI_MAX		99
+
+/* A Linux process id as passed in by the emulated system calls. */
+typedef	int l_pid_t;
+
+/* Linux scheduling parameters: a single priority value. */
+struct lx_sched_param {
+	int	lx_sched_prio;
+};
+
+extern int sched_setprocset(procset_t *, l_pid_t);
+extern long do_priocntlsys(int, procset_t *, void *);
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LINUX_SCHED_H */
diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
new file mode 100644
index 0000000000..b4d41d5241
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h
@@ -0,0 +1,68 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_LINUX_SYSCALLS_H
+#define	_SYS_LINUX_SYSCALLS_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#ifdef	_KERNEL
+
+/*
+ * In-kernel Linux system call handlers.  They are declared with empty
+ * parameter lists, so these declarations give callers no argument type
+ * checking; the real parameter lists are in the individual definitions.
+ */
+extern long lx_brk();
+extern long lx_getpid();
+extern long lx_getppid();
+extern long lx_clone();
+extern long lx_kill();
+extern long lx_tkill();
+extern long lx_modify_ldt();
+extern long lx_gettid();
+extern long lx_futex();
+extern long lx_get_thread_area();
+extern long lx_sched_getparam();
+extern long lx_sched_getscheduler();
+extern long lx_sched_rr_get_interval();
+extern long lx_sched_setparam();
+extern long lx_sched_setscheduler();
+extern long lx_set_thread_area();
+extern long lx_set_tid_address();
+extern long lx_setresgid();
+extern long lx_setresgid16();
+extern long lx_setresuid();
+extern long lx_setresuid16();
+extern long lx_sysinfo();
+extern long lx_setgroups();
+
+#endif	/* _KERNEL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_LINUX_SYSCALLS_H */
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_brk.c b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
new file mode 100644
index 0000000000..25a719986e
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_brk.c
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+
+/*
+ * The brk() system call needs to be in-kernel because Linux expects a call to
+ * brk(0) to return the current breakpoint.  In Solaris, the process breakpoint
+ * is setup and managed by libc.  Due to the way we link our libraries and the
+ * need for Linux to manage its own breakpoint, this has to remain in the
+ * kernel.
+ */
+extern int brk(caddr_t);
+
+/*
+ * Linux brk(2) emulation.
+ *
+ * A non-NULL nva is handed to the native brk().  Its return value is
+ * ignored and the lwp's error state is cleared afterwards, so the caller
+ * never sees a failure.  In every case the return value is the current
+ * process's p_brkbase + p_brksize.
+ */
+long
+lx_brk(caddr_t nva)
+{
+	proc_t *p = curproc;
+	klwp_t *lwp = ttolwp(curthread);
+
+	if (nva != NULL) {
+		(void) brk(nva);
+
+		/*
+		 * Despite claims to the contrary in the manpage, when Linux
+		 * brk() fails, errno is left unchanged.
+		 */
+		lwp->lwp_errno = 0;
+	}
+
+	return ((long)(p->p_brkbase + p->p_brksize));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_clone.c b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
new file mode 100644
index 0000000000..2af3c00bae
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_clone.c
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Linux clone flag bits.  LX_CSIGNAL masks the low byte of the flags word,
+ * which lx_clone() treats as a signal number; the remaining values are
+ * individual option bits.
+ */
+#define	LX_CSIGNAL		0x000000ff
+#define	LX_CLONE_VM		0x00000100
+#define	LX_CLONE_FS		0x00000200
+#define	LX_CLONE_FILES		0x00000400
+#define	LX_CLONE_SIGHAND	0x00000800
+#define	LX_CLONE_PID		0x00001000
+#define	LX_CLONE_PTRACE		0x00002000
+#define	LX_CLONE_PARENT		0x00008000
+#define	LX_CLONE_THREAD		0x00010000
+#define	LX_CLONE_SYSVSEM	0x00040000
+#define	LX_CLONE_SETTLS		0x00080000
+#define	LX_CLONE_PARENT_SETTID	0x00100000
+#define	LX_CLONE_CHILD_CLEARTID 0x00200000
+#define	LX_CLONE_DETACH		0x00400000
+#define	LX_CLONE_CHILD_SETTID	0x01000000
+
+/*
+ * Our lwp has already been created at this point, so this routine is
+ * responsible for setting up all the state needed to track this as a
+ * linux cloned thread.
+ *
+ * flags	Linux clone flags; the LX_CSIGNAL bits carry a signal number.
+ * stkp		not used by this routine.
+ * ptidp	user address that receives br_pid for LX_CLONE_PARENT_SETTID.
+ * ldtinfo	user address of a struct ldt_info, read for LX_CLONE_SETTLS.
+ * ctidp	user address recorded for LX_CLONE_CHILD_CLEARTID and written
+ *		with br_pid for LX_CLONE_CHILD_SETTID.
+ *
+ * Returns the lwp's br_pid.  Fails with EINVAL for a signal number above
+ * MAXSIG, an empty ldt_info, or a TLS entry outside GDT_TLSMIN..GDT_TLSMAX,
+ * and with EFAULT when a user address cannot be read or written.
+ */
+/* ARGSUSED */
+long
+lx_clone(int flags, void *stkp, void *ptidp, void *ldtinfo, void *ctidp)
+{
+	struct lx_lwp_data *lwpd = ttolxlwp(curthread);
+	struct ldt_info info;
+	struct user_desc descr;
+	int entry = -1;		/* GDT slot we loaded, or -1 for none */
+	int signo;
+
+	/* The mask already confines signo to 0..LX_CSIGNAL. */
+	signo = flags & LX_CSIGNAL;
+	if (signo > MAXSIG)
+		return (set_errno(EINVAL));
+
+	if (flags & LX_CLONE_SETTLS) {
+		int tls_index;
+
+		if (copyin((caddr_t)ldtinfo, &info, sizeof (info)))
+			return (set_errno(EFAULT));
+
+		if (LDT_INFO_EMPTY(&info))
+			return (set_errno(EINVAL));
+
+		entry = info.entry_number;
+		if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+			return (set_errno(EINVAL));
+
+		tls_index = entry - GDT_TLSMIN;
+
+		/*
+		 * Convert the user-space structure into a real x86
+		 * descriptor and copy it into this LWP's TLS array.  We
+		 * also load it into the GDT.
+		 *
+		 * Start from an all-zero descriptor: LDT_INFO_TO_DESC()
+		 * assigns individual fields only, so anything it does not
+		 * assign must not carry stack garbage into the GDT.
+		 */
+		bzero(&descr, sizeof (descr));
+		LDT_INFO_TO_DESC(&info, &descr);
+		bcopy(&descr, &lwpd->br_tls[tls_index], sizeof (descr));
+		lx_set_gdt(entry, &lwpd->br_tls[tls_index]);
+	}
+
+	lwpd->br_clear_ctidp =
+	    (flags & LX_CLONE_CHILD_CLEARTID) ?  ctidp : NULL;
+
+	/* LX_CLONE_DETACH suppresses the termination signal. */
+	if (signo && ! (flags & LX_CLONE_DETACH))
+		lwpd->br_signal = signo;
+	else
+		lwpd->br_signal = 0;
+
+	if (flags & LX_CLONE_THREAD)
+		lwpd->br_tgid = curthread->t_procp->p_pid;
+
+	if (flags & LX_CLONE_PARENT)
+		lwpd->br_ppid = 0;
+
+	/*
+	 * Publish the new tid to user space.  If either store faults, undo
+	 * the GDT entry loaded above before failing.
+	 */
+	if ((flags & LX_CLONE_CHILD_SETTID) && (ctidp != NULL) &&
+	    (suword32(ctidp, lwpd->br_pid) != 0)) {
+		if (entry >= 0)
+			lx_clear_gdt(entry);
+		return (set_errno(EFAULT));
+	}
+	if ((flags & LX_CLONE_PARENT_SETTID) && (ptidp != NULL) &&
+	    (suword32(ptidp, lwpd->br_pid) != 0)) {
+		if (entry >= 0)
+			lx_clear_gdt(entry);
+		return (set_errno(EFAULT));
+	}
+
+	return (lwpd->br_pid);
+}
+
+/*
+ * Record tidp as the calling lwp's br_clear_ctidp and return the lwp's
+ * br_pid.
+ */
+long
+lx_set_tid_address(int *tidp)
+{
+	struct lx_lwp_data *lxlwp = ttolxlwp(curthread);
+	long tid = lxlwp->br_pid;
+
+	lxlwp->br_clear_ctidp = tidp;
+	return (tid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_futex.c b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
new file mode 100644
index 0000000000..ceb6f330aa
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_futex.c
@@ -0,0 +1,471 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/debug.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <vm/seg_vn.h>
+#include <vm/page.h>
+#include <sys/mman.h>
+#include <sys/timer.h>
+#include <sys/condvar.h>
+#include <sys/inttypes.h>
+#include <sys/lx_futex.h>
+
+/*
+ * Futexes are a Linux-specific implementation of inter-process mutexes.
+ * They are designed to use shared memory for simple, uncontested
+ * operations, and rely on the kernel to resolve any contention issues.
+ *
+ * Most of the information in this section comes from the paper "Futexes
+ * Are Tricky", by Ulrich Drepper.  This paper is currently available at:
+ * http://people.redhat.com/~drepper/futex.pdf.
+ *
+ * A futex itself is a 4-byte integer, which must be 4-byte aligned.  The
+ * value of this integer is expected to be modified using user-level atomic
+ * operations.  The futex(4) design itself does not impose any semantic
+ * constraints on the value stored in the futex; it is up to the
+ * application to define its own protocol.
+ *
+ * When the application decides that kernel intervention is required, it
+ * will use the futex(2) system call.  There are 5 different operations
+ * that can be performed on a futex, using this system call.  Since this
+ * interface has evolved over time, there are several different prototypes
+ * available to the user.  Fortunately, there is only a single kernel-level
+ * interface:
+ *
+ * long sys_futex(void *futex1, int cmd, int val1,
+ * 	struct timespec	*timeout, void *futex2, int val2)
+ *
+ * The kernel-level operations that may be performed on a futex are:
+ *
+ * FUTEX_WAIT
+ *
+ *	Atomically verify that futex1 contains the value val1.  If it
+ *	doesn't, return EWOULDBLOCK.  If it does contain the expected
+ *	value, the thread will sleep until somebody performs a FUTEX_WAKE
+ *	on the futex.  The caller may also specify a timeout, indicating
+ *	the maximum time the thread should sleep.  If the timer expires,
+ *	the call returns ETIMEDOUT.  If the thread is awoken with a signal,
+ *	the call returns EINTR.  Otherwise, the call returns 0.
+ *
+ * FUTEX_WAKE
+ *
+ *	Wake up val1 processes that are waiting on futex1.  The call
+ *	returns the number of blocked threads that were woken up.
+ *
+ * FUTEX_CMP_REQUEUE
+ *
+ *	If the value stored in futex1 matches that passed in as val2, wake
+ *	up val1 processes that are waiting on futex1.  Otherwise, return
+ *	EAGAIN.
+ *
+ *	If there are more than val1 threads waiting on the futex, remove
+ *	the remaining threads from this futex, and requeue them on futex2.
+ *	The caller can limit the number of threads being requeued by
+ *	encoding an integral numerical value in the position usually used
+ *	for the timeout pointer.
+ *
+ *	The call returns the number of blocked threads that were woken up
+ *	or requeued.
+ *
+ * FUTEX_REQUEUE
+ *
+ *	 Identical to FUTEX_CMP_REQUEUE except that it does not use val2.
+ *	 This command has been declared broken and obsolete, but we still
+ *	 need to support it.
+ *
+ * FUTEX_FD
+ *
+ *	Return a file descriptor, which can be used to refer to the futex.
+ *	We don't support this operation.
+ */
+
+/*
+ * This structure is used to track all the threads currently waiting on a
+ * futex.  There is one fwaiter_t for each blocked thread.  We store all
+ * fwaiter_t's in a hash structure, indexed by the memid_t of the integer
+ * containing the futex's value.
+ *
+ * At the moment, all fwaiter_t's for a single futex are simply dumped into
+ * the hash bucket.  If futex contention ever becomes a hot path, we can
+ * chain a single futex's waiters together.
+ *
+ * futex_wait() keeps its fwaiter_t on its own stack, so an entry is only
+ * valid while that thread is inside futex_wait().
+ */
+typedef struct fwaiter {
+	memid_t		fw_memid;	/* memid of the user-space futex */
+	kcondvar_t	fw_cv;		/* cond var */
+	struct fwaiter	*fw_next;	/* hash queue */
+	struct fwaiter	*fw_prev;	/* hash queue */
+	/*
+	 * Set to 1 by futex_wake()/futex_requeue() after they have hashed
+	 * the waiter out; tested by futex_wait() under the bucket lock.
+	 */
+	volatile int	fw_woken;
+} fwaiter_t;
+
+/*
+ * Copy / compare the two words of a memid_t.  Both take pointers.
+ * MEMID_COPY(s, d) copies *s into *d and expands to a bare { } block.
+ */
+#define	MEMID_COPY(s, d) \
+	{ (d)->val[0] = (s)->val[0]; (d)->val[1] = (s)->val[1]; }
+#define	MEMID_EQUAL(s, d) \
+	((d)->val[0] == (s)->val[0] && (d)->val[1] == (s)->val[1])
+
+/*
+ * Borrowed from the page freelist hash code.  HASH_FUNC() sums shifted
+ * copies of both memid words and masks the result to a bucket index in the
+ * range 0..HASH_SIZE-1.
+ */
+#define	HASH_SHIFT_SZ	7
+#define	HASH_SIZE	(1 << HASH_SHIFT_SZ)
+#define	HASH_FUNC(id)						\
+	((((uintptr_t)((id)->val[1]) >> PAGESHIFT) +			\
+	((uintptr_t)((id)->val[1]) >> (PAGESHIFT + HASH_SHIFT_SZ)) +	\
+	((uintptr_t)((id)->val[0]) >> 3) +				\
+	((uintptr_t)((id)->val[0]) >> (3 + HASH_SHIFT_SZ)) +		\
+	((uintptr_t)((id)->val[0]) >> (3 + 2 * HASH_SHIFT_SZ))) &	\
+	(HASH_SIZE - 1))
+
+/*
+ * futex_hash[i] is the head of bucket i's doubly-linked waiter chain and
+ * futex_hash_lock[i] is the mutex that protects it.
+ */
+static fwaiter_t *futex_hash[HASH_SIZE];
+static kmutex_t futex_hash_lock[HASH_SIZE];
+
+/*
+ * Insert fwp at the head of the hash chain selected by its fw_memid.  The
+ * caller must hold that bucket's futex_hash_lock.
+ */
+static void
+futex_hashin(fwaiter_t *fwp)
+{
+	int bucket = HASH_FUNC(&fwp->fw_memid);
+	fwaiter_t *old_head;
+
+	ASSERT(MUTEX_HELD(&futex_hash_lock[bucket]));
+
+	old_head = futex_hash[bucket];
+
+	fwp->fw_prev = NULL;
+	fwp->fw_next = old_head;
+	if (old_head != NULL)
+		old_head->fw_prev = fwp;
+
+	futex_hash[bucket] = fwp;
+}
+
+/*
+ * Unlink fwp from the hash chain selected by its fw_memid and clear its
+ * links.  The caller must hold that bucket's futex_hash_lock.
+ */
+static void
+futex_hashout(fwaiter_t *fwp)
+{
+	int bucket = HASH_FUNC(&fwp->fw_memid);
+	fwaiter_t *before = fwp->fw_prev;
+	fwaiter_t *after = fwp->fw_next;
+
+	ASSERT(MUTEX_HELD(&futex_hash_lock[bucket]));
+
+	/* Splice the neighbours together around fwp. */
+	if (before != NULL)
+		before->fw_next = after;
+	if (after != NULL)
+		after->fw_prev = before;
+
+	/* If fwp was the bucket head, its successor takes over. */
+	if (futex_hash[bucket] == fwp)
+		futex_hash[bucket] = after;
+
+	fwp->fw_next = NULL;
+	fwp->fw_prev = NULL;
+}
+
+/*
+ * Go to sleep until somebody does a WAKE operation on this futex, we get a
+ * signal, or the timeout expires.
+ *
+ * memid	identity of the futex; selects the hash bucket.
+ * addr		user address of the futex word.
+ * val		value the futex word must still hold for us to sleep.
+ * timeout	absolute timeout, or NULL for none.
+ *
+ * Returns 0 when woken.  Otherwise returns the result of set_errno() with
+ * EFAULT (futex word unreadable), EWOULDBLOCK (word != val), ETIMEDOUT or
+ * EINTR.
+ */
+static int
+futex_wait(memid_t *memid, caddr_t addr, int val, timespec_t *timeout)
+{
+	int err = 0;
+	int ret;
+	int32_t curval;
+	fwaiter_t fw;
+	int index;
+
+	fw.fw_woken = 0;
+	MEMID_COPY(memid, &fw.fw_memid);
+	cv_init(&fw.fw_cv, NULL, CV_DEFAULT, NULL);
+
+	index = HASH_FUNC(&fw.fw_memid);
+	mutex_enter(&futex_hash_lock[index]);
+
+	/*
+	 * The value check and the hash insertion both happen under the
+	 * bucket lock, so a waker cannot slip in between them.
+	 */
+	if (fuword32(addr, (uint32_t *)&curval)) {
+		err = EFAULT;
+		goto out;
+	}
+	if (curval != val) {
+		err = EWOULDBLOCK;
+		goto out;
+	}
+
+	futex_hashin(&fw);
+
+	while ((fw.fw_woken == 0) && (err == 0)) {
+		ret = cv_waituntil_sig(&fw.fw_cv, &futex_hash_lock[index],
+		    timeout, timechanged);
+		if (ret < 0)
+			err = ETIMEDOUT;
+		else if (ret == 0)
+			err = EINTR;
+	}
+
+	/*
+	 * The futex is normally hashed out in wakeup.  If we timed out or
+	 * got a signal, we need to hash it out here instead.
+	 *
+	 * If a waker marked us woken while the timeout or signal was being
+	 * delivered, it has already counted us among the threads it woke.
+	 * Report success in that case so the wakeup is not lost.
+	 */
+	if (fw.fw_woken == 0)
+		futex_hashout(&fw);
+	else
+		err = 0;
+
+out:
+	mutex_exit(&futex_hash_lock[index]);
+	cv_destroy(&fw.fw_cv);
+
+	if (err != 0)
+		return (set_errno(err));
+	return (0);
+}
+
+/*
+ * Wake at most wake_threads of the threads blocked on the futex identified
+ * by memid.  Returns the number of threads woken.
+ */
+static int
+futex_wake(memid_t *memid, int wake_threads)
+{
+	int bucket = HASH_FUNC(memid);
+	kmutex_t *lock = &futex_hash_lock[bucket];
+	fwaiter_t *cur, *following;
+	int nwoken = 0;
+
+	mutex_enter(lock);
+
+	cur = futex_hash[bucket];
+	while (cur != NULL && nwoken < wake_threads) {
+		/* Saved first: futex_hashout() clears cur's links. */
+		following = cur->fw_next;
+
+		if (MEMID_EQUAL(&cur->fw_memid, memid)) {
+			futex_hashout(cur);
+			cur->fw_woken = 1;
+			cv_signal(&cur->fw_cv);
+			nwoken++;
+		}
+
+		cur = following;
+	}
+
+	mutex_exit(lock);
+
+	return (nwoken);
+}
+
+/*
+ * Wake up to wake_threads waiting on the futex at memid.  If there are
+ * more than that many threads waiting, requeue the remaining threads on
+ * the futex at requeue_memid.
+ *
+ * memid		futex whose waiters are woken / moved.
+ * requeue_memid	futex the surplus waiters are moved to.
+ * wake_threads		maximum number of waiters to wake.
+ * requeue_threads	maximum number of waiters to move; 0 moves none.
+ * addr			user address read for the comparison below.
+ * cmpval		if non-NULL, the 32-bit word at addr must equal
+ *			*cmpval or nothing is done.
+ *
+ * Returns the number of waiters woken plus the number requeued.  On
+ * failure returns the result of set_errno() with EFAULT (addr unreadable)
+ * or EAGAIN (comparison failed).
+ */
+static int
+futex_requeue(memid_t *memid, memid_t *requeue_memid, int wake_threads,
+	ulong_t requeue_threads, caddr_t addr, int *cmpval)
+{
+	fwaiter_t *fwp, *next;
+	int index1, index2;
+	int error = 0;
+	int woken = 0;
+	ulong_t requeued = 0;
+	int32_t curval;
+	kmutex_t *l1, *l2;
+
+	/*
+	 * To ensure that we don't miss a wakeup if the value of cmpval
+	 * changes, we need to grab locks on both the original and new hash
+	 * buckets.  To avoid deadlock, we always grab the lower-indexed
+	 * lock first.
+	 */
+	index1 = HASH_FUNC(memid);
+	index2 = HASH_FUNC(requeue_memid);
+
+	if (index1 == index2) {
+		l1 = &futex_hash_lock[index1];
+		l2 = NULL;
+	} else if (index1 < index2) {
+		l1 = &futex_hash_lock[index1];
+		l2 = &futex_hash_lock[index2];
+	} else {
+		l1 = &futex_hash_lock[index2];
+		l2 = &futex_hash_lock[index1];
+	}
+
+	mutex_enter(l1);
+	if (l2 != NULL)
+		mutex_enter(l2);
+
+	if (cmpval != NULL) {
+		if (fuword32(addr, (uint32_t *)&curval)) {
+			error = EFAULT;
+			goto out;
+		}
+		if (curval != *cmpval) {
+			error = EAGAIN;
+			goto out;
+		}
+	}
+
+	for (fwp = futex_hash[index1]; fwp; fwp = next) {
+		/* Saved first: hashing fwp out or in rewrites its links. */
+		next = fwp->fw_next;
+		if (!MEMID_EQUAL(&fwp->fw_memid, memid))
+			continue;
+
+		if (woken < wake_threads) {
+			futex_hashout(fwp);
+			fwp->fw_woken = 1;
+			cv_signal(&fwp->fw_cv);
+			woken++;
+			continue;
+		}
+
+		/*
+		 * Check the limit before moving the waiter so that a limit
+		 * of 0 leaves every surplus waiter where it is.
+		 */
+		if (requeued >= requeue_threads)
+			break;
+
+		futex_hashout(fwp);
+		MEMID_COPY(requeue_memid, &fwp->fw_memid);
+		futex_hashin(fwp);
+		requeued++;
+	}
+
+out:
+	if (l2 != NULL)
+		mutex_exit(l2);
+	mutex_exit(l1);
+
+	if (error != 0)
+		return (set_errno(error));
+	return (woken + (int)requeued);
+}
+
+/*
+ * Copy in the relative timeout provided by the application and convert it
+ * to an absolute timeout by adding the current time.
+ *
+ * Returns 0 on success, EFAULT if the copyin fails, or EINVAL if
+ * itimerspecfix() rejects the value.
+ */
+static int
+get_timeout(void *lx_timeout, timestruc_t *timeout)
+{
+	timestruc_t now;
+
+	if (get_udatamodel() != DATAMODEL_NATIVE) {
+#ifdef _SYSCALL32_IMPL
+		timestruc32_t ts32;
+
+		/* 32-bit caller: widen each field into the native layout. */
+		if (copyin(lx_timeout, &ts32, sizeof (ts32)))
+			return (EFAULT);
+		timeout->tv_sec = (time_t)ts32.tv_sec;
+		timeout->tv_nsec = ts32.tv_nsec;
+#endif
+	} else if (copyin(lx_timeout, timeout, sizeof (*timeout))) {
+		return (EFAULT);
+	}
+
+	if (itimerspecfix(timeout))
+		return (EINVAL);
+
+	gethrestime(&now);
+	timespecadd(timeout, &now);
+	return (0);
+}
+
+/*
+ * Linux futex(2) entry point.
+ *
+ * addr		user address of the futex word (futex1); must be 4-byte
+ *		aligned.
+ * cmd		one of the FUTEX_* command codes.
+ * val		FUTEX_WAIT: expected value of the futex word.
+ *		Otherwise: maximum number of threads to wake.
+ * lx_timeout	FUTEX_WAIT: user address of a relative timeout, or 0.
+ *		FUTEX_REQUEUE / FUTEX_CMP_REQUEUE: maximum number of threads
+ *		to requeue, passed as an integer.
+ * addr2	user address of the futex to requeue onto (futex2).
+ * val2		FUTEX_CMP_REQUEUE: value futex1 must still hold.
+ *
+ * Returns 0 for a successful wait, or the number of threads woken (and
+ * requeued).  On failure returns the result of set_errno(); FUTEX_FD is
+ * not supported and fails with ENOSYS.
+ */
+long
+lx_futex(uintptr_t addr, int cmd, int val, uintptr_t lx_timeout,
+	uintptr_t addr2, int val2)
+{
+	struct as *as = curproc->p_as;
+	memid_t memid, requeue_memid;
+	timestruc_t timeout;
+	timestruc_t *tptr = NULL;
+	int requeue_threads = 0;
+	int *requeue_cmp = NULL;
+	int rval = 0;
+
+	/* must be aligned on int boundary */
+	if (addr & 0x3)
+		return (set_errno(EINVAL));
+
+	/* Sanity check the futex command */
+	if (cmd < 0 || cmd > FUTEX_MAX_CMD)
+		return (set_errno(EINVAL));
+
+	/* We don't support FUTEX_FD; say so instead of returning success. */
+	if (cmd == FUTEX_FD)
+		return (set_errno(ENOSYS));
+
+	/* Copy in the timeout structure from userspace. */
+	if (cmd == FUTEX_WAIT && lx_timeout != 0) {
+		rval = get_timeout((timespec_t *)lx_timeout, &timeout);
+		if (rval != 0)
+			return (set_errno(rval));
+		tptr = &timeout;
+	}
+
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) {
+		if (cmd == FUTEX_CMP_REQUEUE)
+			requeue_cmp = &val2;
+
+		/*
+		 * lx_timeout is nominally a pointer to a userspace
+		 * address.  For these two commands, it actually contains
+		 * an integer which indicates the maximum number of threads
+		 * to requeue.  This is horrible, and I'm sorry.
+		 */
+		requeue_threads = (int)lx_timeout;
+	}
+
+	/*
+	 * Translate the process-specific, user-space futex virtual
+	 * address(es) to universal memid.
+	 */
+	rval = as_getmemid(as, (void *)addr, &memid);
+	if (rval != 0)
+		return (set_errno(rval));
+
+	if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) {
+		rval = as_getmemid(as, (void *)addr2, &requeue_memid);
+		if (rval)
+			return (set_errno(rval));
+	}
+
+	switch (cmd) {
+	case FUTEX_WAIT:
+		rval = futex_wait(&memid, (void *)addr, val, tptr);
+		break;
+
+	case FUTEX_WAKE:
+		rval = futex_wake(&memid, val);
+		break;
+
+	case FUTEX_CMP_REQUEUE:
+	case FUTEX_REQUEUE:
+		/*
+		 * The comparison for FUTEX_CMP_REQUEUE is made against the
+		 * value stored in futex1, so pass addr, not addr2.
+		 */
+		rval = futex_requeue(&memid, &requeue_memid, val,
+		    requeue_threads, (void *)addr, requeue_cmp);
+
+		break;
+	}
+
+	return (rval);
+}
+
+/*
+ * Initialize every bucket lock and empty the waiter hash table.
+ */
+void
+lx_futex_init(void)
+{
+	kmutex_t *lock;
+
+	for (lock = futex_hash_lock; lock < &futex_hash_lock[HASH_SIZE]; lock++)
+		mutex_init(lock, NULL, MUTEX_DEFAULT, NULL);
+
+	bzero(futex_hash, sizeof (futex_hash));
+}
+
+/*
+ * Report whether the futex code is idle.  Buckets are examined in index
+ * order, each under its own lock; returns EBUSY at the first bucket that
+ * still holds a waiter and 0 if none does.
+ */
+int
+lx_futex_fini(void)
+{
+	int bucket;
+
+	for (bucket = 0; bucket < HASH_SIZE; bucket++) {
+		int occupied;
+
+		mutex_enter(&futex_hash_lock[bucket]);
+		occupied = (futex_hash[bucket] != NULL);
+		mutex_exit(&futex_hash_lock[bucket]);
+
+		if (occupied)
+			return (EBUSY);
+	}
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_getpid.c b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
new file mode 100644
index 0000000000..91dc24c6d6
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_getpid.c
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/zone.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cpuvar.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+
+/*
+ * Return the caller's Linux pid.  The zone's init process is always
+ * presented as pid 1; every other caller gets the br_tgid (thread group
+ * id) value recorded in its lwp's brand-specific data.
+ */
+long
+lx_getpid(void)
+{
+	lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+	/* Linux applications may rely on init having pid 1. */
+	if (curproc->p_pid == curproc->p_zone->zone_proc_initpid)
+		return (1);
+
+	return (lwpd->br_tgid);
+}
+
+/*
+ * return the parent pid, as reported by lx_lwp_ppid() for the current lwp
+ */
+long
+lx_getppid(void)
+{
+	return (lx_lwp_ppid(ttolwp(curthread), NULL, NULL));
+}
+
+/*
+ * return the thread id (the br_pid field of the lwp's brand-specific data)
+ */
+long
+lx_gettid(void)
+{
+	lx_lwp_data_t *lwpd = ttolxlwp(curthread);
+
+	return (lwpd->br_pid);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_id.c b/usr/src/uts/common/brand/lx/syscall/lx_id.c
new file mode 100644
index 0000000000..077194ee25
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_id.c
@@ -0,0 +1,297 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/zone.h>
+#include <sys/cred_impl.h>
+#include <sys/policy.h>
+
+typedef ushort_t	l_uid16_t;	/* id as passed to the *16 calls */
+typedef ushort_t	l_gid16_t;
+typedef uint_t		l_uid_t;	/* id as passed to the full calls */
+typedef uint_t		l_gid_t;
+/* Widen a 16-bit id; the 16-bit -1 is mapped to the full-width -1. */
+#define	LINUX_UID16_TO_UID32(uid16)	\
+	(((uid16) == (l_uid16_t)-1) ? ((l_uid_t)-1) : (l_uid_t)(uid16))
+
+#define	LINUX_GID16_TO_GID32(gid16)     \
+	(((gid16) == (l_gid16_t)-1) ? ((l_gid_t)-1) : (l_gid_t)(gid16))
+
+#define	LX_NGROUPS_MAX	32	/* Linux's NGROUPS_MAX */
+extern int setgroups(int, gid_t *);
+
+/*
+ * Set the real, effective and saved uids; an argument of -1 leaves that id
+ * alone.  Based on setreuid in common/syscall/uid.c; it exists because
+ * Solaris has no other system call that explicitly sets the saved uid.
+ */
+long
+lx_setresuid(l_uid_t ruid, l_uid_t euid, l_uid_t suid)
+{
+	proc_t	*p;
+	int	error = 0;
+	int	do_nocd = 0;
+	int	uidchge = 0;	/* set once upcount_inc(ruid) was done */
+	uid_t	oldruid = ruid;
+	cred_t	*cr, *newcr;
+	zoneid_t zoneid = getzoneid();
+
+	if ((ruid != -1 && (ruid < 0 || ruid > MAXUID)) ||
+	    (euid != -1 && (euid < 0 || euid > MAXUID)) ||
+	    (suid != -1 && (suid < 0 || suid > MAXUID))) {
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Need to pre-allocate the new cred structure before grabbing
+	 * the p_crlock mutex.
+	 */
+	newcr = cralloc();
+
+	p = ttoproc(curthread);
+
+retry:
+	mutex_enter(&p->p_crlock);
+	cr = p->p_cred;
+
+	if (ruid != -1 &&
+	    ruid != cr->cr_ruid && ruid != cr->cr_uid &&
+	    ruid != cr->cr_suid && secpolicy_allow_setid(cr, ruid, B_FALSE)) {
+		error = EPERM;
+	} else if (euid != -1 &&
+	    euid != cr->cr_ruid && euid != cr->cr_uid &&
+	    euid != cr->cr_suid && secpolicy_allow_setid(cr, euid, B_FALSE)) {
+		error = EPERM;
+	} else if (suid != -1 &&
+	    suid != cr->cr_ruid && suid != cr->cr_uid &&
+	    suid != cr->cr_suid && secpolicy_allow_setid(cr, suid, B_FALSE)) {
+		error = EPERM;
+	} else {
+		if (!uidchge && ruid != -1 && cr->cr_ruid != ruid) {
+			/*
+			 * The ruid of the process is going to change. In order
+			 * to avoid a race condition involving the
+			 * process count associated with the newly given ruid,
+			 * we increment the count before assigning the
+			 * credential to the process.
+			 * To do that, we'll have to take pidlock, so we first
+			 * release p_crlock.
+			 */
+			mutex_exit(&p->p_crlock);
+			uidchge = 1;
+			mutex_enter(&pidlock);
+			upcount_inc(ruid, zoneid);
+			mutex_exit(&pidlock);
+			/*
+			 * As we released p_crlock we can't rely on the cr
+			 * we read. So retry the whole thing.
+			 */
+			goto retry;
+		}
+		crhold(cr);
+		crcopy_to(cr, newcr);
+		p->p_cred = newcr;
+
+		if (euid != -1)
+			newcr->cr_uid = euid;
+		if (suid != -1)
+			newcr->cr_suid = suid;
+		if (ruid != -1) {
+			oldruid = newcr->cr_ruid;
+			newcr->cr_ruid = ruid;
+			ASSERT(ruid != oldruid ? uidchge : 1);
+		}
+
+		/*
+		 * A process that gives up its privilege
+		 * must be marked to produce no core dump.
+		 */
+		if ((cr->cr_uid != newcr->cr_uid ||
+		    cr->cr_ruid != newcr->cr_ruid ||
+		    cr->cr_suid != newcr->cr_suid))
+			do_nocd = 1;
+
+		crfree(cr);
+	}
+	mutex_exit(&p->p_crlock);
+
+	/*
+	 * We decrement the number of processes associated with the oldruid
+	 * to match the increment above, even if the ruid of the process
+	 * did not change or an error occurred (oldruid == ruid).
+	 */
+	if (uidchge) {
+		ASSERT(oldruid != -1 && ruid != -1);
+		mutex_enter(&pidlock);
+		upcount_dec(oldruid, zoneid);
+		mutex_exit(&pidlock);
+	}
+
+	if (error == 0) {
+		if (do_nocd) {
+			mutex_enter(&p->p_lock);
+			p->p_flag |= SNOCD;
+			mutex_exit(&p->p_lock);
+		}
+		crset(p, newcr);	/* broadcast to process threads */
+		goto done;
+	}
+	crfree(newcr);	/* failed after cralloc(): drop the unused cred */
+done:
+	if (error)
+		return (set_errno(error));
+	else
+		return (0);
+}
+
+long
+lx_setresuid16(l_uid16_t ruid16, l_uid16_t euid16, l_uid16_t suid16)
+{
+	l_uid_t ruid, euid, suid;
+
+	/* Widen each id; a 16-bit -1 ("leave alone") stays -1. */
+	ruid = LINUX_UID16_TO_UID32(ruid16);
+	euid = LINUX_UID16_TO_UID32(euid16);
+	suid = LINUX_UID16_TO_UID32(suid16);
+
+	return (lx_setresuid(ruid, euid, suid));
+}
+
+/*
+ * Set real/effective/saved gids (-1 = leave alone); based on setregid in
+ * common/syscall/gid.c
+ */
+long
+lx_setresgid(l_gid_t rgid, l_gid_t egid, l_gid_t sgid)
+{
+	proc_t	*p;
+	int	error = 0;
+	int	do_nocd = 0;
+	cred_t	*cr, *newcr;
+
+	if ((rgid != -1 && (rgid < 0 || rgid > MAXUID)) ||
+	    (egid != -1 && (egid < 0 || egid > MAXUID)) ||
+	    (sgid != -1 && (sgid < 0 || sgid > MAXUID))) {
+		error = EINVAL;
+		goto done;
+	}
+
+	/*
+	 * Need to pre-allocate the new cred structure before grabbing
+	 * the p_crlock mutex.
+	 */
+	newcr = cralloc();
+
+	p = ttoproc(curthread);
+	mutex_enter(&p->p_crlock);
+	cr = p->p_cred;
+
+	if (rgid != -1 &&
+	    rgid != cr->cr_rgid && rgid != cr->cr_gid &&
+	    rgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+		error = EPERM;
+	} else if (egid != -1 &&
+	    egid != cr->cr_rgid && egid != cr->cr_gid &&
+	    egid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+		error = EPERM;
+	} else if (sgid != -1 &&
+	    sgid != cr->cr_rgid && sgid != cr->cr_gid &&
+	    sgid != cr->cr_sgid && secpolicy_allow_setid(cr, -1, B_FALSE)) {
+		error = EPERM;
+	} else {
+		crhold(cr);
+		crcopy_to(cr, newcr);
+		p->p_cred = newcr;
+
+		if (egid != -1)
+			newcr->cr_gid = egid;
+		if (sgid != -1)
+			newcr->cr_sgid = sgid;
+		if (rgid != -1)
+			newcr->cr_rgid = rgid;
+
+		/*
+		 * A process that gives up its privilege
+		 * must be marked to produce no core dump.
+		 */
+		if ((cr->cr_gid != newcr->cr_gid ||
+		    cr->cr_rgid != newcr->cr_rgid ||
+		    cr->cr_sgid != newcr->cr_sgid))
+			do_nocd = 1;
+
+		crfree(cr);
+	}
+	mutex_exit(&p->p_crlock);
+
+	if (error == 0) {
+		if (do_nocd) {
+			mutex_enter(&p->p_lock);
+			p->p_flag |= SNOCD;
+			mutex_exit(&p->p_lock);
+		}
+		crset(p, newcr);	/* broadcast to process threads */
+		goto done;
+	}
+	crfree(newcr);	/* failed after cralloc(): drop the unused cred */
+done:
+	if (error)
+		return (set_errno(error));
+	else
+		return (0);
+}
+
+long
+lx_setresgid16(l_gid16_t rgid16, l_gid16_t egid16, l_gid16_t sgid16)
+{
+	l_gid_t rgid, egid, sgid;
+
+	/* Widen each id; a 16-bit -1 ("leave alone") stays -1. */
+	rgid = LINUX_GID16_TO_GID32(rgid16);
+	egid = LINUX_GID16_TO_GID32(egid16);
+	sgid = LINUX_GID16_TO_GID32(sgid16);
+
+	return (lx_setresgid(rgid, egid, sgid));
+}
+
+/*
+ * Linux defines NGROUPS_MAX to be 32, but on Solaris it is only 16.  So that
+ * tests may proceed, DEBUG kernels clamp an over-large but Linux-legal count.
+ */
+long
+lx_setgroups(int ngroups, gid_t *grouplist)
+{
+#ifdef DEBUG
+	if (ngroups <= LX_NGROUPS_MAX && ngroups > ngroups_max)
+		ngroups = ngroups_max;
+#endif /* DEBUG */
+
+	return (setgroups(ngroups, grouplist));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_kill.c b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
new file mode 100644
index 0000000000..d86d50f4e6
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_kill.c
@@ -0,0 +1,249 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/zone.h>
+#include <sys/thread.h>
+#include <sys/signal.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_pid.h>
+#include <lx_signum.h>
+
+extern int kill(pid_t, int);
+
+/*
+ * Decide whether 'sig' may be sent to the init process 'pid'.  Returns 0 if
+ * allowed, ESRCH if the process is missing or in state SIDL, and EPERM for
+ * a signal init cannot or does not handle; signal 0 is always allowed.
+ */
+static int
+init_sig_check(int sig, pid_t pid)
+{
+	proc_t *initp;
+	int err = 0;
+
+	mutex_enter(&pidlock);
+	initp = prfind(pid);
+	if (initp == NULL || initp->p_stat == SIDL) {
+		err = ESRCH;
+	} else if (sig != 0) {
+		if (sigismember(&cantmask, sig) ||
+		    PTOU(initp)->u_signal[sig - 1] == SIG_DFL ||
+		    PTOU(initp)->u_signal[sig - 1] == SIG_IGN)
+			err = EPERM;
+	}
+	mutex_exit(&pidlock);
+	return (err);
+}
+
+long
+lx_tkill(pid_t pid, int lx_sig)
+{
+	kthread_t *t;
+	proc_t *pp;
+	pid_t initpid;
+	sigqueue_t *sqp;
+	struct lx_lwp_data *br = ttolxlwp(curthread);
+	int tid = 1;	/* default tid */
+	int sig, rv;
+
+	/*
+	 * Send lx_sig to a single thread.  Unlike kill(2), Linux tkill(2)
+	 * doesn't allow signals to be sent to process IDs <= 0 as it
+	 * doesn't overlay any special semantics on the pid.
+	 */
+	if ((pid <= 0) || ((lx_sig < 0) || (lx_sig >= LX_NSIG)) ||
+	    ((sig = ltos_signo[lx_sig]) < 0))
+		return (set_errno(EINVAL));
+
+	/*
+	 * If the Linux pid is 1, translate the pid to the actual init
+	 * pid for the zone.  Note that Linux dictates that no unhandled
+	 * signals may be sent to init, so check for that, too.
+	 *
+	 * Otherwise, extract the tid and real pid from the Linux pid.
+	 */
+	initpid = curproc->p_zone->zone_proc_initpid;
+	if (pid == 1)
+		pid = initpid;
+	if ((pid == initpid) && ((rv = init_sig_check(sig, pid)) != 0))
+		return (set_errno(rv));
+	else if (lx_lpid_to_spair(pid, &pid, &tid) < 0)
+		return (set_errno(ESRCH));
+
+	sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
+
+	/*
+	 * Find the process for the passed pid...
+	 */
+	mutex_enter(&pidlock);
+	if (((pp = prfind(pid)) == NULL) || (pp->p_stat == SIDL)) {
+		mutex_exit(&pidlock);
+		rv = set_errno(ESRCH);
+		goto free_and_exit;
+	}
+	mutex_enter(&pp->p_lock);	/* taken before pidlock is dropped */
+	mutex_exit(&pidlock);
+
+	/*
+	 * Deny permission to send the signal if either of the following
+	 * is true:
+	 *
+	 *	+ The signal is SIGCONT and the target pid is not in the same
+	 *	  session as the sender
+	 *
+	 *	+ prochasprocperm() shows the user lacks sufficient permission
+	 *	  to send the signal to the target pid
+	 */
+	if (((sig == SIGCONT) && (pp->p_sessp != curproc->p_sessp)) ||
+	    (!prochasprocperm(pp, curproc, CRED()))) {
+		mutex_exit(&pp->p_lock);
+		rv = set_errno(EPERM);
+		goto free_and_exit;
+	}
+
+	/* check for the tid */
+	if ((t = idtot(pp, tid)) == NULL) {
+		mutex_exit(&pp->p_lock);
+		rv = set_errno(ESRCH);
+		goto free_and_exit;
+	}
+
+	/* a signal of 0 means just check for the existence of the thread */
+	if (lx_sig == 0) {
+		mutex_exit(&pp->p_lock);
+		rv = 0;
+		goto free_and_exit;
+	}
+
+	sqp->sq_info.si_signo = sig;
+	sqp->sq_info.si_code = SI_LWP;
+	sqp->sq_info.si_pid = br->br_pid;
+	sqp->sq_info.si_uid = crgetruid(CRED());
+	sigaddqa(pp, t, sqp);	/* sqp is handed off; it is not freed here */
+
+	mutex_exit(&pp->p_lock);
+
+	return (0);
+
+free_and_exit:
+	kmem_free(sqp, sizeof (sigqueue_t));
+	return (rv);
+}
+
+/* Linux kill(2); pid 1 is init and -1 means all but init and the caller. */
+long
+lx_kill(pid_t lx_pid, int lx_sig)
+{
+	pid_t s_pid, initpid;
+	sigsend_t v;
+	zone_t *zone = curproc->p_zone;
+	struct proc *p;
+	int err, sig, nfound;
+
+	if ((lx_sig < 0) || (lx_sig >= LX_NSIG) ||
+	    ((sig = ltos_signo[lx_sig]) < 0))
+		return (set_errno(EINVAL));
+
+	/*
+	 * Since some linux apps rely on init(1M) having PID 1, we
+	 * transparently translate 1 to the real init(1M)'s pid.  We then
+	 * check to be sure that it is legal for this process to send this
+	 * signal to init(1M).
+	 */
+	initpid = zone->zone_proc_initpid;
+	if (lx_pid == 1 || lx_pid == -1) {
+		s_pid = initpid;
+	} else if (lx_pid == 0) {
+		s_pid = 0;
+	} else {
+		pid_t abs_pid = (lx_pid < 0) ? -lx_pid : lx_pid;
+
+		/*
+		 * If we didn't find this pid in our hash table, it either
+		 * means that the process doesn't exist, that it exists but
+		 * isn't a Linux process, or that it is a zombie process.
+		 * In each of these cases, assuming that the Linux pid is
+		 * the same as the Solaris pid will get us the correct
+		 * behavior.  The absolute value is used so that a group
+		 * request never leaves a negative id in s_pid.
+		 */
+		if (lx_lpid_to_spair(abs_pid, &s_pid, NULL) < 0)
+			s_pid = abs_pid;
+	}
+
+	/* Group and broadcast sends skip init, so only check pid > 0. */
+	if ((lx_pid > 0) && (s_pid == initpid) &&
+	    ((err = init_sig_check(sig, s_pid)) != 0))
+		return (set_errno(err));
+
+	/*
+	 * For individual processes, kill() semantics are the same between
+	 * Solaris and Linux.
+	 */
+	if (lx_pid >= 0)
+		return (kill(s_pid, sig));
+
+	/*
+	 * In Solaris, sending a signal to -pid means "send a signal to
+	 * everyone in process group pid."  In Linux it means "send a
+	 * signal to everyone in the group other than init."  Sending a
+	 * signal to -1 means "send a signal to every process except init
+	 * and myself."
+	 */
+	bzero(&v, sizeof (v));
+	v.sig = sig;
+	v.checkperm = 1;
+	v.sicode = SI_USER;
+	err = 0;
+
+	mutex_enter(&pidlock);
+
+	p = (lx_pid == -1) ? practive : pgfind(s_pid);
+	nfound = 0;
+	while (err == 0 && p != NULL) {
+		if ((p->p_zone == zone) && (p->p_stat != SIDL) &&
+		    (p->p_pid != initpid) && (lx_pid < -1 || p != curproc)) {
+			nfound++;
+			err = sigsendproc(p, &v);
+		}
+
+		p = (lx_pid == -1) ? p->p_next : p->p_pglink;
+	}
+	mutex_exit(&pidlock);
+	if (nfound == 0)
+		err = ESRCH;
+	else if (err == 0 && v.perm == 0)
+		err = EPERM;
+	return (err ? set_errno(err) : 0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
new file mode 100644
index 0000000000..aa6e12a7d8
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_modify_ldt.c
@@ -0,0 +1,121 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/segments.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/sysi86.h>
+#include <sys/cmn_err.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Read the ldt_info structure in from the Linux app, convert it to an ssd
+ * structure, and then call setdscr() to do all the heavy lifting.
+ */
+static int
+write_ldt(void *data, ulong_t count)
+{
+	user_desc_t usd;
+	struct ssd ssd;
+	struct ldt_info ldt_inf;
+	proc_t *pp = curthread->t_procp;
+	int err;
+
+	if (count != sizeof (ldt_inf))	/* exactly one entry per call */
+		return (set_errno(EINVAL));
+
+	if (copyin(data, &ldt_inf, sizeof (ldt_inf)))
+		return (set_errno(EFAULT));
+
+	if (ldt_inf.entry_number >= MAXNLDT)
+		return (set_errno(EINVAL));
+
+	LDT_INFO_TO_DESC(&ldt_inf, &usd);
+	usd_to_ssd(&usd, &ssd, SEL_LDT(ldt_inf.entry_number));
+
+	/*
+	 * Get everyone into a safe state before changing the LDT.
+	 */
+	if (!holdlwps(SHOLDFORK1))
+		return (set_errno(EINTR));
+
+	err = setdscr(&ssd);
+
+	/*
+	 * Release the hounds!  This runs whether or not setdscr() failed.
+	 */
+	mutex_enter(&pp->p_lock);
+	continuelwps(pp);
+	mutex_exit(&pp->p_lock);
+
+	return (err ? set_errno(err) : 0);
+}
+
+static int
+read_ldt(void *uptr, ulong_t count)
+{
+	proc_t *p = curproc;
+	int len;
+
+	if (p->p_ldt == NULL)
+		return (0);	/* the process has no LDT to report */
+
+	/* Copy out the whole table, truncated to the caller's buffer. */
+	len = (p->p_ldtlimit + 1) * sizeof (user_desc_t);
+	if (len > count)
+		len = count;
+	if (copyout(p->p_ldt, uptr, len) != 0)
+		return (set_errno(EFAULT));
+
+	return (len);
+}
+
+/*
+ * Entry point for the Linux modify_ldt system call:
+ *
+ *	op 0	copy the current LDT out to 'data' (at most 'count' bytes)
+ *	op 1	install the single entry described by the ldt_info at 'data'
+ *	other	fail with ENOSYS
+ */
+long
+lx_modify_ldt(int op, void *data, ulong_t count)
+{
+	int rval;
+
+	if (op == 0)
+		rval = read_ldt(data, count);
+	else if (op == 1)
+		rval = write_ldt(data, count);
+	else
+		rval = set_errno(ENOSYS);
+
+	return (rval);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sched.c b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
new file mode 100644
index 0000000000..7b1cd49f37
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sched.c
@@ -0,0 +1,513 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/proc.h>
+#include <sys/cpu.h>
+#include <sys/rtpriocntl.h>
+#include <sys/tspriocntl.h>
+#include <sys/processor.h>
+#include <sys/brand.h>
+#include <sys/lx_pid.h>
+#include <sys/lx_sched.h>
+#include <sys/lx_brand.h>
+
+extern long priocntl_common(int, procset_t *, int, caddr_t, caddr_t, uio_seg_t);
+
+/*
+ * Get or set the affinity mask kept in an lwp's brand data.  Only lwps of
+ * the calling process may be named; a pid of 0 means the calling lwp.
+ */
+int
+lx_sched_affinity(int cmd, uintptr_t pid, int len, uintptr_t maskp,
+    int64_t *rval)
+{
+	pid_t		s_pid;
+	id_t		s_tid;
+	kthread_t	*t = curthread;
+	lx_lwp_data_t	*lx_lwp;
+	lx_affmask_t	mask;
+
+	if (cmd != B_GET_AFFINITY_MASK && cmd != B_SET_AFFINITY_MASK)
+		return (set_errno(EINVAL));
+
+	/* The caller wants to know how large the mask should be. */
+	if (cmd == B_GET_AFFINITY_MASK && len == 0) {
+		*rval = sizeof (lx_affmask_t);
+		return (0);
+	}
+
+	/*
+	 * Otherwise, ensure they have a large enough mask.  A negative
+	 * length is rejected before the unsigned size comparison.
+	 */
+	if (cmd == B_GET_AFFINITY_MASK &&
+	    (len < 0 || (size_t)len < sizeof (lx_affmask_t))) {
+		*rval = -1;
+		return (set_errno(EINVAL));
+	}
+
+	if (pid == 0) {
+		s_pid = curproc->p_pid;
+		s_tid = curthread->t_tid;
+	} else if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) == -1) {
+		return (set_errno(ESRCH));
+	}
+
+	/* For now, only threads in the calling process are supported. */
+	if (curproc->p_pid != s_pid)
+		return (set_errno(EPERM));
+
+	/*
+	 * Copy a new mask in before taking p_lock, so that a faulting
+	 * copyin can neither block under the lock nor leave the lwp with
+	 * a partially overwritten mask.
+	 */
+	if (cmd == B_SET_AFFINITY_MASK &&
+	    copyin_nowatch((void *)maskp, &mask, sizeof (mask)) != 0)
+		return (set_errno(EFAULT));
+
+	/*
+	 * Hold the process lock so that the thread list doesn't change
+	 * while we walk it and touch the lwp's brand data.
+	 */
+	mutex_enter(&curproc->p_lock);
+	do {
+		if (t->t_tid == s_tid)
+			break;
+		t = t->t_forw;
+	} while (t != curthread);
+
+	/* The target lwp may have exited since its pid was translated. */
+	if (t->t_tid != s_tid) {
+		mutex_exit(&curproc->p_lock);
+		return (set_errno(ESRCH));
+	}
+
+	lx_lwp = t->t_lwp->lwp_brand;
+	if (cmd == B_SET_AFFINITY_MASK)
+		bcopy(&mask, &lx_lwp->br_affinitymask, sizeof (mask));
+	else
+		bcopy(&lx_lwp->br_affinitymask, &mask, sizeof (mask));
+	mutex_exit(&curproc->p_lock);
+
+	if (cmd == B_SET_AFFINITY_MASK) {
+		*rval = 0;
+	} else {
+		if (copyout_nowatch(&mask, (void *)maskp, sizeof (mask)) != 0)
+			return (set_errno(EFAULT));
+		*rval = sizeof (lx_affmask_t);
+	}
+	return (0);
+}
+/* Linux sched_setscheduler(2): set scheduling policy and priority. */
+long
+lx_sched_setscheduler(l_pid_t pid, int policy, struct lx_sched_param *param)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	procset_t procset;
+	procset_t procset_cid;
+	pcparms_t pcparm;
+	pcinfo_t pcinfo;
+	struct lx_sched_param sched_param;
+	tsparms_t *tsp;
+	int prio, maxupri;
+	int rv;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	if (rv = sched_setprocset(&procset, pid))
+		return (rv);
+
+	if (copyin(param, &sched_param, sizeof (sched_param)))
+		return (set_errno(EFAULT));
+
+	prio = sched_param.lx_sched_prio;
+
+	if (policy < 0) {	/* keep the target's current policy */
+		/*
+		 * get the class id
+		 */
+		pcparm.pc_cid = PC_CLNULL;
+		(void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+		if (lwp->lwp_errno)
+			return (lwp->lwp_errno);
+
+		/*
+		 * get the current policy
+		 */
+		bzero(&pcinfo, sizeof (pcinfo));
+		pcinfo.pc_cid = pcparm.pc_cid;
+		(void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+		if (lwp->lwp_errno)
+			return (lwp->lwp_errno);
+
+		if (strcmp(pcinfo.pc_clname, "TS") == 0)
+			policy = LX_SCHED_OTHER;
+		else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+			policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+				RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+		else
+			return (set_errno(EINVAL));
+	}
+
+	bzero(&pcinfo, sizeof (pcinfo));
+	bzero(&pcparm, sizeof (pcparm));
+	setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+	switch (policy) {
+	case LX_SCHED_FIFO:
+	case LX_SCHED_RR:
+		(void) strcpy(pcinfo.pc_clname, "RT");
+		(void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+		if (lwp->lwp_errno)
+			return (lwp->lwp_errno);
+
+		if (prio < 0 ||
+		    prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+			return (set_errno(EINVAL));
+		pcparm.pc_cid = pcinfo.pc_cid;
+		((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+		((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+			policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+		break;
+
+	case LX_SCHED_OTHER:
+		(void) strcpy(pcinfo.pc_clname, "TS");
+		(void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+		if (lwp->lwp_errno)
+			return (lwp->lwp_errno);
+
+		maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+		if (prio > maxupri || prio < -maxupri)
+			return (set_errno(EINVAL));
+
+		pcparm.pc_cid = pcinfo.pc_cid;
+		tsp = (tsparms_t *)pcparm.pc_clparms;
+		tsp->ts_upri = prio;
+		tsp->ts_uprilim = TS_NOCHANGE;
+		break;
+
+	default:
+		return (set_errno(EINVAL));
+	}
+
+	/*
+	 * finally set scheduling policy and parameters (result not checked)
+	 */
+	(void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+	return (0);
+}
+/* Linux sched_getscheduler(2): report the policy as an LX_SCHED_* value. */
+long
+lx_sched_getscheduler(l_pid_t pid)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	procset_t procset;
+	pcparms_t pcparm;
+	pcinfo_t pcinfo;
+	int policy;
+	int rv;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	if (rv = sched_setprocset(&procset, pid))
+		return (rv);
+
+	/*
+	 * get the class id
+	 */
+	pcparm.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/*
+	 * get the class info and identify the equivalent linux policy
+	 */
+	bzero(&pcinfo, sizeof (pcinfo));
+	pcinfo.pc_cid = pcparm.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	if (strcmp(pcinfo.pc_clname, "TS") == 0)
+		policy = LX_SCHED_OTHER;
+	else if (strcmp(pcinfo.pc_clname, "RT") == 0)	/* RT_TQINF => FIFO */
+		policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+			RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+	else
+		policy = set_errno(EINVAL);
+
+	return (policy);
+}
+/* Linux sched_setparam(2): change priority, keeping the current policy. */
+long
+lx_sched_setparam(l_pid_t pid, struct lx_sched_param *param)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	procset_t procset;
+	procset_t procset_cid;
+	pcparms_t pcparm;
+	pcinfo_t pcinfo;
+	struct lx_sched_param sched_param;
+	tsparms_t *tsp;
+	int policy;
+	int prio, maxupri;
+	int rv;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	if (rv = sched_setprocset(&procset, pid))
+		return (rv);
+
+	if (copyin(param, &sched_param, sizeof (sched_param)))
+		return (set_errno(EFAULT));
+
+	prio = sched_param.lx_sched_prio;
+
+	/*
+	 * get the class id
+	 */
+	pcparm.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/*
+	 * get the current policy
+	 */
+	bzero(&pcinfo, sizeof (pcinfo));
+	pcinfo.pc_cid = pcparm.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	if (strcmp(pcinfo.pc_clname, "TS") == 0)
+		policy = LX_SCHED_OTHER;
+	else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+		policy = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs ==
+			RT_TQINF ? LX_SCHED_FIFO : LX_SCHED_RR;
+	else
+		return (set_errno(EINVAL));
+
+	bzero(&pcinfo, sizeof (pcinfo));
+	bzero(&pcparm, sizeof (pcparm));
+	setprocset(&procset_cid, POP_AND, P_PID, 0, P_ALL, 0);
+	switch (policy) {
+	case LX_SCHED_FIFO:
+	case LX_SCHED_RR:
+		(void) strcpy(pcinfo.pc_clname, "RT");
+		(void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+		if (lwp->lwp_errno)
+			return (lwp->lwp_errno);
+
+		if (prio < 0 ||
+		    prio > ((rtinfo_t *)pcinfo.pc_clinfo)->rt_maxpri)
+			return (set_errno(EINVAL));
+		pcparm.pc_cid = pcinfo.pc_cid;
+		((rtparms_t *)pcparm.pc_clparms)->rt_pri = prio;
+		((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs =
+			policy == LX_SCHED_RR ? RT_TQDEF : RT_TQINF;
+		break;
+
+	case LX_SCHED_OTHER:
+		(void) strcpy(pcinfo.pc_clname, "TS");
+		(void) do_priocntlsys(PC_GETCID, &procset_cid, &pcinfo);
+		if (lwp->lwp_errno)
+			return (lwp->lwp_errno);
+
+		maxupri = ((tsinfo_t *)pcinfo.pc_clinfo)->ts_maxupri;
+		if (prio > maxupri || prio < -maxupri)
+			return (set_errno(EINVAL));
+
+		pcparm.pc_cid = pcinfo.pc_cid;
+		tsp = (tsparms_t *)pcparm.pc_clparms;
+		tsp->ts_upri = prio;
+		tsp->ts_uprilim = TS_NOCHANGE;
+		break;
+
+	default:
+		return (set_errno(EINVAL));
+	}
+
+	/*
+	 * finally set scheduling policy and parameters (result not checked)
+	 */
+	(void) do_priocntlsys(PC_SETPARMS, &procset, &pcparm);
+
+	return (0);
+}
+/* Linux sched_getparam(2): copy the target's priority out to 'param'. */
+long
+lx_sched_getparam(l_pid_t pid, struct lx_sched_param *param)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	struct lx_sched_param local_param;
+	procset_t procset;
+	pcparms_t pcparm;
+	pcinfo_t pcinfo;
+	tsinfo_t *tsi;
+	int prio, scale;
+	int rv;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	if (rv = sched_setprocset(&procset, pid))
+		return (rv);
+
+	/*
+	 * get the class id
+	 */
+	pcparm.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/*
+	 * get the class info and identify the equivalent linux policy
+	 */
+	bzero(&pcinfo, sizeof (pcinfo));
+	pcinfo.pc_cid = pcparm.pc_cid;
+	(void) do_priocntlsys(PC_GETCLINFO, &procset, &pcinfo);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	bzero(&local_param, sizeof (local_param));
+	if (strcmp(pcinfo.pc_clname, "TS") == 0) {
+		/*
+		 * I don't know if we need to do this, coz it can't be
+		 * changed from zero anyway.....
+		 */
+		tsi = (tsinfo_t *)pcinfo.pc_clinfo;
+		prio = ((tsparms_t *)pcparm.pc_clparms)->ts_upri;
+		scale = tsi->ts_maxupri;
+		if (scale == 0)	/* avoid dividing by zero below */
+			local_param.lx_sched_prio = 0;
+		else
+			local_param.lx_sched_prio = -(prio * 20) / scale;
+	} else if (strcmp(pcinfo.pc_clname, "RT") == 0)
+		local_param.lx_sched_prio =
+			((rtparms_t *)pcparm.pc_clparms)->rt_pri;
+	else
+		rv = set_errno(EINVAL);
+
+	if (rv == 0)	/* rv is still 0 here unless the class was unknown */
+		if (copyout(&local_param, param, sizeof (local_param)))
+			return (set_errno(EFAULT));
+
+	return (rv);
+}
+/* Linux sched_rr_get_interval(2): copy out an RT lwp's time quantum. */
+long
+lx_sched_rr_get_interval(l_pid_t pid, struct timespec *ival)
+{
+	klwp_t *lwp = ttolwp(curthread);
+	struct timespec interval;
+	procset_t procset;
+	pcparms_t pcparm;
+	pcinfo_t pcinfo;
+	int rv;
+
+	if (pid < 0)
+		return (set_errno(ESRCH));
+
+	if (rv = sched_setprocset(&procset, pid))
+		return (rv);
+
+	/*
+	 * get the class id
+	 */
+	pcparm.pc_cid = PC_CLNULL;
+	(void) do_priocntlsys(PC_GETPARMS, &procset, &pcparm);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	/*
+	 * look up the id of the "RT" class, to compare with the target's
+	 */
+	setprocset(&procset, POP_AND, P_PID, 0, P_ALL, 0);
+	bzero(&pcinfo, sizeof (pcinfo));
+	(void) strcpy(pcinfo.pc_clname, "RT");
+	(void) do_priocntlsys(PC_GETCID, &procset, &pcinfo);
+	if (lwp->lwp_errno)
+		return (lwp->lwp_errno);
+
+	if (pcparm.pc_cid == pcinfo.pc_cid &&
+	    ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs != RT_TQINF) {
+		interval.tv_sec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqsecs;
+		interval.tv_nsec = ((rtparms_t *)pcparm.pc_clparms)->rt_tqnsecs;
+
+		if (copyout(&interval, ival, sizeof (interval)))
+			return (set_errno(EFAULT));
+
+		return (0);
+	}
+
+	return (set_errno(EINVAL));	/* not RT, or an infinite quantum */
+}
+
+/*
+ * Build the procset that the lx_sched_*() calls hand to priocntl for the
+ * Linux pid 'pid'.  Returns 0, or the result of set_errno(ESRCH) on failure.
+ */
+int
+sched_setprocset(procset_t *procset, l_pid_t pid)
+{
+	idtype_t lidtype;
+	id_t lid;
+
+	if (pid == 0) {
+		/* pid 0 selects the caller itself: (P_PID, P_MYID) */
+		lidtype = P_PID;
+		lid = P_MYID;
+	} else {
+		/* otherwise a single lwp, which must be in this process */
+		if (lx_lpid_to_spair(pid, &pid, &lid) < 0 ||
+		    pid != curproc->p_pid)
+			return (set_errno(ESRCH));
+		lidtype = P_LWPID;
+	}
+
+	/* The right-hand operand is (P_ALL, 0) in both cases. */
+	setprocset(procset, POP_AND, lidtype, lid, P_ALL, 0);
+
+	return (0);
+}
+/* Call priocntl_common() with a kernel-space (UIO_SYSSPACE) argument. */
+long
+do_priocntlsys(int cmd, procset_t *procset, void *arg)
+{
+	return (priocntl_common(PC_VERSION, procset, cmd, (caddr_t)arg, 0,
+	    UIO_SYSSPACE));
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
new file mode 100644
index 0000000000..9fdb734805
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_sysinfo.c
@@ -0,0 +1,118 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <vm/anon.h>
+#include <sys/systm.h>
+#include <sys/sysmacros.h>
+#include <sys/zone.h>
+#include <sys/time.h>
+
+/*
+ * Result buffer that lx_sysinfo() fills in and copies out to the caller.
+ * NOTE(review): the field order and widths presumably mirror the 32-bit
+ * Linux struct sysinfo -- confirm against the Linux headers.  With the
+ * usual alignment rules the compiler places two bytes of padding between
+ * si_procs and si_totalhigh.
+ */
+struct lx_sysinfo {
+	int32_t si_uptime;	/* Seconds since boot */
+	uint32_t si_loads[3];	/* 1, 5, and 15 minute avg runq length */
+	uint32_t si_totalram;	/* Total memory size */
+	uint32_t si_freeram;	/* Available memory */
+	uint32_t si_sharedram;	/* Shared memory */
+	uint32_t si_bufferram;	/* Buffer memory */
+	uint32_t si_totalswap;	/* Total swap space */
+	uint32_t si_freeswap;	/* Avail swap space */
+	uint16_t si_procs;	/* Process count */
+	uint32_t si_totalhigh;	/* High memory size */
+	uint32_t si_freehigh;	/* Avail high memory */
+	uint32_t si_mem_unit;	/* Unit size of memory fields */
+};
+
+/*
+ * Emulate the Linux sysinfo(2) system call: collect uptime, load
+ * averages, the zone's lwp count and memory/swap totals into a struct
+ * lx_sysinfo and copy it out to sip.  Returns 0 on success or
+ * set_errno(EFAULT) if the copyout fails.
+ */
+long
+lx_sysinfo(struct lx_sysinfo *sip)
+{
+	struct lx_sysinfo si;
+	hrtime_t birthtime;
+	zone_t *zone = curthread->t_procp->p_zone;
+	proc_t *init_proc;
+
+	/*
+	 * The whole structure is copied out below, so clear it first;
+	 * otherwise compiler-inserted padding (e.g. after the 16-bit
+	 * si_procs) would hand stale kernel stack bytes to userland.
+	 */
+	bzero(&si, sizeof (si));
+
+	/*
+	 * We don't record the time a zone was booted, so we use the
+	 * birthtime of that zone's init process instead.  If that process
+	 * cannot be found we fall back to the start time of p0.
+	 */
+	mutex_enter(&pidlock);
+	init_proc = prfind(zone->zone_proc_initpid);
+	if (init_proc != NULL)
+		birthtime = init_proc->p_mstart;
+	else
+		birthtime = p0.p_mstart;
+	mutex_exit(&pidlock);
+	si.si_uptime = (gethrtime() - birthtime) / NANOSEC;
+
+	/*
+	 * We scale down the load in avenrun to allow larger load averages
+	 * to fit in 32 bits.  Linux doesn't, so we remove the scaling
+	 * here.
+	 */
+	si.si_loads[0] = avenrun[0] << FSHIFT;
+	si.si_loads[1] = avenrun[1] << FSHIFT;
+	si.si_loads[2] = avenrun[2] << FSHIFT;
+
+	/*
+	 * In linux each thread looks like a process, so we conflate the
+	 * two in this stat as well.  The value is truncated to the 16 bits
+	 * available in si_procs.
+	 */
+	si.si_procs = (int32_t)zone->zone_nlwps;
+
+	/*
+	 * If the maximum memory stat is less than 2^20 pages (i.e. 4GB),
+	 * then we report the result in bytes.  Otherwise we use pages.
+	 * Once we start supporting >1TB x86 systems, we'll need a third
+	 * option.
+	 */
+	if (MAX(physmem, k_anoninfo.ani_max) < 1024 * 1024) {
+		si.si_totalram = physmem * PAGESIZE;
+		si.si_freeram = freemem * PAGESIZE;
+		si.si_totalswap = k_anoninfo.ani_max * PAGESIZE;
+		si.si_freeswap = k_anoninfo.ani_free * PAGESIZE;
+		si.si_mem_unit = 1;
+	} else {
+		si.si_totalram = physmem;
+		si.si_freeram = freemem;
+		si.si_totalswap = k_anoninfo.ani_max;
+		si.si_freeswap = k_anoninfo.ani_free;
+		si.si_mem_unit = PAGESIZE;
+	}
+	si.si_bufferram = 0;
+	si.si_sharedram = 0;
+
+	/*
+	 * These two stats refer to high physical memory.  If an
+	 * application running in a Linux zone cares about this, then
+	 * either it or we are broken.
+	 */
+	si.si_totalhigh = 0;
+	si.si_freehigh = 0;
+
+	if (copyout(&si, sip, sizeof (si)) != 0)
+		return (set_errno(EFAULT));
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
new file mode 100644
index 0000000000..f9751819f9
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/syscall/lx_thread_area.c
@@ -0,0 +1,128 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/cpuvar.h>
+#include <sys/archsystm.h>
+#include <sys/proc.h>
+#include <sys/brand.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ldt.h>
+
+/*
+ * Emulate Linux get_thread_area(2): read the requested entry number from
+ * the user's ldt_info, convert the matching per-lwp TLS descriptor to the
+ * Linux format and copy the result back out.  Returns 0 on success,
+ * set_errno(EFAULT) if user memory cannot be accessed, or
+ * set_errno(EINVAL) if the entry is outside GDT_TLSMIN..GDT_TLSMAX.
+ */
+long
+lx_get_thread_area(struct ldt_info *inf)
+{
+	struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+	struct ldt_info ldt_inf;
+	user_desc_t *dscrp;
+	int entry;
+
+	if (fuword32(&inf->entry_number, (uint32_t *)&entry))
+		return (set_errno(EFAULT));
+
+	if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+		return (set_errno(EINVAL));
+
+	dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+	/*
+	 * Start from a zeroed structure so that any bits or padding which
+	 * DESC_TO_LDT_INFO() does not set are not copied out holding
+	 * stale kernel stack contents.
+	 */
+	bzero(&ldt_inf, sizeof (ldt_inf));
+
+	/*
+	 * convert the solaris ldt to the linux format expected by the
+	 * caller
+	 */
+	DESC_TO_LDT_INFO(dscrp, &ldt_inf);
+	ldt_inf.entry_number = entry;
+
+	if (copyout(&ldt_inf, inf, sizeof (struct ldt_info)))
+		return (set_errno(EFAULT));
+
+	return (0);
+}
+
+/*
+ * Return non-zero if every byte of the given TLS descriptor is zero, i.e.
+ * the slot is unused.  Scanning exactly sizeof (*dscrp) bytes keeps the
+ * check inside the descriptor whatever the kernel's data model is.
+ */
+static int
+lx_tls_desc_is_empty(const user_desc_t *dscrp)
+{
+	const uint8_t *bp = (const uint8_t *)dscrp;
+	size_t off;
+
+	for (off = 0; off < sizeof (*dscrp); off++) {
+		if (bp[off] != 0)
+			return (0);
+	}
+	return (1);
+}
+
+/*
+ * Emulate Linux set_thread_area(2): install (or clear) one of the
+ * calling lwp's TLS descriptors from the user-supplied ldt_info.  An
+ * entry number of -1 asks for the first free slot; the slot chosen is
+ * written back to the user's structure.  Returns 0 on success,
+ * set_errno(EFAULT) if user memory cannot be accessed, set_errno(ESRCH)
+ * if no slot is free, or set_errno(EINVAL) if the entry is outside
+ * GDT_TLSMIN..GDT_TLSMAX.
+ */
+long
+lx_set_thread_area(struct ldt_info *inf)
+{
+	struct lx_lwp_data *jlwp = ttolxlwp(curthread);
+	struct ldt_info ldt_inf;
+	user_desc_t *dscrp;
+	int entry;
+	int i;
+
+	if (copyin(inf, &ldt_inf, sizeof (ldt_inf)))
+		return (set_errno(EFAULT));
+
+	entry = ldt_inf.entry_number;
+	if (entry == -1) {
+		/*
+		 * find an empty entry in the tls for this thread
+		 */
+		for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++)
+			if (lx_tls_desc_is_empty(dscrp))
+				break;
+
+		if (i == LX_TLSNUM)
+			return (set_errno(ESRCH));
+
+		/*
+		 * found one; tell the caller which entry was picked
+		 */
+		entry = i + GDT_TLSMIN;
+		if (suword32(&inf->entry_number, entry))
+			return (set_errno(EFAULT));
+	}
+
+	if (entry < GDT_TLSMIN || entry > GDT_TLSMAX)
+		return (set_errno(EINVAL));
+
+	/*
+	 * convert the linux ldt info to standard intel descriptor
+	 */
+	dscrp = jlwp->br_tls + entry - GDT_TLSMIN;
+
+	if (LDT_INFO_EMPTY(&ldt_inf)) {
+		/*
+		 * Clear exactly one descriptor.  Storing through
+		 * unsigned long words would touch 16 bytes on an LP64
+		 * kernel and spill into whatever follows this slot.
+		 */
+		bzero(dscrp, sizeof (*dscrp));
+	} else {
+		LDT_INFO_TO_DESC(&ldt_inf, dscrp);
+	}
+
+	/*
+	 * update the gdt with the new descriptor; all of this lwp's TLS
+	 * slots are reloaded with preemption disabled
+	 */
+	kpreempt_disable();
+
+	for (i = 0, dscrp = jlwp->br_tls; i < LX_TLSNUM; i++, dscrp++)
+		lx_set_gdt(GDT_TLSMIN + i, dscrp);
+
+	kpreempt_enable();
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.c b/usr/src/uts/common/brand/sn1/sn1_brand.c
new file mode 100644
index 0000000000..a46ea3c979
--- /dev/null
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.c
@@ -0,0 +1,288 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/errno.h>
+#include <sys/exec.h>
+#include <sys/kmem.h>
+#include <sys/modctl.h>
+#include <sys/model.h>
+#include <sys/proc.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+#include <sys/thread.h>
+#include <sys/cmn_err.h>
+#include <sys/archsystm.h>
+
+#include <sys/machbrand.h>
+#include <sys/brand.h>
+#include "sn1_brand.h"
+
+/*
+ * Table of NSYSCALL bytes, allocated in _init() and released in _fini().
+ * A non-zero byte marks a system call number the brand wants to
+ * interpose on.  Nothing in this file reads the table; presumably the
+ * machine-dependent syscall callbacks do -- TODO confirm.
+ */
+char *sn1_emulation_table = NULL;
+
+/*
+ * Brand hooks implemented below and installed in sn1_brops.
+ */
+void	sn1_setbrand(proc_t *);
+int	sn1_getattr(zone_t *, int, void *, size_t *);
+int	sn1_setattr(zone_t *, int, void *, size_t);
+int	sn1_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+		uintptr_t, uintptr_t, uintptr_t);
+void	sn1_copy_procdata(proc_t *, proc_t *);
+void	sn1_proc_exit(struct proc *, klwp_t *);
+void	sn1_exec();
+int	sn1_initlwp(klwp_t *);
+void	sn1_forklwp(klwp_t *, klwp_t *);
+void	sn1_freelwp(klwp_t *);
+void	sn1_lwpexit(klwp_t *);
+int	sn1_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
+	long *, int, caddr_t, cred_t *, int);
+
+/*
+ * sn1 brand operations vector.  The initializer is positional, so the
+ * entries must stay in the order of the members of struct brand_ops
+ * (declared outside this file).  Every hook is sn1-specific except
+ * lwp_setrval, which is not defined in this file.
+ */
+struct brand_ops sn1_brops = {
+	sn1_brandsys,
+	sn1_setbrand,
+	sn1_getattr,
+	sn1_setattr,
+	sn1_copy_procdata,
+	sn1_proc_exit,
+	sn1_exec,
+	lwp_setrval,
+	sn1_initlwp,
+	sn1_forklwp,
+	sn1_freelwp,
+	sn1_lwpexit,
+	sn1_elfexec
+};
+
+/*
+ * Machine-dependent callback vector for the sn1 brand.  The shape of
+ * struct brand_mach_ops differs per platform (it is declared outside
+ * this file): sparc supplies the same syscall callback for both of its
+ * slots, amd64 supplies sysenter, int91, syscall and syscall32 callbacks,
+ * and 32-bit x86 supplies only sysenter and syscall callbacks.  Slots
+ * left NULL have no sn1 callback.
+ */
+#ifdef	sparc
+
+struct brand_mach_ops sn1_mops = {
+	sn1_brand_syscall_callback,
+	sn1_brand_syscall_callback
+};
+
+#else	/* sparc */
+
+#ifdef	__amd64
+
+struct brand_mach_ops sn1_mops = {
+	sn1_brand_sysenter_callback,
+	NULL,
+	sn1_brand_int91_callback,
+	sn1_brand_syscall_callback,
+	sn1_brand_syscall32_callback,
+	NULL
+};
+
+#else	/* ! __amd64 */
+
+struct brand_mach_ops sn1_mops = {
+	sn1_brand_sysenter_callback,
+	NULL,
+	NULL,
+	sn1_brand_syscall_callback,
+	NULL,
+	NULL
+};
+#endif	/* __amd64 */
+
+#endif	/* sparc */
+
+/*
+ * The brand itself: interface version, brand name, and the two
+ * operations vectors defined above.
+ */
+struct brand	sn1_brand = {
+	BRAND_VER_1,
+	"sn1",
+	&sn1_brops,
+	&sn1_mops
+};
+
+/*
+ * Module linkage: a single brand module, handed to mod_install(),
+ * mod_info() and mod_remove() by _init(), _info() and _fini().
+ */
+static struct modlbrand modlbrand = {
+	&mod_brandops, "Solaris N-1 Brand %I%", &sn1_brand
+};
+
+static struct modlinkage modlinkage = {
+	MODREV_1, (void *)&modlbrand, NULL
+};
+
+/*
+ * Mark process p as belonging to the sn1 brand.  No brand data is
+ * attached yet; sn1_brandsys(B_REGISTER) fills it in later.
+ */
+void
+sn1_setbrand(proc_t *p)
+{
+	p->p_brand_data = NULL;
+	p->p_brand = &sn1_brand;
+}
+
+/*
+ * Brand attribute query hook.  This implementation recognizes no
+ * attributes, so every request fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+sn1_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
+{
+	return (EINVAL);
+}
+
+/*
+ * Brand attribute update hook.  This implementation recognizes no
+ * attributes, so every request fails with EINVAL.
+ */
+/* ARGSUSED */
+int
+sn1_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
+{
+	return (EINVAL);
+}
+
+/*
+ * The brandsys hook for sn1.  The only command understood is B_REGISTER,
+ * which takes the address of the user-space system call handler (arg1)
+ * and attaches it to the calling process, branding the process as sn1 in
+ * the same step.  *rval is always set to 0.  Any other command yields
+ * EINVAL.
+ */
+/*ARGSUSED*/
+int
+sn1_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
+    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	proc_t *pp = curproc;
+
+	*rval = 0;
+
+	switch (cmd) {
+	case B_REGISTER:
+		pp->p_brand = &sn1_brand;
+		pp->p_brand_data = (void *) arg1;
+		return (0);
+	default:
+		break;
+	}
+
+	/* everything else is only expected from an sn1 process */
+	ASSERT(pp->p_brand == &sn1_brand);
+
+	return (EINVAL);
+}
+
+/*
+ * Copy the per-process brand data from a parent proc to a child.  In the
+ * sn1 brand, the only per-process state is the address of the user-space
+ * handler, so the pointer value itself is copied; nothing is allocated
+ * or duplicated.
+ */
+void
+sn1_copy_procdata(proc_t *child, proc_t *parent)
+{
+	child->p_brand_data = parent->p_brand_data;
+}
+
+/*
+ * Process exit hook: drop the brand data pointer and hand the process
+ * back to the native brand.  The lwp argument is not used.
+ */
+/*ARGSUSED*/
+void
+sn1_proc_exit(struct proc *p, klwp_t *l)
+{
+	p->p_brand_data = NULL;
+	p->p_brand = &native_brand;
+}
+
+/*
+ * exec hook: forget the handler address registered by the previous
+ * image.  The brand pointer of the current process is left alone.
+ */
+void
+sn1_exec(void)
+{
+	curproc->p_brand_data = NULL;
+}
+
+/*
+ * lwp initialization hook.  Nothing is set up per lwp here; always
+ * returns 0.
+ */
+/*ARGSUSED*/
+int
+sn1_initlwp(klwp_t *l)
+{
+	return (0);
+}
+
+/*
+ * lwp fork hook.  Intentionally empty: nothing is carried from p to c.
+ */
+/*ARGSUSED*/
+void
+sn1_forklwp(klwp_t *p, klwp_t *c)
+{
+}
+
+/*
+ * lwp free hook.  Intentionally empty: there is nothing to release.
+ */
+/*ARGSUSED*/
+void
+sn1_freelwp(klwp_t *l)
+{
+}
+
+/*
+ * lwp exit hook.  Intentionally empty: there is nothing to tear down.
+ */
+/*ARGSUSED*/
+void
+sn1_lwpexit(klwp_t *l)
+{
+}
+
+/*
+ * ELF exec hook: tag the exec arguments with the "sn1" brand name and
+ * hand off to the exec function found through args->execswp, one
+ * nesting level deeper.  Returns whatever that function returns.
+ */
+int
+sn1_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
+	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
+	int brand_action)
+{
+	int error;
+
+	args->brandname = "sn1";
+	error = (*args->execswp->exec_func)(vp, uap, args, idatap, level + 1,
+	    execsz, setid, exec_file, cred, brand_action);
+
+	return (error);
+}
+
+
+/*
+ * Module load entry point.  On non-DEBUG sparc kernels the brand refuses
+ * to load (ENOTSUP).  Otherwise the syscall emulation table is built and
+ * the module is installed; if installation fails the table is released
+ * again and the error from mod_install() is returned.
+ */
+int
+_init(void)
+{
+#if defined(sparc) && !defined(DEBUG)
+	cmn_err(CE_WARN, "The sn1 brand is only supported on DEBUG kernels.");
+	return (ENOTSUP);
+#else
+	int err;
+
+	/*
+	 * Set up the table indicating which system calls we want to
+	 * interpose on.  We should probably build this automatically from
+	 * a list of system calls that is shared with the user-space
+	 * library.
+	 */
+	sn1_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
+	sn1_emulation_table[SYS_uname] = 1;
+	sn1_emulation_table[SYS_fork1] = 1;
+
+	err = mod_install(&modlinkage);
+	if (err) {
+		cmn_err(CE_WARN, "Couldn't install brand module");
+		kmem_free(sn1_emulation_table, NSYSCALL);
+		/* don't leave the global pointing at freed memory */
+		sn1_emulation_table = NULL;
+	}
+
+	return (err);
+#endif
+}
+
+/*
+ * Module information entry point: returns the result of mod_info() for
+ * this module's linkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	return (mod_info(&modlinkage, modinfop));
+}
+
+/*
+ * Module unload entry point.  Returns EBUSY while any zone still uses the
+ * brand, the error from mod_remove() if the module cannot be removed, or
+ * 0 once the module has been removed and its emulation table freed.
+ */
+int
+_fini(void)
+{
+	int err;
+
+	/*
+	 * If there are any zones using this brand, we can't allow it to be
+	 * unloaded.
+	 */
+	if (brand_zone_count(&sn1_brand))
+		return (EBUSY);
+
+	/*
+	 * Remove the module before releasing anything it owns: if
+	 * mod_remove() fails the brand stays loaded and must keep a valid
+	 * emulation table.
+	 */
+	err = mod_remove(&modlinkage);
+	if (err) {
+		cmn_err(CE_WARN, "Couldn't unload sn1 brand module");
+		return (err);
+	}
+
+	kmem_free(sn1_emulation_table, NSYSCALL);
+	sn1_emulation_table = NULL;
+
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/sn1/sn1_brand.h b/usr/src/uts/common/brand/sn1/sn1_brand.h
new file mode 100644
index 0000000000..a4efca189b
--- /dev/null
+++ b/usr/src/uts/common/brand/sn1/sn1_brand.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SN1_BRAND_H
+#define	_SN1_BRAND_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * System call entry callbacks referenced from the sn1_mops table in
+ * sn1_brand.c.  Their definitions are not part of that file.
+ */
+void sn1_brand_syscall_callback(void);
+void sn1_brand_sysenter_callback(void);
+void sn1_brand_int91_callback(void);
+#ifdef	__amd64
+/* only the amd64 sn1_mops table has a syscall32 entry */
+void sn1_brand_syscall32_callback(void);
+#endif
+
+/*
+ * NOTE(review): sn1_brand.c defines sn1_brand but no sbrand; confirm
+ * that this declaration is still needed.
+ */
+extern struct brand *sbrand;
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SN1_BRAND_H */
diff --git a/usr/src/uts/common/c2/audit_event.c b/usr/src/uts/common/c2/audit_event.c
index 4ee95e1728..b45da7bf26 100644
--- a/usr/src/uts/common/c2/audit_event.c
+++ b/usr/src/uts/common/c2/audit_event.c
@@ -158,6 +158,7 @@ static void	aus_sigqueue(struct t_audit_data *);
 static void	aus_p_online(struct t_audit_data *);
 static void	aus_processor_bind(struct t_audit_data *);
 static void	aus_inst_sync(struct t_audit_data *);
+static void	aus_brandsys(struct t_audit_data *);
 
 static void	auf_accept(struct t_audit_data *, int, rval_t *);
 
@@ -270,7 +271,7 @@ aui_null,	AUE_FSTATFS,	aus_fstatfs,	/* 38 fstatfs */
 		auf_null,	S2E_PUB,
 aui_null,	AUE_SETPGRP,	aus_null,	/* 39 setpgrp */
 		auf_null,	0,
-aui_null,	AUE_NULL,	aus_null,	/* 40 (loadable) was cxenix */
+aui_null,	AUE_NULL,	aus_null,	/* 40 uucopystr */
 		auf_null,	0,
 aui_null,	AUE_NULL,	aus_null,	/* 41 dup */
 		auf_null,	0,
@@ -564,7 +565,7 @@ aui_null,	AUE_NULL,	aus_null,	/* 175 llseek */
 aui_null,	AUE_INST_SYNC,	aus_inst_sync,  /* 176 (loadable) */
 						/* aus_inst_sync */
 		auf_null,	0,
-aui_null,	AUE_NULL,	aus_null,	/* 177 (loadable) */
+aui_null,	AUE_BRANDSYS,	aus_brandsys,	/* 177 brandsys */
 		auf_null,	0,
 aui_null,	AUE_NULL,	aus_null,	/* 178 (loadable) */
 		auf_null,	0,
@@ -718,8 +719,7 @@ aui_null,	AUE_NULL,	aus_null,	/* 252 lwp_mutex_init */
 		auf_null,	0,
 aui_null,	AUE_NULL,	aus_null,	/* 253 cladm */
 		auf_null,	0,
-aui_null,	AUE_NULL,	aus_null,	/* 254 (loadable) */
-						/*	was lwp_sigtimedwait */
+aui_null,	AUE_NULL,	aus_null,	/* 254 uucopy */
 		auf_null,	0,
 aui_null,	AUE_UMOUNT2,	aus_umount2,	/* 255 umount2 */
 		auf_null,	0
@@ -4706,6 +4706,40 @@ aus_inst_sync(struct t_audit_data *tad)
 
 /*ARGSUSED*/
 static void
+aus_brandsys(struct t_audit_data *tad)
+{
+	klwp_t *clwp = ttolwp(curthread);
+
+	/* the brandsys arguments, as laid out at lwp_ap */
+	struct a {
+		long	cmd;
+		long	arg1;
+		long	arg2;
+		long	arg3;
+		long	arg4;
+		long	arg5;
+		long	arg6;
+	} *uap = (struct a *)clwp->lwp_ap;
+
+	/* the command is always written as a 32-bit argument token */
+	au_uwrite(au_to_arg32(1, "cmd", (uint_t)uap->cmd));
+	/* the six arguments are written as 64-bit tokens under _LP64 */
+#ifdef _LP64
+	au_uwrite(au_to_arg64(2, "arg1", (uint64_t)uap->arg1));
+	au_uwrite(au_to_arg64(3, "arg2", (uint64_t)uap->arg2));
+	au_uwrite(au_to_arg64(4, "arg3", (uint64_t)uap->arg3));
+	au_uwrite(au_to_arg64(5, "arg4", (uint64_t)uap->arg4));
+	au_uwrite(au_to_arg64(6, "arg5", (uint64_t)uap->arg5));
+	au_uwrite(au_to_arg64(7, "arg6", (uint64_t)uap->arg6));
+#else
+	au_uwrite(au_to_arg32(2, "arg1", (uint32_t)uap->arg1));
+	au_uwrite(au_to_arg32(3, "arg2", (uint32_t)uap->arg2));
+	au_uwrite(au_to_arg32(4, "arg3", (uint32_t)uap->arg3));
+	au_uwrite(au_to_arg32(5, "arg4", (uint32_t)uap->arg4));
+	au_uwrite(au_to_arg32(6, "arg5", (uint32_t)uap->arg5));
+	au_uwrite(au_to_arg32(7, "arg6", (uint32_t)uap->arg6));
+#endif
+}
+
+/*ARGSUSED*/
+static void
 aus_p_online(struct t_audit_data *tad)
 {
 	struct a {
diff --git a/usr/src/uts/common/c2/audit_kevents.h b/usr/src/uts/common/c2/audit_kevents.h
index 942887ae72..4a2e5b27db 100644
--- a/usr/src/uts/common/c2/audit_kevents.h
+++ b/usr/src/uts/common/c2/audit_kevents.h
@@ -330,9 +330,10 @@ extern "C" {
 #define	AUE_MODADDPRIV		291	/* =ad modctl(2) */
 #define	AUE_CRYPTOADM		292	/* =as kernel cryptographic framework */
 #define	AUE_CONFIGKSSL		293	/* =as kernel SSL */
+#define	AUE_BRANDSYS		294	/* =ot */
 /* NOTE: update MAX_KEVENTS below if events are added. */
 
-#define	MAX_KEVENTS		293
+#define	MAX_KEVENTS		294
 
 
 #ifdef __cplusplus
diff --git a/usr/src/uts/common/disp/class.c b/usr/src/uts/common/disp/class.c
index b5b2674d89..8e83a839ee 100644
--- a/usr/src/uts/common/disp/class.c
+++ b/usr/src/uts/common/disp/class.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -325,7 +324,8 @@ parmsset(pcparms_t *parmsp, kthread_id_t targtp)
  * The parameters are specified by a key.
  */
 int
-vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp)
+vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp,
+    uio_seg_t seg)
 {
 	char	*clname;
 
@@ -348,7 +348,8 @@ vaparmsout(char *classp, pcparms_t *prmsp, pc_vaparms_t *vaparmsp)
 		return (EINVAL);
 
 	clname = sclass[prmsp->pc_cid].cl_name;
-	if (copyout(clname, (void *)(uintptr_t)vaparmsp->pc_parms[0].pc_parm,
+	if ((seg == UIO_USERSPACE ? copyout : kcopy)(clname,
+	    (void *)(uintptr_t)vaparmsp->pc_parms[0].pc_parm,
 	    MIN(strlen(clname) + 1, PC_CLNMSZ)))
 		return (EFAULT);
 
diff --git a/usr/src/uts/common/disp/priocntl.c b/usr/src/uts/common/disp/priocntl.c
index 3c1a271155..3bb90cf1fa 100644
--- a/usr/src/uts/common/disp/priocntl.c
+++ b/usr/src/uts/common/disp/priocntl.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -81,7 +80,7 @@ struct stprmargs {
  * between the 64-bit kernel ABI and the 32-bit user ABI.
  */
 static int
-copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap)
+copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
 {
 	pc_vaparms32_t vaparms32;
 	pc_vaparm32_t *src;
@@ -90,7 +89,8 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap)
 
 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
 
-	if (copyin(arg, &vaparms32, sizeof (vaparms32)))
+	if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32,
+	    sizeof (vaparms32)))
 		return (EFAULT);
 
 	vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt;
@@ -104,13 +104,13 @@ copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap)
 	return (0);
 }
 
-#define	COPYIN_VAPARMS(arg, vap, size)	\
+#define	COPYIN_VAPARMS(arg, vap, size, seg)	\
 	(get_udatamodel() == DATAMODEL_NATIVE ?	\
-	copyin(arg, vap, size) : copyin_vaparms32(arg, vap))
+	(*copyinfn)(arg, vap, size) : copyin_vaparms32(arg, vap, seg))
 
 #else
 
-#define	COPYIN_VAPARMS(arg, vap, size)	copyin(arg, vap, size)
+#define	COPYIN_VAPARMS(arg, vap, size, seg)	(*copyinfn)(arg, vap, size)
 
 #endif
 
@@ -123,7 +123,8 @@ extern int threadcmp(struct pcmpargs *, kthread_id_t);
  * The priocntl system call.
  */
 long
-priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
+priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
+    caddr_t arg2, uio_seg_t seg)
 {
 	pcinfo_t		pcinfo;
 	pcparms_t		pcparms;
@@ -144,6 +145,8 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 	int			rv = 0;
 	pid_t			saved_pid;
 	id_t			classid;
+	int (*copyinfn)(const void *, void *, size_t);
+	int (*copyoutfn)(const void *, void *, size_t);
 
 	/*
 	 * First just check the version number. Right now there is only
@@ -157,6 +160,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 	if (pc_version != PC_VERSION)
 		return (set_errno(EINVAL));
 
+	if (seg == UIO_USERSPACE) {
+		copyinfn = copyin;
+		copyoutfn = copyout;
+	} else {
+		copyinfn = kcopy;
+		copyoutfn = kcopy;
+	}
+
 	switch (cmd) {
 	case PC_GETCID:
 		/*
@@ -171,7 +182,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 			rv = loaded_classes;
 			break;
 		} else {
-			if (copyin(arg, &pcinfo, sizeof (pcinfo)))
+			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
 				return (set_errno(EFAULT));
 		}
 
@@ -204,7 +215,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		if (error)
 			return (set_errno(error));
 
-		if (copyout(&pcinfo, arg, sizeof (pcinfo)))
+		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
 			return (set_errno(EFAULT));
 
 		rv = loaded_classes;
@@ -221,7 +232,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 			rv = loaded_classes;
 			break;
 		} else {
-			if (copyin(arg, &pcinfo, sizeof (pcinfo)))
+			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
 				return (set_errno(EFAULT));
 		}
 
@@ -245,7 +256,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		if (error)
 			return (set_errno(error));
 
-		if (copyout(&pcinfo, arg, sizeof (pcinfo)))
+		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
 			return (set_errno(EFAULT));
 
 		rv = loaded_classes;
@@ -259,13 +270,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		 * because it's done on a per thread basis by parmsset().
 		 */
 		if (cmd == PC_SETPARMS) {
-			if (copyin(arg, &pcparms, sizeof (pcparms)))
+			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
 				return (set_errno(EFAULT));
 
 			error = parmsin(&pcparms, NULL);
 		} else {
-			if (copyin(arg, clname, PC_CLNMSZ) ||
-			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms)))
+			if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
+			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
+			    seg))
 				return (set_errno(EFAULT));
 			clname[PC_CLNMSZ-1] = '\0';
 
@@ -281,7 +293,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		/*
 		 * Get the procset from the user.
 		 */
-		if (copyin(psp, &procset, sizeof (procset)))
+		if ((*copyinfn)(psp, &procset, sizeof (procset)))
 			return (set_errno(EFAULT));
 
 		/*
@@ -372,11 +384,11 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 	case PC_GETPARMS:
 	case PC_GETXPARMS:
 		if (cmd == PC_GETPARMS) {
-			if (copyin(arg, &pcparms, sizeof (pcparms)))
+			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
 				return (set_errno(EFAULT));
 		} else {
 			if (arg != NULL) {
-				if (copyin(arg, clname, PC_CLNMSZ))
+				if ((*copyinfn)(arg, clname, PC_CLNMSZ))
 					return (set_errno(EFAULT));
 
 				clname[PC_CLNMSZ-1] = '\0';
@@ -385,7 +397,9 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 					return (set_errno(EINVAL));
 			} else
 				pcparms.pc_cid = PC_CLNULL;
-			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms)))
+
+			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
+			    seg))
 				return (set_errno(EFAULT));
 		}
 
@@ -393,7 +407,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		    (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
 			return (set_errno(EINVAL));
 
-		if (copyin(psp, &procset, sizeof (procset)))
+		if ((*copyinfn)(psp, &procset, sizeof (procset)))
 			return (set_errno(EFAULT));
 
 		/*
@@ -590,9 +604,10 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 			return (set_errno(error));
 
 		if (cmd == PC_GETPARMS) {
-			if (copyout(&pcparms, arg, sizeof (pcparms)))
+			if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
 				return (set_errno(EFAULT));
-		} else if ((error = vaparmsout(arg, &pcparms, &vaparms)) != 0)
+		} else if ((error = vaparmsout(arg, &pcparms, &vaparms,
+		    seg)) != 0)
 			return (set_errno(error));
 
 		/*
@@ -603,14 +618,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 
 	case PC_ADMIN:
 		if (get_udatamodel() == DATAMODEL_NATIVE) {
-			if (copyin(arg, &pcadmin, sizeof (pcadmin_t)))
+			if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
 				return (set_errno(EFAULT));
 #ifdef _SYSCALL32_IMPL
 		} else {
 			/* pcadmin struct from ILP32 callers */
 			pcadmin32_t pcadmin32;
 
-			if (copyin(arg, &pcadmin32, sizeof (pcadmin32_t)))
+			if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
 				return (set_errno(EFAULT));
 			pcadmin.pc_cid = pcadmin32.pc_cid;
 			pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
@@ -632,7 +647,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		break;
 
 	case PC_GETPRIRANGE:
-		if (copyin(arg, &pcpri, sizeof (pcpri_t)))
+		if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
 			return (set_errno(EFAULT));
 
 		if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
@@ -640,7 +655,7 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 
 		error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
 		if (!error) {
-			if (copyout(&pcpri, arg, sizeof (pcpri)))
+			if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
 				return (set_errno(EFAULT));
 		}
 		break;
@@ -649,14 +664,14 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 		/*
 		 * Get pcnice and procset structures from the user.
 		 */
-		if (copyin(arg, &pcnice, sizeof (pcnice)) ||
-		    copyin(psp, &procset, sizeof (procset)))
+		if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
+		    (*copyinfn)(psp, &procset, sizeof (procset)))
 			return (set_errno(EFAULT));
 
 		error = donice(&procset, &pcnice);
 
 		if (!error && (pcnice.pc_op == PC_GETNICE)) {
-			if (copyout(&pcnice, arg, sizeof (pcnice)))
+			if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
 				return (set_errno(EFAULT));
 		}
 		break;
@@ -684,6 +699,12 @@ priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
 	return (error ? (set_errno(error)) : rv);
 }
 
+/*
+ * The priocntl system call entry point: defers to priocntl_common() with
+ * UIO_USERSPACE, so psp, arg and arg2 are accessed with copyin/copyout.
+ */
+long
+priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
+{
+	long rv;
+
+	rv = priocntl_common(pc_version, psp, cmd, arg, arg2, UIO_USERSPACE);
+	return (rv);
+}
 
 /*
  * The proccmp() function is part of the implementation of the
@@ -844,7 +865,7 @@ setparms(proc_t *targpp, struct stprmargs *stprmp)
 		return (0);
 }
 
-static int
+int
 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
 {
 	int error = 0;
@@ -889,7 +910,7 @@ setthreadnice(pcnice_t *pcnice, kthread_t *tp)
 	return (error);
 }
 
-static int
+int
 setprocnice(proc_t *pp, pcnice_t *pcnice)
 {
 	kthread_t *tp;
diff --git a/usr/src/uts/common/disp/thread.c b/usr/src/uts/common/disp/thread.c
index 91b4db8103..5f352b2203 100644
--- a/usr/src/uts/common/disp/thread.c
+++ b/usr/src/uts/common/disp/thread.c
@@ -64,6 +64,7 @@
 #include <sys/spl.h>
 #include <sys/copyops.h>
 #include <sys/rctl.h>
+#include <sys/brand.h>
 #include <sys/pool.h>
 #include <sys/zone.h>
 #include <sys/tsol/label.h>
@@ -186,6 +187,7 @@ thread_init(void)
 
 	rctl_init();
 	project_init();
+	brand_init();
 	zone_init();
 	task_init();
 	tcache_init();
diff --git a/usr/src/uts/common/disp/ts.c b/usr/src/uts/common/disp/ts.c
index a190297100..738a2e47b4 100644
--- a/usr/src/uts/common/disp/ts.c
+++ b/usr/src/uts/common/disp/ts.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
  */
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1269,14 +1268,14 @@ ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
 		tspp->ts_flags |= TSIASET;
 		thread_unlock(tx);
 	}
-	TTY_HOLD(p->p_sessp);
+	mutex_enter(&p->p_sessp->s_lock);
 	sess_held = 1;
 	if ((pid == sid) && (p->p_sessp->s_vp != NULL) &&
 	    ((stp = p->p_sessp->s_vp->v_stream) != NULL)) {
 		if ((stp->sd_pgidp != NULL) && (stp->sd_sidp != NULL)) {
 			pgid = stp->sd_pgidp->pid_id;
 			sess_held = 0;
-			TTY_RELE(p->p_sessp);
+			mutex_exit(&p->p_sessp->s_lock);
 			if (iaparmsp->ia_mode ==
 			    IA_SET_INTERACTIVE) {
 				off = 0;
@@ -1292,7 +1291,7 @@ ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
 		}
 	}
 	if (sess_held)
-		TTY_RELE(p->p_sessp);
+		mutex_exit(&p->p_sessp->s_lock);
 
 	thread_lock(tx);
 
@@ -2130,14 +2129,14 @@ ia_set_process_group(pid_t sid, pid_t bg_pgid, pid_t fg_pgid)
 	 * that do not have focus and are changing the process group
 	 * attatched to the tty, e.g. a process that is exiting
 	 */
-	TTY_HOLD(leader->p_sessp);
+	mutex_enter(&leader->p_sessp->s_lock);
 	if (!(tspp->ts_flags & TSIASET) ||
 	    (leader->p_sessp->s_vp == NULL) ||
 	    (leader->p_sessp->s_vp->v_stream == NULL)) {
-		TTY_RELE(leader->p_sessp);
+		mutex_exit(&leader->p_sessp->s_lock);
 		return;
 	}
-	TTY_RELE(leader->p_sessp);
+	mutex_exit(&leader->p_sessp->s_lock);
 
 	/*
 	 * If we're already holding the leader's p_lock, we should use
diff --git a/usr/src/uts/common/exec/aout/aout.c b/usr/src/uts/common/exec/aout/aout.c
index 5c7b6b1773..4e814b339b 100644
--- a/usr/src/uts/common/exec/aout/aout.c
+++ b/usr/src/uts/common/exec/aout/aout.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -56,19 +55,19 @@
 
 static int aoutexec(vnode_t *vp, execa_t *uap, uarg_t *args,
     intpdata_t *idatap, int level, long *execsz, int setid,
-    caddr_t exec_file, cred_t *cred);
+    caddr_t exec_file, cred_t *cred, int brand_action);
 static int get_aout_head(struct vnode **vpp, struct exdata *edp, long *execsz,
     int *isdyn);
 static int aoutcore(vnode_t *vp, proc_t *pp, cred_t *credp,
     rlim64_t rlimit, int sig, core_content_t content);
 #ifdef	_LP64
 extern int elf32exec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
-    long *, int, caddr_t, cred_t *);
+    long *, int, caddr_t, cred_t *, int);
 extern int elf32core(vnode_t *, proc_t *, cred_t *, rlim64_t, int,
     core_content_t);
 #else	/* _LP64 */
 extern int elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
-    long *, int, caddr_t, cred_t *);
+    long *, int, caddr_t, cred_t *, int);
 extern int elfcore(vnode_t *, proc_t *, cred_t *, rlim64_t, int,
     core_content_t);
 #endif	/* _LP64 */
@@ -141,7 +140,7 @@ _info(struct modinfo *modinfop)
 static int
 aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args,
     struct intpdata *idatap, int level, long *execsz, int setid,
-    caddr_t exec_file, cred_t *cred)
+    caddr_t exec_file, cred_t *cred, int brand_action)
 {
 	int error;
 	struct exdata edp, edpout;
@@ -201,10 +200,10 @@ aoutexec(vnode_t *vp, struct execa *uap, struct uarg *args,
 	}
 #ifdef	_LP64
 	if (error = elf32exec(nvp, uap, args, idatap, level, execsz,
-	    setid, exec_file, cred))
+	    setid, exec_file, cred, brand_action))
 #else	/* _LP64 */
 	if (error = elfexec(nvp, uap, args, idatap, level, execsz,
-	    setid, exec_file, cred))
+	    setid, exec_file, cred, brand_action))
 #endif	/* _LP64 */
 	{
 		VN_RELE(nvp);
diff --git a/usr/src/uts/common/exec/elf/elf.c b/usr/src/uts/common/exec/elf/elf.c
index 33e3cc9b8e..6508cdae85 100644
--- a/usr/src/uts/common/exec/elf/elf.c
+++ b/usr/src/uts/common/exec/elf/elf.c
@@ -62,8 +62,11 @@
 #include <sys/shm_impl.h>
 #include <sys/archsystm.h>
 #include <sys/fasttrap.h>
+#include <sys/brand.h>
 #include "elf_impl.h"
 
+#include <sys/sdt.h>
+
 extern int at_flags;
 
 #define	ORIGIN_STR	"ORIGIN"
@@ -77,7 +80,7 @@ static int getelfshdr(vnode_t *, cred_t *, const Ehdr *, int, int, caddr_t *,
 static size_t elfsize(Ehdr *, int, caddr_t, uintptr_t *);
 static int mapelfexec(vnode_t *, Ehdr *, int, caddr_t,
     Phdr **, Phdr **, Phdr **, Phdr **, Phdr *,
-    caddr_t *, caddr_t *, intptr_t *, size_t, long *, size_t *);
+    caddr_t *, caddr_t *, intptr_t *, intptr_t *, size_t, long *, size_t *);
 
 typedef enum {
 	STR_CTF,
@@ -160,10 +163,83 @@ dtrace_safe_phdr(Phdr *phdrp, struct uarg *args, uintptr_t base)
 	return (0);
 }
 
+/*
+ * Map in the executable pointed to by vp.  Returns 0 on success; on
+ * failure, reports the problem with uprintf() and returns an errno value.
+ * *uphdr_vaddr is set to (Elf32_Addr)-1 when mapelfexec() finds no uphdr.
+ */
+int
+mapexec_brand(vnode_t *vp, uarg_t *args, Ehdr *ehdr, Elf32_Addr *uphdr_vaddr,
+    intptr_t *voffset, caddr_t exec_file, int *interp, caddr_t *bssbase,
+    caddr_t *brkbase, size_t *brksize)
+{
+	size_t		len;
+	struct vattr	vat;
+	caddr_t		phdrbase = NULL;
+	ssize_t		phdrsize;
+	int		nshdrs, shstrndx, nphdrs;
+	int		error = 0;
+	Phdr		*uphdr = NULL;
+	Phdr		*junk = NULL;
+	Phdr		*dynphdr = NULL;
+	Phdr		*dtrphdr = NULL;
+	uintptr_t	lddata;
+	long		execsz = 0;	/* value is never read here */
+	intptr_t	minaddr;
+
+	if ((error = execpermissions(vp, &vat, args)) != 0) {
+		uprintf("%s: Cannot execute %s\n", exec_file, args->pathname);
+		return (error);
+	}
+
+	if ((error = getelfhead(vp, CRED(), ehdr, &nshdrs, &shstrndx,
+	    &nphdrs)) != 0 ||
+	    (error = getelfphdr(vp, CRED(), ehdr, nphdrs, &phdrbase,
+	    &phdrsize)) != 0) {
+		uprintf("%s: Cannot read %s\n", exec_file, args->pathname);
+		return (error);
+	}
+
+	if ((len = elfsize(ehdr, nphdrs, phdrbase, &lddata)) == 0) {
+		uprintf("%s: Nothing to load in %s\n", exec_file,
+		    args->pathname);
+		kmem_free(phdrbase, phdrsize);
+		return (ENOEXEC);
+	}
+
+	if ((error = mapelfexec(vp, ehdr, nphdrs, phdrbase, &uphdr, &dynphdr,
+	    &junk, &dtrphdr, NULL, bssbase, brkbase, voffset, &minaddr,
+	    len, &execsz, brksize)) != 0) {
+		uprintf("%s: Cannot map %s\n", exec_file, args->pathname);
+		kmem_free(phdrbase, phdrsize);
+		return (error);
+	}
+
+	/* Inform our caller if the executable needs an interpreter. */
+	*interp = (dynphdr == NULL) ? 0 : 1;
+
+	/*
+	 * If this is a statically linked executable, voffset should indicate
+	 * the address of the executable itself (it normally holds the address
+	 * of the interpreter).
+	 */
+	if (ehdr->e_type == ET_EXEC && *interp == 0)
+		*voffset = minaddr;
+
+	if (uphdr != NULL)
+		*uphdr_vaddr = uphdr->p_vaddr;
+	else
+		*uphdr_vaddr = (Elf32_Addr)-1;
+
+	kmem_free(phdrbase, phdrsize);
+	return (0);
+}
+
 /*ARGSUSED*/
 int
 elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
-    int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred)
+    int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
+    int brand_action)
 {
 	caddr_t		phdrbase = NULL;
 	caddr_t 	bssbase = 0;
@@ -175,10 +251,10 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 	ssize_t		resid;
 	int		fd = -1;
 	intptr_t	voffset;
-	Phdr	*dyphdr = NULL;
-	Phdr	*stphdr = NULL;
-	Phdr	*uphdr = NULL;
-	Phdr	*junk = NULL;
+	Phdr		*dyphdr = NULL;
+	Phdr		*stphdr = NULL;
+	Phdr		*uphdr = NULL;
+	Phdr		*junk = NULL;
 	size_t		len;
 	ssize_t		phdrsize;
 	int		postfixsize = 0;
@@ -189,6 +265,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 	int		hasu = 0;
 	int		hasauxv = 0;
 	int		hasdy = 0;
+	int		branded = 0;
 
 	struct proc *p = ttoproc(curthread);
 	struct user *up = PTOU(p);
@@ -209,6 +286,13 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 
 	ASSERT(p->p_model == DATAMODEL_ILP32 || p->p_model == DATAMODEL_LP64);
 
+	if ((level < 2) &&
+	    (brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+		return (BROP(p)->b_elfexec(vp, uap, args,
+		    idatap, level + 1, execsz, setid, exec_file, cred,
+		    brand_action));
+	}
+
 	bigwad = kmem_alloc(sizeof (struct bigwad), KM_SLEEP);
 	ehdrp = &bigwad->ehdr;
 	dlnp = bigwad->dl_name;
@@ -353,6 +437,22 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 	} else
 		args->auxsize = 0;
 
+	/*
+	 * If this binary is using an emulator, we need to add an
+	 * AT_SUN_EMULATOR aux entry.
+	 */
+	if (args->emulator != NULL)
+		args->auxsize += sizeof (aux_entry_t);
+
+	if ((brand_action != EBA_NATIVE) && (PROC_IS_BRANDED(p))) {
+		branded = 1;
+		/*
+		 * We will be adding 2 entries to the aux vector.  One for
+		 * the branded binary's phdr and one for the brandname.
+		 */
+		args->auxsize += 2 * sizeof (aux_entry_t);
+	}
+
 	aux = bigwad->elfargs;
 	/*
 	 * Move args to the user's stack.
@@ -364,6 +464,7 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 		}
 		goto out;
 	}
+	/* we're single threaded after this point */
 
 	/*
 	 * If this is an ET_DYN executable (shared object),
@@ -377,8 +478,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 	dtrphdr = NULL;
 
 	if ((error = mapelfexec(vp, ehdrp, nphdrs, phdrbase, &uphdr, &dyphdr,
-	    &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, len,
-	    execsz, &brksize)) != 0)
+	    &stphdr, &dtrphdr, dataphdrp, &bssbase, &brkbase, &voffset, NULL,
+	    len, execsz, &brksize)) != 0)
 		goto bad;
 
 	if (uphdr != NULL && dyphdr == NULL)
@@ -542,8 +643,8 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 		dtrphdr = NULL;
 
 		error = mapelfexec(nvp, ehdrp, nphdrs, phdrbase, &junk, &junk,
-		    &junk, &dtrphdr, NULL, NULL, NULL, &voffset, len, execsz,
-		    NULL);
+		    &junk, &dtrphdr, NULL, NULL, NULL, &voffset, NULL, len,
+		    execsz, NULL);
 		if (error || junk != NULL) {
 			VN_RELE(nvp);
 			uprintf("%s: Cannot map %s\n", exec_file, dlnp);
@@ -601,6 +702,16 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 #else
 		ADDAUX(aux, AT_SUN_HWCAP, auxv_hwcap)
 #endif
+		if (branded) {
+			/*
+			 * Reserve space for the brand-private aux vector entry,
+			 * and record the user addr of that space.
+			 */
+			args->brand_auxp = (auxv32_t *)((char *)args->stackend +
+			    ((char *)&aux->a_type - (char *)bigwad->elfargs));
+			ADDAUX(aux, AT_SUN_BRAND_PHDR, 0)
+		}
+
 		ADDAUX(aux, AT_NULL, 0)
 		postfixsize = (char *)aux - (char *)bigwad->elfargs;
 		ASSERT(postfixsize == args->auxsize);
@@ -639,6 +750,9 @@ elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
 
 		/*
 		 * Copy auxv to the process's user structure for use by /proc.
+		 * If this is a branded process, the brand's exec routine will
+		 * copy its private entries to the user structure later. It
+		 * relies on the fact that the blank entries are at the end.
 		 */
 		num_auxv = postfixsize / sizeof (aux_entry_t);
 		ASSERT(num_auxv <= sizeof (up->u_auxv) / sizeof (auxv_t));
@@ -968,6 +1082,7 @@ mapelfexec(
 	caddr_t *bssbase,
 	caddr_t *brkbase,
 	intptr_t *voffset,
+	intptr_t *minaddr,
 	size_t len,
 	long *execsz,
 	size_t *brksize)
@@ -980,6 +1095,7 @@ mapelfexec(
 	int page;
 	off_t offset;
 	int hsize = ehdr->e_phentsize;
+	caddr_t mintmp = (caddr_t)-1;
 
 	if (ehdr->e_type == ET_DYN) {
 		/*
@@ -1010,6 +1126,14 @@ mapelfexec(
 				prot |= PROT_EXEC;
 
 			addr = (caddr_t)((uintptr_t)phdr->p_vaddr + *voffset);
+
+			/*
+			 * Keep track of the segment with the lowest starting
+			 * address.
+			 */
+			if (addr < mintmp)
+				mintmp = addr;
+
 			zfodsz = (size_t)phdr->p_memsz - phdr->p_filesz;
 
 			offset = phdr->p_offset;
@@ -1110,6 +1234,12 @@ mapelfexec(
 		}
 		phdr = (Phdr *)((caddr_t)phdr + hsize);
 	}
+
+	if (minaddr != NULL) {
+		ASSERT(mintmp != (caddr_t)-1);
+		*minaddr = (intptr_t)mintmp;
+	}
+
 	return (0);
 bad:
 	if (error == 0)
@@ -1850,13 +1980,14 @@ static struct execsw esw = {
 };
 
 static struct modlexec modlexec = {
-	&mod_execops, "exec module for elf", &esw
+	&mod_execops, "exec module for elf %I%", &esw
 };
 
 #ifdef	_LP64
 extern int elf32exec(vnode_t *vp, execa_t *uap, uarg_t *args,
 			intpdata_t *idatap, int level, long *execsz,
-			int setid, caddr_t exec_file, cred_t *cred);
+			int setid, caddr_t exec_file, cred_t *cred,
+			int brand_action);
 extern int elf32core(vnode_t *vp, proc_t *p, cred_t *credp,
 			rlim64_t rlimit, int sig, core_content_t content);
 
diff --git a/usr/src/uts/common/exec/elf/elf_impl.h b/usr/src/uts/common/exec/elf/elf_impl.h
index 52094e3794..010d5e6256 100644
--- a/usr/src/uts/common/exec/elf/elf_impl.h
+++ b/usr/src/uts/common/exec/elf/elf_impl.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -80,6 +79,7 @@ typedef struct {
 #define	elfexec	elf32exec
 #define	elfnote	elf32note
 #define	elfcore	elf32core
+#define	mapexec_brand		mapexec32_brand
 #define	setup_note_header	setup_note_header32
 #define	write_elfnotes		write_elfnotes32
 #define	setup_old_note_header	setup_old_note_header32
diff --git a/usr/src/uts/common/exec/intp/intp.c b/usr/src/uts/common/exec/intp/intp.c
index 6c6c98246d..4d5c04dfd4 100644
--- a/usr/src/uts/common/exec/intp/intp.c
+++ b/usr/src/uts/common/exec/intp/intp.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -45,6 +44,7 @@
 #include <sys/disp.h>
 #include <sys/exec.h>
 #include <sys/kmem.h>
+#include <sys/note.h>
 
 /*
  * This is the loadable module wrapper.
@@ -166,8 +166,10 @@ intpexec(
 	long *execsz,
 	int setid,
 	caddr_t exec_file,
-	struct cred *cred)
+	struct cred *cred,
+	int brand_action)
 {
+	_NOTE(ARGUNUSED(brand_action))
 	vnode_t *nvp;
 	int error = 0;
 	struct intpdata idata;
@@ -223,8 +225,8 @@ intpexec(
 		args->fname = devfd;
 	}
 
-	error = gexec(&nvp, uap, args, &idata, ++level,
-		execsz, exec_file, cred);
+	error = gexec(&nvp, uap, args, &idata, ++level, execsz, exec_file, cred,
+	    EBA_NONE);
 done:
 	VN_RELE(nvp);
 	args->pathname = opath;
diff --git a/usr/src/uts/common/exec/java/java.c b/usr/src/uts/common/exec/java/java.c
index 0e8c3996e7..bcf61453c9 100644
--- a/usr/src/uts/common/exec/java/java.c
+++ b/usr/src/uts/common/exec/java/java.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -88,7 +87,7 @@ char *jexec_arg = "-jar";
 static int
 javaexec(vnode_t *vp, struct execa *uap, struct uarg *args,
     struct intpdata *idatap, int level, long *execsz, int setid,
-    caddr_t execfile, cred_t *cred)
+    caddr_t execfile, cred_t *cred, int brand_action)
 {
 	struct intpdata idata;
 	int error;
@@ -162,8 +161,8 @@ javaexec(vnode_t *vp, struct execa *uap, struct uarg *args,
 	args->pathname = resolvepn.pn_path;
 	/* don't free resolvepn until we are done with args */
 	pn_free(&lookpn);
-	error = gexec(&nvp,
-		uap, args, &idata, level + 1, execsz, execfile, cred);
+	error = gexec(&nvp, uap, args, &idata, level + 1, execsz, execfile,
+	    cred, EBA_NONE);
 	VN_RELE(nvp);
 	args->pathname = opath;
 	pn_free(&resolvepn);
diff --git a/usr/src/uts/common/fs/fifofs/fifosubr.c b/usr/src/uts/common/fs/fifofs/fifosubr.c
index 3ee72c9124..8767999322 100644
--- a/usr/src/uts/common/fs/fifofs/fifosubr.c
+++ b/usr/src/uts/common/fs/fifofs/fifosubr.c
@@ -304,7 +304,8 @@ static void fifo_reinit_vp(vnode_t *vp)
 {
 	vn_reinit(vp);
 	vp->v_type = VFIFO;
-	vp->v_flag = VNOMAP | VNOSWAP;
+	vp->v_flag &= VROOT;
+	vp->v_flag |= VNOMAP | VNOSWAP;
 }
 
 /*
@@ -470,6 +471,7 @@ fifovp(vnode_t *vp, cred_t *crp)
 	fifo_reinit_vp(newvp);
 	newvp->v_vfsp = vp->v_vfsp;
 	newvp->v_rdev = vp->v_rdev;
+	newvp->v_flag |= (vp->v_flag & VROOT);
 
 	fifoinsert(fnp);
 	mutex_exit(&ftable_lock);
diff --git a/usr/src/uts/common/fs/fifofs/fifovnops.c b/usr/src/uts/common/fs/fifofs/fifovnops.c
index cab88019ff..34f731af1e 100644
--- a/usr/src/uts/common/fs/fifofs/fifovnops.c
+++ b/usr/src/uts/common/fs/fifofs/fifovnops.c
@@ -77,6 +77,8 @@ static int fifo_setattr(vnode_t *, vattr_t *, int, cred_t *,
 	caller_context_t *);
 static int fifo_realvp(vnode_t *, vnode_t **);
 static int fifo_access(vnode_t *, int, int, cred_t *);
+static int fifo_create(struct vnode *, char *, vattr_t *, enum vcexcl,
+    int, struct vnode **, struct cred *, int);
 static int fifo_fid(vnode_t *, fid_t *);
 static int fifo_fsync(vnode_t *, int, cred_t *);
 static int fifo_seek(vnode_t *, offset_t, offset_t *);
@@ -116,6 +118,7 @@ const fs_operation_def_t fifo_vnodeops_template[] = {
 	VOPNAME_GETATTR, fifo_getattr,
 	VOPNAME_SETATTR, fifo_setattr,
 	VOPNAME_ACCESS, fifo_access,
+	VOPNAME_CREATE, fifo_create,
 	VOPNAME_FSYNC, fifo_fsync,
 	VOPNAME_INACTIVE, (fs_generic_func_p) fifo_inactive,
 	VOPNAME_FID, fifo_fid,
@@ -1542,6 +1545,27 @@ fifo_access(vnode_t *vp, int mode, int flags, cred_t *crp)
 }
 
 /*
+ * This can be called if creat or an open with O_CREAT is done on the root
+ * of a lofs mount where the mounted entity is a fifo.
+ */
+/*ARGSUSED*/
+static int
+fifo_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl,
+    int mode, struct vnode **vpp, struct cred *cr, int flag)
+{
+	int error;	/* set only when fifo_access() is called */
+
+	ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
+	if (excl == NONEXCL) {	/* non-exclusive: reuse the existing fifo */
+		if (mode && (error = fifo_access(dvp, mode, 0, cr)))
+			return (error);
+		VN_HOLD(dvp);	/* NOTE(review): *vpp is left untouched */
+		return (0);
+	}
+	return (EEXIST);	/* excl != NONEXCL: refuse */
+}
+
+/*
  * If shadowing a vnode, apply the VOP_FSYNC to it.
  * Otherwise, return 0.
  */
diff --git a/usr/src/uts/common/fs/nfs/nfs4_subr.c b/usr/src/uts/common/fs/nfs/nfs4_subr.c
index 9278fe03da..2a6505ccf9 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_subr.c
@@ -1451,7 +1451,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
 			} else
 				mutex_exit(&mi->mi_lock);
 
-			if (*doqueue && curproc->p_sessp->s_vp != NULL) {
+			if (*doqueue && nfs_has_ctty()) {
 				*doqueue = 0;
 				if (!(mi->mi_flags & MI4_NOPRINT))
 					nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
@@ -1481,7 +1481,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
 			bufp = clnt_sperror(client, svp->sv_hostname);
 			zprintf(zoneid, "NFS%d %s failed for %s\n",
 			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
-			if (curproc->p_sessp->s_vp != NULL) {
+			if (nfs_has_ctty()) {
 				if (!(mi->mi_flags & MI4_NOPRINT)) {
 					uprintf("NFS%d %s failed for %s\n",
 					    mi->mi_vers, mi->mi_rfsnames[which],
@@ -1494,7 +1494,7 @@ nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
 			    "NFS %s failed for server %s: error %d (%s)\n",
 			    mi->mi_rfsnames[which], svp->sv_hostname,
 			    status, clnt_sperrno(status));
-			if (curproc->p_sessp->s_vp != NULL) {
+			if (nfs_has_ctty()) {
 				if (!(mi->mi_flags & MI4_NOPRINT)) {
 					uprintf(
 				"NFS %s failed for server %s: error %d (%s)\n",
diff --git a/usr/src/uts/common/fs/nfs/nfs_subr.c b/usr/src/uts/common/fs/nfs/nfs_subr.c
index 05e70935be..bf1beb1275 100644
--- a/usr/src/uts/common/fs/nfs/nfs_subr.c
+++ b/usr/src/uts/common/fs/nfs/nfs_subr.c
@@ -1235,7 +1235,7 @@ failoverretry:
 #endif
 			} else
 				mutex_exit(&mi->mi_lock);
-			if (*douprintf && curproc->p_sessp->s_vp != NULL) {
+			if (*douprintf && nfs_has_ctty()) {
 				*douprintf = 0;
 				if (!(mi->mi_flags & MI_NOPRINT))
 #ifdef DEBUG
@@ -1292,7 +1292,7 @@ failoverretry:
 			bufp = clnt_sperror(client, svp->sv_hostname);
 			zprintf(zoneid, "NFS%d %s failed for %s\n",
 			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
-			if (curproc->p_sessp->s_vp != NULL) {
+			if (nfs_has_ctty()) {
 				if (!(mi->mi_flags & MI_NOPRINT)) {
 					uprintf("NFS%d %s failed for %s\n",
 					    mi->mi_vers, mi->mi_rfsnames[which],
@@ -1305,7 +1305,7 @@ failoverretry:
 			    "NFS %s failed for server %s: error %d (%s)\n",
 			    mi->mi_rfsnames[which], svp->sv_hostname,
 			    status, clnt_sperrno(status));
-			if (curproc->p_sessp->s_vp != NULL) {
+			if (nfs_has_ctty()) {
 				if (!(mi->mi_flags & MI_NOPRINT)) {
 					uprintf(
 				"NFS %s failed for server %s: error %d (%s)\n",
@@ -1821,7 +1821,7 @@ failoverretry:
 #endif
 			} else
 				mutex_exit(&mi->mi_lock);
-			if (*douprintf && curproc->p_sessp->s_vp != NULL) {
+			if (*douprintf && nfs_has_ctty()) {
 				*douprintf = 0;
 				if (!(mi->mi_flags & MI_NOPRINT))
 #ifdef DEBUG
@@ -1886,7 +1886,7 @@ failoverretry:
 			bufp = clnt_sperror(client, svp->sv_hostname);
 			zprintf(zoneid, "NFS_ACL%d %s failed for %s\n",
 			    mi->mi_vers, mi->mi_aclnames[which], bufp);
-			if (curproc->p_sessp->s_vp != NULL) {
+			if (nfs_has_ctty()) {
 				if (!(mi->mi_flags & MI_NOPRINT)) {
 					uprintf("NFS_ACL%d %s failed for %s\n",
 					    mi->mi_vers, mi->mi_aclnames[which],
@@ -1899,7 +1899,7 @@ failoverretry:
 			    "NFS %s failed for server %s: error %d (%s)\n",
 			    mi->mi_aclnames[which], svp->sv_hostname,
 			    status, clnt_sperrno(status));
-			if (curproc->p_sessp->s_vp != NULL) {
+			if (nfs_has_ctty()) {
 				if (!(mi->mi_flags & MI_NOPRINT))
 					uprintf(
 				"NFS %s failed for server %s: error %d (%s)\n",
@@ -5117,3 +5117,13 @@ out:
 	label_rele(zlabel);
 	return (retv);
 }
+
+boolean_t
+nfs_has_ctty(void)
+{
+	boolean_t rv;	/* B_TRUE iff the session's s_vp is non-NULL */
+	mutex_enter(&curproc->p_splock);	/* taken around p_sessp read */
+	rv = (curproc->p_sessp->s_vp != NULL);	/* session has a ctty vnode? */
+	mutex_exit(&curproc->p_splock);
+	return (rv);
+}
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index 6a2d6f73d0..24c7ffedab 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -680,13 +680,16 @@ streams_open:
 		/* STREAMS devices don't have a size */
 		sp->s_size = csp->s_size = 0;
 
-		/*
-		 * try to allocate it as a controlling terminal
-		 */
-		if ((stp->sd_flag & STRISTTY) && !(flag & FNOCTTY))
-			stralloctty(stp);
+		if (!(stp->sd_flag & STRISTTY) || (flag & FNOCTTY))
+			return (0);
 
-		return (0);
+		/* try to allocate it as a controlling terminal */
+		if (strctty(stp) != EINTR)
+			return (0);
+
+		/* strctty() was interrupted by a signal */
+		(void) spec_close(vp, flag, 1, 0, cr);
+		return (EINTR);
 	}
 
 	/*
diff --git a/usr/src/uts/common/fs/vnode.c b/usr/src/uts/common/fs/vnode.c
index 7c64462314..49bde7abeb 100644
--- a/usr/src/uts/common/fs/vnode.c
+++ b/usr/src/uts/common/fs/vnode.c
@@ -943,7 +943,7 @@ top:
 	 * Do remaining checks for FNOFOLLOW and FNOLINKS.
 	 */
 	if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
-		error = EINVAL;
+		error = ELOOP;
 		goto out;
 	}
 	if (filemode & FNOLINKS) {
diff --git a/usr/src/uts/common/io/gentty.c b/usr/src/uts/common/io/gentty.c
index 9cb3e23b87..431e80245d 100644
--- a/usr/src/uts/common/io/gentty.c
+++ b/usr/src/uts/common/io/gentty.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 1990-1992,1996,1998-2003 Sun Microsystems, Inc.
+ * Copyright 2006 Sun Microsystems, Inc.
  * All rights reserved.
  * Use is subject to license terms.
  */
@@ -198,17 +197,20 @@ syopen(dev_t *devp, int flag, int otyp, struct cred *cr)
 {
 	dev_t	ttyd;
 	vnode_t	*ttyvp;
-	sess_t	*sp = curproc->p_sessp;
+	sess_t	*sp;
 	int	error;
 
-	if ((ttyd = sp->s_dev) == NODEV)
+	if ((sp = tty_hold()) == NULL)
+		return (EINTR);
+
+	if (sp->s_dev == NODEV) {
+		tty_rele(sp);
 		return (ENXIO);
-	TTY_HOLD(sp);
-	if ((ttyvp = sp->s_vp) == NULL) {
-		TTY_RELE(sp);
-		return (EIO);
 	}
 
+	ttyd = sp->s_dev;
+	ttyvp = sp->s_vp;
+
 	/*
 	 * Open the control terminal. The control terminal may be
 	 * opened multiple times and it is closed in freectty().
@@ -237,10 +239,12 @@ syopen(dev_t *devp, int flag, int otyp, struct cred *cr)
 		ASSERT(vn_matchops(ttyvp, spec_getvnodeops()));
 		csp = VTOS(VTOS(ttyvp)->s_commonvp);
 		mutex_enter(&csp->s_lock);
+		ASSERT(csp->s_count > 1);
 		csp->s_count--;
 		mutex_exit(&csp->s_lock);
 	}
-	TTY_RELE(sp);
+
+	tty_rele(sp);
 	return (error);
 }
 
@@ -255,41 +259,41 @@ syclose(dev_t dev, int flag, int otyp, struct cred *cr)
 int
 syread(dev_t dev, struct uio *uiop, struct cred *cr)
 {
-	vnode_t *ttyvp;
-	sess_t	*sp = curproc->p_sessp;
+	sess_t	*sp;
 	int	error;
 
-	if (sp->s_dev == NODEV)
+	if ((sp = tty_hold()) == NULL)
+		return (EINTR);
+
+	if (sp->s_dev == NODEV) {
+		tty_rele(sp);
 		return (ENXIO);
-	TTY_HOLD(sp);
-	if ((ttyvp = sp->s_vp) == NULL) {
-		TTY_RELE(sp);
-		return (EIO);
 	}
-	error = VOP_READ(ttyvp, uiop, 0, cr, NULL);
-	TTY_RELE(sp);
-	return (error);
 
+	error = VOP_READ(sp->s_vp, uiop, 0, cr, NULL);
+
+	tty_rele(sp);
+	return (error);
 }
 
 /* ARGSUSED */
 int
 sywrite(dev_t dev, struct uio *uiop, struct cred *cr)
 {
-	vnode_t *ttyvp;
-	sess_t	*sp = curproc->p_sessp;
+	sess_t	*sp;
 	int	error;
 
-	if (sp->s_dev == NODEV)
+	if ((sp = tty_hold()) == NULL)
+		return (EINTR);
+
+	if (sp->s_dev == NODEV) {
+		tty_rele(sp);
 		return (ENXIO);
-	TTY_HOLD(sp);
-	if ((ttyvp = sp->s_vp) == NULL) {
-		TTY_RELE(sp);
-		return (EIO);
 	}
 
-	error = VOP_WRITE(ttyvp, uiop, 0, cr, NULL);
-	TTY_RELE(sp);
+	error = VOP_WRITE(sp->s_vp, uiop, 0, cr, NULL);
+
+	tty_rele(sp);
 	return (error);
 }
 
@@ -299,19 +303,32 @@ int
 syioctl(dev_t dev, int cmd, intptr_t arg, int mode, struct cred *cr,
 	int *rvalp)
 {
-	vnode_t *ttyvp;
-	sess_t	*sp = curproc->p_sessp;
+	sess_t	*sp;
 	int	error;
 
-	if (sp->s_dev == NODEV)
+	if (cmd == TIOCNOTTY) {
+		/*
+		 * We can't allow this ioctl.  It attempts to remove the
+		 * ctty from a session, which requires that the ctty not
+		 * be in use, but to perform any ioctl here we first grab
+		 * a hold on the current ctty (via tty_hold()).  If we let
+		 * this ioctl pass through, we would deadlock with
+		 * ourselves.
+		 */
+		return (EINVAL);
+	}
+
+	if ((sp = tty_hold()) == NULL)
+		return (EINTR);
+
+	if (sp->s_dev == NODEV) {
+		tty_rele(sp);
 		return (ENXIO);
-	TTY_HOLD(sp);
-	if ((ttyvp = sp->s_vp) == NULL) {
-		TTY_RELE(sp);
-		return (EIO);
 	}
-	error = VOP_IOCTL(ttyvp, cmd, arg, mode, cr, rvalp);
-	TTY_RELE(sp);
+
+	error = VOP_IOCTL(sp->s_vp, cmd, arg, mode, cr, rvalp);
+
+	tty_rele(sp);
 	return (error);
 }
 
@@ -322,18 +339,19 @@ int
 sypoll(dev_t dev, short events, int anyyet, short *reventsp,
 	struct pollhead **phpp)
 {
-	vnode_t *ttyvp;
-	sess_t  *sp = curproc->p_sessp;
+	sess_t  *sp;
 	int	error;
 
-	if (sp->s_dev == NODEV)
+	if ((sp = tty_hold()) == NULL)
+		return (EINTR);
+
+	if (sp->s_dev == NODEV) {
+		tty_rele(sp);
 		return (ENXIO);
-	TTY_HOLD(sp);
-	if ((ttyvp = sp->s_vp) == NULL) {
-		TTY_RELE(sp);
-		return (EIO);
 	}
-	error = VOP_POLL(ttyvp, events, anyyet, reventsp, phpp);
-	TTY_RELE(sp);
+
+	error = VOP_POLL(sp->s_vp, events, anyyet, reventsp, phpp);
+
+	tty_rele(sp);
 	return (error);
 }
diff --git a/usr/src/uts/common/io/l_strplumb.c b/usr/src/uts/common/io/l_strplumb.c
index 287ad1f08f..3997874684 100644
--- a/usr/src/uts/common/io/l_strplumb.c
+++ b/usr/src/uts/common/io/l_strplumb.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -228,7 +227,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
 	li = ldi_ident_from_anon();
 	if (op == SET_AUTOPUSH || op == CLR_AUTOPUSH) {
 		error = ldi_open_by_name(SAD_ADM, FREAD|FWRITE,
-		    CRED(), &lh, li);
+		    kcred, &lh, li);
 		if (error) {
 			printf("kstr_autopush: open failed error %d\n", error);
 			ldi_ident_release(li);
@@ -236,7 +235,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
 		}
 	} else	{
 		error = ldi_open_by_name(SAD_USR, FREAD|FWRITE,
-		    CRED(), &lh, li);
+		    kcred, &lh, li);
 		if (error) {
 			printf("kstr_autopush: open failed error %d\n", error);
 			ldi_ident_release(li);
@@ -253,11 +252,11 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
 		push.sap_minor = *min;
 
 		error = ldi_ioctl(lh, SAD_GAP, (intptr_t)&push,
-		    FKIOCTL, CRED(), &rval);
+		    FKIOCTL, kcred, &rval);
 		if (error) {
 			printf("kstr_autopush: ioctl failed, error %d\n",
 			    error);
-			(void) ldi_close(lh, FREAD|FWRITE, CRED());
+			(void) ldi_close(lh, FREAD|FWRITE, kcred);
 			return (error);
 		}
 		switch (push.sap_cmd) {
@@ -288,7 +287,7 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
 				(void) strcpy(mods[i], push.sap_list[i]);
 			mods[i] = NULL;
 		}
-		(void) ldi_close(lh, FREAD|FWRITE, CRED());
+		(void) ldi_close(lh, FREAD|FWRITE, kcred);
 		return (0);
 
 	case CLR_AUTOPUSH:
@@ -299,12 +298,12 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
 		push.sap_major = *maj;
 
 		error = ldi_ioctl(lh, SAD_SAP, (intptr_t)&push,
-		    FKIOCTL, CRED(), &rval);
+		    FKIOCTL, kcred, &rval);
 		if (error) {
 			printf("kstr_autopush: ioctl failed, error %d\n",
 			    error);
 		}
-		(void) ldi_close(lh, FREAD|FWRITE, CRED());
+		(void) ldi_close(lh, FREAD|FWRITE, kcred);
 		return (error);
 
 	case SET_AUTOPUSH:
@@ -338,16 +337,16 @@ kstr_autopush(int op, major_t *maj, minor_t *min, minor_t *lastmin,
 		push.sap_list[i][0] = '\0';
 
 		error = ldi_ioctl(lh, SAD_SAP, (intptr_t)&push,
-		    FKIOCTL, CRED(), &rval);
+		    FKIOCTL, kcred, &rval);
 		if (error) {
 			printf("kstr_autopush: ioctl failed, error %d\n",
 			    error);
 		}
-		(void) ldi_close(lh, FREAD|FWRITE, CRED());
+		(void) ldi_close(lh, FREAD|FWRITE, kcred);
 		return (error);
 
 	default:
-		(void) ldi_close(lh, FREAD|FWRITE, CRED());
+		(void) ldi_close(lh, FREAD|FWRITE, kcred);
 		return (EINVAL);
 	}
 }
diff --git a/usr/src/uts/common/io/ptm.c b/usr/src/uts/common/io/ptm.c
index bd4dc10511..7910b58cc8 100644
--- a/usr/src/uts/common/io/ptm.c
+++ b/usr/src/uts/common/io/ptm.c
@@ -449,6 +449,18 @@ ptmclose(queue_t *rqp, int flag, cred_t *credp)
 	return (0);
 }
 
+static boolean_t
+ptmptsopencb(ptmptsopencb_arg_t arg)
+{
+	struct pt_ttys	*ptmp = (struct pt_ttys *)arg;
+	boolean_t	nullmsg_set;
+
+	PT_ENTER_READ(ptmp);	/* sample pt_nullmsg between ENTER/EXIT_READ */
+	nullmsg_set = (ptmp->pt_nullmsg != NULL) ? B_TRUE : B_FALSE;
+	PT_EXIT_READ(ptmp);
+	return (nullmsg_set);	/* B_TRUE while pt_nullmsg is non-NULL */
+}
+
 /*
  * The wput procedure will only handle ioctl and flush messages.
  */
@@ -572,6 +584,41 @@ ptmwput(queue_t *qp, mblk_t *mp)
 			miocack(qp, mp, 0, 0);
 			break;
 		}
+		case PTMPTSOPENCB:
+		{
+			mblk_t		*dp;	/* ioctl reply data */
+			ptmptsopencb_t	*ppocb;
+
+			/* only allow the kernel to invoke this ioctl */
+			if (iocp->ioc_cr != kcred) {
+				miocnak(qp, mp, 0, EINVAL);
+				break;
+			}
+
+			/* we don't support transparent ioctls */
+			ASSERT(iocp->ioc_count != TRANSPARENT);
+			if (iocp->ioc_count == TRANSPARENT) {
+				miocnak(qp, mp, 0, EINVAL);
+				break;
+			}
+
+			/* allocate a response message */
+			dp = allocb(sizeof (ptmptsopencb_t), BPRI_MED);
+			if (dp == NULL) {
+				miocnak(qp, mp, 0, EAGAIN);
+				break;
+			}
+
+			/* initialize the ioctl results */
+			ppocb = (ptmptsopencb_t *)dp->b_rptr;
+			ppocb->ppocb_func = ptmptsopencb;
+			ppocb->ppocb_arg = (ptmptsopencb_arg_t)ptmp;
+
+			/* send the reply data */
+			mioc2ack(mp, dp, sizeof (ptmptsopencb_t), 0);
+			qreply(qp, mp);
+			break;
+		}
 		}
 		break;
 
@@ -643,6 +690,13 @@ ptmwsrv(queue_t *qp)
 	ASSERT(qp->q_ptr);
 
 	ptmp = (struct pt_ttys *)qp->q_ptr;
+
+	if ((mp = getq(qp)) == NULL) {
+		/* If there are no messages there's nothing to do. */
+		DBG(("leaving ptmwsrv (no messages)\n"));
+		return;
+	}
+
 	PT_ENTER_READ(ptmp);
 	if ((ptmp->pt_state  & PTLOCK) || (ptmp->pts_rdq == NULL)) {
 		DBG(("in master write srv proc but no slave\n"));
@@ -652,12 +706,12 @@ ptmwsrv(queue_t *qp)
 		 * the user process waiting for ACK/NAK from
 		 * the ioctl invocation
 		 */
-		while ((mp = getq(qp)) != NULL) {
+		do {
 			if (mp->b_datap->db_type == M_IOCTL)
 				miocnak(qp, mp, 0, EINVAL);
 			else
 				freemsg(mp);
-		}
+		} while ((mp = getq(qp)) != NULL);
 		flushq(qp, FLUSHALL);
 
 		mp = mexchange(NULL, NULL, 2, M_ERROR, -1);
@@ -672,7 +726,7 @@ ptmwsrv(queue_t *qp)
 	/*
 	 * while there are messages on this write queue...
 	 */
-	while ((mp = getq(qp)) != NULL) {
+	do {
 		/*
 		 * if don't have control message and cannot put
 		 * msg. on slave's read queue, put it back on
@@ -689,7 +743,7 @@ ptmwsrv(queue_t *qp)
 		 */
 		DBG(("send message to slave\n"));
 		putnext(ptmp->pts_rdq, mp);
-	}
+	} while ((mp = getq(qp)) != NULL);
 	DBG(("leaving ptmwsrv\n"));
 	PT_EXIT_READ(ptmp);
 }
diff --git a/usr/src/uts/common/nfs/nfs.h b/usr/src/uts/common/nfs/nfs.h
index eda293574e..03c32254b7 100644
--- a/usr/src/uts/common/nfs/nfs.h
+++ b/usr/src/uts/common/nfs/nfs.h
@@ -931,6 +931,7 @@ extern void	nfsauth_fini();
 extern int	nfs_setopts(vnode_t *vp, model_t model, struct nfs_args *args);
 extern int	nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr,
 		    struct knetconfig *knconf, cred_t *cr);
+extern boolean_t	nfs_has_ctty(void);
 extern void	nfs_srv_stop_all(void);
 extern void	nfs_srv_quiesce_all(void);
 extern void	(*nfs_srv_quiesce_func)(void);
diff --git a/usr/src/uts/common/os/brand.c b/usr/src/uts/common/os/brand.c
new file mode 100644
index 0000000000..15d82871bf
--- /dev/null
+++ b/usr/src/uts/common/os/brand.c
@@ -0,0 +1,323 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/kmem.h>
+#include <sys/errno.h>
+#include <sys/systm.h>
+#include <sys/cmn_err.h>
+#include <sys/brand.h>
+#include <sys/machbrand.h>
+#include <sys/modctl.h>
+#include <sys/rwlock.h>
+#include <sys/zone.h>
+
+#define	SUPPORTED_BRAND_VERSION BRAND_VER_1
+
+#if defined(__sparcv9)
+struct brand_mach_ops native_mach_ops  = {
+		NULL, NULL		/* native brand: every member NULL */
+};
+#else
+struct brand_mach_ops native_mach_ops  = {
+		NULL, NULL, NULL, NULL, NULL, NULL	/* every member NULL */
+};
+#endif	/* __sparcv9 */
+
+brand_t native_brand = {
+		BRAND_VER_1,		/* version (presumably b_version) */
+		"native",		/* brand name (presumably b_name) */
+		NULL,			/* presumably b_ops: none for native */
+		&native_mach_ops	/* machine ops table defined above */
+};
+
+/*
+ * Used to maintain a list of all the brands currently loaded into the
+ * kernel.  One entry is added per successful brand_register() call.
+ */
+struct brand_list {
+	int			bl_refcnt;	/* zones using this brand */
+	struct brand_list	*bl_next;	/* next entry; NULL at tail */
+	brand_t			*bl_brand;	/* the registered brand */
+};
+
+static struct brand_list *brand_list = NULL;	/* list head; NULL if empty */
+
+/*
+ * This lock protects the integrity of the brand list.
+ */
+static kmutex_t brand_list_lock;	/* also held for bl_refcnt updates */
+
+void
+brand_init(void)
+{
+	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
+	p0.p_brand = &native_brand;	/* p0 starts out with the native brand */
+}
+
+/* Register a brand module's brand_t; returns 0, or EINVAL if rejected. */
+int
+brand_register(brand_t *brand)
+{
+	struct brand_list *list, *scan;
+
+	if (brand == NULL)
+		return (EINVAL);
+
+	if (is_system_labeled()) {
+		cmn_err(CE_WARN,
+		    "Branded zones are not allowed on labeled systems.");
+		return (EINVAL);
+	}
+
+	/* b_name is printed by the warnings below, so validate it first. */
+	if (brand->b_name == NULL) {
+		cmn_err(CE_WARN, "Malformed brand");
+		return (EINVAL);
+	}
+
+	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
+		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
+			cmn_err(CE_WARN, "brand '%s' was built to run on "
+			    "older versions of Solaris.", brand->b_name);
+		} else {
+			cmn_err(CE_WARN, "brand '%s' was built to run on "
+			    "a newer version of Solaris.", brand->b_name);
+		}
+		return (EINVAL);
+	}
+
+	/* b_ops is examined only after the version check has passed. */
+	if (brand->b_ops == NULL || brand->b_ops->b_brandsys == NULL) {
+		cmn_err(CE_WARN, "Malformed brand");
+		return (EINVAL);
+	}
+
+	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
+
+	/* Add the brand to the list of loaded brands. */
+	mutex_enter(&brand_list_lock);
+
+	/* Check to be sure we haven't already registered this brand. */
+	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
+		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
+			cmn_err(CE_WARN,
+			    "Invalid attempt to load a second instance of "
+			    "brand %s", brand->b_name);
+			mutex_exit(&brand_list_lock);
+			kmem_free(list, sizeof (struct brand_list));
+			return (EINVAL);
+		}
+	}
+
+	list->bl_brand = brand;
+	list->bl_refcnt = 0;
+	list->bl_next = brand_list;
+	brand_list = list;
+	mutex_exit(&brand_list_lock);
+
+	return (0);
+}
+
+/*
+ * Called when the kernel module implementing a brand is being unloaded:
+ * take the brand off the list of loaded brands.  Fails with EINVAL if the
+ * brand is malformed or was never registered, and with EBUSY while any
+ * zone still holds a reference to it; returns 0 once the entry is freed.
+ */
+int
+brand_unregister(brand_t *brand)
+{
+	struct brand_list **linkp, *ent;
+
+	/* Sanity checks */
+	if (brand == NULL || brand->b_name == NULL) {
+		cmn_err(CE_WARN, "Malformed brand");
+		return (EINVAL);
+	}
+
+	mutex_enter(&brand_list_lock);
+
+	/*
+	 * Walk the links themselves so unlinking needs no "prev" pointer.
+	 */
+	for (linkp = &brand_list; (ent = *linkp) != NULL;
+	    linkp = &ent->bl_next) {
+		if (ent->bl_brand == brand)
+			break;
+	}
+
+	if (ent == NULL) {
+		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
+		mutex_exit(&brand_list_lock);
+		return (EINVAL);
+	}
+
+	if (ent->bl_refcnt > 0) {
+		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
+		    brand->b_name);
+		mutex_exit(&brand_list_lock);
+		return (EBUSY);
+	}
+
+	/* Remove brand from the list */
+	*linkp = ent->bl_next;
+	mutex_exit(&brand_list_lock);
+
+	kmem_free(ent, sizeof (struct brand_list));
+
+	return (0);
+}
+
+/*
+ * Record that a zone of this brand has been instantiated, loading the
+ * brand's kernel module ("brand/" + ba_modname) first if it is not already
+ * present.  Returns the brand with its reference count bumped, or NULL.
+ */
+brand_t *
+brand_register_zone(struct brand_attr *attr)
+{
+	struct brand_list *l = NULL;
+	ddi_modhandle_t	hdl = NULL;
+	char *modname;
+	int err = 0;
+
+	if (is_system_labeled()) {
+		cmn_err(CE_WARN,
+		    "Branded zones are not allowed on labeled systems.");
+		return (NULL);
+	}
+
+	/*
+	 * We make at most two passes through this loop.  The first time
+	 * through, we're looking to see if this is a new user of an
+	 * already loaded brand.  If the brand hasn't been loaded, we
+	 * call ddi_modopen() to force it to be loaded and then make a
+	 * second pass through the list of brands.  If we don't find the
+	 * brand the second time through it means that the modname
+	 * specified in the brand_attr structure doesn't provide the brand
+	 * specified in the brandname field.  This would suggest a bug in
+	 * the brand's config.xml file.  We close the module and return
+	 * 'NULL' to the caller.
+	 */
+	for (;;) {
+		/*
+		 * Search list of loaded brands
+		 */
+		mutex_enter(&brand_list_lock);
+		for (l = brand_list; l != NULL; l = l->bl_next)
+			if (strcmp(attr->ba_brandname,
+			    l->bl_brand->b_name) == 0)
+				break;
+		if ((l != NULL) || (hdl != NULL))
+			break;
+		mutex_exit(&brand_list_lock);
+
+		/*
+		 * The brand isn't loaded yet, so load its kernel module and
+		 * search again.  The path is built with a bounded snprintf();
+		 * a name too long for MAXPATHLEN is treated as a load failure.
+		 */
+		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+		if (snprintf(modname, MAXPATHLEN, "brand/%s",
+		    attr->ba_modname) < MAXPATHLEN)
+			hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
+		kmem_free(modname, MAXPATHLEN);
+
+		if (hdl == NULL || err != 0)
+			return (NULL);
+	}
+
+	/*
+	 * If we found the matching brand, bump its reference count.
+	 */
+	if (l != NULL)
+		l->bl_refcnt++;
+
+	mutex_exit(&brand_list_lock);
+
+	if (hdl != NULL)
+		(void) ddi_modclose(hdl);
+
+	return ((l != NULL) ? l->bl_brand : NULL);
+}
+
+/*
+ * Return the number of zones using brand bp, or 0 if bp is not registered.
+ */
+int
+brand_zone_count(struct brand *bp)
+{
+	struct brand_list *ent;
+	int nzones;
+
+	mutex_enter(&brand_list_lock);
+	/* Stop at bp's entry, or run off the end of the list. */
+	ent = brand_list;
+	while (ent != NULL && ent->bl_brand != bp)
+		ent = ent->bl_next;
+	nzones = (ent != NULL) ? ent->bl_refcnt : 0;
+	mutex_exit(&brand_list_lock);
+
+	return (nzones);
+}
+
+void
+brand_unregister_zone(struct brand *bp)
+{
+	struct brand_list *ent;
+
+	mutex_enter(&brand_list_lock);
+	for (ent = brand_list; ent != NULL; ent = ent->bl_next)
+		if (ent->bl_brand == bp)
+			break;
+	if (ent != NULL) {
+		ASSERT(ent->bl_refcnt > 0);
+		ent->bl_refcnt--;	/* undoes brand_register_zone()'s bump */
+	}
+	mutex_exit(&brand_list_lock);
+}
+
+void
+brand_setbrand(proc_t *p)
+{
+	brand_t *zbrand = p->p_zone->zone_brand;
+
+	/*
+	 * Give p the brand of its zone.  We should only be called from
+	 * exec(), on a still-native process that is known to be
+	 * single-threaded (its thread list links back to itself).
+	 */
+	ASSERT(zbrand != NULL);
+	ASSERT(p->p_brand == &native_brand);
+	ASSERT(p->p_tlist == p->p_tlist->t_forw);
+
+	p->p_brand = zbrand;
+	if (!PROC_IS_BRANDED(p))
+		return;		/* not branded after all: no hooks to run */
+	BROP(p)->b_setbrand(p);
+	lwp_attach_brand_hdlrs(p->p_tlist->t_lwp);
+}
diff --git a/usr/src/uts/common/os/ddi.c b/usr/src/uts/common/os/ddi.c
index ec12f51f37..6a0b6ace80 100644
--- a/usr/src/uts/common/os/ddi.c
+++ b/usr/src/uts/common/os/ddi.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
 
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -293,14 +292,15 @@ WR(queue_t *q)
 int
 drv_getparm(unsigned int parm, void *valuep)
 {
-	time_t now;
+	proc_t	*p = curproc;
+	time_t	now;
 
 	switch (parm) {
 	case UPROCP:
-		*(proc_t **)valuep = ttoproc(curthread);
+		*(proc_t **)valuep = p;
 		break;
 	case PPGRP:
-		*(pid_t *)valuep = ttoproc(curthread)->p_pgrp;
+		*(pid_t *)valuep = p->p_pgrp;
 		break;
 	case LBOLT:
 		*(clock_t *)valuep = lbolt;
@@ -317,10 +317,12 @@ drv_getparm(unsigned int parm, void *valuep)
 		}
 		break;
 	case PPID:
-		*(pid_t *)valuep = ttoproc(curthread)->p_pid;
+		*(pid_t *)valuep = p->p_pid;
 		break;
 	case PSID:
-		*(pid_t *)valuep = ttoproc(curthread)->p_sessp->s_sid;
+		mutex_enter(&p->p_splock);
+		*(pid_t *)valuep = p->p_sessp->s_sid;
+		mutex_exit(&p->p_splock);
 		break;
 	case UCRED:
 		*(cred_t **)valuep = CRED();
diff --git a/usr/src/uts/common/os/exec.c b/usr/src/uts/common/os/exec.c
index a3cd19e423..3b01993465 100644
--- a/usr/src/uts/common/os/exec.c
+++ b/usr/src/uts/common/os/exec.c
@@ -65,6 +65,7 @@
 #include <sys/lwpchan_impl.h>
 #include <sys/pool.h>
 #include <sys/sdt.h>
+#include <sys/brand.h>
 
 #include <c2/audit.h>
 
@@ -89,7 +90,6 @@ uint_t auxv_hwcap32 = 0;	/* 32-bit version of auxv_hwcap */
 #endif
 
 int exec_lpg_disable = 0;
-
 #define	PSUIDFLAGS		(SNOCD|SUGID)
 
 /*
@@ -109,12 +109,13 @@ exece(const char *fname, const char **argp, const char **envp)
 {
 	int error;
 
-	error = exec_common(fname, argp, envp);
+	error = exec_common(fname, argp, envp, EBA_NONE);
 	return (error ? (set_errno(error)) : 0);
 }
 
 int
-exec_common(const char *fname, const char **argp, const char **envp)
+exec_common(const char *fname, const char **argp, const char **envp,
+    int brand_action)
 {
 	vnode_t *vp = NULL, *dir = NULL, *tmpvp = NULL;
 	proc_t *p = ttoproc(curthread);
@@ -136,6 +137,7 @@ exec_common(const char *fname, const char **argp, const char **envp)
 	lwpdir_t **old_tidhash;
 	uint_t old_tidhash_sz;
 	lwpent_t *lep;
+	int brandme = 0;
 
 	/*
 	 * exec() is not supported for the /proc agent lwp.
@@ -146,6 +148,35 @@ exec_common(const char *fname, const char **argp, const char **envp)
 	if ((error = secpolicy_basic_exec(CRED())) != 0)
 		return (error);
 
+	if (brand_action != EBA_NONE) {
+		/*
+		 * Brand actions are not supported for processes that are not
+		 * running in a branded zone.
+		 */
+		if (!ZONE_IS_BRANDED(p->p_zone))
+			return (ENOTSUP);
+
+		if (brand_action == EBA_NATIVE) {
+			/* Only branded processes can be unbranded */
+			if (!PROC_IS_BRANDED(p))
+				return (ENOTSUP);
+		} else {
+			/* Only unbranded processes can be branded */
+			if (PROC_IS_BRANDED(p))
+				return (ENOTSUP);
+			brandme = 1;
+		}
+	} else {
+		/*
+		 * If this is a native zone, or if the process is already
+		 * branded, then we don't need to do anything.  If this is
+		 * a native process in a branded zone, we need to brand the
+		 * process as it exec()s the new binary.
+		 */
+		if (ZONE_IS_BRANDED(p->p_zone) && !PROC_IS_BRANDED(p))
+			brandme = 1;
+	}
+
 	/*
 	 * Inform /proc that an exec() has started.
 	 * Hold signals that are ignored by default so that we will
@@ -237,8 +268,14 @@ exec_common(const char *fname, const char **argp, const char **envp)
 	ua.argp = argp;
 	ua.envp = envp;
 
+	/* If necessary, brand this process before we start the exec. */
+	if (brandme != 0)
+		brand_setbrand(p);
+
 	if ((error = gexec(&vp, &ua, &args, NULL, 0, &execsz,
-	    exec_file, p->p_cred)) != 0) {
+	    exec_file, p->p_cred, brand_action)) != 0) {
+		if (brandme != 0)
+			BROP(p)->b_proc_exit(p, lwp);
 		VN_RELE(vp);
 		if (dir != NULL)
 			VN_RELE(dir);
@@ -351,6 +388,12 @@ exec_common(const char *fname, const char **argp, const char **envp)
 	 */
 	close_exec(P_FINFO(p));
 	TRACE_2(TR_FAC_PROC, TR_PROC_EXEC, "proc_exec:p %p up %p", p, up);
+
+	/* Unbrand ourself if requested. */
+	if (brand_action == EBA_NATIVE)
+		BROP(p)->b_proc_exit(p, lwp);
+	ASSERT((brand_action != EBA_NATIVE) || !PROC_IS_BRANDED(p));
+
 	setregs(&args);
 
 	/* Mark this as an executable vnode */
@@ -376,6 +419,9 @@ exec_common(const char *fname, const char **argp, const char **envp)
 			lep = kmem_zalloc(sizeof (*lep), KM_SLEEP);
 	}
 
+	if (PROC_IS_BRANDED(p))
+		BROP(p)->b_exec();
+
 	mutex_enter(&p->p_lock);
 	prbarrier(p);
 
@@ -411,6 +457,7 @@ exec_common(const char *fname, const char **argp, const char **envp)
 		lep->le_start = curthread->t_start;
 		lwp_hash_in(p, lep);
 	}
+
 	/*
 	 * Restore the saved signal mask and
 	 * inform /proc that the exec() has finished.
@@ -422,6 +469,7 @@ exec_common(const char *fname, const char **argp, const char **envp)
 		kmem_free(old_lwpdir, old_lwpdir_sz * sizeof (lwpdir_t));
 		kmem_free(old_tidhash, old_tidhash_sz * sizeof (lwpdir_t *));
 	}
+
 	ASSERT(error == 0);
 	DTRACE_PROC(exec__success);
 	return (0);
@@ -451,7 +499,8 @@ gexec(
 	int level,
 	long *execsz,
 	caddr_t exec_file,
-	struct cred *cred)
+	struct cred *cred,
+	int brand_action)
 {
 	struct vnode *vp;
 	proc_t *pp = ttoproc(curthread);
@@ -593,7 +642,7 @@ gexec(
 		setidfl |= EXECSETID_PRIVS;
 
 	error = (*eswp->exec_func)(vp, uap, args, idatap, level, execsz,
-		setidfl, exec_file, cred);
+		setidfl, exec_file, cred, brand_action);
 	rw_exit(eswp->exec_lock);
 	if (error != 0) {
 		if (newcred != NULL)
@@ -1016,17 +1065,44 @@ execmap(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen,
 	}
 
 	if (zfodlen) {
+		struct as *as = curproc->p_as;
+		struct seg *seg;
+		uint_t zprot = 0;
+
 		end = (size_t)addr + len;
 		zfodbase = (caddr_t)roundup(end, PAGESIZE);
 		zfoddiff = (uintptr_t)zfodbase - end;
 		if (zfoddiff) {
+			/*
+			 * Before we go to zero the remaining space on the last
+			 * page, make sure we have write permission.
+			 */
+
+			AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
+			seg = as_segat(curproc->p_as, (caddr_t)end);
+			if (seg != NULL)
+				SEGOP_GETPROT(seg, (caddr_t)end, zfoddiff - 1,
+				    &zprot);
+			AS_LOCK_EXIT(as, &as->a_lock);
+
+			if (seg != NULL && (zprot & PROT_WRITE) == 0) {
+				(void) as_setprot(as, (caddr_t)end,
+				    zfoddiff - 1, zprot | PROT_WRITE);
+			}
+
 			if (on_fault(&ljb)) {
 				no_fault();
+				if (seg != NULL && (zprot & PROT_WRITE) == 0)
+					(void) as_setprot(as, (caddr_t)end,
+					zfoddiff - 1, zprot);
 				error = EFAULT;
 				goto bad;
 			}
 			uzero((void *)end, zfoddiff);
 			no_fault();
+			if (seg != NULL && (zprot & PROT_WRITE) == 0)
+				(void) as_setprot(as, (caddr_t)end,
+				    zfoddiff - 1, zprot);
 		}
 		if (zfodlen > zfoddiff) {
 			struct segvn_crargs crargs =
@@ -1326,13 +1402,22 @@ stk_copyin(execa_t *uap, uarg_t *args, intpdata_t *intp, void **auxvpp)
 	args->ne = args->na - argc;
 
 	/*
-	 * Add AT_SUN_PLATFORM and AT_SUN_EXECNAME strings to the stack.
+	 * Add AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME, and
+	 * AT_SUN_EMULATOR strings to the stack.
 	 */
 	if (auxvpp != NULL && *auxvpp != NULL) {
 		if ((error = stk_add(args, platform, UIO_SYSSPACE)) != 0)
 			return (error);
 		if ((error = stk_add(args, args->pathname, UIO_SYSSPACE)) != 0)
 			return (error);
+		if (args->brandname != NULL &&
+		    (error = stk_add(args, args->brandname,
+			UIO_SYSSPACE)) != 0)
+			return (error);
+		if (args->emulator != NULL &&
+		    (error = stk_add(args, args->emulator,
+			UIO_SYSSPACE)) != 0)
+			return (error);
 	}
 
 	/*
@@ -1438,19 +1523,32 @@ stk_copyout(uarg_t *args, char *usrstack, void **auxvpp, user_t *up)
 
 	/*
 	 * Fill in the aux vector now that we know the user stack addresses
-	 * for the AT_SUN_PLATFORM and AT_SUN_EXECNAME strings.
+	 * for the AT_SUN_PLATFORM, AT_SUN_EXECNAME, AT_SUN_BRANDNAME and
+	 * AT_SUN_EMULATOR strings.
 	 */
 	if (auxvpp != NULL && *auxvpp != NULL) {
 		if (args->to_model == DATAMODEL_NATIVE) {
 			auxv_t **a = (auxv_t **)auxvpp;
 			ADDAUX(*a, AT_SUN_PLATFORM, (long)&ustrp[*--offp])
 			ADDAUX(*a, AT_SUN_EXECNAME, (long)&ustrp[*--offp])
+			if (args->brandname != NULL)
+				ADDAUX(*a,
+				    AT_SUN_BRANDNAME, (long)&ustrp[*--offp])
+			if (args->emulator != NULL)
+				ADDAUX(*a,
+				    AT_SUN_EMULATOR, (long)&ustrp[*--offp])
 		} else {
 			auxv32_t **a = (auxv32_t **)auxvpp;
 			ADDAUX(*a,
 			    AT_SUN_PLATFORM, (int)(uintptr_t)&ustrp[*--offp])
 			ADDAUX(*a,
-			    AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp]);
+			    AT_SUN_EXECNAME, (int)(uintptr_t)&ustrp[*--offp])
+			if (args->brandname != NULL)
+				ADDAUX(*a, AT_SUN_BRANDNAME,
+				    (int)(uintptr_t)&ustrp[*--offp])
+			if (args->emulator != NULL)
+				ADDAUX(*a, AT_SUN_EMULATOR,
+				    (int)(uintptr_t)&ustrp[*--offp])
 		}
 	}
 
diff --git a/usr/src/uts/common/os/exit.c b/usr/src/uts/common/os/exit.c
index 70061a7d3e..3063e5717f 100644
--- a/usr/src/uts/common/os/exit.c
+++ b/usr/src/uts/common/os/exit.c
@@ -73,6 +73,7 @@
 #include <sys/pool.h>
 #include <sys/sdt.h>
 #include <sys/corectl.h>
+#include <sys/brand.h>
 
 /*
  * convert code/data pair into old style wait status
@@ -158,7 +159,6 @@ restart_init(int what, int why)
 	user_t *up = PTOU(p);
 
 	vnode_t *oldcd, *oldrd;
-	sess_t *sp;
 	int i, err;
 	char reason_buf[64];
 
@@ -257,17 +257,9 @@ restart_init(int what, int why)
 	if (oldcd != NULL)
 		VN_RELE(oldcd);
 
-	/*
-	 * Free the controlling tty.
-	 */
-	mutex_enter(&pidlock);
-	sp = p->p_sessp;
-	if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
-		mutex_exit(&pidlock);
-		freectty(sp);
-	} else {
-		mutex_exit(&pidlock);
-	}
+	/* Free the controlling tty.  (freectty() always assumes curproc.) */
+	ASSERT(p == curproc);
+	(void) freectty(B_TRUE);
 
 	/*
 	 * Now exec() the new init(1M) on top of the current process.  If we
@@ -343,7 +335,6 @@ proc_exit(int why, int what)
 	timeout_id_t tmp_id;
 	int rv;
 	proc_t *q;
-	sess_t *sp;
 	task_t *tk;
 	vnode_t *exec_vp, *execdir_vp, *cdir, *rdir;
 	sigqueue_t *sqp;
@@ -367,6 +358,14 @@ proc_exit(int why, int what)
 	DTRACE_PROC1(exit, int, why);
 
 	/*
+	 * Perform any brand-specific proc exit processing.  Since this is
+	 * always the last lwp, the brand hook also performs the lwp exit
+	 * processing and frees the brand data.
+	 */
+	if (PROC_IS_BRANDED(p))
+		BROP(p)->b_proc_exit(p, lwp);
+
+	/*
 	 * Don't let init exit unless zone_start_init() failed its exec, or
 	 * we are shutting down the zone or the machine.
 	 *
@@ -377,6 +376,7 @@ proc_exit(int why, int what)
 		if (z->zone_boot_err == 0 &&
 		    zone_status_get(z) < ZONE_IS_SHUTTING_DOWN &&
 		    zone_status_get(global_zone) < ZONE_IS_SHUTTING_DOWN &&
+		    z->zone_restart_init == B_TRUE &&
 		    restart_init(what, why) == 0)
 			return (0);
 		/*
@@ -523,13 +523,9 @@ proc_exit(int why, int what)
 
 	closeall(P_FINFO(p));
 
-	mutex_enter(&pidlock);
-	sp = p->p_sessp;
-	if (sp->s_sidp == p->p_pidp && sp->s_vp != NULL) {
-		mutex_exit(&pidlock);
-		freectty(sp);
-	} else
-		mutex_exit(&pidlock);
+	/* Free the controlling tty.  (freectty() always assumes curproc.) */
+	ASSERT(p == curproc);
+	(void) freectty(B_TRUE);
 
 #if defined(__sparc)
 	if (p->p_utraps != NULL)
diff --git a/usr/src/uts/common/os/fork.c b/usr/src/uts/common/os/fork.c
index c7c400246d..fbda5b8c4a 100644
--- a/usr/src/uts/common/os/fork.c
+++ b/usr/src/uts/common/os/fork.c
@@ -80,6 +80,7 @@
 #include <sys/sdt.h>
 #include <sys/class.h>
 #include <sys/corectl.h>
+#include <sys/brand.h>
 
 static int64_t cfork(int, int);
 static int getproc(proc_t **, int);
@@ -461,8 +462,10 @@ cfork(int isvfork, int isfork1)
 		mutex_exit(&p->p_lock);
 	}
 
-	/* set return values for child */
-	lwp_setrval(clone, p->p_pid, 1);
+	if (PROC_IS_BRANDED(p))
+		BROP(p)->b_lwp_setrval(clone, p->p_pid, 1);
+	else
+		lwp_setrval(clone, p->p_pid, 1);
 
 	/* set return values for parent */
 	r.r_val1 = (int)cp->p_pid;
@@ -873,6 +876,7 @@ getproc(proc_t **cpp, int kernel)
 	/*
 	 * Make proc entry for child process
 	 */
+	mutex_init(&cp->p_splock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&cp->p_crlock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&cp->p_pflock, NULL, MUTEX_DEFAULT, NULL);
 #if defined(__x86)
@@ -882,7 +886,7 @@ getproc(proc_t **cpp, int kernel)
 	cp->p_stat = SIDL;
 	cp->p_mstart = gethrtime();
 
-	if ((newpid = pid_assign(cp)) == -1) {
+	if ((newpid = pid_allocate(cp, PID_ALLOC_PROC)) == -1) {
 		if (nproc == v.v_proc) {
 			CPU_STATS_ADDQ(CPU, sys, procovf, 1);
 			cmn_err(CE_WARN, "out of processes");
@@ -926,10 +930,13 @@ getproc(proc_t **cpp, int kernel)
 	cp->p_siginfo = pp->p_siginfo;
 	cp->p_flag = pp->p_flag & (SJCTL|SNOWAIT|SNOCD);
 	cp->p_sessp = pp->p_sessp;
-	SESS_HOLD(pp->p_sessp);
+	sess_hold(pp);
 	cp->p_exec = pp->p_exec;
 	cp->p_execdir = pp->p_execdir;
 	cp->p_zone = pp->p_zone;
+	cp->p_brand = pp->p_brand;
+	if (PROC_IS_BRANDED(pp))
+		BROP(pp)->b_copy_procdata(cp, pp);
 
 	cp->p_bssbase = pp->p_bssbase;
 	cp->p_brkbase = pp->p_brkbase;
@@ -1198,6 +1205,7 @@ try_again:
 
 			if (p->p_segacct)
 				shmexit(p);
+
 			/*
 			 * We grab p_lock for the benefit of /proc
 			 */
diff --git a/usr/src/uts/common/os/lwp.c b/usr/src/uts/common/os/lwp.c
index dbccf77b9e..26a12c805e 100644
--- a/usr/src/uts/common/os/lwp.c
+++ b/usr/src/uts/common/os/lwp.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -59,6 +58,7 @@
 #include <sys/cpc_impl.h>
 #include <sys/sdt.h>
 #include <sys/cmn_err.h>
+#include <sys/brand.h>
 
 void *segkp_lwp;		/* cookie for pool of segkp resources */
 
@@ -87,6 +87,7 @@ lwp_create(void (*proc)(), caddr_t arg, size_t len, proc_t *p,
 	uint_t old_hashsz = 0;
 	int i;
 	int rctlfail = 0;
+	boolean_t branded = 0;
 
 	mutex_enter(&p->p_lock);
 	mutex_enter(&p->p_zone->zone_nlwps_lock);
@@ -448,6 +449,19 @@ grow:
 				break;
 		} while (lwp_hash_lookup(p, t->t_tid) != NULL);
 	}
+
+	/*
+	 * If this is a branded process, let the brand do any necessary lwp
+	 * initialization.
+	 */
+	if (PROC_IS_BRANDED(p)) {
+		if (BROP(p)->b_initlwp(lwp)) {
+			err = 1;
+			goto error;
+		}
+		branded = 1;
+	}
+
 	p->p_lwpcnt++;
 	t->t_waitfor = -1;
 
@@ -540,6 +554,9 @@ error:
 		if (cid != NOCLASS && bufp != NULL)
 			CL_FREE(cid, bufp);
 
+		if (branded)
+			BROP(p)->b_freelwp(lwp);
+
 		mutex_exit(&p->p_lock);
 		t->t_state = TS_FREE;
 		thread_rele(t);
@@ -673,6 +690,13 @@ lwp_exit(void)
 	if (t->t_upimutex != NULL)
 		upimutex_cleanup();
 
+	/*
+	 * Perform any brand specific exit processing, then release any
+	 * brand data associated with the lwp
+	 */
+	if (PROC_IS_BRANDED(p))
+		BROP(p)->b_lwpexit(lwp);
+
 	mutex_enter(&p->p_lock);
 	lwp_cleanup();
 
@@ -1565,6 +1589,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
 	proc_t *p = lwptoproc(lwp);
 	int cid;
 	void *bufp;
+	void *brand_data;
 	int val;
 
 	ASSERT(p == curproc);
@@ -1578,6 +1603,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
 	if (t == curthread)
 		/* copy args out of registers first */
 		(void) save_syscall_args();
+
 	clwp = lwp_create(cp->p_lwpcnt == 0 ? lwp_rtt_initial : lwp_rtt,
 	    NULL, 0, cp, TS_STOPPED, t->t_pri, &t->t_hold, NOCLASS, lwpid);
 	if (clwp == NULL)
@@ -1591,14 +1617,16 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
 	ct = clwp->lwp_thread;
 	tregs = clwp->lwp_regs;
 	tfpu = clwp->lwp_fpu;
+	brand_data = clwp->lwp_brand;
 
 	/* copy parent lwp to child lwp */
 	*clwp = *lwp;
 
 	/* fix up child's lwp */
 
-	clwp->lwp_pcb.pcb_flags = 0;
-#if defined(__sparc)
+#if defined(__i386) || defined(__amd64)
+	clwp->lwp_pcb.pcb_flags = clwp->lwp_pcb.pcb_flags & RUPDATE_PENDING;
+#elif defined(__sparc)
 	clwp->lwp_pcb.pcb_step = STEP_NONE;
 #endif
 	clwp->lwp_cursig = 0;
@@ -1608,6 +1636,7 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
 	ct->t_sysnum = t->t_sysnum;
 	clwp->lwp_regs = tregs;
 	clwp->lwp_fpu = tfpu;
+	clwp->lwp_brand = brand_data;
 	clwp->lwp_ap = clwp->lwp_arg;
 	clwp->lwp_procp = cp;
 	bzero(clwp->lwp_timer, sizeof (clwp->lwp_timer));
@@ -1640,6 +1669,10 @@ forklwp(klwp_t *lwp, proc_t *cp, id_t lwpid)
 		ct->t_proc_flag |= TP_MSACCT;
 	mutex_exit(&cp->p_lock);
 
+	/* Allow brand to propagate brand-specific state */
+	if (PROC_IS_BRANDED(p))
+		BROP(p)->b_forklwp(lwp, clwp);
+
 retry:
 	cid = t->t_cid;
 
diff --git a/usr/src/uts/common/os/main.c b/usr/src/uts/common/os/main.c
index 958bbf96c8..ec9fc6c3e3 100644
--- a/usr/src/uts/common/os/main.c
+++ b/usr/src/uts/common/os/main.c
@@ -70,6 +70,7 @@
 #include <sys/errorq.h>
 #include <sys/class.h>
 #include <sys/stack.h>
+#include <sys/brand.h>
 
 #include <vm/as.h>
 #include <vm/seg_kmem.h>
@@ -124,6 +125,7 @@ cluster_wrapper(void)
 
 char initname[INITNAME_SZ] = "/sbin/init";	/* also referenced by zone0 */
 char initargs[BOOTARGS_MAX] = "";		/* also referenced by zone0 */
+extern int64_t lwp_sigmask(int, uint_t, uint_t);
 
 /*
  * Construct a stack for init containing the arguments to it, then
@@ -144,6 +146,7 @@ exec_init(const char *initpath, const char *args)
 	int error = 0, count = 0;
 	proc_t *p = ttoproc(curthread);
 	klwp_t *lwp = ttolwp(curthread);
+	int brand_action;
 
 	if (args == NULL)
 		args = "";
@@ -247,9 +250,17 @@ exec_init(const char *initpath, const char *args)
 	curthread->t_post_sys = 1;
 	curthread->t_sysnum = SYS_execve;
 
+	/*
+	 * If we are executing init from zsched, we may have inherited its
+	 * parent process's signal mask.  Clear it now so that we behave in
+	 * the same way as when started from the global zone.
+	 */
+	(void) lwp_sigmask(SIG_UNBLOCK, 0xffffffff, 0xffffffff);
+
+	brand_action = ZONE_IS_BRANDED(p->p_zone) ? EBA_BRAND : EBA_NONE;
 again:
 	error = exec_common((const char *)(uintptr_t)exec_fnamep,
-	    (const char **)(uintptr_t)uap, NULL);
+	    (const char **)(uintptr_t)uap, NULL, brand_action);
 
 	/*
 	 * Normally we would just set lwp_argsaved and t_post_sys and
diff --git a/usr/src/uts/common/os/modconf.c b/usr/src/uts/common/os/modconf.c
index 2992567207..3e662fac7d 100644
--- a/usr/src/uts/common/os/modconf.c
+++ b/usr/src/uts/common/os/modconf.c
@@ -55,6 +55,7 @@
 #include <ipp/ipp.h>
 #include <sys/strsubr.h>
 #include <sys/kcpc.h>
+#include <sys/brand.h>
 #include <sys/cpc_pcbe.h>
 #include <sys/kstat.h>
 #include <sys/fs/sdev_node.h>
@@ -237,6 +238,16 @@ struct mod_ops mod_pcbeops = {
 	mod_installpcbe, mod_removepcbe, mod_infonull
 };
 
+/*
+ * Brand modules: installing registers the brand, removing unregisters it.
+ */
+static int mod_installbrand(struct modlbrand *, struct modlinkage *);
+static int mod_removebrand(struct modlbrand *, struct modlinkage *);
+
+struct mod_ops mod_brandops = {
+	mod_installbrand, mod_removebrand, mod_infonull
+};
+
 static struct sysent *mod_getsysent(struct modlinkage *, struct sysent *);
 
 static char uninstall_err[] = "Cannot uninstall %s; not installed";
@@ -496,6 +507,23 @@ mod_removepcbe(struct modlpcbe *modl, struct modlinkage *modlp)
 }
 
 /*
+ * Manage BrandZ modules.
+ */
+/*ARGSUSED*/
+static int
+mod_installbrand(struct modlbrand *modl, struct modlinkage *modlp)
+{
+	return (brand_register(modl->brand_branddef));	/* 0 or errno */
+}
+
+/*ARGSUSED*/
+static int
+mod_removebrand(struct modlbrand *modl, struct modlinkage *modlp)
+{
+	return (brand_unregister(modl->brand_branddef));	/* 0 or errno */
+}
+
+/*
  * manage /dev fs modules
  */
 /*ARGSUSED*/
@@ -1075,8 +1103,10 @@ mod_removefs(struct modlfs *modl, struct modlinkage *modlp)
 		return (EBUSY);
 	}
 
-	/* XXX - Shouldn't the refcount be sufficient? */
-
+	/*
+	 * A mounted filesystem could still have vsw_count = 0
+	 * so we must check whether anyone is actually using our ops
+	 */
 	if (vfs_opsinuse(&vswp->vsw_vfsops)) {
 		vfs_unrefvfssw(vswp);
 		WUNLOCK_VFSSW();
diff --git a/usr/src/uts/common/os/pid.c b/usr/src/uts/common/os/pid.c
index 66cfed74b4..88b0258afe 100644
--- a/usr/src/uts/common/os/pid.c
+++ b/usr/src/uts/common/os/pid.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -21,7 +20,7 @@
  */
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -115,6 +114,18 @@ pid_lookup(pid_t pid)
 	return (pidp);
 }
 
+struct pid *
+pid_find(pid_t pid)
+{
+	struct pid *pidp;
+
+	mutex_enter(&pidlinklock);	/* pid_lookup() runs under this lock */
+	pidp = pid_lookup(pid);
+	mutex_exit(&pidlinklock);
+
+	return (pidp);	/* handed back after pidlinklock is dropped */
+}
+
 void
 pid_setmin(void)
 {
@@ -154,14 +165,13 @@ pid_getlockslot(int prslot)
 }
 
 /*
- * This function assigns a pid for use in a fork request.  It allocates
- * a pid structure, tries to find an empty slot in the proc table,
- * and selects the process id.
+ * This function allocates a pid structure, a free pid, and optionally a
+ * slot in the proc table for it.
  *
- * pid_assign() returns the new pid on success, -1 on failure.
+ * pid_allocate() returns the new pid on success, -1 on failure.
  */
 pid_t
-pid_assign(proc_t *prp)
+pid_allocate(proc_t *prp, int flags)
 {
 	struct pid *pidp;
 	union procent *pep;
@@ -170,7 +180,7 @@ pid_assign(proc_t *prp)
 	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);
 
 	mutex_enter(&pidlinklock);
-	if ((pep = procentfree) == NULL) {
+	if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
 		/*
 		 * ran out of /proc directory entries
 		 */
@@ -190,10 +200,6 @@ pid_assign(proc_t *prp)
 		goto failed;
 	}
 
-	procentfree = pep->pe_next;
-	pep->pe_proc = prp;
-	prp->p_pidp = pidp;
-
 	/*
 	 * Put pid into the pid hash table.
 	 */
@@ -201,8 +207,17 @@ pid_assign(proc_t *prp)
 	HASHPID(newpid) = pidp;
 	pidp->pid_ref = 1;
 	pidp->pid_id = newpid;
-	pidp->pid_prslot = pep - procdir;
-	prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
+
+	if (flags & PID_ALLOC_PROC) {
+		procentfree = pep->pe_next;
+		pidp->pid_prslot = pep - procdir;
+		pep->pe_proc = prp;
+		prp->p_pidp = pidp;
+		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
+	} else {
+		pidp->pid_prslot = 0;
+	}
+
 	mutex_exit(&pidlinklock);
 
 	return (newpid);
@@ -264,7 +279,7 @@ pid_exit(proc_t *prp)
 	if (prp->p_pgidp != NULL)
 		pgexit(prp);
 
-	SESS_RELE(prp->p_sessp);
+	sess_rele(prp->p_sessp, B_TRUE);
 
 	pidp = prp->p_pidp;
 
diff --git a/usr/src/uts/common/os/printf.c b/usr/src/uts/common/os/printf.c
index 603da31b62..a50bfa0db9 100644
--- a/usr/src/uts/common/os/printf.c
+++ b/usr/src/uts/common/os/printf.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -142,21 +141,15 @@ retry:
 
 	if (sl & SL_USER) {
 		ssize_t resid;
-		sess_t *sessp;
-
-		mutex_enter(&pidlock);
-		sessp = curproc->p_sessp;
-		SESS_HOLD(sessp);
-		TTY_HOLD(sessp);
-		mutex_exit(&pidlock);
-		if (sessp->s_vp)
-			(void) vn_rdwr(UIO_WRITE, sessp->s_vp,
-			    body, len, 0LL, UIO_SYSSPACE,
-			    FAPPEND, (rlim64_t)LOG_HIWAT, kcred, &resid);
-		mutex_enter(&pidlock);
-		TTY_RELE(sessp);
-		SESS_RELE(sessp);
-		mutex_exit(&pidlock);
+		sess_t *sp;
+
+		if ((sp = tty_hold()) != NULL) {
+			if (sp->s_vp != NULL)
+				(void) vn_rdwr(UIO_WRITE, sp->s_vp, body,
+				    len, 0LL, UIO_SYSSPACE, FAPPEND,
+				    (rlim64_t)LOG_HIWAT, kcred, &resid);
+			tty_rele(sp);
+		}
 	}
 
 	if (on_intr && !panicstr) {
diff --git a/usr/src/uts/common/os/procset.c b/usr/src/uts/common/os/procset.c
index 7a675c604e..ae5473847e 100644
--- a/usr/src/uts/common/os/procset.c
+++ b/usr/src/uts/common/os/procset.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -290,8 +289,10 @@ procinset(proc_t *pp, procset_t *psp)
 		break;
 
 	case P_SID:
+		mutex_enter(&pp->p_splock);
 		if (pp->p_sessp->s_sid == psp->p_lid)
 			loperand++;
+		mutex_exit(&pp->p_splock);
 		break;
 
 	case P_CID:
@@ -380,8 +381,10 @@ procinset(proc_t *pp, procset_t *psp)
 		break;
 
 	case P_SID:
+		mutex_enter(&pp->p_splock);
 		if (pp->p_sessp->s_sid == psp->p_rid)
 			roperand++;
+		mutex_exit(&pp->p_splock);
 		break;
 
 	case P_TASKID:
@@ -533,8 +536,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done)
 		break;
 
 	case P_SID:
+		mutex_enter(&pp->p_splock);
 		if (pp->p_sessp->s_sid == psp->p_lid)
 			loperand++;
+		mutex_exit(&pp->p_splock);
 		break;
 
 	case P_TASKID:
@@ -617,8 +622,10 @@ lwpinset(proc_t *pp, procset_t *psp, kthread_t *tp, int *done)
 		break;
 
 	case P_SID:
+		mutex_enter(&pp->p_splock);
 		if (pp->p_sessp->s_sid == psp->p_rid)
 			roperand++;
+		mutex_exit(&pp->p_splock);
 		break;
 
 	case P_TASKID:
@@ -756,6 +763,7 @@ getmyid(idtype_t idtype)
 	proc_t	*pp;
 	uid_t uid;
 	gid_t gid;
+	pid_t sid;
 
 	pp = ttoproc(curthread);
 
@@ -773,7 +781,10 @@ getmyid(idtype_t idtype)
 		return (pp->p_pgrp);
 
 	case P_SID:
-		return (pp->p_sessp->s_sid);
+		mutex_enter(&pp->p_splock);
+		sid = pp->p_sessp->s_sid;
+		mutex_exit(&pp->p_splock);
+		return (sid);
 
 	case P_TASKID:
 		return (pp->p_task->tk_tkid);
diff --git a/usr/src/uts/common/os/session.c b/usr/src/uts/common/os/session.c
index 972677f7dc..7790a09094 100644
--- a/usr/src/uts/common/os/session.c
+++ b/usr/src/uts/common/os/session.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -47,102 +46,614 @@
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/strsubr.h>
+#include <sys/fs/snode.h>
 
 sess_t session0 = {
-	1,	/* s_ref   */
-	NODEV,	/* s_dev   */
-	NULL,	/* s_vp    */
-	&pid0,	/* s_sidp  */
-	NULL	/* s_cred  */
+	&pid0,		/* s_sidp */
+	{0},		/* s_lock */
+	1,		/* s_ref */
+	B_FALSE,	/* s_sighuped */
+	B_FALSE,	/* s_exit */
+	0,		/* s_exit_cv */
+	0,		/* s_cnt */
+	0,		/* s_cnt_cv */
+	NODEV,		/* s_dev */
+	NULL,		/* s_vp */
+	NULL		/* s_cred */
 };
 
 void
-sess_rele(sess_t *sp)
+sess_hold(proc_t *p)
 {
-	ASSERT(MUTEX_HELD(&pidlock));
+	ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&p->p_splock));
+	mutex_enter(&p->p_sessp->s_lock);
+	p->p_sessp->s_ref++;
+	mutex_exit(&p->p_sessp->s_lock);
+}
+
+void
+sess_rele(sess_t *sp, boolean_t pidlock_held)
+{
+	ASSERT(MUTEX_HELD(&pidlock) || !pidlock_held);
+
+	mutex_enter(&sp->s_lock);
 
 	ASSERT(sp->s_ref != 0);
-	if (--sp->s_ref == 0) {
-		if (sp == &session0)
-			panic("sp == &session0");
-		PID_RELE(sp->s_sidp);
-		mutex_destroy(&sp->s_lock);
-		cv_destroy(&sp->s_wait_cv);
-		kmem_free(sp, sizeof (sess_t));
+	if (--sp->s_ref > 0) {
+		mutex_exit(&sp->s_lock);
+		return;
 	}
+	ASSERT(sp->s_ref == 0);
+
+	/*
+	 * It's ok to free this session structure now because we know
+	 * that no one else can have a pointer to it.  We know this
+	 * to be true because the only time that s_ref can possibly
+	 * be incremented is when pidlock or p_splock is held AND there
+	 * is a proc_t that points to that session structure.  In that
+	 * case we are guaranteed that the s_ref is at least 1 since there
+	 * is a proc_t that points to it.  So when s_ref finally drops to
+	 * zero then no one else has a reference (and hence pointer) to
+	 * this session structure and there is no valid proc_t pointing
+	 * to this session structure anymore so, no one can acquire a
+	 * reference (and pointer) to this session structure so it's
+	 * ok to free it here.
+	 */
+
+	if (sp == &session0)
+		panic("sp == &session0");
+
+	/* make sure there are no outstanding holds */
+	ASSERT(sp->s_cnt == 0);
+
+	/* make sure there is no exit in progress */
+	ASSERT(!sp->s_exit);
+
+	/* make sure someone already freed any ctty */
+	ASSERT(sp->s_vp == NULL);
+	ASSERT(sp->s_dev == NODEV);
+
+	if (!pidlock_held)
+		mutex_enter(&pidlock);
+	PID_RELE(sp->s_sidp);
+	if (!pidlock_held)
+		mutex_exit(&pidlock);
+
+	mutex_destroy(&sp->s_lock);
+	cv_destroy(&sp->s_cnt_cv);
+	kmem_free(sp, sizeof (sess_t));
+}
+
+sess_t *
+tty_hold(void)
+{
+	proc_t		*p = curproc;
+	sess_t		*sp;
+	boolean_t	got_sig = B_FALSE;
+
+	/* make sure the caller isn't holding locks they shouldn't */
+	ASSERT(MUTEX_NOT_HELD(&pidlock));
+
+	for (;;) {
+		mutex_enter(&p->p_splock);	/* protect p->p_sessp */
+		sp = p->p_sessp;
+		mutex_enter(&sp->s_lock);	/* protect sp->* */
+
+		/* make sure the caller isn't holding locks they shouldn't */
+		ASSERT((sp->s_vp == NULL) ||
+		    MUTEX_NOT_HELD(&sp->s_vp->v_stream->sd_lock));
+
+		/*
+		 * If the session leader process is not exiting (and hence
+		 * not trying to release the session's ctty) then we can
+		 * safely grab a hold on the current session structure
+		 * and return it.  If on the other hand the session leader
+		 * process is exiting and clearing the ctty then we'll
+		 * wait till it's done before we loop around and grab a
+		 * hold on the session structure.
+		 */
+		if (!sp->s_exit)
+			break;
+
+		/* need to hold the session so it can't be freed */
+		sp->s_ref++;
+		mutex_exit(&p->p_splock);
+
+		/* Wait till the session leader is done */
+		if (!cv_wait_sig(&sp->s_exit_cv, &sp->s_lock))
+			got_sig = B_TRUE;
+
+		/*
+		 * Now we need to drop our hold on the session structure,
+		 * but we can't hold any locks when we do this because
+		 * sess_rele() may need to acquire pidlock.
+		 */
+		mutex_exit(&sp->s_lock);
+		sess_rele(sp, B_FALSE);
+
+		if (got_sig)
+			return (NULL);
+	}
+
+	/* whew, we finally got a hold */
+	sp->s_cnt++;
+	sp->s_ref++;
+	mutex_exit(&sp->s_lock);
+	mutex_exit(&p->p_splock);
+	return (sp);
 }
 
 void
-sess_create(void)
+tty_rele(sess_t *sp)
 {
-	proc_t *pp;
-	sess_t *sp;
+	/* sess_rele() below may take pidlock, so the caller can't hold it */
+	ASSERT(MUTEX_NOT_HELD(&pidlock));
 
-	pp = ttoproc(curthread);
+	mutex_enter(&sp->s_lock);
+	if ((--sp->s_cnt) == 0)	/* last tty_hold() hold is gone */
+		cv_broadcast(&sp->s_cnt_cv);	/* wake s_cnt_cv waiters */
+	mutex_exit(&sp->s_lock);
+
+	sess_rele(sp, B_FALSE);	/* drop the session reference too */
+}
+
+void
+sess_create(void)
+{
+	proc_t *p = curproc;
+	sess_t *sp, *old_sp;
 
 	sp = kmem_zalloc(sizeof (sess_t), KM_SLEEP);
 
 	mutex_init(&sp->s_lock, NULL, MUTEX_DEFAULT, NULL);
-	cv_init(&sp->s_wait_cv, NULL, CV_DEFAULT, NULL);
+	cv_init(&sp->s_cnt_cv, NULL, CV_DEFAULT, NULL);
 
+	/*
+	 * we need to grab p_lock to protect p_pgidp because
+	 * /proc looks at p_pgidp while holding only p_lock.
+	 *
+	 * we don't need to hold p->p_sessp->s_lock or get a hold on the
+	 * session structure since we're not actually updating any of
+	 * the contents of the old session structure.
+	 */
 	mutex_enter(&pidlock);
+	mutex_enter(&p->p_lock);
+	mutex_enter(&p->p_splock);
+
+	pgexit(p);
+
+	sp->s_sidp = p->p_pidp;
+	sp->s_ref = 1;
+	sp->s_dev = NODEV;
+
+	old_sp = p->p_sessp;
+	p->p_sessp = sp;
+
+	pgjoin(p, p->p_pidp);
+	PID_HOLD(p->p_pidp);
+
+	mutex_exit(&p->p_splock);
+	mutex_exit(&p->p_lock);
+	mutex_exit(&pidlock);
 
+	sess_rele(old_sp, B_FALSE);
+}
+
+/*
+ * Note that sess_ctty_clear() resets all the fields in the session
+ * structure but doesn't release any holds or free any objects
+ * that the session structure might currently point to.  It is the
+ * caller's responsibility to do this.
+ */
+static void
+sess_ctty_clear(sess_t *sp, stdata_t *stp)
+{
 	/*
-	 * We need to protect p_pgidp with p_lock because
-	 * /proc looks at it while holding only p_lock.
+	 * Assert that we hold all the necessary locks.  We also need
+	 * to be holding proc_t->p_splock for the process associated
+	 * with this session, but since we don't have a proc pointer
+	 * passed in we can't assert this here.
 	 */
-	mutex_enter(&pp->p_lock);
-	pgexit(pp);
-	SESS_RELE(pp->p_sessp);
+	ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) &&
+	    MUTEX_HELD(&sp->s_lock));
 
-	sp->s_sidp = pp->p_pidp;
-	sp->s_ref = 1;
+	/* reset the session structure members to defaults */
+	sp->s_sighuped = B_FALSE;
 	sp->s_dev = NODEV;
+	sp->s_vp = NULL;
+	sp->s_cred = NULL;
+
+	/* reset the stream session and group pointers */
+	stp->sd_pgidp = NULL;
+	stp->sd_sidp = NULL;
+}
+
+static void
+sess_ctty_set(proc_t *p, sess_t *sp, stdata_t *stp)
+{
+	cred_t	*crp;
+
+	/* Assert that we hold all the necessary locks. */
+	ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) &&
+	    MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock));
+
+	/* get holds on structures */
+	mutex_enter(&p->p_crlock);
+	crhold(crp = p->p_cred);
+	mutex_exit(&p->p_crlock);
+	PID_HOLD(sp->s_sidp);	/* for sd_sidp below; requires pidlock */
+	PID_HOLD(sp->s_sidp);	/* for sd_pgidp below; requires pidlock */
+
+	/* update the session structure members */
+	sp->s_vp = makectty(stp->sd_vnode);
+	sp->s_dev = sp->s_vp->v_rdev;
+	sp->s_cred = crp;
+
+	/* update the stream members */
+	stp->sd_flag |= STRISTTY;	/* just to be sure */
+	stp->sd_sidp = sp->s_sidp;
+	stp->sd_pgidp = sp->s_sidp;
+}
+
+int
+strctty(stdata_t *stp)
+{
+	sess_t		*sp;
+	proc_t		*p = curproc;
+	boolean_t	got_sig = B_FALSE;
+
+	/*
+	 * We are going to try to make stp the default ctty for the session
+	 * associated with curproc.  Not only does this require holding a
+	 * bunch of locks but it also requires waiting for any outstanding
+	 * holds on the session structure (acquired via tty_hold()) to be
+	 * released.  Hence, we have the following for(;;) loop that will
+	 * acquire our locks, do some sanity checks, and wait for the hold
+	 * count on the session structure to hit zero.  If we get a signal
+	 * while waiting for outstanding holds to be released then we abort
+	 * the operation and return.
+	 */
+	for (;;) {
+		mutex_enter(&stp->sd_lock);	/* protects sd_pgidp/sd_sidp */
+		mutex_enter(&pidlock);		/* protects p_pidp */
+		mutex_enter(&p->p_splock);	/* protects p_sessp */
+		sp = p->p_sessp;
+		mutex_enter(&sp->s_lock);	/* protects sp->* */
+
+		if (((stp->sd_flag & (STRHUP|STRDERR|STWRERR|STPLEX)) != 0) ||
+		    (stp->sd_sidp != NULL) ||		/* stp already ctty? */
+		    (p->p_pidp != sp->s_sidp) ||	/* we're not leader? */
+		    (sp->s_vp != NULL)) {		/* session has ctty? */
+			mutex_exit(&sp->s_lock);
+			mutex_exit(&p->p_splock);
+			mutex_exit(&pidlock);
+			mutex_exit(&stp->sd_lock);
+			return (ENOTTY);
+		}
+
+		/* sanity check.  we can't be exiting right now */
+		ASSERT(!sp->s_exit);
+
+		/*
+		 * If no one else has a hold on this session structure
+		 * then we now have exclusive access to it, so break out
+		 * of this loop and update the session structure.
+		 */
+		if (sp->s_cnt == 0)
+			break;
+
+		/* need to hold the session so it can't be freed */
+		sp->s_ref++;
 
-	pp->p_sessp = sp;
+		/* ain't locking order fun? */
+		mutex_exit(&p->p_splock);
+		mutex_exit(&pidlock);
+		mutex_exit(&stp->sd_lock);
 
-	pgjoin(pp, pp->p_pidp);
-	mutex_exit(&pp->p_lock);
+		if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock))
+			got_sig = B_TRUE;
+		mutex_exit(&sp->s_lock);
+		sess_rele(sp, B_FALSE);
 
-	PID_HOLD(sp->s_sidp);
+		if (got_sig)
+			return (EINTR);
+	}
+
+	/* set the session ctty bindings */
+	sess_ctty_set(p, sp, stp);
+
+	mutex_exit(&sp->s_lock);
+	mutex_exit(&p->p_splock);
 	mutex_exit(&pidlock);
+	mutex_exit(&stp->sd_lock);
+	return (0);
 }
 
-void
-freectty(sess_t *sp)
+/*
+ * freectty_lock() attempts to acquire the army of locks required to free
+ * the ctty associated with a given session leader process.  If it returns
+ * successfully the following locks will be held:
+ *	sd_lock, pidlock, p_splock, s_lock
+ *
+ * as a secondary bit of convenience, freectty_lock() will also return
+ * pointers to the session, ctty, and ctty stream associated with the
+ * specified session leader process.
+ */
+static boolean_t
+freectty_lock(proc_t *p, sess_t **spp, vnode_t **vpp, stdata_t **stpp,
+    boolean_t at_exit)
 {
-	vnode_t *vp = sp->s_vp;
-	cred_t *cred = sp->s_cred;
+	sess_t		*sp;
+	vnode_t		*vp;
+	stdata_t	*stp;
 
-	strfreectty(vp->v_stream);
+	mutex_enter(&pidlock);			/* protect p_pidp */
+	mutex_enter(&p->p_splock);		/* protect p->p_sessp */
+	sp = p->p_sessp;
+	mutex_enter(&sp->s_lock);		/* protect sp->* */
 
-	mutex_enter(&sp->s_lock);
-	while (sp->s_cnt > 0) {
-		cv_wait(&sp->s_wait_cv, &sp->s_lock);
+	if ((sp->s_sidp != p->p_pidp) ||	/* we're not leader? */
+	    (sp->s_vp == NULL)) {		/* no ctty? */
+		mutex_exit(&sp->s_lock);
+		mutex_exit(&p->p_splock);
+		mutex_exit(&pidlock);
+		return (B_FALSE);
+	}
+
+	vp = sp->s_vp;
+	stp = sp->s_vp->v_stream;
+
+	if (at_exit) {
+		/* stop anyone else calling tty_hold() */
+		sp->s_exit = B_TRUE;
+	} else {
+		/*
+		 * due to locking order we have to grab stp->sd_lock before
+		 * grabbing all the other proc/session locks.  but after we
+		 * drop all our current locks it's possible that someone
+		 * could come in and change our current session or close
+		 * the current ctty (vp) thereby making sp or stp invalid.
+		 * (a VN_HOLD on vp won't protect stp because that only
+		 * prevents the vnode from being freed not closed.)  so
+		 * to prevent this we bump s_ref and s_cnt here.
+		 *
+		 * of course this doesn't matter if we're the last thread in
+		 * an exiting process that is the session leader, since no
+		 * one else can change our session or free our ctty.
+		 */
+		sp->s_ref++;	/* hold the session structure */
+		sp->s_cnt++;	/* protect vp and stp */
+	}
+
+	/* drop our session locks */
+	mutex_exit(&sp->s_lock);
+	mutex_exit(&p->p_splock);
+	mutex_exit(&pidlock);
+
+	/* grab locks in the right order */
+	mutex_enter(&stp->sd_lock);		/* protects sd_pgidp/sd_sidp */
+	mutex_enter(&pidlock);			/* protect p_pidp */
+	mutex_enter(&p->p_splock);		/* protects p->p_sessp */
+	mutex_enter(&sp->s_lock);		/* protects sp->* */
+
+	/* if the session has changed, abort mission */
+	if (sp != p->p_sessp) {
+		/*
+		 * this can't happen during process exit since we're the
+		 * only thread in the process and we sure didn't change
+		 * our own session at this point.
+		 */
+		ASSERT(!at_exit);
+
+		/* release our locks and holds */
+		mutex_exit(&sp->s_lock);
+		mutex_exit(&p->p_splock);
+		mutex_exit(&pidlock);
+		mutex_exit(&stp->sd_lock);
+		tty_rele(sp);
+		return (B_FALSE);
 	}
-	ASSERT(sp->s_cnt == 0);
-	ASSERT(vp->v_count >= 1);
-	sp->s_vp = NULL;
-	sp->s_cred = NULL;
 
 	/*
-	 * It is possible for the VOP_CLOSE below to call stralloctty()
-	 * and reallocate a new tty vnode.  To prevent that the
-	 * session is marked as closing here.
+	 * sanity checks.  none of this should have changed since we had
+	 * holds on the current ctty.
 	 */
+	ASSERT(sp->s_sidp == p->p_pidp);	/* we're the leader */
+	ASSERT(sp->s_vp != NULL);		/* a ctty exists */
+	ASSERT(vp == sp->s_vp);
+	ASSERT(stp == sp->s_vp->v_stream);
+
+	/* release our holds */
+	if (!at_exit) {
+		if ((--(sp)->s_cnt) == 0)
+			cv_broadcast(&sp->s_cnt_cv);
+		sp->s_ref--;
+		ASSERT(sp->s_ref > 0);
+	}
+
+	/* return our pointers */
+	*spp = sp;
+	*vpp = vp;
+	*stpp = stp;
 
-	sp->s_flag = SESS_CLOSE;
+	return (B_TRUE);
+}
+
+/*
+ * Returns B_FALSE if no signal is sent to the process group associated with
+ * this ctty.  Returns B_TRUE if a signal is sent to the process group.
+ * If it returns B_TRUE it also means that all the locks we were holding
+ * were dropped so that we could send the signal.
+ */
+static boolean_t
+freectty_signal(proc_t *p, sess_t *sp, stdata_t *stp, boolean_t at_exit)
+{
+	/* Assert that we hold all the necessary locks. */
+	ASSERT(MUTEX_HELD(&stp->sd_lock) && MUTEX_HELD(&pidlock) &&
+	    MUTEX_HELD(&p->p_splock) && MUTEX_HELD(&sp->s_lock));
+
+	/* check if we already signaled this group */
+	if (sp->s_sighuped)
+		return (B_FALSE);
+
+	sp->s_sighuped = B_TRUE;
+
+	if (!at_exit) {
+		/*
+		 * once again, we're about to drop our army of locks and we
+		 * don't want sp or stp to be freed.  (see the comment in
+		 * freectty_lock())
+		 */
+		sp->s_ref++;	/* hold the session structure */
+		sp->s_cnt++;	/* protect vp and stp */
+	}
+
+	/* can't hold these locks while calling pgsignal() */
 	mutex_exit(&sp->s_lock);
+	mutex_exit(&p->p_splock);
+	mutex_exit(&pidlock);
+
+	/* signal anyone in the foreground process group */
+	pgsignal(stp->sd_pgidp, SIGHUP);
+
+	/* signal anyone blocked in poll on this stream */
+	if (!(stp->sd_flag & STRHUP))
+		strhup(stp);
+
+	mutex_exit(&stp->sd_lock);
+
+	/* release our holds */
+	if (!at_exit)
+		tty_rele(sp);
+
+	return (B_TRUE);
+}
+
+int
+freectty(boolean_t at_exit)
+{
+	proc_t		*p = curproc;
+	stdata_t	*stp;
+	vnode_t		*vp;
+	cred_t		*cred;
+	sess_t		*sp;
+	struct pid	*pgidp, *sidp;
+	boolean_t	got_sig = B_FALSE;
 
 	/*
-	 * This will be the only thread with access to
-	 * this vnode, from this point on.
+	 * If the current process is a session leader we are going to
+	 * try to release the ctty associated with our current session.
+	 * To do this we need to acquire a bunch of locks, signal any
+	 * processes in the foreground that are associated with the ctty,
+	 * and make sure no one has any outstanding holds on the current
+	 * session structure (acquired via tty_hold()).  Hence, we have
+	 * the following for(;;) loop that will do all this work for
+	 * us and break out when the hold count on the session structure
+	 * hits zero.
 	 */
+	for (;;) {
+		if (!freectty_lock(p, &sp, &vp, &stp, at_exit))
+			return (EIO);
+
+		if (freectty_signal(p, sp, stp, at_exit)) {
+			/* loop around to re-acquire locks */
+			continue;
+		}
+
+		/*
+		 * Only a session leader process can free a ctty.  So if
+		 * we've made it here we know we're a session leader and
+		 * if we're not actively exiting it's impossible for another
+		 * thread in this process to be exiting.  (Because that
+		 * thread would have already stopped all other threads
+		 * in the current process.)
+		 */
+		ASSERT(at_exit || !sp->s_exit);
+
+		/*
+		 * If no one else has a hold on this session structure
+		 * then we now have exclusive access to it, so break out
+		 * of this loop and update the session structure.
+		 */
+		if (sp->s_cnt == 0)
+			break;
+
+		if (!at_exit) {
+			/* need to hold the session so it can't be freed */
+			sp->s_ref++;
+		}
+
+		/* ain't locking order fun? */
+		mutex_exit(&p->p_splock);
+		mutex_exit(&pidlock);
+		mutex_exit(&stp->sd_lock);
+
+		if (at_exit) {
+			/*
+			 * if we're exiting then we can't allow this operation
+			 * to fail so we do a cv_wait() instead of a
+			 * cv_wait_sig().  if there are threads with active
+			 * holds on this ctty that are blocked, then
+			 * they should only be blocked in a cv_wait_sig()
+			 * and hopefully they were in the foreground process
+			 * group and received the SIGHUP we sent above.  of
+			 * course it's possible that they weren't in the
+			 * foreground process group and didn't get our
+			 * signal (or they could be stopped by job control
+			 * in which case our signal wouldn't matter until
+			 * they are restarted).  in this case we won't
+			 * exit until someone else sends them a signal.
+			 */
+			cv_wait(&sp->s_cnt_cv, &sp->s_lock);
+			mutex_exit(&sp->s_lock);
+			continue;
+		}
+
+		if (!cv_wait_sig(&sp->s_cnt_cv, &sp->s_lock)) {
+			got_sig = B_TRUE;
+		}
+
+		mutex_exit(&sp->s_lock);
+		sess_rele(sp, B_FALSE);
+
+		if (got_sig)
+			return (EINTR);
+	}
+	ASSERT(sp->s_cnt == 0);
 
+	/* save some pointers for later */
+	cred = sp->s_cred;
+	pgidp = stp->sd_pgidp;
+	sidp = stp->sd_sidp;
+
+	/* clear the session ctty bindings */
+	sess_ctty_clear(sp, stp);
+
+	/* wake up anyone blocked in tty_hold() */
+	if (at_exit) {
+		ASSERT(sp->s_exit);
+		sp->s_exit = B_FALSE;
+		cv_broadcast(&sp->s_exit_cv);
+	}
+
+	/* we can drop these locks now */
+	mutex_exit(&sp->s_lock);
+	mutex_exit(&p->p_splock);
+	mutex_exit(&pidlock);
+	mutex_exit(&stp->sd_lock);
+
+	/* This is the only remaining thread with access to this vnode */
 	(void) VOP_CLOSE(vp, 0, 1, (offset_t)0, cred);
 	VN_RELE(vp);
-
 	crfree(cred);
+
+	/* release our holds on assorted structures and return */
+	mutex_enter(&pidlock);
+	PID_RELE(pgidp);
+	PID_RELE(sidp);
+	mutex_exit(&pidlock);
+
+	return (1);
 }
 
 /*
@@ -169,23 +680,29 @@ vhangup(void)
 dev_t
 cttydev(proc_t *pp)
 {
-	sess_t *sp = pp->p_sessp;
+	sess_t	*sp;
+	dev_t	dev;
+
+	mutex_enter(&pp->p_splock);	/* protects p->p_sessp */
+	sp = pp->p_sessp;
+
+#ifdef DEBUG
+	mutex_enter(&sp->s_lock);	/* protects sp->* */
 	if (sp->s_vp == NULL)
-		return (NODEV);
-	return (sp->s_dev);
+		ASSERT(sp->s_dev == NODEV);
+	else
+		ASSERT(sp->s_dev != NODEV);
+	mutex_exit(&sp->s_lock);
+#endif /* DEBUG */
+
+	dev = sp->s_dev;
+	mutex_exit(&pp->p_splock);
+	return (dev);
 }
 
 void
-alloctty(proc_t *pp, vnode_t *vp)
+ctty_clear_sighuped(void)
 {
-	sess_t *sp = pp->p_sessp;
-	cred_t *crp;
-
-	sp->s_vp = vp;
-	sp->s_dev = vp->v_rdev;
-
-	mutex_enter(&pp->p_crlock);
-	crhold(crp = pp->p_cred);
-	mutex_exit(&pp->p_crlock);
-	sp->s_cred = crp;
+	ASSERT(MUTEX_HELD(&pidlock) || MUTEX_HELD(&curproc->p_splock));
+	curproc->p_sessp->s_sighuped = B_FALSE;
 }
diff --git a/usr/src/uts/common/os/streamio.c b/usr/src/uts/common/os/streamio.c
index ffa676604f..e189a1627d 100644
--- a/usr/src/uts/common/os/streamio.c
+++ b/usr/src/uts/common/os/streamio.c
@@ -77,6 +77,19 @@
 #include <sys/autoconf.h>
 #include <sys/policy.h>
 
+
+/*
+ * i_straccess(x, y) wraps straccess() while keeping a very old streams
+ * performance enhancement: the cheap checks (sd_sidp is NULL, or the
+ * stream's vnode is a VFIFO) are done inline and yield 0, thereby
+ * reducing the number of unnecessary calls to straccess().  The checks
+ * use the macro argument (x) rather than a caller local that happens to
+ * be named "stp"; (x) is evaluated more than once, so pass a plain pointer.
+ */
+#define	i_straccess(x, y)	(((x)->sd_sidp == NULL) ? 0 : \
+				    ((x)->sd_vnode->v_type == VFIFO) ? 0 : \
+				    straccess((x), (y)))
+
 /*
  * what is mblk_pull_len?
  *
@@ -1095,11 +1108,13 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
 	ASSERT(vp->v_stream);
 	stp = vp->v_stream;
 
-	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
-		if (error = straccess(stp, JCREAD))
-			return (error);
-
 	mutex_enter(&stp->sd_lock);
+
+	if ((error = i_straccess(stp, JCREAD)) != 0) {
+		mutex_exit(&stp->sd_lock);
+		return (error);
+	}
+
 	if (stp->sd_flag & (STRDERR|STPLEX)) {
 		error = strgeterr(stp, STRDERR|STPLEX, 0);
 		if (error != 0) {
@@ -1161,12 +1176,8 @@ strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
 			}
 			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
 				"strread awakes:%p, %p, %p", vp, uiop, crp);
-			if (stp->sd_sidp != NULL &&
-			    stp->sd_vnode->v_type != VFIFO) {
-				mutex_exit(&stp->sd_lock);
-				if (error = straccess(stp, JCREAD))
-					goto oops1;
-				mutex_enter(&stp->sd_lock);
+			if ((error = i_straccess(stp, JCREAD)) != 0) {
+				goto oops;
 			}
 			first = 0;
 		}
@@ -2026,8 +2037,8 @@ strrput_nondata(queue_t *q, mblk_t *bp)
 		cv_broadcast(&q->q_wait);	/* the readers */
 		cv_broadcast(&_WR(q)->q_wait);	/* the writers */
 		cv_broadcast(&stp->sd_monitor);	/* the ioctllers */
-		mutex_exit(&stp->sd_lock);
 		strhup(stp);
+		mutex_exit(&stp->sd_lock);
 		return (0);
 
 	case M_UNHANGUP:
@@ -2665,18 +2676,23 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
 	ASSERT(vp->v_stream);
 	stp = vp->v_stream;
 
-	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
-		if ((error = straccess(stp, JCWRITE)) != 0)
-			return (error);
+	mutex_enter(&stp->sd_lock);
+
+	if ((error = i_straccess(stp, JCWRITE)) != 0) {
+		mutex_exit(&stp->sd_lock);
+		return (error);
+	}
 
 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
-		mutex_enter(&stp->sd_lock);
 		error = strwriteable(stp, B_TRUE, B_TRUE);
-		mutex_exit(&stp->sd_lock);
-		if (error != 0)
+		if (error != 0) {
+			mutex_exit(&stp->sd_lock);
 			return (error);
+		}
 	}
 
+	mutex_exit(&stp->sd_lock);
+
 	wqp = stp->sd_wrq;
 
 	/* get these values from them cached in the stream head */
@@ -2778,11 +2794,11 @@ strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
 			}
 			TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
 				"strwrite wake:q %p awakes", wqp);
+			if ((error = i_straccess(stp, JCWRITE)) != 0) {
+				mutex_exit(&stp->sd_lock);
+				goto out;
+			}
 			mutex_exit(&stp->sd_lock);
-			if (stp->sd_sidp != NULL &&
-			    stp->sd_vnode->v_type != VFIFO)
-				if (error = straccess(stp, JCWRITE))
-					goto out;
 		}
 		waitflag |= NOINTR;
 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
@@ -3101,6 +3117,7 @@ job_control_type(int cmd)
 	case JAGENT:	/* Obsolete */
 	case JTRUN:	/* Obsolete */
 	case JXTPROTO:	/* Obsolete */
+	case TIOCSETLD:
 		return (JCSETP);
 	}
 
@@ -3162,10 +3179,12 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 	if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
 		return (EINVAL);
 
-	if (access != -1 && stp->sd_sidp != NULL &&
-	    stp->sd_vnode->v_type != VFIFO)
-		if (error = straccess(stp, access))
-			return (error);
+	mutex_enter(&stp->sd_lock);
+	if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
+		mutex_exit(&stp->sd_lock);
+		return (error);
+	}
+	mutex_exit(&stp->sd_lock);
 
 	/*
 	 * Check for sgttyb-related ioctls first, and complain as
@@ -3307,11 +3326,16 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 				    secpolicy_sti(crp) != 0) {
 					return (EPERM);
 				}
-				if (stp->sd_sidp !=
-				    ttoproc(curthread)->p_sessp->s_sidp &&
+				mutex_enter(&stp->sd_lock);
+				mutex_enter(&curproc->p_splock);
+				if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
 				    secpolicy_sti(crp) != 0) {
+					mutex_exit(&curproc->p_splock);
+					mutex_exit(&stp->sd_lock);
 					return (EACCES);
 				}
+				mutex_exit(&curproc->p_splock);
+				mutex_exit(&stp->sd_lock);
 
 				strioc.ic_len = sizeof (char);
 				strioc.ic_dp = (char *)arg;
@@ -3445,10 +3469,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 			return (EINVAL);
 
 		access = job_control_type(strioc.ic_cmd);
-		if (access != -1 && stp->sd_sidp != NULL &&
-		    stp->sd_vnode->v_type != VFIFO &&
-		    (error = straccess(stp, access)) != 0)
+		mutex_enter(&stp->sd_lock);
+		if ((access != -1) &&
+		    ((error = i_straccess(stp, access)) != 0)) {
+			mutex_exit(&stp->sd_lock);
 			return (error);
+		}
+		mutex_exit(&stp->sd_lock);
 
 		/*
 		 * The I_STR facility provides a trap door for malicious
@@ -3699,7 +3726,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 				/*
 				 * try to allocate it as a controlling terminal
 				 */
-				stralloctty(stp);
+				(void) strctty(stp);
 			}
 		}
 
@@ -5053,15 +5080,11 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 				releasef(STRUCT_FGET(strfdinsert, fildes));
 				return (error);
 			}
-			if (stp->sd_sidp != NULL &&
-			    stp->sd_vnode->v_type != VFIFO) {
+			if ((error = i_straccess(stp, access)) != 0) {
 				mutex_exit(&stp->sd_lock);
-				if (error = straccess(stp, access)) {
-					releasef(
-					    STRUCT_FGET(strfdinsert, fildes));
-					return (error);
-				}
-				mutex_enter(&stp->sd_lock);
+				releasef(
+				    STRUCT_FGET(strfdinsert, fildes));
+				return (error);
 			}
 		}
 		mutex_exit(&stp->sd_lock);
@@ -5144,12 +5167,9 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 				mutex_exit(&stp->sd_lock);
 				return (error);
 			}
-			if (stp->sd_sidp != NULL &&
-			    stp->sd_vnode->v_type != VFIFO) {
+			if ((error = i_straccess(stp, access)) != 0) {
 				mutex_exit(&stp->sd_lock);
-				if (error = straccess(stp, access))
-					return (error);
-				mutex_enter(&stp->sd_lock);
+				return (error);
 			}
 		}
 		if (mp->b_datap->db_type != M_PASSFP) {
@@ -5446,13 +5466,13 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 	{
 		pid_t sid;
 
-		mutex_enter(&pidlock);
+		mutex_enter(&stp->sd_lock);
 		if (stp->sd_sidp == NULL) {
-			mutex_exit(&pidlock);
+			mutex_exit(&stp->sd_lock);
 			return (ENOTTY);
 		}
 		sid = stp->sd_sidp->pid_id;
-		mutex_exit(&pidlock);
+		mutex_exit(&stp->sd_lock);
 		return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
 		    copyflag));
 	}
@@ -5494,6 +5514,7 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 		bg_pgid = stp->sd_pgidp->pid_id;
 		CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
 		PID_RELE(stp->sd_pgidp);
+		ctty_clear_sighuped();
 		stp->sd_pgidp = q->p_pgidp;
 		PID_HOLD(stp->sd_pgidp);
 		mutex_exit(&pidlock);
@@ -5505,17 +5526,30 @@ strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
 	{
 		pid_t pgrp;
 
-		mutex_enter(&pidlock);
+		mutex_enter(&stp->sd_lock);
 		if (stp->sd_sidp == NULL) {
-			mutex_exit(&pidlock);
+			mutex_exit(&stp->sd_lock);
 			return (ENOTTY);
 		}
 		pgrp = stp->sd_pgidp->pid_id;
-		mutex_exit(&pidlock);
+		mutex_exit(&stp->sd_lock);
 		return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
 		    copyflag));
 	}
 
+	case TIOCSCTTY:
+	{
+		return (strctty(stp));
+	}
+
+	case TIOCNOTTY:
+	{
+		/* freectty() always assumes curproc. */
+		if (freectty(B_FALSE) != 0)
+			return (0);
+		return (ENOTTY);
+	}
+
 	case FIONBIO:
 	case FIOASYNC:
 		return (0);	/* handled by the upper layer */
@@ -6233,18 +6267,21 @@ strgetmsg(
 	stp = vp->v_stream;
 	rvp->r_val1 = 0;
 
-	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
-		if (error = straccess(stp, JCREAD))
-			return (error);
+	mutex_enter(&stp->sd_lock);
+
+	if ((error = i_straccess(stp, JCREAD)) != 0) {
+		mutex_exit(&stp->sd_lock);
+		return (error);
+	}
 
-	/* Fast check of flags before acquiring the lock */
 	if (stp->sd_flag & (STRDERR|STPLEX)) {
-		mutex_enter(&stp->sd_lock);
 		error = strgeterr(stp, STRDERR|STPLEX, 0);
-		mutex_exit(&stp->sd_lock);
-		if (error != 0)
+		if (error != 0) {
+			mutex_exit(&stp->sd_lock);
 			return (error);
+		}
 	}
+	mutex_exit(&stp->sd_lock);
 
 	switch (*flagsp) {
 	case MSG_HIPRI:
@@ -6381,11 +6418,9 @@ strgetmsg(
 		}
 		TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
 			"strgetmsg awakes:%p, %p", vp, uiop);
-		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
+		if ((error = i_straccess(stp, JCREAD)) != 0) {
 			mutex_exit(&stp->sd_lock);
-			if (error = straccess(stp, JCREAD))
-				return (error);
-			mutex_enter(&stp->sd_lock);
+			return (error);
 		}
 		first = 0;
 	}
@@ -6797,23 +6832,26 @@ kstrgetmsg(
 	stp = vp->v_stream;
 	rvp->r_val1 = 0;
 
-	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
-		if (error = straccess(stp, JCREAD))
-			return (error);
+	mutex_enter(&stp->sd_lock);
+
+	if ((error = i_straccess(stp, JCREAD)) != 0) {
+		mutex_exit(&stp->sd_lock);
+		return (error);
+	}
 
 	flags = *flagsp;
-	/* Fast check of flags before acquiring the lock */
 	if (stp->sd_flag & (STRDERR|STPLEX)) {
 		if ((stp->sd_flag & STPLEX) ||
 		    (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
-			mutex_enter(&stp->sd_lock);
 			error = strgeterr(stp, STRDERR|STPLEX,
 					(flags & MSG_IPEEK));
-			mutex_exit(&stp->sd_lock);
-			if (error != 0)
+			if (error != 0) {
+				mutex_exit(&stp->sd_lock);
 				return (error);
+			}
 		}
 	}
+	mutex_exit(&stp->sd_lock);
 
 	switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
 	case MSG_HIPRI:
@@ -6955,11 +6993,9 @@ retry:
 		}
 		TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
 			"kstrgetmsg awakes:%p, %p", vp, uiop);
-		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
+		if ((error = i_straccess(stp, JCREAD)) != 0) {
 			mutex_exit(&stp->sd_lock);
-			if (error = straccess(stp, JCREAD))
-				return (error);
-			mutex_enter(&stp->sd_lock);
+			return (error);
 		}
 		first = 0;
 	}
@@ -7430,18 +7466,23 @@ strputmsg(
 		audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
 #endif
 
-	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
-		if (error = straccess(stp, JCWRITE))
-			return (error);
+	mutex_enter(&stp->sd_lock);
+
+	if ((error = i_straccess(stp, JCWRITE)) != 0) {
+		mutex_exit(&stp->sd_lock);
+		return (error);
+	}
 
 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
-		mutex_enter(&stp->sd_lock);
 		error = strwriteable(stp, B_FALSE, xpg4);
-		mutex_exit(&stp->sd_lock);
-		if (error != 0)
+		if (error != 0) {
+			mutex_exit(&stp->sd_lock);
 			return (error);
+		}
 	}
 
+	mutex_exit(&stp->sd_lock);
+
 	/*
 	 * Check for legal flag value.
 	 */
@@ -7561,10 +7602,11 @@ strputmsg(
 		}
 		TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
 			"strputmsg wake:stp %p wakes", stp);
+		if ((error = i_straccess(stp, JCWRITE)) != 0) {
+			mutex_exit(&stp->sd_lock);
+			return (error);
+		}
 		mutex_exit(&stp->sd_lock);
-		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO)
-			if (error = straccess(stp, JCWRITE))
-				return (error);
 	}
 out:
 	/*
@@ -7617,25 +7659,27 @@ kstrputmsg(
 	if (mctl == NULL)
 		return (EINVAL);
 
-	if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
-		if (error = straccess(stp, JCWRITE)) {
-			freemsg(mctl);
-			return (error);
-		}
+	mutex_enter(&stp->sd_lock);
+
+	if ((error = i_straccess(stp, JCWRITE)) != 0) {
+		mutex_exit(&stp->sd_lock);
+		freemsg(mctl);
+		return (error);
 	}
 
 	if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
 		if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
-			mutex_enter(&stp->sd_lock);
 			error = strwriteable(stp, B_FALSE, B_TRUE);
-			mutex_exit(&stp->sd_lock);
 			if (error != 0) {
+				mutex_exit(&stp->sd_lock);
 				freemsg(mctl);
 				return (error);
 			}
 		}
 	}
 
+	mutex_exit(&stp->sd_lock);
+
 	/*
 	 * Check for legal flag value.
 	 */
@@ -7804,13 +7848,12 @@ kstrputmsg(
 		}
 		TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
 			"kstrputmsg wake:stp %p wakes", stp);
-		mutex_exit(&stp->sd_lock);
-		if (stp->sd_sidp != NULL && stp->sd_vnode->v_type != VFIFO) {
-			if (error = straccess(stp, JCWRITE)) {
-				freemsg(mctl);
-				return (error);
-			}
+		if ((error = i_straccess(stp, JCWRITE)) != 0) {
+			mutex_exit(&stp->sd_lock);
+			freemsg(mctl);
+			return (error);
 		}
+		mutex_exit(&stp->sd_lock);
 	}
 out:
 	freemsg(mctl);
diff --git a/usr/src/uts/common/os/strsubr.c b/usr/src/uts/common/os/strsubr.c
index 57a918a3f0..ae99e5198a 100644
--- a/usr/src/uts/common/os/strsubr.c
+++ b/usr/src/uts/common/os/strsubr.c
@@ -3107,13 +3107,18 @@ straccess(struct stdata *stp, enum jcaccess mode)
 	proc_t *p = ttoproc(t);
 	sess_t *sp;
 
+	ASSERT(mutex_owned(&stp->sd_lock));
+
 	if (stp->sd_sidp == NULL || stp->sd_vnode->v_type == VFIFO)
 		return (0);
 
-	mutex_enter(&p->p_lock);
-	sp = p->p_sessp;
+	mutex_enter(&p->p_lock);		/* protects p_pgidp */
 
 	for (;;) {
+		mutex_enter(&p->p_splock);	/* protects p->p_sessp */
+		sp = p->p_sessp;
+		mutex_enter(&sp->s_lock);	/* protects sp->* */
+
 		/*
 		 * If this is not the calling process's controlling terminal
 		 * or if the calling process is already in the foreground
@@ -3121,6 +3126,8 @@ straccess(struct stdata *stp, enum jcaccess mode)
 		 */
 		if (sp->s_dev != stp->sd_vnode->v_rdev ||
 		    p->p_pgidp == stp->sd_pgidp) {
+			mutex_exit(&sp->s_lock);
+			mutex_exit(&p->p_splock);
 			mutex_exit(&p->p_lock);
 			return (0);
 		}
@@ -3131,10 +3138,15 @@ straccess(struct stdata *stp, enum jcaccess mode)
 		if (sp->s_vp == NULL) {
 			if (!cantsend(p, t, SIGHUP))
 				sigtoproc(p, t, SIGHUP);
+			mutex_exit(&sp->s_lock);
+			mutex_exit(&p->p_splock);
 			mutex_exit(&p->p_lock);
 			return (EIO);
 		}
 
+		mutex_exit(&sp->s_lock);
+		mutex_exit(&p->p_splock);
+
 		if (mode == JCGETP) {
 			mutex_exit(&p->p_lock);
 			return (0);
@@ -3146,7 +3158,9 @@ straccess(struct stdata *stp, enum jcaccess mode)
 				return (EIO);
 			}
 			mutex_exit(&p->p_lock);
+			mutex_exit(&stp->sd_lock);
 			pgsignal(p->p_pgidp, SIGTTIN);
+			mutex_enter(&stp->sd_lock);
 			mutex_enter(&p->p_lock);
 		} else {  /* mode == JCWRITE or JCSETP */
 			if ((mode == JCWRITE && !(stp->sd_flag & STRTOSTOP)) ||
@@ -3159,7 +3173,9 @@ straccess(struct stdata *stp, enum jcaccess mode)
 				return (EIO);
 			}
 			mutex_exit(&p->p_lock);
+			mutex_exit(&stp->sd_lock);
 			pgsignal(p->p_pgidp, SIGTTOU);
+			mutex_enter(&stp->sd_lock);
 			mutex_enter(&p->p_lock);
 		}
 
@@ -3174,10 +3190,15 @@ straccess(struct stdata *stp, enum jcaccess mode)
 		 * We can't get here if the signal is ignored or
 		 * if the current thread is blocking the signal.
 		 */
+		mutex_exit(&stp->sd_lock);
 		if (!cv_wait_sig_swap(&lbolt_cv, &p->p_lock)) {
 			mutex_exit(&p->p_lock);
+			mutex_enter(&stp->sd_lock);
 			return (EINTR);
 		}
+		mutex_exit(&p->p_lock);
+		mutex_enter(&stp->sd_lock);
+		mutex_enter(&p->p_lock);
 	}
 }
 
@@ -4001,59 +4022,12 @@ strsignal(stdata_t *stp, int sig, int32_t band)
 void
 strhup(stdata_t *stp)
 {
+	ASSERT(mutex_owned(&stp->sd_lock));
 	pollwakeup(&stp->sd_pollist, POLLHUP);
-	mutex_enter(&stp->sd_lock);
 	if (stp->sd_sigflags & S_HANGUP)
 		strsendsig(stp->sd_siglist, S_HANGUP, 0, 0);
-	mutex_exit(&stp->sd_lock);
-}
-
-void
-stralloctty(stdata_t *stp)
-{
-	proc_t *p = curproc;
-	sess_t *sp = p->p_sessp;
-
-	mutex_enter(&stp->sd_lock);
-	/*
-	 * No need to hold the session lock or do a TTY_HOLD() because
-	 * this is the only thread that can be the session leader and not
-	 * have a controlling tty.
-	 */
-	if ((stp->sd_flag &
-	    (STRHUP|STRDERR|STWRERR|STPLEX|STRISTTY)) == STRISTTY &&
-	    stp->sd_sidp == NULL &&		/* not allocated as ctty */
-	    sp->s_sidp == p->p_pidp &&		/* session leader */
-	    sp->s_flag != SESS_CLOSE &&		/* session is not closing */
-	    sp->s_vp == NULL) {			/* without ctty */
-		ASSERT(stp->sd_pgidp == NULL);
-		alloctty(p, makectty(stp->sd_vnode));
-
-		mutex_enter(&pidlock);
-		stp->sd_sidp = sp->s_sidp;
-		stp->sd_pgidp = sp->s_sidp;
-		PID_HOLD(stp->sd_pgidp);
-		PID_HOLD(stp->sd_sidp);
-		mutex_exit(&pidlock);
-	}
-	mutex_exit(&stp->sd_lock);
 }
 
-void
-strfreectty(stdata_t *stp)
-{
-	mutex_enter(&stp->sd_lock);
-	pgsignal(stp->sd_pgidp, SIGHUP);
-	mutex_enter(&pidlock);
-	PID_RELE(stp->sd_pgidp);
-	PID_RELE(stp->sd_sidp);
-	stp->sd_pgidp = NULL;
-	stp->sd_sidp = NULL;
-	mutex_exit(&pidlock);
-	mutex_exit(&stp->sd_lock);
-	if (!(stp->sd_flag & STRHUP))
-		strhup(stp);
-}
 /*
  * Backenable the first queue upstream from `q' with a service procedure.
  */
diff --git a/usr/src/uts/common/os/sysent.c b/usr/src/uts/common/os/sysent.c
index 80761e102c..8211e23d01 100644
--- a/usr/src/uts/common/os/sysent.c
+++ b/usr/src/uts/common/os/sysent.c
@@ -51,6 +51,7 @@
 int	access();
 int	alarm();
 int	auditsys();
+int64_t	brandsys();
 int	brk();
 int	chdir();
 int	chmod();
@@ -131,6 +132,8 @@ int	unlink();
 int	utime();
 int64_t	utssys32();
 int64_t	utssys64();
+int	uucopy();
+ssize_t	uucopystr();
 int64_t	wait();
 ssize_t	write();
 ssize_t	readv();
@@ -473,7 +476,7 @@ struct sysent sysent[NSYSCALL] =
 			SYSENT_NOSYS(),
 			SYSENT_CI("fstatfs",	fstatfs32,	4)),
 	/* 39 */ SYSENT_CI("setpgrp",		setpgrp,	3),
-	/* 40 */ SYSENT_LOADABLE(),			/* (was cxenix) */
+	/* 40 */ SYSENT_CI("uucopystr",		uucopystr,	3),
 	/* 41 */ SYSENT_CI("dup",		dup,		1),
 	/* 42 */ SYSENT_LOADABLE(),			/* (was pipe ) */
 	/* 43 */ SYSENT_CL("times",		times,		1),
@@ -658,7 +661,7 @@ struct sysent sysent[NSYSCALL] =
 			SYSENT_NOSYS(),
 			SYSENT_C("llseek",	llseek32,	4)),
 	/* 176 */ SYSENT_LOADABLE(),		/* inst_sync */
-	/* 177 */ SYSENT_LOADABLE(),		/* (was srmlimitsys) */
+	/* 177 */ SYSENT_CI("brandsys",		brandsys,	6),
 	/* 178 */ SYSENT_LOADABLE(),		/* kaio */
 	/* 179 */ SYSENT_LOADABLE(),		/* cpc */
 	/* 180 */ SYSENT_CI("lgrpsys",		lgrpsys,	3),
@@ -770,7 +773,7 @@ struct sysent sysent[NSYSCALL] =
 	/* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock,	1),
 	/* 252 */ SYSENT_CI("lwp_mutex_init",	lwp_mutex_init,		2),
 	/* 253 */ SYSENT_CI("cladm",		cladm,		3),
-	/* 254 */ SYSENT_LOADABLE(),		/* (was lwp_sigtimedwait) */
+	/* 254 */ SYSENT_CI("uucopy",		uucopy,		3),
 	/* 255 */ SYSENT_CI("umount2",		umount2,	2)
 /* ONC_PLUS EXTRACT START */
 };
@@ -876,7 +879,7 @@ struct sysent sysent32[NSYSCALL] =
 	/* 37 */ SYSENT_CI("kill",		kill,		2),
 	/* 38 */ SYSENT_CI("fstatfs",		fstatfs32,	4),
 	/* 39 */ SYSENT_CI("setpgrp",		setpgrp,	3),
-	/* 40 */ SYSENT_LOADABLE32(),			/* (was cxenix) */
+	/* 40 */ SYSENT_CI("uucopystr",		uucopystr,	3),
 	/* 41 */ SYSENT_CI("dup",		dup,		1),
 	/* 42 */ SYSENT_LOADABLE32(),			/* (was pipe ) */
 	/* 43 */ SYSENT_CI("times",		times32,	1),
@@ -1036,7 +1039,7 @@ struct sysent sysent32[NSYSCALL] =
 	/* 174 */ SYSENT_CI("pwrite",		pwrite32,		4),
 	/* 175 */ SYSENT_C("llseek",		llseek32,	4),
 	/* 176 */ SYSENT_LOADABLE32(),		/* inst_sync */
-	/* 177 */ SYSENT_LOADABLE32(),		/* srmlimitsys */
+	/* 177 */ SYSENT_CI("brandsys",		brandsys,	6),
 	/* 178 */ SYSENT_LOADABLE32(),		/* kaio */
 	/* 179 */ SYSENT_LOADABLE32(),		/* cpc */
 	/* 180 */ SYSENT_CI("lgrpsys",		lgrpsys,	3),
@@ -1116,7 +1119,7 @@ struct sysent sysent32[NSYSCALL] =
 	/* 251 */ SYSENT_CI("lwp_mutex_trylock", lwp_mutex_trylock,	1),
 	/* 252 */ SYSENT_CI("lwp_mutex_init",	lwp_mutex_init,		2),
 	/* 253 */ SYSENT_CI("cladm",		cladm,		3),
-	/* 254 */ SYSENT_LOADABLE32(),		/* (was lwp_sigtimedwait) */
+	/* 254 */ SYSENT_CI("uucopy",		uucopy,		3),
 	/* 255 */ SYSENT_CI("umount2",		umount2,	2)
 /* ONC_PLUS EXTRACT START */
 };
diff --git a/usr/src/uts/common/os/zone.c b/usr/src/uts/common/os/zone.c
index 6a5c9243b3..9fd6b423bd 100644
--- a/usr/src/uts/common/os/zone.c
+++ b/usr/src/uts/common/os/zone.c
@@ -228,6 +228,7 @@
 #include <sys/nvpair.h>
 #include <sys/rctl.h>
 #include <sys/fss.h>
+#include <sys/brand.h>
 #include <sys/zone.h>
 #include <sys/tsol/label.h>
 
@@ -330,7 +331,6 @@ static kmutex_t mount_lock;
 
 const char * const zone_default_initname = "/sbin/init";
 static char * const zone_prefix = "/zone/";
-
 static int zone_shutdown(zoneid_t zoneid);
 
 /*
@@ -1223,6 +1223,8 @@ zone_init(void)
 	zone0.zone_nlwps = p0.p_lwpcnt;
 	zone0.zone_ntasks = 1;
 	mutex_exit(&p0.p_lock);
+	zone0.zone_restart_init = B_TRUE;
+	zone0.zone_brand = &native_brand;
 	rctl_prealloc_destroy(gp);
 	/*
 	 * pool_default hasn't been initialized yet, so we let pool_init() take
@@ -2330,33 +2332,40 @@ void
 zone_start_init(void)
 {
 	proc_t *p = ttoproc(curthread);
+	zone_t *z = p->p_zone;
 
 	ASSERT(!INGLOBALZONE(curproc));
 
 	/*
+	 * For all purposes (ZONE_ATTR_INITPID and restart_init),
+	 * storing just the pid of init is sufficient.
+	 */
+	z->zone_proc_initpid = p->p_pid;
+
+	/*
 	 * We maintain zone_boot_err so that we can return the cause of the
 	 * failure back to the caller of the zone_boot syscall.
 	 */
 	p->p_zone->zone_boot_err = start_init_common();
 
 	mutex_enter(&zone_status_lock);
-	if (p->p_zone->zone_boot_err != 0) {
+	if (z->zone_boot_err != 0) {
 		/*
 		 * Make sure we are still in the booting state-- we could have
 		 * raced and already be shutting down, or even further along.
 		 */
-		if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING)
-			zone_status_set(p->p_zone, ZONE_IS_SHUTTING_DOWN);
+		if (zone_status_get(z) == ZONE_IS_BOOTING)
+			zone_status_set(z, ZONE_IS_SHUTTING_DOWN);
 		mutex_exit(&zone_status_lock);
 		/* It's gone bad, dispose of the process */
-		if (proc_exit(CLD_EXITED, p->p_zone->zone_boot_err) != 0) {
+		if (proc_exit(CLD_EXITED, z->zone_boot_err) != 0) {
 			mutex_enter(&p->p_lock);
 			ASSERT(p->p_flag & SEXITLWPS);
 			lwp_exit();
 		}
 	} else {
-		if (zone_status_get(p->p_zone) == ZONE_IS_BOOTING)
-			zone_status_set(p->p_zone, ZONE_IS_RUNNING);
+		if (zone_status_get(z) == ZONE_IS_BOOTING)
+			zone_status_set(z, ZONE_IS_RUNNING);
 		mutex_exit(&zone_status_lock);
 		/* cause the process to return to userland. */
 		lwp_rtt();
@@ -2939,6 +2948,9 @@ zone_create(const char *zone_name, const char *zone_root,
 	zone->zone_psetid = ZONE_PS_INVAL;
 	zone->zone_ncpus = 0;
 	zone->zone_ncpus_online = 0;
+	zone->zone_restart_init = B_TRUE;
+	zone->zone_brand = &native_brand;
+	zone->zone_initname = NULL;
 	mutex_init(&zone->zone_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&zone->zone_nlwps_lock, NULL, MUTEX_DEFAULT, NULL);
 	cv_init(&zone->zone_cv, NULL, CV_DEFAULT, NULL);
@@ -3464,6 +3476,9 @@ zone_shutdown(zoneid_t zoneid)
 		zone_rele(zone);
 		return (set_errno(EINTR));
 	}
+
+	brand_unregister_zone(zone->zone_brand);
+
 	zone_rele(zone);
 	return (0);
 }
@@ -3771,6 +3786,18 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 		    copyout(&initpid, buf, bufsize) != 0)
 			error = EFAULT;
 		break;
+	case ZONE_ATTR_BRAND:
+		size = strlen(zone->zone_brand->b_name) + 1;
+
+		if (bufsize > size)
+			bufsize = size;
+		if (buf != NULL) {
+			err = copyoutstr(zone->zone_brand->b_name, buf,
+			    bufsize, NULL);
+			if (err != 0 && err != ENAMETOOLONG)
+				error = EFAULT;
+		}
+		break;
 	case ZONE_ATTR_INITNAME:
 		size = strlen(zone->zone_initname) + 1;
 		if (bufsize > size)
@@ -3797,7 +3824,12 @@ zone_getattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 		}
 		break;
 	default:
-		error = EINVAL;
+		if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone)) {
+			size = bufsize;
+			error = ZBROP(zone)->b_getattr(zone, attr, buf, &size);
+		} else {
+			error = EINVAL;
+		}
 	}
 	zone_rele(zone);
 
@@ -3815,6 +3847,7 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 {
 	zone_t *zone;
 	zone_status_t zone_status;
+	struct brand_attr *attrp;
 	int err;
 
 	if (secpolicy_zone_config(CRED()) != 0)
@@ -3847,8 +3880,50 @@ zone_setattr(zoneid_t zoneid, int attr, void *buf, size_t bufsize)
 	case ZONE_ATTR_BOOTARGS:
 		err = zone_set_bootargs(zone, (const char *)buf);
 		break;
+	case ZONE_ATTR_BRAND:
+	{
+		brand_t *bp;
+
+		/*
+		 * Copy in the brand name/module pair and bind the zone to
+		 * that brand.  attrp is a temporary kernel copy and must be
+		 * freed on every path out of this case.
+		 */
+		ASSERT(!ZONE_IS_BRANDED(zone));
+		err = 0;
+		attrp = kmem_alloc(sizeof (struct brand_attr), KM_SLEEP);
+		if ((buf == NULL) ||
+		    (copyin(buf, attrp, sizeof (struct brand_attr)) != 0)) {
+			kmem_free(attrp, sizeof (struct brand_attr));
+			err = EFAULT;
+			break;
+		}
+
+		/* Labeled systems are restricted to the native brand. */
+		if (is_system_labeled() && strncmp(attrp->ba_brandname,
+		    NATIVE_BRAND_NAME, MAXNAMELEN) != 0) {
+			kmem_free(attrp, sizeof (struct brand_attr));
+			err = EPERM;
+			break;
+		}
+
+		/*
+		 * Only install the brand once registration has succeeded so
+		 * that zone_brand is never left NULL on failure.
+		 */
+		bp = brand_register_zone(attrp);
+		kmem_free(attrp, sizeof (struct brand_attr));
+		if (bp == NULL)
+			err = EINVAL;
+		else
+			zone->zone_brand = bp;
+		break;
+	}
 	default:
-		err = EINVAL;
+		if ((attr >= ZONE_ATTR_BRAND_ATTRS) && ZONE_IS_BRANDED(zone))
+			err = ZBROP(zone)->b_setattr(zone, attr, buf, bufsize);
+		else
+			err = EINVAL;
 	}
 
 done:
@@ -4145,10 +4203,10 @@ zone_enter(zoneid_t zoneid)
 	 */
 	mutex_enter(&pidlock);
 	sp = zone->zone_zsched->p_sessp;
-	SESS_HOLD(sp);
+	sess_hold(zone->zone_zsched);
 	mutex_enter(&pp->p_lock);
 	pgexit(pp);
-	SESS_RELE(pp->p_sessp);
+	sess_rele(pp->p_sessp, B_TRUE);
 	pp->p_sessp = sp;
 	pgjoin(pp, zone->zone_zsched->p_pidp);
 	mutex_exit(&pp->p_lock);
diff --git a/usr/src/uts/common/rpc/clnt_gen.c b/usr/src/uts/common/rpc/clnt_gen.c
index 0093210bd5..4c557b563f 100644
--- a/usr/src/uts/common/rpc/clnt_gen.c
+++ b/usr/src/uts/common/rpc/clnt_gen.c
@@ -346,6 +346,11 @@ bindresvport_again:
 	}
 
 	if (!error && bound_addr) {
+		if (bound_addr->maxlen < ret->addr.len) {
+			kmem_free(bound_addr->buf, bound_addr->maxlen);
+			bound_addr->buf = kmem_zalloc(ret->addr.len, KM_SLEEP);
+			bound_addr->maxlen = ret->addr.len;
+		}
 		bcopy(ret->addr.buf, bound_addr->buf, ret->addr.len);
 		bound_addr->len = ret->addr.len;
 	}
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index f6bcef9c5c..2754405b01 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -102,6 +102,7 @@ CHKHDRS=			\
 	bofi_impl.h		\
 	bpp_io.h		\
 	bootstat.h		\
+	brand.h			\
 	buf.h			\
 	bufmod.h		\
 	bustypes.h		\
diff --git a/usr/src/uts/common/sys/audioio.h b/usr/src/uts/common/sys/audioio.h
index 5b8152cfc5..2814eb7040 100644
--- a/usr/src/uts/common/sys/audioio.h
+++ b/usr/src/uts/common/sys/audioio.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,8 +19,8 @@
  * CDDL HEADER END
  */
 /*
- * Copyright (c) 1995-2001 by Sun Microsystems, Inc.
- * All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
  */
 
 #ifndef	_SYS_AUDIOIO_H
@@ -209,10 +208,10 @@ typedef struct audio_info audio_info_t;
  * a signed int.
  */
 #define	AUDIO_INITINFO(i)	{					\
-	uint_t	*__x__;						\
-	for (__x__ = (uint_t *)(i);				\
+	uint_t	*__x__;							\
+	for (__x__ = (uint_t *)(i);					\
 	    (char *)__x__ < (((char *)(i)) + sizeof (audio_info_t));	\
-	    *__x__++ = ~0);						\
+	    *__x__++ = (uint_t)~0);					\
 }
 
 
diff --git a/usr/src/uts/common/sys/auxv.h b/usr/src/uts/common/sys/auxv.h
index 025d7a18e9..b9cf07f269 100644
--- a/usr/src/uts/common/sys/auxv.h
+++ b/usr/src/uts/common/sys/auxv.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
 
 
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -171,6 +170,15 @@ extern uint_t getisax(uint32_t *, uint_t);
 #define	AT_SUN_AUXFLAGS	2017	/* AF_SUN_ flags passed from the kernel */
 
 /*
+ * Used to indicate to the runtime linker the name of the emulation binary,
+ * if one is being used. For brands, this is the name of the brand library.
+ */
+#define	AT_SUN_EMULATOR		2018
+
+#define	AT_SUN_BRANDNAME	2019
+#define	AT_SUN_BRAND_PHDR	2020	/* Brand executable's phdr */
+
+/*
  * The kernel is in a better position to determine whether a process needs to
  * ignore dangerous LD environment variables.  If set, this flags tells
  * ld.so.1 to run "secure" and ignore the the environment.
@@ -183,7 +191,6 @@ extern uint_t getisax(uint32_t *, uint_t);
  */
 #define	AF_SUN_HWCAPVERIFY	0x00000002
 
-
 #ifdef	__cplusplus
 }
 #endif
diff --git a/usr/src/uts/common/sys/bitmap.h b/usr/src/uts/common/sys/bitmap.h
index 8476ba9563..d0dd12b683 100644
--- a/usr/src/uts/common/sys/bitmap.h
+++ b/usr/src/uts/common/sys/bitmap.h
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -124,6 +124,14 @@ extern "C" {
 #endif /* _LP64 */
 
 
+/*
+ * BIT_ONLYONESET(u) is a private helper for a single unsigned int/long
+ * word, not for arbitrarily sized bitmaps.  It evaluates to 1 when u
+ * has one and only one bit set and to 0 otherwise; u is expanded thrice.
+ */
+#define	BIT_ONLYONESET(u) \
+	(((u) != 0) && (((u) & ((u) - 1)) == 0))
+
 #if defined(_KERNEL) && !defined(_ASM)
 #include <sys/atomic.h>
 
diff --git a/usr/src/uts/common/sys/brand.h b/usr/src/uts/common/sys/brand.h
new file mode 100644
index 0000000000..c4595e9641
--- /dev/null
+++ b/usr/src/uts/common/sys/brand.h
@@ -0,0 +1,134 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_BRAND_H
+#define	_SYS_BRAND_H
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+#include <sys/proc.h>
+#include <sys/exec.h>
+
+/*
+ * All Brands supported by this kernel must use BRAND_VER_1.
+ */
+#define	BRAND_VER_1	1
+
+/*
+ * sub-commands to brandsys.
+ * 1 - 127 are for common commands
+ * 128+ are available for brand-specific commands.
+ */
+#define	B_REGISTER		1
+#define	B_TTYMODES		2
+#define	B_ELFDATA		3
+#define	B_EXEC_NATIVE		4
+#define	B_EXEC_BRAND		5
+
+/*
+ * Structure used by zoneadmd to communicate the name of a brand and the
+ * supporting brand module into the kernel.
+ */
+struct brand_attr {
+	char	ba_brandname[MAXNAMELEN];
+	char	ba_modname[MAXPATHLEN];
+};
+
+/* What we call the native brand. */
+#define	NATIVE_BRAND_NAME	"native"
+
+#ifdef	_KERNEL
+
+/* Root for branded zone's native binaries */
+#define	NATIVE_ROOT	"/native/"
+
+struct proc;
+struct uarg;
+struct brand_mach_ops;
+struct intpdata;
+struct execa;
+
+struct brand_ops {
+	int	(*b_brandsys)(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
+		uintptr_t, uintptr_t, uintptr_t);
+	void	(*b_setbrand)(struct proc *);
+	int	(*b_getattr)(zone_t *, int, void *, size_t *);
+	int	(*b_setattr)(zone_t *, int, void *, size_t);
+	void	(*b_copy_procdata)(struct proc *, struct proc *);
+	void	(*b_proc_exit)(struct proc *, klwp_t *);
+	void	(*b_exec)();
+	void	(*b_lwp_setrval)(klwp_t *, int, int);
+	int	(*b_initlwp)(klwp_t *);
+	void	(*b_forklwp)(klwp_t *, klwp_t *);
+	void	(*b_freelwp)(klwp_t *);
+	void	(*b_lwpexit)(klwp_t *);
+	int	(*b_elfexec)(struct vnode *vp, struct execa *uap,
+	    struct uarg *args, struct intpdata *idata, int level,
+	    long *execsz, int setid, caddr_t exec_file,
+	    struct cred *cred, int brand_action);
+};
+
+/*
+ * The b_version field must always be the first entry in this struct.
+ */
+typedef struct brand {
+	int			b_version;
+	char    		*b_name;
+	struct brand_ops	*b_ops;
+	struct brand_mach_ops	*b_machops;
+} brand_t;
+
+extern brand_t native_brand;
+
+/*
+ * Convenience macros
+ */
+#define	lwptolwpbrand(l)	((l)->lwp_brand)
+#define	ttolwpbrand(t)		(lwptolwpbrand(ttolwp(t)))
+#define	PROC_IS_BRANDED(p)	((p)->p_brand != &native_brand)
+#define	ZONE_IS_BRANDED(z)	((z)->zone_brand != &native_brand)
+#define	BROP(p)			((p)->p_brand->b_ops)
+#define	ZBROP(z)		((z)->zone_brand->b_ops)
+#define	BRMOP(p)		((p)->p_brand->b_machops)
+
+extern void	brand_init();
+extern int	brand_register(brand_t *);
+extern int	brand_unregister(brand_t *);
+extern brand_t	*brand_register_zone(struct brand_attr *);
+extern brand_t	*brand_find_name(char *);
+extern void	brand_unregister_zone(brand_t *);
+extern int	brand_zone_count(brand_t *);
+extern void	brand_setbrand(proc_t *);
+#endif	/* _KERNEL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif	/* _SYS_BRAND_H */
diff --git a/usr/src/uts/common/sys/class.h b/usr/src/uts/common/sys/class.h
index fbfbcc6080..9988ca3190 100644
--- a/usr/src/uts/common/sys/class.h
+++ b/usr/src/uts/common/sys/class.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -38,6 +37,7 @@
 #include <sys/thread.h>
 #include <sys/priocntl.h>
 #include <sys/mutex.h>
+#include <sys/uio.h>
 
 #ifdef	__cplusplus
 extern "C" {
@@ -128,15 +128,15 @@ extern pri_t	minclsyspri;
 extern id_t	syscid;		/* system scheduling class ID */
 extern id_t	defaultcid;	/* "default" class id; see dispadmin(1M) */
 
-extern int		alloc_cid(char *, id_t *);
-extern int		scheduler_load(char *, sclass_t *);
-extern int		getcid(char *, id_t *);
-extern int		getcidbyname(char *, id_t *);
-extern int		parmsin(pcparms_t *, pc_vaparms_t *);
-extern int		parmsout(pcparms_t *, pc_vaparms_t *);
-extern int		parmsset(pcparms_t *, kthread_id_t);
-extern void		parmsget(kthread_id_t, pcparms_t *);
-extern int		vaparmsout(char *, pcparms_t *, pc_vaparms_t *);
+extern int	alloc_cid(char *, id_t *);
+extern int	scheduler_load(char *, sclass_t *);
+extern int	getcid(char *, id_t *);
+extern int	getcidbyname(char *, id_t *);
+extern int	parmsin(pcparms_t *, pc_vaparms_t *);
+extern int	parmsout(pcparms_t *, pc_vaparms_t *);
+extern int	parmsset(pcparms_t *, kthread_id_t);
+extern void	parmsget(kthread_id_t, pcparms_t *);
+extern int	vaparmsout(char *, pcparms_t *, pc_vaparms_t *, uio_seg_t);
 
 #endif
 
diff --git a/usr/src/uts/common/sys/exec.h b/usr/src/uts/common/sys/exec.h
index e9a34eacfe..a5eaf18edd 100644
--- a/usr/src/uts/common/sys/exec.h
+++ b/usr/src/uts/common/sys/exec.h
@@ -105,9 +105,19 @@ typedef struct uarg {
 	uint_t	brkpageszc;
 	uintptr_t entry;
 	uintptr_t thrptr;
+	char	*emulator;
+	char	*brandname;
+	auxv32_t *brand_auxp;	/* starting user addr of brand auxvs on stack */
 } uarg_t;
 
 /*
+ * Possible brand actions for exec.
+ */
+#define	EBA_NONE	0
+#define	EBA_NATIVE	1
+#define	EBA_BRAND	2
+
+/*
  * The following macro is a machine dependent encapsulation of
  * postfix processing to hide the stack direction from elf.c
  * thereby making the elf.c code machine independent.
@@ -166,7 +176,7 @@ struct execsw {
 	int	(*exec_func)(struct vnode *vp, struct execa *uap,
 		    struct uarg *args, struct intpdata *idata, int level,
 		    long *execsz, int setid, caddr_t exec_file,
-		    struct cred *cred);
+		    struct cred *cred, int brand_action);
 	int	(*exec_core)(struct vnode *vp, struct proc *p,
 		    struct cred *cred, rlim64_t rlimit, int sig,
 		    core_content_t content);
@@ -198,10 +208,10 @@ extern int exec_args(execa_t *, uarg_t *, intpdata_t *, void **);
 extern int exec(const char *fname, const char **argp);
 extern int exece(const char *fname, const char **argp, const char **envp);
 extern int exec_common(const char *fname, const char **argp,
-    const char **envp);
+    const char **envp, int brand_action);
 extern int gexec(vnode_t **vp, struct execa *uap, struct uarg *args,
     struct intpdata *idata, int level, long *execsz, caddr_t exec_file,
-    struct cred *cred);
+    struct cred *cred, int brand_action);
 extern struct execsw *allocate_execsw(char *name, char *magic,
     size_t magic_size);
 extern struct execsw *findexecsw(char *magic);
diff --git a/usr/src/uts/common/sys/klwp.h b/usr/src/uts/common/sys/klwp.h
index ade26b4f82..7dea5b4941 100644
--- a/usr/src/uts/common/sys/klwp.h
+++ b/usr/src/uts/common/sys/klwp.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -211,6 +210,8 @@ typedef struct _klwp {
 	 */
 	struct ct_template *lwp_ct_active[CTT_MAXTYPE]; /* active templates */
 	struct contract	*lwp_ct_latest[CTT_MAXTYPE]; /* last created contract */
+
+	void	*lwp_brand;		/* per-lwp brand data */
 } klwp_t;
 
 /* lwp states */
diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h
index 1093eddef6..5e9450dde5 100644
--- a/usr/src/uts/common/sys/modctl.h
+++ b/usr/src/uts/common/sys/modctl.h
@@ -61,6 +61,7 @@ struct mod_ops {
  * The defined set of mod_ops structures for each loadable module type
  * Defined in modctl.c
  */
+extern struct mod_ops mod_brandops;
 #if defined(__i386) || defined(__amd64)
 extern struct mod_ops mod_cpuops;
 #endif
@@ -175,6 +176,13 @@ struct modlpcbe {
 	struct __pcbe_ops	*pcbe_ops;
 };
 
+/* For Brand modules */
+struct modlbrand {
+	struct mod_ops		*brand_modops;
+	char			*brand_linkinfo;
+	struct brand		*brand_branddef;
+};
+
 /* for devname fs */
 struct modldev {
 	struct mod_ops		*dev_modops;
diff --git a/usr/src/uts/common/sys/proc.h b/usr/src/uts/common/sys/proc.h
index fadcbf4a6d..13a3605e66 100644
--- a/usr/src/uts/common/sys/proc.h
+++ b/usr/src/uts/common/sys/proc.h
@@ -115,6 +115,7 @@ typedef struct lwpdir {
 struct pool;
 struct task;
 struct zone;
+struct brand;
 struct corectl_path;
 struct corectl_content;
 
@@ -336,6 +337,11 @@ typedef struct	proc {
 	uintptr_t	p_portcnt;	/* event ports counter */
 	struct zone	*p_zone;	/* zone in which process lives */
 	struct vnode	*p_execdir;	/* directory that p_exec came from */
+	struct brand	*p_brand;	/* process's brand  */
+	void		*p_brand_data;	/* per-process brand state */
+
+	/* additional lock to protect p_sessp (but not its contents) */
+	kmutex_t p_splock;
 } proc_t;
 
 #define	PROC_T				/* headers relying on proc_t are OK */
@@ -408,6 +414,10 @@ struct plock {
 extern proc_t p0;		/* process 0 */
 extern struct plock p0lock;	/* p0's plock */
 extern struct pid pid0;		/* p0's pid */
+
+/* pid_allocate() flags */
+#define	PID_ALLOC_PROC	0x0001	/* assign a /proc slot as well */
+
 #endif /* _KERNEL */
 
 /* stat codes */
@@ -588,7 +598,8 @@ extern int sigcheck(proc_t *, kthread_t *);
 extern void sigdefault(proc_t *);
 
 extern void pid_setmin(void);
-extern pid_t pid_assign(proc_t *);
+extern pid_t pid_allocate(proc_t *, int);
+extern struct pid *pid_find(pid_t);
 extern int pid_rele(struct pid *);
 extern void pid_exit(proc_t *);
 extern void proc_entry_free(struct pid *);
@@ -724,6 +735,7 @@ extern	void	lwp_rtt(void);
 extern	void	lwp_rtt_initial(void);
 extern	int	lwp_setprivate(klwp_t *, int, uintptr_t);
 extern	void	lwp_stat_update(lwp_stat_id_t, long);
+extern	void	lwp_attach_brand_hdlrs(klwp_t *);
 
 /*
  * Signal queue function prototypes. Must be here due to header ordering
diff --git a/usr/src/uts/common/sys/ptms.h b/usr/src/uts/common/sys/ptms.h
index 9aa6493956..120503539b 100644
--- a/usr/src/uts/common/sys/ptms.h
+++ b/usr/src/uts/common/sys/ptms.h
@@ -35,6 +35,8 @@
 extern "C" {
 #endif
 
+#ifdef _KERNEL
+
 /*
  * Structures and definitions supporting the pseudo terminal
  * drivers. This structure is private and should not be used by any
@@ -63,8 +65,6 @@ struct pt_ttys {
 #define	PTSOPEN 	0x04	/* slave side is open */
 #define	PTSTTY		0x08	/* slave side is tty */
 
-#ifdef _KERNEL
-
 /*
  * Multi-threading primitives.
  * Values of pt_refcnt: -1 if a writer is accessing the struct
@@ -129,18 +129,29 @@ extern void ptms_logp(char *, uintptr_t);
 #define	DDBGP(a, b)
 #endif
 
+typedef struct __ptmptsopencb_arg *ptmptsopencb_arg_t;
+typedef struct ptmptsopencb {
+	boolean_t		(*ppocb_func)(ptmptsopencb_arg_t);
+	ptmptsopencb_arg_t	ppocb_arg;
+} ptmptsopencb_t;
+
 #endif /* _KERNEL */
 
+typedef struct pt_own {
+	uid_t	pto_ruid;
+	gid_t	pto_rgid;
+} pt_own_t;
+
 /*
  * ioctl commands
  *
- *   ISPTM: Determines whether the file descriptor is that of an open master
- *	    device. Return code of zero indicates that the file descriptor
- *	    represents master device.
+ *  ISPTM: Determines whether the file descriptor is that of an open master
+ *	   device. Return code of zero indicates that the file descriptor
+ *	   represents master device.
  *
- *  UNLKPT: Unlocks the master and slave devices.  It returns 0 on success. On
- *	    failure, the errno is set to EINVAL indicating that the master
- *	    device is not open.
+ * UNLKPT: Unlocks the master and slave devices.  It returns 0 on success. On
+ *	   failure, the errno is set to EINVAL indicating that the master
+ *	   device is not open.
  *
  *  ZONEPT: Sets the zoneid of the pair of master and slave devices.  It
  *	    returns 0 upon success.  Used to force a pty 'into' a zone upon
@@ -149,16 +160,24 @@ extern void ptms_logp(char *, uintptr_t);
  * PT_OWNER: Sets uid and gid for slave device.  It returns 0 on success.
  *
  */
-#define	ISPTM	(('P'<<8)|1)	/* query for master */
-#define	UNLKPT	(('P'<<8)|2)	/* unlock master/slave pair */
-#define	PTSSTTY	(('P'<<8)|3)	/* set tty flag */
-#define	ZONEPT	(('P'<<8)|4)	/* set zone of master/slave pair */
-#define	PT_OWNER (('P'<<8)|5)	/* set owner and group for slave device */
+#define	ISPTM		(('P'<<8)|1)	/* query for master */
+#define	UNLKPT		(('P'<<8)|2)	/* unlock master/slave pair */
+#define	PTSSTTY		(('P'<<8)|3)	/* set tty flag */
+#define	ZONEPT		(('P'<<8)|4)	/* set zone of master/slave pair */
+#define	PT_OWNER	(('P'<<8)|5)	/* set owner/group for slave device */
 
-typedef struct pt_own {
-	uid_t	pto_ruid;
-	gid_t	pto_rgid;
-} pt_own_t;
+#ifdef _KERNEL
+/*
+ * kernel ioctl commands
+ *
+ * PTMPTSOPENCB: Returns a callback function pointer and opaque argument.
+ *	      The return value of the callback function when it's invoked
+ *	      with the opaque argument passed to it will indicate if the
+ *	      pts slave device is currently open.
+ */
+#define	PTMPTSOPENCB	(('P'<<8)|6)	/* check if the slave is open */
+
+#endif /* _KERNEL */
 
 #ifdef	__cplusplus
 }
diff --git a/usr/src/uts/common/sys/session.h b/usr/src/uts/common/sys/session.h
index 639d6bf69d..8db8a8a5bb 100644
--- a/usr/src/uts/common/sys/session.h
+++ b/usr/src/uts/common/sys/session.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
  * CDDL HEADER END
  */
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -37,54 +36,96 @@
 extern "C" {
 #endif
 
+/*
+ * Session structure overview.
+ *
+ * Currently, the only structure in the kernel which has a pointer to a
+ * session structure is the proc_t via the p_sessp pointer.  To
+ * access a proc_t's p_sessp pointer a caller must hold either
+ * pidlock or p_splock.  These locks only protect the p_sessp pointer
+ * itself and do not protect any of the contents of the session structure.
+ * To prevent the contents of the session structure from changing the
+ * caller must grab s_lock.
+ *
+ * No callers should ever update the contents of the session structure
+ * directly.  Only the session management code should ever modify the
+ * contents of the session structure.  When the session code attempts
+ * to modify the contents of a session structure it must hold multiple
+ * locks.  The locking order for all the locks that may need to be
+ * acquired is:
+ * 	sd_lock -> pidlock -> p_splock -> s_lock
+ *
+ * If a caller requires access to a session structure for long
+ * periods of time or across operations that may block it should
+ * use the tty_hold() and sess_hold() interfaces.
+ *
+ * sess_hold() returns a pointer to a session structure associated
+ * with the proc_t that was passed in.  It also increments the reference
+ * count associated with that session structure to ensure that it
+ * can't be freed until after the caller is done with it and calls
+ * sess_rele().  This hold doesn't actually protect any of the
+ * contents of the session structure.
+ *
+ * tty_hold() returns a pointer to a session structure associated
+ * with the curproc.  It also "locks" the contents of the session
+ * structure.  This hold should be used when the caller will be
+ * doing operations on a controlling tty associated with the session.
+ * This operation does an implicit sess_hold() so that the session
+ * structure can't be free'd until after the caller is done with it
+ * and invokes tty_rele().
+ *
+ * NOTE: Neither of these functions (sess_hold() or tty_hold())
+ * prevents a process from changing its session.  Once these functions
+ * return a session pointer, that session pointer may no longer be
+ * associated with the current process.  If a caller wants to prevent
+ * a process from changing its session then it must hold pidlock or
+ * p_splock.
+ */
+
 typedef struct sess {
-	uint_t		s_ref; 		/* reference count */
-	dev_t		s_dev;		/* tty's device number */
-	struct vnode	*s_vp;		/* tty's vnode */
-	struct pid	*s_sidp;	/* session ID info */
-	struct cred	*s_cred;	/* allocation credentials */
-	kmutex_t	s_lock;		/* sync s_vp use with freectty */
-	kcondvar_t	s_wait_cv;	/* Condvar for sleeping */
-	int		s_cnt;		/* # of active users of this session */
-	int		s_flag;		/* session state flag see below */
-} sess_t;
+	struct pid *s_sidp;		/* session ID info, never changes */
 
-#define	SESS_CLOSE	1		/* session about to close */
-#define	s_sid s_sidp->pid_id
+	kmutex_t s_lock;		/* protects everything below */
+	uint_t s_ref; 			/* reference count */
+	boolean_t s_sighuped;		/* ctty had sighup sent to it */
 
-#if defined(_KERNEL)
+	boolean_t s_exit;		/* session leader is exiting */
+	kcondvar_t s_exit_cv;		/* Condvar for s_exit */
 
-extern sess_t session0;
+	int s_cnt;			/* active users of this ctty */
+	kcondvar_t s_cnt_cv;		/* Condvar for s_cnt */
 
-#define	SESS_HOLD(sp)	(++(sp)->s_ref)
-#define	SESS_RELE(sp)	sess_rele(sp)
+	/*
+	 * The following fields can only be updated while s_lock is held
+	 * and s_cnt is 0.  (ie, no one has a tty_hold() on this session.)
+	 */
+	dev_t s_dev;			/* tty's device number */
+	struct vnode *s_vp;		/* tty's vnode */
+	struct cred *s_cred;		/* allocation credentials */
+} sess_t;
 
-/*
- * Used to synchronize session vnode users with freectty()
- */
+#define	s_sid s_sidp->pid_id
 
-#define	TTY_HOLD(sp)	{ \
-	mutex_enter(&(sp)->s_lock); \
-	(++(sp)->s_cnt); \
-	mutex_exit(&(sp)->s_lock); \
-}
+#if defined(_KERNEL)
 
-#define	TTY_RELE(sp)	{ \
-	mutex_enter(&(sp)->s_lock); \
-	if ((--(sp)->s_cnt) == 0) \
-		cv_signal(&(sp)->s_wait_cv); \
-	mutex_exit(&(sp)->s_lock); \
-}
+extern sess_t session0;
 
 /* forward referenced structure tags */
 struct vnode;
 struct proc;
+struct stdata;
+
+extern void sess_hold(proc_t *p);
+extern void sess_rele(sess_t *, boolean_t);
+extern sess_t *tty_hold(void);
+extern void tty_rele(sess_t *sp);
+
 
-extern void sess_rele(sess_t *);
 extern void sess_create(void);
-extern void freectty(sess_t *);
-extern void alloctty(struct proc *, struct vnode *);
+extern int strctty(struct stdata *);
+extern int freectty(boolean_t);
 extern dev_t cttydev(struct proc *);
+extern void ctty_clear_sighuped(void);
 
 #endif /* defined(_KERNEL) */
 
diff --git a/usr/src/uts/common/sys/socketvar.h b/usr/src/uts/common/sys/socketvar.h
index d00220f2a9..39112e6c97 100644
--- a/usr/src/uts/common/sys/socketvar.h
+++ b/usr/src/uts/common/sys/socketvar.h
@@ -544,11 +544,21 @@ struct sonodeops {
 	(((len) + _CMSG_HDR_ALIGNMENT - 1) & ~(_CMSG_HDR_ALIGNMENT - 1))
 
 /*
- * Used in parsing msg_control
+ * Macros that operate on struct cmsghdr; used in parsing msg_control.
+ * CMSG_VALID does not assume that the last option buffer is padded, and
+ * bounds cmsg_len by subtraction so that a huge length cannot wrap past end.
  */
 #define	CMSG_NEXT(cmsg)						\
 	(struct cmsghdr *)((uintptr_t)(cmsg) +			\
 	    ROUNDUP_cmsglen((cmsg)->cmsg_len))
+#define	CMSG_CONTENT(cmsg)	(&((cmsg)[1]))
+#define	CMSG_CONTENTLEN(cmsg)	((cmsg)->cmsg_len - sizeof (struct cmsghdr))
+#define	CMSG_VALID(cmsg, start, end)					\
+	(ISALIGNED_cmsghdr(cmsg) &&					\
+	((uintptr_t)(cmsg) >= (uintptr_t)(start)) &&			\
+	((uintptr_t)(cmsg) < (uintptr_t)(end)) &&			\
+	((cmsg)->cmsg_len >= sizeof (struct cmsghdr)) &&		\
+	((cmsg)->cmsg_len <= (uintptr_t)(end) - (uintptr_t)(cmsg)))
 
 /*
  * Maximum size of any argument that is copied in (addresses, options,
diff --git a/usr/src/uts/common/sys/strsubr.h b/usr/src/uts/common/sys/strsubr.h
index 27403d72cc..4f424e96e1 100644
--- a/usr/src/uts/common/sys/strsubr.h
+++ b/usr/src/uts/common/sys/strsubr.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
 
 
 /*
- * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -1113,8 +1112,6 @@ extern mblk_t *strrput_misc(vnode_t *, mblk_t *,
 extern int getiocseqno(void);
 extern int strwaitbuf(size_t, int);
 extern int strwaitq(stdata_t *, int, ssize_t, int, clock_t, int *);
-extern void stralloctty(struct stdata *);
-extern void strfreectty(struct stdata *);
 extern struct stdata *shalloc(queue_t *);
 extern void shfree(struct stdata *s);
 extern queue_t *allocq(void);
diff --git a/usr/src/uts/common/sys/syscall.h b/usr/src/uts/common/sys/syscall.h
index 43dee30f0b..1a6412b70b 100644
--- a/usr/src/uts/common/sys/syscall.h
+++ b/usr/src/uts/common/sys/syscall.h
@@ -99,7 +99,7 @@ extern "C" {
 	 *	getpgid(pid)	  :: syscall(39,4,pid)
 	 *	setpgid(pid,pgid) :: syscall(39,5,pid,pgid)
 	 */
-#define	SYS_reserved_40	40	/* 40 not used, was xenix */
+#define	SYS_uucopystr	40
 #define	SYS_dup		41
 #define	SYS_pipe	42
 #define	SYS_times	43
@@ -355,7 +355,7 @@ extern "C" {
 #define	SYS_pwrite		174
 #define	SYS_llseek		175
 #define	SYS_inst_sync		176
-#define	SYS_reserved_177	177	/* 177 reserved */
+#define	SYS_brand		177
 #define	SYS_kaio		178
 	/*
 	 * subcodes:
@@ -464,6 +464,8 @@ extern "C" {
 	 *	zone_list(...) :: zone(ZONE_LIST, ...)
 	 *	zone_shutdown(...) :: zone(ZONE_SHUTDOWN, ...)
 	 *	zone_lookup(...) :: zone(ZONE_LOOKUP, ...)
+	 *	zone_setattr(...) :: zone(ZONE_SETATTR, ...)
+	 *	zone_getattr(...) :: zone(ZONE_GETATTR, ...)
 	 */
 #define	SYS_autofssys		228
 #define	SYS_getcwd		229
@@ -494,7 +496,7 @@ extern "C" {
 #define	SYS_lwp_mutex_trylock	251
 #define	SYS_lwp_mutex_init	252
 #define	SYS_cladm		253
-#define	SYS_reserved_254	254	/* 254 reserved */
+#define	SYS_uucopy		254
 #define	SYS_umount2		255
 
 
diff --git a/usr/src/uts/common/sys/systm.h b/usr/src/uts/common/sys/systm.h
index c96ea5b4ac..ac465ad49f 100644
--- a/usr/src/uts/common/sys/systm.h
+++ b/usr/src/uts/common/sys/systm.h
@@ -246,6 +246,7 @@ int copyoutstr_noerr(const char *, char *, size_t, size_t *);
 int copystr(const char *, char *, size_t, size_t *);
 void bcopy(const void *, void *, size_t);
 void ucopy(const void *, void *, size_t);
+void ucopystr(const char *, char *, size_t, size_t *);
 void pgcopy(const void *, void *, size_t);
 void ovbcopy(const void *, void *, size_t);
 void bzero(void *, size_t);
diff --git a/usr/src/uts/common/sys/termios.h b/usr/src/uts/common/sys/termios.h
index 8bd020e5c1..2d99f70bc2 100644
--- a/usr/src/uts/common/sys/termios.h
+++ b/usr/src/uts/common/sys/termios.h
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -24,7 +23,7 @@
 
 
 /*
- * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
@@ -382,6 +381,24 @@ extern pid_t tcgetsid();
 #define	TCSETSF		(_TIOC|16)
 
 /*
+ * linux terminal ioctls we need to be aware of
+ */
+#define	TIOCSETLD	(_TIOC|123)	/* set line discipline parms */
+#define	TIOCGETLD	(_TIOC|124)	/* get line discipline parms */
+
+/*
+ * On Solaris, VMIN and VTIME overlap with VEOF and VEOL.  This is
+ * perfectly legal, except that Linux expects them to be separate, so we
+ * keep them separate here.
+ */
+struct lx_cc {
+	unsigned char veof;	/* veof value */
+	unsigned char veol;	/* veol value */
+	unsigned char vmin;	/* vmin value */
+	unsigned char vtime;	/* vtime value */
+};
+
+/*
  * NTP PPS ioctls
  */
 #define	TIOCGPPS	(_TIOC|125)
@@ -457,6 +474,7 @@ struct ppsclockev32 {
 #define	TIOCGLTC	(tIOC|116)	/* get local special chars */
 #define	TIOCOUTQ	(tIOC|115)	/* driver output queue size */
 #define	TIOCNOTTY	(tIOC|113)	/* void tty association */
+#define	TIOCSCTTY	(tIOC|132)	/* get a ctty */
 #define	TIOCSTOP	(tIOC|111)	/* stop output, like ^S */
 #define	TIOCSTART	(tIOC|110)	/* start output, like ^Q */
 #define	TIOCSILOOP	(tIOC|109)	/* private to Sun; do not use */
diff --git a/usr/src/uts/common/sys/zone.h b/usr/src/uts/common/sys/zone.h
index 56c23d00ad..636b8acc0f 100644
--- a/usr/src/uts/common/sys/zone.h
+++ b/usr/src/uts/common/sys/zone.h
@@ -87,6 +87,10 @@ extern "C" {
 #define	ZONE_ATTR_SLBL		8
 #define	ZONE_ATTR_INITNAME	9
 #define	ZONE_ATTR_BOOTARGS	10
+#define	ZONE_ATTR_BRAND		11
+
+/* Start of the brand-specific attribute namespace */
+#define	ZONE_ATTR_BRAND_ATTRS	32768
 
 #define	ZONE_EVENT_CHANNEL	"com.sun:zones:status"
 #define	ZONE_EVENT_STATUS_CLASS	"status"
@@ -103,6 +107,49 @@ extern "C" {
 #define	ZONE_CB_TIMESTAMP	"when"
 #define	ZONE_CB_ZONEID		"zoneid"
 
+/*
+ * Exit values that may be returned by scripts or programs invoked by various
+ * zone commands.
+ *
+ * These are defined as:
+ *
+ *	ZONE_SUBPROC_OK
+ *	===============
+ *	The subprocess completed successfully.
+ *
+ *	ZONE_SUBPROC_USAGE
+ *	==================
+ *	The subprocess failed with a usage message, or a usage message should
+ *	be output on its behalf.
+ *
+ *	ZONE_SUBPROC_NOTCOMPLETE
+ *	========================
+ *	The subprocess did not complete, but the actions performed by the
+ *	subprocess require no recovery actions by the user.
+ *
+ *	For example, if the subprocess were called by "zoneadm install," the
+ *	installation of the zone did not succeed but the user need not perform
+ *	a "zoneadm uninstall" before attempting another install.
+ *
+ *	ZONE_SUBPROC_FATAL
+ *	==================
+ *	The subprocess failed in a fatal manner, usually one that will require
+ *	some type of recovery action by the user.
+ *
+ *	For example, if the subprocess were called by "zoneadm install," the
+ *	installation of the zone did not succeed and the user will need to
+ *	perform a "zoneadm uninstall" before another install attempt is
+ *	possible.
+ *
+ *	The non-success exit values are large to avoid accidental collision
+ *	with values used internally by some commands (e.g. "Z_ERR" and
+ *	"Z_USAGE" as used by zoneadm.)
+ */
+#define	ZONE_SUBPROC_OK			0
+#define	ZONE_SUBPROC_USAGE		253
+#define	ZONE_SUBPROC_NOTCOMPLETE	254
+#define	ZONE_SUBPROC_FATAL		255
+
 #ifdef _SYSCALL32
 typedef struct {
 	caddr32_t zone_name;
@@ -159,8 +206,8 @@ typedef enum {
  * communicates with zoneadmd, but only uses Z_REBOOT and Z_HALT.
  */
 typedef enum zone_cmd {
-	Z_READY, Z_BOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
-	Z_MOUNT, Z_UNMOUNT
+	Z_READY, Z_BOOT, Z_FORCEBOOT, Z_REBOOT, Z_HALT, Z_NOTE_UNINSTALLING,
+	Z_MOUNT, Z_FORCEMOUNT, Z_UNMOUNT
 } zone_cmd_t;
 
 /*
@@ -223,6 +270,7 @@ typedef struct zone_cmd_rval {
 #define	ZF_IS_SCRATCH		0x4	/* scratch zone */
 
 struct pool;
+struct brand;
 
 /*
  * Structure to record list of ZFS datasets exported to a zone.
@@ -318,6 +366,8 @@ typedef struct zone {
 	int		zone_match;	/* require label match for packets */
 	tsol_mlp_list_t zone_mlps;	/* MLPs on zone-private addresses */
 
+	boolean_t	zone_restart_init;	/* Restart init if it dies? */
+	struct brand	*zone_brand;		/* zone's brand */
 } zone_t;
 
 /*
@@ -330,8 +380,6 @@ extern zone_t *global_zone;
 extern uint_t maxzones;
 extern rctl_hndl_t rc_zone_nlwps;
 
-extern const char * const zone_initname;
-
 extern long zone(int, void *, void *, void *, void *);
 extern void zone_zsd_init(void);
 extern void zone_init(void);
diff --git a/usr/src/uts/common/syscall/brandsys.c b/usr/src/uts/common/syscall/brandsys.c
new file mode 100644
index 0000000000..9b4bd38baa
--- /dev/null
+++ b/usr/src/uts/common/syscall/brandsys.c
@@ -0,0 +1,56 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/brand.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/zone.h>
+
+/*
+ * brand(2) system call: hand cmd and args to ZBROP(p_zone)->b_brandsys().
+ */
+int64_t
+brandsys(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
+    uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
+{
+	struct proc *p = curthread->t_procp;
+	int64_t rval = 0;	/* handed to b_brandsys by address */
+	int err;
+
+	/*
+	 * The brandsys system call can only be executed from inside a
+	 * branded zone; callers anywhere else get ENOSYS.
+	 */
+	if (INGLOBALZONE(p) || !ZONE_IS_BRANDED(p->p_zone))
+		return (set_errno(ENOSYS));
+
+	if ((err = ZBROP(p->p_zone)->b_brandsys(cmd, &rval, arg1, arg2, arg3,
+	    arg4, arg5, arg6)) != 0)
+		return (set_errno(err));
+
+	return (rval);
+}
diff --git a/usr/src/uts/common/syscall/pgrpsys.c b/usr/src/uts/common/syscall/pgrpsys.c
index e8be876537..8f60747663 100644
--- a/usr/src/uts/common/syscall/pgrpsys.c
+++ b/usr/src/uts/common/syscall/pgrpsys.c
@@ -2,9 +2,8 @@
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License").  You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or http://www.opensolaris.org/os/licensing.
@@ -19,11 +18,16 @@
  *
  * CDDL HEADER END
  */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 /*	  All Rights Reserved	*/
 
 
-#ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.78 */
+#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.78 */
 
 #include <sys/param.h>
 #include <sys/types.h>
@@ -39,8 +43,9 @@
 int
 setpgrp(int flag, int pid, int pgid)
 {
-	register proc_t *p =  ttoproc(curthread);
-	register int	retval = 0;
+	proc_t	*p = curproc;
+	int	retval = 0;
+	int	sid;
 
 	switch (flag) {
 
@@ -51,7 +56,10 @@ setpgrp(int flag, int pid, int pgid)
 			sess_create();
 		} else
 			mutex_exit(&pidlock);
-		return (p->p_sessp->s_sid);
+		mutex_enter(&p->p_splock);
+		sid = p->p_sessp->s_sid;
+		mutex_exit(&p->p_splock);
+		return (sid);
 
 	case 3: /* setsid() */
 		mutex_enter(&pidlock);
@@ -61,7 +69,10 @@ setpgrp(int flag, int pid, int pgid)
 		}
 		mutex_exit(&pidlock);
 		sess_create();
-		return (p->p_sessp->s_sid);
+		mutex_enter(&p->p_splock);
+		sid = p->p_sessp->s_sid;
+		mutex_exit(&p->p_splock);
+		return (sid);
 
 	case 5: /* setpgid() */
 	{
diff --git a/usr/src/uts/common/syscall/uucopy.c b/usr/src/uts/common/syscall/uucopy.c
new file mode 100644
index 0000000000..c301599e2f
--- /dev/null
+++ b/usr/src/uts/common/syscall/uucopy.c
@@ -0,0 +1,59 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident	"%Z%%M%	%I%	%E% SMI"
+
+#include <sys/systm.h>
+
+int			/* 0, or set_errno(EFAULT) if on_fault() is nonzero */
+uucopy(const void *from, void *to, size_t size)
+{
+	label_t ljb;		/* state handed to on_fault() */
+
+	if (on_fault(&ljb))	/* nonzero on_fault(): fail with EFAULT */
+		return (set_errno(EFAULT));
+
+	ucopy(from, to, size);	/* presumably a user-to-user copy; confirm */
+
+	no_fault();		/* pairs with the on_fault() call above */
+
+	return (0);
+}
+
+ssize_t			/* len from ucopystr(), or set_errno(EFAULT) */
+uucopystr(const char *from, char *to, size_t size)
+{
+	label_t ljb;		/* state handed to on_fault() */
+	size_t len;		/* passed by address to ucopystr() */
+
+	if (on_fault(&ljb))	/* nonzero on_fault(): fail with EFAULT */
+		return (set_errno(EFAULT));
+
+	ucopystr(from, to, size, &len);	/* TODO confirm len is always set */
+
+	no_fault();		/* pairs with the on_fault() call above */
+
+	return ((ssize_t)len);
+}
author	nn35248 <none@none>	2006-09-11 22:51:59 -0700
committer	nn35248 <none@none>	2006-09-11 22:51:59 -0700
commit	9acbbeaf2a1ffe5c14b244867d427714fab43c5c (patch)
tree	d1ecd54896325c19a463220e9cbc50864874fc82 /usr/src/uts/common
parent	da51466dc253d7c98dda4956059042bd0c476328 (diff)
download	illumos-gate-9acbbeaf2a1ffe5c14b244867d427714fab43c5c.tar.gz