OS-4665 LX brand want devfs which allows symlinks to devices in root of /dev

Reviewed by: Patrick Mooney <patrick.mooney@joyent.com> Reviewed by: Alex Wilson <alex.wilson@joyent.com>
author: Jerry Jelinek <jerry.jelinek@joyent.com> 2015-12-28 18:00:01 +0000
committer: Jerry Jelinek <jerry.jelinek@joyent.com> 2015-12-28 18:00:01 +0000
commit: 673d120c71e85f5f52136d47d1e44282d0e41632 (patch)
tree: dbca459bc3367d492053c4c00975c1343742e9ce
parent: 180116059a3bc57660669b92f988a75f95d54257 (diff)
download: illumos-joyent-673d120c71e85f5f52136d47d1e44282d0e41632.tar.gz
10 files changed, 3561 insertions, 2 deletions
diff --git a/manifest b/manifest
index 39c667e38b..1c86232dd3 100644
--- a/manifest
+++ b/manifest
@@ -4571,6 +4571,7 @@ d usr/kernel/fs/amd64 0755 root sys
 f usr/kernel/fs/amd64/fdfs 0755 root sys
 f usr/kernel/fs/amd64/lxautofs 0755 root sys
 f usr/kernel/fs/amd64/lx_cgroup 0755 root sys
+f usr/kernel/fs/amd64/lx_devfs 0755 root sys
 f usr/kernel/fs/amd64/lx_proc 0755 root sys
 f usr/kernel/fs/amd64/lx_sysfs 0755 root sys
 f usr/kernel/fs/amd64/pcfs 0755 root sys
diff --git a/usr/src/lib/brand/lx/zone/platform.xml b/usr/src/lib/brand/lx/zone/platform.xml
index cb3c9bb124..049ebbfd18 100644
--- a/usr/src/lib/brand/lx/zone/platform.xml
+++ b/usr/src/lib/brand/lx/zone/platform.xml
@@ -57,7 +57,7 @@
 	    directory="/native/etc/zones/%z.xml" opt="ro" type="lofs" />
 
 	<!-- Local filesystems to mount when booting the zone -->
-	<mount special="/native/dev" directory="/dev" type="lofs" />
+	<mount special="/native/dev" directory="/dev" type="lx_devfs" />
 	<mount special="proc" directory="/native/proc" type="proc" />
 	<mount special="swap" directory="/native/etc/svc/volatile"
 	    type="tmpfs" />
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd.h b/usr/src/uts/common/brand/lx/devfs/lxd.h
new file mode 100644
index 0000000000..add9515891
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd.h
@@ -0,0 +1,237 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#ifndef	_LXD_H
+#define	_LXD_H
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/*
+ * lxd.h: declarations, data structures and macros for lxd (lx devfs).
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/policy.h>
+#include <sys/dirent.h>
+#include <sys/errno.h>
+#include <sys/kmem.h>
+#include <sys/pathname.h>
+#include <sys/systm.h>
+#include <sys/var.h>
+#include <sys/sysmacros.h>
+#include <sys/cred.h>
+#include <sys/priv.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/cmn_err.h>
+#include <sys/zone.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/atomic.h>
+#include <vm/anon.h>
+#include <sys/lx_types.h>
+
+#if defined(_KERNEL)
+
+#include <sys/lx_brand.h>
+
+/*
+ * It's unlikely that we need to create more than 50-60 subdirs/symlinks
+ * as front files so we size the file system hash for 2x that number.
+ * The back devfs typically has ~80 nodes so this is also a comfortable size
+ * for the back hash table.
+ */
+#define	LXD_HASH_SZ	128
+
+#define	LXD_BACK_HASH(v)	((((intptr_t)(v)) >> 10) & ((LXD_HASH_SZ) - 1))
+
+#define	LXD_NM_HASH(ldn, name, hash)				\
+	{							\
+		char Xc, *Xcp;					\
+		hash = (uint_t)(uintptr_t)(ldn) >> 8;		\
+		for (Xcp = (name); (Xc = *Xcp) != 0; Xcp++)	\
+			hash = (hash << 4) + hash + (uint_t)Xc;	\
+		hash &= (LXD_HASH_SZ - 1);			\
+	}
+
+
+enum lxd_node_type	{ LXDNT_NONE, LXDNT_BACK, LXDNT_FRONT };
+
+/*
+ * lxd per-mount data structure.
+ *
+ * All fields are protected by lxdm_contents unless noted otherwise below.
+ * File renames on a specific file system are protected by lxdm_renamelck.
+ */
+typedef struct lxd_mnt {
+	struct vfs	*lxdm_vfsp;	/* filesystem's vfs struct */
+	struct lxd_node *lxdm_rootnode;	/* root lxd_node */
+	char 		*lxdm_mntpath;	/* name of lxd mount point */
+	dev_t		lxdm_dev;	/* unique dev # of mounted `device' */
+	kmutex_t	lxdm_contents;	/* per-mount lock */
+	kmutex_t	lxdm_renamelck;	/* rename lock for this mount */
+	uint_t		lxdm_gen;	/* node ID source for files */
+
+	/* protects buckets in both "dir ent" and "back" hash tables */
+	kmutex_t	lxdm_hash_mutex[LXD_HASH_SZ];
+
+	/* per-mount data for "back" vnodes in the fs */
+	uint_t		lxdm_back_refcnt; /* # outstanding "back" vnodes */
+	struct lxd_node *lxdm_back_htable[LXD_HASH_SZ];
+
+	/*
+	 * Per-mount directory data for "front" nodes in the fs.
+	 * Each front node has a directory entry but directory entries can live
+	 * on either front or back nodes.
+	 */
+	uint_t		lxdm_dent_refcnt; /* # outstanding dir ents */
+	struct lxd_dirent *lxdm_dent_htable[LXD_HASH_SZ];
+} lxd_mnt_t;
+
+/*
+ * lxd_node is the file system dependent node for lxd.
+ *
+ * The node is used to represent both front and back files. For front files
+ * the node can represent either a directory or symlink.
+ */
+typedef struct lxd_node {
+	enum lxd_node_type	lxdn_type;
+
+	/* Data for "front" nodes */
+	struct lxd_node		*lxdn_prev;	/* linked list of lxd nodes */
+	struct lxd_node		*lxdn_next;	/* linked list of lxd nodes */
+	struct lxd_node		*lxdn_parent;	/* dir containing this node */
+	krwlock_t		lxdn_rwlock;	/* serialize mods/dir updates */
+	kmutex_t		lxdn_tlock;	/* time, flag, and nlink lock */
+
+	/* these could be in a union ala tmpfs but not really necessary */
+	uint_t			lxdn_dirents;	/* number of dirents */
+	struct lxd_dirent	*lxdn_dir;	/* dirent list */
+	char			*lxdn_symlink;	/* pointer to symlink */
+	struct vattr		lxdn_attr;	/* attributes */
+
+	/* Hash table link (chains nodes in the per-mount "back" table) */
+	struct lxd_node		*lxdn_hnxt;	/* link in per-mount entry */
+						/* hash table */
+	vnode_t 		*lxdn_vnode;	/* vnode for this lxd_node */
+
+	vnode_t			*lxdn_real_vp;	/* back file - real vnode */
+} lxd_node_t;
+
+/*
+ * Attributes
+ */
+#define	lxdn_mask	lxdn_attr.va_mask
+#define	lxdn_mode	lxdn_attr.va_mode
+#define	lxdn_uid	lxdn_attr.va_uid
+#define	lxdn_gid	lxdn_attr.va_gid
+#define	lxdn_fsid	lxdn_attr.va_fsid
+#define	lxdn_nodeid	lxdn_attr.va_nodeid
+#define	lxdn_nlink	lxdn_attr.va_nlink
+#define	lxdn_size	lxdn_attr.va_size
+#define	lxdn_atime	lxdn_attr.va_atime
+#define	lxdn_mtime	lxdn_attr.va_mtime
+#define	lxdn_ctime	lxdn_attr.va_ctime
+#define	lxdn_rdev	lxdn_attr.va_rdev
+#define	lxdn_blksize	lxdn_attr.va_blksize
+#define	lxdn_nblocks	lxdn_attr.va_nblocks
+#define	lxdn_seq	lxdn_attr.va_seq
+
+/*
+ * lx devfs conversion macros
+ */
+#define	VFSTOLXDM(vfsp)		((lxd_mnt_t *)(vfsp)->vfs_data)
+#define	VTOLXDM(vp)		((lxd_mnt_t *)(vp)->v_vfsp->vfs_data)
+#define	VTOLDN(vp)		((lxd_node_t *)(vp)->v_data)
+#define	LDNTOV(ln)		((ln)->lxdn_vnode)
+#define	ldnode_hold(ln)		VN_HOLD(LDNTOV(ln))
+#define	ldnode_rele(ln)		VN_RELE(LDNTOV(ln))
+
+#define	REALVP(vp)		(VTOLDN(vp)->lxdn_real_vp)
+
+/*
+ * front directories are made up of a linked list of lxd_dirent structures
+ * hanging off directory lxd_nodes.  File names are not fixed length, but are
+ * null terminated.
+ */
+typedef struct lxd_dirent {
+	lxd_node_t		*lddir_node;	/* lxd node for this file */
+	struct lxd_dirent	*lddir_next;	/* next directory entry */
+	struct lxd_dirent	*lddir_prev;	/* prev directory entry */
+	uint_t			lddir_offset;	/* "offset" of dir entry */
+	uint_t			lddir_hash;	/* a hash of lddir_name */
+	struct lxd_dirent	*lddir_link;	/* linked via hash table */
+	lxd_node_t		*lddir_parent;	/* parent, dir we are in */
+	char			*lddir_name;	/* null terminated */
+} lxd_dirent_t;
+
+enum de_op	{ DE_CREATE, DE_MKDIR, DE_RENAME };	/* direnter ops */
+enum dr_op	{ DR_REMOVE, DR_RMDIR, DR_RENAME };	/* dirremove ops */
+
+#define	LX_MAJORSHIFT		8
+#define	LX_MINORMASK		((1 << LX_MAJORSHIFT) - 1)
+#define	LX_MAKEDEVICE(lx_maj, lx_min)	\
+	((lx_dev_t)((lx_maj) << LX_MAJORSHIFT | ((lx_min) & LX_MINORMASK)))
+
+typedef struct lxd_minor_translator {
+	char	*lxd_mt_path;		/* illumos minor node path */
+	minor_t	lxd_mt_minor;		/* illumos minor node number */
+	int	lxd_mt_lx_major;	/* linux major node number */
+	int	lxd_mt_lx_minor;	/* linux minor node number */
+} lxd_minor_translator_t;
+
+enum lxd_xl_tp	{ DTT_INVALID, DTT_LIST, DTT_CUSTOM };
+
+#define	xl_list		lxd_xl_minor.lxd_xl_list
+#define	xl_custom	lxd_xl_minor.lxd_xl_custom
+
+typedef struct lxd_devt_translator {
+	char		*lxd_xl_driver;	/* driver name */
+	major_t		lxd_xl_major;	/* driver number */
+
+	enum lxd_xl_tp	lxd_xl_type;	/* dictates how we interp. xl_minor */
+	union {
+		uintptr_t		lxd_xl_foo; /* required to compile */
+		lxd_minor_translator_t	*lxd_xl_list;
+		int			(*lxd_xl_custom)(dev_t, lx_dev_t *);
+	} lxd_xl_minor;
+} lxd_devt_translator_t;
+
+extern struct vnodeops *lxd_vnodeops;
+extern lxd_devt_translator_t lxd_devt_translators[];
+
+vnode_t *lxd_make_back_node(vnode_t *, lxd_mnt_t *);
+void lxd_free_back_node(lxd_node_t *);
+int lxd_dirdelete(lxd_node_t *, lxd_node_t *, char *, enum dr_op, cred_t *);
+int lxd_direnter(lxd_mnt_t *, lxd_node_t *, char *, enum de_op, lxd_node_t *,
+	lxd_node_t *, struct vattr *, lxd_node_t **, cred_t *,
+	caller_context_t *);
+void lxd_dirinit(lxd_node_t *, lxd_node_t *, cred_t *);
+int lxd_dirlookup(lxd_node_t *, char *, lxd_node_t **, cred_t *);
+void lxd_dirtrunc(lxd_node_t *);
+void lxd_node_init(lxd_mnt_t *, lxd_node_t *, vnode_t *, vattr_t *, cred_t *);
+int lxd_naccess(void *, int, cred_t *);
+
+#endif /* _KERNEL */
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* _LXD_H */
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd_node.c b/usr/src/uts/common/brand/lx/devfs/lxd_node.c
new file mode 100644
index 0000000000..9e67f988bc
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd_node.c
@@ -0,0 +1,1004 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vfs.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/stat.h>
+#include <sys/mode.h>
+#include <sys/policy.h>
+#include <sys/sdt.h>
+
+#include "lxd.h"
+
+#define	LXD_HASH_SIZE	8192		/* must be power of 2 */
+#define	LXD_MUTEX_SIZE	64
+
+
+#define	MODESHIFT	3
+
+typedef enum lxd_nodehold {
+	NOHOLD,
+	HOLD
+} lxd_nodehold_t;
+
+/*
+ * The following functions maintain the per-mount "dir ent" hash table.
+ */
+static void
+lxd_save_dirent(lxd_dirent_t *de)
+{
+	lxd_mnt_t	*lxdm = VTOLXDM(LDNTOV(de->lddir_parent));
+	uint_t		hash;
+	kmutex_t	*hmtx;
+
+	LXD_NM_HASH(de->lddir_parent, de->lddir_name, hash);
+	de->lddir_hash = hash;		/* remembered for lxd_rm_dirent() */
+
+	hmtx = &lxdm->lxdm_hash_mutex[hash];
+
+	mutex_enter(hmtx);
+	ASSERT(de->lddir_link == NULL);	/* must not already be chained */
+	de->lddir_link = lxdm->lxdm_dent_htable[hash];
+	lxdm->lxdm_dent_htable[hash] = de;	/* new head of the bucket */
+	mutex_exit(hmtx);
+
+	atomic_inc_32(&lxdm->lxdm_dent_refcnt);
+}
+
+static void
+lxd_rm_dirent(lxd_dirent_t *de)
+{
+	lxd_mnt_t	*lxdm = VTOLXDM(LDNTOV(de->lddir_parent));
+	uint_t		hash;
+	lxd_dirent_t	**prevpp;
+	kmutex_t	*hmtx;
+
+	hash = de->lddir_hash;	/* bucket recorded by lxd_save_dirent() */
+	hmtx = &lxdm->lxdm_hash_mutex[hash];
+
+	mutex_enter(hmtx);
+	prevpp = &lxdm->lxdm_dent_htable[hash];
+	while (*prevpp != de)	/* no NULL check: de must be on the chain */
+		prevpp = &(*prevpp)->lddir_link;
+	*prevpp = de->lddir_link;	/* unlink de from the bucket chain */
+	de->lddir_link = NULL;
+	mutex_exit(hmtx);
+
+	ASSERT(lxdm->lxdm_dent_refcnt > 0);
+	atomic_dec_32(&lxdm->lxdm_dent_refcnt);
+}
+
+static lxd_dirent_t *
+lxd_find_dirent(char *name, lxd_node_t *parent, lxd_nodehold_t do_hold,
+    lxd_node_t **found)
+{
+	lxd_mnt_t	*lxdm = VTOLXDM(LDNTOV(parent));
+	lxd_dirent_t	*de;
+	uint_t		hash;
+	kmutex_t	*hmtx;
+
+	LXD_NM_HASH(parent, name, hash);	/* bucket for (parent, name) */
+	hmtx = &lxdm->lxdm_hash_mutex[hash];
+
+	mutex_enter(hmtx);
+	de = lxdm->lxdm_dent_htable[hash];
+	while (de) {
+		if (de->lddir_hash == hash && de->lddir_parent == parent &&
+		    strcmp(de->lddir_name, name) == 0) {
+			lxd_node_t *ldn = de->lddir_node;
+
+			if (do_hold == HOLD) {
+				ASSERT(ldn != NULL);
+				ldnode_hold(ldn);	/* caller releases */
+			}
+			if (found != NULL)
+				*found = ldn;
+			mutex_exit(hmtx);
+			return (de);
+		}
+
+		de = de->lddir_link;
+	}
+	mutex_exit(hmtx);
+	return (NULL);	/* no such entry; *found is left untouched */
+}
+
+int
+lxd_naccess(void *vcp, int mode, cred_t *cr)
+{
+	lxd_node_t *ldn = vcp;
+	int shift = 0;
+	/*
+	 * Select owner/group/other perms; shift is only used for front nodes.
+	 */
+	if (crgetuid(cr) != ldn->lxdn_uid) {
+		shift += MODESHIFT;
+		if (groupmember(ldn->lxdn_gid, cr) == 0)
+			shift += MODESHIFT;
+	}
+
+	if (ldn->lxdn_type == LXDNT_FRONT)
+		return (secpolicy_vnode_access2(cr, LDNTOV(ldn),
+		    ldn->lxdn_uid, ldn->lxdn_mode << shift, mode));
+
+	ASSERT(ldn->lxdn_type == LXDNT_BACK);	/* back: ask the real vnode */
+	return (VOP_ACCESS(ldn->lxdn_real_vp, mode, 0, cr, NULL));
+}
+
+static lxd_node_t *
+lxd_find_back(struct vnode *vp, uint_t hash, lxd_mnt_t *lxdm)
+{
+	lxd_node_t *l;
+
+	ASSERT(MUTEX_HELD(&lxdm->lxdm_hash_mutex[hash]));
+	/* Look for a back node that already wraps the real vnode 'vp'. */
+	for (l = lxdm->lxdm_back_htable[hash]; l != NULL; l = l->lxdn_hnxt) {
+		if (l->lxdn_real_vp == vp) {
+			ASSERT(l->lxdn_type == LXDNT_BACK);
+			/* Hand back the existing node with a new hold. */
+			VN_HOLD(LDNTOV(l));
+			return (l);
+		}
+	}
+	return (NULL);
+}
+
+static void
+lxd_save_back(lxd_node_t *l, uint_t hash, lxd_mnt_t *lxdm)
+{
+	ASSERT(l->lxdn_type == LXDNT_BACK);
+	ASSERT(l->lxdn_real_vp != NULL);
+	ASSERT(MUTEX_HELD(&lxdm->lxdm_hash_mutex[hash]));
+
+	atomic_inc_32(&lxdm->lxdm_back_refcnt);	/* one more back node */
+
+	l->lxdn_hnxt = lxdm->lxdm_back_htable[hash];
+	lxdm->lxdm_back_htable[hash] = l;	/* new head of the bucket */
+}
+
+
+struct vnode *
+lxd_make_back_node(struct vnode *vp, lxd_mnt_t *lxdm)
+{
+	uint_t hash;
+	kmutex_t *hmtx;
+	lxd_node_t *l;
+
+	hash = LXD_BACK_HASH(vp);	/* Note: hashing with realvp */
+	hmtx = &lxdm->lxdm_hash_mutex[hash];
+	mutex_enter(hmtx);
+	/* Reuse the node already wrapping vp, if any (returned held). */
+	l = lxd_find_back(vp, hash, lxdm);
+	if (l == NULL) {
+		vnode_t *nvp;
+
+		l = kmem_zalloc(sizeof (lxd_node_t), KM_SLEEP);
+		nvp = vn_alloc(KM_SLEEP);
+
+		rw_init(&l->lxdn_rwlock, NULL, RW_DEFAULT, NULL);
+		mutex_init(&l->lxdn_tlock, NULL, MUTEX_DEFAULT, NULL);
+
+		l->lxdn_vnode = nvp;
+		l->lxdn_type = LXDNT_BACK;
+		l->lxdn_real_vp = vp;	/* keeps the caller's hold on vp */
+
+		VN_SET_VFS_TYPE_DEV(nvp, lxdm->lxdm_vfsp, vp->v_type,
+		    vp->v_rdev);
+		nvp->v_flag |= (vp->v_flag & (VNOMOUNT|VNOMAP|VDIROPEN));
+		vn_setops(nvp, lxd_vnodeops);
+		nvp->v_data = (caddr_t)l;
+
+		lxd_save_back(l, hash, lxdm);
+		vn_exists(vp);	/* NOTE(review): vp, not nvp - confirm intended */
+	} else {
+		VN_RELE(vp);	/* existing node already holds vp */
+	}
+
+	mutex_exit(hmtx);
+	return (LDNTOV(l));
+}
+
+void
+lxd_free_back_node(lxd_node_t *lp)
+{
+	uint_t hash;
+	kmutex_t *hmtx;
+	lxd_node_t *l;
+	lxd_node_t *lprev = NULL;
+	vnode_t *vp = LDNTOV(lp);
+	vnode_t *realvp = REALVP(vp);
+	lxd_mnt_t *lxdm = VTOLXDM(vp);
+
+	/* in lxd_make_back_node we call lxd_find_back with the realvp */
+	hash = LXD_BACK_HASH(realvp);
+	hmtx = &lxdm->lxdm_hash_mutex[hash];
+	mutex_enter(hmtx);
+	/* Other holds remain: just drop ours and keep the node hashed. */
+	mutex_enter(&vp->v_lock);
+	if (vp->v_count > 1) {
+		vp->v_count--;	/* release our hold from vn_rele */
+		mutex_exit(&vp->v_lock);
+		mutex_exit(hmtx);
+		return;
+	}
+	mutex_exit(&vp->v_lock);
+	/* Last hold: find lp on its hash chain, unlink it and free it. */
+	for (l = lxdm->lxdm_back_htable[hash]; l != NULL;
+	    lprev = l, l = l->lxdn_hnxt) {
+
+		if (l != lp)
+			continue;
+
+		ASSERT(l->lxdn_type == LXDNT_BACK);
+		ASSERT(lxdm->lxdm_back_refcnt > 0);
+
+		atomic_dec_32(&lxdm->lxdm_back_refcnt);
+		vn_invalid(vp);
+
+		if (lprev == NULL) {
+			lxdm->lxdm_back_htable[hash] = l->lxdn_hnxt;
+		} else {
+			lprev->lxdn_hnxt = l->lxdn_hnxt;
+		}
+
+		mutex_exit(hmtx);
+		rw_destroy(&l->lxdn_rwlock);
+		mutex_destroy(&l->lxdn_tlock);
+		kmem_free(l, sizeof (lxd_node_t));
+		vn_free(vp);
+		VN_RELE(realvp);	/* drop the node's hold on the real vnode */
+		return;
+	}
+
+	panic("lxd_free_back_node");	/* lp was not on its hash chain */
+	/*NOTREACHED*/
+}
+/*
+ * Search directory 'parent' for entry 'name' (needs VEXEC on 'parent').
+ *
+ * 0 is returned on success and *foundnp points to the found lxd_node with
+ * its vnode held. Otherwise ENOTDIR, an access error or ENOENT is returned.
+ */
+int
+lxd_dirlookup(lxd_node_t *parent, char *name, lxd_node_t **foundnp, cred_t *cr)
+{
+	int error;
+
+	*foundnp = NULL;
+	if (parent->lxdn_vnode->v_type != VDIR)
+		return (ENOTDIR);
+
+	if ((error = lxd_naccess(parent, VEXEC, cr)))
+		return (error);
+
+	if (*name == '\0') {	/* empty name means 'parent' itself */
+		ldnode_hold(parent);
+		*foundnp = parent;
+		return (0);
+	}
+
+	/*
+	 * Search the directory for the matching name.
+	 * lxd_find_dirent() walks the per-mount dirent hash under the
+	 * bucket's hash mutex so that the chain doesn't change out from
+	 * underneath us, and (since we ask for HOLD) passes back the
+	 * lxd_node with a hold on it.
+	 */
+
+	if (lxd_find_dirent(name, parent, HOLD, foundnp) != NULL) {
+		ASSERT(*foundnp);
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+/*
+ * Check if the source dir is an ancestor of the (caller-locked) target dir.
+ * Returns 0 if not, EINVAL if it is, or ENOENT if a parent link is NULL.
+ */
+static int
+lxd_dircheckpath(lxd_node_t *fromnode, lxd_node_t *toparent, cred_t *cr)
+{
+	int error = 0;
+	lxd_node_t *dir, *dotdot;
+
+	ASSERT(RW_WRITE_HELD(&toparent->lxdn_rwlock));
+	ASSERT(toparent->lxdn_vnode->v_type == VDIR);
+
+	dotdot = toparent->lxdn_parent;
+	if (dotdot == NULL)
+		return (ENOENT);
+	ldnode_hold(dotdot);
+
+	if (dotdot == toparent) {
+		/* root of fs.  search trivially satisfied. */
+		ldnode_rele(dotdot);
+		return (0);
+	}
+
+	for (;;) {
+		/*
+		 * Return error for cases like "mv c c/d",
+		 * "mv c c/d/e" and so on.
+		 */
+		if (dotdot == fromnode) {
+			ldnode_rele(dotdot);
+			error = EINVAL;
+			break;
+		}
+
+		dir = dotdot;	/* step up one level, holding as we go */
+		dotdot = dir->lxdn_parent;
+		if (dotdot == NULL) {
+			ldnode_rele(dir);
+			error = ENOENT;
+			break;
+		}
+		ldnode_hold(dotdot);
+
+		/*
+		 * We're okay if we traverse the directory tree up to
+		 * the root directory and don't run into the
+		 * parent directory.
+		 */
+		if (dir == dotdot) {
+			ldnode_rele(dir);
+			ldnode_rele(dotdot);
+			break;
+		}
+		ldnode_rele(dir);
+	}
+
+	return (error);
+}
+
+/*
+ * Make a new front node (returned in *newnode) for a create/mkdir in 'dir'.
+ */
+static int
+lxd_dir_make_node(lxd_node_t *dir, lxd_mnt_t *lxdm, struct vattr *va,
+    enum de_op op, lxd_node_t **newnode, struct cred *cred)
+{
+	lxd_node_t *ldn;
+
+	ASSERT(va != NULL);
+
+	if (((va->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&va->va_atime)) ||
+	    ((va->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&va->va_mtime)))
+		return (EOVERFLOW);
+
+	ldn = kmem_zalloc(sizeof (lxd_node_t), KM_SLEEP);
+	ldn->lxdn_type = LXDNT_FRONT;
+	lxd_node_init(lxdm, ldn, NULL, va, cred);
+
+	/* lxd_node_init() already assigned lxdn_nodeid under lxdm_contents. */
+	ldn->lxdn_vnode->v_rdev = ldn->lxdn_rdev = NODEV;
+	ldn->lxdn_vnode->v_type = va->va_type;
+	ldn->lxdn_uid = crgetuid(cred);
+	ldn->lxdn_gid = crgetgid(cred);
+
+	if (va->va_mask & AT_ATIME)
+		ldn->lxdn_atime = va->va_atime;
+	if (va->va_mask & AT_MTIME)
+		ldn->lxdn_mtime = va->va_mtime;
+
+	if (op == DE_MKDIR)
+		lxd_dirinit(dir, ldn, cred);
+	*newnode = ldn;
+	return (0);
+}
+
+static int
+lxd_diraddentry(lxd_node_t *dir, lxd_node_t *ldn, char *name, enum de_op op)
+{
+	lxd_dirent_t	*dp, *pdp;
+	size_t		namelen, alloc_size;
+	timestruc_t	now;
+
+	/*
+	 * Link 'ldn' into 'dir' under 'name' ('op' is unused). First make
+	 * sure the parent directory wasn't removed from underneath the caller.
+	 */
+	if (dir->lxdn_dir == NULL)
+		return (ENOENT);
+
+	/* Check that everything is on the same filesystem. */
+	if (ldn->lxdn_vnode->v_vfsp != dir->lxdn_vnode->v_vfsp)
+		return (EXDEV);
+
+	/* Allocate and initialize directory entry; ENOSPC if no memory. */
+	namelen = strlen(name) + 1;
+	alloc_size = namelen + sizeof (lxd_dirent_t);
+	dp = kmem_zalloc(alloc_size, KM_NOSLEEP | KM_NORMALPRI);
+	if (dp == NULL)
+		return (ENOSPC);
+
+	ldn->lxdn_parent = dir;
+
+	dir->lxdn_size += alloc_size;
+	dir->lxdn_dirents++;
+	dp->lddir_node = ldn;
+	dp->lddir_parent = dir;
+
+	/* The directory entry and its name were allocated sequentially. */
+	dp->lddir_name = (char *)dp + sizeof (lxd_dirent_t);
+	(void) strcpy(dp->lddir_name, name);
+
+	lxd_save_dirent(dp);
+
+	/*
+	 * Some utilities expect the size of a directory to remain
+	 * somewhat static.  For example, a routine which removes
+	 * subdirectories between calls to readdir(); the size of the
+	 * directory changes from underneath it and so the real
+	 * directory offset in bytes is invalid.  To circumvent
+	 * this problem, we initialize a directory entry with a
+	 * phony offset, and use this offset to determine end of
+	 * file in lxd_readdir.
+	 */
+	pdp = dir->lxdn_dir->lddir_prev;
+	/*
+	 * Install at first empty "slot" in directory list.
+	 */
+	while (pdp->lddir_next != NULL &&
+	    (pdp->lddir_next->lddir_offset - pdp->lddir_offset) <= 1) {
+		ASSERT(pdp->lddir_next != pdp);
+		ASSERT(pdp->lddir_prev != pdp);
+		ASSERT(pdp->lddir_next->lddir_offset > pdp->lddir_offset);
+		pdp = pdp->lddir_next;
+	}
+	dp->lddir_offset = pdp->lddir_offset + 1;
+
+	/*
+	 * If we're at the end of the dirent list and the offset (which
+	 * is necessarily the largest offset in this directory) is more
+	 * than twice the number of dirents, that means the directory is
+	 * 50% holes.  At this point we reset the slot pointer back to
+	 * the beginning of the directory so we start using the holes.
+	 * The idea is that if there are N dirents, there must also be
+	 * N holes, so we can satisfy the next N creates by walking at
+	 * most 2N entries; thus the average cost of a create is constant.
+	 * Note that we use the first dirent's lddir_prev as the roving
+	 * slot pointer; it's ugly, but it saves a word in every dirent.
+	 */
+	if (pdp->lddir_next == NULL &&
+	    pdp->lddir_offset > 2 * dir->lxdn_dirents)
+		dir->lxdn_dir->lddir_prev = dir->lxdn_dir->lddir_next;
+	else
+		dir->lxdn_dir->lddir_prev = dp;
+
+	ASSERT(pdp->lddir_next != pdp);
+	ASSERT(pdp->lddir_prev != pdp);
+	/* Splice dp into the list right after pdp. */
+	dp->lddir_next = pdp->lddir_next;
+	if (dp->lddir_next) {
+		dp->lddir_next->lddir_prev = dp;
+	}
+	dp->lddir_prev = pdp;
+	pdp->lddir_next = dp;
+
+	ASSERT(dp->lddir_next != dp);
+	ASSERT(dp->lddir_prev != dp);
+	ASSERT(pdp->lddir_next != pdp);
+	ASSERT(pdp->lddir_prev != pdp);
+
+	gethrestime(&now);
+	dir->lxdn_mtime = now;
+	dir->lxdn_ctime = now;
+
+	return (0);
+}
+
+/*
+ * Enter a directory entry for 'name' into directory 'dir', whose lxdn_rwlock
+ * the caller holds as writer. DE_CREATE/DE_MKDIR make a new node from 'va';
+ * DE_RENAME links in the existing node 'ldn'. Returns 0 on success.
+ */
+int
+lxd_direnter(
+	lxd_mnt_t	*lxdm,
+	lxd_node_t	*dir,		/* target directory to make entry in */
+	char		*name,		/* name of entry */
+	enum de_op	op,		/* entry operation */
+	lxd_node_t	*fromparent,    /* original directory if rename */
+	lxd_node_t	*ldn,		/* existing lxd_node, if rename */
+	struct vattr	*va,
+	lxd_node_t	**rnp,		/* return lxd_node, if create/mkdir */
+	cred_t		*cr,
+	caller_context_t *ctp)
+{
+	lxd_dirent_t *dirp;
+	lxd_node_t *found = NULL;
+	int error = 0;
+	char *s;
+
+	/* lxdn_rwlock is held to serialize direnter and dirdeletes */
+	ASSERT(RW_WRITE_HELD(&dir->lxdn_rwlock));
+	ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+	/*
+	 * Don't allow '/' characters in pathname component,
+	 */
+	for (s = name; *s; s++)
+		if (*s == '/')
+			return (EACCES);
+
+	if (name[0] == '\0')
+		panic("lxd_direnter: NULL name");
+
+	/*
+	 * For rename lock the source entry and check the link count
+	 * to see if it has been removed while it was unlocked.
+	 */
+	if (op == DE_RENAME) {
+		mutex_enter(&ldn->lxdn_tlock);
+		if (ldn->lxdn_nlink == 0) {
+			mutex_exit(&ldn->lxdn_tlock);
+			return (ENOENT);
+		}
+
+		if (ldn->lxdn_nlink == MAXLINK) {
+			mutex_exit(&ldn->lxdn_tlock);
+			return (EMLINK);
+		}
+		ldn->lxdn_nlink++;
+		gethrestime(&ldn->lxdn_ctime);
+		mutex_exit(&ldn->lxdn_tlock);
+	}
+
+	/*
+	 * This might be a "dangling detached directory" (it could have been
+	 * removed, but a reference to it kept in u_cwd). Don't bother
+	 * searching it, and with any luck the user will get tired of dealing
+	 * with us and cd to some absolute pathway (thus in ufs, too).
+	 */
+	if (dir->lxdn_nlink == 0) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * If this is a rename of a directory and the parent is different
+	 * (".." must be changed), then the source directory must not be in the
+	 * directory hierarchy above the target, as this would orphan
+	 * everything below the source directory.
+	 */
+	if (op == DE_RENAME) {
+		if (ldn == dir) {
+			error = EINVAL;
+			goto out;
+		}
+		if ((ldn->lxdn_vnode->v_type) == VDIR) {
+			if ((fromparent != dir) &&
+			    (error = lxd_dircheckpath(ldn, dir, cr)) != 0) {
+				goto out;
+			}
+		}
+	}
+
+	/* Search for an existing entry. */
+	dirp = lxd_find_dirent(name, dir, HOLD, &found);
+	if (dirp != NULL) {
+		ASSERT(found != NULL);
+		switch (op) {
+		case DE_CREATE:
+		case DE_MKDIR:
+			if (rnp != NULL) {
+				*rnp = found;	/* returned held */
+				error = EEXIST;
+			} else {
+				ldnode_rele(found);
+			}
+			break;
+
+		case DE_RENAME:
+			/*
+			 * Note that we only hit this path when we're renaming
+			 * a symlink from one directory to another and there is
+			 * a pre-existing symlink as the target. lxd_rename
+			 * will unlink the src from the original directory but
+			 * here we need to unlink the dest that we collided
+			 * with, then create the new directory entry as we do
+			 * below when there is no pre-existing symlink.
+			 */
+			if ((error = lxd_naccess(dir, VWRITE, cr)) != 0) {
+				ldnode_rele(found);	/* drop lookup hold */
+				goto out;
+			}
+
+			ASSERT(found->lxdn_vnode->v_type == VLNK);
+			/* dir rw lock is already held and asserted above */
+			rw_enter(&found->lxdn_rwlock, RW_WRITER);
+			error = lxd_dirdelete(dir, found, name, DR_RENAME, cr);
+			rw_exit(&found->lxdn_rwlock);
+			ldnode_rele(found);
+			if (error != 0)
+				goto out;
+
+			error = lxd_diraddentry(dir, ldn, name, op);
+			if (error == 0 && rnp != NULL)
+				*rnp = ldn;
+			break;
+		}
+	} else {
+
+		/*
+		 * The directory entry does not exist, but the node might if
+		 * this is a rename. Check write permission in directory to
+		 * see if entry can be created.
+		 */
+		if ((error = lxd_naccess(dir, VWRITE, cr)) != 0)
+			goto out;
+		if (op == DE_CREATE || op == DE_MKDIR) {
+			/*
+			 * Make new lxd_node and directory entry as required.
+			 */
+			error = lxd_dir_make_node(dir, lxdm, va, op, &ldn, cr);
+			if (error)
+				goto out;
+		}
+
+		error = lxd_diraddentry(dir, ldn, name, op);
+		if (error != 0) {
+			if (op == DE_CREATE || op == DE_MKDIR) {
+				/* Unmake the inode we just made. */
+				rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+				if ((ldn->lxdn_vnode->v_type) == VDIR) {
+					ASSERT(dirp == NULL);
+					/*
+					 * cleanup allocs made by lxd_dirinit
+					 */
+					lxd_dirtrunc(ldn);
+				}
+				mutex_enter(&ldn->lxdn_tlock);
+				ldn->lxdn_nlink = 0;
+				gethrestime(&ldn->lxdn_ctime);
+				mutex_exit(&ldn->lxdn_tlock);
+				rw_exit(&ldn->lxdn_rwlock);
+				ldnode_rele(ldn);
+				ldn = NULL;
+			}
+		} else if (rnp != NULL) {
+			*rnp = ldn;
+		} else if (op == DE_CREATE || op == DE_MKDIR) {
+			ldnode_rele(ldn);
+		}
+	}
+
+out:
+	if (error && op == DE_RENAME) {
+		/* Undo bumped link count. */
+		mutex_enter(&ldn->lxdn_tlock);
+		ldn->lxdn_nlink--;
+		gethrestime(&ldn->lxdn_ctime);
+		mutex_exit(&ldn->lxdn_tlock);
+	}
+	return (error);
+}
+
+/*
+ * Delete entry ldn of name "nm" from parent dir. This is used to both remove
+ * a directory and to remove file nodes within the directory (by recursively
+ * calling itself). It frees the dir entry space and decrements link count on
+ * lxd_node(s). Both dir and ldn are asserted to be write-locked on entry.
+ *
+ * Return 0 on success, or EINVAL/EEXIST for "."/"..", ENOENT or access error.
+ */
+int
+lxd_dirdelete(lxd_node_t *dir, lxd_node_t *ldn, char *nm, enum dr_op op,
+    cred_t *cred)
+{
+	lxd_dirent_t *dirp;
+	int error;
+	size_t namelen;
+	lxd_node_t *fndnp;
+	timestruc_t now;
+
+	ASSERT(RW_WRITE_HELD(&dir->lxdn_rwlock));
+	ASSERT(RW_WRITE_HELD(&ldn->lxdn_rwlock));
+	ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+	if (nm[0] == '\0')
+		panic("lxd_dirdelete: empty name for 0x%p", (void *)ldn);
+
+	/*
+	 * return error when removing . and ..
+	 */
+	if (nm[0] == '.') {
+		if (nm[1] == '\0')
+			return (EINVAL);
+		if (nm[1] == '.' && nm[2] == '\0')
+			return (EEXIST); /* thus in ufs */
+	}
+
+	if ((error = lxd_naccess(dir, VEXEC|VWRITE, cred)) != 0)
+		return (error);
+
+	if (dir->lxdn_dir == NULL)
+		return (ENOENT);
+
+	if (op == DR_RMDIR) {
+		/*
+		 * Top-level removal of a directory: first remove any file
+		 * entries by recursing with DR_REMOVE (errors are ignored).
+		 * The caller has already verified that this dir is removable.
+		 * NOTE(review): the child's lxdn_rwlock is not taken before
+		 * recursing although it is asserted on entry - confirm.
+		 */
+		lxd_dirent_t *dirp;
+
+		ASSERT(ldn->lxdn_vnode->v_type == VDIR);
+
+		dirp = ldn->lxdn_dir;
+		while (dirp) {
+			lxd_node_t *dn;
+			lxd_dirent_t *nextp;
+
+			if (strcmp(dirp->lddir_name, ".") == 0 ||
+			    strcmp(dirp->lddir_name, "..") == 0) {
+				dirp = dirp->lddir_next;
+				continue;
+			}
+
+			dn = dirp->lddir_node;
+			nextp = dirp->lddir_next;	/* dirp is freed below */
+
+			ldnode_hold(dn);
+			error = lxd_dirdelete(ldn, dn, dirp->lddir_name,
+			    DR_REMOVE, cred);
+			ldnode_rele(dn);
+
+			dirp = nextp;
+		}
+	}
+
+	dirp = lxd_find_dirent(nm, dir, NOHOLD, &fndnp);
+	VERIFY(dirp != NULL);
+	VERIFY(ldn == fndnp);
+
+	lxd_rm_dirent(dirp);
+
+	/* Take dirp out of the directory list. */
+	ASSERT(dirp->lddir_next != dirp);
+	ASSERT(dirp->lddir_prev != dirp);
+	if (dirp->lddir_prev) {
+		dirp->lddir_prev->lddir_next = dirp->lddir_next;
+	}
+	if (dirp->lddir_next) {
+		dirp->lddir_next->lddir_prev = dirp->lddir_prev;
+	}
+
+	/*
+	 * If the roving slot pointer happens to match dirp,
+	 * point it at the previous dirent.
+	 */
+	if (dir->lxdn_dir->lddir_prev == dirp) {
+		dir->lxdn_dir->lddir_prev = dirp->lddir_prev;
+	}
+	ASSERT(dirp->lddir_next != dirp);
+	ASSERT(dirp->lddir_prev != dirp);
+
+	/* dirp points to the correct directory entry */
+	namelen = strlen(dirp->lddir_name) + 1;
+
+	kmem_free(dirp, sizeof (lxd_dirent_t) + namelen);
+	dir->lxdn_size -= (sizeof (lxd_dirent_t) + namelen);
+	dir->lxdn_dirents--;
+
+	gethrestime(&now);
+	dir->lxdn_mtime = now;
+	dir->lxdn_ctime = now;
+	ldn->lxdn_ctime = now;
+
+	ASSERT(ldn->lxdn_nlink > 0);
+	mutex_enter(&ldn->lxdn_tlock);
+	ldn->lxdn_nlink--;
+	mutex_exit(&ldn->lxdn_tlock);
+	if (op == DR_RMDIR && ldn->lxdn_vnode->v_type == VDIR) {
+		lxd_dirtrunc(ldn);	/* drops the "." and ".." entries */
+		ASSERT(ldn->lxdn_nlink == 0);
+	}
+	return (0);
+}
+
+/*
+ * Initialize a lxd_node, give it a new vnode and add it to the mount's list.
+ */
+void
+lxd_node_init(lxd_mnt_t *lxdm, lxd_node_t *ldn, vnode_t *realvp, vattr_t *vap,
+    cred_t *cred)
+{
+	struct vnode *vp;
+	timestruc_t now;
+
+	ASSERT(vap != NULL);
+
+	rw_init(&ldn->lxdn_rwlock, NULL, RW_DEFAULT, NULL);
+	mutex_init(&ldn->lxdn_tlock, NULL, MUTEX_DEFAULT, NULL);
+	ldn->lxdn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
+	ldn->lxdn_mask = 0;
+	ldn->lxdn_attr.va_type = vap->va_type;
+	ldn->lxdn_nlink = 1;
+	ldn->lxdn_size = 0;
+
+	if (cred == NULL) {	/* no cred: owner comes from vap */
+		ldn->lxdn_uid = vap->va_uid;
+		ldn->lxdn_gid = vap->va_gid;
+	} else {
+		ldn->lxdn_uid = crgetuid(cred);
+		ldn->lxdn_gid = crgetgid(cred);
+	}
+
+	ldn->lxdn_fsid = lxdm->lxdm_dev;
+	ldn->lxdn_rdev = vap->va_rdev;
+	ldn->lxdn_blksize = PAGESIZE;
+	ldn->lxdn_nblocks = 0;
+	gethrestime(&now);
+	ldn->lxdn_atime = now;
+	ldn->lxdn_mtime = now;
+	ldn->lxdn_ctime = now;
+	ldn->lxdn_seq = 0;
+	ldn->lxdn_dir = NULL;
+
+	ldn->lxdn_real_vp = realvp;
+	/* Allocate the vnode and tie it to this node and mount. */
+	ldn->lxdn_vnode = vn_alloc(KM_SLEEP);
+	vp = LDNTOV(ldn);
+	vn_setops(vp, lxd_vnodeops);
+	vp->v_vfsp = lxdm->lxdm_vfsp;
+	vp->v_type = vap->va_type;
+	vp->v_rdev = vap->va_rdev;
+	vp->v_data = (caddr_t)ldn;
+
+	mutex_enter(&lxdm->lxdm_contents);
+	ldn->lxdn_nodeid = lxdm->lxdm_gen++;	/* ID taken under the lock */
+
+	/*
+	 * Add new lxd_node to end of linked list of lxd_nodes for this
+	 * lxdevfs. Root directory is handled specially in lxd_mount.
+	 */
+	if (lxdm->lxdm_rootnode != (lxd_node_t *)NULL) {
+		ldn->lxdn_next = NULL;
+		ldn->lxdn_prev = lxdm->lxdm_rootnode->lxdn_prev;
+		ldn->lxdn_prev->lxdn_next = lxdm->lxdm_rootnode->lxdn_prev =
+		    ldn;
+	}
+	mutex_exit(&lxdm->lxdm_contents);
+	vn_exists(vp);
+}
+
+/*
+ * lxd_dirinit is used internally to initialize a directory (dir)
+ * with '.' and '..' entries without checking permissions and locking.
+ *
+ * 'parent' is the directory that '..' will refer to; the caller must hold
+ * its lxdn_rwlock as writer.
+ *
+ * On return 'dir' has a link count of 2 and a size covering the two entries,
+ * and 'parent' has gained a link for the new '..' entry. 'cr' is currently
+ * unused.
+ */
+void
+lxd_dirinit(lxd_node_t *parent, lxd_node_t *dir, cred_t *cr)
+{
+	lxd_dirent_t *dot, *dotdot;
+	timestruc_t now;
+	lxd_mnt_t *lxdm = VTOLXDM(dir->lxdn_vnode);
+
+	ASSERT(RW_WRITE_HELD(&parent->lxdn_rwlock));
+	ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+	dir->lxdn_nodeid = lxdm->lxdm_gen++;
+
+	/*
+	 * Initialize the entries. Each name is stored directly after its
+	 * dirent; kmem_zalloc's zero fill supplies the terminating NUL.
+	 */
+	dot = kmem_zalloc(sizeof (lxd_dirent_t) + 2, KM_SLEEP);
+	dot->lddir_node = dir;
+	dot->lddir_offset = 0;
+	dot->lddir_name = (char *)dot + sizeof (lxd_dirent_t);
+	dot->lddir_name[0] = '.';
+	dot->lddir_parent = dir;
+	lxd_save_dirent(dot);
+
+	dotdot = kmem_zalloc(sizeof (lxd_dirent_t) + 3, KM_SLEEP);
+	dotdot->lddir_node = parent;
+	dotdot->lddir_offset = 1;
+	dotdot->lddir_name = (char *)dotdot + sizeof (lxd_dirent_t);
+	dotdot->lddir_name[0] = '.';
+	dotdot->lddir_name[1] = '.';
+	dotdot->lddir_parent = dir;
+	lxd_save_dirent(dotdot);
+
+	/*
+	 * Initialize directory entry list.
+	 */
+	dot->lddir_next = dotdot;
+	dot->lddir_prev = dotdot; /* dot's lddir_prev holds roving slot ptr */
+	dotdot->lddir_next = NULL;
+	dotdot->lddir_prev = dot;
+
+	gethrestime(&now);
+	dir->lxdn_mtime = now;
+	dir->lxdn_ctime = now;
+
+	parent->lxdn_nlink++;
+	parent->lxdn_ctime = now;
+
+	dir->lxdn_dir = dot;
+	dir->lxdn_size = 2 * sizeof (lxd_dirent_t) + 5;	/* dot and dotdot */
+	dir->lxdn_dirents = 2;
+	dir->lxdn_nlink = 2;
+	dir->lxdn_parent = parent;
+}
+
+/*
+ * lxd_dirtrunc is called to remove all directory entries under this directory.
+ */
+void
+lxd_dirtrunc(lxd_node_t *dir)
+{
+	lxd_dirent_t *ldp;
+	timestruc_t now;
+
+	ASSERT(RW_WRITE_HELD(&dir->lxdn_rwlock));
+	ASSERT(dir->lxdn_vnode->v_type == VDIR);
+
+	for (ldp = dir->lxdn_dir; ldp; ldp = dir->lxdn_dir) {
+		size_t namelen;
+		lxd_node_t *ldn;
+
+		ASSERT(ldp->lddir_next != ldp);
+		ASSERT(ldp->lddir_prev != ldp);
+		ASSERT(ldp->lddir_node);
+
+		dir->lxdn_dir = ldp->lddir_next;
+		namelen = strlen(ldp->lddir_name) + 1;
+
+		/*
+		 * Adjust the link count of the node this entry names to
+		 * account for the directory entry removal.
+		 */
+		ldn = ldp->lddir_node;
+
+		ASSERT(ldn->lxdn_nlink > 0);
+		mutex_enter(&ldn->lxdn_tlock);
+		ldn->lxdn_nlink--;
+		mutex_exit(&ldn->lxdn_tlock);
+
+		lxd_rm_dirent(ldp);	/* unhash before freeing */
+		kmem_free(ldp, sizeof (lxd_dirent_t) + namelen);
+		dir->lxdn_size -= (sizeof (lxd_dirent_t) + namelen);
+		dir->lxdn_dirents--;
+	}
+
+	gethrestime(&now);
+	dir->lxdn_mtime = now;
+	dir->lxdn_ctime = now;
+
+	ASSERT(dir->lxdn_dir == NULL);
+	ASSERT(dir->lxdn_size == 0);
+	ASSERT(dir->lxdn_dirents == 0);
+}
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c b/usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c
new file mode 100644
index 0000000000..bf5913f025
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd_vfsops.c
@@ -0,0 +1,781 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+/*
+ * The lx devfs (lxd) file system is used within lx branded zones to provide
+ * the Linux view of /dev.
+ *
+ * In the past, the Linux /dev was simply a lofs mount pointing at /native/dev.
+ * lxd now provides the Linux /dev.
+ *
+ * The lxd file system is a hybrid of lofs and tmpfs. It supports a "back" file
+ * system which is the special device and corresponds to the special device in
+ * a lofs mount. As with lofs, all files in the special device are accessible
+ * through the lxd mount. Because the zone's devfs is not directly modifiable
+ * within the zone (also mknod(2) is not generally allowed within a zone) it is
+ * impossible to create files in devfs. For lx, in some cases it's useful to be
+ * able to make new symlinks or new directories under /dev. lxd implements
+ * these operations by creating "files" in memory in the same way as tmpfs
+ * does. Within lxd these are referred to as "front" files. For operations such
+ * as lookup or readdir, lxd provides a merged view of both the front and back
+ * files. lxd does not support regular front files or simple I/O (read/write)
+ * to front files, since there is no need for that. For back files, all
+ * operations are simply passed through to the real vnode, as is done with
+ * lofs. Front files are not allowed to mask back files.
+ *
+ * The Linux /dev is now a lxd mount with the special file (i.e. the back
+ * file system) as /native/dev.
+ *
+ * In addition, lx has a need for some illumos/Linux translation for the
+ * various *stat(2) system calls when used on a device. This translation can
+ * be centralized within lxd's getattr vnode entry point.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysmacros.h>
+#include <sys/kmem.h>
+#include <sys/time.h>
+#include <sys/pathname.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/cmn_err.h>
+#include <sys/cred.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <sys/systm.h>
+#include <sys/mntent.h>
+#include <sys/policy.h>
+#include <sys/sdt.h>
+#include <sys/ddi.h>
+#include <sys/lx_brand.h>
+#include <sys/lx_ptm.h>
+
+#include "lxd.h"
+
+/*
+ * Module level parameters
+ *
+ * lxd_fstype is the file system type index passed to lxd_init() and is
+ * stamped into each mounted vfs. lxd_dev is the pseudo device built in
+ * lxd_init() from getudev(); lxd_mount() uses it as vfs_dev and as input to
+ * vfs_make_fsid() for every lxd mount.
+ */
+static int	lxd_fstype;
+static dev_t	lxd_dev;
+
+/*
+ * lxd_mountcount is used to prevent module unloads while there is still
+ * state from a former mount hanging around. The filesystem module must not be
+ * allowed to go away before the last VFS_FREEVFS() call has been made. Since
+ * this is just an atomic counter, there's no need for locking.
+ *
+ * It is incremented at the end of a successful lxd_mount(), decremented at
+ * the end of lxd_freevfs(), and checked by _fini().
+ */
+static uint32_t lxd_mountcount;
+
+/*
+ * lxd_minfree is the minimum amount of swap space that lx devfs leaves for
+ * the rest of the zone.
+ *
+ * A value of 0 means "not patched"; lxd_init() then sets it to
+ * btopr(LXDMINFREE). lxd_statvfs() subtracts it from the available swap
+ * figure it reports in PAGESIZE blocks.
+ */
+size_t lxd_minfree = 0;
+
+/*
+ * LXDMINFREE -- the value from which lxd_minfree is derived -- should be
+ * configured to a value that is roughly the smallest practical value for
+ * memory + swap minus the largest reasonable size for lxd in such
+ * a configuration. As of this writing, the smallest practical memory + swap
+ * configuration is 128MB, and it seems reasonable to allow lxd to consume
+ * no more than ~10% of this, yielding a LXDMINFREE of 12MB.
+ *
+ * The expansion is parenthesized so that the macro behaves as a single value
+ * regardless of the operators surrounding it at the point of use.
+ */
+#define	LXDMINFREE	(12 * 1024 * 1024)	/* 12 Megabytes */
+
+/*
+ * External symbols.
+ *
+ * NOTE(review): neither swapfs_minfree nor stat64() is referenced in the
+ * portion of this file reviewed here; confirm whether these declarations are
+ * still needed.
+ */
+extern pgcnt_t swapfs_minfree;
+
+extern int stat64(char *, struct stat64 *);
+
+/*
+ * lxd vfs operations.
+ *
+ * lxd_init is the module's file system initialization routine (referenced
+ * from the vfsdef_t below); the remaining functions are installed as the
+ * mount, unmount, root, statvfs and freevfs operations by lxd_init().
+ */
+static int lxd_init(int, char *);
+static int lxd_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
+static int lxd_unmount(vfs_t *, int, cred_t *);
+static int lxd_root(vfs_t *, vnode_t **);
+static int lxd_statvfs(vfs_t *, statvfs64_t *);
+static void lxd_freevfs(vfs_t *vfsp);
+
+/*
+ * Loadable module wrapper
+ */
+#include <sys/modctl.h>
+
+/*
+ * File system definition: registers the file system type name "lx_devfs"
+ * (the type used for the /dev mount in the lx brand's platform.xml) and
+ * lxd_init() as its initialization routine.
+ * NOTE(review): VSW_ZMOUNT presumably allows the mount to be performed from
+ * within a non-global zone -- confirm against sys/vfs.h.
+ */
+static vfsdef_t vfw = {
+	VFSDEF_VERSION,
+	"lx_devfs",
+	lxd_init,
+	VSW_ZMOUNT,
+	NULL
+};
+
+/*
+ * Module linkage information
+ *
+ * modlfs ties the file system definition above to the generic file system
+ * module operations; modlinkage is what _init(), _fini() and _info() hand to
+ * mod_install(), mod_remove() and mod_info().
+ */
+static struct modlfs modlfs = {
+	&mod_fsops, "lx brand devfs", &vfw
+};
+
+static struct modlinkage modlinkage = {
+	MODREV_1, &modlfs, NULL
+};
+
+/*
+ * Definitions and translators for devt's.
+ *
+ * These tables describe how a native device number is mapped to the device
+ * number a Linux program expects. The native major/minor fields are left as
+ * 0 here and are filled in at load time by lxd_init().
+ */
+static int lxd_pts_devt_translator(dev_t, lx_dev_t *);
+static int lxd_ptm_devt_translator(dev_t, lx_dev_t *);
+
+/*
+ * Range of Linux major numbers used for pts devices, and the resulting
+ * upper bound on native pts minor numbers that can be translated (see
+ * lxd_pts_devt_translator()).
+ */
+#define	LX_PTS_MAJOR_MIN	136
+#define	LX_PTS_MAJOR_MAX	143
+#define	LX_PTS_MAX		\
+	((LX_PTS_MAJOR_MAX - LX_PTS_MAJOR_MIN + 1) * LX_MINORMASK)
+
+/* Fixed Linux device number reported for the pty master device. */
+#define	LX_PTM_MAJOR		5
+#define	LX_PTM_MINOR		2
+
+/*
+ * Per-driver minor translation lists, each terminated by a NULL path.
+ * NOTE(review): the initializers are presumably in the order { device path,
+ * native minor, Linux major, Linux minor } -- confirm against the
+ * lxd_minor_translator_t definition in lxd.h.
+ */
+static lxd_minor_translator_t lxd_mtranslator_mm[] = {
+	{ "/dev/null",		0, 1, 3 },
+	{ "/dev/zero",		0, 1, 5 },
+	{ NULL,			0, 0, 0 }
+};
+static lxd_minor_translator_t lxd_mtranslator_random[] = {
+	{ "/dev/random",	0, 1, 8 },
+	{ "/dev/urandom",	0, 1, 9 },
+	{ NULL,			0, 0, 0 }
+};
+static lxd_minor_translator_t lxd_mtranslator_sy[] = {
+	{ "/dev/tty",		0, 5, 0 },
+	{ NULL,			0, 0, 0 }
+};
+static lxd_minor_translator_t lxd_mtranslator_zcons[] = {
+	{ "/dev/console",	0, 5, 1 },
+	{ NULL,			0, 0, 0 }
+};
+/*
+ * Top-level translator table, keyed by native driver name and terminated by
+ * a NULL driver name. DTT_LIST entries point at one of the minor lists
+ * above; DTT_CUSTOM entries point at a translation function.
+ */
+lxd_devt_translator_t lxd_devt_translators[] = {
+	{ "mm",		0, DTT_LIST,	(uintptr_t)&lxd_mtranslator_mm },
+	{ "random",	0, DTT_LIST,	(uintptr_t)&lxd_mtranslator_random },
+	{ "sy",		0, DTT_LIST,	(uintptr_t)&lxd_mtranslator_sy },
+	{ "zcons",	0, DTT_LIST,	(uintptr_t)&lxd_mtranslator_zcons },
+	{ LX_PTM_DRV,	0, DTT_CUSTOM,	(uintptr_t)lxd_ptm_devt_translator },
+	{ "pts",	0, DTT_CUSTOM,	(uintptr_t)lxd_pts_devt_translator },
+	{ NULL,		0, DTT_INVALID,	NULL }
+};
+
+/*
+ * Module load entry point: install the lx_devfs module linkage and hand the
+ * result straight back to the module framework.
+ */
+int
+_init()
+{
+	int rv;
+
+	rv = mod_install(&modlinkage);
+	return (rv);
+}
+
+/*
+ * Module unload entry point. Unloading is refused with EBUSY while any mount
+ * still has state outstanding (see lxd_mountcount). Once the module has been
+ * removed, the vfs and vnode operations vectors built by lxd_init() are torn
+ * down.
+ */
+int
+_fini()
+{
+	int rv;
+
+	if (lxd_mountcount != 0)
+		return (EBUSY);
+
+	rv = mod_remove(&modlinkage);
+	if (rv == 0) {
+		/* The module is gone; release the operations vectors. */
+		(void) vfs_freevfsops_by_type(lxd_fstype);
+		vn_freevnodeops(lxd_vnodeops);
+	}
+
+	return (rv);
+}
+
+/*
+ * Module information entry point: report on this module's linkage.
+ */
+int
+_info(struct modinfo *modinfop)
+{
+	int rv;
+
+	rv = mod_info(&modlinkage, modinfop);
+	return (rv);
+}
+
+/*
+ * File system initialization routine, called when the lxd module is loaded.
+ *
+ * This installs the vfs operations for the given fstype, builds the vnode
+ * operations vector, establishes the default for lxd_minfree, creates the
+ * pseudo device used by all lxd mounts, and fills in the native major/minor
+ * numbers in the device translator tables.
+ *
+ * fstype is the file system type index assigned to lxd; name is the file
+ * system name handed to vn_make_ops().
+ *
+ * Returns 0 on success, or the error from vfs_setfsops() or vn_make_ops().
+ * Failure to obtain a unique device number or to resolve a device path is
+ * not fatal.
+ */
+static int
+lxd_init(int fstype, char *name)
+{
+	static const fs_operation_def_t lxd_vfsops_template[] = {
+		VFSNAME_MOUNT,		{ .vfs_mount = lxd_mount },
+		VFSNAME_UNMOUNT,	{ .vfs_unmount = lxd_unmount },
+		VFSNAME_ROOT,		{ .vfs_root = lxd_root },
+		VFSNAME_STATVFS,	{ .vfs_statvfs = lxd_statvfs },
+		VFSNAME_FREEVFS,	{ .vfs_freevfs = lxd_freevfs },
+		NULL,			NULL
+	};
+	extern const struct fs_operation_def lxd_vnodeops_template[];
+	int error;
+	major_t dev;
+	int i;
+
+	lxd_fstype = fstype;
+	ASSERT(lxd_fstype != 0);
+
+	error = vfs_setfsops(fstype, lxd_vfsops_template, NULL);
+	if (error != 0) {
+		cmn_err(CE_WARN, "lxd_init: bad vfs ops template");
+		return (error);
+	}
+
+	error = vn_make_ops(name, lxd_vnodeops_template, &lxd_vnodeops);
+	if (error != 0) {
+		/* Undo the vfs ops registration done just above. */
+		(void) vfs_freevfsops_by_type(fstype);
+		cmn_err(CE_WARN, "lxd_init: bad vnode ops template");
+		return (error);
+	}
+
+	/*
+	 * lxd_minfree doesn't need to be some function of configured
+	 * swap space since it really is an absolute limit of swap space
+	 * which still allows other processes to execute.
+	 */
+	if (lxd_minfree == 0) {
+		/* Set if not patched */
+		lxd_minfree = btopr(LXDMINFREE);
+	}
+
+	/* Without a unique major we warn and fall back to major 0. */
+	if ((dev = getudev()) == (major_t)-1) {
+		cmn_err(CE_WARN, "lxd_init: Can't get unique device number.");
+		dev = 0;
+	}
+
+	/*
+	 * Make the pseudo device
+	 */
+	lxd_dev = makedevice(dev, 0);
+
+	/*
+	 * Initialize device translator mapping table.
+	 */
+	for (i = 0; lxd_devt_translators[i].lxd_xl_driver != NULL; i++) {
+		lxd_minor_translator_t	*mt;
+		int j;
+
+		/* Resolve the driver name to its native major number. */
+		lxd_devt_translators[i].lxd_xl_major =
+		    mod_name_to_major(lxd_devt_translators[i].lxd_xl_driver);
+
+		/* if this translator doesn't use a list mapping we're done. */
+		if (lxd_devt_translators[i].lxd_xl_type != DTT_LIST)
+			continue;
+
+		/* for each device listed, lookup the minor node number */
+		mt = lxd_devt_translators[i].xl_list;
+		for (j = 0; mt[j].lxd_mt_path != NULL; j++) {
+			vnode_t *vp;
+			struct vattr va;
+			char *tpath;
+			char tnm[MAXPATHLEN];
+
+			/*
+			 * The attach might be triggered in either the global
+			 * zone or in a non-global zone, so we may need to
+			 * adjust the path if we're in a NGZ. zone_id is a
+			 * zoneid_t, so it is compared against GLOBAL_ZONEID
+			 * rather than the zone-uniqid constant.
+			 */
+			if (curproc->p_zone->zone_id == GLOBAL_ZONEID) {
+				tpath = mt[j].lxd_mt_path;
+			} else {
+				(void) snprintf(tnm, sizeof (tnm), "/native%s",
+				    mt[j].lxd_mt_path);
+				tpath = tnm;
+			}
+
+			/* A device we cannot find gets a minor of -1. */
+			if (lookupnameat(tpath, UIO_SYSSPACE, FOLLOW, NULL,
+			    &vp, NULL) != 0) {
+				mt[j].lxd_mt_minor = -1;
+				continue;
+			}
+
+			va.va_mask = AT_RDEV;
+			if (VOP_GETATTR(vp, &va, 0, kcred, NULL) != 0) {
+				/* Record the minor of NODEV on failure. */
+				va.va_rdev = NODEV;
+			} else {
+				ASSERT(getmajor(va.va_rdev) ==
+				    lxd_devt_translators[i].lxd_xl_major);
+				ASSERT(mt[j].lxd_mt_lx_minor < LX_MINORMASK);
+			}
+
+			mt[j].lxd_mt_minor = getminor(va.va_rdev);
+
+			VN_RELE(vp);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * Mount an lxd file system on the directory mvp, using uap->spec as the
+ * "back" file system.
+ *
+ * The mount is refused when the caller lacks mount privilege, mvp is not a
+ * directory (ENOTDIR), the caller's zone is not an lx branded zone (EINVAL),
+ * the mount point is busy and MS_OVERLAY was not requested (EBUSY), a
+ * read-only mount was requested (EINVAL), or the special file cannot be
+ * looked up, is not accessible, or does not use the "dev" vnode operations
+ * (EINVAL).
+ *
+ * On success a lxd_mnt_t is allocated and attached to vfsp, a back node
+ * wrapping the special's root vnode becomes the root of the mount, and
+ * lxd_mountcount is incremented. Returns 0 on success or an errno value.
+ */
+static int
+lxd_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
+{
+	lxd_mnt_t *lxdm = NULL;
+	struct lxd_node *ldn;
+	struct pathname dpn;
+	int error;
+	int i;
+	int nodev;
+	vnode_t *realrootvp;
+	vnode_t *tvp;
+
+	/* Remember whether "nodevices" was set before the policy check. */
+	nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL);
+
+	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
+		return (error);
+
+	if (mvp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/*
+	 * This is the same behavior as with lofs.
+	 * Loopback devices which get "nodevices" added can be done without
+	 * "nodevices" set because we cannot import devices into a zone
+	 * with loopback.  Note that we have all zone privileges when
+	 * this happens; if not, we'd have gotten "nosuid".
+	 */
+	if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
+		vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY);
+
+	/*
+	 * Only allow mounting within lx zones.
+	 */
+	if (curproc->p_zone->zone_brand != &lx_brand)
+		return (EINVAL);
+
+	/*
+	 * Ensure we don't allow overlaying mounts
+	 */
+	mutex_enter(&mvp->v_lock);
+	if ((uap->flags & MS_OVERLAY) == 0 &&
+	    (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
+		mutex_exit(&mvp->v_lock);
+		return (EBUSY);
+	}
+	mutex_exit(&mvp->v_lock);
+
+	/* lxd doesn't support read-only mounts */
+	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Capture the mount point path; it is saved in the lxd_mnt_t. */
+	error = pn_get(uap->dir,
+	    (uap->flags & MS_SYSSPACE) ? UIO_SYSSPACE : UIO_USERSPACE, &dpn);
+	if (error != 0)
+		goto out;
+
+	/*
+	 * Find real root
+	 */
+	if ((error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ?
+	    UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp))) {
+		pn_free(&dpn);
+		return (error);
+	}
+
+	if ((error = VOP_ACCESS(realrootvp, 0, 0, cr, NULL)) != 0) {
+		pn_free(&dpn);
+		VN_RELE(realrootvp);
+		return (error);
+	}
+
+	/* If realroot is not a devfs, error out */
+	if (strcmp(realrootvp->v_op->vnop_name, "dev") != 0) {
+		pn_free(&dpn);
+		VN_RELE(realrootvp);
+		return (EINVAL);
+	}
+
+	lxdm = kmem_zalloc(sizeof (*lxdm), KM_SLEEP);
+
+	/* init but don't bother entering the mutex (not on mount list yet) */
+	mutex_init(&lxdm->lxdm_contents, NULL, MUTEX_DEFAULT, NULL);
+	mutex_init(&lxdm->lxdm_renamelck, NULL, MUTEX_DEFAULT, NULL);
+
+	/* Initialize the hash table mutexes */
+	for (i = 0; i < LXD_HASH_SZ; i++) {
+		mutex_init(&lxdm->lxdm_hash_mutex[i], NULL, MUTEX_DEFAULT,
+		    NULL);
+	}
+
+	lxdm->lxdm_vfsp = vfsp;
+	lxdm->lxdm_gen = 1;	/* start inode counter at 1 */
+
+	vfsp->vfs_data = (caddr_t)lxdm;
+	vfsp->vfs_fstype = lxd_fstype;
+	vfsp->vfs_dev = lxd_dev;
+	vfsp->vfs_bsize = PAGESIZE;
+	vfsp->vfs_flag |= VFS_NOTRUNC;
+	vfs_make_fsid(&vfsp->vfs_fsid, lxd_dev, lxd_fstype);
+	lxdm->lxdm_mntpath = kmem_zalloc(dpn.pn_pathlen + 1, KM_SLEEP);
+	(void) strcpy(lxdm->lxdm_mntpath, dpn.pn_path);
+
+	/* The root of the mount is a back node wrapping the real root. */
+	tvp = lxd_make_back_node(realrootvp, lxdm);
+	ldn = VTOLDN(tvp);
+
+	rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+	LDNTOV(ldn)->v_flag |= VROOT;
+
+	/*
+	 * initialize linked list of lxd_nodes so that the back pointer of
+	 * the root lxd_node always points to the last one on the list
+	 * and the forward pointer of the last node is null
+	 */
+	ldn->lxdn_prev = ldn;
+	ldn->lxdn_next = NULL;
+	ldn->lxdn_nlink = 0;
+	lxdm->lxdm_rootnode = ldn;
+
+	ldn->lxdn_nodeid = lxdm->lxdm_gen++;
+	/* The root directory is its own parent. */
+	lxd_dirinit(ldn, ldn, cr);
+
+	rw_exit(&ldn->lxdn_rwlock);
+
+	pn_free(&dpn);
+	error = 0;
+	atomic_inc_32(&lxd_mountcount);
+
+out:
+	if (error == 0)
+		vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
+
+	return (error);
+}
+
+/*
+ * Unmount an lxd file system.
+ *
+ * Forced unmounts are not supported (EINVAL). The unmount fails with EBUSY
+ * if the root vnode has more than one hold or if any other node on the
+ * mount's node list has a non-zero hold count. Otherwise every non-root node
+ * is given a hold (released later by lxd_freevfs()) and the vfs is flagged
+ * VFS_UNMOUNTED. Returns 0 on success or an errno value.
+ */
+static int
+lxd_unmount(struct vfs *vfsp, int flag, struct cred *cr)
+{
+	lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+	lxd_node_t *ldn, *cancel;
+	struct vnode	*vp;
+	int error;
+	uint_t cnt;
+
+	if ((error = secpolicy_fs_unmount(cr, vfsp)) != 0)
+		return (error);
+
+	mutex_enter(&lxdm->lxdm_contents);
+
+	/*
+	 * In the normal unmount case only the root node would have a reference
+	 * count.
+	 *
+	 * With lxdm_contents held, nothing can be added or removed.
+	 * If we find a previously referenced node, undo the holds we have
+	 * placed and fail EBUSY.
+	 */
+	ldn = lxdm->lxdm_rootnode;
+
+	vp = LDNTOV(ldn);
+	mutex_enter(&vp->v_lock);
+
+	/* Forced unmount is rejected outright. */
+	if (flag & MS_FORCE) {
+		mutex_exit(&vp->v_lock);
+		mutex_exit(&lxdm->lxdm_contents);
+		return (EINVAL);
+	}
+
+	/* Anything beyond a single hold on the root means we are in use. */
+	cnt = vp->v_count;
+	if (cnt > 1) {
+		mutex_exit(&vp->v_lock);
+		mutex_exit(&lxdm->lxdm_contents);
+		return (EBUSY);
+	}
+
+	mutex_exit(&vp->v_lock);
+
+	/*
+	 * Check for open files. An open file causes everything to unwind.
+	 */
+	for (ldn = ldn->lxdn_next; ldn; ldn = ldn->lxdn_next) {
+		vp = LDNTOV(ldn);
+		mutex_enter(&vp->v_lock);
+		cnt = vp->v_count;
+		if (cnt > 0) {
+			/* An open file; unwind the holds we've been adding. */
+			mutex_exit(&vp->v_lock);
+			/*
+			 * Only the nodes visited before this one (from the
+			 * first non-root node up to, but excluding, ldn)
+			 * received a hold.
+			 */
+			cancel = lxdm->lxdm_rootnode->lxdn_next;
+			while (cancel != ldn) {
+				vp = LDNTOV(cancel);
+				ASSERT(vp->v_count > 0);
+				VN_RELE(vp);
+				cancel = cancel->lxdn_next;
+			}
+			mutex_exit(&lxdm->lxdm_contents);
+			return (EBUSY);
+		} else {
+			/*
+			 * It may seem incorrect for us to have a vnode with
+			 * a count of 0, but this is modeled on tmpfs and works
+			 * the same way. See lxd_front_inactive. There we allow
+			 * the v_count to go to 0 but rely on the link count to
+			 * keep the vnode alive. Since we now want to cleanup
+			 * these vnodes we manually add a VN_HOLD so that the
+			 * VN_RELEs that occur in the lxd_freevfs() cleanup
+			 * will take us down the lxd_inactive code path. We
+			 * can directly add a VN_HOLD since we have the lock.
+			 */
+			vp->v_count++;
+			mutex_exit(&vp->v_lock);
+		}
+	}
+
+	/*
+	 * We can drop the mutex now because
+	 * no one can find this mount anymore
+	 */
+	vfsp->vfs_flag |= VFS_UNMOUNTED;
+	mutex_exit(&lxdm->lxdm_contents);
+
+	return (0);
+}
+
+/*
+ * Implementation of VFS_FREEVFS(). This is called by the vfs framework after
+ * umount and the last VFS_RELE, to trigger the release of any resources still
+ * associated with the given vfs_t. This is normally called immediately after
+ * lxd_unmount.
+ *
+ * All directory entries are removed, every node is released (the root
+ * last), and then the mount path, the mount's mutexes (including the hash
+ * table mutexes set up by lxd_mount()) and the lxd_mnt_t itself are freed.
+ * Finally lxd_mountcount is decremented so that the module may unload.
+ */
+static void
+lxd_freevfs(vfs_t *vfsp)
+{
+	lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+	lxd_node_t *ldn;
+	struct vnode *vp;
+	int i;
+
+	/*
+	 * Free all kmemalloc'd and anonalloc'd memory associated with
+	 * this filesystem.  To do this, we go through the file list twice,
+	 * once to remove all the directory entries, and then to remove
+	 * all the pseudo files.
+	 */
+
+	/*
+	 * Now that we are tearing ourselves down we need to remove the
+	 * UNMOUNTED flag. If we don't, we'll later hit a VN_RELE when we remove
+	 * files from the system causing us to have a negative value. Doing this
+	 * seems a bit better than trying to set a flag on the lxd_mnt_t that
+	 * says we're tearing down.
+	 */
+	vfsp->vfs_flag &= ~VFS_UNMOUNTED;
+
+	/*
+	 * Remove all directory entries (this doesn't remove top-level dirs).
+	 */
+	for (ldn = lxdm->lxdm_rootnode; ldn; ldn = ldn->lxdn_next) {
+		rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+		if (ldn->lxdn_vnode->v_type == VDIR)
+			lxd_dirtrunc(ldn);
+		rw_exit(&ldn->lxdn_rwlock);
+	}
+
+	ASSERT(lxdm->lxdm_rootnode != NULL);
+
+	/*
+	 * All links are gone, v_count is keeping nodes in place.
+	 * VN_RELE should make the node disappear, unless somebody
+	 * is holding pages against it.  Nap and retry until it disappears.
+	 *
+	 * We re-acquire the lock to prevent others who have a HOLD on a
+	 * lxd_node from blowing it away (in lxd_inactive) while we're trying
+	 * to get to it here. Once we have a HOLD on it we know it'll stick
+	 * around.
+	 */
+	mutex_enter(&lxdm->lxdm_contents);
+
+	/*
+	 * Remove all the files (except the rootnode) backwards. The root's
+	 * lxdn_prev always names the last node on the list, so the loop ends
+	 * once it points back at the root itself.
+	 */
+	while ((ldn = lxdm->lxdm_rootnode->lxdn_prev) != lxdm->lxdm_rootnode) {
+		mutex_exit(&lxdm->lxdm_contents);
+		/*
+		 * All nodes will be released here. Note we handled the link
+		 * count above.
+		 */
+		vp = LDNTOV(ldn);
+		ASSERT(vp->v_type == VLNK || vp->v_type == VDIR);
+		VN_RELE(vp);
+		mutex_enter(&lxdm->lxdm_contents);
+		/*
+		 * It's still there after the RELE. Someone else like pageout
+		 * has a hold on it so wait a bit and then try again - we know
+		 * they'll give it up soon.
+		 */
+		if (ldn == lxdm->lxdm_rootnode->lxdn_prev) {
+			VN_HOLD(vp);
+			mutex_exit(&lxdm->lxdm_contents);
+			delay(hz / 4);
+			mutex_enter(&lxdm->lxdm_contents);
+		}
+	}
+	mutex_exit(&lxdm->lxdm_contents);
+
+	ASSERT(lxdm->lxdm_back_refcnt == 1);
+	ASSERT(lxdm->lxdm_dent_refcnt == 0);
+
+	/* Only the root is left; drop the hold that kept it in place. */
+	VN_RELE(LDNTOV(lxdm->lxdm_rootnode));
+
+	ASSERT(lxdm->lxdm_mntpath != NULL);
+	kmem_free(lxdm->lxdm_mntpath, strlen(lxdm->lxdm_mntpath) + 1);
+
+	/*
+	 * Every mutex initialized in lxd_mount() must be destroyed before
+	 * the memory containing it is freed; that includes the per-bucket
+	 * hash table mutexes.
+	 */
+	mutex_destroy(&lxdm->lxdm_contents);
+	mutex_destroy(&lxdm->lxdm_renamelck);
+	for (i = 0; i < LXD_HASH_SZ; i++)
+		mutex_destroy(&lxdm->lxdm_hash_mutex[i]);
+	kmem_free(lxdm, sizeof (lxd_mnt_t));
+
+	/* Allow _fini() to succeed now */
+	atomic_dec_32(&lxd_mountcount);
+}
+
+/*
+ * Hand back the root vnode of the given lxd mount through vpp. A hold is
+ * placed on the vnode on behalf of the caller. Always returns 0.
+ */
+static int
+lxd_root(struct vfs *vfsp, struct vnode **vpp)
+{
+	lxd_mnt_t *mnt = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+	lxd_node_t *rootnode = mnt->lxdm_rootnode;
+	struct vnode *rootvp;
+
+	ASSERT(rootnode != NULL);
+
+	rootvp = LDNTOV(rootnode);
+	VN_HOLD(rootvp);
+
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Report file system statistics for an lxd mount.
+ *
+ * Block counts are expressed in PAGESIZE units and are derived from the
+ * currently available swap, less lxd_minfree. When the mount belongs to a
+ * non-global zone that has a swap cap, the figures are further limited by
+ * what remains under that cap; a zone already at or above its cap reports no
+ * free blocks. File counts are an estimate based on available memory.
+ * Always returns 0.
+ */
+static int
+lxd_statvfs(struct vfs *vfsp, statvfs64_t *sbp)
+{
+	lxd_mnt_t *lxdm = (lxd_mnt_t *)VFSTOLXDM(vfsp);
+	ulong_t	blocks;
+	dev32_t d32;
+	zoneid_t eff_zid;
+	struct zone *zp;
+
+	zp = lxdm->lxdm_vfsp->vfs_zone;
+
+	/* A vfs with no zone attached is treated as the global zone's. */
+	if (zp == NULL)
+		eff_zid = GLOBAL_ZONEID;
+	else
+		eff_zid = zp->zone_id;
+
+	sbp->f_bsize = PAGESIZE;
+	sbp->f_frsize = PAGESIZE;
+
+	/*
+	 * Find the amount of available physical and memory-backed swap
+	 */
+	mutex_enter(&anoninfo_lock);
+	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
+	blocks = (ulong_t)CURRENT_TOTAL_AVAILABLE_SWAP;
+	mutex_exit(&anoninfo_lock);
+
+	/* Hold back lxd_minfree for the rest of the zone. */
+	if (blocks > lxd_minfree)
+		sbp->f_bfree = blocks - lxd_minfree;
+	else
+		sbp->f_bfree = 0;
+
+	sbp->f_bavail = sbp->f_bfree;
+
+	/*
+	 * Total number of blocks is just what's available
+	 */
+	sbp->f_blocks = (fsblkcnt64_t)(sbp->f_bfree);
+
+	if (eff_zid != GLOBAL_ZONEID &&
+	    zp->zone_max_swap_ctl != UINT64_MAX) {
+		/*
+		 * If the fs is used by a zone with a swap cap,
+		 * then report the capped size.
+		 */
+		rctl_qty_t cap, used;
+		pgcnt_t pgcap, pgused, pgleft;
+
+		mutex_enter(&zp->zone_mem_lock);
+		cap = zp->zone_max_swap_ctl;
+		used = zp->zone_max_swap;
+		mutex_exit(&zp->zone_mem_lock);
+
+		pgcap = btop(cap);
+		pgused = btop(used);
+
+		/*
+		 * Usage can exceed the cap (e.g. if the cap was lowered after
+		 * the swap was reserved). The page counts are unsigned, so
+		 * clamp at zero rather than letting the subtraction wrap to a
+		 * huge value that would defeat the MIN() below.
+		 */
+		pgleft = (pgcap > pgused) ? pgcap - pgused : 0;
+
+		sbp->f_bfree = MIN(pgleft, sbp->f_bfree);
+		sbp->f_bavail = sbp->f_bfree;
+		sbp->f_blocks = MIN(pgcap, sbp->f_blocks);
+	}
+
+	/*
+	 * The maximum number of files available is approximately the number
+	 * of lxd_nodes we can allocate from the remaining kernel memory
+	 * available to lxdevfs in this zone.  This is fairly inaccurate since
+	 * it doesn't take into account the names stored in the directory
+	 * entries.
+	 */
+	sbp->f_ffree = sbp->f_files = ptob(availrmem) /
+	    (sizeof (lxd_node_t) + sizeof (lxd_dirent_t));
+	sbp->f_favail = (fsfilcnt64_t)(sbp->f_ffree);
+	(void) cmpldev(&d32, vfsp->vfs_dev);
+	sbp->f_fsid = d32;
+	(void) strcpy(sbp->f_basetype, vfssw[lxd_fstype].vsw_name);
+	(void) strncpy(sbp->f_fstr, lxdm->lxdm_mntpath, sizeof (sbp->f_fstr));
+	/* ensure null termination */
+	sbp->f_fstr[sizeof (sbp->f_fstr) - 1] = '\0';
+	sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
+	sbp->f_namemax = MAXNAMELEN - 1;
+	return (0);
+}
+
+/*
+ * Custom translator for pts devices: map a native pts dev_t onto the range
+ * of Linux pts major numbers. Returns 0 and stores the Linux device number
+ * in *jdev, or EOVERFLOW when the native minor is too large to be
+ * represented.
+ */
+static int
+lxd_pts_devt_translator(dev_t dev, lx_dev_t *jdev)
+{
+	minor_t	native_minor = getminor(dev);
+	int	linux_major;
+	int	linux_minor;
+
+	/*
+	 * Linux has a really small minor number name space (8 bits), so a
+	 * single major number could only cover 256 pts devices. Linux works
+	 * around that by spreading pts devices over several major numbers;
+	 * anything beyond that combined range cannot be translated.
+	 */
+	if (native_minor >= LX_PTS_MAX)
+		return (EOVERFLOW);
+
+	/* Pick the major by quotient and the minor by remainder. */
+	linux_major = LX_PTS_MAJOR_MIN + (native_minor / LX_MINORMASK);
+	linux_minor = native_minor % LX_MINORMASK;
+
+	*jdev = LX_MAKEDEVICE(linux_major, linux_minor);
+	return (0);
+}
+
+/*
+ * Custom translator for the pty master device: every native device number
+ * maps to the single fixed Linux device (LX_PTM_MAJOR, LX_PTM_MINOR); the
+ * native dev_t is not examined. Always returns 0.
+ */
+static int
+lxd_ptm_devt_translator(dev_t dev, lx_dev_t *jdev)
+{
+	lx_dev_t ptm_dev = LX_MAKEDEVICE(LX_PTM_MAJOR, LX_PTM_MINOR);
+
+	*jdev = ptm_dev;
+	return (0);
+}
diff --git a/usr/src/uts/common/brand/lx/devfs/lxd_vnops.c b/usr/src/uts/common/brand/lx/devfs/lxd_vnops.c
new file mode 100644
index 0000000000..05ca0400ad
--- /dev/null
+++ b/usr/src/uts/common/brand/lx/devfs/lxd_vnops.c
@@ -0,0 +1,1453 @@
+/*
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source.  A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ */
+
+/*
+ * Copyright 2015 Joyent, Inc.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/errno.h>
+#include <sys/vnode.h>
+#include <sys/vfs.h>
+#include <sys/vfs_opreg.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <sys/cred.h>
+#include <sys/pathname.h>
+#include <sys/debug.h>
+#include <sys/sdt.h>
+#include <fs/fs_subr.h>
+#include <vm/as.h>
+#include <vm/seg.h>
+#include <sys/lx_brand.h>
+
+#include "lxd.h"
+
+/*
+ * Open entry point.
+ *
+ * Front nodes need no open processing and always succeed. For a back node
+ * the open is passed to the underlying real vnode. If the underlying file
+ * system substitutes a different vnode during the open, a new back node is
+ * made for it and returned through vpp in place of the original (which is
+ * released); when that new vnode is a device it is additionally wrapped via
+ * specvp(). Returns 0 on success or an errno value.
+ */
+static int
+lxd_open(vnode_t **vpp, int flag, struct cred *cr, caller_context_t *ct)
+{
+	lxd_node_t *ldn = VTOLDN(*vpp);
+	vnode_t *vp = *vpp;
+	vnode_t *rvp;
+	vnode_t *oldvp;
+	int error;
+
+	if (ldn->lxdn_type == LXDNT_FRONT)
+		return (0);
+
+	ASSERT(ldn->lxdn_type == LXDNT_BACK);
+	/* Keep the lxd vnode in oldvp; vp and rvp now name the real vnode. */
+	oldvp = vp;
+	vp = rvp = REALVP(vp);
+	/*
+	 * Need to hold new reference to vp since VOP_OPEN() may
+	 * decide to release it.
+	 */
+	VN_HOLD(vp);
+	error = VOP_OPEN(&rvp, flag, cr, ct);
+
+	if (!error && rvp != vp) {
+		/*
+		 * the FS which we called should have released the
+		 * new reference on vp
+		 */
+		*vpp = lxd_make_back_node(rvp, VFSTOLXDM(oldvp->v_vfsp));
+
+		if (IS_DEVVP(*vpp)) {
+			vnode_t *svp;
+
+			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+			VN_RELE(*vpp);
+			/*
+			 * NOTE(review): when specvp() fails, *vpp still names
+			 * the node released just above; callers presumably
+			 * ignore *vpp on error -- confirm.
+			 */
+			if (svp == NULL)
+				error = ENOSYS;
+			else
+				*vpp = svp;
+		}
+		/* The caller's original lxd vnode has been replaced. */
+		VN_RELE(oldvp);
+	} else {
+		/*
+		 * Either the open failed or the vnode was not replaced; drop
+		 * the extra hold taken before VOP_OPEN().
+		 */
+		ASSERT(rvp->v_count > 1);
+		VN_RELE(rvp);
+	}
+
+	return (error);
+}
+
+/*
+ * Close entry point. Closing a front node is a no-op that returns 0; for a
+ * back node the close is forwarded to the underlying real vnode and its
+ * result is returned.
+ */
+static int
+lxd_close(vnode_t *vp, int flag, int count, offset_t offset, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		realvp = REALVP(vp);
+		return (VOP_CLOSE(realvp, flag, count, offset, cr, ct));
+	}
+
+	return (0);
+}
+
+/*
+ * Read entry point. Front nodes do not support I/O (ENOTSUP); reads on a
+ * back node are forwarded to the underlying real vnode.
+ */
+static int
+lxd_read(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		realvp = REALVP(vp);
+		return (VOP_READ(realvp, uiop, ioflag, cr, ct));
+	}
+
+	return (ENOTSUP);
+}
+
+/*
+ * Write entry point. Front nodes do not support I/O (ENOTSUP); writes on a
+ * back node are forwarded to the underlying real vnode.
+ */
+static int
+lxd_write(vnode_t *vp, struct uio *uiop, int ioflag, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		realvp = REALVP(vp);
+		return (VOP_WRITE(realvp, uiop, ioflag, cr, ct));
+	}
+
+	return (ENOTSUP);
+}
+
+/*
+ * Ioctl entry point. Front nodes do not support ioctls (ENOTSUP); for a
+ * back node the request is forwarded to the underlying real vnode.
+ */
+static int
+lxd_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, struct cred *cr,
+    int *rvalp, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		realvp = REALVP(vp);
+		return (VOP_IOCTL(realvp, cmd, arg, flag, cr, rvalp, ct));
+	}
+
+	return (ENOTSUP);
+}
+
+/*
+ * Setfl entry point. Not supported on front nodes (ENOTSUP); for a back
+ * node the request is forwarded to the underlying real vnode.
+ */
+static int
+lxd_setfl(vnode_t *vp, int oflags, int nflags, cred_t *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		realvp = REALVP(vp);
+		return (VOP_SETFL(realvp, oflags, nflags, cr, ct));
+	}
+
+	return (ENOTSUP);
+}
+
+/*
+ * Translate SunOS devt to Linux devt.
+ *
+ * The translator table is searched for an entry whose native major matches
+ * that of dev. A DTT_LIST translator maps the native minor to a fixed Linux
+ * major/minor pair; a DTT_CUSTOM translator delegates to its function. When
+ * there is no translator for the major, or a list translator has no entry
+ * for the minor, the native major and minor are encoded unchanged.
+ *
+ * The result is stored in *rdev. Returns 0, or whatever a custom translator
+ * returns.
+ */
+static int
+lxd_s2l_devt(dev_t dev, lx_dev_t *rdev)
+{
+	lxd_minor_translator_t	*mt;
+	int			i, j;
+	major_t			maj = getmajor(dev);
+	minor_t			min = getminor(dev);
+
+	/* look for a devt translator for this major number */
+	for (i = 0; lxd_devt_translators[i].lxd_xl_driver != NULL; i++) {
+		if (lxd_devt_translators[i].lxd_xl_major == maj)
+			break;
+	}
+
+	if (lxd_devt_translators[i].lxd_xl_driver != NULL) {
+		/* try to translate the illumos devt to a linux devt */
+		switch (lxd_devt_translators[i].lxd_xl_type) {
+		case DTT_INVALID:
+			/* Only the table terminator carries this type. */
+			ASSERT(0);
+			break;
+
+		case DTT_LIST:
+			mt = lxd_devt_translators[i].xl_list;
+			for (j = 0; mt[j].lxd_mt_path != NULL; j++) {
+				if (mt[j].lxd_mt_minor == min) {
+					/*
+					 * It is the Linux minor that gets
+					 * encoded below, so that is the value
+					 * which must fit (this matches the
+					 * check made in lxd_init()).
+					 */
+					ASSERT(mt[j].lxd_mt_lx_minor <
+					    LX_MINORMASK);
+
+					/* found a translation */
+					*rdev = LX_MAKEDEVICE(
+					    mt[j].lxd_mt_lx_major,
+					    mt[j].lxd_mt_lx_minor);
+					return (0);
+				}
+			}
+			break;
+
+		case DTT_CUSTOM:
+			return (lxd_devt_translators[i].xl_custom(dev, rdev));
+		}
+	}
+
+	/* we don't have a translator for this device */
+	*rdev = LX_MAKEDEVICE(maj, min);
+	return (0);
+}
+
+/*
+ * Getattr entry point.
+ *
+ * For a front node the attributes are filled in from the lxd_node itself.
+ * For a back node they come from the underlying real vnode. In addition,
+ * when the caller is an lx branded process and the real vnode is a
+ * character device with a known translator, the Linux device number is
+ * computed and exposed through a DTrace probe; it is not (yet) stored in the
+ * returned attributes. Returns 0 or the error from the underlying getattr.
+ */
+static int
+lxd_getattr(vnode_t *vp, struct vattr *vap, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+	major_t maj;
+	int err;
+	int idx;
+
+	if (node->lxdn_type == LXDNT_FRONT) {
+		mutex_enter(&node->lxdn_tlock);
+
+		vap->va_type = vp->v_type;
+		vap->va_mode = node->lxdn_mode & MODEMASK;
+		vap->va_uid = node->lxdn_uid;
+		vap->va_gid = node->lxdn_gid;
+		vap->va_fsid = node->lxdn_fsid;
+		vap->va_nodeid = (ino64_t)node->lxdn_nodeid;
+		vap->va_nlink = node->lxdn_nlink;
+		vap->va_size = (u_offset_t)node->lxdn_size;
+		vap->va_atime = node->lxdn_atime;
+		vap->va_mtime = node->lxdn_mtime;
+		vap->va_ctime = node->lxdn_ctime;
+		vap->va_blksize = PAGESIZE;
+		vap->va_rdev = 0;	/* no devs in front */
+		vap->va_seq = node->lxdn_seq;
+
+		/* Block count is the size rounded up to whole pages. */
+		vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(
+		    vap->va_size)));
+
+		mutex_exit(&node->lxdn_tlock);
+		return (0);
+	}
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	err = VOP_GETATTR(realvp, vap, flags, cr, ct);
+	if (err != 0)
+		return (err);
+
+	/*
+	 * Devt translation is skipped for native programs and only applies
+	 * to character devices.
+	 */
+	if (curproc->p_brand != &lx_brand || realvp->v_type != VCHR)
+		return (0);
+
+	maj = getmajor(vap->va_rdev);
+	for (idx = 0; lxd_devt_translators[idx].lxd_xl_driver != NULL; idx++) {
+		lx_dev_t ldev;
+
+		if (lxd_devt_translators[idx].lxd_xl_major != maj)
+			continue;
+
+		(void) lxd_s2l_devt(vap->va_rdev, &ldev);
+		DTRACE_PROBE3(lxd__devxl, void *, realvp, void *, vap,
+		    int, ldev);
+		/*
+		 * TBD: enable device translation for back nodes. Until then
+		 * the translated value is not written to vap->va_rdev.
+		 */
+		break;
+	}
+
+	return (0);
+}
+
+/*
+ * Setattr entry point.
+ *
+ * For a back node the request is forwarded to the underlying real vnode.
+ * For a front node the mode, owner, group, access time and modification
+ * time may be changed, subject to secpolicy_vnode_setattr(); attempts to set
+ * the size, set-id mode bits, extended attributes or any AT_NOSET attribute
+ * fail with EINVAL. Returns 0 on success or an errno value.
+ */
+static int
+lxd_setattr(vnode_t *vp, struct vattr *vap, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	struct vattr *cur;
+	long mask;
+	int err;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		vp = REALVP(vp);
+		return (VOP_SETATTR(vp, vap, flags, cr, ct));
+	}
+
+	mask = vap->va_mask;
+
+	/* Cannot set these attributes */
+	if ((mask & AT_NOSET) || (mask & AT_XVATTR) || (mask & AT_SIZE) ||
+	    ((mask & AT_MODE) && (vap->va_mode & (S_ISUID | S_ISGID))))
+		return (EINVAL);
+
+	mutex_enter(&node->lxdn_tlock);
+
+	cur = &node->lxdn_attr;
+
+	/*
+	 * Changing file attributes requires being the owner or having
+	 * sufficient privileges.
+	 */
+	err = secpolicy_vnode_setattr(cr, vp, vap, cur, flags, lxd_naccess,
+	    node);
+	if (err == 0) {
+		/* Replace the permission bits, keep the file type bits. */
+		if (mask & AT_MODE) {
+			cur->va_mode = (cur->va_mode & S_IFMT) |
+			    (vap->va_mode & ~S_IFMT);
+		}
+
+		if (mask & AT_UID)
+			cur->va_uid = vap->va_uid;
+		if (mask & AT_GID)
+			cur->va_gid = vap->va_gid;
+		if (mask & AT_ATIME)
+			cur->va_atime = vap->va_atime;
+		if (mask & AT_MTIME)
+			cur->va_mtime = vap->va_mtime;
+
+		/* Any of these changes also updates the change time. */
+		if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
+			gethrestime(&node->lxdn_ctime);
+	}
+
+	mutex_exit(&node->lxdn_tlock);
+	return (err);
+}
+
+/*
+ * Access entry point. A front node is checked with lxd_naccess() under the
+ * node's lxdn_tlock. For a back node, write access to a regular file on a
+ * read-only file system fails with EROFS; otherwise the check is forwarded
+ * to the underlying real vnode.
+ */
+static int
+lxd_access(vnode_t *vp, int mode, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	int err;
+
+	if (node->lxdn_type == LXDNT_FRONT) {
+		mutex_enter(&node->lxdn_tlock);
+		err = lxd_naccess(node, mode, cr);
+		mutex_exit(&node->lxdn_tlock);
+		return (err);
+	}
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+
+	if ((mode & VWRITE) && vp->v_type == VREG && vn_is_readonly(vp))
+		return (EROFS);
+
+	vp = REALVP(vp);
+	return (VOP_ACCESS(vp, mode, flags, cr, ct));
+}
+
+/*
+ * Fsync entry point. There is nothing to flush for a front node, so that
+ * case returns 0; for a back node the request is forwarded to the
+ * underlying real vnode.
+ */
+static int
+lxd_fsync(vnode_t *vp, int syncflag, struct cred *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		realvp = REALVP(vp);
+		return (VOP_FSYNC(realvp, syncflag, cr, ct));
+	}
+
+	return (0);
+}
+
+/*
+ * Inactive processing for a front node.
+ *
+ * If other holds remain, or the node still has links, only the caller's
+ * hold is dropped and the node stays in place (possibly with a v_count of
+ * zero, kept alive by its link count). Otherwise the node is dead: any
+ * symlink target is freed, the node is unlinked from the mount's node list,
+ * and the vnode and lxd_node are destroyed.
+ */
+static void
+lxd_front_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct)
+{
+	lxd_node_t *ldn = VTOLDN(vp);
+	lxd_mnt_t *lxdm = VTOLXDM(vp);
+
+	ASSERT(ldn->lxdn_type == LXDNT_FRONT);
+	rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+
+	/* The link count and the hold count are examined together. */
+	mutex_enter(&ldn->lxdn_tlock);
+	mutex_enter(&vp->v_lock);
+	ASSERT(vp->v_count >= 1);
+
+	/*
+	 * If we don't have the last hold or the link count is non-zero,
+	 * there's little to do -- just drop our hold.
+	 */
+	if (vp->v_count > 1 || ldn->lxdn_nlink != 0) {
+		vp->v_count--;
+
+		mutex_exit(&vp->v_lock);
+		mutex_exit(&ldn->lxdn_tlock);
+		rw_exit(&ldn->lxdn_rwlock);
+		return;
+	}
+
+	/*
+	 * We have the last hold *and* the link count is zero, so this node is
+	 * dead from the filesystem's viewpoint.
+	 */
+	if (ldn->lxdn_size != 0) {
+		/* A symlink's target is freed as lxdn_size + 1 bytes. */
+		if (ldn->lxdn_vnode->v_type == VLNK)
+			kmem_free(ldn->lxdn_symlink, ldn->lxdn_size + 1);
+	}
+
+	mutex_exit(&vp->v_lock);
+	mutex_exit(&ldn->lxdn_tlock);
+
+	vn_invalid(LDNTOV(ldn));
+
+	/*
+	 * Unlink the node from the mount's node list. The root node's
+	 * lxdn_prev tracks the last node on the list, so it is updated when
+	 * this node was the last one.
+	 */
+	mutex_enter(&lxdm->lxdm_contents);
+	if (ldn->lxdn_next == NULL)
+		lxdm->lxdm_rootnode->lxdn_prev = ldn->lxdn_prev;
+	else
+		ldn->lxdn_next->lxdn_prev = ldn->lxdn_prev;
+	ldn->lxdn_prev->lxdn_next = ldn->lxdn_next;
+
+	mutex_exit(&lxdm->lxdm_contents);
+	rw_exit(&ldn->lxdn_rwlock);
+	rw_destroy(&ldn->lxdn_rwlock);
+	mutex_destroy(&ldn->lxdn_tlock);
+
+	/* Finally release the vnode and the node itself. */
+	vn_free(LDNTOV(ldn));
+	kmem_free(ldn, sizeof (lxd_node_t));
+}
+
+/*
+ * Inactive entry point: dispatch on the node type. Front nodes go through
+ * lxd_front_inactive(); back nodes are released with lxd_free_back_node().
+ */
+/*ARGSUSED*/
+static void
+lxd_inactive(vnode_t *vp, struct cred *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		lxd_free_back_node(node);
+		return;
+	}
+
+	lxd_front_inactive(vp, cr, ct);
+}
+
+/*
+ * File identifiers are not supported for front nodes (ENOTSUP); for back
+ * nodes the underlying vnode is asked for its fid.
+ */
+/* ARGSUSED */
+static int
+lxd_fid(vnode_t *vp, struct fid *fidp, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		return (VOP_FID(REALVP(vp), fidp, ct));
+	}
+
+	return (ENOTSUP);
+}
+
+/*
+ * For a front node lookup in the dirent hash table and return a shadow vnode
+ * (lxd_node_t type) of type LXDNT_FRONT.
+ *
+ * For a back node, lookup nm name and return a shadow vnode (lxd_node_t type)
+ * of the real vnode found.
+ *
+ * The order of operations is:
+ *   1. reject LOOKUP_XATTR (EINVAL) and resolve the empty name to dvp;
+ *   2. search the directory's own front dirents via lxd_dirlookup(), for
+ *      both front and back directories;
+ *   3. if that fails and dvp is a front node, return ENOENT;
+ *   4. otherwise look the name up in the real directory and wrap the result
+ *      in a back node (and, for device nodes, in a specfs vnode).
+ *
+ * On success *vpp is the resulting vnode and 0 is returned.
+ */
+static int
+lxd_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
+    int flags, vnode_t *rdir, struct cred *cr, caller_context_t *ct,
+    int *direntflags, pathname_t *realpnp)
+{
+	vnode_t *vp = NULL;
+	int error;
+	vnode_t *realdvp;
+	lxd_mnt_t *lxdm = VTOLXDM(dvp);
+	int doingdotdot = 0;
+	lxd_node_t *ldn = VTOLDN(dvp);
+	lxd_node_t *nldn = NULL;
+
+	/*
+	 * First check for front file which could be instantiated on either a
+	 * front or back node (e.g. the top-level mount point directory node is
+	 * a back node which can have front files created in it).
+	 */
+
+	/* disallow extended attrs */
+	if (flags & LOOKUP_XATTR)
+		return (EINVAL);
+
+	/* Null component name is a synonym for dir being searched. */
+	if (*nm == '\0') {
+		VN_HOLD(dvp);
+		*vpp = dvp;
+		return (0);
+	}
+
+	rw_enter(&ldn->lxdn_rwlock, RW_READER);
+	error = lxd_dirlookup(ldn, nm, &nldn, cr);
+	rw_exit(&ldn->lxdn_rwlock);
+
+	if (error == 0) {
+		/* found */
+		ASSERT(nldn != NULL);
+		*vpp = LDNTOV(nldn);
+		return (0);
+	}
+
+	/* At this point, if dir node is a front node, error */
+	if (ldn->lxdn_type == LXDNT_FRONT) {
+		return (ENOENT);
+	}
+
+	realdvp = REALVP(dvp);
+
+	if (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0') {
+		doingdotdot++;
+		/*
+		 * Handle ".." out of mounted filesystem
+		 */
+		while ((realdvp->v_flag & VROOT) && realdvp != rootdir) {
+			realdvp = realdvp->v_vfsp->vfs_vnodecovered;
+			ASSERT(realdvp != NULL);
+		}
+	}
+
+	*vpp = NULL;	/* default(error) case */
+
+	/*
+	 * Do the normal lookup
+	 */
+	if ((error = VOP_LOOKUP(realdvp, nm, &vp, pnp, flags, rdir, cr,
+	    ct, direntflags, realpnp)) != 0) {
+		vp = NULL;
+		goto out;
+	}
+
+	/*
+	 * We do this check here to avoid returning a stale file handle to the
+	 * caller.  For "." the hold on the real vnode is traded for a hold on
+	 * our own directory vnode.
+	 */
+	if (nm[0] == '.' && nm[1] == '\0') {
+		ASSERT(vp == realdvp);
+		VN_HOLD(dvp);
+		VN_RELE(vp);
+		*vpp = dvp;
+		return (0);
+	}
+
+	/* ".." is wrapped as-is: no mount traversal or specfs handling. */
+	if (doingdotdot) {
+		*vpp = lxd_make_back_node(vp, lxdm);
+		return (0);
+	}
+
+	/*
+	 * If this vnode is mounted on, then we
+	 * traverse to the vnode which is the root of
+	 * the mounted file system.
+	 */
+	if ((error = traverse(&vp)) != 0)
+		goto out;
+
+	/*
+	 * Make a lxd node for the real vnode.
+	 */
+	*vpp = lxd_make_back_node(vp, lxdm);
+	if (vp->v_type != VDIR) {
+		if (IS_DEVVP(*vpp)) {
+			vnode_t *svp;
+
+			svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+			VN_RELE(*vpp);
+			/*
+			 * NOTE(review): on failure *vpp still points at the
+			 * node released just above, and whether the extra
+			 * VN_RELE(vp) is balanced depends on whether
+			 * lxd_make_back_node() consumes the caller's hold on
+			 * vp -- confirm against lxd_make_back_node() and
+			 * lxd_free_back_node().
+			 */
+			if (svp == NULL) {
+				VN_RELE(vp);
+				error = ENOSYS;
+			} else {
+				*vpp = svp;
+			}
+		}
+		return (error);
+	}
+
+	/* Directories fall through here with error == 0. */
+out:
+	if (error != 0 && vp != NULL)
+		VN_RELE(vp);
+
+	return (error);
+}
+
+/*
+ * Create is refused (EINVAL) inside front directories: plain files are not
+ * supported as front nodes, only directories and symlinks are.
+ *
+ * For a back directory the request is passed to the underlying file system,
+ * which decides whether it is permitted; this is what lets an O_CREAT open
+ * of an existing device succeed.  A successful result is wrapped in a back
+ * node and, if it is a device, in a specfs vnode.
+ */
+/*ARGSUSED*/
+static int
+lxd_create(vnode_t *dvp, char *nm, struct vattr *va, enum vcexcl exclusive,
+    int mode, vnode_t **vpp, struct cred *cr, int flag, caller_context_t *ct,
+    vsecattr_t *vsecp)
+{
+	lxd_node_t *dir = VTOLDN(dvp);
+	vnode_t *realvp = NULL;
+	vnode_t *svp;
+	int error;
+
+	if (dir->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(dir->lxdn_type == LXDNT_BACK);
+
+	/* An empty name refers to the directory itself. */
+	if (*nm == '\0') {
+		ASSERT(vpp && dvp == *vpp);
+		realvp = REALVP(*vpp);
+	}
+
+	error = VOP_CREATE(REALVP(dvp), nm, va, exclusive, mode, &realvp, cr,
+	    flag, ct, vsecp);
+	if (error != 0)
+		return (error);
+
+	*vpp = lxd_make_back_node(realvp, VFSTOLXDM(dvp->v_vfsp));
+	if (!IS_DEVVP(*vpp))
+		return (0);
+
+	/* Devices are handed back as specfs vnodes layered on our node. */
+	svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
+	VN_RELE(*vpp);
+	if (svp == NULL)
+		return (ENOSYS);
+
+	*vpp = svp;
+	return (0);
+}
+
+/*
+ * Remove the entry nm from directory dvp.  Only entries that
+ * lxd_dirlookup() finds among the directory's front dirents can be removed;
+ * any lookup failure is returned to the caller unchanged.
+ */
+/*ARGSUSED*/
+static int
+lxd_remove(vnode_t *dvp, char *nm, struct cred *cr, caller_context_t *ct,
+    int flags)
+{
+	lxd_node_t *dir = VTOLDN(dvp);
+	lxd_node_t *victim = NULL;
+	int error;
+
+	if ((error = lxd_dirlookup(dir, nm, &victim, cr)) != 0)
+		return (error);
+
+	ASSERT(victim != NULL);
+	ASSERT(victim->lxdn_type == LXDNT_FRONT);
+
+	/* Directory lock first, then the node being removed. */
+	rw_enter(&dir->lxdn_rwlock, RW_WRITER);
+	rw_enter(&victim->lxdn_rwlock, RW_WRITER);
+	error = lxd_dirdelete(dir, victim, nm, DR_REMOVE, cr);
+	rw_exit(&victim->lxdn_rwlock);
+	rw_exit(&dir->lxdn_rwlock);
+
+	/* Pairs with the successful lxd_dirlookup() above. */
+	ldnode_rele(victim);
+
+	return (error);
+}
+
+/*
+ * Hard links are not supported in lx devfs; every request fails with
+ * ENOTSUP regardless of its arguments.
+ */
+/*ARGSUSED*/
+static int
+lxd_link(vnode_t *tdvp, vnode_t *vp, char *tnm, struct cred *cr,
+    caller_context_t *ct, int flags)
+{
+	return (ENOTSUP);
+}
+
+/*
+ * Rename onm in directory odvp to nnm in directory ndvp.
+ *
+ * The target directory must belong to this file system (EACCES otherwise)
+ * and the source must be found by lxd_dirlookup() in the old parent, i.e.
+ * only front entries can be renamed.  The whole operation is serialized by
+ * the per-mount lxdm_renamelck.  The new name is entered first, then the
+ * old name is removed; the ct and flags arguments are only passed through
+ * (ct) or unused (flags).
+ */
+static int
+lxd_rename(vnode_t *odvp, char *onm, vnode_t *ndvp, char *nnm, struct cred *cr,
+    caller_context_t *ct, int flags)
+{
+	lxd_node_t *oldparent = VTOLDN(odvp);
+	lxd_node_t *newparent;
+	lxd_mnt_t *lxdm = VTOLXDM(oldparent->lxdn_vnode);
+	lxd_node_t *fromnode = NULL;
+	int error;
+	int samedir = 0;
+
+	if (!vn_matchops(ndvp, lxd_vnodeops)) {
+		/* cannot rename out of this file system */
+		return (EACCES);
+	}
+
+	mutex_enter(&lxdm->lxdm_renamelck);
+
+	newparent = VTOLDN(ndvp);
+
+	/*
+	 * We can only rename front nodes.
+	 */
+	error = lxd_dirlookup(oldparent, onm, &fromnode, cr);
+	if (error != 0) {
+		/* not found in front */
+		mutex_exit(&lxdm->lxdm_renamelck);
+		return (error);
+	}
+
+	/*
+	 * Make sure we can delete the old (source) entry.  This
+	 * requires write permission on the containing directory.  If
+	 * that directory is "sticky" it requires further checks.
+	 */
+	if ((error = lxd_naccess(oldparent, VWRITE, cr)) != 0)
+		goto done;
+
+	/*
+	 * Check for renaming to or from '.' or '..' or that
+	 * fromnode == oldparent
+	 */
+	if ((onm[0] == '.' &&
+	    (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
+	    (nnm[0] == '.' &&
+	    (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
+	    (oldparent == fromnode)) {
+		error = EINVAL;
+		goto done;
+	}
+
+	samedir = (oldparent == newparent);
+
+	/*
+	 * Make sure we can search and rename into the destination directory.
+	 */
+	if (!samedir) {
+		if ((error = lxd_naccess(newparent, VEXEC|VWRITE, cr)) != 0)
+			goto done;
+	}
+
+	/*
+	 * Link source to new target
+	 */
+	rw_enter(&newparent->lxdn_rwlock, RW_WRITER);
+	error = lxd_direnter(lxdm, newparent, nnm, DE_RENAME,
+	    oldparent, fromnode, (struct vattr *)NULL, (lxd_node_t **)NULL,
+	    cr, ct);
+	rw_exit(&newparent->lxdn_rwlock);
+
+	if (error)
+		goto done;
+
+	/*
+	 * Unlink from source.  The new parent's lock was dropped above, so
+	 * the two directory updates are not atomic with respect to each
+	 * other; only lxdm_renamelck spans both.
+	 */
+	rw_enter(&oldparent->lxdn_rwlock, RW_WRITER);
+	rw_enter(&fromnode->lxdn_rwlock, RW_WRITER);
+
+	error = lxd_dirdelete(oldparent, fromnode, onm, DR_RENAME, cr);
+
+	/*
+	 * The following handles the case where our source node was
+	 * removed before we got to it.
+	 */
+	if (error == ENOENT)
+		error = 0;
+
+	rw_exit(&fromnode->lxdn_rwlock);
+	rw_exit(&oldparent->lxdn_rwlock);
+
+done:
+	/* Pairs with the successful lxd_dirlookup() of the source. */
+	ldnode_rele(fromnode);
+	mutex_exit(&lxdm->lxdm_renamelck);
+	return (error);
+}
+
+/*
+ * Create a front directory named nm under dvp.  Fails with EEXIST when the
+ * name already resolves through lxd_lookup(), which consults both the front
+ * entries and the underlying file system.  On success *vpp is the vnode of
+ * the new directory.
+ */
+/*ARGSUSED*/
+static int
+lxd_mkdir(vnode_t *dvp, char *nm, struct vattr *va, vnode_t **vpp,
+    struct cred *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
+{
+	lxd_node_t *dir = VTOLDN(dvp);
+	lxd_mnt_t *lxdm = VTOLXDM(dir->lxdn_vnode);
+	lxd_node_t *newdir = NULL;
+	vnode_t *existing;
+	int error;
+
+	if (lxd_lookup(dvp, nm, &existing, NULL, 0, NULL, cr, ct, NULL,
+	    NULL) == 0) {
+		/* Something by that name is already there. */
+		VN_RELE(existing);
+		return (EEXIST);
+	}
+
+	rw_enter(&dir->lxdn_rwlock, RW_WRITER);
+	error = lxd_direnter(lxdm, dir, nm, DE_MKDIR, NULL, NULL,
+	    va, &newdir, cr, ct);
+	rw_exit(&dir->lxdn_rwlock);
+
+	if (error == 0) {
+		*vpp = LDNTOV(newdir);
+		return (0);
+	}
+
+	/* A node may have been handed back even though the enter failed. */
+	if (newdir != NULL)
+		ldnode_rele(newdir);
+
+	return (error);
+}
+
+/*
+ * Return the "real" vnode behind vp.  A front node is its own real vnode.
+ * For a back node, every lx devfs layer is peeled off and the remaining
+ * vnode is asked for its real vnode in turn; if it cannot supply one, the
+ * peeled vnode itself is returned.  Always returns 0.
+ */
+static int
+lxd_realvp(vnode_t *vp, vnode_t **vpp, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *under;
+
+	if (node->lxdn_type == LXDNT_FRONT) {
+		*vpp = vp;
+		return (0);
+	}
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	for (under = vp; vn_matchops(under, lxd_vnodeops);
+	    under = REALVP(under))
+		continue;
+
+	if (VOP_REALVP(under, vpp, ct) != 0)
+		*vpp = under;
+
+	return (0);
+}
+
+/*
+ * Remove the directory nm from dvp.  Only entries found by lxd_dirlookup()
+ * (front entries) can be removed.
+ *
+ * Errors produced here: EINVAL for "." or when the target is dvp or cdir,
+ * EEXIST for ".." or a non-empty directory, ENOTDIR when the target is not
+ * a directory, and EBUSY when the vnode's vfs lock cannot be taken or
+ * something is mounted on it.  Errors from lxd_dirlookup() and
+ * lxd_dirdelete() are passed through.  The ct and flags arguments are not
+ * used.
+ */
+static int
+lxd_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, struct cred *cr,
+    caller_context_t *ct, int flags)
+{
+	int error;
+	lxd_node_t *ldn;
+	struct vnode *vp;
+	lxd_node_t *parent = VTOLDN(dvp);
+
+	/*
+	 * Return error if trying to remove . or ..
+	 */
+	if (strcmp(nm, ".") == 0)
+		return (EINVAL);
+	if (strcmp(nm, "..") == 0)
+		return (EEXIST);
+
+	error = lxd_dirlookup(VTOLDN(dvp), nm, &ldn, cr);
+	if (error != 0) {
+		/* not found in front */
+		return (error);
+	}
+
+	/* Parent lock first, then the directory being removed. */
+	rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+	rw_enter(&ldn->lxdn_rwlock, RW_WRITER);
+
+	vp = LDNTOV(ldn);
+	if (vp == dvp || vp == cdir) {
+		error = EINVAL;
+		goto err;
+	}
+
+	if (ldn->lxdn_vnode->v_type != VDIR) {
+		error = ENOTDIR;
+		goto err;
+	}
+
+	/* More than two links means the directory is still referenced. */
+	mutex_enter(&ldn->lxdn_tlock);
+	if (ldn->lxdn_nlink > 2) {
+		mutex_exit(&ldn->lxdn_tlock);
+		error = EEXIST;
+		goto err;
+	}
+	mutex_exit(&ldn->lxdn_tlock);
+
+	/* Check for an empty directory */
+	if (ldn->lxdn_dirents > 2) {
+		error = EEXIST;
+		gethrestime(&ldn->lxdn_atime);
+		goto err;
+	}
+
+	/* Refuse to remove a directory that is, or is becoming, a mount point. */
+	if (vn_vfswlock(vp)) {
+		error = EBUSY;
+		goto err;
+	}
+	if (vn_mountedvfs(vp) != NULL) {
+		error = EBUSY;
+		vn_vfsunlock(vp);
+		goto err;
+	}
+
+	error = lxd_dirdelete(parent, ldn, nm, DR_RMDIR, cr);
+	vn_vfsunlock(vp);
+
+err:
+	rw_exit(&ldn->lxdn_rwlock);
+	rw_exit(&parent->lxdn_rwlock);
+	/* Pairs with the successful lxd_dirlookup() above. */
+	ldnode_rele(ldn);
+
+	return (error);
+}
+
+/*
+ * Create a front symlink named nm in directory dvp whose target is tnm.
+ *
+ * Returns EEXIST if nm already resolves through lxd_lookup() (which checks
+ * both front and back), ENOSPC if the target string cannot be allocated,
+ * or the error from lxd_direnter().
+ *
+ * The target buffer is allocated before the directory entry is created so
+ * that an allocation failure cannot leave a symlink with no target behind
+ * in the directory while reporting an error to the caller.
+ */
+/*ARGSUSED*/
+static int
+lxd_symlink(vnode_t *dvp, char *nm, struct vattr *tva, char *tnm,
+    struct cred *cr, caller_context_t *ct, int flags)
+{
+	lxd_node_t *parent = VTOLDN(dvp);
+	lxd_mnt_t *lxdm = VTOLXDM(parent->lxdn_vnode);
+	lxd_node_t *self = NULL;
+	vnode_t *tvp;
+	char *cp;
+	int error;
+	size_t len;
+
+	/* this will check for existence in both front and back */
+	if (lxd_lookup(dvp, nm, &tvp, NULL, 0, NULL, cr, ct, NULL, NULL) == 0) {
+		/* The entry already exists */
+		VN_RELE(tvp);
+		return (EEXIST);
+	}
+
+	/* Copy the target (including its NUL) before touching the directory. */
+	len = strlen(tnm) + 1;
+	cp = kmem_alloc(len, KM_NOSLEEP | KM_NORMALPRI);
+	if (cp == NULL)
+		return (ENOSPC);
+	(void) strcpy(cp, tnm);
+
+	/* make symlink in the front */
+	rw_enter(&parent->lxdn_rwlock, RW_WRITER);
+	error = lxd_direnter(lxdm, parent, nm, DE_CREATE, NULL, NULL,
+	    tva, &self, cr, ct);
+	rw_exit(&parent->lxdn_rwlock);
+
+	if (error) {
+		if (self != NULL)
+			ldnode_rele(self);
+		kmem_free(cp, len);
+		return (error);
+	}
+
+	/*
+	 * lxdn_size excludes the terminating NUL; lxd_front_inactive() frees
+	 * lxdn_size + 1 bytes.
+	 *
+	 * NOTE(review): these fields are set without holding the new node's
+	 * lxdn_rwlock, while lxd_readlink() reads them under that lock --
+	 * confirm whether the node can be reached by a reader at this point.
+	 */
+	self->lxdn_symlink = cp;
+	self->lxdn_size = len - 1;
+	ldnode_rele(self);
+
+	return (error);
+}
+
+/*
+ * Read the target of a symlink.  Back nodes defer to the underlying vnode.
+ * For a front node the stored target (lxdn_size bytes, no terminating NUL)
+ * is copied out under the node's read lock and the access time is updated;
+ * a front node that is not a symlink yields EINVAL.
+ */
+static int
+lxd_readlink(vnode_t *vp, struct uio *uiop, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	int error;
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		return (VOP_READLINK(REALVP(vp), uiop, cr, ct));
+	}
+
+	if (vp->v_type != VLNK)
+		return (EINVAL);
+
+	rw_enter(&node->lxdn_rwlock, RW_READER);
+	error = uiomove(node->lxdn_symlink, node->lxdn_size, UIO_READ, uiop);
+	gethrestime(&node->lxdn_atime);
+	rw_exit(&node->lxdn_rwlock);
+
+	return (error);
+}
+
+/*
+ * Append the front directory entries hanging off vp to the readdir output
+ * in uiop.
+ *
+ * req_off is the offset the caller originally asked lxd_readdir() to start
+ * from.  On entry uio_offset is either where the back file system's readdir
+ * left off (back node) or 0 (front node); front entries are numbered
+ * consecutively from uio_offset + 1 and only those numbered above req_off
+ * are emitted.  uio_offset is advanced by one per front entry walked rather
+ * than by the number of bytes copied.
+ *
+ * Nothing is done when *eofp is 0 on entry (the back read is not finished).
+ * *eofp is cleared when an entry does not fit in the remaining buffer or
+ * when uiomove() fails.  Returns 0 or an errno value.
+ *
+ * NOTE(review): for a front node lxd_readdir() asserts that lxdn_rwlock is
+ * read-held, but for a back node lxd_rwlock() locks the underlying vnode
+ * instead, so nothing visible here protects the lxdn_dir list walk against
+ * concurrent front-entry changes on a back directory -- confirm.
+ */
+static int
+lx_merge_front(vnode_t *vp, struct uio *uiop, off_t req_off, int *eofp)
+{
+	lxd_node_t *ldn = VTOLDN(vp);
+	struct dirent *sd;
+	lxd_dirent_t *ldp;
+	enum lxd_node_type type = ldn->lxdn_type;
+	ssize_t uresid;
+	off_t front_off;
+	int error = 0;
+	int sdlen;
+
+	/* skip the front entries if the back read was incomplete */
+	if (*eofp == 0)
+		return (0);
+
+	/*
+	 * If this was a back node then reading that node has completed and we
+	 * may have a partially full uio struct. eof should be set to true.
+	 * Leave it set since we're likely to hit eof for the front nodes (if
+	 * any).
+	 */
+
+	front_off = uiop->uio_offset + 1;
+	sdlen = sizeof (struct dirent) + MAXPATHLEN;
+	/* zalloc to ensure we don't have anything in the d_name buffer */
+	sd = (struct dirent *)kmem_zalloc(sdlen, KM_SLEEP);
+	ldp = ldn->lxdn_dir;
+	while (ldp != NULL && (uresid = uiop->uio_resid) > 0) {
+		int namelen;
+		int reclen;
+
+		/*
+		 * Skip dot and dotdot for back nodes since we have them
+		 * already.
+		 */
+		if (type == LXDNT_BACK &&
+		    (strcmp(ldp->lddir_name, ".") == 0 ||
+		    strcmp(ldp->lddir_name, "..") == 0)) {
+			ldp = ldp->lddir_next;
+			continue;
+		}
+
+		/*
+		 * Might have previously had a partial readdir of the front
+		 * nodes, and now we're back for more, or we may just be
+		 * doing a follow-up readdir after we've previously
+		 * returned all front and back nodes.
+		 */
+		if (front_off > req_off) {
+			namelen = strlen(ldp->lddir_name); /* no +1 needed */
+			reclen = (int)DIRENT64_RECLEN(namelen);
+
+			/*
+			 * If the size of the data to transfer is greater
+			 * than that requested, then we can't do it this
+			 * transfer.
+			 *
+			 * NOTE(review): front_off starts at uio_offset + 1
+			 * and only increases, so the front_off == 0 test
+			 * below can only be true if the incoming uio_offset
+			 * was -1; the EINVAL case looks unreachable in
+			 * practice -- confirm the intended condition.
+			 */
+			if (reclen > uresid) {
+				*eofp = 0;
+				/* Buffer too small for any entries. */
+				if (front_off == 0)
+					error = EINVAL;
+				break;
+			}
+
+			/*
+			 * strncpy zero-fills the rest of the name area up to
+			 * the record length, so bytes from a longer name in
+			 * a previous iteration are not copied out.
+			 */
+			(void) strncpy(sd->d_name, ldp->lddir_name,
+			    DIRENT64_NAMELEN(reclen));
+			sd->d_reclen = (ushort_t)reclen;
+			sd->d_ino = (ino_t)ldp->lddir_node->lxdn_nodeid;
+			sd->d_off = front_off;
+
+			/* uiomove will adjust iov_base properly */
+			if ((error = uiomove((caddr_t)sd, reclen, UIO_READ,
+			    uiop)) != 0) {
+				*eofp = 0;
+				break;
+			}
+		}
+
+		/*
+		 * uiomove() above updates both uio_resid and uio_offset by the
+		 * same amount but we want uio_offset to change in increments
+		 * of 1, which is different from the number of bytes being
+		 * returned to the caller, so we set uio_offset explicitly,
+		 * ignoring what uiomove() did.
+		 */
+		uiop->uio_offset = front_off;
+		front_off++;
+
+		ldp = ldp->lddir_next;
+	}
+
+	kmem_free(sd, sdlen);
+	return (error);
+}
+
+/*
+ * Read directory entries from vp into uiop.
+ *
+ * A back directory is first read from the underlying file system; any
+ * error from that read is returned immediately.  A front directory has no
+ * underlying directory, so *eofp is preset to 1 and uio_offset is reset to
+ * 0 so that lx_merge_front() numbers entries from the start.  In both cases
+ * the directory's front entries are then merged in by lx_merge_front(),
+ * using the caller's original offset to decide which ones to emit.
+ *
+ * Returns EINVAL unless exactly one iovec is supplied and ENOTDIR if vp is
+ * not a directory.
+ */
+static int
+lxd_readdir(vnode_t *vp, struct uio *uiop, struct cred *cr, int *eofp,
+    caller_context_t *ct, int flags)
+{
+	lxd_node_t *ldn = VTOLDN(vp);
+	vnode_t *rvp;
+	int res;
+	off_t req_off;
+
+	if (uiop->uio_iovcnt != 1)
+		return (EINVAL);
+
+	if (vp->v_type != VDIR)
+		return (ENOTDIR);
+
+	/* Remember where the caller asked to start before anyone moves it. */
+	req_off = uiop->uio_offset;
+
+	/* First read the back node (if it is one) */
+	if (ldn->lxdn_type == LXDNT_BACK) {
+		rvp = REALVP(vp);
+		res = VOP_READDIR(rvp, uiop, cr, eofp, ct, flags);
+		if (res != 0)
+			return (res);
+	} else {
+		/* setup for merge_front */
+		ASSERT(ldn->lxdn_type == LXDNT_FRONT);
+		/* caller should have already called lxd_rwlock */
+		ASSERT(RW_READ_HELD(&ldn->lxdn_rwlock));
+
+		*eofp = 1;
+		/*
+		 * The merge code starts the offset calculation from uio_offset,
+		 * which is normally already set to the high value by the back
+		 * code, but in this case we need to count up from 0.
+		 */
+		uiop->uio_offset = 0;
+	}
+
+	/*
+	 * Our back nodes can also have front entries hanging on them so we
+	 * need to merge those in. Or, we may simply have a front node (i.e. a
+	 * front subdir).
+	 */
+	res = lx_merge_front(vp, uiop, req_off, eofp);
+	return (res);
+}
+
+/*
+ * Take the vnode's read/write lock.  A front node uses its own lxdn_rwlock
+ * and reports back the kind of lock requested; a back node defers to the
+ * underlying vnode.
+ */
+static int
+lxd_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		return (VOP_RWLOCK(REALVP(vp), write_lock, ct));
+	}
+
+	rw_enter(&node->lxdn_rwlock, write_lock ? RW_WRITER : RW_READER);
+	return (write_lock);
+}
+
+/*
+ * Release the lock taken by lxd_rwlock(): the node's own lxdn_rwlock for a
+ * front node, the underlying vnode's lock for a back node.
+ */
+static void
+lxd_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		VOP_RWUNLOCK(REALVP(vp), write_lock, ct);
+		return;
+	}
+
+	rw_exit(&node->lxdn_rwlock);
+}
+
+/*
+ * Validate a seek.  For a front node any new offset in the range
+ * [0, MAXOFFSET_T] is accepted and anything else is EINVAL; a back node
+ * lets the underlying vnode decide.
+ */
+static int
+lxd_seek(vnode_t *vp, offset_t ooff, offset_t *noffp, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+
+	if (node->lxdn_type != LXDNT_FRONT) {
+		ASSERT(node->lxdn_type == LXDNT_BACK);
+		return (VOP_SEEK(REALVP(vp), ooff, noffp, ct));
+	}
+
+	if (*noffp < 0 || *noffp > MAXOFFSET_T)
+		return (EINVAL);
+
+	return (0);
+}
+
+/*
+ * Compare two vnodes for identity.  Back-node layers are stripped from both
+ * sides first.  If either side is still an lx devfs vnode afterwards (i.e.
+ * a front node), the result is a plain pointer comparison; otherwise the
+ * underlying file system's compare routine decides.
+ */
+static int
+lxd_cmp(vnode_t *vp1, vnode_t *vp2, caller_context_t *ct)
+{
+	for (; vn_matchops(vp1, lxd_vnodeops) &&
+	    VTOLDN(vp1)->lxdn_type == LXDNT_BACK; vp1 = REALVP(vp1))
+		continue;
+
+	for (; vn_matchops(vp2, lxd_vnodeops) &&
+	    VTOLDN(vp2)->lxdn_type == LXDNT_BACK; vp2 = REALVP(vp2))
+		continue;
+
+	if (!vn_matchops(vp1, lxd_vnodeops) && !vn_matchops(vp2, lxd_vnodeops))
+		return (VOP_CMP(vp1, vp2, ct));
+
+	return (vp1 == vp2);
+}
+
+/*
+ * File and record locking: EINVAL for front nodes, otherwise forwarded to
+ * the underlying vnode.
+ */
+static int
+lxd_frlock(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
+    struct flk_callback *flk_cbp, cred_t *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_FRLOCK(realvp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
+}
+
+/*
+ * Space allocation/freeing: EINVAL for front nodes, otherwise forwarded to
+ * the underlying vnode.
+ */
+static int
+lxd_space(vnode_t *vp, int cmd, struct flock64 *bfp, int flag, offset_t offset,
+    struct cred *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_SPACE(realvp, cmd, bfp, flag, offset, cr, ct));
+}
+
+/*
+ * Page-in: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *prot,
+    struct page *parr[], size_t psz, struct seg *seg, caddr_t addr,
+    enum seg_rw rw, struct cred *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_GETPAGE(realvp, off, len, prot, parr, psz, seg, addr, rw,
+	    cr, ct));
+}
+
+/*
+ * Page-out: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_putpage(vnode_t *vp, offset_t off, size_t len, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_PUTPAGE(realvp, off, len, flags, cr, ct));
+}
+
+/*
+ * mmap: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp, size_t len,
+    uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_MAP(realvp, off, as, addrp, len, prot, maxprot, flags, cr,
+	    ct));
+}
+
+/*
+ * Add-mapping notification: EINVAL for front nodes, otherwise forwarded to
+ * the underlying vnode.
+ */
+static int
+lxd_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len,
+    uchar_t prot, uchar_t maxprot, uint_t flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_ADDMAP(realvp, off, as, addr, len, prot, maxprot, flags,
+	    cr, ct));
+}
+
+/*
+ * Delete-mapping notification: EINVAL for front nodes, otherwise forwarded
+ * to the underlying vnode.
+ */
+static int
+lxd_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr, size_t len,
+    uint_t prot, uint_t maxprot, uint_t flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_DELMAP(realvp, off, as, addr, len, prot, maxprot, flags,
+	    cr, ct));
+}
+
+/*
+ * Poll: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
+    struct pollhead **phpp, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_POLL(realvp, events, anyyet, reventsp, phpp, ct));
+}
+
+/*
+ * Dump: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_dump(vnode_t *vp, caddr_t addr, offset_t bn, offset_t count,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_DUMP(realvp, addr, bn, count, ct));
+}
+
+/*
+ * pathconf: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_pathconf(vnode_t *vp, int cmd, ulong_t *valp, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_PATHCONF(realvp, cmd, valp, cr, ct));
+}
+
+/*
+ * Page I/O: EINVAL for front nodes, otherwise forwarded to the underlying
+ * vnode.
+ */
+static int
+lxd_pageio(vnode_t *vp, struct page *pp, u_offset_t io_off, size_t io_len,
+    int flags, cred_t *cr, caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_PAGEIO(realvp, pp, io_off, io_len, flags, cr, ct));
+}
+
+/*
+ * Page disposal.  Nothing is done for a front node.  For a back node the
+ * request is passed to the underlying vnode, provided there is one and it
+ * is not a kernel address space (KAS) vnode.
+ */
+static void
+lxd_dispose(vnode_t *vp, page_t *pp, int fl, int dn, cred_t *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return;
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	if (realvp == NULL || VN_ISKAS(realvp))
+		return;
+
+	VOP_DISPOSE(realvp, pp, fl, dn, cr, ct);
+}
+
+/*
+ * Set security attributes (ACLs).  Not implemented for front nodes
+ * (ENOSYS).  For a back node the request fails with EROFS if the vnode is
+ * read-only and is otherwise forwarded to the underlying vnode.
+ */
+static int
+lxd_setsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (ENOSYS);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	if (vn_is_readonly(vp))
+		return (EROFS);
+
+	realvp = REALVP(vp);
+	return (VOP_SETSECATTR(realvp, secattr, flags, cr, ct));
+}
+
+/*
+ * Get security attributes (ACLs): ENOSYS for front nodes, otherwise
+ * forwarded to the underlying vnode.
+ */
+static int
+lxd_getsecattr(vnode_t *vp, vsecattr_t *secattr, int flags, struct cred *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (ENOSYS);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_GETSECATTR(realvp, secattr, flags, cr, ct));
+}
+
+/*
+ * Share reservations: EINVAL for front nodes, otherwise forwarded to the
+ * underlying vnode.
+ */
+static int
+lxd_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
+    caller_context_t *ct)
+{
+	lxd_node_t *node = VTOLDN(vp);
+	vnode_t *realvp;
+
+	if (node->lxdn_type == LXDNT_FRONT)
+		return (EINVAL);
+
+	ASSERT(node->lxdn_type == LXDNT_BACK);
+	realvp = REALVP(vp);
+	return (VOP_SHRLOCK(realvp, cmd, shr, flag, cr, ct));
+}
+
+/*
+ * lx devfs vnode operations vector.
+ *
+ * lxd_vnodeops is the ops pointer the routines above compare against with
+ * vn_matchops(); the template below maps each vnode operation name to its
+ * lxd_* implementation and is terminated by a NULL/NULL pair.
+ */
+
+struct vnodeops *lxd_vnodeops;
+
+const fs_operation_def_t lxd_vnodeops_template[] = {
+	VOPNAME_OPEN,		{ .vop_open = lxd_open },
+	VOPNAME_CLOSE,		{ .vop_close = lxd_close },
+	VOPNAME_READ,		{ .vop_read = lxd_read },
+	VOPNAME_WRITE,		{ .vop_write = lxd_write },
+	VOPNAME_IOCTL,		{ .vop_ioctl = lxd_ioctl },
+	VOPNAME_SETFL,		{ .vop_setfl = lxd_setfl },
+	VOPNAME_GETATTR,	{ .vop_getattr = lxd_getattr },
+	VOPNAME_SETATTR,	{ .vop_setattr = lxd_setattr },
+	VOPNAME_ACCESS,		{ .vop_access = lxd_access },
+	VOPNAME_LOOKUP,		{ .vop_lookup = lxd_lookup },
+	VOPNAME_CREATE,		{ .vop_create = lxd_create },
+	VOPNAME_REMOVE,		{ .vop_remove = lxd_remove },
+	VOPNAME_LINK,		{ .vop_link = lxd_link },
+	VOPNAME_RENAME,		{ .vop_rename = lxd_rename },
+	VOPNAME_MKDIR,		{ .vop_mkdir = lxd_mkdir },
+	VOPNAME_RMDIR,		{ .vop_rmdir = lxd_rmdir },
+	VOPNAME_READDIR,	{ .vop_readdir = lxd_readdir },
+	VOPNAME_SYMLINK,	{ .vop_symlink = lxd_symlink },
+	VOPNAME_READLINK,	{ .vop_readlink = lxd_readlink },
+	VOPNAME_FSYNC,		{ .vop_fsync = lxd_fsync },
+	VOPNAME_INACTIVE,	{ .vop_inactive = lxd_inactive },
+	VOPNAME_FID,		{ .vop_fid = lxd_fid },
+	VOPNAME_RWLOCK,		{ .vop_rwlock = lxd_rwlock },
+	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = lxd_rwunlock },
+	VOPNAME_SEEK,		{ .vop_seek = lxd_seek },
+	VOPNAME_CMP,		{ .vop_cmp = lxd_cmp },
+	VOPNAME_FRLOCK,		{ .vop_frlock = lxd_frlock },
+	VOPNAME_SPACE,		{ .vop_space = lxd_space },
+	VOPNAME_REALVP,		{ .vop_realvp = lxd_realvp },
+	VOPNAME_GETPAGE,	{ .vop_getpage = lxd_getpage },
+	VOPNAME_PUTPAGE,	{ .vop_putpage = lxd_putpage },
+	VOPNAME_MAP,		{ .vop_map = lxd_map },
+	VOPNAME_ADDMAP,		{ .vop_addmap = lxd_addmap },
+	VOPNAME_DELMAP,		{ .vop_delmap = lxd_delmap },
+	VOPNAME_POLL,		{ .vop_poll = lxd_poll },
+	VOPNAME_DUMP,		{ .vop_dump = lxd_dump },
+	VOPNAME_DUMPCTL,	{ .error = fs_error },
+	VOPNAME_PATHCONF,	{ .vop_pathconf = lxd_pathconf },
+	VOPNAME_PAGEIO,		{ .vop_pageio = lxd_pageio },
+	VOPNAME_DISPOSE,	{ .vop_dispose = lxd_dispose },
+	VOPNAME_SETSECATTR,	{ .vop_setsecattr = lxd_setsecattr },
+	VOPNAME_GETSECATTR,	{ .vop_getsecattr = lxd_getsecattr },
+	VOPNAME_SHRLOCK,	{ .vop_shrlock = lxd_shrlock },
+	NULL,			NULL
+};
diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files
index 6ec848acf9..337ad94679 100644
--- a/usr/src/uts/intel/Makefile.files
+++ b/usr/src/uts/intel/Makefile.files
@@ -105,6 +105,11 @@ LX_CGROUP_OBJS +=	\
 	cgrps_vfsops.o	\
 	cgrps_vnops.o
 
+LX_DEVFS_OBJS +=	\
+	lxd_node.o	\
+	lxd_vfsops.o	\
+	lxd_vnops.o
+
 LX_PROC_OBJS +=	\
 	lx_prsubr.o	\
 	lx_prvfsops.o	\
diff --git a/usr/src/uts/intel/Makefile.intel b/usr/src/uts/intel/Makefile.intel
index b1d41d1e88..7972c61df2 100644
--- a/usr/src/uts/intel/Makefile.intel
+++ b/usr/src/uts/intel/Makefile.intel
@@ -545,7 +545,7 @@ SCHED_KMODS	+= IA RT TS RT_DPTBL TS_DPTBL FSS FX FX_DPTBL SDC
 FS_KMODS	+= autofs ctfs dcfs dev devfs fdfs fifofs hsfs hyprlofs
 FS_KMODS	+= lofs lxautofs lx_proc lxprocfs mntfs namefs nfs objfs zfs zut
 FS_KMODS	+= pcfs procfs sockfs specfs tmpfs udfs ufs sharefs lx_sysfs
-FS_KMODS	+= smbfs bootfs lx_cgroup
+FS_KMODS	+= smbfs bootfs lx_cgroup lx_devfs
 
 #
 #	Streams Modules (/kernel/strmod):
diff --git a/usr/src/uts/intel/lx_devfs/Makefile b/usr/src/uts/intel/lx_devfs/Makefile
new file mode 100644
index 0000000000..1254f596eb
--- /dev/null
+++ b/usr/src/uts/intel/lx_devfs/Makefile
@@ -0,0 +1,57 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.
+#
+
+UTSBASE	= ../..
+
+LX_CMN		= $(SRC)/common/brand/lx
+
+MODULE		= lx_devfs
+OBJECTS		= $(LX_DEVFS_OBJS:%=$(OBJS_DIR)/%)
+LINTS		= $(LX_DEVFS_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE	= $(USR_FS_DIR)/$(MODULE)
+
+INC_PATH	+= -I$(UTSBASE)/common/brand/lx -I$(LX_CMN)
+
+include $(UTSBASE)/intel/Makefile.intel
+
+ALL_TARGET	= $(BINARY)
+LINT_TARGET	= $(MODULE).lint
+INSTALL_TARGET	= $(BINARY) $(ROOTMODULE)
+
+CFLAGS		+= $(CCVERBOSE)
+
+LDFLAGS		+= -dy -Nbrand/lx_brand
+
+.KEEP_STATE:
+
+def:		$(DEF_DEPS)
+
+all:		$(ALL_DEPS)
+
+clean:		$(CLEAN_DEPS)
+
+clobber:	$(CLOBBER_DEPS)
+
+lint:		$(LINT_DEPS)
+
+modlintlib:	$(MODLINTLIB_DEPS)
+
+clean.lint:	$(CLEAN_LINT_DEPS)
+
+install:	$(INSTALL_DEPS)
+
+include $(UTSBASE)/intel/Makefile.targ
+
+include $(UTSBASE)/intel/lx_devfs/Makefile.rules
diff --git a/usr/src/uts/intel/lx_devfs/Makefile.rules b/usr/src/uts/intel/lx_devfs/Makefile.rules
new file mode 100644
index 0000000000..4b9748314c
--- /dev/null
+++ b/usr/src/uts/intel/lx_devfs/Makefile.rules
@@ -0,0 +1,21 @@
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright 2015 Joyent, Inc.  All rights reserved.
+#
+
+$(OBJS_DIR)/%.o:		$(UTSBASE)/common/brand/lx/devfs/%.c
+	$(COMPILE.c) -o $@ $<
+	$(CTFCONVERT_O)
+
+$(LINTS_DIR)/%.ln:		$(UTSBASE)/common/brand/lx/devfs/%.c
+	@($(LHEAD) $(LINT.c) $< $(LTAIL))
author	Jerry Jelinek <jerry.jelinek@joyent.com>	2015-12-28 18:00:01 +0000
committer	Jerry Jelinek <jerry.jelinek@joyent.com>	2015-12-28 18:00:01 +0000
commit	673d120c71e85f5f52136d47d1e44282d0e41632 (patch)
tree	dbca459bc3367d492053c4c00975c1343742e9ce
parent	180116059a3bc57660669b92f988a75f95d54257 (diff)
download	illumos-joyent-673d120c71e85f5f52136d47d1e44282d0e41632.tar.gz