| field | value | date |
|---|---|---|
| author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
| committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 |
| commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
| tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/io/devinfo.c | |
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/io/devinfo.c')
| mode | path | lines |
|---|---|---|
| -rw-r--r-- | usr/src/uts/common/io/devinfo.c | 3819 |

1 file changed, 3819 insertions(+), 0 deletions(-)
diff --git a/usr/src/uts/common/io/devinfo.c b/usr/src/uts/common/io/devinfo.c new file mode 100644 index 0000000000..bdcd90a038 --- /dev/null +++ b/usr/src/uts/common/io/devinfo.c @@ -0,0 +1,3819 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * driver for accessing kernel devinfo tree. + */ +#include <sys/types.h> +#include <sys/pathname.h> +#include <sys/debug.h> +#include <sys/autoconf.h> +#include <sys/conf.h> +#include <sys/file.h> +#include <sys/kmem.h> +#include <sys/modctl.h> +#include <sys/stat.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunldi_impl.h> +#include <sys/sunndi.h> +#include <sys/esunddi.h> +#include <sys/sunmdi.h> +#include <sys/ddi_impldefs.h> +#include <sys/ndi_impldefs.h> +#include <sys/mdi_impldefs.h> +#include <sys/devinfo_impl.h> +#include <sys/thread.h> +#include <sys/modhash.h> +#include <sys/bitmap.h> +#include <util/qsort.h> +#include <sys/disp.h> +#include <sys/kobj.h> +#include <sys/crc32.h> + + +#ifdef DEBUG +static int di_debug; +#define dcmn_err(args) if (di_debug >= 1) cmn_err args +#define dcmn_err2(args) if (di_debug >= 2) cmn_err args +#define dcmn_err3(args) if (di_debug >= 3) cmn_err args +#else +#define dcmn_err(args) /* nothing */ +#define dcmn_err2(args) /* nothing */ +#define dcmn_err3(args) /* nothing */ +#endif + +/* + * We partition the space of devinfo minor nodes equally between the full and + * unprivileged versions of the driver. The even-numbered minor nodes are the + * full version, while the odd-numbered ones are the read-only version. + */ +static int di_max_opens = 32; + +#define DI_FULL_PARENT 0 +#define DI_READONLY_PARENT 1 +#define DI_NODE_SPECIES 2 +#define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0) + +#define IOC_IDLE 0 /* snapshot ioctl states */ +#define IOC_SNAP 1 /* snapshot in progress */ +#define IOC_DONE 2 /* snapshot done, but not copied out */ +#define IOC_COPY 3 /* copyout in progress */ + +/* + * Keep max alignment so we can move snapshot to different platforms + */ +#define DI_ALIGN(addr) ((addr + 7l) & ~7l) + +/* + * To avoid wasting memory, make a linked list of memory chunks. + * Size of each chunk is buf_size. + */ +struct di_mem { + struct di_mem *next; /* link to next chunk */ + char *buf; /* contiguous kernel memory */ + size_t buf_size; /* size of buf in bytes */ + devmap_cookie_t cook; /* cookie from ddi_umem_alloc */ +}; + +/* + * This is a stack for walking the tree without using recursion. + * When the devinfo tree height is above some small size, one + * gets watchdog resets on sun4m. 
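The access-control and layout decisions above come down to two one-line macros: minor-number parity selects the full versus read-only flavor of the driver, and DI_ALIGN keeps every snapshot offset 8-byte aligned so the buffer can be moved across platforms. A minimal standalone sketch (the macro bodies are copied from the driver; the `main()` harness is illustrative only):

```c
#include <stdio.h>

#define DI_FULL_PARENT		0
#define DI_READONLY_PARENT	1
#define DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0)
#define DI_ALIGN(addr)		((addr + 7l) & ~7l)

int
main(void)
{
	/* Even minors are full-access, odd minors are read-only. */
	for (int m = 0; m < 6; m++)
		printf("minor %d -> %s\n", m,
		    DI_UNPRIVILEGED_NODE(m) ? "read-only" : "full");

	/* DI_ALIGN rounds an offset up to the next 8-byte boundary. */
	for (long off = 0; off <= 16; off += 5)
		printf("DI_ALIGN(%ld) = %ld\n", off, (long)DI_ALIGN(off));
	return (0);
}
```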
+ */ +struct di_stack { + void *offset[MAX_TREE_DEPTH]; + struct dev_info *dip[MAX_TREE_DEPTH]; + int circ[MAX_TREE_DEPTH]; + int depth; /* depth of current node to be copied */ +}; + +#define TOP_OFFSET(stack) \ + ((di_off_t *)(stack)->offset[(stack)->depth - 1]) +#define TOP_NODE(stack) \ + ((stack)->dip[(stack)->depth - 1]) +#define PARENT_OFFSET(stack) \ + ((di_off_t *)(stack)->offset[(stack)->depth - 2]) +#define EMPTY_STACK(stack) ((stack)->depth == 0) +#define POP_STACK(stack) { \ + ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \ + (stack)->circ[(stack)->depth - 1]); \ + ((stack)->depth--); \ +} +#define PUSH_STACK(stack, node, offp) { \ + ASSERT(node != NULL); \ + ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \ + (stack)->dip[(stack)->depth] = (node); \ + (stack)->offset[(stack)->depth] = (void *)(offp); \ + ((stack)->depth)++; \ +} + +#define DI_ALL_PTR(s) ((struct di_all *)di_mem_addr((s), 0)) + +/* + * With devfs, the device tree has no global locks. The device tree is + * dynamic and dips may come and go if they are not locked locally. Under + * these conditions, pointers are no longer reliable as unique IDs. + * Specifically, these pointers cannot be used as keys for hash tables + * as the same devinfo structure may be freed in one part of the tree only + * to be allocated as the structure for a different device in another + * part of the tree. This can happen if DR and the snapshot are + * happening concurrently. + * The following data structures act as keys for devinfo nodes and + * pathinfo nodes. + */ + +enum di_ktype { + DI_DKEY = 1, + DI_PKEY = 2 +}; + +struct di_dkey { + dev_info_t *dk_dip; + major_t dk_major; + int dk_inst; + dnode_t dk_nodeid; +}; + +struct di_pkey { + mdi_pathinfo_t *pk_pip; + char *pk_path_addr; + dev_info_t *pk_client; + dev_info_t *pk_phci; +}; + +struct di_key { + enum di_ktype k_type; + union { + struct di_dkey dkey; + struct di_pkey pkey; + } k_u; +}; + + +struct i_lnode; + +typedef struct i_link { + /* + * If a di_link struct representing this i_link struct makes it + * into the snapshot, then self will point to the offset of + * the di_link struct in the snapshot + */ + di_off_t self; + + int spec_type; /* block or char access type */ + struct i_lnode *src_lnode; /* src i_lnode */ + struct i_lnode *tgt_lnode; /* tgt i_lnode */ + struct i_link *src_link_next; /* next src i_link /w same i_lnode */ + struct i_link *tgt_link_next; /* next tgt i_link /w same i_lnode */ +} i_link_t; + +typedef struct i_lnode { + /* + * If a di_lnode struct representing this i_lnode struct makes it + * into the snapshot, then self will point to the offset of + * the di_lnode struct in the snapshot + */ + di_off_t self; + + /* + * used for hashing and comparing i_lnodes + */ + int modid; + + /* + * public information describing a link endpoint + */ + struct di_node *di_node; /* di_node in snapshot */ + dev_t devt; /* devt */ + + /* + * i_link ptr to links coming into this i_lnode node + * (this i_lnode is the target of these i_links) + */ + i_link_t *link_in; + + /* + * i_link ptr to links going out of this i_lnode node + * (this i_lnode is the source of these i_links) + */ + i_link_t *link_out; +} i_lnode_t; + +/* + * Soft state associated with each instance of driver open. 
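di_copynode(), further below, consumes this di_stack through the PUSH_STACK/POP_STACK macros so that deep device trees never overrun the kernel stack; the macros also take and drop the per-node ndi_devi_enter() lock as nodes come and go. A sketch of just the traversal order an explicit stack buys, over a hypothetical first-child/next-sibling `node_t` (`walk_tree()` and `MAX_DEPTH` are illustrative, and the locking discipline is omitted):

```c
#include <stddef.h>

#define MAX_DEPTH 64		/* assumed deep enough, like MAX_TREE_DEPTH */

typedef struct node {
	struct node *child;	/* first child */
	struct node *sibling;	/* next sibling */
} node_t;

static void
walk_tree(node_t *root, void (*visit)(node_t *))
{
	node_t *stack[MAX_DEPTH];
	int depth = 0;

	stack[depth++] = root;
	while (depth > 0) {
		node_t *n = stack[--depth];	/* pop */

		visit(n);
		/*
		 * Push the sibling first so the child is visited next,
		 * matching the order plain recursion would produce.
		 */
		if (n->sibling != NULL)
			stack[depth++] = n->sibling;
		if (n->child != NULL)
			stack[depth++] = n->child;
	}
}
```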
+ */ +static struct di_state { + di_off_t mem_size; /* total # bytes in memlist */ + struct di_mem *memlist; /* head of memlist */ + uint_t command; /* command from ioctl */ + int di_iocstate; /* snapshot ioctl state */ + mod_hash_t *reg_dip_hash; + mod_hash_t *reg_pip_hash; + int lnode_count; + int link_count; + + mod_hash_t *lnode_hash; + mod_hash_t *link_hash; +} **di_states; + +static kmutex_t di_lock; /* serialize instance assignment */ + +typedef enum { + DI_QUIET = 0, /* DI_QUIET must always be 0 */ + DI_ERR, + DI_INFO, + DI_TRACE, + DI_TRACE1, + DI_TRACE2 +} di_cache_debug_t; + +static uint_t di_chunk = 32; /* I/O chunk size in pages */ + +#define DI_CACHE_LOCK(c) (mutex_enter(&(c).cache_lock)) +#define DI_CACHE_UNLOCK(c) (mutex_exit(&(c).cache_lock)) +#define DI_CACHE_LOCKED(c) (mutex_owned(&(c).cache_lock)) + +#define CACHE_DEBUG(args) \ + { if (di_cache_debug != DI_QUIET) di_cache_print args; } + +static int di_open(dev_t *, int, int, cred_t *); +static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); +static int di_close(dev_t, int, int, cred_t *); +static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **); +static int di_attach(dev_info_t *, ddi_attach_cmd_t); +static int di_detach(dev_info_t *, ddi_detach_cmd_t); + +static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int); +static di_off_t di_snapshot(struct di_state *); +static di_off_t di_copydevnm(di_off_t *, struct di_state *); +static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *); +static di_off_t di_copynode(struct di_stack *, struct di_state *); +static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t, + struct di_state *); +static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *); +static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *); +static di_off_t di_getprop(struct ddi_prop *, di_off_t *, + struct di_state *, struct dev_info *, int); +static void di_allocmem(struct di_state *, size_t); +static void di_freemem(struct di_state *); +static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz); +static di_off_t di_checkmem(struct di_state *, di_off_t, size_t); +static caddr_t di_mem_addr(struct di_state *, di_off_t); +static int di_setstate(struct di_state *, int); +static void di_register_dip(struct di_state *, dev_info_t *, di_off_t); +static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t); +static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t, + struct di_state *, int); +static di_off_t di_getlink_data(di_off_t, struct di_state *); +static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p); + +static int cache_args_valid(struct di_state *st, int *error); +static int snapshot_is_cacheable(struct di_state *st); +static int di_cache_lookup(struct di_state *st); +static int di_cache_update(struct di_state *st); +static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...); + +static struct cb_ops di_cb_ops = { + di_open, /* open */ + di_close, /* close */ + nodev, /* strategy */ + nodev, /* print */ + nodev, /* dump */ + nodev, /* read */ + nodev, /* write */ + di_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + ddi_prop_op, /* prop_op */ + NULL, /* streamtab */ + D_NEW | D_MP /* Driver compatibility flag */ +}; + +static struct dev_ops di_ops = { + DEVO_REV, /* devo_rev, */ + 0, /* refcnt */ + di_info, /* info */ + nulldev, /* identify */ + nulldev, /* probe */ + 
di_attach, /* attach */ + di_detach, /* detach */ + nodev, /* reset */ + &di_cb_ops, /* driver operations */ + NULL /* bus operations */ +}; + +/* + * Module linkage information for the kernel. + */ +static struct modldrv modldrv = { + &mod_driverops, + "DEVINFO Driver %I%", + &di_ops +}; + +static struct modlinkage modlinkage = { + MODREV_1, + &modldrv, + NULL +}; + +int +_init(void) +{ + int error; + + mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL); + + error = mod_install(&modlinkage); + if (error != 0) { + mutex_destroy(&di_lock); + return (error); + } + + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int error; + + error = mod_remove(&modlinkage); + if (error != 0) { + return (error); + } + + mutex_destroy(&di_lock); + return (0); +} + +static dev_info_t *di_dip; + +/*ARGSUSED*/ +static int +di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ + int error = DDI_FAILURE; + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + *result = (void *)di_dip; + error = DDI_SUCCESS; + break; + case DDI_INFO_DEVT2INSTANCE: + /* + * All dev_t's map to the same, single instance. + */ + *result = (void *)0; + error = DDI_SUCCESS; + break; + default: + break; + } + + return (error); +} + +static int +di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int error = DDI_FAILURE; + + switch (cmd) { + case DDI_ATTACH: + di_states = kmem_zalloc( + di_max_opens * sizeof (struct di_state *), KM_SLEEP); + + if (ddi_create_minor_node(dip, "devinfo", S_IFCHR, + DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE || + ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR, + DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) { + kmem_free(di_states, + di_max_opens * sizeof (struct di_state *)); + ddi_remove_minor_node(dip, NULL); + error = DDI_FAILURE; + } else { + di_dip = dip; + ddi_report_dev(dip); + + error = DDI_SUCCESS; + } + break; + default: + error = DDI_FAILURE; + break; + } + + return (error); +} + +static int +di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + int error = DDI_FAILURE; + + switch (cmd) { + case DDI_DETACH: + ddi_remove_minor_node(dip, NULL); + di_dip = NULL; + kmem_free(di_states, di_max_opens * sizeof (struct di_state *)); + + error = DDI_SUCCESS; + break; + default: + error = DDI_FAILURE; + break; + } + + return (error); +} + +/* + * Allow multiple opens by tweaking the dev_t such that it looks like each + * open is getting a different minor device. Each minor gets a separate + * entry in the di_states[] table. Based on the original minor number, we + * discriminate opens of the full and read-only nodes. If all of the instances + * of the selected minor node are currently open, we return EAGAIN. + */ +/*ARGSUSED*/ +static int +di_open(dev_t *devp, int flag, int otyp, cred_t *credp) +{ + int m; + minor_t minor_parent = getminor(*devp); + + if (minor_parent != DI_FULL_PARENT && + minor_parent != DI_READONLY_PARENT) + return (ENXIO); + + mutex_enter(&di_lock); + + for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) { + if (di_states[m] != NULL) + continue; + + di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP); + break; /* It's ours. 
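di_open(), in progress here, realizes the multiplexing scheme described in the comment above it: an open of minor DI_FULL_PARENT claims the first free even slot of di_states[], an open of DI_READONLY_PARENT the first free odd slot, and the minor handed back is slot + DI_NODE_SPECIES so it can never collide with the two parent minors. A toy model of the slot arithmetic (`toy_open()`, `slot[]`, and `MAX_OPENS` are hypothetical stand-ins for the driver's state):

```c
#include <stdio.h>
#include <stddef.h>

#define DI_FULL_PARENT		0
#define DI_READONLY_PARENT	1
#define DI_NODE_SPECIES		2
#define MAX_OPENS		8	/* di_max_opens is 32 in the driver */

static void *slot[MAX_OPENS];		/* stands in for di_states[] */

/*
 * Return the minor an opener would see, or -1 when every slot of the
 * requested parity is busy (where the driver returns EAGAIN).
 */
static int
toy_open(int minor_parent)
{
	for (int m = minor_parent; m < MAX_OPENS; m += DI_NODE_SPECIES) {
		if (slot[m] == NULL) {
			slot[m] = (void *)1;	/* claim the slot */
			return (m + DI_NODE_SPECIES);
		}
	}
	return (-1);
}

int
main(void)
{
	/* Two full opens claim slots 0 and 2, yielding minors 2 and 4. */
	printf("%d %d\n", toy_open(DI_FULL_PARENT), toy_open(DI_FULL_PARENT));
	/* A read-only open claims slot 1, yielding minor 3. */
	printf("%d\n", toy_open(DI_READONLY_PARENT));
	return (0);
}
```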
*/ + } + + if (m >= di_max_opens) { + /* + * maximum open instance for device reached + */ + mutex_exit(&di_lock); + dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached")); + return (EAGAIN); + } + mutex_exit(&di_lock); + + ASSERT(m < di_max_opens); + *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES)); + + dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n", + (void *)curthread, m + DI_NODE_SPECIES)); + + return (0); +} + +/*ARGSUSED*/ +static int +di_close(dev_t dev, int flag, int otype, cred_t *cred_p) +{ + struct di_state *st; + int m = (int)getminor(dev) - DI_NODE_SPECIES; + + if (m < 0) { + cmn_err(CE_WARN, "closing non-existent devinfo minor %d", + m + DI_NODE_SPECIES); + return (ENXIO); + } + + st = di_states[m]; + ASSERT(m < di_max_opens && st != NULL); + + di_freemem(st); + kmem_free(st, sizeof (struct di_state)); + + /* + * empty slot in state table + */ + mutex_enter(&di_lock); + di_states[m] = NULL; + dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n", + (void *)curthread, m + DI_NODE_SPECIES)); + mutex_exit(&di_lock); + + return (0); +} + + +/*ARGSUSED*/ +static int +di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) +{ + int rv, error; + di_off_t off; + struct di_all *all; + struct di_state *st; + int m = (int)getminor(dev) - DI_NODE_SPECIES; + + major_t i; + char *drv_name; + size_t map_size, size; + struct di_mem *dcp; + int ndi_flags; + + if (m < 0 || m >= di_max_opens) { + return (ENXIO); + } + + st = di_states[m]; + ASSERT(st != NULL); + + dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd)); + + switch (cmd) { + case DINFOIDENT: + /* + * This is called from di_init to verify that the driver + * opened is indeed devinfo. The purpose is to guard against + * sending ioctl to an unknown driver in case of an + * unresolved major number conflict during bfu. + */ + *rvalp = DI_MAGIC; + return (0); + + case DINFOLODRV: + /* + * Hold an installed driver and return the result + */ + if (DI_UNPRIVILEGED_NODE(m)) { + /* + * Only the fully enabled instances may issue + * DINFOLDDRV. + */ + return (EACCES); + } + + drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); + if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) { + kmem_free(drv_name, MAXNAMELEN); + return (EFAULT); + } + + /* + * Some 3rd party driver's _init() walks the device tree, + * so we load the driver module before configuring driver. + */ + i = ddi_name_to_major(drv_name); + if (ddi_hold_driver(i) == NULL) { + kmem_free(drv_name, MAXNAMELEN); + return (ENXIO); + } + + ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT; + + /* + * i_ddi_load_drvconf() below will trigger a reprobe + * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't + * needed here. + */ + modunload_disable(); + (void) i_ddi_load_drvconf(i); + (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i); + kmem_free(drv_name, MAXNAMELEN); + ddi_rele_driver(i); + rv = i_ddi_devs_attached(i); + modunload_enable(); + + i_ddi_di_cache_invalidate(KM_SLEEP); + + return ((rv == DDI_SUCCESS)? 
0 : ENXIO); + + case DINFOUSRLD: + /* + * The case for copying snapshot to userland + */ + if (di_setstate(st, IOC_COPY) == -1) + return (EBUSY); + + map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size; + if (map_size == 0) { + (void) di_setstate(st, IOC_DONE); + return (EFAULT); + } + + /* + * copyout the snapshot + */ + map_size = (map_size + PAGEOFFSET) & PAGEMASK; + + /* + * Return the map size, so caller may do a sanity + * check against the return value of snapshot ioctl() + */ + *rvalp = (int)map_size; + + /* + * Copy one chunk at a time + */ + off = 0; + dcp = st->memlist; + while (map_size) { + size = dcp->buf_size; + if (map_size <= size) { + size = map_size; + } + + if (ddi_copyout(di_mem_addr(st, off), + (void *)(arg + off), size, mode) != 0) { + (void) di_setstate(st, IOC_DONE); + return (EFAULT); + } + + map_size -= size; + off += size; + dcp = dcp->next; + } + + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (0); + + default: + if ((cmd & ~DIIOC_MASK) != DIIOC) { + /* + * Invalid ioctl command + */ + return (ENOTTY); + } + /* + * take a snapshot + */ + st->command = cmd & DIIOC_MASK; + /*FALLTHROUGH*/ + } + + /* + * Obtain enough memory to hold header + rootpath. We prevent kernel + * memory exhaustion by freeing any previously allocated snapshot and + * refusing the operation; otherwise we would be allowing ioctl(), + * ioctl(), ioctl(), ..., panic. + */ + if (di_setstate(st, IOC_SNAP) == -1) + return (EBUSY); + + size = sizeof (struct di_all) + + sizeof (((struct dinfo_io *)(NULL))->root_path); + if (size < PAGESIZE) + size = PAGESIZE; + di_allocmem(st, size); + + all = (struct di_all *)di_mem_addr(st, 0); + all->devcnt = devcnt; + all->command = st->command; + all->version = DI_SNAPSHOT_VERSION; + + /* + * Note the endianness in case we need to transport snapshot + * over the network. + */ +#if defined(_LITTLE_ENDIAN) + all->endianness = DI_LITTLE_ENDIAN; +#else + all->endianness = DI_BIG_ENDIAN; +#endif + + /* Copyin ioctl args, store in the snapshot. */ + if (copyinstr((void *)arg, all->root_path, + sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) { + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (EFAULT); + } + + error = 0; + if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) { + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (error); + } + + off = DI_ALIGN(sizeof (struct di_all) + size); + + /* + * Only the fully enabled version may force load drivers or read + * the parent private data from a driver. + */ + if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 && + DI_UNPRIVILEGED_NODE(m)) { + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (EACCES); + } + + /* Do we need private data? 
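The DINFOUSRLD copyout above never assumes the snapshot is contiguous: it walks the di_mem chunk list and moves min(chunk size, bytes remaining) per iteration. The same loop shape in plain C, with memcpy standing in for ddi_copyout and a hypothetical `chunk_t` for struct di_mem (as in the driver, `map_size` must not exceed the bytes held in the list):

```c
#include <string.h>
#include <stddef.h>

typedef struct chunk {
	struct chunk	*next;
	char		*buf;
	size_t		buf_size;
} chunk_t;

/* Copy map_size bytes of a chunked snapshot into one flat buffer. */
static void
copy_chunks(const chunk_t *dcp, char *dst, size_t map_size)
{
	while (map_size > 0) {
		size_t size = dcp->buf_size;

		if (map_size <= size)	/* the tail of the last chunk */
			size = map_size;

		memcpy(dst, dcp->buf, size);
		dst += size;
		map_size -= size;
		dcp = dcp->next;
	}
}
```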
*/ + if (st->command & DINFOPRIVDATA) { + arg += sizeof (((struct dinfo_io *)(NULL))->root_path); + +#ifdef _MULTI_DATAMODEL + switch (ddi_model_convert_from(mode & FMODELS)) { + case DDI_MODEL_ILP32: { + /* + * Cannot copy private data from 64-bit kernel + * to 32-bit app + */ + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (EINVAL); + } + case DDI_MODEL_NONE: + if ((off = di_copyformat(off, st, arg, mode)) == 0) { + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (EFAULT); + } + break; + } +#else /* !_MULTI_DATAMODEL */ + if ((off = di_copyformat(off, st, arg, mode)) == 0) { + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + return (EFAULT); + } +#endif /* _MULTI_DATAMODEL */ + } + + all->top_devinfo = DI_ALIGN(off); + + /* + * For cache lookups we reallocate memory from scratch, + * so the value of "all" is no longer valid. + */ + all = NULL; + + if (st->command & DINFOCACHE) { + *rvalp = di_cache_lookup(st); + } else if (snapshot_is_cacheable(st)) { + DI_CACHE_LOCK(di_cache); + *rvalp = di_cache_update(st); + DI_CACHE_UNLOCK(di_cache); + } else { + modunload_disable(); + *rvalp = di_snapshot(st); + modunload_enable(); + } + + if (*rvalp) { + DI_ALL_PTR(st)->map_size = *rvalp; + (void) di_setstate(st, IOC_DONE); + } else { + di_freemem(st); + (void) di_setstate(st, IOC_IDLE); + } + + return (0); +} + +/* + * Get a chunk of memory >= size, for the snapshot + */ +static void +di_allocmem(struct di_state *st, size_t size) +{ + struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), + KM_SLEEP); + /* + * Round up size to nearest power of 2. If it is less + * than st->mem_size, set it to st->mem_size (i.e., + * the mem_size is doubled every time) to reduce the + * number of memory allocations. + */ + size_t tmp = 1; + while (tmp < size) { + tmp <<= 1; + } + size = (tmp > st->mem_size) ? tmp : st->mem_size; + + mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook); + mem->buf_size = size; + + dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size)); + + if (st->mem_size == 0) { /* first chunk */ + st->memlist = mem; + } else { + /* + * locate end of linked list and add a chunk at the end + */ + struct di_mem *dcp = st->memlist; + while (dcp->next != NULL) { + dcp = dcp->next; + } + + dcp->next = mem; + } + + st->mem_size += size; +} + +/* + * Copy upto bufsiz bytes of the memlist to buf + */ +static void +di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz) +{ + struct di_mem *dcp; + size_t copysz; + + if (st->mem_size == 0) { + ASSERT(st->memlist == NULL); + return; + } + + copysz = 0; + for (dcp = st->memlist; dcp; dcp = dcp->next) { + + ASSERT(bufsiz > 0); + + if (bufsiz <= dcp->buf_size) + copysz = bufsiz; + else + copysz = dcp->buf_size; + + bcopy(dcp->buf, buf, copysz); + + buf += copysz; + bufsiz -= copysz; + + if (bufsiz == 0) + break; + } +} + +/* + * Free all memory for the snapshot + */ +static void +di_freemem(struct di_state *st) +{ + struct di_mem *dcp, *tmp; + + dcmn_err2((CE_CONT, "di_freemem\n")); + + if (st->mem_size) { + dcp = st->memlist; + while (dcp) { /* traverse the linked list */ + tmp = dcp; + dcp = dcp->next; + ddi_umem_free(tmp->cook); + kmem_free(tmp, sizeof (struct di_mem)); + } + st->mem_size = 0; + st->memlist = NULL; + } + + ASSERT(st->mem_size == 0); + ASSERT(st->memlist == NULL); +} + +/* + * Copies cached data to the di_state structure. 
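di_allocmem() above rounds every request up to a power of two and never allocates less than the running total st->mem_size, so chunk sizes at least double and the number of ddi_umem_alloc() calls stays logarithmic in the final snapshot size. The sizing rule in isolation (`next_chunk_size()` is a hypothetical name for the inline computation):

```c
#include <stdio.h>
#include <stddef.h>

static size_t
next_chunk_size(size_t request, size_t mem_size_so_far)
{
	size_t tmp = 1;

	while (tmp < request)		/* round up to a power of two */
		tmp <<= 1;
	/* never shrink below the running total: chunks keep doubling */
	return (tmp > mem_size_so_far ? tmp : mem_size_so_far);
}

int
main(void)
{
	size_t total = 0;
	size_t req[] = { 4096, 100, 9000, 100 };

	for (int i = 0; i < 4; i++) {
		size_t c = next_chunk_size(req[i], total);

		total += c;
		printf("request %zu -> chunk %zu (total %zu)\n",
		    req[i], c, total);
	}
	return (0);
}
```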
+ * Returns: + * - size of data copied, on SUCCESS + * - 0 on failure + */ +static int +di_cache2mem(struct di_cache *cache, struct di_state *st) +{ + caddr_t pa; + + ASSERT(st->mem_size == 0); + ASSERT(st->memlist == NULL); + ASSERT(!servicing_interrupt()); + ASSERT(DI_CACHE_LOCKED(*cache)); + + if (cache->cache_size == 0) { + ASSERT(cache->cache_data == NULL); + CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy")); + return (0); + } + + ASSERT(cache->cache_data); + + di_allocmem(st, cache->cache_size); + + pa = di_mem_addr(st, 0); + + ASSERT(pa); + + /* + * Verify that di_allocmem() allocates contiguous memory, + * so that it is safe to do straight bcopy() + */ + ASSERT(st->memlist != NULL); + ASSERT(st->memlist->next == NULL); + bcopy(cache->cache_data, pa, cache->cache_size); + + return (cache->cache_size); +} + +/* + * Copies a snapshot from di_state to the cache + * Returns: + * - 0 on failure + * - size of copied data on success + */ +static int +di_mem2cache(struct di_state *st, struct di_cache *cache) +{ + size_t map_size; + + ASSERT(cache->cache_size == 0); + ASSERT(cache->cache_data == NULL); + ASSERT(!servicing_interrupt()); + ASSERT(DI_CACHE_LOCKED(*cache)); + + if (st->mem_size == 0) { + ASSERT(st->memlist == NULL); + CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy")); + return (0); + } + + ASSERT(st->memlist); + + /* + * The size of the memory list may be much larger than the + * size of valid data (map_size). Cache only the valid data + */ + map_size = DI_ALL_PTR(st)->map_size; + if (map_size == 0 || map_size < sizeof (struct di_all) || + map_size > st->mem_size) { + CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size)); + return (0); + } + + cache->cache_data = kmem_alloc(map_size, KM_SLEEP); + cache->cache_size = map_size; + di_copymem(st, cache->cache_data, cache->cache_size); + + return (map_size); +} + +/* + * Make sure there is at least "size" bytes memory left before + * going on. Otherwise, start on a new chunk. + */ +static di_off_t +di_checkmem(struct di_state *st, di_off_t off, size_t size) +{ + dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n", + off, (int)size)); + + /* + * di_checkmem() shouldn't be called with a size of zero. + * But in case it is, we want to make sure we return a valid + * offset within the memlist and not an offset that points us + * at the end of the memlist. + */ + if (size == 0) { + dcmn_err((CE_WARN, "di_checkmem: invalid zero size used")); + size = 1; + } + + off = DI_ALIGN(off); + if ((st->mem_size - off) < size) { + off = st->mem_size; + di_allocmem(st, size); + } + + return (off); +} + +/* + * Copy the private data format from ioctl arg. + * On success, the ending offset is returned. On error 0 is returned. + */ +static di_off_t +di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode) +{ + di_off_t size; + struct di_priv_data *priv; + struct di_all *all = (struct di_all *)di_mem_addr(st, 0); + + dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n", + off, (void *)arg, mode)); + + /* + * Copyin data and check version. + * We only handle private data version 0. + */ + priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP); + if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data), + mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) { + kmem_free(priv, sizeof (struct di_priv_data)); + return (0); + } + + /* + * Save di_priv_data copied from userland in snapshot. 
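di_checkmem() above is the allocator-facing half of that scheme: it hands back an aligned offset guaranteed to have `size` bytes behind it, starting the object at the end of allocated memory whenever the current chunk cannot fit it. Callers must always continue from the returned offset, never the one they passed in. A flat-arena sketch of the contract (`arena_t`, `checkmem()`, and `arena_grow()` are illustrative names):

```c
#include <stddef.h>

#define ALIGN8(x)	(((x) + 7ul) & ~7ul)	/* same rounding as DI_ALIGN */

typedef struct arena {
	size_t	mem_size;	/* total bytes backing the snapshot */
} arena_t;

/*
 * Stand-in for di_allocmem(): the driver appends a power-of-two sized
 * chunk; here only the running total is tracked.
 */
static void
arena_grow(arena_t *a, size_t size)
{
	a->mem_size += size;
}

/*
 * Return an aligned offset with at least `size` bytes behind it,
 * starting on fresh memory when the remaining space cannot fit the
 * object. Callers must continue from the returned offset.
 */
static size_t
checkmem(arena_t *a, size_t off, size_t size)
{
	off = ALIGN8(off);
	if (a->mem_size - off < size) {
		off = a->mem_size;
		arena_grow(a, size);
	}
	return (off);
}
```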
+ */ + all->pd_version = priv->version; + all->n_ppdata = priv->n_parent; + all->n_dpdata = priv->n_driver; + + /* + * copyin private data format, modify offset accordingly + */ + if (all->n_ppdata) { /* parent private data format */ + /* + * check memory + */ + size = all->n_ppdata * sizeof (struct di_priv_format); + off = di_checkmem(st, off, size); + all->ppdata_format = off; + if (ddi_copyin(priv->parent, di_mem_addr(st, off), size, + mode) != 0) { + kmem_free(priv, sizeof (struct di_priv_data)); + return (0); + } + + off += size; + } + + if (all->n_dpdata) { /* driver private data format */ + /* + * check memory + */ + size = all->n_dpdata * sizeof (struct di_priv_format); + off = di_checkmem(st, off, size); + all->dpdata_format = off; + if (ddi_copyin(priv->driver, di_mem_addr(st, off), size, + mode) != 0) { + kmem_free(priv, sizeof (struct di_priv_data)); + return (0); + } + + off += size; + } + + kmem_free(priv, sizeof (struct di_priv_data)); + return (off); +} + +/* + * Return the real address based on the offset (off) within snapshot + */ +static caddr_t +di_mem_addr(struct di_state *st, di_off_t off) +{ + struct di_mem *dcp = st->memlist; + + dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n", + (void *)dcp, off)); + + ASSERT(off < st->mem_size); + + while (off >= dcp->buf_size) { + off -= dcp->buf_size; + dcp = dcp->next; + } + + dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n", + off, (void *)(dcp->buf + off))); + + return (dcp->buf + off); +} + +/* + * Ideally we would use the whole key to derive the hash + * value. However, the probability that two keys will + * have the same dip (or pip) is very low, so + * hashing by dip (or pip) pointer should suffice. + */ +static uint_t +di_hash_byptr(void *arg, mod_hash_key_t key) +{ + struct di_key *dik = key; + size_t rshift; + void *ptr; + + ASSERT(arg == NULL); + + switch (dik->k_type) { + case DI_DKEY: + ptr = dik->k_u.dkey.dk_dip; + rshift = highbit(sizeof (struct dev_info)); + break; + case DI_PKEY: + ptr = dik->k_u.pkey.pk_pip; + rshift = highbit(sizeof (struct mdi_pathinfo)); + break; + default: + panic("devinfo: unknown key type"); + /*NOTREACHED*/ + } + return (mod_hash_byptr((void *)rshift, ptr)); +} + +static void +di_key_dtor(mod_hash_key_t key) +{ + char *path_addr; + struct di_key *dik = key; + + switch (dik->k_type) { + case DI_DKEY: + break; + case DI_PKEY: + path_addr = dik->k_u.pkey.pk_path_addr; + if (path_addr) + kmem_free(path_addr, strlen(path_addr) + 1); + break; + default: + panic("devinfo: unknown key type"); + /*NOTREACHED*/ + } + + kmem_free(dik, sizeof (struct di_key)); +} + +static int +di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2) +{ + if (dk1->dk_dip != dk2->dk_dip) + return (dk1->dk_dip > dk2->dk_dip ? 1 : -1); + + if (dk1->dk_major != -1 && dk2->dk_major != -1) { + if (dk1->dk_major != dk2->dk_major) + return (dk1->dk_major > dk2->dk_major ? 1 : -1); + + if (dk1->dk_inst != dk2->dk_inst) + return (dk1->dk_inst > dk2->dk_inst ? 1 : -1); + } + + if (dk1->dk_nodeid != dk2->dk_nodeid) + return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1); + + return (0); +} + +static int +di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2) +{ + char *p1, *p2; + int rv; + + if (pk1->pk_pip != pk2->pk_pip) + return (pk1->pk_pip > pk2->pk_pip ? 1 : -1); + + p1 = pk1->pk_path_addr; + p2 = pk2->pk_path_addr; + + p1 = p1 ? p1 : ""; + p2 = p2 ? p2 : ""; + + rv = strcmp(p1, p2); + if (rv) + return (rv > 0 ? 1 : -1); + + if (pk1->pk_client != pk2->pk_client) + return (pk1->pk_client > pk2->pk_client ? 
1 : -1); + + if (pk1->pk_phci != pk2->pk_phci) + return (pk1->pk_phci > pk2->pk_phci ? 1 : -1); + + return (0); +} + +static int +di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2) +{ + struct di_key *dik1, *dik2; + + dik1 = key1; + dik2 = key2; + + if (dik1->k_type != dik2->k_type) { + panic("devinfo: mismatched keys"); + /*NOTREACHED*/ + } + + switch (dik1->k_type) { + case DI_DKEY: + return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey))); + case DI_PKEY: + return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey))); + default: + panic("devinfo: unknown key type"); + /*NOTREACHED*/ + } +} + +/* + * This is the main function that takes a snapshot + */ +static di_off_t +di_snapshot(struct di_state *st) +{ + di_off_t off; + struct di_all *all; + dev_info_t *rootnode; + char buf[80]; + + all = (struct di_all *)di_mem_addr(st, 0); + dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n")); + + /* + * Hold the devinfo node referred by the path. + */ + rootnode = e_ddi_hold_devi_by_path(all->root_path, 0); + if (rootnode == NULL) { + dcmn_err((CE_CONT, "Devinfo node %s not found\n", + all->root_path)); + return (0); + } + + (void) snprintf(buf, sizeof (buf), + "devinfo registered dips (statep=%p)", (void *)st); + + st->reg_dip_hash = mod_hash_create_extended(buf, 64, + di_key_dtor, mod_hash_null_valdtor, di_hash_byptr, + NULL, di_key_cmp, KM_SLEEP); + + + (void) snprintf(buf, sizeof (buf), + "devinfo registered pips (statep=%p)", (void *)st); + + st->reg_pip_hash = mod_hash_create_extended(buf, 64, + di_key_dtor, mod_hash_null_valdtor, di_hash_byptr, + NULL, di_key_cmp, KM_SLEEP); + + /* + * copy the device tree + */ + off = di_copytree(DEVI(rootnode), &all->top_devinfo, st); + + ddi_release_devi(rootnode); + + /* + * copy the devnames array + */ + all->devnames = off; + off = di_copydevnm(&all->devnames, st); + + + /* initialize the hash tables */ + st->lnode_count = 0; + st->link_count = 0; + + if (DINFOLYR & st->command) { + off = di_getlink_data(off, st); + } + + /* + * Free up hash tables + */ + mod_hash_destroy_hash(st->reg_dip_hash); + mod_hash_destroy_hash(st->reg_pip_hash); + + /* + * Record the timestamp now that we are done with snapshot. + * + * We compute the checksum later and then only if we cache + * the snapshot, since checksumming adds some overhead. + * The checksum is checked later if we read the cache file. + * from disk. + * + * Set checksum field to 0 as CRC is calculated with that + * field set to 0. + */ + all->snapshot_time = ddi_get_time(); + all->cache_checksum = 0; + + return (off); +} + +/* + * Assumes all devinfo nodes in device tree have been snapshotted + */ +static void +snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p) +{ + struct dev_info *node; + struct di_node *me; + di_off_t off; + + ASSERT(mutex_owned(&dnp->dn_lock)); + + node = DEVI(dnp->dn_head); + for (; node; node = node->devi_next) { + if (di_dip_find(st, (dev_info_t *)node, &off) != 0) + continue; + + ASSERT(off > 0); + me = (struct di_node *)di_mem_addr(st, off); + ASSERT(me->next == 0 || me->next == -1); + /* + * Only nodes which were BOUND when they were + * snapshotted will be added to per-driver list. + */ + if (me->next != -1) + continue; + + *poff_p = off; + poff_p = &me->next; + } + + *poff_p = 0; +} + +/* + * Copy the devnames array, so we have a list of drivers in the snapshot. + * Also makes it possible to locate the per-driver devinfo nodes. 
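snap_driver_list() above threads each per-driver node list through the snapshot with the classic pointer-to-previous-link idiom, except its links are di_off_t offsets (0 terminates a list, -1 marks a node still unresolved). The same idiom over ordinary pointers (`item_t` and `collect_even()` are illustrative):

```c
#include <stdio.h>
#include <stddef.h>

typedef struct item {
	int		val;
	struct item	*next;
} item_t;

/* Collect matching items in one pass, with no head-node special case. */
static void
collect_even(item_t *src, int n, item_t **head)
{
	item_t **link = head;		/* plays the role of poff_p */

	for (int i = 0; i < n; i++) {
		if (src[i].val % 2 != 0)
			continue;	/* skipped, like never-bound nodes */
		*link = &src[i];
		link = &src[i].next;
	}
	*link = NULL;			/* terminate, like *poff_p = 0 */
}

int
main(void)
{
	item_t a[] = { {0}, {1}, {2}, {3}, {4} };
	item_t *head;

	collect_even(a, 5, &head);
	for (item_t *p = head; p != NULL; p = p->next)
		printf("%d ", p->val);	/* prints: 0 2 4 */
	printf("\n");
	return (0);
}
```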
+ */ +static di_off_t +di_copydevnm(di_off_t *off_p, struct di_state *st) +{ + int i; + di_off_t off; + size_t size; + struct di_devnm *dnp; + + dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p)); + + /* + * make sure there is some allocated memory + */ + size = devcnt * sizeof (struct di_devnm); + off = di_checkmem(st, *off_p, size); + *off_p = off; + + dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n", + devcnt, off)); + + dnp = (struct di_devnm *)di_mem_addr(st, off); + off += size; + + for (i = 0; i < devcnt; i++) { + if (devnamesp[i].dn_name == NULL) { + continue; + } + + /* + * dn_name is not freed during driver unload or removal. + * + * There is a race condition when make_devname() changes + * dn_name during our strcpy. This should be rare since + * only add_drv does this. At any rate, we never had a + * problem with ddi_name_to_major(), which should have + * the same problem. + */ + dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n", + devnamesp[i].dn_name, devnamesp[i].dn_instance, + off)); + + off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1); + dnp[i].name = off; + (void) strcpy((char *)di_mem_addr(st, off), + devnamesp[i].dn_name); + off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1); + + mutex_enter(&devnamesp[i].dn_lock); + + /* + * Snapshot per-driver node list + */ + snap_driver_list(st, &devnamesp[i], &dnp[i].head); + + /* + * This is not used by libdevinfo, leave it for now + */ + dnp[i].flags = devnamesp[i].dn_flags; + dnp[i].instance = devnamesp[i].dn_instance; + + /* + * get global properties + */ + if ((DINFOPROP & st->command) && + devnamesp[i].dn_global_prop_ptr) { + dnp[i].global_prop = off; + off = di_getprop( + devnamesp[i].dn_global_prop_ptr->prop_list, + &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST); + } + + /* + * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str + */ + if (CB_DRV_INSTALLED(devopsp[i])) { + if (devopsp[i]->devo_cb_ops) { + dnp[i].ops |= DI_CB_OPS; + if (devopsp[i]->devo_cb_ops->cb_str) + dnp[i].ops |= DI_STREAM_OPS; + } + if (NEXUS_DRV(devopsp[i])) { + dnp[i].ops |= DI_BUS_OPS; + } + } + + mutex_exit(&devnamesp[i].dn_lock); + } + + dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off)); + + return (off); +} + +/* + * Copy the kernel devinfo tree. The tree and the devnames array forms + * the entire snapshot (see also di_copydevnm). + */ +static di_off_t +di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st) +{ + di_off_t off; + struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP); + + dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n", + (void *)root, *off_p)); + + /* force attach drivers */ + if ((i_ddi_node_state((dev_info_t *)root) == DS_READY) && + (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) { + (void) ndi_devi_config((dev_info_t *)root, + NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT | + NDI_DRV_CONF_REPROBE); + } + + /* + * Push top_devinfo onto a stack + * + * The stack is necessary to avoid recursion, which can overrun + * the kernel stack. + */ + PUSH_STACK(dsp, root, off_p); + + /* + * As long as there is a node on the stack, copy the node. + * di_copynode() is responsible for pushing and popping + * child and sibling nodes on the stack. 
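Every variable-length object di_copydevnm() stores, driver names included, goes into the snapshot as an offset from the start of the flat buffer rather than as a pointer, which is what lets the finished snapshot be copied to userland or to the cache file and used in place. A sketch of the append-and-record-offset step (`put_string()` and the fixed arena are illustrative and do no bounds checking):

```c
#include <stdio.h>
#include <string.h>
#include <stddef.h>

static char arena[256];		/* stands in for the di_mem buffers */

/*
 * Store the string at `off` and return the offset just past it; the
 * caller records `off` itself, never a pointer, in the snapshot.
 */
static size_t
put_string(size_t off, const char *s)
{
	size_t len = strlen(s) + 1;

	memcpy(arena + off, s, len);
	return (off + len);
}

int
main(void)
{
	size_t name_off = 0;
	size_t next = put_string(name_off, "devinfo");

	/* A reader can map the buffer anywhere and rebuild the pointer. */
	printf("name at %zu: %s (next free byte: %zu)\n",
	    name_off, arena + name_off, next);
	return (0);
}
```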
+ */ + while (!EMPTY_STACK(dsp)) { + off = di_copynode(dsp, st); + } + + /* + * Free the stack structure + */ + kmem_free(dsp, sizeof (struct di_stack)); + + return (off); +} + +/* + * This is the core function, which copies all data associated with a single + * node into the snapshot. The amount of information is determined by the + * ioctl command. + */ +static di_off_t +di_copynode(struct di_stack *dsp, struct di_state *st) +{ + di_off_t off; + struct di_node *me; + struct dev_info *node; + + dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", + dsp->depth)); + + node = TOP_NODE(dsp); + + ASSERT(node != NULL); + + /* + * check memory usage, and fix offsets accordingly. + */ + off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node)); + *(TOP_OFFSET(dsp)) = off; + me = DI_NODE(di_mem_addr(st, off)); + + dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n", + node->devi_node_name, node->devi_instance, off)); + + /* + * Node parameters: + * self -- offset of current node within snapshot + * nodeid -- pointer to PROM node (tri-valued) + * state -- hot plugging device state + * node_state -- devinfo node state (CF1, CF2, etc.) + */ + me->self = off; + me->instance = node->devi_instance; + me->nodeid = node->devi_nodeid; + me->node_class = node->devi_node_class; + me->attributes = node->devi_node_attributes; + me->state = node->devi_state; + me->node_state = node->devi_node_state; + me->user_private_data = NULL; + + /* + * Get parent's offset in snapshot from the stack + * and store it in the current node + */ + if (dsp->depth > 1) { + me->parent = *(PARENT_OFFSET(dsp)); + } + + /* + * Save the offset of this di_node in a hash table. + * This is used later to resolve references to this + * dip from other parts of the tree (per-driver list, + * multipathing linkages, layered usage linkages). + * The key used for the hash table is derived from + * information in the dip. + */ + di_register_dip(st, (dev_info_t *)node, me->self); + + /* + * increment offset + */ + off += sizeof (struct di_node); + +#ifdef DEVID_COMPATIBILITY + /* check for devid as property marker */ + if (node->devi_devid) { + ddi_devid_t devid; + char *devidstr; + int devid_size; + + /* + * The devid is now represented as a property. + * For micro release compatibility with di_devid interface + * in libdevinfo we must return it as a binary structure in' + * the snapshot. When di_devid is removed from libdevinfo + * in a future release (and devi_devid is deleted) then + * code related to DEVID_COMPATIBILITY can be removed. + */ + ASSERT(node->devi_devid == DEVID_COMPATIBILITY); +/* XXX should be DDI_DEV_T_NONE! 
*/ + if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node, + DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) == + DDI_PROP_SUCCESS) { + if (ddi_devid_str_decode(devidstr, &devid, NULL) == + DDI_SUCCESS) { + devid_size = ddi_devid_sizeof(devid); + off = di_checkmem(st, off, devid_size); + me->devid = off; + bcopy(devid, + di_mem_addr(st, off), devid_size); + off += devid_size; + ddi_devid_free(devid); + } + ddi_prop_free(devidstr); + } + } +#endif /* DEVID_COMPATIBILITY */ + + if (node->devi_node_name) { + off = di_checkmem(st, off, strlen(node->devi_node_name) + 1); + me->node_name = off; + (void) strcpy(di_mem_addr(st, off), node->devi_node_name); + off += strlen(node->devi_node_name) + 1; + } + + if (node->devi_compat_names && (node->devi_compat_length > 1)) { + off = di_checkmem(st, off, node->devi_compat_length); + me->compat_names = off; + me->compat_length = node->devi_compat_length; + bcopy(node->devi_compat_names, di_mem_addr(st, off), + node->devi_compat_length); + off += node->devi_compat_length; + } + + if (node->devi_addr) { + off = di_checkmem(st, off, strlen(node->devi_addr) + 1); + me->address = off; + (void) strcpy(di_mem_addr(st, off), node->devi_addr); + off += strlen(node->devi_addr) + 1; + } + + if (node->devi_binding_name) { + off = di_checkmem(st, off, strlen(node->devi_binding_name) + 1); + me->bind_name = off; + (void) strcpy(di_mem_addr(st, off), node->devi_binding_name); + off += strlen(node->devi_binding_name) + 1; + } + + me->drv_major = node->devi_major; + + /* + * If the dip is BOUND, set the next pointer of the + * per-instance list to -1, indicating that it is yet to be resolved. + * This will be resolved later in snap_driver_list(). + */ + if (me->drv_major != -1) { + me->next = -1; + } else { + me->next = 0; + } + + /* + * An optimization to skip mutex_enter when not needed. + */ + if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) { + goto priv_data; + } + + /* + * Grab current per dev_info node lock to + * get minor data and properties. + */ + mutex_enter(&(node->devi_lock)); + + if (!(DINFOMINOR & st->command)) { + goto path; + } + + if (node->devi_minor) { /* minor data */ + me->minor_data = DI_ALIGN(off); + off = di_getmdata(node->devi_minor, &me->minor_data, + me->self, st); + } + +path: + if (!(DINFOPATH & st->command)) { + goto property; + } + + if (MDI_CLIENT(node)) { + me->multipath_client = DI_ALIGN(off); + off = di_getpath_data((dev_info_t *)node, &me->multipath_client, + me->self, st, 1); + dcmn_err((CE_WARN, "me->multipath_client = %x for node %p " + "component type = %d. off=%d", + me->multipath_client, + (void *)node, node->devi_mdi_component, off)); + } + + if (MDI_PHCI(node)) { + me->multipath_phci = DI_ALIGN(off); + off = di_getpath_data((dev_info_t *)node, &me->multipath_phci, + me->self, st, 0); + dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p " + "component type = %d. 
off=%d", + me->multipath_phci, + (void *)node, node->devi_mdi_component, off)); + } + +property: + if (!(DINFOPROP & st->command)) { + goto unlock; + } + + if (node->devi_drv_prop_ptr) { /* driver property list */ + me->drv_prop = DI_ALIGN(off); + off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st, + node, DI_PROP_DRV_LIST); + } + + if (node->devi_sys_prop_ptr) { /* system property list */ + me->sys_prop = DI_ALIGN(off); + off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st, + node, DI_PROP_SYS_LIST); + } + + if (node->devi_hw_prop_ptr) { /* hardware property list */ + me->hw_prop = DI_ALIGN(off); + off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st, + node, DI_PROP_HW_LIST); + } + + if (node->devi_global_prop_list == NULL) { + me->glob_prop = (di_off_t)-1; /* not global property */ + } else { + /* + * Make copy of global property list if this devinfo refers + * global properties different from what's on the devnames + * array. It can happen if there has been a forced + * driver.conf update. See mod_drv(1M). + */ + ASSERT(me->drv_major != -1); + if (node->devi_global_prop_list != + devnamesp[me->drv_major].dn_global_prop_ptr) { + me->glob_prop = DI_ALIGN(off); + off = di_getprop(node->devi_global_prop_list->prop_list, + &me->glob_prop, st, node, DI_PROP_GLB_LIST); + } + } + +unlock: + /* + * release current per dev_info node lock + */ + mutex_exit(&(node->devi_lock)); + +priv_data: + if (!(DINFOPRIVDATA & st->command)) { + goto pm_info; + } + + if (ddi_get_parent_data((dev_info_t *)node) != NULL) { + me->parent_data = DI_ALIGN(off); + off = di_getppdata(node, &me->parent_data, st); + } + + if (ddi_get_driver_private((dev_info_t *)node) != NULL) { + me->driver_data = DI_ALIGN(off); + off = di_getdpdata(node, &me->driver_data, st); + } + +pm_info: /* NOT implemented */ + +subtree: + if (!(DINFOSUBTREE & st->command)) { + POP_STACK(dsp); + return (DI_ALIGN(off)); + } + +child: + /* + * If there is a child--push child onto stack. + * Hold the parent busy while doing so. 
+ */ + if (node->devi_child) { + me->child = DI_ALIGN(off); + PUSH_STACK(dsp, node->devi_child, &me->child); + return (me->child); + } + +sibling: + /* + * no child node, unroll the stack till a sibling of + * a parent node is found or root node is reached + */ + POP_STACK(dsp); + while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) { + node = TOP_NODE(dsp); + me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp)))); + POP_STACK(dsp); + } + + if (!EMPTY_STACK(dsp)) { + /* + * a sibling is found, replace top of stack by its sibling + */ + me->sibling = DI_ALIGN(off); + PUSH_STACK(dsp, node->devi_sibling, &me->sibling); + return (me->sibling); + } + + /* + * DONE with all nodes + */ + return (DI_ALIGN(off)); +} + +static i_lnode_t * +i_lnode_alloc(int modid) +{ + i_lnode_t *i_lnode; + + i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP); + + ASSERT(modid != -1); + i_lnode->modid = modid; + + return (i_lnode); +} + +static void +i_lnode_free(i_lnode_t *i_lnode) +{ + kmem_free(i_lnode, sizeof (i_lnode_t)); +} + +static void +i_lnode_check_free(i_lnode_t *i_lnode) +{ + /* This lnode and its dip must have been snapshotted */ + ASSERT(i_lnode->self > 0); + ASSERT(i_lnode->di_node->self > 0); + + /* at least 1 link (in or out) must exist for this lnode */ + ASSERT(i_lnode->link_in || i_lnode->link_out); + + i_lnode_free(i_lnode); +} + +static i_link_t * +i_link_alloc(int spec_type) +{ + i_link_t *i_link; + + i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP); + i_link->spec_type = spec_type; + + return (i_link); +} + +static void +i_link_check_free(i_link_t *i_link) +{ + /* This link must have been snapshotted */ + ASSERT(i_link->self > 0); + + /* Both endpoint lnodes must exist for this link */ + ASSERT(i_link->src_lnode); + ASSERT(i_link->tgt_lnode); + + kmem_free(i_link, sizeof (i_link_t)); +} + +/*ARGSUSED*/ +static uint_t +i_lnode_hashfunc(void *arg, mod_hash_key_t key) +{ + i_lnode_t *i_lnode = (i_lnode_t *)key; + struct di_node *ptr; + dev_t dev; + + dev = i_lnode->devt; + if (dev != DDI_DEV_T_NONE) + return (i_lnode->modid + getminor(dev) + getmajor(dev)); + + ptr = i_lnode->di_node; + ASSERT(ptr->self > 0); + if (ptr) { + uintptr_t k = (uintptr_t)ptr; + k >>= (int)highbit(sizeof (struct di_node)); + return ((uint_t)k); + } + + return (i_lnode->modid); +} + +static int +i_lnode_cmp(void *arg1, void *arg2) +{ + i_lnode_t *i_lnode1 = (i_lnode_t *)arg1; + i_lnode_t *i_lnode2 = (i_lnode_t *)arg2; + + if (i_lnode1->modid != i_lnode2->modid) { + return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1); + } + + if (i_lnode1->di_node != i_lnode2->di_node) + return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1); + + if (i_lnode1->devt != i_lnode2->devt) + return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1); + + return (0); +} + +/* + * An lnode represents a {dip, dev_t} tuple. A link represents a + * {src_lnode, tgt_lnode, spec_type} tuple. + * The following callback assumes that LDI framework ref-counts the + * src_dip and tgt_dip while invoking this callback. + */ +static int +di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) +{ + struct di_state *st = (struct di_state *)arg; + i_lnode_t *src_lnode, *tgt_lnode, *i_lnode; + i_link_t **i_link_next, *i_link; + di_off_t soff, toff; + mod_hash_val_t nodep = NULL; + int res; + + /* + * if the source or target of this device usage information doesn't + * corrospond to a device node then we don't report it via + * libdevinfo so return. 
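di_ldi_callback(), below, dedups endpoints with an allocate-then-probe pattern: build a candidate i_lnode, look it up in the lnode hash, and free the candidate when an equal one (per i_lnode_cmp()'s {modid, di_node, devt} identity) is already present. The same pattern over a toy table, with a linear scan standing in for mod_hash_find()/mod_hash_insert() (`intern_lnode()` and `tbl[]` are hypothetical, and capacity checks are omitted):

```c
#include <stdlib.h>

typedef struct lnode {
	int	modid;
	void	*di_node;
	long	devt;
} lnode_t;

#define TBL_MAX 64
static lnode_t *tbl[TBL_MAX];
static int tbl_cnt;

/* Return the canonical lnode for an identity, inserting on first use. */
static lnode_t *
intern_lnode(int modid, void *di_node, long devt)
{
	lnode_t *cand = calloc(1, sizeof (*cand));

	cand->modid = modid;
	cand->di_node = di_node;
	cand->devt = devt;

	for (int i = 0; i < tbl_cnt; i++) {
		/* same three-field identity as i_lnode_cmp() */
		if (tbl[i]->modid == cand->modid &&
		    tbl[i]->di_node == cand->di_node &&
		    tbl[i]->devt == cand->devt) {
			free(cand);	/* duplicate: reuse the original */
			return (tbl[i]);
		}
	}
	tbl[tbl_cnt++] = cand;		/* first sighting of this identity */
	return (cand);
}
```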
+ */ + if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) + return (LDI_USAGE_CONTINUE); + + ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); + ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); + + /* + * Skip the ldi_usage if either src or tgt dip is not in the + * snapshot. This saves us from pruning bad lnodes/links later. + */ + if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) + return (LDI_USAGE_CONTINUE); + if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) + return (LDI_USAGE_CONTINUE); + + ASSERT(soff > 0); + ASSERT(toff > 0); + + /* + * allocate an i_lnode and add it to the lnode hash + * if it is not already present. For this particular + * link the lnode is a source, but it may + * participate as tgt or src in any number of layered + * operations - so it may already be in the hash. + */ + i_lnode = i_lnode_alloc(ldi_usage->src_modid); + i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff); + i_lnode->devt = ldi_usage->src_devt; + + res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); + if (res == MH_ERR_NOTFOUND) { + /* + * new i_lnode + * add it to the hash and increment the lnode count + */ + res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); + ASSERT(res == 0); + st->lnode_count++; + src_lnode = i_lnode; + } else { + /* this i_lnode already exists in the lnode_hash */ + i_lnode_free(i_lnode); + src_lnode = (i_lnode_t *)nodep; + } + + /* + * allocate a tgt i_lnode and add it to the lnode hash + */ + i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); + i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff); + i_lnode->devt = ldi_usage->tgt_devt; + + res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); + if (res == MH_ERR_NOTFOUND) { + /* + * new i_lnode + * add it to the hash and increment the lnode count + */ + res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); + ASSERT(res == 0); + st->lnode_count++; + tgt_lnode = i_lnode; + } else { + /* this i_lnode already exists in the lnode_hash */ + i_lnode_free(i_lnode); + tgt_lnode = (i_lnode_t *)nodep; + } + + /* + * allocate a i_link + */ + i_link = i_link_alloc(ldi_usage->tgt_spec_type); + i_link->src_lnode = src_lnode; + i_link->tgt_lnode = tgt_lnode; + + /* + * add this link onto the src i_lnodes outbound i_link list + */ + i_link_next = &(src_lnode->link_out); + while (*i_link_next != NULL) { + if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && + (i_link->spec_type == (*i_link_next)->spec_type)) { + /* this link already exists */ + kmem_free(i_link, sizeof (i_link_t)); + return (LDI_USAGE_CONTINUE); + } + i_link_next = &((*i_link_next)->src_link_next); + } + *i_link_next = i_link; + + /* + * add this link onto the tgt i_lnodes inbound i_link list + */ + i_link_next = &(tgt_lnode->link_in); + while (*i_link_next != NULL) { + ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); + i_link_next = &((*i_link_next)->tgt_link_next); + } + *i_link_next = i_link; + + /* + * add this i_link to the link hash + */ + res = mod_hash_insert(st->link_hash, i_link, i_link); + ASSERT(res == 0); + st->link_count++; + + return (LDI_USAGE_CONTINUE); +} + +struct i_layer_data { + struct di_state *st; + int lnode_count; + int link_count; + di_off_t lnode_off; + di_off_t link_off; +}; + +/*ARGSUSED*/ +static uint_t +i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + i_link_t *i_link = (i_link_t *)key; + struct i_layer_data *data = arg; + struct di_link *me; + struct di_lnode *melnode; + struct di_node *medinode; + + ASSERT(i_link->self == 0); + + i_link->self = 
data->link_off + + (data->link_count * sizeof (struct di_link)); + data->link_count++; + + ASSERT(data->link_off > 0 && data->link_count > 0); + ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ + ASSERT(data->link_count <= data->st->link_count); + + /* fill in fields for the di_link snapshot */ + me = (struct di_link *)di_mem_addr(data->st, i_link->self); + me->self = i_link->self; + me->spec_type = i_link->spec_type; + + /* + * The src_lnode and tgt_lnode i_lnode_t for this i_link_t + * are created during the LDI table walk. Since we are + * walking the link hash, the lnode hash has already been + * walked and the lnodes have been snapshotted. Save lnode + * offsets. + */ + me->src_lnode = i_link->src_lnode->self; + me->tgt_lnode = i_link->tgt_lnode->self; + + /* + * Save this link's offset in the src_lnode snapshot's link_out + * field + */ + melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode); + me->src_link_next = melnode->link_out; + melnode->link_out = me->self; + + /* + * Put this link on the tgt_lnode's link_in field + */ + melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode); + me->tgt_link_next = melnode->link_in; + melnode->link_in = me->self; + + /* + * An i_lnode_t is only created if the corresponding dip exists + * in the snapshot. A pointer to the di_node is saved in the + * i_lnode_t when it is allocated. For this link, get the di_node + * for the source lnode. Then put the link on the di_node's list + * of src links + */ + medinode = i_link->src_lnode->di_node; + me->src_node_next = medinode->src_links; + medinode->src_links = me->self; + + /* + * Put this link on the tgt_links list of the target + * dip. + */ + medinode = i_link->tgt_lnode->di_node; + me->tgt_node_next = medinode->tgt_links; + medinode->tgt_links = me->self; + + return (MH_WALK_CONTINUE); +} + +/*ARGSUSED*/ +static uint_t +i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ + i_lnode_t *i_lnode = (i_lnode_t *)key; + struct i_layer_data *data = arg; + struct di_lnode *me; + struct di_node *medinode; + + ASSERT(i_lnode->self == 0); + + i_lnode->self = data->lnode_off + + (data->lnode_count * sizeof (struct di_lnode)); + data->lnode_count++; + + ASSERT(data->lnode_off > 0 && data->lnode_count > 0); + ASSERT(data->link_count == 0); /* links not done yet */ + ASSERT(data->lnode_count <= data->st->lnode_count); + + /* fill in fields for the di_lnode snapshot */ + me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self); + me->self = i_lnode->self; + + if (i_lnode->devt == DDI_DEV_T_NONE) { + me->dev_major = (major_t)-1; + me->dev_minor = (minor_t)-1; + } else { + me->dev_major = getmajor(i_lnode->devt); + me->dev_minor = getminor(i_lnode->devt); + } + + /* + * The dip corresponding to this lnode must exist in + * the snapshot or we wouldn't have created the i_lnode_t + * during LDI walk. Save the offset of the dip. + */ + ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0); + me->node = i_lnode->di_node->self; + + /* + * There must be at least one link in or out of this lnode + * or we wouldn't have created it. These fields will be set + * during the link hash walk. + */ + ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL)); + + /* + * set the offset of the devinfo node associated with this + * lnode. Also update the node_next next pointer. this pointer + * is set if there are multiple lnodes associated with the same + * devinfo node. (could occure when multiple minor nodes + * are open for one device, etc.) 
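Both hash walkers here rebuild the lnode/link graph inside the snapshot by prepending each record to per-node lists of offsets, the flat-buffer equivalent of a pointer list insert. A sketch of that idiom with integer offsets into a record array (`rec_t`, `snap[]`, and `prepend()` are illustrative; the driver terminates its lists with offset 0, since its records never live at offset 0, while -1 is used here):

```c
#include <stdio.h>

typedef struct rec {
	int self;	/* this record's own offset */
	int next;	/* offset of the next record, -1 at the end */
} rec_t;

static rec_t snap[16];

/*
 * Same two stores as, e.g.,
 * me->src_link_next = melnode->link_out; melnode->link_out = me->self;
 */
static void
prepend(int *head, int self)
{
	snap[self].self = self;
	snap[self].next = *head;
	*head = self;
}

int
main(void)
{
	int head = -1;

	prepend(&head, 3);
	prepend(&head, 7);
	for (int off = head; off != -1; off = snap[off].next)
		printf("record %d\n", off);	/* prints 7 then 3 */
	return (0);
}
```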
+ */ + medinode = i_lnode->di_node; + me->node_next = medinode->lnodes; + medinode->lnodes = me->self; + + return (MH_WALK_CONTINUE); +} + +static di_off_t +di_getlink_data(di_off_t off, struct di_state *st) +{ + struct i_layer_data data = {0}; + size_t size; + + dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); + + st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, + mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, + i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); + + st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, + (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); + + /* get driver layering information */ + (void) ldi_usage_walker(st, di_ldi_callback); + + /* check if there is any link data to include in the snapshot */ + if (st->lnode_count == 0) { + ASSERT(st->link_count == 0); + goto out; + } + + ASSERT(st->link_count != 0); + + /* get a pointer to snapshot memory for all the di_lnodes */ + size = sizeof (struct di_lnode) * st->lnode_count; + data.lnode_off = off = di_checkmem(st, off, size); + off += DI_ALIGN(size); + + /* get a pointer to snapshot memory for all the di_links */ + size = sizeof (struct di_link) * st->link_count; + data.link_off = off = di_checkmem(st, off, size); + off += DI_ALIGN(size); + + data.lnode_count = data.link_count = 0; + data.st = st; + + /* + * We have lnodes and links that will go into the + * snapshot, so let's walk the respective hashes + * and snapshot them. The various linkages are + * also set up during the walk. + */ + mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); + ASSERT(data.lnode_count == st->lnode_count); + + mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); + ASSERT(data.link_count == st->link_count); + +out: + /* free up the i_lnodes and i_links used to create the snapshot */ + mod_hash_destroy_hash(st->lnode_hash); + mod_hash_destroy_hash(st->link_hash); + st->lnode_count = 0; + st->link_count = 0; + + return (off); +} + + +/* + * Copy all minor data nodes attached to a devinfo node into the snapshot. + * It is called from di_copynode with devi_lock held. 
+ */ +static di_off_t +di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, + struct di_state *st) +{ + di_off_t off; + struct di_minor *me; + + dcmn_err2((CE_CONT, "di_getmdata:\n")); + + /* + * check memory first + */ + off = di_checkmem(st, *off_p, sizeof (struct di_minor)); + *off_p = off; + + do { + me = (struct di_minor *)di_mem_addr(st, off); + me->self = off; + me->type = mnode->type; + me->node = node; + me->user_private_data = NULL; + + off += DI_ALIGN(sizeof (struct di_minor)); + + /* + * Split dev_t to major/minor, so it works for + * both ILP32 and LP64 model + */ + me->dev_major = getmajor(mnode->ddm_dev); + me->dev_minor = getminor(mnode->ddm_dev); + me->spec_type = mnode->ddm_spec_type; + + if (mnode->ddm_name) { + off = di_checkmem(st, off, + strlen(mnode->ddm_name) + 1); + me->name = off; + (void) strcpy(di_mem_addr(st, off), mnode->ddm_name); + off += DI_ALIGN(strlen(mnode->ddm_name) + 1); + } + + if (mnode->ddm_node_type) { + off = di_checkmem(st, off, + strlen(mnode->ddm_node_type) + 1); + me->node_type = off; + (void) strcpy(di_mem_addr(st, off), + mnode->ddm_node_type); + off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1); + } + + off = di_checkmem(st, off, sizeof (struct di_minor)); + me->next = off; + mnode = mnode->next; + } while (mnode); + + me->next = 0; + + return (off); +} + +/* + * di_register_dip(), di_find_dip(): The dip must be protected + * from deallocation when using these routines - this can either + * be a reference count, a busy hold or a per-driver lock. + */ + +static void +di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) +{ + struct dev_info *node = DEVI(dip); + struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); + struct di_dkey *dk; + + ASSERT(dip); + ASSERT(off > 0); + + key->k_type = DI_DKEY; + dk = &(key->k_u.dkey); + + dk->dk_dip = dip; + dk->dk_major = node->devi_major; + dk->dk_inst = node->devi_instance; + dk->dk_nodeid = node->devi_nodeid; + + if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, + (mod_hash_val_t)(uintptr_t)off) != 0) { + panic( + "duplicate devinfo (%p) registered during device " + "tree walk", (void *)dip); + } +} + + +static int +di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) +{ + /* + * uintptr_t must be used because it matches the size of void *; + * mod_hash expects clients to place results into pointer-size + * containers; since di_off_t is always a 32-bit offset, alignment + * would otherwise be broken on 64-bit kernels. + */ + uintptr_t offset; + struct di_key key = {0}; + struct di_dkey *dk; + + ASSERT(st->reg_dip_hash); + ASSERT(dip); + ASSERT(off_p); + + + key.k_type = DI_DKEY; + dk = &(key.k_u.dkey); + + dk->dk_dip = dip; + dk->dk_major = DEVI(dip)->devi_major; + dk->dk_inst = DEVI(dip)->devi_instance; + dk->dk_nodeid = DEVI(dip)->devi_nodeid; + + if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, + (mod_hash_val_t *)&offset) == 0) { + *off_p = (di_off_t)offset; + return (0); + } else { + return (-1); + } +} + +/* + * di_register_pip(), di_find_pip(): The pip must be protected from deallocation + * when using these routines. The caller must do this by protecting the + * client(or phci)<->pip linkage while traversing the list and then holding the + * pip when it is found in the list. 
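+ * di_getpath_data() below follows exactly this pattern: it traverses
+ * the list via mdi_get_next_{phci,client}_path() and brackets each
+ * di_pip_find()/di_register_pip() pair with mdi_pi_lock(pip) and
+ * mdi_pi_unlock(pip).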
+ */ + +static void +di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) +{ + struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); + char *path_addr; + struct di_pkey *pk; + + ASSERT(pip); + ASSERT(off > 0); + + key->k_type = DI_PKEY; + pk = &(key->k_u.pkey); + + pk->pk_pip = pip; + path_addr = mdi_pi_get_addr(pip); + if (path_addr) + pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); + pk->pk_client = mdi_pi_get_client(pip); + pk->pk_phci = mdi_pi_get_phci(pip); + + if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, + (mod_hash_val_t)(uintptr_t)off) != 0) { + panic( + "duplicate pathinfo (%p) registered during device " + "tree walk", (void *)pip); + } +} + +/* + * As with di_register_pip, the caller must hold or lock the pip + */ +static int +di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) +{ + /* + * uintptr_t must be used because it matches the size of void *; + * mod_hash expects clients to place results into pointer-size + * containers; since di_off_t is always a 32-bit offset, alignment + * would otherwise be broken on 64-bit kernels. + */ + uintptr_t offset; + struct di_key key = {0}; + struct di_pkey *pk; + + ASSERT(st->reg_pip_hash); + ASSERT(off_p); + + if (pip == NULL) { + *off_p = 0; + return (0); + } + + key.k_type = DI_PKEY; + pk = &(key.k_u.pkey); + + pk->pk_pip = pip; + pk->pk_path_addr = mdi_pi_get_addr(pip); + pk->pk_client = mdi_pi_get_client(pip); + pk->pk_phci = mdi_pi_get_phci(pip); + + if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, + (mod_hash_val_t *)&offset) == 0) { + *off_p = (di_off_t)offset; + return (0); + } else { + return (-1); + } +} + +static di_path_state_t +path_state_convert(mdi_pathinfo_state_t st) +{ + switch (st) { + case MDI_PATHINFO_STATE_ONLINE: + return (DI_PATH_STATE_ONLINE); + case MDI_PATHINFO_STATE_STANDBY: + return (DI_PATH_STATE_STANDBY); + case MDI_PATHINFO_STATE_OFFLINE: + return (DI_PATH_STATE_OFFLINE); + case MDI_PATHINFO_STATE_FAULT: + return (DI_PATH_STATE_FAULT); + default: + return (DI_PATH_STATE_UNKNOWN); + } +} + + +static di_off_t +di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p, + struct di_state *st) +{ + nvpair_t *prop = NULL; + struct di_path_prop *me; + + if (mdi_pi_get_next_prop(pip, NULL) == NULL) { + *off_p = 0; + return (off); + } + + off = di_checkmem(st, off, sizeof (struct di_path_prop)); + *off_p = off; + + while (prop = mdi_pi_get_next_prop(pip, prop)) { + int delta = 0; + + me = (struct di_path_prop *)di_mem_addr(st, off); + me->self = off; + off += sizeof (struct di_path_prop); + + /* + * property name + */ + off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1); + me->prop_name = off; + (void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); + off += strlen(nvpair_name(prop)) + 1; + + switch (nvpair_type(prop)) { + case DATA_TYPE_BYTE: + case DATA_TYPE_INT16: + case DATA_TYPE_UINT16: + case DATA_TYPE_INT32: + case DATA_TYPE_UINT32: + delta = sizeof (int32_t); + me->prop_type = DDI_PROP_TYPE_INT; + off = di_checkmem(st, off, delta); + (void) nvpair_value_int32(prop, + (int32_t *)di_mem_addr(st, off)); + break; + + case DATA_TYPE_INT64: + case DATA_TYPE_UINT64: + delta = sizeof (int64_t); + me->prop_type = DDI_PROP_TYPE_INT64; + off = di_checkmem(st, off, delta); + (void) nvpair_value_int64(prop, + (int64_t *)di_mem_addr(st, off)); + break; + + case DATA_TYPE_STRING: + { + char *str; + (void) nvpair_value_string(prop, &str); + delta = strlen(str) + 1; + me->prop_type = DDI_PROP_TYPE_STRING; + off = di_checkmem(st, off, 
delta); + (void) strcpy(di_mem_addr(st, off), str); + break; + } + case DATA_TYPE_BYTE_ARRAY: + case DATA_TYPE_INT16_ARRAY: + case DATA_TYPE_UINT16_ARRAY: + case DATA_TYPE_INT32_ARRAY: + case DATA_TYPE_UINT32_ARRAY: + case DATA_TYPE_INT64_ARRAY: + case DATA_TYPE_UINT64_ARRAY: + { + uchar_t *buf; + uint_t nelems; + (void) nvpair_value_byte_array(prop, &buf, &nelems); + delta = nelems; + me->prop_type = DDI_PROP_TYPE_BYTE; + if (nelems != 0) { + off = di_checkmem(st, off, delta); + bcopy(buf, di_mem_addr(st, off), nelems); + } + break; + } + + default: /* Unknown or unhandled type; skip it */ + delta = 0; + break; + } + + if (delta > 0) { + me->prop_data = off; + } + + me->prop_len = delta; + off += delta; + + off = di_checkmem(st, off, sizeof (struct di_path_prop)); + me->prop_next = off; + } + + me->prop_next = 0; + return (off); +} + + +static void +di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, + int get_client) +{ + if (get_client) { + ASSERT(me->path_client == 0); + me->path_client = noff; + ASSERT(me->path_c_link == 0); + *off_pp = &me->path_c_link; + me->path_snap_state &= + ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); + } else { + ASSERT(me->path_phci == 0); + me->path_phci = noff; + ASSERT(me->path_p_link == 0); + *off_pp = &me->path_p_link; + me->path_snap_state &= + ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); + } +} + +/* + * poff_p: pointer to the linkage field. This links pips along the client|phci + * linkage list. + * noff : Offset for the endpoint dip snapshot. + */ +static di_off_t +di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff, + struct di_state *st, int get_client) +{ + di_off_t off; + mdi_pathinfo_t *pip; + struct di_path *me; + mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *); + + dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client)); + + /* + * The naming of the following mdi_xyz() is unfortunately + * non-intuitive. mdi_get_next_phci_path() follows the + * client_link i.e. the list of pip's belonging to the + * given client dip. + */ + if (get_client) + next_pip = &mdi_get_next_phci_path; + else + next_pip = &mdi_get_next_client_path; + + off = *poff_p; + + pip = NULL; + while (pip = (*next_pip)(dip, pip)) { + mdi_pathinfo_state_t state; + di_off_t stored_offset; + + dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip)); + + mdi_pi_lock(pip); + + if (di_pip_find(st, pip, &stored_offset) != -1) { + /* + * We've already seen this pathinfo node so we need to + * take care not to snap it again; However, one endpoint + * and linkage will be set here. The other endpoint + * and linkage has already been set when the pip was + * first snapshotted i.e. when the other endpoint dip + * was snapshotted. + */ + me = (struct di_path *)di_mem_addr(st, stored_offset); + + *poff_p = stored_offset; + + di_path_one_endpoint(me, noff, &poff_p, get_client); + + /* + * The other endpoint and linkage were set when this + * pip was snapshotted. So we are done with both + * endpoints and linkages. 
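+			 *
+			 * Sketch of the two visits for one pip P between
+			 * client C and pHCI H (whichever endpoint's walk
+			 * gets here first):
+			 *
+			 *	1st visit: snapshot P, set one of
+			 *	    path_client/path_c_link or
+			 *	    path_phci/path_p_link
+			 *	2nd visit: di_pip_find() hits; fill in
+			 *	    the remaining endpoint and linkage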
+ */ + ASSERT(!(me->path_snap_state & + (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI))); + ASSERT(!(me->path_snap_state & + (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK))); + + mdi_pi_unlock(pip); + continue; + } + + /* + * Now that we need to snapshot this pip, check memory + */ + off = di_checkmem(st, off, sizeof (struct di_path)); + me = (struct di_path *)di_mem_addr(st, off); + me->self = off; + *poff_p = off; + off += sizeof (struct di_path); + + me->path_snap_state = + DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK; + me->path_snap_state |= + DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI; + + /* + * Zero out fields as di_checkmem() doesn't guarantee + * zero-filled memory + */ + me->path_client = me->path_phci = 0; + me->path_c_link = me->path_p_link = 0; + + di_path_one_endpoint(me, noff, &poff_p, get_client); + + /* + * Note the existence of this pathinfo + */ + di_register_pip(st, pip, me->self); + + state = mdi_pi_get_state(pip); + me->path_state = path_state_convert(state); + + /* + * Get intermediate addressing info. + */ + off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1); + me->path_addr = off; + (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip)); + off += strlen(mdi_pi_get_addr(pip)) + 1; + + /* + * Get path properties if props are to be included in the + * snapshot + */ + if (DINFOPROP & st->command) { + off = di_path_getprop(pip, off, &me->path_prop, st); + } else { + me->path_prop = 0; + } + + mdi_pi_unlock(pip); + } + + *poff_p = 0; + + return (off); +} + +/* + * Copy a list of properties attached to a devinfo node. Called from + * di_copynode with devi_lock held. The major number is passed in case + * we need to call driver's prop_op entry. The value of list indicates + * which list we are copying. Possible values are: + * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST + */ +static di_off_t +di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st, + struct dev_info *dip, int list) +{ + dev_t dev; + int (*prop_op)(); + int off, need_prop_op = 0; + int prop_op_fail = 0; + ddi_prop_t *propp = NULL; + struct di_prop *pp; + struct dev_ops *ops = NULL; + int prop_len; + caddr_t prop_val; + + + dcmn_err2((CE_CONT, "di_getprop:\n")); + + ASSERT(st != NULL); + + dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop)); + + /* + * Figure out if we need to call driver's prop_op entry point. + * The conditions are: + * -- driver property list + * -- driver must be attached and held + * -- driver's cb_prop_op != ddi_prop_op + * or parent's bus_prop_op != ddi_bus_prop_op + */ + + if (list != DI_PROP_DRV_LIST) { + goto getprop; + } + + /* + * If driver is not attached or if major is -1, we ignore + * the driver property list. No one should rely on such + * properties. + */ + if (i_ddi_node_state((dev_info_t *)dip) < DS_ATTACHED) { + off = *off_p; + *off_p = 0; + return (off); + } + + /* + * Now we have a driver which is held. We can examine entry points + * and check the condition listed above. + */ + ops = dip->devi_ops; + + /* + * Some nexus drivers incorrectly set cb_prop_op to nodev, + * nulldev or even NULL. 
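+	 * Treat any of those values as "no usable prop_op" so that we
+	 * never indirect through a stub (or a NULL pointer) later.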
+ */ + if (ops && ops->devo_cb_ops && + (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) && + (ops->devo_cb_ops->cb_prop_op != nodev) && + (ops->devo_cb_ops->cb_prop_op != nulldev) && + (ops->devo_cb_ops->cb_prop_op != NULL)) { + need_prop_op = 1; + } + +getprop: + /* + * check memory availability + */ + off = di_checkmem(st, *off_p, sizeof (struct di_prop)); + *off_p = off; + /* + * Now copy properties + */ + do { + pp = (struct di_prop *)di_mem_addr(st, off); + pp->self = off; + /* + * Split dev_t to major/minor, so it works for + * both ILP32 and LP64 model + */ + pp->dev_major = getmajor(prop->prop_dev); + pp->dev_minor = getminor(prop->prop_dev); + pp->prop_flags = prop->prop_flags; + pp->prop_list = list; + + /* + * property name + */ + off += sizeof (struct di_prop); + if (prop->prop_name) { + off = di_checkmem(st, off, strlen(prop->prop_name) + + 1); + pp->prop_name = off; + (void) strcpy(di_mem_addr(st, off), prop->prop_name); + off += strlen(prop->prop_name) + 1; + } + + /* + * Set prop_len here. This may change later + * if cb_prop_op returns a different length. + */ + pp->prop_len = prop->prop_len; + if (!need_prop_op) { + if (prop->prop_val == NULL) { + dcmn_err((CE_WARN, + "devinfo: property fault at %p", + (void *)prop)); + pp->prop_data = -1; + } else if (prop->prop_len != 0) { + off = di_checkmem(st, off, prop->prop_len); + pp->prop_data = off; + bcopy(prop->prop_val, di_mem_addr(st, off), + prop->prop_len); + off += DI_ALIGN(pp->prop_len); + } + } + + off = di_checkmem(st, off, sizeof (struct di_prop)); + pp->next = off; + prop = prop->prop_next; + } while (prop); + + pp->next = 0; + + if (!need_prop_op) { + dcmn_err((CE_CONT, "finished property " + "list at offset 0x%x\n", off)); + return (off); + } + + /* + * If there is a need to call driver's prop_op entry, + * we must release driver's devi_lock, because the + * cb_prop_op entry point will grab it. + * + * The snapshot memory has already been allocated above, + * which means the length of an active property should + * remain fixed for this implementation to work. 
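+	 *
+	 * In other words, the do/while below is a second pass over the
+	 * di_prop records laid down above: it may rewrite prop_len and
+	 * prop_data from what cb_prop_op reports, but the record
+	 * headers themselves stay where they are in the snapshot.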
+ */ + + + prop_op = ops->devo_cb_ops->cb_prop_op; + pp = (struct di_prop *)di_mem_addr(st, *off_p); + + mutex_exit(&dip->devi_lock); + + do { + int err; + struct di_prop *tmp; + + if (pp->next) { + tmp = (struct di_prop *) + di_mem_addr(st, pp->next); + } else { + tmp = NULL; + } + + /* + * call into driver's prop_op entry point + * + * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY + */ + dev = makedevice(pp->dev_major, pp->dev_minor); + if (dev == DDI_DEV_T_NONE) + dev = DDI_DEV_T_ANY; + + dcmn_err((CE_CONT, "call prop_op" + "(%lx, %p, PROP_LEN_AND_VAL_BUF, " + "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n", + dev, + (void *)dip, + (char *)di_mem_addr(st, pp->prop_name), + (void *)di_mem_addr(st, pp->prop_data), + pp->prop_len)); + + if ((err = (*prop_op)(dev, (dev_info_t)dip, + PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS, + (char *)di_mem_addr(st, pp->prop_name), + &prop_val, &prop_len)) != DDI_PROP_SUCCESS) { + if ((propp = i_ddi_prop_search(dev, + (char *)di_mem_addr(st, pp->prop_name), + (uint_t)pp->prop_flags, + &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) { + pp->prop_len = propp->prop_len; + if (pp->prop_len != 0) { + off = di_checkmem(st, off, + pp->prop_len); + pp->prop_data = off; + bcopy(propp->prop_val, di_mem_addr(st, + pp->prop_data), propp->prop_len); + off += DI_ALIGN(pp->prop_len); + } + } else { + prop_op_fail = 1; + } + } else if (prop_len != 0) { + pp->prop_len = prop_len; + off = di_checkmem(st, off, prop_len); + pp->prop_data = off; + bcopy(prop_val, di_mem_addr(st, off), prop_len); + off += DI_ALIGN(prop_len); + kmem_free(prop_val, prop_len); + } + + if (prop_op_fail) { + pp->prop_data = -1; + dcmn_err((CE_WARN, "devinfo: prop_op failure " + "for \"%s\" err %d", + di_mem_addr(st, pp->prop_name), err)); + } + + pp = tmp; + + } while (pp); + + mutex_enter(&dip->devi_lock); + dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off)); + return (off); +} + +/* + * find private data format attached to a dip + * parent = 1 to match driver name of parent dip (for parent private data) + * 0 to match driver name of current dip (for driver private data) + */ +#define DI_MATCH_DRIVER 0 +#define DI_MATCH_PARENT 1 + +struct di_priv_format * +di_match_drv_name(struct dev_info *node, struct di_state *st, int match) +{ + int i, count, len; + char *drv_name; + major_t major; + struct di_all *all; + struct di_priv_format *form; + + dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n", + node->devi_node_name, match)); + + if (match == DI_MATCH_PARENT) { + node = DEVI(node->devi_parent); + } + + if (node == NULL) { + return (NULL); + } + + major = ddi_name_to_major(node->devi_binding_name); + if (major == (major_t)(-1)) { + return (NULL); + } + + /* + * Match the driver name. 
+ */ + drv_name = ddi_major_to_name(major); + if ((drv_name == NULL) || *drv_name == '\0') { + return (NULL); + } + + /* Now get the di_priv_format array */ + all = (struct di_all *)di_mem_addr(st, 0); + + if (match == DI_MATCH_PARENT) { + count = all->n_ppdata; + form = (struct di_priv_format *) + (di_mem_addr(st, 0) + all->ppdata_format); + } else { + count = all->n_dpdata; + form = (struct di_priv_format *) + ((caddr_t)all + all->dpdata_format); + } + + len = strlen(drv_name); + for (i = 0; i < count; i++) { + char *tmp; + + tmp = form[i].drv_name; + while (tmp && (*tmp != '\0')) { + if (strncmp(drv_name, tmp, len) == 0) { + return (&form[i]); + } + /* + * Move to next driver name, skipping a white space + */ + if (tmp = strchr(tmp, ' ')) { + tmp++; + } + } + } + + return (NULL); +} + +/* + * The following functions copy data as specified by the format passed in. + * To prevent invalid format from panicing the system, we call on_fault(). + * A return value of 0 indicates an error. Otherwise, the total offset + * is returned. + */ +#define DI_MAX_PRIVDATA (PAGESIZE >> 1) /* max private data size */ + +static di_off_t +di_getprvdata(struct di_priv_format *pdp, void *data, di_off_t *off_p, + struct di_state *st) +{ + caddr_t pa; + void *ptr; + int i, size, repeat; + di_off_t off, off0, *tmp; + + label_t ljb; + + dcmn_err2((CE_CONT, "di_getprvdata:\n")); + + /* + * check memory availability. Private data size is + * limited to DI_MAX_PRIVDATA. + */ + off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA); + + if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) { + goto failure; + } + + if (!on_fault(&ljb)) { + /* copy the struct */ + bcopy(data, di_mem_addr(st, off), pdp->bytes); + off0 = DI_ALIGN(pdp->bytes); + + /* dereferencing pointers */ + for (i = 0; i < MAX_PTR_IN_PRV; i++) { + + if (pdp->ptr[i].size == 0) { + goto success; /* no more ptrs */ + } + + /* + * first, get the pointer content + */ + if ((pdp->ptr[i].offset < 0) || + (pdp->ptr[i].offset > + pdp->bytes - sizeof (char *))) + goto failure; /* wrong offset */ + + pa = di_mem_addr(st, off + pdp->ptr[i].offset); + tmp = (di_off_t *)pa; /* to store off_t later */ + + ptr = *((void **) pa); /* get pointer value */ + if (ptr == NULL) { /* if NULL pointer, go on */ + continue; + } + + /* + * next, find the repeat count (array dimension) + */ + repeat = pdp->ptr[i].len_offset; + + /* + * Positive value indicates a fixed sized array. + * 0 or negative value indicates variable sized array. + * + * For variable sized array, the variable must be + * an int member of the structure, with an offset + * equal to the absolution value of struct member. + */ + if (repeat > pdp->bytes - sizeof (int)) { + goto failure; /* wrong offset */ + } + + if (repeat >= 0) { + repeat = *((int *)((caddr_t)data + repeat)); + } else { + repeat = -repeat; + } + + /* + * next, get the size of the object to be copied + */ + size = pdp->ptr[i].size * repeat; + + /* + * Arbitrarily limit the total size of object to be + * copied (1 byte to 1/4 page). 
+ */ + if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) { + goto failure; /* wrong size or too big */ + } + + /* + * Now copy the data + */ + *tmp = off0; + bcopy(ptr, di_mem_addr(st, off + off0), size); + off0 += DI_ALIGN(size); + } + } else { + goto failure; + } + +success: + /* + * success if reached here + */ + no_fault(); + *off_p = off; + + return (off + off0); + /*NOTREACHED*/ + +failure: + /* + * fault occurred + */ + no_fault(); + cmn_err(CE_WARN, "devinfo: fault in private data at %p", data); + *off_p = -1; /* set private data to indicate error */ + + return (off); +} + +/* + * get parent private data; on error, returns original offset + */ +static di_off_t +di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) +{ + int off; + struct di_priv_format *ppdp; + + dcmn_err2((CE_CONT, "di_getppdata:\n")); + + /* find the parent data format */ + if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) { + off = *off_p; + *off_p = 0; /* set parent data to none */ + return (off); + } + + return (di_getprvdata(ppdp, ddi_get_parent_data((dev_info_t *)node), + off_p, st)); +} + +/* + * get parent private data; returns original offset + */ +static di_off_t +di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st) +{ + int off; + struct di_priv_format *dpdp; + + dcmn_err2((CE_CONT, "di_getdpdata:")); + + /* find the parent data format */ + if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) { + off = *off_p; + *off_p = 0; /* set driver data to none */ + return (off); + } + + return (di_getprvdata(dpdp, ddi_get_driver_private((dev_info_t *)node), + off_p, st)); +} + +/* + * The driver is stateful across DINFOCPYALL and DINFOUSRLD. + * This function encapsulates the state machine: + * + * -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY -> + * | SNAPSHOT USRLD | + * -------------------------------------------------- + * + * Returns 0 on success and -1 on failure + */ +static int +di_setstate(struct di_state *st, int new_state) +{ + int ret = 0; + + mutex_enter(&di_lock); + switch (new_state) { + case IOC_IDLE: + case IOC_DONE: + break; + case IOC_SNAP: + if (st->di_iocstate != IOC_IDLE) + ret = -1; + break; + case IOC_COPY: + if (st->di_iocstate != IOC_DONE) + ret = -1; + break; + default: + ret = -1; + } + + if (ret == 0) + st->di_iocstate = new_state; + else + cmn_err(CE_NOTE, "incorrect state transition from %d to %d", + st->di_iocstate, new_state); + mutex_exit(&di_lock); + return (ret); +} + +/* + * We cannot assume the presence of the entire + * snapshot in this routine. 
All we are guaranteed + * is the di_all struct + 1 byte (for root_path) + */ +static int +header_plus_one_ok(struct di_all *all) +{ + /* + * Refuse to read old versions + */ + if (all->version != DI_SNAPSHOT_VERSION) { + CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); + return (0); + } + + if (all->cache_magic != DI_CACHE_MAGIC) { + CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); + return (0); + } + + if (all->snapshot_time <= 0) { + CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); + return (0); + } + + if (all->top_devinfo == 0) { + CACHE_DEBUG((DI_ERR, "NULL top devinfo")); + return (0); + } + + if (all->map_size < sizeof (*all) + 1) { + CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); + return (0); + } + + if (all->root_path[0] != '/' || all->root_path[1] != '\0') { + CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", + all->root_path[0], all->root_path[1])); + return (0); + } + + /* + * We can't check checksum here as we just have the header + */ + + return (1); +} + +static int +chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) +{ + rlim64_t rlimit; + ssize_t resid; + int error = 0; + + + rlimit = RLIM64_INFINITY; + + while (len) { + resid = 0; + error = vn_rdwr(UIO_WRITE, vp, buf, len, off, + UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); + + if (error || resid < 0) { + error = error ? error : EIO; + CACHE_DEBUG((DI_ERR, "write error: %d", error)); + break; + } + + /* + * Check if we are making progress + */ + if (resid >= len) { + error = ENOSPC; + break; + } + buf += len - resid; + off += len - resid; + len = resid; + } + + return (error); +} + +extern int modrootloaded; + +static void +di_cache_write(struct di_cache *cache) +{ + struct di_all *all; + struct vnode *vp; + int oflags; + size_t map_size; + size_t chunk; + offset_t off; + int error; + char *buf; + + ASSERT(DI_CACHE_LOCKED(*cache)); + ASSERT(!servicing_interrupt()); + + if (cache->cache_size == 0) { + ASSERT(cache->cache_data == NULL); + CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); + return; + } + + ASSERT(cache->cache_size > 0); + ASSERT(cache->cache_data); + + if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { + CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); + return; + } + + all = (struct di_all *)cache->cache_data; + + if (!header_plus_one_ok(all)) { + CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); + return; + } + + ASSERT(strcmp(all->root_path, "/") == 0); + + /* + * The cache_size is the total allocated memory for the cache. + * The map_size is the actual size of valid data in the cache. + * map_size may be smaller than cache_size but cannot exceed + * cache_size. + */ + if (all->map_size > cache->cache_size) { + CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." + " Skipping write", all->map_size, cache->cache_size)); + return; + } + + /* + * First unlink the temp file + */ + error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); + if (error && error != ENOENT) { + CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", + DI_CACHE_TEMP, error)); + } + + if (error == EROFS) { + CACHE_DEBUG((DI_ERR, "RDONLY FS. 
Skipping write")); + return; + } + + vp = NULL; + oflags = (FCREAT|FWRITE); + if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, + DI_CACHE_PERMS, &vp, CRCREAT, 0)) { + CACHE_DEBUG((DI_ERR, "%s: create failed: %d", + DI_CACHE_TEMP, error)); + return; + } + + ASSERT(vp); + + /* + * Paranoid: Check if the file is on a read-only FS + */ + if (vn_is_readonly(vp)) { + CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); + goto fail; + } + + /* + * Note that we only write map_size bytes to disk - this saves + * space as the actual cache size may be larger than size of + * valid data in the cache. + * Another advantage is that it makes verification of size + * easier when the file is read later. + */ + map_size = all->map_size; + off = 0; + buf = cache->cache_data; + + while (map_size) { + ASSERT(map_size > 0); + /* + * Write in chunks so that VM system + * is not overwhelmed + */ + if (map_size > di_chunk * PAGESIZE) + chunk = di_chunk * PAGESIZE; + else + chunk = map_size; + + error = chunk_write(vp, off, buf, chunk); + if (error) { + CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", + off, error)); + goto fail; + } + + off += chunk; + buf += chunk; + map_size -= chunk; + + /* Give pageout a chance to run */ + delay(1); + } + + /* + * Now sync the file and close it + */ + if (error = VOP_FSYNC(vp, FSYNC, kcred)) { + CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); + } + + if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) { + CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); + VN_RELE(vp); + return; + } + + VN_RELE(vp); + + /* + * Now do the rename + */ + if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { + CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); + return; + } + + CACHE_DEBUG((DI_INFO, "Cache write successful.")); + + return; + +fail: + (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred); + VN_RELE(vp); +} + + +/* + * Since we could be called early in boot, + * use kobj_read_file() + */ +static void +di_cache_read(struct di_cache *cache) +{ + struct _buf *file; + struct di_all *all; + int n; + size_t map_size, sz, chunk; + offset_t off; + caddr_t buf; + uint32_t saved_crc, crc; + + ASSERT(modrootloaded); + ASSERT(DI_CACHE_LOCKED(*cache)); + ASSERT(cache->cache_data == NULL); + ASSERT(cache->cache_size == 0); + ASSERT(!servicing_interrupt()); + + file = kobj_open_file(DI_CACHE_FILE); + if (file == (struct _buf *)-1) { + CACHE_DEBUG((DI_ERR, "%s: open failed: %d", + DI_CACHE_FILE, ENOENT)); + return; + } + + /* + * Read in the header+root_path first. The root_path must be "/" + */ + all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); + n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); + + if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { + kmem_free(all, sizeof (*all) + 1); + kobj_close_file(file); + CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); + return; + } + + map_size = all->map_size; + + kmem_free(all, sizeof (*all) + 1); + + ASSERT(map_size >= sizeof (*all) + 1); + + buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); + sz = map_size; + off = 0; + while (sz) { + /* Don't overload VM with large reads */ + chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; + n = kobj_read_file(file, buf, chunk, off); + if (n != chunk) { + CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", + DI_CACHE_FILE, off)); + goto fail; + } + off += chunk; + buf += chunk; + sz -= chunk; + } + + ASSERT(off == map_size); + + /* + * Read past expected EOF to verify size. 
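+	 * A successful 1-byte read at offset map_size would mean the
+	 * file is larger than its header claims, so treat the cache
+	 * file as invalid in that case.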
+ */ + if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { + CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); + goto fail; + } + + all = (struct di_all *)di_cache.cache_data; + if (!header_plus_one_ok(all)) { + CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); + goto fail; + } + + /* + * Compute CRC with checksum field in the cache data set to 0 + */ + saved_crc = all->cache_checksum; + all->cache_checksum = 0; + CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); + all->cache_checksum = saved_crc; + + if (crc != all->cache_checksum) { + CACHE_DEBUG((DI_ERR, + "%s: checksum error: expected=0x%x actual=0x%x", + DI_CACHE_FILE, all->cache_checksum, crc)); + goto fail; + } + + if (all->map_size != map_size) { + CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); + goto fail; + } + + kobj_close_file(file); + + di_cache.cache_size = map_size; + + return; + +fail: + kmem_free(di_cache.cache_data, map_size); + kobj_close_file(file); + di_cache.cache_data = NULL; + di_cache.cache_size = 0; +} + + +/* + * Checks if arguments are valid for using the cache. + */ +static int +cache_args_valid(struct di_state *st, int *error) +{ + ASSERT(error); + ASSERT(st->mem_size > 0); + ASSERT(st->memlist != NULL); + + if (!modrootloaded || !i_ddi_io_initialized()) { + CACHE_DEBUG((DI_ERR, + "cache lookup failure: I/O subsystem not inited")); + *error = ENOTACTIVE; + return (0); + } + + /* + * No other flags allowed with DINFOCACHE + */ + if (st->command != (DINFOCACHE & DIIOC_MASK)) { + CACHE_DEBUG((DI_ERR, + "cache lookup failure: bad flags: 0x%x", + st->command)); + *error = EINVAL; + return (0); + } + + if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { + CACHE_DEBUG((DI_ERR, + "cache lookup failure: bad root: %s", + DI_ALL_PTR(st)->root_path)); + *error = EINVAL; + return (0); + } + + CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); + + *error = 0; + + return (1); +} + +static int +snapshot_is_cacheable(struct di_state *st) +{ + ASSERT(st->mem_size > 0); + ASSERT(st->memlist != NULL); + + if (st->command != (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { + CACHE_DEBUG((DI_INFO, + "not cacheable: incompatible flags: 0x%x", + st->command)); + return (0); + } + + if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { + CACHE_DEBUG((DI_INFO, + "not cacheable: incompatible root path: %s", + DI_ALL_PTR(st)->root_path)); + return (0); + } + + CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); + + return (1); +} + +static int +di_cache_lookup(struct di_state *st) +{ + size_t rval; + int cache_valid; + + ASSERT(cache_args_valid(st, &cache_valid)); + ASSERT(modrootloaded); + + DI_CACHE_LOCK(di_cache); + + /* + * The following assignment determines the validity + * of the cache as far as this snapshot is concerned. + */ + cache_valid = di_cache.cache_valid; + + if (cache_valid && di_cache.cache_data == NULL) { + di_cache_read(&di_cache); + /* check for read or file error */ + if (di_cache.cache_data == NULL) + cache_valid = 0; + } + + if (cache_valid) { + /* + * Ok, the cache was valid as of this particular + * snapshot. Copy the cached snapshot. This is safe + * to do as the cache cannot be freed (we hold the + * cache lock). Free the memory allocated in di_state + * up until this point - we will simply copy everything + * in the cache. 
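+		 *
+		 * The overall DINFOCACHE flow is thus: validate the
+		 * arguments, take the cache lock, lazily di_cache_read()
+		 * the file if needed, then either copy the in-core cache
+		 * into st (below) or fall back to di_cache_update() with
+		 * DI_CACHE_SNAPSHOT_FLAGS.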
+ */ + + ASSERT(di_cache.cache_data != NULL); + ASSERT(di_cache.cache_size > 0); + + di_freemem(st); + + rval = 0; + if (di_cache2mem(&di_cache, st) > 0) { + + ASSERT(DI_ALL_PTR(st)); + + /* + * map_size is size of valid data in the + * cached snapshot and may be less than + * size of the cache. + */ + rval = DI_ALL_PTR(st)->map_size; + + ASSERT(rval >= sizeof (struct di_all)); + ASSERT(rval <= di_cache.cache_size); + } + } else { + /* + * The cache isn't valid, we need to take a snapshot. + * Set the command flags appropriately + */ + ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); + st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); + rval = di_cache_update(st); + st->command = (DINFOCACHE & DIIOC_MASK); + } + + DI_CACHE_UNLOCK(di_cache); + + /* + * For cached snapshots, the devinfo driver always returns + * a snapshot rooted at "/". + */ + ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); + + return (rval); +} + +/* + * This is a forced update of the cache - the previous state of the cache + * may be: + * - unpopulated + * - populated and invalid + * - populated and valid + */ +static int +di_cache_update(struct di_state *st) +{ + int rval; + uint32_t crc; + struct di_all *all; + + ASSERT(DI_CACHE_LOCKED(di_cache)); + ASSERT(snapshot_is_cacheable(st)); + + /* + * Free the in-core cache and the on-disk file (if they exist) + */ + i_ddi_di_cache_free(&di_cache); + + /* + * Set valid flag before taking the snapshot, + * so that any invalidations that arrive + * during or after the snapshot are not + * removed by us. + */ + atomic_or_32(&di_cache.cache_valid, 1); + + modunload_disable(); + rval = di_snapshot(st); + modunload_enable(); + + if (rval == 0) { + CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); + return (0); + } + + DI_ALL_PTR(st)->map_size = rval; + + if (di_mem2cache(st, &di_cache) == 0) { + CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); + return (0); + } + + ASSERT(di_cache.cache_data); + ASSERT(di_cache.cache_size > 0); + + /* + * Now that we have cached the snapshot, compute its checksum. + * The checksum is only computed over the valid data in the + * cache, not the entire cache. + * Also, set all the fields (except checksum) before computing + * checksum. + */ + all = (struct di_all *)di_cache.cache_data; + all->cache_magic = DI_CACHE_MAGIC; + all->map_size = rval; + + ASSERT(all->cache_checksum == 0); + CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); + all->cache_checksum = crc; + + di_cache_write(&di_cache); + + return (rval); +} + +static void +di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) +{ + va_list ap; + + if (di_cache_debug <= DI_QUIET) + return; + + if (di_cache_debug < msglevel) + return; + + switch (msglevel) { + case DI_ERR: + msglevel = CE_WARN; + break; + case DI_INFO: + case DI_TRACE: + default: + msglevel = CE_NOTE; + break; + } + + va_start(ap, fmt); + vcmn_err(msglevel, fmt, ap); + va_end(ap); +} |
