diff options
| author | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 | 
|---|---|---|
| committer | stevel@tonic-gate <none@none> | 2005-06-14 00:00:00 -0700 | 
| commit | 7c478bd95313f5f23a4c958a745db2134aa03244 (patch) | |
| tree | c871e58545497667cbb4b0a4f2daf204743e1fe7 /usr/src/uts/common/io/devinfo.c | |
| download | illumos-joyent-7c478bd95313f5f23a4c958a745db2134aa03244.tar.gz | |
OpenSolaris Launch
Diffstat (limited to 'usr/src/uts/common/io/devinfo.c')
| -rw-r--r-- | usr/src/uts/common/io/devinfo.c | 3819 | 
1 files changed, 3819 insertions, 0 deletions
| diff --git a/usr/src/uts/common/io/devinfo.c b/usr/src/uts/common/io/devinfo.c new file mode 100644 index 0000000000..bdcd90a038 --- /dev/null +++ b/usr/src/uts/common/io/devinfo.c @@ -0,0 +1,3819 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License").  You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc.  All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident	"%Z%%M%	%I%	%E% SMI" + +/* + * driver for accessing kernel devinfo tree. 
+ */ +#include <sys/types.h> +#include <sys/pathname.h> +#include <sys/debug.h> +#include <sys/autoconf.h> +#include <sys/conf.h> +#include <sys/file.h> +#include <sys/kmem.h> +#include <sys/modctl.h> +#include <sys/stat.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunldi_impl.h> +#include <sys/sunndi.h> +#include <sys/esunddi.h> +#include <sys/sunmdi.h> +#include <sys/ddi_impldefs.h> +#include <sys/ndi_impldefs.h> +#include <sys/mdi_impldefs.h> +#include <sys/devinfo_impl.h> +#include <sys/thread.h> +#include <sys/modhash.h> +#include <sys/bitmap.h> +#include <util/qsort.h> +#include <sys/disp.h> +#include <sys/kobj.h> +#include <sys/crc32.h> + + +#ifdef DEBUG +static int di_debug; +#define	dcmn_err(args) if (di_debug >= 1) cmn_err args +#define	dcmn_err2(args) if (di_debug >= 2) cmn_err args +#define	dcmn_err3(args) if (di_debug >= 3) cmn_err args +#else +#define	dcmn_err(args) /* nothing */ +#define	dcmn_err2(args) /* nothing */ +#define	dcmn_err3(args) /* nothing */ +#endif + +/* + * We partition the space of devinfo minor nodes equally between the full and + * unprivileged versions of the driver.  The even-numbered minor nodes are the + * full version, while the odd-numbered ones are the read-only version. + */ +static int di_max_opens = 32; + +#define	DI_FULL_PARENT		0 +#define	DI_READONLY_PARENT	1 +#define	DI_NODE_SPECIES		2 +#define	DI_UNPRIVILEGED_NODE(x)	(((x) % 2) != 0) + +#define	IOC_IDLE	0	/* snapshot ioctl states */ +#define	IOC_SNAP	1	/* snapshot in progress */ +#define	IOC_DONE	2	/* snapshot done, but not copied out */ +#define	IOC_COPY	3	/* copyout in progress */ + +/* + * Keep max alignment so we can move snapshot to different platforms + */ +#define	DI_ALIGN(addr)	((addr + 7l) & ~7l) + +/* + * To avoid wasting memory, make a linked list of memory chunks. + * Size of each chunk is buf_size. 
+ */ +struct di_mem { +	struct di_mem *next;	/* link to next chunk */ +	char *buf;		/* contiguous kernel memory */ +	size_t buf_size;	/* size of buf in bytes */ +	devmap_cookie_t cook;	/* cookie from ddi_umem_alloc */ +}; + +/* + * This is a stack for walking the tree without using recursion. + * When the devinfo tree height is above some small size, one + * gets watchdog resets on sun4m. + */ +struct di_stack { +	void		*offset[MAX_TREE_DEPTH]; +	struct dev_info *dip[MAX_TREE_DEPTH]; +	int		circ[MAX_TREE_DEPTH]; +	int		depth;	/* depth of current node to be copied */ +}; + +#define	TOP_OFFSET(stack)	\ +	((di_off_t *)(stack)->offset[(stack)->depth - 1]) +#define	TOP_NODE(stack)		\ +	((stack)->dip[(stack)->depth - 1]) +#define	PARENT_OFFSET(stack)	\ +	((di_off_t *)(stack)->offset[(stack)->depth - 2]) +#define	EMPTY_STACK(stack)	((stack)->depth == 0) +#define	POP_STACK(stack)	{ \ +	ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \ +		(stack)->circ[(stack)->depth - 1]); \ +	((stack)->depth--); \ +} +#define	PUSH_STACK(stack, node, offp)	{ \ +	ASSERT(node != NULL); \ +	ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \ +	(stack)->dip[(stack)->depth] = (node); \ +	(stack)->offset[(stack)->depth] = (void *)(offp); \ +	((stack)->depth)++; \ +} + +#define	DI_ALL_PTR(s)	((struct di_all *)di_mem_addr((s), 0)) + +/* + * With devfs, the device tree has no global locks. The device tree is + * dynamic and dips may come and go if they are not locked locally. Under + * these conditions, pointers are no longer reliable as unique IDs. + * Specifically, these pointers cannot be used as keys for hash tables + * as the same devinfo structure may be freed in one part of the tree only + * to be allocated as the structure for a different device in another + * part of the tree. This can happen if DR and the snapshot are + * happening concurrently. + * The following data structures act as keys for devinfo nodes and + * pathinfo nodes. 
+ */ + +enum di_ktype { +	DI_DKEY = 1, +	DI_PKEY = 2 +}; + +struct di_dkey { +	dev_info_t	*dk_dip; +	major_t		dk_major; +	int		dk_inst; +	dnode_t		dk_nodeid; +}; + +struct di_pkey { +	mdi_pathinfo_t	*pk_pip; +	char		*pk_path_addr; +	dev_info_t	*pk_client; +	dev_info_t	*pk_phci; +}; + +struct di_key { +	enum di_ktype	k_type; +	union { +		struct di_dkey dkey; +		struct di_pkey pkey; +	} k_u; +}; + + +struct i_lnode; + +typedef struct i_link { +	/* +	 * If a di_link struct representing this i_link struct makes it +	 * into the snapshot, then self will point to the offset of +	 * the di_link struct in the snapshot +	 */ +	di_off_t	self; + +	int		spec_type;	/* block or char access type */ +	struct i_lnode	*src_lnode;	/* src i_lnode */ +	struct i_lnode	*tgt_lnode;	/* tgt i_lnode */ +	struct i_link	*src_link_next;	/* next src i_link /w same i_lnode */ +	struct i_link	*tgt_link_next;	/* next tgt i_link /w same i_lnode */ +} i_link_t; + +typedef struct i_lnode { +	/* +	 * If a di_lnode struct representing this i_lnode struct makes it +	 * into the snapshot, then self will point to the offset of +	 * the di_lnode struct in the snapshot +	 */ +	di_off_t	self; + +	/* +	 * used for hashing and comparing i_lnodes +	 */ +	int		modid; + +	/* +	 * public information describing a link endpoint +	 */ +	struct di_node	*di_node;	/* di_node in snapshot */ +	dev_t		devt;		/* devt */ + +	/* +	 * i_link ptr to links coming into this i_lnode node +	 * (this i_lnode is the target of these i_links) +	 */ +	i_link_t	*link_in; + +	/* +	 * i_link ptr to links going out of this i_lnode node +	 * (this i_lnode is the source of these i_links) +	 */ +	i_link_t	*link_out; +} i_lnode_t; + +/* + * Soft state associated with each instance of driver open. 
+ */ +static struct di_state { +	di_off_t mem_size;	/* total # bytes in memlist	*/ +	struct di_mem *memlist;	/* head of memlist		*/ +	uint_t command;		/* command from ioctl		*/ +	int di_iocstate;	/* snapshot ioctl state		*/ +	mod_hash_t *reg_dip_hash; +	mod_hash_t *reg_pip_hash; +	int lnode_count; +	int link_count; + +	mod_hash_t *lnode_hash; +	mod_hash_t *link_hash; +} **di_states; + +static kmutex_t di_lock;	/* serialize instance assignment */ + +typedef enum { +	DI_QUIET = 0,	/* DI_QUIET must always be 0 */ +	DI_ERR, +	DI_INFO, +	DI_TRACE, +	DI_TRACE1, +	DI_TRACE2 +} di_cache_debug_t; + +static uint_t	di_chunk = 32;		/* I/O chunk size in pages */ + +#define	DI_CACHE_LOCK(c)	(mutex_enter(&(c).cache_lock)) +#define	DI_CACHE_UNLOCK(c)	(mutex_exit(&(c).cache_lock)) +#define	DI_CACHE_LOCKED(c)	(mutex_owned(&(c).cache_lock)) + +#define	CACHE_DEBUG(args)	\ +	{ if (di_cache_debug != DI_QUIET) di_cache_print args; } + +static int di_open(dev_t *, int, int, cred_t *); +static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); +static int di_close(dev_t, int, int, cred_t *); +static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **); +static int di_attach(dev_info_t *, ddi_attach_cmd_t); +static int di_detach(dev_info_t *, ddi_detach_cmd_t); + +static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int); +static di_off_t di_snapshot(struct di_state *); +static di_off_t di_copydevnm(di_off_t *, struct di_state *); +static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *); +static di_off_t di_copynode(struct di_stack *, struct di_state *); +static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t, +    struct di_state *); +static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *); +static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *); +static di_off_t di_getprop(struct ddi_prop *, di_off_t *, +    struct di_state *, struct dev_info *, int); +static void 
di_allocmem(struct di_state *, size_t); +static void di_freemem(struct di_state *); +static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz); +static di_off_t di_checkmem(struct di_state *, di_off_t, size_t); +static caddr_t di_mem_addr(struct di_state *, di_off_t); +static int di_setstate(struct di_state *, int); +static void di_register_dip(struct di_state *, dev_info_t *, di_off_t); +static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t); +static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t, +    struct di_state *, int); +static di_off_t di_getlink_data(di_off_t, struct di_state *); +static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p); + +static int cache_args_valid(struct di_state *st, int *error); +static int snapshot_is_cacheable(struct di_state *st); +static int di_cache_lookup(struct di_state *st); +static int di_cache_update(struct di_state *st); +static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...); + +static struct cb_ops di_cb_ops = { +	di_open,		/* open */ +	di_close,		/* close */ +	nodev,			/* strategy */ +	nodev,			/* print */ +	nodev,			/* dump */ +	nodev,			/* read */ +	nodev,			/* write */ +	di_ioctl,		/* ioctl */ +	nodev,			/* devmap */ +	nodev,			/* mmap */ +	nodev,			/* segmap */ +	nochpoll,		/* poll */ +	ddi_prop_op,		/* prop_op */ +	NULL,			/* streamtab  */ +	D_NEW | D_MP		/* Driver compatibility flag */ +}; + +static struct dev_ops di_ops = { +	DEVO_REV,		/* devo_rev, */ +	0,			/* refcnt  */ +	di_info,		/* info */ +	nulldev,		/* identify */ +	nulldev,		/* probe */ +	di_attach,		/* attach */ +	di_detach,		/* detach */ +	nodev,			/* reset */ +	&di_cb_ops,		/* driver operations */ +	NULL			/* bus operations */ +}; + +/* + * Module linkage information for the kernel. 
+ */ +static struct modldrv modldrv = { +	&mod_driverops, +	"DEVINFO Driver %I%", +	&di_ops +}; + +static struct modlinkage modlinkage = { +	MODREV_1, +	&modldrv, +	NULL +}; + +int +_init(void) +{ +	int	error; + +	mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL); + +	error = mod_install(&modlinkage); +	if (error != 0) { +		mutex_destroy(&di_lock); +		return (error); +	} + +	return (0); +} + +int +_info(struct modinfo *modinfop) +{ +	return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ +	int	error; + +	error = mod_remove(&modlinkage); +	if (error != 0) { +		return (error); +	} + +	mutex_destroy(&di_lock); +	return (0); +} + +static dev_info_t *di_dip; + +/*ARGSUSED*/ +static int +di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) +{ +	int error = DDI_FAILURE; + +	switch (infocmd) { +	case DDI_INFO_DEVT2DEVINFO: +		*result = (void *)di_dip; +		error = DDI_SUCCESS; +		break; +	case DDI_INFO_DEVT2INSTANCE: +		/* +		 * All dev_t's map to the same, single instance. 
+		 */ +		*result = (void *)0; +		error = DDI_SUCCESS; +		break; +	default: +		break; +	} + +	return (error); +} + +static int +di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ +	int error = DDI_FAILURE; + +	switch (cmd) { +	case DDI_ATTACH: +		di_states = kmem_zalloc( +		    di_max_opens * sizeof (struct di_state *), KM_SLEEP); + +		if (ddi_create_minor_node(dip, "devinfo", S_IFCHR, +		    DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE || +		    ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR, +		    DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) { +			kmem_free(di_states, +			    di_max_opens * sizeof (struct di_state *)); +			ddi_remove_minor_node(dip, NULL); +			error = DDI_FAILURE; +		} else { +			di_dip = dip; +			ddi_report_dev(dip); + +			error = DDI_SUCCESS; +		} +		break; +	default: +		error = DDI_FAILURE; +		break; +	} + +	return (error); +} + +static int +di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ +	int error = DDI_FAILURE; + +	switch (cmd) { +	case DDI_DETACH: +		ddi_remove_minor_node(dip, NULL); +		di_dip = NULL; +		kmem_free(di_states, di_max_opens * sizeof (struct di_state *)); + +		error = DDI_SUCCESS; +		break; +	default: +		error = DDI_FAILURE; +		break; +	} + +	return (error); +} + +/* + * Allow multiple opens by tweaking the dev_t such that it looks like each + * open is getting a different minor device.  Each minor gets a separate + * entry in the di_states[] table.  Based on the original minor number, we + * discriminate opens of the full and read-only nodes.  If all of the instances + * of the selected minor node are currently open, we return EAGAIN. 
+ */ +/*ARGSUSED*/ +static int +di_open(dev_t *devp, int flag, int otyp, cred_t *credp) +{ +	int m; +	minor_t minor_parent = getminor(*devp); + +	if (minor_parent != DI_FULL_PARENT && +	    minor_parent != DI_READONLY_PARENT) +		return (ENXIO); + +	mutex_enter(&di_lock); + +	for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) { +		if (di_states[m] != NULL) +			continue; + +		di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP); +		break;	/* It's ours. */ +	} + +	if (m >= di_max_opens) { +		/* +		 * maximum open instance for device reached +		 */ +		mutex_exit(&di_lock); +		dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached")); +		return (EAGAIN); +	} +	mutex_exit(&di_lock); + +	ASSERT(m < di_max_opens); +	*devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES)); + +	dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n", +		(void *)curthread, m + DI_NODE_SPECIES)); + +	return (0); +} + +/*ARGSUSED*/ +static int +di_close(dev_t dev, int flag, int otype, cred_t *cred_p) +{ +	struct di_state *st; +	int m = (int)getminor(dev) - DI_NODE_SPECIES; + +	if (m < 0) { +		cmn_err(CE_WARN, "closing non-existent devinfo minor %d", +		    m + DI_NODE_SPECIES); +		return (ENXIO); +	} + +	st = di_states[m]; +	ASSERT(m < di_max_opens && st != NULL); + +	di_freemem(st); +	kmem_free(st, sizeof (struct di_state)); + +	/* +	 * empty slot in state table +	 */ +	mutex_enter(&di_lock); +	di_states[m] = NULL; +	dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n", +		(void *)curthread, m + DI_NODE_SPECIES)); +	mutex_exit(&di_lock); + +	return (0); +} + + +/*ARGSUSED*/ +static int +di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) +{ +	int rv, error; +	di_off_t off; +	struct di_all *all; +	struct di_state *st; +	int m = (int)getminor(dev) - DI_NODE_SPECIES; + +	major_t i; +	char *drv_name; +	size_t map_size, size; +	struct di_mem *dcp; +	int ndi_flags; + +	if (m < 0 || m >= di_max_opens) { +		return 
(ENXIO); +	} + +	st = di_states[m]; +	ASSERT(st != NULL); + +	dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd)); + +	switch (cmd) { +	case DINFOIDENT: +		/* +		 * This is called from di_init to verify that the driver +		 * opened is indeed devinfo. The purpose is to guard against +		 * sending ioctl to an unknown driver in case of an +		 * unresolved major number conflict during bfu. +		 */ +		*rvalp = DI_MAGIC; +		return (0); + +	case DINFOLODRV: +		/* +		 * Hold an installed driver and return the result +		 */ +		if (DI_UNPRIVILEGED_NODE(m)) { +			/* +			 * Only the fully enabled instances may issue +			 * DINFOLDDRV. +			 */ +			return (EACCES); +		} + +		drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); +		if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) { +			kmem_free(drv_name, MAXNAMELEN); +			return (EFAULT); +		} + +		/* +		 * Some 3rd party driver's _init() walks the device tree, +		 * so we load the driver module before configuring driver. +		 */ +		i = ddi_name_to_major(drv_name); +		if (ddi_hold_driver(i) == NULL) { +			kmem_free(drv_name, MAXNAMELEN); +			return (ENXIO); +		} + +		ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT; + +		/* +		 * i_ddi_load_drvconf() below will trigger a reprobe +		 * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't +		 * needed here. +		 */ +		modunload_disable(); +		(void) i_ddi_load_drvconf(i); +		(void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i); +		kmem_free(drv_name, MAXNAMELEN); +		ddi_rele_driver(i); +		rv = i_ddi_devs_attached(i); +		modunload_enable(); + +		i_ddi_di_cache_invalidate(KM_SLEEP); + +		return ((rv == DDI_SUCCESS)? 
0 : ENXIO); + +	case DINFOUSRLD: +		/* +		 * The case for copying snapshot to userland +		 */ +		if (di_setstate(st, IOC_COPY) == -1) +			return (EBUSY); + +		map_size = ((struct di_all *)di_mem_addr(st, 0))->map_size; +		if (map_size == 0) { +			(void) di_setstate(st, IOC_DONE); +			return (EFAULT); +		} + +		/* +		 * copyout the snapshot +		 */ +		map_size = (map_size + PAGEOFFSET) & PAGEMASK; + +		/* +		 * Return the map size, so caller may do a sanity +		 * check against the return value of snapshot ioctl() +		 */ +		*rvalp = (int)map_size; + +		/* +		 * Copy one chunk at a time +		 */ +		off = 0; +		dcp = st->memlist; +		while (map_size) { +			size = dcp->buf_size; +			if (map_size <= size) { +				size = map_size; +			} + +			if (ddi_copyout(di_mem_addr(st, off), +			    (void *)(arg + off), size, mode) != 0) { +				(void) di_setstate(st, IOC_DONE); +				return (EFAULT); +			} + +			map_size -= size; +			off += size; +			dcp = dcp->next; +		} + +		di_freemem(st); +		(void) di_setstate(st, IOC_IDLE); +		return (0); + +	default: +		if ((cmd & ~DIIOC_MASK) != DIIOC) { +			/* +			 * Invalid ioctl command +			 */ +			return (ENOTTY); +		} +		/* +		 * take a snapshot +		 */ +		st->command = cmd & DIIOC_MASK; +		/*FALLTHROUGH*/ +	} + +	/* +	 * Obtain enough memory to hold header + rootpath.  We prevent kernel +	 * memory exhaustion by freeing any previously allocated snapshot and +	 * refusing the operation; otherwise we would be allowing ioctl(), +	 * ioctl(), ioctl(), ..., panic. +	 */ +	if (di_setstate(st, IOC_SNAP) == -1) +		return (EBUSY); + +	size = sizeof (struct di_all) + +	    sizeof (((struct dinfo_io *)(NULL))->root_path); +	if (size < PAGESIZE) +		size = PAGESIZE; +	di_allocmem(st, size); + +	all = (struct di_all *)di_mem_addr(st, 0); +	all->devcnt = devcnt; +	all->command = st->command; +	all->version = DI_SNAPSHOT_VERSION; + +	/* +	 * Note the endianness in case we need to transport snapshot +	 * over the network. 
+	 */ +#if defined(_LITTLE_ENDIAN) +	all->endianness = DI_LITTLE_ENDIAN; +#else +	all->endianness = DI_BIG_ENDIAN; +#endif + +	/* Copyin ioctl args, store in the snapshot. */ +	if (copyinstr((void *)arg, all->root_path, +	    sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) { +		di_freemem(st); +		(void) di_setstate(st, IOC_IDLE); +		return (EFAULT); +	} + +	error = 0; +	if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) { +		di_freemem(st); +		(void) di_setstate(st, IOC_IDLE); +		return (error); +	} + +	off = DI_ALIGN(sizeof (struct di_all) + size); + +	/* +	 * Only the fully enabled version may force load drivers or read +	 * the parent private data from a driver. +	 */ +	if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 && +	    DI_UNPRIVILEGED_NODE(m)) { +		di_freemem(st); +		(void) di_setstate(st, IOC_IDLE); +		return (EACCES); +	} + +	/* Do we need private data? */ +	if (st->command & DINFOPRIVDATA) { +		arg += sizeof (((struct dinfo_io *)(NULL))->root_path); + +#ifdef _MULTI_DATAMODEL +		switch (ddi_model_convert_from(mode & FMODELS)) { +		case DDI_MODEL_ILP32: { +			/* +			 * Cannot copy private data from 64-bit kernel +			 * to 32-bit app +			 */ +			di_freemem(st); +			(void) di_setstate(st, IOC_IDLE); +			return (EINVAL); +		} +		case DDI_MODEL_NONE: +			if ((off = di_copyformat(off, st, arg, mode)) == 0) { +				di_freemem(st); +				(void) di_setstate(st, IOC_IDLE); +				return (EFAULT); +			} +			break; +		} +#else /* !_MULTI_DATAMODEL */ +		if ((off = di_copyformat(off, st, arg, mode)) == 0) { +			di_freemem(st); +			(void) di_setstate(st, IOC_IDLE); +			return (EFAULT); +		} +#endif /* _MULTI_DATAMODEL */ +	} + +	all->top_devinfo = DI_ALIGN(off); + +	/* +	 * For cache lookups we reallocate memory from scratch, +	 * so the value of "all" is no longer valid. 
+	 */ +	all = NULL; + +	if (st->command & DINFOCACHE) { +		*rvalp = di_cache_lookup(st); +	} else if (snapshot_is_cacheable(st)) { +		DI_CACHE_LOCK(di_cache); +		*rvalp = di_cache_update(st); +		DI_CACHE_UNLOCK(di_cache); +	} else { +		modunload_disable(); +		*rvalp = di_snapshot(st); +		modunload_enable(); +	} + +	if (*rvalp) { +		DI_ALL_PTR(st)->map_size = *rvalp; +		(void) di_setstate(st, IOC_DONE); +	} else { +		di_freemem(st); +		(void) di_setstate(st, IOC_IDLE); +	} + +	return (0); +} + +/* + * Get a chunk of memory >= size, for the snapshot + */ +static void +di_allocmem(struct di_state *st, size_t size) +{ +	struct di_mem *mem = kmem_zalloc(sizeof (struct di_mem), +	    KM_SLEEP); +	/* +	 * Round up size to nearest power of 2. If it is less +	 * than st->mem_size, set it to st->mem_size (i.e., +	 * the mem_size is doubled every time) to reduce the +	 * number of memory allocations. +	 */ +	size_t tmp = 1; +	while (tmp < size) { +		tmp <<= 1; +	} +	size = (tmp > st->mem_size) ? tmp : st->mem_size; + +	mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook); +	mem->buf_size = size; + +	dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size)); + +	if (st->mem_size == 0) {	/* first chunk */ +		st->memlist = mem; +	} else { +		/* +		 * locate end of linked list and add a chunk at the end +		 */ +		struct di_mem *dcp = st->memlist; +		while (dcp->next != NULL) { +			dcp = dcp->next; +		} + +		dcp->next = mem; +	} + +	st->mem_size += size; +} + +/* + * Copy upto bufsiz bytes of the memlist to buf + */ +static void +di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz) +{ +	struct di_mem *dcp; +	size_t copysz; + +	if (st->mem_size == 0) { +		ASSERT(st->memlist == NULL); +		return; +	} + +	copysz = 0; +	for (dcp = st->memlist; dcp; dcp = dcp->next) { + +		ASSERT(bufsiz > 0); + +		if (bufsiz <= dcp->buf_size) +			copysz = bufsiz; +		else +			copysz = dcp->buf_size; + +		bcopy(dcp->buf, buf, copysz); + +		buf += copysz; +		bufsiz -= copysz; + +		if 
(bufsiz == 0) +			break; +	} +} + +/* + * Free all memory for the snapshot + */ +static void +di_freemem(struct di_state *st) +{ +	struct di_mem *dcp, *tmp; + +	dcmn_err2((CE_CONT, "di_freemem\n")); + +	if (st->mem_size) { +		dcp = st->memlist; +		while (dcp) {	/* traverse the linked list */ +			tmp = dcp; +			dcp = dcp->next; +			ddi_umem_free(tmp->cook); +			kmem_free(tmp, sizeof (struct di_mem)); +		} +		st->mem_size = 0; +		st->memlist = NULL; +	} + +	ASSERT(st->mem_size == 0); +	ASSERT(st->memlist == NULL); +} + +/* + * Copies cached data to the di_state structure. + * Returns: + *	- size of data copied, on SUCCESS + *	- 0 on failure + */ +static int +di_cache2mem(struct di_cache *cache, struct di_state *st) +{ +	caddr_t	pa; + +	ASSERT(st->mem_size == 0); +	ASSERT(st->memlist == NULL); +	ASSERT(!servicing_interrupt()); +	ASSERT(DI_CACHE_LOCKED(*cache)); + +	if (cache->cache_size == 0) { +		ASSERT(cache->cache_data == NULL); +		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy")); +		return (0); +	} + +	ASSERT(cache->cache_data); + +	di_allocmem(st, cache->cache_size); + +	pa = di_mem_addr(st, 0); + +	ASSERT(pa); + +	/* +	 * Verify that di_allocmem() allocates contiguous memory, +	 * so that it is safe to do straight bcopy() +	 */ +	ASSERT(st->memlist != NULL); +	ASSERT(st->memlist->next == NULL); +	bcopy(cache->cache_data, pa, cache->cache_size); + +	return (cache->cache_size); +} + +/* + * Copies a snapshot from di_state to the cache + * Returns: + *	- 0 on failure + *	- size of copied data on success + */ +static int +di_mem2cache(struct di_state *st, struct di_cache *cache) +{ +	size_t map_size; + +	ASSERT(cache->cache_size == 0); +	ASSERT(cache->cache_data == NULL); +	ASSERT(!servicing_interrupt()); +	ASSERT(DI_CACHE_LOCKED(*cache)); + +	if (st->mem_size == 0) { +		ASSERT(st->memlist == NULL); +		CACHE_DEBUG((DI_ERR, "Empty memlist. 
Skipping copy")); +		return (0); +	} + +	ASSERT(st->memlist); + +	/* +	 * The size of the memory list may be much larger than the +	 * size of valid data (map_size). Cache only the valid data +	 */ +	map_size = DI_ALL_PTR(st)->map_size; +	if (map_size == 0 || map_size < sizeof (struct di_all) || +	    map_size > st->mem_size) { +		CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size)); +		return (0); +	} + +	cache->cache_data = kmem_alloc(map_size, KM_SLEEP); +	cache->cache_size = map_size; +	di_copymem(st, cache->cache_data, cache->cache_size); + +	return (map_size); +} + +/* + * Make sure there is at least "size" bytes memory left before + * going on. Otherwise, start on a new chunk. + */ +static di_off_t +di_checkmem(struct di_state *st, di_off_t off, size_t size) +{ +	dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n", +			off, (int)size)); + +	/* +	 * di_checkmem() shouldn't be called with a size of zero. +	 * But in case it is, we want to make sure we return a valid +	 * offset within the memlist and not an offset that points us +	 * at the end of the memlist. +	 */ +	if (size == 0) { +		dcmn_err((CE_WARN, "di_checkmem: invalid zero size used")); +		size = 1; +	} + +	off = DI_ALIGN(off); +	if ((st->mem_size - off) < size) { +		off = st->mem_size; +		di_allocmem(st, size); +	} + +	return (off); +} + +/* + * Copy the private data format from ioctl arg. + * On success, the ending offset is returned. On error 0 is returned. + */ +static di_off_t +di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode) +{ +	di_off_t size; +	struct di_priv_data *priv; +	struct di_all *all = (struct di_all *)di_mem_addr(st, 0); + +	dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n", +		off, (void *)arg, mode)); + +	/* +	 * Copyin data and check version. +	 * We only handle private data version 0. 
+	 */ +	priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP); +	if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data), +	    mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) { +		kmem_free(priv, sizeof (struct di_priv_data)); +		return (0); +	} + +	/* +	 * Save di_priv_data copied from userland in snapshot. +	 */ +	all->pd_version = priv->version; +	all->n_ppdata = priv->n_parent; +	all->n_dpdata = priv->n_driver; + +	/* +	 * copyin private data format, modify offset accordingly +	 */ +	if (all->n_ppdata) {	/* parent private data format */ +		/* +		 * check memory +		 */ +		size = all->n_ppdata * sizeof (struct di_priv_format); +		off = di_checkmem(st, off, size); +		all->ppdata_format = off; +		if (ddi_copyin(priv->parent, di_mem_addr(st, off), size, +		    mode) != 0) { +			kmem_free(priv, sizeof (struct di_priv_data)); +			return (0); +		} + +		off += size; +	} + +	if (all->n_dpdata) {	/* driver private data format */ +		/* +		 * check memory +		 */ +		size = all->n_dpdata * sizeof (struct di_priv_format); +		off = di_checkmem(st, off, size); +		all->dpdata_format = off; +		if (ddi_copyin(priv->driver, di_mem_addr(st, off), size, +		    mode) != 0) { +			kmem_free(priv, sizeof (struct di_priv_data)); +			return (0); +		} + +		off += size; +	} + +	kmem_free(priv, sizeof (struct di_priv_data)); +	return (off); +} + +/* + * Return the real address based on the offset (off) within snapshot + */ +static caddr_t +di_mem_addr(struct di_state *st, di_off_t off) +{ +	struct di_mem *dcp = st->memlist; + +	dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n", +		(void *)dcp, off)); + +	ASSERT(off < st->mem_size); + +	while (off >= dcp->buf_size) { +		off -= dcp->buf_size; +		dcp = dcp->next; +	} + +	dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n", +		off, (void *)(dcp->buf + off))); + +	return (dcp->buf + off); +} + +/* + * Ideally we would use the whole key to derive the hash + * value. 
However, the probability that two keys will + * have the same dip (or pip) is very low, so + * hashing by dip (or pip) pointer should suffice. + */ +static uint_t +di_hash_byptr(void *arg, mod_hash_key_t key) +{ +	struct di_key *dik = key; +	size_t rshift; +	void *ptr; + +	ASSERT(arg == NULL); + +	switch (dik->k_type) { +	case DI_DKEY: +		ptr = dik->k_u.dkey.dk_dip; +		rshift = highbit(sizeof (struct dev_info)); +		break; +	case DI_PKEY: +		ptr = dik->k_u.pkey.pk_pip; +		rshift = highbit(sizeof (struct mdi_pathinfo)); +		break; +	default: +		panic("devinfo: unknown key type"); +		/*NOTREACHED*/ +	} +	return (mod_hash_byptr((void *)rshift, ptr)); +} + +static void +di_key_dtor(mod_hash_key_t key) +{ +	char		*path_addr; +	struct di_key	*dik = key; + +	switch (dik->k_type) { +	case DI_DKEY: +		break; +	case DI_PKEY: +		path_addr = dik->k_u.pkey.pk_path_addr; +		if (path_addr) +			kmem_free(path_addr, strlen(path_addr) + 1); +		break; +	default: +		panic("devinfo: unknown key type"); +		/*NOTREACHED*/ +	} + +	kmem_free(dik, sizeof (struct di_key)); +} + +static int +di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2) +{ +	if (dk1->dk_dip !=  dk2->dk_dip) +		return (dk1->dk_dip > dk2->dk_dip ? 1 : -1); + +	if (dk1->dk_major != -1 && dk2->dk_major != -1) { +		if (dk1->dk_major !=  dk2->dk_major) +			return (dk1->dk_major > dk2->dk_major ? 1 : -1); + +		if (dk1->dk_inst !=  dk2->dk_inst) +			return (dk1->dk_inst > dk2->dk_inst ? 1 : -1); +	} + +	if (dk1->dk_nodeid != dk2->dk_nodeid) +		return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1); + +	return (0); +} + +static int +di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2) +{ +	char *p1, *p2; +	int rv; + +	if (pk1->pk_pip !=  pk2->pk_pip) +		return (pk1->pk_pip > pk2->pk_pip ? 1 : -1); + +	p1 = pk1->pk_path_addr; +	p2 = pk2->pk_path_addr; + +	p1 = p1 ? p1 : ""; +	p2 = p2 ? p2 : ""; + +	rv = strcmp(p1, p2); +	if (rv) +		return (rv > 0  ? 
1 : -1); + +	if (pk1->pk_client !=  pk2->pk_client) +		return (pk1->pk_client > pk2->pk_client ? 1 : -1); + +	if (pk1->pk_phci !=  pk2->pk_phci) +		return (pk1->pk_phci > pk2->pk_phci ? 1 : -1); + +	return (0); +} + +static int +di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2) +{ +	struct di_key *dik1, *dik2; + +	dik1 = key1; +	dik2 = key2; + +	if (dik1->k_type != dik2->k_type) { +		panic("devinfo: mismatched keys"); +		/*NOTREACHED*/ +	} + +	switch (dik1->k_type) { +	case DI_DKEY: +		return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey))); +	case DI_PKEY: +		return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey))); +	default: +		panic("devinfo: unknown key type"); +		/*NOTREACHED*/ +	} +} + +/* + * This is the main function that takes a snapshot + */ +static di_off_t +di_snapshot(struct di_state *st) +{ +	di_off_t off; +	struct di_all *all; +	dev_info_t *rootnode; +	char buf[80]; + +	all = (struct di_all *)di_mem_addr(st, 0); +	dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n")); + +	/* +	 * Hold the devinfo node referred by the path. 
+	 */ +	rootnode = e_ddi_hold_devi_by_path(all->root_path, 0); +	if (rootnode == NULL) { +		dcmn_err((CE_CONT, "Devinfo node %s not found\n", +		    all->root_path)); +		return (0); +	} + +	(void) snprintf(buf, sizeof (buf), +	    "devinfo registered dips (statep=%p)", (void *)st); + +	st->reg_dip_hash = mod_hash_create_extended(buf, 64, +	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr, +	    NULL, di_key_cmp, KM_SLEEP); + + +	(void) snprintf(buf, sizeof (buf), +	    "devinfo registered pips (statep=%p)", (void *)st); + +	st->reg_pip_hash = mod_hash_create_extended(buf, 64, +	    di_key_dtor, mod_hash_null_valdtor, di_hash_byptr, +	    NULL, di_key_cmp, KM_SLEEP); + +	/* +	 * copy the device tree +	 */ +	off = di_copytree(DEVI(rootnode), &all->top_devinfo, st); + +	ddi_release_devi(rootnode); + +	/* +	 * copy the devnames array +	 */ +	all->devnames = off; +	off = di_copydevnm(&all->devnames, st); + + +	/* initialize the hash tables */ +	st->lnode_count = 0; +	st->link_count = 0; + +	if (DINFOLYR & st->command) { +		off = di_getlink_data(off, st); +	} + +	/* +	 * Free up hash tables +	 */ +	mod_hash_destroy_hash(st->reg_dip_hash); +	mod_hash_destroy_hash(st->reg_pip_hash); + +	/* +	 * Record the timestamp now that we are done with snapshot. +	 * +	 * We compute the checksum later and then only if we cache +	 * the snapshot, since checksumming adds some overhead. +	 * The checksum is checked later if we read the cache file. +	 * from disk. +	 * +	 * Set checksum field to 0 as CRC is calculated with that +	 * field set to 0. 
+	 */ +	all->snapshot_time = ddi_get_time(); +	all->cache_checksum = 0; + +	return (off); +} + +/* + * Assumes all devinfo nodes in device tree have been snapshotted + */ +static void +snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *poff_p) +{ +	struct dev_info *node; +	struct di_node *me; +	di_off_t off; + +	ASSERT(mutex_owned(&dnp->dn_lock)); + +	node = DEVI(dnp->dn_head); +	for (; node; node = node->devi_next) { +		if (di_dip_find(st, (dev_info_t *)node, &off) != 0) +			continue; + +		ASSERT(off > 0); +		me = (struct di_node *)di_mem_addr(st, off); +		ASSERT(me->next == 0 || me->next == -1); +		/* +		 * Only nodes which were BOUND when they were +		 * snapshotted will be added to per-driver list. +		 */ +		if (me->next != -1) +			continue; + +		*poff_p = off; +		poff_p = &me->next; +	} + +	*poff_p = 0; +} + +/* + * Copy the devnames array, so we have a list of drivers in the snapshot. + * Also makes it possible to locate the per-driver devinfo nodes. + */ +static di_off_t +di_copydevnm(di_off_t *off_p, struct di_state *st) +{ +	int i; +	di_off_t off; +	size_t size; +	struct di_devnm *dnp; + +	dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p)); + +	/* +	 * make sure there is some allocated memory +	 */ +	size = devcnt * sizeof (struct di_devnm); +	off = di_checkmem(st, *off_p, size); +	*off_p = off; + +	dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n", +		devcnt, off)); + +	dnp = (struct di_devnm *)di_mem_addr(st, off); +	off += size; + +	for (i = 0; i < devcnt; i++) { +		if (devnamesp[i].dn_name == NULL) { +			continue; +		} + +		/* +		 * dn_name is not freed during driver unload or removal. +		 * +		 * There is a race condition when make_devname() changes +		 * dn_name during our strcpy. This should be rare since +		 * only add_drv does this. At any rate, we never had a +		 * problem with ddi_name_to_major(), which should have +		 * the same problem. 
+		 */ +		dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n", +			devnamesp[i].dn_name, devnamesp[i].dn_instance, +			off)); + +		off = di_checkmem(st, off, strlen(devnamesp[i].dn_name) + 1); +		dnp[i].name = off; +		(void) strcpy((char *)di_mem_addr(st, off), +			devnamesp[i].dn_name); +		off += DI_ALIGN(strlen(devnamesp[i].dn_name) + 1); + +		mutex_enter(&devnamesp[i].dn_lock); + +		/* +		 * Snapshot per-driver node list +		 */ +		snap_driver_list(st, &devnamesp[i], &dnp[i].head); + +		/* +		 * This is not used by libdevinfo, leave it for now +		 */ +		dnp[i].flags = devnamesp[i].dn_flags; +		dnp[i].instance = devnamesp[i].dn_instance; + +		/* +		 * get global properties +		 */ +		if ((DINFOPROP & st->command) && +		    devnamesp[i].dn_global_prop_ptr) { +			dnp[i].global_prop = off; +			off = di_getprop( +			    devnamesp[i].dn_global_prop_ptr->prop_list, +			    &dnp[i].global_prop, st, NULL, DI_PROP_GLB_LIST); +		} + +		/* +		 * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str +		 */ +		if (CB_DRV_INSTALLED(devopsp[i])) { +			if (devopsp[i]->devo_cb_ops) { +				dnp[i].ops |= DI_CB_OPS; +				if (devopsp[i]->devo_cb_ops->cb_str) +					dnp[i].ops |= DI_STREAM_OPS; +			} +			if (NEXUS_DRV(devopsp[i])) { +				dnp[i].ops |= DI_BUS_OPS; +			} +		} + +		mutex_exit(&devnamesp[i].dn_lock); +	} + +	dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off)); + +	return (off); +} + +/* + * Copy the kernel devinfo tree. The tree and the devnames array forms + * the entire snapshot (see also di_copydevnm). 
+ */ +static di_off_t +di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st) +{ +	di_off_t off; +	struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP); + +	dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n", +		(void *)root, *off_p)); + +	/* force attach drivers */ +	if ((i_ddi_node_state((dev_info_t *)root) == DS_READY) && +	    (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) { +		(void) ndi_devi_config((dev_info_t *)root, +		    NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT | +		    NDI_DRV_CONF_REPROBE); +	} + +	/* +	 * Push top_devinfo onto a stack +	 * +	 * The stack is necessary to avoid recursion, which can overrun +	 * the kernel stack. +	 */ +	PUSH_STACK(dsp, root, off_p); + +	/* +	 * As long as there is a node on the stack, copy the node. +	 * di_copynode() is responsible for pushing and popping +	 * child and sibling nodes on the stack. +	 */ +	while (!EMPTY_STACK(dsp)) { +		off = di_copynode(dsp, st); +	} + +	/* +	 * Free the stack structure +	 */ +	kmem_free(dsp, sizeof (struct di_stack)); + +	return (off); +} + +/* + * This is the core function, which copies all data associated with a single + * node into the snapshot. The amount of information is determined by the + * ioctl command. + */ +static di_off_t +di_copynode(struct di_stack *dsp, struct di_state *st) +{ +	di_off_t off; +	struct di_node *me; +	struct dev_info *node; + +	dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", +			dsp->depth)); + +	node = TOP_NODE(dsp); + +	ASSERT(node != NULL); + +	/* +	 * check memory usage, and fix offsets accordingly. 
+	 */ +	off = di_checkmem(st, *(TOP_OFFSET(dsp)), sizeof (struct di_node)); +	*(TOP_OFFSET(dsp)) = off; +	me = DI_NODE(di_mem_addr(st, off)); + +	dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n", +			node->devi_node_name, node->devi_instance, off)); + +	/* +	 * Node parameters: +	 * self		-- offset of current node within snapshot +	 * nodeid	-- pointer to PROM node (tri-valued) +	 * state	-- hot plugging device state +	 * node_state	-- devinfo node state (CF1, CF2, etc.) +	 */ +	me->self = off; +	me->instance = node->devi_instance; +	me->nodeid = node->devi_nodeid; +	me->node_class = node->devi_node_class; +	me->attributes = node->devi_node_attributes; +	me->state = node->devi_state; +	me->node_state = node->devi_node_state; +	me->user_private_data = NULL; + +	/* +	 * Get parent's offset in snapshot from the stack +	 * and store it in the current node +	 */ +	if (dsp->depth > 1) { +		me->parent = *(PARENT_OFFSET(dsp)); +	} + +	/* +	 * Save the offset of this di_node in a hash table. +	 * This is used later to resolve references to this +	 * dip from other parts of the tree (per-driver list, +	 * multipathing linkages, layered usage linkages). +	 * The key used for the hash table is derived from +	 * information in the dip. +	 */ +	di_register_dip(st, (dev_info_t *)node, me->self); + +	/* +	 * advance the offset past the di_node just filled in +	 */ +	off += sizeof (struct di_node); + +#ifdef	DEVID_COMPATIBILITY +	/* check for devid as property marker */ +	if (node->devi_devid) { +		ddi_devid_t	devid; +		char 		*devidstr; +		int		devid_size; + +		/* +		 * The devid is now represented as a property. +		 * For micro release compatibility with di_devid interface +		 * in libdevinfo we must return it as a binary structure in +		 * the snapshot.  When di_devid is removed from libdevinfo +		 * in a future release (and devi_devid is deleted) then +		 * code related to DEVID_COMPATIBILITY can be removed. 
+		 */ +		ASSERT(node->devi_devid == DEVID_COMPATIBILITY); +/* XXX should be DDI_DEV_T_NONE! */ +		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, (dev_info_t *)node, +		    DDI_PROP_DONTPASS, DEVID_PROP_NAME, &devidstr) == +		    DDI_PROP_SUCCESS) { +			if (ddi_devid_str_decode(devidstr, &devid, NULL) == +			    DDI_SUCCESS) { +				devid_size = ddi_devid_sizeof(devid); +				off = di_checkmem(st, off, devid_size); +				me->devid = off; +				bcopy(devid, +				    di_mem_addr(st, off), devid_size); +				off += devid_size; +				ddi_devid_free(devid); +			} +			ddi_prop_free(devidstr); +		} +	} +#endif	/* DEVID_COMPATIBILITY */ + +	if (node->devi_node_name) { +		off = di_checkmem(st, off, strlen(node->devi_node_name) + 1); +		me->node_name = off; +		(void) strcpy(di_mem_addr(st, off), node->devi_node_name); +		off += strlen(node->devi_node_name) + 1; +	} + +	if (node->devi_compat_names && (node->devi_compat_length > 1)) { +		off = di_checkmem(st, off, node->devi_compat_length); +		me->compat_names = off; +		me->compat_length = node->devi_compat_length; +		bcopy(node->devi_compat_names, di_mem_addr(st, off), +			node->devi_compat_length); +		off += node->devi_compat_length; +	} + +	if (node->devi_addr) { +		off = di_checkmem(st, off, strlen(node->devi_addr) + 1); +		me->address = off; +		(void) strcpy(di_mem_addr(st, off), node->devi_addr); +		off += strlen(node->devi_addr) + 1; +	} + +	if (node->devi_binding_name) { +		off = di_checkmem(st, off, strlen(node->devi_binding_name) + 1); +		me->bind_name = off; +		(void) strcpy(di_mem_addr(st, off), node->devi_binding_name); +		off += strlen(node->devi_binding_name) + 1; +	} + +	me->drv_major = node->devi_major; + +	/* +	 * If the dip is BOUND, set the next pointer of the +	 * per-instance list to -1, indicating that it is yet to be resolved. +	 * This will be resolved later in snap_driver_list(). 
+	 */ +	if (me->drv_major != -1) { +		me->next = -1; +	} else { +		me->next = 0; +	} + +	/* +	 * An optimization to skip mutex_enter when not needed. +	 */ +	if (!((DINFOMINOR | DINFOPROP | DINFOPATH) & st->command)) { +		goto priv_data; +	} + +	/* +	 * Grab current per dev_info node lock to +	 * get minor data and properties. +	 */ +	mutex_enter(&(node->devi_lock)); + +	if (!(DINFOMINOR & st->command)) { +		goto path; +	} + +	if (node->devi_minor) {		/* minor data */ +		me->minor_data = DI_ALIGN(off); +		off = di_getmdata(node->devi_minor, &me->minor_data, +		    me->self, st); +	} + +path: +	if (!(DINFOPATH & st->command)) { +		goto property; +	} + +	if (MDI_CLIENT(node)) { +		me->multipath_client = DI_ALIGN(off); +		off = di_getpath_data((dev_info_t *)node, &me->multipath_client, +		    me->self, st, 1); +		dcmn_err((CE_WARN, "me->multipath_client = %x for node %p " +		    "component type = %d.  off=%d", +		    me->multipath_client, +		    (void *)node, node->devi_mdi_component, off)); +	} + +	if (MDI_PHCI(node)) { +		me->multipath_phci = DI_ALIGN(off); +		off = di_getpath_data((dev_info_t *)node, &me->multipath_phci, +		    me->self, st, 0); +		dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p " +		    "component type = %d.  
off=%d", +		    me->multipath_phci, +		    (void *)node, node->devi_mdi_component, off)); +	} + +property: +	if (!(DINFOPROP & st->command)) { +		goto unlock; +	} + +	if (node->devi_drv_prop_ptr) {	/* driver property list */ +		me->drv_prop = DI_ALIGN(off); +		off = di_getprop(node->devi_drv_prop_ptr, &me->drv_prop, st, +			node, DI_PROP_DRV_LIST); +	} + +	if (node->devi_sys_prop_ptr) {	/* system property list */ +		me->sys_prop = DI_ALIGN(off); +		off = di_getprop(node->devi_sys_prop_ptr, &me->sys_prop, st, +			node, DI_PROP_SYS_LIST); +	} + +	if (node->devi_hw_prop_ptr) {	/* hardware property list */ +		me->hw_prop = DI_ALIGN(off); +		off = di_getprop(node->devi_hw_prop_ptr, &me->hw_prop, st, +			node, DI_PROP_HW_LIST); +	} + +	if (node->devi_global_prop_list == NULL) { +		me->glob_prop = (di_off_t)-1;	/* not global property */ +	} else { +		/* +		 * Make copy of global property list if this devinfo refers +		 * global properties different from what's on the devnames +		 * array. It can happen if there has been a forced +		 * driver.conf update. See mod_drv(1M). 
+		 */ +		ASSERT(me->drv_major != -1); +		if (node->devi_global_prop_list != +		    devnamesp[me->drv_major].dn_global_prop_ptr) { +			me->glob_prop = DI_ALIGN(off); +			off = di_getprop(node->devi_global_prop_list->prop_list, +			    &me->glob_prop, st, node, DI_PROP_GLB_LIST); +		} +	} + +unlock: +	/* +	 * release current per dev_info node lock +	 */ +	mutex_exit(&(node->devi_lock)); + +priv_data: +	if (!(DINFOPRIVDATA & st->command)) { +		goto pm_info; +	} + +	if (ddi_get_parent_data((dev_info_t *)node) != NULL) { +		me->parent_data = DI_ALIGN(off); +		off = di_getppdata(node, &me->parent_data, st); +	} + +	if (ddi_get_driver_private((dev_info_t *)node) != NULL) { +		me->driver_data = DI_ALIGN(off); +		off = di_getdpdata(node, &me->driver_data, st); +	} + +pm_info: /* NOT implemented */ + +subtree: +	if (!(DINFOSUBTREE & st->command)) { +		POP_STACK(dsp); +		return (DI_ALIGN(off)); +	} + +child: +	/* +	 * If there is a child--push child onto stack. +	 * Hold the parent busy while doing so. 
+	 */ +	if (node->devi_child) { +		me->child = DI_ALIGN(off); +		PUSH_STACK(dsp, node->devi_child, &me->child); +		return (me->child); +	} + +sibling: +	/* +	 * no child node, unroll the stack till a sibling of +	 * a parent node is found or root node is reached +	 */ +	POP_STACK(dsp); +	while (!EMPTY_STACK(dsp) && (node->devi_sibling == NULL)) { +		node = TOP_NODE(dsp); +		me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp)))); +		POP_STACK(dsp); +	} + +	if (!EMPTY_STACK(dsp)) { +		/* +		 * a sibling is found, replace top of stack by its sibling +		 */ +		me->sibling = DI_ALIGN(off); +		PUSH_STACK(dsp, node->devi_sibling, &me->sibling); +		return (me->sibling); +	} + +	/* +	 * DONE with all nodes +	 */ +	return (DI_ALIGN(off)); +} + +static i_lnode_t * +i_lnode_alloc(int modid) +{ +	i_lnode_t	*i_lnode; + +	i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP); + +	ASSERT(modid != -1); +	i_lnode->modid = modid; + +	return (i_lnode); +} + +static void +i_lnode_free(i_lnode_t *i_lnode) +{ +	kmem_free(i_lnode, sizeof (i_lnode_t)); +} + +static void +i_lnode_check_free(i_lnode_t *i_lnode) +{ +	/* This lnode and its dip must have been snapshotted */ +	ASSERT(i_lnode->self > 0); +	ASSERT(i_lnode->di_node->self > 0); + +	/* at least 1 link (in or out) must exist for this lnode */ +	ASSERT(i_lnode->link_in || i_lnode->link_out); + +	i_lnode_free(i_lnode); +} + +static i_link_t * +i_link_alloc(int spec_type) +{ +	i_link_t *i_link; + +	i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP); +	i_link->spec_type = spec_type; + +	return (i_link); +} + +static void +i_link_check_free(i_link_t *i_link) +{ +	/* This link must have been snapshotted */ +	ASSERT(i_link->self > 0); + +	/* Both endpoint lnodes must exist for this link */ +	ASSERT(i_link->src_lnode); +	ASSERT(i_link->tgt_lnode); + +	kmem_free(i_link, sizeof (i_link_t)); +} + +/*ARGSUSED*/ +static uint_t +i_lnode_hashfunc(void *arg, mod_hash_key_t key) +{ +	i_lnode_t	*i_lnode = (i_lnode_t *)key; +	struct di_node	*ptr; +	dev_t		dev; 
+ +	dev = i_lnode->devt; +	if (dev != DDI_DEV_T_NONE) +		return (i_lnode->modid + getminor(dev) + getmajor(dev)); + +	ptr = i_lnode->di_node; +	if (ptr) { +		uintptr_t k = (uintptr_t)ptr; + +		/* +		 * Check the di_node only after the NULL test above, so the +		 * guard is not defeated by an earlier dereference. +		 */ +		ASSERT(ptr->self > 0); +		k >>= (int)highbit(sizeof (struct di_node)); +		return ((uint_t)k); +	} + +	return (i_lnode->modid); +} + +/* + * Order two lnodes by modid, then by di_node pointer, then by dev_t. + * Returns 0 only when all three fields match, otherwise -1 or 1. + */ +static int +i_lnode_cmp(void *arg1, void *arg2) +{ +	i_lnode_t	*i_lnode1 = (i_lnode_t *)arg1; +	i_lnode_t	*i_lnode2 = (i_lnode_t *)arg2; + +	if (i_lnode1->modid != i_lnode2->modid) { +		return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1); +	} + +	if (i_lnode1->di_node != i_lnode2->di_node) +		return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1); + +	if (i_lnode1->devt != i_lnode2->devt) +		return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1); + +	return (0); +} + +/* + * An lnode represents a {modid, dip, dev_t} tuple. A link represents a + * {src_lnode, tgt_lnode, spec_type} tuple. + * The following callback assumes that LDI framework ref-counts the + * src_dip and tgt_dip while invoking this callback. + */ +static int +di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg) +{ +	struct di_state	*st = (struct di_state *)arg; +	i_lnode_t	*src_lnode, *tgt_lnode, *i_lnode; +	i_link_t	**i_link_next, *i_link; +	di_off_t	soff, toff; +	mod_hash_val_t	nodep = NULL; +	int		res; + +	/* +	 * if the source or target of this device usage information doesn't +	 * correspond to a device node then we don't report it via +	 * libdevinfo so return. +	 */ +	if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL)) +		return (LDI_USAGE_CONTINUE); + +	ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip)); +	ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip)); + +	/* +	 * Skip the ldi_usage if either src or tgt dip is not in the +	 * snapshot. This saves us from pruning bad lnodes/links later. 
+	 */ +	if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0) +		return (LDI_USAGE_CONTINUE); +	if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0) +		return (LDI_USAGE_CONTINUE); + +	ASSERT(soff > 0); +	ASSERT(toff > 0); + +	/* +	 * allocate an i_lnode and add it to the lnode hash +	 * if it is not already present. For this particular +	 * link the lnode is a source, but it may +	 * participate as tgt or src in any number of layered +	 * operations - so it may already be in the hash. +	 */ +	i_lnode = i_lnode_alloc(ldi_usage->src_modid); +	i_lnode->di_node = (struct di_node *)di_mem_addr(st, soff); +	i_lnode->devt = ldi_usage->src_devt; + +	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); +	if (res == MH_ERR_NOTFOUND) { +		/* +		 * new i_lnode +		 * add it to the hash and increment the lnode count +		 */ +		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); +		ASSERT(res == 0); +		st->lnode_count++; +		src_lnode = i_lnode; +	} else { +		/* this i_lnode already exists in the lnode_hash */ +		i_lnode_free(i_lnode); +		src_lnode = (i_lnode_t *)nodep; +	} + +	/* +	 * allocate a tgt i_lnode and add it to the lnode hash +	 */ +	i_lnode = i_lnode_alloc(ldi_usage->tgt_modid); +	i_lnode->di_node = (struct di_node *)di_mem_addr(st, toff); +	i_lnode->devt = ldi_usage->tgt_devt; + +	res = mod_hash_find(st->lnode_hash, i_lnode, &nodep); +	if (res == MH_ERR_NOTFOUND) { +		/* +		 * new i_lnode +		 * add it to the hash and increment the lnode count +		 */ +		res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode); +		ASSERT(res == 0); +		st->lnode_count++; +		tgt_lnode = i_lnode; +	} else { +		/* this i_lnode already exists in the lnode_hash */ +		i_lnode_free(i_lnode); +		tgt_lnode = (i_lnode_t *)nodep; +	} + +	/* +	 * allocate a i_link +	 */ +	i_link = i_link_alloc(ldi_usage->tgt_spec_type); +	i_link->src_lnode = src_lnode; +	i_link->tgt_lnode = tgt_lnode; + +	/* +	 * add this link onto the src i_lnodes outbound i_link list +	 */ +	i_link_next = 
&(src_lnode->link_out); +	while (*i_link_next != NULL) { +		if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) && +		    (i_link->spec_type == (*i_link_next)->spec_type)) { +			/* this link already exists */ +			kmem_free(i_link, sizeof (i_link_t)); +			return (LDI_USAGE_CONTINUE); +		} +		i_link_next = &((*i_link_next)->src_link_next); +	} +	*i_link_next = i_link; + +	/* +	 * add this link onto the tgt i_lnodes inbound i_link list +	 */ +	i_link_next = &(tgt_lnode->link_in); +	while (*i_link_next != NULL) { +		ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0); +		i_link_next = &((*i_link_next)->tgt_link_next); +	} +	*i_link_next = i_link; + +	/* +	 * add this i_link to the link hash +	 */ +	res = mod_hash_insert(st->link_hash, i_link, i_link); +	ASSERT(res == 0); +	st->link_count++; + +	return (LDI_USAGE_CONTINUE); +} + +struct i_layer_data { +	struct di_state	*st; +	int		lnode_count; +	int		link_count; +	di_off_t	lnode_off; +	di_off_t 	link_off; +}; + +/*ARGSUSED*/ +static uint_t +i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ +	i_link_t		*i_link  = (i_link_t *)key; +	struct i_layer_data	*data = arg; +	struct di_link		*me; +	struct di_lnode		*melnode; +	struct di_node		*medinode; + +	ASSERT(i_link->self == 0); + +	i_link->self = data->link_off + +	    (data->link_count * sizeof (struct di_link)); +	data->link_count++; + +	ASSERT(data->link_off > 0 && data->link_count > 0); +	ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */ +	ASSERT(data->link_count <= data->st->link_count); + +	/* fill in fields for the di_link snapshot */ +	me = (struct di_link *)di_mem_addr(data->st, i_link->self); +	me->self = i_link->self; +	me->spec_type = i_link->spec_type; + +	/* +	 * The src_lnode and tgt_lnode i_lnode_t for this i_link_t +	 * are created during the LDI table walk. Since we are +	 * walking the link hash, the lnode hash has already been +	 * walked and the lnodes have been snapshotted. 
Save lnode +	 * offsets. +	 */ +	me->src_lnode = i_link->src_lnode->self; +	me->tgt_lnode = i_link->tgt_lnode->self; + +	/* +	 * Save this link's offset in the src_lnode snapshot's link_out +	 * field +	 */ +	melnode = (struct di_lnode *)di_mem_addr(data->st, me->src_lnode); +	me->src_link_next = melnode->link_out; +	melnode->link_out = me->self; + +	/* +	 * Put this link on the tgt_lnode's link_in field +	 */ +	melnode = (struct di_lnode *)di_mem_addr(data->st, me->tgt_lnode); +	me->tgt_link_next = melnode->link_in; +	melnode->link_in = me->self; + +	/* +	 * An i_lnode_t is only created if the corresponding dip exists +	 * in the snapshot. A pointer to the di_node is saved in the +	 * i_lnode_t when it is allocated. For this link, get the di_node +	 * for the source lnode. Then put the link on the di_node's list +	 * of src links +	 */ +	medinode = i_link->src_lnode->di_node; +	me->src_node_next = medinode->src_links; +	medinode->src_links = me->self; + +	/* +	 * Put this link on the tgt_links list of the target +	 * dip. 
+	 */ +	medinode = i_link->tgt_lnode->di_node; +	me->tgt_node_next = medinode->tgt_links; +	medinode->tgt_links = me->self; + +	return (MH_WALK_CONTINUE); +} + +/*ARGSUSED*/ +static uint_t +i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) +{ +	i_lnode_t		*i_lnode = (i_lnode_t *)key; +	struct i_layer_data	*data = arg; +	struct di_lnode		*me; +	struct di_node		*medinode; + +	ASSERT(i_lnode->self == 0); + +	i_lnode->self = data->lnode_off + +	    (data->lnode_count * sizeof (struct di_lnode)); +	data->lnode_count++; + +	ASSERT(data->lnode_off > 0 && data->lnode_count > 0); +	ASSERT(data->link_count == 0); /* links not done yet */ +	ASSERT(data->lnode_count <= data->st->lnode_count); + +	/* fill in fields for the di_lnode snapshot */ +	me = (struct di_lnode *)di_mem_addr(data->st, i_lnode->self); +	me->self = i_lnode->self; + +	if (i_lnode->devt == DDI_DEV_T_NONE) { +		me->dev_major = (major_t)-1; +		me->dev_minor = (minor_t)-1; +	} else { +		me->dev_major = getmajor(i_lnode->devt); +		me->dev_minor = getminor(i_lnode->devt); +	} + +	/* +	 * The dip corresponding to this lnode must exist in +	 * the snapshot or we wouldn't have created the i_lnode_t +	 * during LDI walk. Save the offset of the dip. +	 */ +	ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0); +	me->node = i_lnode->di_node->self; + +	/* +	 * There must be at least one link in or out of this lnode +	 * or we wouldn't have created it. These fields will be set +	 * during the link hash walk. +	 */ +	ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL)); + +	/* +	 * set the offset of the devinfo node associated with this +	 * lnode. Also update the node_next pointer.  This pointer +	 * is set if there are multiple lnodes associated with the same +	 * devinfo node.  (could occur when multiple minor nodes +	 * are open for one device, etc.) 
+	 */ +	medinode = i_lnode->di_node; +	me->node_next = medinode->lnodes; +	medinode->lnodes = me->self; + +	return (MH_WALK_CONTINUE); +} + +static di_off_t +di_getlink_data(di_off_t off, struct di_state *st) +{ +	struct i_layer_data data = {0}; +	size_t size; + +	dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off)); + +	st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32, +	    mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free, +	    i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP); + +	st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32, +	    (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t)); + +	/* get driver layering information */ +	(void) ldi_usage_walker(st, di_ldi_callback); + +	/* check if there is any link data to include in the snapshot */ +	if (st->lnode_count == 0) { +		ASSERT(st->link_count == 0); +		goto out; +	} + +	ASSERT(st->link_count != 0); + +	/* get a pointer to snapshot memory for all the di_lnodes */ +	size = sizeof (struct di_lnode) * st->lnode_count; +	data.lnode_off = off = di_checkmem(st, off, size); +	off += DI_ALIGN(size); + +	/* get a pointer to snapshot memory for all the di_links */ +	size = sizeof (struct di_link) * st->link_count; +	data.link_off = off = di_checkmem(st, off, size); +	off += DI_ALIGN(size); + +	data.lnode_count = data.link_count = 0; +	data.st = st; + +	/* +	 * We have lnodes and links that will go into the +	 * snapshot, so let's walk the respective hashes +	 * and snapshot them. The various linkages are +	 * also set up during the walk. 
+	 */ +	mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data); +	ASSERT(data.lnode_count == st->lnode_count); + +	mod_hash_walk(st->link_hash, i_link_walker, (void *)&data); +	ASSERT(data.link_count == st->link_count); + +out: +	/* free up the i_lnodes and i_links used to create the snapshot */ +	mod_hash_destroy_hash(st->lnode_hash); +	mod_hash_destroy_hash(st->link_hash); +	st->lnode_count = 0; +	st->link_count = 0; + +	return (off); +} + + +/* + * Copy all minor data nodes attached to a devinfo node into the snapshot. + * It is called from di_copynode with devi_lock held. + */ +static di_off_t +di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node, +	struct di_state *st) +{ +	di_off_t off; +	struct di_minor *me; + +	dcmn_err2((CE_CONT, "di_getmdata:\n")); + +	/* +	 * check memory first +	 */ +	off = di_checkmem(st, *off_p, sizeof (struct di_minor)); +	*off_p = off; + +	do { +		me = (struct di_minor *)di_mem_addr(st, off); +		me->self = off; +		me->type = mnode->type; +		me->node = node; +		me->user_private_data = NULL; + +		off += DI_ALIGN(sizeof (struct di_minor)); + +		/* +		 * Split dev_t to major/minor, so it works for +		 * both ILP32 and LP64 model +		 */ +		me->dev_major = getmajor(mnode->ddm_dev); +		me->dev_minor = getminor(mnode->ddm_dev); +		me->spec_type = mnode->ddm_spec_type; + +		if (mnode->ddm_name) { +			off = di_checkmem(st, off, +				strlen(mnode->ddm_name) + 1); +			me->name = off; +			(void) strcpy(di_mem_addr(st, off), mnode->ddm_name); +			off += DI_ALIGN(strlen(mnode->ddm_name) + 1); +		} + +		if (mnode->ddm_node_type) { +			off = di_checkmem(st, off, +				strlen(mnode->ddm_node_type) + 1); +			me->node_type = off; +			(void) strcpy(di_mem_addr(st, off), +					mnode->ddm_node_type); +			off += DI_ALIGN(strlen(mnode->ddm_node_type) + 1); +		} + +		off = di_checkmem(st, off, sizeof (struct di_minor)); +		me->next = off; +		mnode = mnode->next; +	} while (mnode); + +	me->next = 0; + +	return (off); +} + +/* + * 
di_register_dip(), di_dip_find(): The dip must be protected + * from deallocation when using these routines - this can either + * be a reference count, a busy hold or a per-driver lock. + */ + +static void +di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off) +{ +	struct dev_info *node = DEVI(dip); +	struct di_key *key = kmem_zalloc(sizeof (*key), KM_SLEEP); +	struct di_dkey *dk; + +	ASSERT(dip); +	ASSERT(off > 0); + +	key->k_type = DI_DKEY; +	dk = &(key->k_u.dkey); + +	dk->dk_dip = dip; +	dk->dk_major = node->devi_major; +	dk->dk_inst = node->devi_instance; +	dk->dk_nodeid = node->devi_nodeid; + +	if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key, +	    (mod_hash_val_t)(uintptr_t)off) != 0) { +		panic( +		    "duplicate devinfo (%p) registered during device " +		    "tree walk", (void *)dip); +	} +} + +/* + * Look up the snapshot offset registered for dip by di_register_dip(). + * Returns 0 and stores the offset in *off_p when the dip is found in + * st->reg_dip_hash, -1 otherwise (*off_p is then left untouched). + */ +static int +di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p) +{ +	/* +	 * uintptr_t must be used because it matches the size of void *; +	 * mod_hash expects clients to place results into pointer-size +	 * containers; since di_off_t is always a 32-bit offset, alignment +	 * would otherwise be broken on 64-bit kernels. +	 */ +	uintptr_t	offset; +	struct		di_key key = {0}; +	struct		di_dkey *dk; + +	ASSERT(st->reg_dip_hash); +	ASSERT(dip); +	ASSERT(off_p); + + +	key.k_type = DI_DKEY; +	dk = &(key.k_u.dkey); + +	dk->dk_dip = dip; +	dk->dk_major = DEVI(dip)->devi_major; +	dk->dk_inst = DEVI(dip)->devi_instance; +	dk->dk_nodeid = DEVI(dip)->devi_nodeid; + +	if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key, +	    (mod_hash_val_t *)&offset) == 0) { +		*off_p = (di_off_t)offset; +		return (0); +	} else { +		return (-1); +	} +} + +/* + * di_register_pip(), di_pip_find(): The pip must be protected from deallocation + * when using these routines. The caller must do this by protecting the + * client(or phci)<->pip linkage while traversing the list and then holding the + * pip when it is found in the list. 
+ */ + +static void +di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off) +{ +	struct di_key	*key = kmem_zalloc(sizeof (*key), KM_SLEEP); +	char		*path_addr; +	struct di_pkey	*pk; + +	ASSERT(pip); +	ASSERT(off > 0); + +	key->k_type = DI_PKEY; +	pk = &(key->k_u.pkey); + +	pk->pk_pip = pip; +	path_addr = mdi_pi_get_addr(pip); +	if (path_addr) +		pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP); +	pk->pk_client = mdi_pi_get_client(pip); +	pk->pk_phci = mdi_pi_get_phci(pip); + +	if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key, +	    (mod_hash_val_t)(uintptr_t)off) != 0) { +		panic( +		    "duplicate pathinfo (%p) registered during device " +		    "tree walk", (void *)pip); +	} +} + +/* + * As with di_register_pip, the caller must hold or lock the pip + */ +static int +di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p) +{ +	/* +	 * uintptr_t must be used because it matches the size of void *; +	 * mod_hash expects clients to place results into pointer-size +	 * containers; since di_off_t is always a 32-bit offset, alignment +	 * would otherwise be broken on 64-bit kernels. 
+	 */ +	uintptr_t	offset; +	struct di_key	key = {0}; +	struct di_pkey	*pk; + +	ASSERT(st->reg_pip_hash); +	ASSERT(off_p); + +	if (pip == NULL) { +		*off_p = 0; +		return (0); +	} + +	key.k_type = DI_PKEY; +	pk = &(key.k_u.pkey); + +	pk->pk_pip = pip; +	pk->pk_path_addr = mdi_pi_get_addr(pip); +	pk->pk_client = mdi_pi_get_client(pip); +	pk->pk_phci = mdi_pi_get_phci(pip); + +	if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key, +	    (mod_hash_val_t *)&offset) == 0) { +		*off_p = (di_off_t)offset; +		return (0); +	} else { +		return (-1); +	} +} + +static di_path_state_t +path_state_convert(mdi_pathinfo_state_t st) +{ +	switch (st) { +	case MDI_PATHINFO_STATE_ONLINE: +		return (DI_PATH_STATE_ONLINE); +	case MDI_PATHINFO_STATE_STANDBY: +		return (DI_PATH_STATE_STANDBY); +	case MDI_PATHINFO_STATE_OFFLINE: +		return (DI_PATH_STATE_OFFLINE); +	case MDI_PATHINFO_STATE_FAULT: +		return (DI_PATH_STATE_FAULT); +	default: +		return (DI_PATH_STATE_UNKNOWN); +	} +} + + +static di_off_t +di_path_getprop(mdi_pathinfo_t *pip, di_off_t off, di_off_t *off_p, +    struct di_state *st) +{ +	nvpair_t *prop = NULL; +	struct di_path_prop *me; + +	if (mdi_pi_get_next_prop(pip, NULL) == NULL) { +		*off_p = 0; +		return (off); +	} + +	off = di_checkmem(st, off, sizeof (struct di_path_prop)); +	*off_p = off; + +	while (prop = mdi_pi_get_next_prop(pip, prop)) { +		int delta = 0; + +		me = (struct di_path_prop *)di_mem_addr(st, off); +		me->self = off; +		off += sizeof (struct di_path_prop); + +		/* +		 * property name +		 */ +		off = di_checkmem(st, off, strlen(nvpair_name(prop)) + 1); +		me->prop_name = off; +		(void) strcpy(di_mem_addr(st, off), nvpair_name(prop)); +		off += strlen(nvpair_name(prop)) + 1; + +		switch (nvpair_type(prop)) { +		case DATA_TYPE_BYTE: +		case DATA_TYPE_INT16: +		case DATA_TYPE_UINT16: +		case DATA_TYPE_INT32: +		case DATA_TYPE_UINT32: +			delta = sizeof (int32_t); +			me->prop_type = DDI_PROP_TYPE_INT; +			off = di_checkmem(st, off, delta); +			(void) 
nvpair_value_int32(prop, +			    (int32_t *)di_mem_addr(st, off)); +			break; + +		case DATA_TYPE_INT64: +		case DATA_TYPE_UINT64: +			delta = sizeof (int64_t); +			me->prop_type = DDI_PROP_TYPE_INT64; +			off = di_checkmem(st, off, delta); +			(void) nvpair_value_int64(prop, +			    (int64_t *)di_mem_addr(st, off)); +			break; + +		case DATA_TYPE_STRING: +		{ +			char *str; +			(void) nvpair_value_string(prop, &str); +			delta = strlen(str) + 1; +			me->prop_type = DDI_PROP_TYPE_STRING; +			off = di_checkmem(st, off, delta); +			(void) strcpy(di_mem_addr(st, off), str); +			break; +		} +		case DATA_TYPE_BYTE_ARRAY: +		case DATA_TYPE_INT16_ARRAY: +		case DATA_TYPE_UINT16_ARRAY: +		case DATA_TYPE_INT32_ARRAY: +		case DATA_TYPE_UINT32_ARRAY: +		case DATA_TYPE_INT64_ARRAY: +		case DATA_TYPE_UINT64_ARRAY: +		{ +			uchar_t *buf = NULL; +			uint_t nelems = 0; + +			/* +			 * The byte-array accessor is used for every array +			 * type listed above. If the lookup fails, buf and +			 * nelems must not be trusted, so record an empty +			 * property rather than copying from an +			 * uninitialized pointer and length. +			 */ +			if (nvpair_value_byte_array(prop, &buf, +			    &nelems) != 0 || buf == NULL) +				nelems = 0; +			delta = nelems; +			me->prop_type = DDI_PROP_TYPE_BYTE; +			if (nelems != 0) { +				off = di_checkmem(st, off, delta); +				bcopy(buf, di_mem_addr(st, off), nelems); +			} +			break; +		} + +		default:	/* Unknown or unhandled type; skip it */ +			delta = 0; +			break; +		} + +		if (delta > 0) { +			me->prop_data = off; +		} + +		me->prop_len = delta; +		off += delta; + +		off = di_checkmem(st, off, sizeof (struct di_path_prop)); +		me->prop_next = off; +	} + +	me->prop_next = 0; +	return (off); +} + +/* + * Record noff as the client (get_client != 0) or phci (get_client == 0) + * endpoint of the path snapshot 'me', point *off_pp at the matching + * linkage field, and clear the snap-state flags for that endpoint. + */ +static void +di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp, +    int get_client) +{ +	if (get_client) { +		ASSERT(me->path_client == 0); +		me->path_client = noff; +		ASSERT(me->path_c_link == 0); +		*off_pp = &me->path_c_link; +		me->path_snap_state &= +		    ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK); +	} else { +		ASSERT(me->path_phci == 0); +		me->path_phci = noff; +		ASSERT(me->path_p_link == 0); +		*off_pp = &me->path_p_link; +		me->path_snap_state &= +		    ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK); +	} +} + +/* + * poff_p: 
/*
 * Snapshot the pathinfo nodes hanging off one endpoint devinfo node.
 *
 * dip       : endpoint devinfo node whose paths are walked.
 * poff_p    : pointer to the linkage field. This links pips along the
 *	       client|phci linkage list. On entry *poff_p also supplies the
 *	       first free snapshot offset.
 * noff      : Offset for the endpoint dip snapshot.
 * st        : snapshot state.
 * get_client: non-zero when dip is the client endpoint, zero for the phci.
 *
 * Returns the next free snapshot offset. The last linkage field visited is
 * set to 0 to terminate the list.
 */
static di_off_t
di_getpath_data(dev_info_t *dip, di_off_t *poff_p, di_off_t noff,
    struct di_state *st, int get_client)
{
	di_off_t off;
	mdi_pathinfo_t *pip;
	struct di_path *me;
	mdi_pathinfo_t *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);

	dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));

	/*
	 * The naming of the following mdi_xyz() is unfortunately
	 * non-intuitive. mdi_get_next_phci_path() follows the
	 * client_link i.e. the list of pip's belonging to the
	 * given client dip.
	 */
	if (get_client)
		next_pip = &mdi_get_next_phci_path;
	else
		next_pip = &mdi_get_next_client_path;

	off = *poff_p;

	pip = NULL;
	while (pip = (*next_pip)(dip, pip)) {
		mdi_pathinfo_state_t state;
		di_off_t stored_offset;

		dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));

		/* the pip stays locked while its fields are read below */
		mdi_pi_lock(pip);

		if (di_pip_find(st, pip, &stored_offset) != -1) {
			/*
			 * We've already seen this pathinfo node so we need to
			 * take care not to snap it again; However, one endpoint
			 * and linkage will be set here. The other endpoint
			 * and linkage has already been set when the pip was
			 * first snapshotted i.e. when the other endpoint dip
			 * was snapshotted.
			 */
			me = (struct di_path *)di_mem_addr(st, stored_offset);

			/* link the previous record to the existing one */
			*poff_p = stored_offset;

			/* poff_p now points at this record's linkage field */
			di_path_one_endpoint(me, noff, &poff_p, get_client);

			/*
			 * The other endpoint and linkage were set when this
			 * pip was snapshotted. So we are done with both
			 * endpoints and linkages.
			 */
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
			ASSERT(!(me->path_snap_state &
			    (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));

			mdi_pi_unlock(pip);
			continue;
		}

		/*
		 * Now that we need to snapshot this pip, check memory
		 */
		off = di_checkmem(st, off, sizeof (struct di_path));
		me = (struct di_path *)di_mem_addr(st, off);
		me->self = off;
		/* link the previous record (or list head) to the new one */
		*poff_p = off;
		off += sizeof (struct di_path);

		/* start with every endpoint and linkage marked missing */
		me->path_snap_state =
		    DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
		me->path_snap_state |=
		    DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;

		/*
		 * Zero out fields as di_checkmem() doesn't guarantee
		 * zero-filled memory
		 */
		me->path_client = me->path_phci = 0;
		me->path_c_link = me->path_p_link = 0;

		di_path_one_endpoint(me, noff, &poff_p, get_client);

		/*
		 * Note the existence of this pathinfo
		 */
		di_register_pip(st, pip, me->self);

		state = mdi_pi_get_state(pip);
		me->path_state = path_state_convert(state);

		/*
		 * Get intermediate addressing info.
		 */
		off = di_checkmem(st, off, strlen(mdi_pi_get_addr(pip)) + 1);
		me->path_addr = off;
		(void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
		off += strlen(mdi_pi_get_addr(pip)) + 1;

		/*
		 * Get path properties if props are to be included in the
		 * snapshot
		 */
		if (DINFOPROP & st->command) {
			off = di_path_getprop(pip, off, &me->path_prop, st);
		} else {
			me->path_prop = 0;
		}

		mdi_pi_unlock(pip);
	}

	/* terminate the linkage list at the last record visited */
	*poff_p = 0;

	return (off);
}
/*
 * Copy a list of properties attached to a devinfo node. Called from
 * di_copynode with devi_lock held. The dip is passed in case we need
 * to call the driver's prop_op entry. The value of list indicates
 * which list we are copying. Possible values are:
 * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
 *
 * On entry *off_p is the first free snapshot offset; on return it holds
 * the offset of the first struct di_prop written, or 0 when the driver
 * property list is skipped because the node is not attached. The return
 * value is the next free snapshot offset.
 *
 * prop is dereferenced without a NULL check once the copy loop is
 * reached, so the caller is presumably expected to pass a non-empty list.
 */
static di_off_t
di_getprop(struct ddi_prop *prop, di_off_t *off_p, struct di_state *st,
	struct dev_info *dip, int list)
{
	dev_t dev;
	int (*prop_op)();
	int off, need_prop_op = 0;
	int prop_op_fail = 0;
	ddi_prop_t *propp = NULL;
	struct di_prop *pp;
	struct dev_ops *ops = NULL;
	int prop_len;
	caddr_t prop_val;


	dcmn_err2((CE_CONT, "di_getprop:\n"));

	ASSERT(st != NULL);

	dcmn_err((CE_CONT, "copy property list at addr %p\n", (void *)prop));

	/*
	 * Figure out if we need to call driver's prop_op entry point.
	 * The conditions are:
	 *	-- driver property list
	 *	-- driver must be attached and held
	 *	-- driver's cb_prop_op != ddi_prop_op
	 *		or parent's bus_prop_op != ddi_bus_prop_op
	 *
	 * NOTE(review): only cb_prop_op is examined below; no bus_prop_op
	 * check is visible in this function.
	 */

	if (list != DI_PROP_DRV_LIST) {
		goto getprop;
	}

	/*
	 * If driver is not attached or if major is -1, we ignore
	 * the driver property list. No one should rely on such
	 * properties.
	 */
	if (i_ddi_node_state((dev_info_t *)dip) < DS_ATTACHED) {
		off = *off_p;
		*off_p = 0;
		return (off);
	}

	/*
	 * Now we have a driver which is held. We can examine entry points
	 * and check the condition listed above.
	 */
	ops = dip->devi_ops;

	/*
	 * Some nexus drivers incorrectly set cb_prop_op to nodev,
	 * nulldev or even NULL.
	 */
	if (ops && ops->devo_cb_ops &&
	    (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
	    (ops->devo_cb_ops->cb_prop_op != nodev) &&
	    (ops->devo_cb_ops->cb_prop_op != nulldev) &&
	    (ops->devo_cb_ops->cb_prop_op != NULL)) {
		need_prop_op = 1;
	}

getprop:
	/*
	 * check memory availability
	 */
	off = di_checkmem(st, *off_p, sizeof (struct di_prop));
	*off_p = off;
	/*
	 * Now copy properties
	 *
	 * First pass: write one struct di_prop plus name per property.
	 * Values are copied here only when prop_op is not needed.
	 */
	do {
		pp = (struct di_prop *)di_mem_addr(st, off);
		pp->self = off;
		/*
		 * Split dev_t to major/minor, so it works for
		 * both ILP32 and LP64 model
		 */
		pp->dev_major = getmajor(prop->prop_dev);
		pp->dev_minor = getminor(prop->prop_dev);
		pp->prop_flags = prop->prop_flags;
		pp->prop_list = list;

		/*
		 * property name
		 */
		off += sizeof (struct di_prop);
		if (prop->prop_name) {
			off = di_checkmem(st, off, strlen(prop->prop_name)
			    + 1);
			pp->prop_name = off;
			(void) strcpy(di_mem_addr(st, off), prop->prop_name);
			off += strlen(prop->prop_name) + 1;
		}

		/*
		 * Set prop_len here. This may change later
		 * if cb_prop_op returns a different length.
		 */
		pp->prop_len = prop->prop_len;
		if (!need_prop_op) {
			if (prop->prop_val == NULL) {
				dcmn_err((CE_WARN,
				    "devinfo: property fault at %p",
				    (void *)prop));
				pp->prop_data = -1;
			} else if (prop->prop_len != 0) {
				off = di_checkmem(st, off, prop->prop_len);
				pp->prop_data = off;
				bcopy(prop->prop_val, di_mem_addr(st, off),
				    prop->prop_len);
				off += DI_ALIGN(pp->prop_len);
			}
		}

		off = di_checkmem(st, off, sizeof (struct di_prop));
		pp->next = off;
		prop = prop->prop_next;
	} while (prop);

	/* terminate the chain at the last record written */
	pp->next = 0;

	if (!need_prop_op) {
		dcmn_err((CE_CONT, "finished property "
		    "list at offset 0x%x\n", off));
		return (off);
	}

	/*
	 * If there is a need to call driver's prop_op entry,
	 * we must release driver's devi_lock, because the
	 * cb_prop_op entry point will grab it.
	 *
	 * The snapshot memory has already been allocated above,
	 * which means the length of an active property should
	 * remain fixed for this implementation to work.
	 */


	prop_op = ops->devo_cb_ops->cb_prop_op;
	pp = (struct di_prop *)di_mem_addr(st, *off_p);

	mutex_exit(&dip->devi_lock);

	/*
	 * Second pass: walk the records written above and fetch each
	 * value through the driver's prop_op.
	 */
	do {
		int err;
		struct di_prop *tmp;

		/* remember the successor before this record is filled in */
		if (pp->next) {
			tmp = (struct di_prop *)
			    di_mem_addr(st, pp->next);
		} else {
			tmp = NULL;
		}

		/*
		 * call into driver's prop_op entry point
		 *
		 * Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY
		 */
		dev = makedevice(pp->dev_major, pp->dev_minor);
		if (dev == DDI_DEV_T_NONE)
			dev = DDI_DEV_T_ANY;

		/*
		 * NOTE(review): the message below says PROP_LEN_AND_VAL_BUF
		 * while the call uses PROP_LEN_AND_VAL_ALLOC, and it prints
		 * pp->prop_data before this pass has assigned it.
		 */
		dcmn_err((CE_CONT, "call prop_op"
		    "(%lx, %p, PROP_LEN_AND_VAL_BUF, "
		    "DDI_PROP_DONTPASS, \"%s\", %p, &%d)\n",
		    dev,
		    (void *)dip,
		    (char *)di_mem_addr(st, pp->prop_name),
		    (void *)di_mem_addr(st, pp->prop_data),
		    pp->prop_len));

		if ((err = (*prop_op)(dev, (dev_info_t)dip,
		    PROP_LEN_AND_VAL_ALLOC, DDI_PROP_DONTPASS,
		    (char *)di_mem_addr(st, pp->prop_name),
		    &prop_val, &prop_len)) != DDI_PROP_SUCCESS) {
			/*
			 * prop_op failed: fall back to the value found on
			 * the driver property list, if any.
			 */
			if ((propp = i_ddi_prop_search(dev,
			    (char *)di_mem_addr(st, pp->prop_name),
			    (uint_t)pp->prop_flags,
			    &(DEVI(dip)->devi_drv_prop_ptr))) != NULL) {
				pp->prop_len = propp->prop_len;
				if (pp->prop_len != 0) {
					off = di_checkmem(st, off,
					    pp->prop_len);
					pp->prop_data = off;
					bcopy(propp->prop_val, di_mem_addr(st,
					    pp->prop_data), propp->prop_len);
					off += DI_ALIGN(pp->prop_len);
				}
			} else {
				prop_op_fail = 1;
			}
		} else if (prop_len != 0) {
			/* copy the driver-allocated value, then free it */
			pp->prop_len = prop_len;
			off = di_checkmem(st, off, prop_len);
			pp->prop_data = off;
			bcopy(prop_val, di_mem_addr(st, off), prop_len);
			off += DI_ALIGN(prop_len);
			kmem_free(prop_val, prop_len);
		}

		/*
		 * NOTE(review): prop_op_fail is never cleared inside this
		 * loop, so after one failure every later property also has
		 * prop_data set to -1 -- confirm this is intended.
		 */
		if (prop_op_fail) {
			pp->prop_data = -1;
			dcmn_err((CE_WARN, "devinfo: prop_op failure "
			    "for \"%s\" err %d",
			    di_mem_addr(st, pp->prop_name), err));
		}

		pp = tmp;

	} while (pp);

	/* retake the lock the caller holds */
	mutex_enter(&dip->devi_lock);
	dcmn_err((CE_CONT, "finished property list at offset 0x%x\n", off));
	return (off);
}

/*
 * find private data format attached to a dip
 * parent = 1 to match driver name of parent dip (for parent private data)
 *	0 to match driver name of current dip (for driver private data)
 *
 * Returns a pointer to the matching di_priv_format entry inside the
 * snapshot, or NULL when the node (or its parent) is NULL, has no major
 * number or driver name, or no entry matches.
 */
#define	DI_MATCH_DRIVER	0
#define	DI_MATCH_PARENT	1

struct di_priv_format *
di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
{
	int i, count, len;
	char *drv_name;
	major_t major;
	struct di_all *all;
	struct di_priv_format *form;

	dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
		node->devi_node_name, match));

	if (match == DI_MATCH_PARENT) {
		node = DEVI(node->devi_parent);
	}

	if (node == NULL) {
		return (NULL);
	}

	major = ddi_name_to_major(node->devi_binding_name);
	if (major == (major_t)(-1)) {
		return (NULL);
	}

	/*
	 * Match the driver name.
	 */
	drv_name = ddi_major_to_name(major);
	if ((drv_name == NULL) || *drv_name == '\0') {
		return (NULL);
	}

	/* Now get the di_priv_format array */
	all = (struct di_all *)di_mem_addr(st, 0);

	/* both branches index from snapshot offset 0 (all == offset 0) */
	if (match == DI_MATCH_PARENT) {
		count = all->n_ppdata;
		form = (struct di_priv_format *)
			(di_mem_addr(st, 0) + all->ppdata_format);
	} else {
		count = all->n_dpdata;
		form = (struct di_priv_format *)
			((caddr_t)all + all->dpdata_format);
	}

	len = strlen(drv_name);
	for (i = 0; i < count; i++) {
		char *tmp;

		/* drv_name holds one or more space-separated names */
		tmp = form[i].drv_name;
		while (tmp && (*tmp != '\0')) {
			/*
			 * NOTE(review): this compares only the first len
			 * characters, so a listed name that merely starts
			 * with drv_name also matches -- confirm intended.
			 */
			if (strncmp(drv_name, tmp, len) == 0) {
				return (&form[i]);
			}
			/*
			 * Move to next driver name, skipping a white space
			 */
			if (tmp = strchr(tmp, ' ')) {
				tmp++;
			}
		}
	}

	return (NULL);
}
/*
 * The following functions copy data as specified by the format passed in.
 * To prevent invalid format from panicing the system, we call on_fault().
 *
 * On success *off_p is set to the offset of the copied private data and
 * the next free offset is returned. On failure *off_p is set to -1 and
 * the offset returned by di_checkmem() is returned, so nothing is kept.
 */
#define	DI_MAX_PRIVDATA	(PAGESIZE >> 1)	/* max private data size */

static di_off_t
di_getprvdata(struct di_priv_format *pdp, void *data, di_off_t *off_p,
	struct di_state *st)
{
	caddr_t pa;
	void *ptr;
	int i, size, repeat;
	di_off_t off, off0, *tmp;

	label_t ljb;

	dcmn_err2((CE_CONT, "di_getprvdata:\n"));

	/*
	 * check memory availability. Private data size is
	 * limited to DI_MAX_PRIVDATA.
	 */
	off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);

	if ((pdp->bytes <= 0) || pdp->bytes > DI_MAX_PRIVDATA) {
		goto failure;
	}

	if (!on_fault(&ljb)) {
		/* copy the struct */
		bcopy(data, di_mem_addr(st, off), pdp->bytes);
		/* off0 tracks bytes used, relative to off */
		off0 = DI_ALIGN(pdp->bytes);

		/* dereferencing pointers */
		for (i = 0; i < MAX_PTR_IN_PRV; i++) {

			if (pdp->ptr[i].size == 0) {
				goto success;	/* no more ptrs */
			}

			/*
			 * first, get the pointer content
			 */
			if ((pdp->ptr[i].offset < 0) ||
				(pdp->ptr[i].offset >
				pdp->bytes - sizeof (char *)))
				goto failure;	/* wrong offset */

			/* read the pointer from the copy made above */
			pa = di_mem_addr(st, off + pdp->ptr[i].offset);
			tmp = (di_off_t *)pa;	/* to store off_t later */

			ptr = *((void **) pa);	/* get pointer value */
			if (ptr == NULL) {	/* if NULL pointer, go on */
				continue;
			}

			/*
			 * next, find the repeat count (array dimension)
			 */
			repeat = pdp->ptr[i].len_offset;

			/*
			 * A value >= 0 is the offset, within the structure,
			 * of an int member holding the array dimension.
			 * A negative value indicates a fixed sized array
			 * whose dimension is the absolute value.
			 *
			 * NOTE(review): if pdp->bytes is an unsigned type,
			 * a negative repeat is converted to a huge unsigned
			 * value by the comparison below and always fails --
			 * confirm against the di_priv_format definition.
			 */
			if (repeat > pdp->bytes - sizeof (int)) {
				goto failure;	/* wrong offset */
			}

			if (repeat >= 0) {
				repeat = *((int *)((caddr_t)data + repeat));
			} else {
				repeat = -repeat;
			}

			/*
			 * next, get the size of the object to be copied
			 */
			size = pdp->ptr[i].size * repeat;

			/*
			 * Limit the total size of object to be copied:
			 * at least 1 byte and no more than what is left
			 * of the DI_MAX_PRIVDATA reservation.
			 */
			if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
				goto failure;	/* wrong size or too big */
			}

			/*
			 * Now copy the data. The pointer in the copied
			 * struct is replaced by off0, i.e. an offset
			 * relative to the start of this private data.
			 */
			*tmp = off0;
			bcopy(ptr, di_mem_addr(st, off + off0), size);
			off0 += DI_ALIGN(size);
		}
	} else {
		goto failure;
	}

success:
	/*
	 * success if reached here
	 */
	no_fault();
	*off_p = off;

	return (off + off0);
	/*NOTREACHED*/

failure:
	/*
	 * fault occurred, or the format failed validation
	 */
	no_fault();
	cmn_err(CE_WARN, "devinfo: fault in private data at %p", data);
	*off_p = -1;	/* set private data to indicate error */

	return (off);
}

/*
 * get parent private data; when no format matches the parent driver,
 * *off_p is set to 0 and the original offset is returned
 */
static di_off_t
di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *ppdp;

	dcmn_err2((CE_CONT, "di_getppdata:\n"));

	/* find the parent data format */
	if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set parent data to none */
		return (off);
	}

	return (di_getprvdata(ppdp, ddi_get_parent_data((dev_info_t *)node),
	    off_p, st));
}

/*
 * get driver private data; when no format matches the node's driver,
 * *off_p is set to 0 and the original offset is returned
 */
static di_off_t
di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
{
	int off;
	struct di_priv_format *dpdp;

	dcmn_err2((CE_CONT, "di_getdpdata:"));

	/* find the driver data format */
	if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
		off = *off_p;
		*off_p = 0;	/* set driver data to none */
		return (off);
	}

	return (di_getprvdata(dpdp, ddi_get_driver_private((dev_info_t *)node),
	    off_p, st));
}

/*
 * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
 * This function encapsulates the state machine:
 *
 *	-> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
 *	|		SNAPSHOT		USRLD	 |
 *	--------------------------------------------------
 *
 * Moving to IOC_IDLE or IOC_DONE is always allowed; IOC_SNAP requires
 * the current state to be IOC_IDLE and IOC_COPY requires IOC_DONE.
 * The check and the update are done under di_lock.
 *
 * Returns 0 on success and -1 on failure
 */
static int
di_setstate(struct di_state *st, int new_state)
{
	int ret = 0;

	mutex_enter(&di_lock);
	switch (new_state) {
	case IOC_IDLE:
	case IOC_DONE:
		break;
	case IOC_SNAP:
		if (st->di_iocstate != IOC_IDLE)
			ret = -1;
		break;
	case IOC_COPY:
		if (st->di_iocstate != IOC_DONE)
			ret = -1;
		break;
	default:
		ret = -1;
	}

	if (ret == 0)
		st->di_iocstate = new_state;
	else
		cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
		    st->di_iocstate, new_state);
	mutex_exit(&di_lock);
	return (ret);
}
All we are guaranteed + * is the di_all struct + 1 byte (for root_path) + */ +static int +header_plus_one_ok(struct di_all *all) +{ +	/* +	 * Refuse to read old versions +	 */ +	if (all->version != DI_SNAPSHOT_VERSION) { +		CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version)); +		return (0); +	} + +	if (all->cache_magic != DI_CACHE_MAGIC) { +		CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic)); +		return (0); +	} + +	if (all->snapshot_time <= 0) { +		CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time)); +		return (0); +	} + +	if (all->top_devinfo == 0) { +		CACHE_DEBUG((DI_ERR, "NULL top devinfo")); +		return (0); +	} + +	if (all->map_size < sizeof (*all) + 1) { +		CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size)); +		return (0); +	} + +	if (all->root_path[0] != '/' || all->root_path[1] != '\0') { +		CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c", +		    all->root_path[0], all->root_path[1])); +		return (0); +	} + +	/* +	 * We can't check checksum here as we just have the header +	 */ + +	return (1); +} + +static int +chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len) +{ +	rlim64_t	rlimit; +	ssize_t		resid; +	int		error = 0; + + +	rlimit = RLIM64_INFINITY; + +	while (len) { +		resid = 0; +		error = vn_rdwr(UIO_WRITE, vp, buf, len, off, +		    UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid); + +		if (error || resid < 0) { +			error = error ? 
error : EIO; +			CACHE_DEBUG((DI_ERR, "write error: %d", error)); +			break; +		} + +		/* +		 * Check if we are making progress +		 */ +		if (resid >= len) { +			error = ENOSPC; +			break; +		} +		buf += len - resid; +		off += len - resid; +		len = resid; +	} + +	return (error); +} + +extern int modrootloaded; + +static void +di_cache_write(struct di_cache *cache) +{ +	struct di_all	*all; +	struct vnode	*vp; +	int		oflags; +	size_t		map_size; +	size_t		chunk; +	offset_t	off; +	int		error; +	char		*buf; + +	ASSERT(DI_CACHE_LOCKED(*cache)); +	ASSERT(!servicing_interrupt()); + +	if (cache->cache_size == 0) { +		ASSERT(cache->cache_data == NULL); +		CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write")); +		return; +	} + +	ASSERT(cache->cache_size > 0); +	ASSERT(cache->cache_data); + +	if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) { +		CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write")); +		return; +	} + +	all = (struct di_all *)cache->cache_data; + +	if (!header_plus_one_ok(all)) { +		CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write")); +		return; +	} + +	ASSERT(strcmp(all->root_path, "/") == 0); + +	/* +	 * The cache_size is the total allocated memory for the cache. +	 * The map_size is the actual size of valid data in the cache. +	 * map_size may be smaller than cache_size but cannot exceed +	 * cache_size. +	 */ +	if (all->map_size > cache->cache_size) { +		CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)." +		    " Skipping write", all->map_size, cache->cache_size)); +		return; +	} + +	/* +	 * First unlink the temp file +	 */ +	error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE); +	if (error && error != ENOENT) { +		CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d", +		    DI_CACHE_TEMP, error)); +	} + +	if (error == EROFS) { +		CACHE_DEBUG((DI_ERR, "RDONLY FS. 
Skipping write")); +		return; +	} + +	vp = NULL; +	oflags = (FCREAT|FWRITE); +	if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags, +	    DI_CACHE_PERMS, &vp, CRCREAT, 0)) { +		CACHE_DEBUG((DI_ERR, "%s: create failed: %d", +		    DI_CACHE_TEMP, error)); +		return; +	} + +	ASSERT(vp); + +	/* +	 * Paranoid: Check if the file is on a read-only FS +	 */ +	if (vn_is_readonly(vp)) { +		CACHE_DEBUG((DI_ERR, "cannot write: readonly FS")); +		goto fail; +	} + +	/* +	 * Note that we only write map_size bytes to disk - this saves +	 * space as the actual cache size may be larger than size of +	 * valid data in the cache. +	 * Another advantage is that it makes verification of size +	 * easier when the file is read later. +	 */ +	map_size = all->map_size; +	off = 0; +	buf = cache->cache_data; + +	while (map_size) { +		ASSERT(map_size > 0); +		/* +		 * Write in chunks so that VM system +		 * is not overwhelmed +		 */ +		if (map_size > di_chunk * PAGESIZE) +			chunk = di_chunk * PAGESIZE; +		else +			chunk = map_size; + +		error = chunk_write(vp, off, buf, chunk); +		if (error) { +			CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d", +			    off, error)); +			goto fail; +		} + +		off += chunk; +		buf += chunk; +		map_size -= chunk; + +		/* Give pageout a chance to run */ +		delay(1); +	} + +	/* +	 * Now sync the file and close it +	 */ +	if (error = VOP_FSYNC(vp, FSYNC, kcred)) { +		CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error)); +	} + +	if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred)) { +		CACHE_DEBUG((DI_ERR, "close() failed: %d", error)); +		VN_RELE(vp); +		return; +	} + +	VN_RELE(vp); + +	/* +	 * Now do the rename +	 */ +	if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) { +		CACHE_DEBUG((DI_ERR, "rename failed: %d", error)); +		return; +	} + +	CACHE_DEBUG((DI_INFO, "Cache write successful.")); + +	return; + +fail: +	(void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred); +	VN_RELE(vp); +} + + +/* + * Since we could be called early in boot, + * 
use kobj_read_file() + */ +static void +di_cache_read(struct di_cache *cache) +{ +	struct _buf	*file; +	struct di_all	*all; +	int		n; +	size_t		map_size, sz, chunk; +	offset_t	off; +	caddr_t		buf; +	uint32_t	saved_crc, crc; + +	ASSERT(modrootloaded); +	ASSERT(DI_CACHE_LOCKED(*cache)); +	ASSERT(cache->cache_data == NULL); +	ASSERT(cache->cache_size == 0); +	ASSERT(!servicing_interrupt()); + +	file = kobj_open_file(DI_CACHE_FILE); +	if (file == (struct _buf *)-1) { +		CACHE_DEBUG((DI_ERR, "%s: open failed: %d", +		    DI_CACHE_FILE, ENOENT)); +		return; +	} + +	/* +	 * Read in the header+root_path first. The root_path must be "/" +	 */ +	all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP); +	n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0); + +	if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) { +		kmem_free(all, sizeof (*all) + 1); +		kobj_close_file(file); +		CACHE_DEBUG((DI_ERR, "cache header: read error or invalid")); +		return; +	} + +	map_size = all->map_size; + +	kmem_free(all, sizeof (*all) + 1); + +	ASSERT(map_size >= sizeof (*all) + 1); + +	buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP); +	sz = map_size; +	off = 0; +	while (sz) { +		/* Don't overload VM with large reads */ +		chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz; +		n = kobj_read_file(file, buf, chunk, off); +		if (n != chunk) { +			CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld", +			    DI_CACHE_FILE, off)); +			goto fail; +		} +		off += chunk; +		buf += chunk; +		sz -= chunk; +	} + +	ASSERT(off == map_size); + +	/* +	 * Read past expected EOF to verify size. 
+	 */ +	if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) { +		CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE)); +		goto fail; +	} + +	all = (struct di_all *)di_cache.cache_data; +	if (!header_plus_one_ok(all)) { +		CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE)); +		goto fail; +	} + +	/* +	 * Compute CRC with checksum field in the cache data set to 0 +	 */ +	saved_crc = all->cache_checksum; +	all->cache_checksum = 0; +	CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table); +	all->cache_checksum = saved_crc; + +	if (crc != all->cache_checksum) { +		CACHE_DEBUG((DI_ERR, +		    "%s: checksum error: expected=0x%x actual=0x%x", +		    DI_CACHE_FILE, all->cache_checksum, crc)); +		goto fail; +	} + +	if (all->map_size != map_size) { +		CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE)); +		goto fail; +	} + +	kobj_close_file(file); + +	di_cache.cache_size = map_size; + +	return; + +fail: +	kmem_free(di_cache.cache_data, map_size); +	kobj_close_file(file); +	di_cache.cache_data = NULL; +	di_cache.cache_size = 0; +} + + +/* + * Checks if arguments are valid for using the cache. 
+ */ +static int +cache_args_valid(struct di_state *st, int *error) +{ +	ASSERT(error); +	ASSERT(st->mem_size > 0); +	ASSERT(st->memlist != NULL); + +	if (!modrootloaded || !i_ddi_io_initialized()) { +		CACHE_DEBUG((DI_ERR, +		    "cache lookup failure: I/O subsystem not inited")); +		*error = ENOTACTIVE; +		return (0); +	} + +	/* +	 * No other flags allowed with DINFOCACHE +	 */ +	if (st->command != (DINFOCACHE & DIIOC_MASK)) { +		CACHE_DEBUG((DI_ERR, +		    "cache lookup failure: bad flags: 0x%x", +		    st->command)); +		*error = EINVAL; +		return (0); +	} + +	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { +		CACHE_DEBUG((DI_ERR, +		    "cache lookup failure: bad root: %s", +		    DI_ALL_PTR(st)->root_path)); +		*error = EINVAL; +		return (0); +	} + +	CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command)); + +	*error = 0; + +	return (1); +} + +static int +snapshot_is_cacheable(struct di_state *st) +{ +	ASSERT(st->mem_size > 0); +	ASSERT(st->memlist != NULL); + +	if (st->command != (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) { +		CACHE_DEBUG((DI_INFO, +		    "not cacheable: incompatible flags: 0x%x", +		    st->command)); +		return (0); +	} + +	if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) { +		CACHE_DEBUG((DI_INFO, +		    "not cacheable: incompatible root path: %s", +		    DI_ALL_PTR(st)->root_path)); +		return (0); +	} + +	CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command)); + +	return (1); +} + +static int +di_cache_lookup(struct di_state *st) +{ +	size_t	rval; +	int	cache_valid; + +	ASSERT(cache_args_valid(st, &cache_valid)); +	ASSERT(modrootloaded); + +	DI_CACHE_LOCK(di_cache); + +	/* +	 * The following assignment determines the validity +	 * of the cache as far as this snapshot is concerned. 
+	 */ +	cache_valid = di_cache.cache_valid; + +	if (cache_valid && di_cache.cache_data == NULL) { +		di_cache_read(&di_cache); +		/* check for read or file error */ +		if (di_cache.cache_data == NULL) +			cache_valid = 0; +	} + +	if (cache_valid) { +		/* +		 * Ok, the cache was valid as of this particular +		 * snapshot. Copy the cached snapshot. This is safe +		 * to do as the cache cannot be freed (we hold the +		 * cache lock). Free the memory allocated in di_state +		 * up until this point - we will simply copy everything +		 * in the cache. +		 */ + +		ASSERT(di_cache.cache_data != NULL); +		ASSERT(di_cache.cache_size > 0); + +		di_freemem(st); + +		rval = 0; +		if (di_cache2mem(&di_cache, st) > 0) { + +			ASSERT(DI_ALL_PTR(st)); + +			/* +			 * map_size is size of valid data in the +			 * cached snapshot and may be less than +			 * size of the cache. +			 */ +			rval = DI_ALL_PTR(st)->map_size; + +			ASSERT(rval >= sizeof (struct di_all)); +			ASSERT(rval <= di_cache.cache_size); +		} +	} else { +		/* +		 * The cache isn't valid, we need to take a snapshot. +		 * Set the command flags appropriately +		 */ +		ASSERT(st->command == (DINFOCACHE & DIIOC_MASK)); +		st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK); +		rval = di_cache_update(st); +		st->command = (DINFOCACHE & DIIOC_MASK); +	} + +	DI_CACHE_UNLOCK(di_cache); + +	/* +	 * For cached snapshots, the devinfo driver always returns +	 * a snapshot rooted at "/". 
+	 */ +	ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0); + +	return (rval); +} + +/* + * This is a forced update of the cache  - the previous state of the cache + * may be: + *	- unpopulated + *	- populated and invalid + *	- populated and valid + */ +static int +di_cache_update(struct di_state *st) +{ +	int rval; +	uint32_t crc; +	struct di_all *all; + +	ASSERT(DI_CACHE_LOCKED(di_cache)); +	ASSERT(snapshot_is_cacheable(st)); + +	/* +	 * Free the in-core cache and the on-disk file (if they exist) +	 */ +	i_ddi_di_cache_free(&di_cache); + +	/* +	 * Set valid flag before taking the snapshot, +	 * so that any invalidations that arrive +	 * during or after the snapshot are not +	 * removed by us. +	 */ +	atomic_or_32(&di_cache.cache_valid, 1); + +	modunload_disable(); +	rval = di_snapshot(st); +	modunload_enable(); + +	if (rval == 0) { +		CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot")); +		return (0); +	} + +	DI_ALL_PTR(st)->map_size = rval; + +	if (di_mem2cache(st, &di_cache) == 0) { +		CACHE_DEBUG((DI_ERR, "can't update cache: copy failed")); +		return (0); +	} + +	ASSERT(di_cache.cache_data); +	ASSERT(di_cache.cache_size > 0); + +	/* +	 * Now that we have cached the snapshot, compute its checksum. +	 * The checksum is only computed over the valid data in the +	 * cache, not the entire cache. +	 * Also, set all the fields (except checksum) before computing +	 * checksum. +	 */ +	all = (struct di_all *)di_cache.cache_data; +	all->cache_magic = DI_CACHE_MAGIC; +	all->map_size = rval; + +	ASSERT(all->cache_checksum == 0); +	CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table); +	all->cache_checksum = crc; + +	di_cache_write(&di_cache); + +	return (rval); +} + +static void +di_cache_print(di_cache_debug_t msglevel, char *fmt, ...) 
+{ +	va_list	ap; + +	if (di_cache_debug <= DI_QUIET) +		return; + +	if (di_cache_debug < msglevel) +		return; + +	switch (msglevel) { +		case DI_ERR: +			msglevel = CE_WARN; +			break; +		case DI_INFO: +		case DI_TRACE: +		default: +			msglevel = CE_NOTE; +			break; +	} + +	va_start(ap, fmt); +	vcmn_err(msglevel, fmt, ap); +	va_end(ap); +} | 
