diff options
Diffstat (limited to 'usr/src')
| -rw-r--r-- | usr/src/boot/Makefile.version | 2 | ||||
| -rw-r--r-- | usr/src/boot/lib/libstand/zfs/Makefile.inc | 5 | ||||
| -rw-r--r-- | usr/src/boot/lib/libstand/zfs/zfsimpl.c | 727 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/efi/libefi/efi_console.c | 19 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/efi/libefi/i386/Makefile | 3 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/efi/loader/Makefile.com | 5 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/i386/loader/Makefile | 5 | ||||
| -rw-r--r-- | usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h | 139 | ||||
| -rw-r--r-- | usr/src/tools/smatch/Makefile | 5 | ||||
| -rw-r--r-- | usr/src/uts/common/io/ib/ibnex/ibnex.c | 30 | ||||
| -rw-r--r-- | usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c | 50 | 
11 files changed, 919 insertions, 71 deletions
| diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version index 50fbef1c79..8c381aa113 100644 --- a/usr/src/boot/Makefile.version +++ b/usr/src/boot/Makefile.version @@ -33,4 +33,4 @@ LOADER_VERSION = 1.1  # Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes.  # The version is processed from left to right, the version number can only  # be increased. -BOOT_VERSION = $(LOADER_VERSION)-2019.08.07.1 +BOOT_VERSION = $(LOADER_VERSION)-2019.08.15.1 diff --git a/usr/src/boot/lib/libstand/zfs/Makefile.inc b/usr/src/boot/lib/libstand/zfs/Makefile.inc index 083b6f8d07..90cfb8e2f8 100644 --- a/usr/src/boot/lib/libstand/zfs/Makefile.inc +++ b/usr/src/boot/lib/libstand/zfs/Makefile.inc @@ -19,12 +19,14 @@ SRCS +=		$(SRC)/common/crypto/edonr/edonr.c  SRCS +=		$(SRC)/common/crypto/skein/skein.c  SRCS +=		$(SRC)/common/crypto/skein/skein_iv.c  SRCS +=		$(SRC)/common/crypto/skein/skein_block.c +SRCS +=		$(SRC)/common/list/list.c  OBJS +=		zfs.o  OBJS +=		gzip.o  OBJS +=		edonr.o  OBJS +=		skein.o  OBJS +=		skein_iv.o  OBJS +=		skein_block.o +OBJS +=		list.o  zfs.o := CPPFLAGS +=	-I../../common  zfs.o := CPPFLAGS +=	-I../../../cddl/boot/zfs @@ -41,4 +43,7 @@ skein_block.o := CPPFLAGS +=	-DSKEIN_LOOP=111  %.o:	$(SRC)/common/crypto/skein/%.c  	$(COMPILE.c) -o $@ $< +%.o:	$(SRC)/common/list/%.c +	$(COMPILE.c) -DNDEBUG $< +  zfs.o: $(ZFSSRC)/zfsimpl.c diff --git a/usr/src/boot/lib/libstand/zfs/zfsimpl.c b/usr/src/boot/lib/libstand/zfs/zfsimpl.c index 8bea34b522..3958b4af4a 100644 --- a/usr/src/boot/lib/libstand/zfs/zfsimpl.c +++ b/usr/src/boot/lib/libstand/zfs/zfsimpl.c @@ -1,4 +1,4 @@ -/*- +/*   * Copyright (c) 2007 Doug Rabson   * All rights reserved.   * @@ -33,6 +33,8 @@  #include <sys/endian.h>  #include <sys/stat.h>  #include <sys/stdint.h> +#include <sys/list.h> +#include <inttypes.h>  #include "zfsimpl.h"  #include "zfssubr.c" @@ -45,6 +47,58 @@ struct zfsmount {  };  /* + * The indirect_child_t represents the vdev that we will read from, when we + * need to read all copies of the data (e.g. for scrub or reconstruction). + * For plain (non-mirror) top-level vdevs (i.e. is_vdev is not a mirror), + * ic_vdev is the same as is_vdev.  However, for mirror top-level vdevs, + * ic_vdev is a child of the mirror. + */ +typedef struct indirect_child { +	void *ic_data; +	vdev_t *ic_vdev; +} indirect_child_t; + +/* + * The indirect_split_t represents one mapped segment of an i/o to the + * indirect vdev. For non-split (contiguously-mapped) blocks, there will be + * only one indirect_split_t, with is_split_offset==0 and is_size==io_size. + * For split blocks, there will be several of these. + */ +typedef struct indirect_split { +	list_node_t is_node; /* link on iv_splits */ + +	/* +	 * is_split_offset is the offset into the i/o. +	 * This is the sum of the previous splits' is_size's. +	 */ +	uint64_t is_split_offset; + +	vdev_t *is_vdev; /* top-level vdev */ +	uint64_t is_target_offset; /* offset on is_vdev */ +	uint64_t is_size; +	int is_children; /* number of entries in is_child[] */ + +	/* +	 * is_good_child is the child that we are currently using to +	 * attempt reconstruction. +	 */ +	int is_good_child; + +	indirect_child_t is_child[1]; /* variable-length */ +} indirect_split_t; + +/* + * The indirect_vsd_t is associated with each i/o to the indirect vdev. + * It is the "Vdev-Specific Data" in the zio_t's io_vsd. + */ +typedef struct indirect_vsd { +	boolean_t iv_split_block; +	boolean_t iv_reconstruct; + +	list_t iv_splits; /* list of indirect_split_t's */ +} indirect_vsd_t; + +/*   * List of all vdevs, chained through v_alllink.   */  static vdev_list_t zfs_vdevs; @@ -70,6 +124,8 @@ static const char *features_for_read[] = {  	"com.datto:bookmark_v2",  	"org.zfsonlinux:allocation_classes",  	"com.datto:resilver_defer", +	"com.delphix:device_removal", +	"com.delphix:obsolete_counts",  	NULL  }; @@ -92,6 +148,14 @@ static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result);  static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode,      const char *name, uint64_t integer_size, uint64_t num_integers,      void *value); +static int objset_get_dnode(const spa_t *, const objset_phys_t *, uint64_t, +    dnode_phys_t *); +static int dnode_read(const spa_t *, const dnode_phys_t *, off_t, void *, +    size_t); +static int vdev_indirect_read(vdev_t *, const blkptr_t *, void *, off_t, +    size_t); +static int vdev_mirror_read(vdev_t *, const blkptr_t *, void *, off_t, +    size_t);  static void  zfs_init(void) @@ -161,7 +225,7 @@ xdr_uint64_t(const unsigned char **xdr, uint64_t *lp)  static int  nvlist_find(const unsigned char *nvlist, const char *name, int type, -	    int* elementsp, void *valuep) +	    int *elementsp, void *valuep)  {  	const unsigned char *p, *pair;  	int junk; @@ -426,12 +490,511 @@ vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf,  	rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize);  	if (rc)  		return (rc); -	if (bp && zio_checksum_verify(vdev->spa, bp, buf)) -		return (EIO); +	if (bp != NULL) +		return (zio_checksum_verify(vdev->spa, bp, buf));  	return (0);  } +typedef struct remap_segment { +	vdev_t *rs_vd; +	uint64_t rs_offset; +	uint64_t rs_asize; +	uint64_t rs_split_offset; +	list_node_t rs_node; +} remap_segment_t; + +static remap_segment_t * +rs_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset) +{ +	remap_segment_t *rs = malloc(sizeof (remap_segment_t)); + +	if (rs != NULL) { +		rs->rs_vd = vd; +		rs->rs_offset = offset; +		rs->rs_asize = asize; +		rs->rs_split_offset = split_offset; +	} + +	return (rs); +} + +vdev_indirect_mapping_t * +vdev_indirect_mapping_open(spa_t *spa, objset_phys_t *os, +    uint64_t mapping_object) +{ +	vdev_indirect_mapping_t *vim; +	vdev_indirect_mapping_phys_t *vim_phys; +	int rc; + +	vim = calloc(1, sizeof (*vim)); +	if (vim == NULL) +		return (NULL); + +	vim->vim_dn = calloc(1, sizeof (*vim->vim_dn)); +	if (vim->vim_dn == NULL) { +		free(vim); +		return (NULL); +	} + +	rc = objset_get_dnode(spa, os, mapping_object, vim->vim_dn); +	if (rc != 0) { +		free(vim->vim_dn); +		free(vim); +		return (NULL); +	} + +	vim->vim_spa = spa; +	vim->vim_phys = malloc(sizeof (*vim->vim_phys)); +	if (vim->vim_phys == NULL) { +		free(vim->vim_dn); +		free(vim); +		return (NULL); +	} + +	vim_phys = (vdev_indirect_mapping_phys_t *)DN_BONUS(vim->vim_dn); +	*vim->vim_phys = *vim_phys; + +	vim->vim_objset = os; +	vim->vim_object = mapping_object; +	vim->vim_entries = NULL; + +	vim->vim_havecounts = +	    (vim->vim_dn->dn_bonuslen > VDEV_INDIRECT_MAPPING_SIZE_V0); + +	return (vim); +} + +/* + * Compare an offset with an indirect mapping entry; there are three + * possible scenarios: + * + *     1. The offset is "less than" the mapping entry; meaning the + *        offset is less than the source offset of the mapping entry. In + *        this case, there is no overlap between the offset and the + *        mapping entry and -1 will be returned. + * + *     2. The offset is "greater than" the mapping entry; meaning the + *        offset is greater than the mapping entry's source offset plus + *        the entry's size. In this case, there is no overlap between + *        the offset and the mapping entry and 1 will be returned. + * + *        NOTE: If the offset is actually equal to the entry's offset + *        plus size, this is considered to be "greater" than the entry, + *        and this case applies (i.e. 1 will be returned). Thus, the + *        entry's "range" can be considered to be inclusive at its + *        start, but exclusive at its end: e.g. [src, src + size). + * + *     3. The last case to consider is if the offset actually falls + *        within the mapping entry's range. If this is the case, the + *        offset is considered to be "equal to" the mapping entry and + *        0 will be returned. + * + *        NOTE: If the offset is equal to the entry's source offset, + *        this case applies and 0 will be returned. If the offset is + *        equal to the entry's source plus its size, this case does + *        *not* apply (see "NOTE" above for scenario 2), and 1 will be + *        returned. + */ +static int +dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem) +{ +	const uint64_t *key = v_key; +	const vdev_indirect_mapping_entry_phys_t *array_elem = +	    v_array_elem; +	uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem); + +	if (*key < src_offset) { +		return (-1); +	} else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) { +		return (0); +	} else { +		return (1); +	} +} + +/* + * Return array entry. + */ +static vdev_indirect_mapping_entry_phys_t * +vdev_indirect_mapping_entry(vdev_indirect_mapping_t *vim, uint64_t index) +{ +	uint64_t size; +	off_t offset = 0; +	int rc; + +	if (vim->vim_phys->vimp_num_entries == 0) +		return (NULL); + +	if (vim->vim_entries == NULL) { +		uint64_t bsize; + +		bsize = vim->vim_dn->dn_datablkszsec << SPA_MINBLOCKSHIFT; +		size = vim->vim_phys->vimp_num_entries * +		    sizeof (*vim->vim_entries); +		if (size > bsize) { +			size = bsize / sizeof (*vim->vim_entries); +			size *= sizeof (*vim->vim_entries); +		} +		vim->vim_entries = malloc(size); +		if (vim->vim_entries == NULL) +			return (NULL); +		vim->vim_num_entries = size / sizeof (*vim->vim_entries); +		offset = index * sizeof (*vim->vim_entries); +	} + +	/* We have data in vim_entries */ +	if (offset == 0) { +		if (index >= vim->vim_entry_offset && +		    index <= vim->vim_entry_offset + vim->vim_num_entries) { +			index -= vim->vim_entry_offset; +			return (&vim->vim_entries[index]); +		} +		offset = index * sizeof (*vim->vim_entries); +	} + +	vim->vim_entry_offset = index; +	size = vim->vim_num_entries * sizeof (*vim->vim_entries); +	rc = dnode_read(vim->vim_spa, vim->vim_dn, offset, vim->vim_entries, +	    size); +	if (rc != 0) { +		/* Read error, invalidate vim_entries. */ +		free(vim->vim_entries); +		vim->vim_entries = NULL; +		return (NULL); +	} +	index -= vim->vim_entry_offset; +	return (&vim->vim_entries[index]); +} + +/* + * Returns the mapping entry for the given offset. + * + * It's possible that the given offset will not be in the mapping table + * (i.e. no mapping entries contain this offset), in which case, the + * return value value depends on the "next_if_missing" parameter. + * + * If the offset is not found in the table and "next_if_missing" is + * B_FALSE, then NULL will always be returned. The behavior is intended + * to allow consumers to get the entry corresponding to the offset + * parameter, iff the offset overlaps with an entry in the table. + * + * If the offset is not found in the table and "next_if_missing" is + * B_TRUE, then the entry nearest to the given offset will be returned, + * such that the entry's source offset is greater than the offset + * passed in (i.e. the "next" mapping entry in the table is returned, if + * the offset is missing from the table). If there are no entries whose + * source offset is greater than the passed in offset, NULL is returned. + */ +static vdev_indirect_mapping_entry_phys_t * +vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim, +    uint64_t offset) +{ +	ASSERT(vim->vim_phys->vimp_num_entries > 0); + +	vdev_indirect_mapping_entry_phys_t *entry; + +	uint64_t last = vim->vim_phys->vimp_num_entries - 1; +	uint64_t base = 0; + +	/* +	 * We don't define these inside of the while loop because we use +	 * their value in the case that offset isn't in the mapping. +	 */ +	uint64_t mid; +	int result; + +	while (last >= base) { +		mid = base + ((last - base) >> 1); + +		entry = vdev_indirect_mapping_entry(vim, mid); +		if (entry == NULL) +			break; +		result = dva_mapping_overlap_compare(&offset, entry); + +		if (result == 0) { +			break; +		} else if (result < 0) { +			last = mid - 1; +		} else { +			base = mid + 1; +		} +	} +	return (entry); +} + +/* + * Given an indirect vdev and an extent on that vdev, it duplicates the + * physical entries of the indirect mapping that correspond to the extent + * to a new array and returns a pointer to it. In addition, copied_entries + * is populated with the number of mapping entries that were duplicated. + * + * Finally, since we are doing an allocation, it is up to the caller to + * free the array allocated in this function. + */ +vdev_indirect_mapping_entry_phys_t * +vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset, +    uint64_t asize, uint64_t *copied_entries) +{ +	vdev_indirect_mapping_entry_phys_t *duplicate_mappings = NULL; +	vdev_indirect_mapping_t *vim = vd->v_mapping; +	uint64_t entries = 0; + +	vdev_indirect_mapping_entry_phys_t *first_mapping = +	    vdev_indirect_mapping_entry_for_offset(vim, offset); +	ASSERT3P(first_mapping, !=, NULL); + +	vdev_indirect_mapping_entry_phys_t *m = first_mapping; +	while (asize > 0) { +		uint64_t size = DVA_GET_ASIZE(&m->vimep_dst); +		uint64_t inner_offset = offset - DVA_MAPPING_GET_SRC_OFFSET(m); +		uint64_t inner_size = MIN(asize, size - inner_offset); + +		offset += inner_size; +		asize -= inner_size; +		entries++; +		m++; +	} + +	size_t copy_length = entries * sizeof (*first_mapping); +	duplicate_mappings = malloc(copy_length); +	if (duplicate_mappings != NULL) +		bcopy(first_mapping, duplicate_mappings, copy_length); +	else +		entries = 0; + +	*copied_entries = entries; + +	return (duplicate_mappings); +} + +static vdev_t * +vdev_lookup_top(spa_t *spa, uint64_t vdev) +{ +	vdev_t *rvd; + +	STAILQ_FOREACH(rvd, &spa->spa_vdevs, v_childlink) +		if (rvd->v_id == vdev) +			break; + +	return (rvd); +} + +/* + * This is a callback for vdev_indirect_remap() which allocates an + * indirect_split_t for each split segment and adds it to iv_splits. + */ +static void +vdev_indirect_gather_splits(uint64_t split_offset, vdev_t *vd, uint64_t offset, +    uint64_t size, void *arg) +{ +	int n = 1; +	zio_t *zio = arg; +	indirect_vsd_t *iv = zio->io_vsd; + +	if (vd->v_read == vdev_indirect_read) +		return; + +	if (vd->v_read == vdev_mirror_read) +		n = vd->v_nchildren; + +	indirect_split_t *is = +	    malloc(offsetof(indirect_split_t, is_child[n])); +	if (is == NULL) { +		zio->io_error = ENOMEM; +		return; +	} +	bzero(is, offsetof(indirect_split_t, is_child[n])); + +	is->is_children = n; +	is->is_size = size; +	is->is_split_offset = split_offset; +	is->is_target_offset = offset; +	is->is_vdev = vd; + +	/* +	 * Note that we only consider multiple copies of the data for +	 * *mirror* vdevs.  We don't for "replacing" or "spare" vdevs, even +	 * though they use the same ops as mirror, because there's only one +	 * "good" copy under the replacing/spare. +	 */ +	if (vd->v_read == vdev_mirror_read) { +		int i = 0; +		vdev_t *kid; + +		STAILQ_FOREACH(kid, &vd->v_children, v_childlink) { +			is->is_child[i++].ic_vdev = kid; +		} +	} else { +		is->is_child[0].ic_vdev = vd; +	} + +	list_insert_tail(&iv->iv_splits, is); +} + +static void +vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize, void *arg) +{ +	list_t stack; +	spa_t *spa = vd->spa; +	zio_t *zio = arg; + +	list_create(&stack, sizeof (remap_segment_t), +	    offsetof(remap_segment_t, rs_node)); + +	for (remap_segment_t *rs = rs_alloc(vd, offset, asize, 0); +	    rs != NULL; rs = list_remove_head(&stack)) { +		vdev_t *v = rs->rs_vd; +		uint64_t num_entries = 0; +		/* vdev_indirect_mapping_t *vim = v->v_mapping; */ +		vdev_indirect_mapping_entry_phys_t *mapping = +		    vdev_indirect_mapping_duplicate_adjacent_entries(v, +		    rs->rs_offset, rs->rs_asize, &num_entries); + +		for (uint64_t i = 0; i < num_entries; i++) { +			vdev_indirect_mapping_entry_phys_t *m = &mapping[i]; +			uint64_t size = DVA_GET_ASIZE(&m->vimep_dst); +			uint64_t dst_offset = DVA_GET_OFFSET(&m->vimep_dst); +			uint64_t dst_vdev = DVA_GET_VDEV(&m->vimep_dst); +			uint64_t inner_offset = rs->rs_offset - +			    DVA_MAPPING_GET_SRC_OFFSET(m); +			uint64_t inner_size = +			    MIN(rs->rs_asize, size - inner_offset); +			vdev_t *dst_v = vdev_lookup_top(spa, dst_vdev); + +			if (dst_v->v_read == vdev_indirect_read) { +				list_insert_head(&stack, +				    rs_alloc(dst_v, dst_offset + inner_offset, +				    inner_size, rs->rs_split_offset)); +			} +			vdev_indirect_gather_splits(rs->rs_split_offset, dst_v, +			    dst_offset + inner_offset, +			    inner_size, arg); + +			/* +			 * vdev_indirect_gather_splits can have memory +			 * allocation error, we can not recover from it. +			 */ +			if (zio->io_error != 0) +				break; +			rs->rs_offset += inner_size; +			rs->rs_asize -= inner_size; +			rs->rs_split_offset += inner_size; +		} + +		free(mapping); +		free(rs); +		if (zio->io_error != 0) +			break; +	} + +	list_destroy(&stack); +} + +static void +vdev_indirect_map_free(zio_t *zio) +{ +	indirect_vsd_t *iv = zio->io_vsd; +	indirect_split_t *is; + +	while ((is = list_head(&iv->iv_splits)) != NULL) { +		for (int c = 0; c < is->is_children; c++) { +			indirect_child_t *ic = &is->is_child[c]; +			free(ic->ic_data); +		} +		list_remove(&iv->iv_splits, is); +		free(is); +	} +	free(iv); +} + +static int +vdev_indirect_read(vdev_t *vdev, const blkptr_t *bp, void *buf, +    off_t offset, size_t bytes) +{ +	zio_t zio = { 0 }; +	spa_t *spa = vdev->spa; +	indirect_vsd_t *iv = malloc(sizeof (*iv)); +	indirect_split_t *first; +	int rc = EIO; + +	if (iv == NULL) +		return (ENOMEM); +	bzero(iv, sizeof (*iv)); + +	list_create(&iv->iv_splits, +	    sizeof (indirect_split_t), offsetof(indirect_split_t, is_node)); + +	zio.io_spa = spa; +	zio.io_bp = (blkptr_t *)bp; +	zio.io_data = buf; +	zio.io_size = bytes; +	zio.io_offset = offset; +	zio.io_vd = vdev; +	zio.io_vsd = iv; + +	if (vdev->v_mapping == NULL) { +		vdev_indirect_config_t *vic; + +		vic = &vdev->vdev_indirect_config; +		vdev->v_mapping = vdev_indirect_mapping_open(spa, +		    &spa->spa_mos, vic->vic_mapping_object); +	} + +	vdev_indirect_remap(vdev, offset, bytes, &zio); +	if (zio.io_error != 0) +		return (zio.io_error); + +	first = list_head(&iv->iv_splits); +	if (first->is_size == zio.io_size) { +		/* +		 * This is not a split block; we are pointing to the entire +		 * data, which will checksum the same as the original data. +		 * Pass the BP down so that the child i/o can verify the +		 * checksum, and try a different location if available +		 * (e.g. on a mirror). +		 * +		 * While this special case could be handled the same as the +		 * general (split block) case, doing it this way ensures +		 * that the vast majority of blocks on indirect vdevs +		 * (which are not split) are handled identically to blocks +		 * on non-indirect vdevs.  This allows us to be less strict +		 * about performance in the general (but rare) case. +		 */ +		rc = first->is_vdev->v_read(first->is_vdev, zio.io_bp, +		    zio.io_data, first->is_target_offset, bytes); +	} else { +		iv->iv_split_block = B_TRUE; +		/* +		 * Read one copy of each split segment, from the +		 * top-level vdev.  Since we don't know the +		 * checksum of each split individually, the child +		 * zio can't ensure that we get the right data. +		 * E.g. if it's a mirror, it will just read from a +		 * random (healthy) leaf vdev.  We have to verify +		 * the checksum in vdev_indirect_io_done(). +		 */ +		for (indirect_split_t *is = list_head(&iv->iv_splits); +		    is != NULL; is = list_next(&iv->iv_splits, is)) { +			char *ptr = zio.io_data; + +			rc = is->is_vdev->v_read(is->is_vdev, zio.io_bp, +			    ptr + is->is_split_offset, is->is_target_offset, +			    is->is_size); +		} +		if (zio_checksum_verify(spa, zio.io_bp, zio.io_data)) +			rc = ECKSUM; +		else +			rc = 0; +	} + +	vdev_indirect_map_free(&zio); +	if (rc == 0) +		rc = zio.io_error; + +	return (rc); +} +  static int  vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf,      off_t offset, size_t bytes) @@ -498,6 +1061,7 @@ static vdev_t *  vdev_create(uint64_t guid, vdev_read_t *vdev_read)  {  	vdev_t *vdev; +	vdev_indirect_config_t *vic;  	vdev = malloc(sizeof(vdev_t));  	memset(vdev, 0, sizeof(vdev_t)); @@ -505,8 +1069,9 @@ vdev_create(uint64_t guid, vdev_read_t *vdev_read)  	vdev->v_guid = guid;  	vdev->v_state = VDEV_STATE_OFFLINE;  	vdev->v_read = vdev_read; -	vdev->v_phys_read = 0; -	vdev->v_read_priv = 0; + +	vic = &vdev->vdev_indirect_config; +	vic->vic_prev_indirect_vdev = UINT64_MAX;  	STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink);  	return (vdev); @@ -540,6 +1105,7 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev,  	    && strcmp(type, VDEV_TYPE_FILE)  #endif  	    && strcmp(type, VDEV_TYPE_RAIDZ) +	    && strcmp(type, VDEV_TYPE_INDIRECT)  	    && strcmp(type, VDEV_TYPE_REPLACING)) {  		printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n");  		return (EIO); @@ -568,7 +1134,23 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev,  			vdev = vdev_create(guid, vdev_raidz_read);  		else if (!strcmp(type, VDEV_TYPE_REPLACING))  			vdev = vdev_create(guid, vdev_replacing_read); -		else +		else if (!strcmp(type, VDEV_TYPE_INDIRECT)) { +			vdev_indirect_config_t *vic; + +			vdev = vdev_create(guid, vdev_indirect_read); +			vdev->v_state = VDEV_STATE_HEALTHY; +			vic = &vdev->vdev_indirect_config; + +			nvlist_find(nvlist, +			    ZPOOL_CONFIG_INDIRECT_OBJECT, DATA_TYPE_UINT64, +			    NULL, &vic->vic_mapping_object); +			nvlist_find(nvlist, +			    ZPOOL_CONFIG_INDIRECT_BIRTHS, DATA_TYPE_UINT64, +			    NULL, &vic->vic_births_object); +			nvlist_find(nvlist, +			    ZPOOL_CONFIG_PREV_INDIRECT_VDEV, DATA_TYPE_UINT64, +			    NULL, &vic->vic_prev_indirect_vdev); +		} else  			vdev = vdev_create(guid, vdev_disk_read);  		vdev->v_id = id; @@ -603,20 +1185,24 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev,  				vdev->v_devid = NULL;  			}  		} else { +			char *name; +  			if (!strcmp(type, "raidz")) { -				if (vdev->v_nparity == 1) -					vdev->v_name = "raidz1"; -				else if (vdev->v_nparity == 2) -					vdev->v_name = "raidz2"; -				else if (vdev->v_nparity == 3) -					vdev->v_name = "raidz3"; -				else { -					printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); +				if (vdev->v_nparity < 1 || +				    vdev->v_nparity > 3) { +					printf("ZFS: can only boot from disk, " +					    "mirror, raidz1, raidz2 and raidz3 " +					    "vdevs\n");  					return (EIO);  				} +				asprintf(&name, "%s%d-%" PRIu64, type, +				    vdev->v_nparity, id);  			} else { -				vdev->v_name = strdup(type); +				asprintf(&name, "%s-%" PRIu64, type, id);  			} +			if (name == NULL) +				return (ENOMEM); +			vdev->v_name = name;  		}  	} else {  		is_new = 0; @@ -2261,10 +2847,48 @@ check_mos_features(const spa_t *spa)  }  static int -zfs_spa_init(spa_t *spa) +load_nvlist(spa_t *spa, uint64_t obj, unsigned char **value)  {  	dnode_phys_t dir; +	size_t size;  	int rc; +	unsigned char *nv; + +	*value = NULL; +	if ((rc = objset_get_dnode(spa, &spa->spa_mos, obj, &dir)) != 0) +		return (rc); +	if (dir.dn_type != DMU_OT_PACKED_NVLIST && +	    dir.dn_bonustype != DMU_OT_PACKED_NVLIST_SIZE) { +		return (EIO); +	} + +	if (dir.dn_bonuslen != sizeof (uint64_t)) +		return (EIO); + +	size = *(uint64_t *)DN_BONUS(&dir); +	nv = malloc(size); +	if (nv == NULL) +		return (ENOMEM); + +	rc = dnode_read(spa, &dir, 0, nv, size); +	if (rc != 0) { +		free(nv); +		nv = NULL; +		return (rc); +	} +	*value = nv; +	return (rc); +} + +static int +zfs_spa_init(spa_t *spa) +{ +	dnode_phys_t dir; +	uint64_t config_object; +	unsigned char *nvlist; +	char *type; +	const unsigned char *nv; +	int nkids, rc;  	if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) {  		printf("ZFS: can't read MOS of pool %s\n", spa->spa_name); @@ -2289,8 +2913,77 @@ zfs_spa_init(spa_t *spa)  	rc = check_mos_features(spa);  	if (rc != 0) {  		printf("ZFS: pool %s is not supported\n", spa->spa_name); +		return (rc);  	} +	rc = zap_lookup(spa, &dir, DMU_POOL_CONFIG, +	    sizeof (config_object), 1, &config_object); +	if (rc != 0) { +		printf("ZFS: can not read MOS %s\n", DMU_POOL_CONFIG); +		return (EIO); +	} +	rc = load_nvlist(spa, config_object, &nvlist); +	if (rc != 0) +		return (rc); + +	/* Update vdevs from MOS config. */ +	if (nvlist_find(nvlist + 4, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, +	    NULL, &nv)) { +		rc = EIO; +		goto done; +	} + +	if (nvlist_find(nv, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, +            NULL, &type)) { +		printf("ZFS: can't find vdev details\n"); +		rc = ENOENT; +		goto done; +	} +	if (strcmp(type, VDEV_TYPE_ROOT) != 0) { +		rc = ENOENT; +		goto done; +	} + +	rc = nvlist_find(nv, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, +            &nkids, &nv); +	if (rc != 0) +		goto done; + +	for (int i = 0; i < nkids; i++) { +		vdev_t *vd, *prev, *kid = NULL; +		rc = vdev_init_from_nvlist(nv, NULL, &kid, 0); +		if (rc != 0) { +			printf("vdev_init_from_nvlist: %d\n", rc); +			break; +		} +		kid->spa = spa; +		prev = NULL; +		STAILQ_FOREACH(vd, &spa->spa_vdevs, v_childlink) { +			/* Already present? */ +			if (kid->v_id == vd->v_id) { +				kid = NULL; +				break; +			} +			if (vd->v_id > kid->v_id) { +				if (prev == NULL) { +					STAILQ_INSERT_HEAD(&spa->spa_vdevs, +					    kid, v_childlink); +				} else { +					STAILQ_INSERT_AFTER(&spa->spa_vdevs, +					    prev, kid, v_childlink); +				} +				kid = NULL; +				break; +			} +			prev = vd; +		} +		if (kid != NULL) +			STAILQ_INSERT_TAIL(&spa->spa_vdevs, kid, v_childlink); +		nv = nvlist_next(nv); +	} +	rc = 0; +done: +	free(nvlist);  	return (rc);  } diff --git a/usr/src/boot/sys/boot/efi/libefi/efi_console.c b/usr/src/boot/sys/boot/efi/libefi/efi_console.c index a4bc6a27ec..1e60d2488a 100644 --- a/usr/src/boot/sys/boot/efi/libefi/efi_console.c +++ b/usr/src/boot/sys/boot/efi/libefi/efi_console.c @@ -57,7 +57,7 @@ struct efi_console_data {  };  #define	KEYBUFSZ 10 -static unsigned keybuf[KEYBUFSZ];      /* keybuf for extended codes */ +static unsigned keybuf[KEYBUFSZ];	/* keybuf for extended codes */  static int key_pending; @@ -218,12 +218,12 @@ plat_cons_update_mode(int mode)  	}  	if (console_control != NULL) -		(void)console_control->SetMode(console_control, console_mode); +		(void) console_control->SetMode(console_control, console_mode);  	/* some firmware enables the cursor when switching modes */  	conout->EnableCursor(conout, FALSE);  	if (console_mode == EfiConsoleControlScreenText) { -		(void)conout->QueryMode(conout, conout->Mode->Mode, +		(void) conout->QueryMode(conout, conout->Mode->Mode,  		    &cols, &rows);  		devinit.version = VIS_CONS_REV;  		devinit.width = cols; @@ -275,7 +275,7 @@ efi_text_devinit(struct vis_devinit *data)  	if (console_mode != EfiConsoleControlScreenText)  		return (1); -	(void)conout->QueryMode(conout, conout->Mode->Mode, &cols, &rows); +	(void) conout->QueryMode(conout, conout->Mode->Mode, &cols, &rows);  	data->version = VIS_CONS_REV;  	data->width = cols;  	data->height = rows; @@ -328,7 +328,7 @@ efi_text_cons_display(struct vis_consdisplay *da)  	tem_char_t *data;  	int i; -	(void)conout->QueryMode(conout, conout->Mode->Mode, &col, &row); +	(void) conout->QueryMode(conout, conout->Mode->Mode, &col, &row);  	/* reduce clear line on bottom row by one to prevent autoscroll */  	if (row - 1 == da->row && da->col == 0 && da->width == col) @@ -365,6 +365,7 @@ static void efi_cons_cursor(struct vis_conscursor *cc)  	case VIS_GET_CURSOR: {	/* only used at startup */  		uint32_t row, col; +		row = col = 0;  		plat_tem_get_prom_pos(&row, &col);  		cc->row = row;  		cc->col = col; @@ -475,7 +476,7 @@ efi_cons_init(struct console *cp, int arg __unused)  	memset(keybuf, 0, KEYBUFSZ);  	status = BS->LocateProtocol(&ccontrol_protocol_guid, NULL, -	    (VOID **)&console_control); +	    (void **)&console_control);  	if (status == EFI_SUCCESS) {  		BOOLEAN GopUgaExists, StdInLocked;  		status = console_control->GetMode(console_control, @@ -485,7 +486,7 @@ efi_cons_init(struct console *cp, int arg __unused)  	}  	max_dim = best_mode = 0; -	for (i = 0; i <= conout->Mode->MaxMode ; i++) { +	for (i = 0; i <= conout->Mode->MaxMode; i++) {  		status = conout->QueryMode(conout, i, &cols, &rows);  		if (EFI_ERROR(status))  			continue; @@ -518,7 +519,7 @@ efi_cons_init(struct console *cp, int arg __unused)  	}  	if (console_control != NULL) -		(void)console_control->SetMode(console_control, console_mode); +		(void) console_control->SetMode(console_control, console_mode);  	/* some firmware enables the cursor when switching modes */  	conout->EnableCursor(conout, FALSE); @@ -742,7 +743,7 @@ efi_cons_efiputchar(int c)  	EFI_STATUS status;  	buf[0] = c; -        buf[1] = 0;     /* terminate string */ +	buf[1] = 0;	/* terminate string */  	status = conout->TestString(conout, buf);  	if (EFI_ERROR(status)) diff --git a/usr/src/boot/sys/boot/efi/libefi/i386/Makefile b/usr/src/boot/sys/boot/efi/libefi/i386/Makefile index cc749255bd..60274fab76 100644 --- a/usr/src/boot/sys/boot/efi/libefi/i386/Makefile +++ b/usr/src/boot/sys/boot/efi/libefi/i386/Makefile @@ -24,9 +24,6 @@ include ../Makefile.com  CFLAGS +=	-m32 -# false positive only with a 64-bit smatch -SMOFF += uninitialized -  CLEANFILES +=	machine x86  $(OBJS): machine x86 diff --git a/usr/src/boot/sys/boot/efi/loader/Makefile.com b/usr/src/boot/sys/boot/efi/loader/Makefile.com index cc1b0f8991..9308371ae8 100644 --- a/usr/src/boot/sys/boot/efi/loader/Makefile.com +++ b/usr/src/boot/sys/boot/efi/loader/Makefile.com @@ -32,7 +32,6 @@ SRCS=	\  	font.c \  	$(FONT).c \  	framebuffer.c \ -	list.c \  	main.c \  	memmap.c \  	multiboot.S \ @@ -52,7 +51,6 @@ OBJS=	\  	font.o \  	$(FONT).o \  	framebuffer.o \ -	list.o \  	main.o \  	memmap.o \  	multiboot.o \ @@ -190,9 +188,6 @@ clean clobber:  %.o: ../../../i386/libi386/%.c  	$(COMPILE.c) $< -%.o: $(SRC)/common/list/%.c -	$(COMPILE.c) -DNDEBUG $< -  %.o: $(SRC)/common/font/%.c  	$(COMPILE.c) $< diff --git a/usr/src/boot/sys/boot/i386/loader/Makefile b/usr/src/boot/sys/boot/i386/loader/Makefile index 0b1b0c6198..4fc97ac951 100644 --- a/usr/src/boot/sys/boot/i386/loader/Makefile +++ b/usr/src/boot/sys/boot/i386/loader/Makefile @@ -59,7 +59,7 @@ SRCS +=	boot.c commands.c console.c devopen.c interp.c  SRCS +=	interp_backslash.c interp_parse.c ls.c misc.c  SRCS +=	module.c linenoise.c multiboot2.c  SRCS +=	zfs_cmd.c -SRCS +=	font.c $(FONT).c list.c tem.c +SRCS +=	font.c $(FONT).c tem.c  tem.o := CPPFLAGS += $(DEFAULT_CONSOLE_COLOR) @@ -164,9 +164,6 @@ install: all $(ROOT_BOOT_DEFAULTS) $(ROOT_BOOT_FORTH) \  %.o:	../../common/linenoise/%.c  	$(COMPILE.c) -o $@ $< -%.o: $(SRC)/common/list/%.c -	$(COMPILE.c) -DNDEBUG $< -  %.o: $(SRC)/common/font/%.c  	$(COMPILE.c) $< diff --git a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h index 6c61f6e0c9..5997260616 100644 --- a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h +++ b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h @@ -717,6 +717,9 @@ typedef enum {  #define	ZPOOL_CONFIG_CHILDREN		"children"  #define	ZPOOL_CONFIG_ID			"id"  #define	ZPOOL_CONFIG_GUID		"guid" +#define	ZPOOL_CONFIG_INDIRECT_OBJECT	"com.delphix:indirect_object" +#define	ZPOOL_CONFIG_INDIRECT_BIRTHS	"com.delphix:indirect_births" +#define	ZPOOL_CONFIG_PREV_INDIRECT_VDEV	"com.delphix:prev_indirect_vdev"  #define	ZPOOL_CONFIG_PATH		"path"  #define	ZPOOL_CONFIG_DEVID		"devid"  #define	ZPOOL_CONFIG_PHYS_PATH		"phys_path" @@ -761,6 +764,7 @@ typedef enum {  #define	VDEV_TYPE_SPARE			"spare"  #define	VDEV_TYPE_LOG			"log"  #define	VDEV_TYPE_L2CACHE		"l2cache" +#define	VDEV_TYPE_INDIRECT		"indirect"  /*   * This is needed in userland to report the minimum necessary device size. @@ -853,7 +857,7 @@ struct uberblock {   */  #define	DNODE_SHIFT		9	/* 512 bytes */  #define	DN_MIN_INDBLKSHIFT	12	/* 4k */ -#define	DN_MAX_INDBLKSHIFT	14	/* 16k */ +#define	DN_MAX_INDBLKSHIFT	17	/* 128k */  #define	DNODE_BLOCK_SHIFT	14	/* 16k */  #define	DNODE_CORE_SIZE		64	/* 64 bytes for dnode sans blkptrs */  #define	DN_MAX_OBJECT_SHIFT	48	/* 256 trillion (zfs_fid_t limit) */ @@ -1226,6 +1230,9 @@ typedef struct dsl_dataset_phys {  #define	DMU_POOL_HISTORY		"history"  #define	DMU_POOL_PROPS			"pool_props"  #define	DMU_POOL_CHECKSUM_SALT		"org.illumos:checksum_salt" +#define	DMU_POOL_REMOVING		"com.delphix:removing" +#define	DMU_POOL_OBSOLETE_BPOBJ		"com.delphix:obsolete_bpobj" +#define	DMU_POOL_CONDENSING_INDIRECT	"com.delphix:condensing_indirect"  #define	ZAP_MAGIC 0x2F52AB2ABULL @@ -1539,6 +1546,116 @@ typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp,  typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; +typedef struct vdev_indirect_mapping_entry_phys { +	/* +	 * Decode with DVA_MAPPING_* macros. +	 * Contains: +	 *   the source offset (low 63 bits) +	 *   the one-bit "mark", used for garbage collection (by zdb) +	 */ +	uint64_t vimep_src; + +	/* +	 * Note: the DVA's asize is 24 bits, and can thus store ranges +	 * up to 8GB. +	 */ +	dva_t	vimep_dst; +} vdev_indirect_mapping_entry_phys_t; + +#define	DVA_MAPPING_GET_SRC_OFFSET(vimep)	\ +	BF64_GET_SB((vimep)->vimep_src, 0, 63, SPA_MINBLOCKSHIFT, 0) +#define	DVA_MAPPING_SET_SRC_OFFSET(vimep, x)	\ +	BF64_SET_SB((vimep)->vimep_src, 0, 63, SPA_MINBLOCKSHIFT, 0, x) + +typedef struct vdev_indirect_mapping_entry { +	vdev_indirect_mapping_entry_phys_t	vime_mapping; +	uint32_t				vime_obsolete_count; +	list_node_t				vime_node; +} vdev_indirect_mapping_entry_t; + +/* + * This is stored in the bonus buffer of the mapping object, see comment of + * vdev_indirect_config for more details. + */ +typedef struct vdev_indirect_mapping_phys { +	uint64_t	vimp_max_offset; +	uint64_t	vimp_bytes_mapped; +	uint64_t	vimp_num_entries; /* number of v_i_m_entry_phys_t's */ + +	/* +	 * For each entry in the mapping object, this object contains an +	 * entry representing the number of bytes of that mapping entry +	 * that were no longer in use by the pool at the time this indirect +	 * vdev was last condensed. +	 */ +	uint64_t	vimp_counts_object; +} vdev_indirect_mapping_phys_t; + +#define	VDEV_INDIRECT_MAPPING_SIZE_V0	(3 * sizeof (uint64_t)) + +typedef struct vdev_indirect_mapping { +	uint64_t	vim_object; +	boolean_t	vim_havecounts; + +	/* vim_entries segment offset currently in memory. */ +	uint64_t	vim_entry_offset; +	/* vim_entries segment size. */ +	size_t		vim_num_entries; + +	/* Needed by dnode_read() */ +	const void	*vim_spa; +	dnode_phys_t	*vim_dn; + +	/* +	 * An ordered array of mapping entries, sorted by source offset. +	 * Note that vim_entries is needed during a removal (and contains +	 * mappings that have been synced to disk so far) to handle frees +	 * from the removing device. +	 */ +	vdev_indirect_mapping_entry_phys_t *vim_entries; +	objset_phys_t	*vim_objset; +	vdev_indirect_mapping_phys_t	*vim_phys; +} vdev_indirect_mapping_t; + +/* + * On-disk indirect vdev state. + * + * An indirect vdev is described exclusively in the MOS config of a pool. + * The config for an indirect vdev includes several fields, which are + * accessed in memory by a vdev_indirect_config_t. + */ +typedef struct vdev_indirect_config { +	/* +	 * Object (in MOS) which contains the indirect mapping. This object +	 * contains an array of vdev_indirect_mapping_entry_phys_t ordered by +	 * vimep_src. The bonus buffer for this object is a +	 * vdev_indirect_mapping_phys_t. This object is allocated when a vdev +	 * removal is initiated. +	 * +	 * Note that this object can be empty if none of the data on the vdev +	 * has been copied yet. +	 */ +	uint64_t	vic_mapping_object; + +	/* +	 * Object (in MOS) which contains the birth times for the mapping +	 * entries. This object contains an array of +	 * vdev_indirect_birth_entry_phys_t sorted by vibe_offset. The bonus +	 * buffer for this object is a vdev_indirect_birth_phys_t. This object +	 * is allocated when a vdev removal is initiated. +	 * +	 * Note that this object can be empty if none of the vdev has yet been +	 * copied. +	 */ +	uint64_t	vic_births_object; + +/* + * This is the vdev ID which was removed previous to this vdev, or + * UINT64_MAX if there are no previously removed vdevs. + */ +	uint64_t	vic_prev_indirect_vdev; +} vdev_indirect_config_t; +  typedef struct vdev {  	STAILQ_ENTRY(vdev) v_childlink;	/* link in parent's child list */  	STAILQ_ENTRY(vdev) v_alllink;	/* link in global vdev list */ @@ -1557,6 +1674,11 @@ typedef struct vdev {  	vdev_read_t	*v_read;	/* read from vdev */  	void		*v_read_priv;	/* private data for read function */  	struct spa	*spa;		/* link to spa */ +	/* +	 * Values stored in the config for an indirect or removing vdev. +	 */ +	vdev_indirect_config_t vdev_indirect_config; +	vdev_indirect_mapping_t *v_mapping;  } vdev_t;  /* @@ -1578,6 +1700,21 @@ typedef struct spa {  	vdev_t		*spa_boot_vdev;	/* boot device for kernel */  } spa_t; +/* IO related arguments. */ +typedef struct zio { +	spa_t		*io_spa; +	blkptr_t	*io_bp; +	void		*io_data; +	uint64_t	io_size; +	uint64_t	io_offset; + +	/* Stuff for the vdev stack */ +	vdev_t		*io_vd; +	void		*io_vsd; + +	int		io_error; +} zio_t; +  static void decode_embedded_bp_compressed(const blkptr_t *, void *);  #endif	/* _ZFSIMPL_H */ diff --git a/usr/src/tools/smatch/Makefile b/usr/src/tools/smatch/Makefile index 821e2804f0..bd0bce00b2 100644 --- a/usr/src/tools/smatch/Makefile +++ b/usr/src/tools/smatch/Makefile @@ -27,8 +27,11 @@ include ../Makefile.tools  # We have to build smatch before we can use cw  i386_CC = $(GNUC_ROOT)/bin/gcc  sparc_CC = $(GNUC_ROOT)/bin/gcc +# sparc doesn't recognise -msave-args +i386_SMATCHFLAGS = -msave-args +sparc_SMATCHFLAGS = -CFLAGS = -O -m64 -msave-args -D__sun -Wall -Wno-unknown-pragmas -std=gnu99 -nodefaultlibs +CFLAGS = -O -m64 $($(MACH)_SMATCHFLAGS) -D__sun -Wall -Wno-unknown-pragmas -std=gnu99 -nodefaultlibs  SMATCHDATADIR = $(ROOTONBLDSHARE)/smatch diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex.c b/usr/src/uts/common/io/ib/ibnex/ibnex.c index 765f215458..e8cc157c91 100644 --- a/usr/src/uts/common/io/ib/ibnex/ibnex.c +++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c @@ -78,7 +78,7 @@ dev_info_t		*ibnex_commsvc_initnode(dev_info_t *,  static void		ibnex_delete_port_node_data(ibnex_node_data_t *);  int			ibnex_get_dip_from_guid(ib_guid_t, int,  			    ib_pkey_t, dev_info_t **); -int 			ibnex_get_node_and_dip_from_guid(ib_guid_t, int, +int			ibnex_get_node_and_dip_from_guid(ib_guid_t, int,  			    ib_pkey_t, ibnex_node_data_t **, dev_info_t **);  static ibnex_node_data_t *ibnex_is_node_data_present(ibnex_node_type_t,  			    void *, int, ib_pkey_t); @@ -257,7 +257,7 @@ static struct cb_ops ibnex_cbops = {  	ddi_prop_op,		/* prop_op */  	NULL,			/* stream */  	D_MP,			/* cb_flag */ -	CB_REV, 		/* rev */ +	CB_REV,			/* rev */  	nodev,			/* int (*cb_aread)() */  	nodev			/* int (*cb_awrite)() */  }; @@ -526,7 +526,7 @@ ibnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)  	 * power management of the phci and client  	 */  	if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, -	    "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) { +	    "pm-want-child-notification?", NULL, 0) != DDI_PROP_SUCCESS) {  		IBTF_DPRINTF_L2("ibnex",  		    "_attach: create pm-want-child-notification failed");  		(void) ddi_remove_minor_node(dip, NULL); @@ -827,7 +827,7 @@ ibt_status_t  ibnex_ibtl_callback(ibtl_ibnex_cb_args_t *cb_args)  {  	int			retval = IBT_SUCCESS; -	struct dev_ops 		*hca_dev_ops; +	struct dev_ops		*hca_dev_ops;  	dev_info_t		*clnt_dip;  	ibnex_node_data_t	*node_data; @@ -888,7 +888,7 @@ ibnex_ibtl_callback(ibtl_ibnex_cb_args_t *cb_args)  /*   * ibnex_map_fault - * 	IOC drivers need not map memory. Return failure to fail any + *	IOC drivers need not map memory. Return failure to fail any   *	such calls.   */  /*ARGSUSED*/ @@ -903,7 +903,7 @@ ibnex_map_fault(dev_info_t *dip, dev_info_t *rdip, struct hat *hat,  /*   * ibnex_busctl - * 	bus_ctl bus_ops entry point + *	bus_ctl bus_ops entry point   */  /*ARGSUSED*/  int @@ -1134,7 +1134,7 @@ ibnex_bus_config(dev_info_t *parent, uint_t flag,      ddi_bus_config_op_t op, void *devname, dev_info_t **child)  {  	int			ret = IBNEX_SUCCESS, len, circ, need_bus_config; -	char 			*device_name, *cname = NULL, *caddr = NULL; +	char			*device_name, *cname = NULL, *caddr = NULL;  	dev_info_t		*cdip;  	ibnex_node_data_t	*node_data; @@ -1478,9 +1478,9 @@ void  ibnex_create_vppa_nodes(      dev_info_t *parent, ibdm_port_attr_t *port_attr)  { -	int 		idx, ii; +	int		idx, ii;  	int		rval; -	ib_pkey_t 	pkey; +	ib_pkey_t	pkey;  	dev_info_t	*dip;  	IBTF_DPRINTF_L4("ibnex", "\tcreate_vppa_nodes: Begin"); @@ -1715,8 +1715,8 @@ int  ibnex_get_pkey_commsvc_index_portnum(char *device_name, int *index,      ib_pkey_t *pkey, uint8_t *port_num)  { -	char 	*srv, **service_name, *temp; -	int  	ii, ncommsvcs, ret; +	char	*srv, **service_name, *temp; +	int	ii, ncommsvcs, ret;  	if (ibnex_devname_to_portnum(device_name, port_num) !=  	    IBNEX_SUCCESS) { @@ -1946,7 +1946,7 @@ ibnex_pseudo_config_one(ibnex_node_data_t *node_data, char *caddr,  		/*  		 * This function is now called with PHCI / HCA driver  		 * as parent. The format of devicename is : -		 * 	<driver_name>@<driver_name>,<unit_address> +		 *	<driver_name>@<driver_name>,<unit_address>  		 * The "caddr" part of the devicename matches the  		 * format of pseudo_node_addr.  		 * @@ -2975,7 +2975,7 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr,  	ibnex_node_data_t	*node_data;  	ibnex_port_node_t	*port_node;  	char devname[MAXNAMELEN]; -	int 			cdip_allocated = 0; +	int			cdip_allocated = 0;  	ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); @@ -3303,7 +3303,7 @@ static int  ibnex_create_port_compatible_prop(dev_info_t *child_dip,      char *comm_svcp, ibdm_port_attr_t *port_attr)  { -	int 	rval, i; +	int	rval, i;  	char	*temp;  	char	*compatible[IBNEX_MAX_IBPORT_COMPAT_NAMES]; @@ -3974,7 +3974,7 @@ ib_vhci_pi_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flag)  /*ARGSUSED*/  static int  ib_vhci_pi_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip, -		mdi_pathinfo_state_t state, uint32_t arg1, int arg2) +    mdi_pathinfo_state_t state, uint32_t arg1, int arg2)  {  	IBTF_DPRINTF_L4("ibnex",  	    "\tpi_state_change: dip %p pip %p state %x", vdip, pip, state); diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c index e78c3735ad..7b7457160f 100644 --- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c +++ b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c @@ -60,7 +60,7 @@ static int		ibnex_fill_nodeinfo(nvlist_t **, ibnex_node_data_t *,  static void		ibnex_figure_ap_devstate(ibnex_node_data_t *,  			    devctl_ap_state_t *);  static void		ibnex_figure_ib_apid_devstate(devctl_ap_state_t *); -static	char 		*ibnex_get_apid(struct devctl_iocdata *); +static char		*ibnex_get_apid(struct devctl_iocdata *);  static int		ibnex_get_dip_from_apid(char *, dev_info_t **,  			    ibnex_node_data_t **);  extern int		ibnex_get_node_and_dip_from_guid(ib_guid_t, int, @@ -1010,9 +1010,9 @@ ibnex_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,  		/*  		 * Five types of APIDs are supported:  		 *	o HCA_GUID,0,service-name	(HCA-SVC device) -		 *	o IOC_GUID 			(IOC device) +		 *	o IOC_GUID			(IOC device)  		 *	o PORT_GUID,0,service-name	(Port device) -		 *	o pseudo_name,unit-address, 	(Pseudo device) +		 *	o pseudo_name,unit-address,	(Pseudo device)  		 *	o PORT_GUID,P_Key,service-name	(VPPA device)  		 * If the apid doesn't have "," then treat it as an IOC  		 * If the apid has one "," then it is Pseudo device @@ -1149,7 +1149,7 @@ ibnex_get_snapshot(char **buf, size_t *sz, int allow_probe)  {  	int			i, j, k, l, hca_count;  	nvlist_t		*nvl; -	ib_pkey_t 		pkey; +	ib_pkey_t		pkey;  	boolean_t		found;  	ibdm_ioc_info_t		*ioc_listp;  	ibdm_ioc_info_t		*iocp; @@ -2909,7 +2909,7 @@ out:  	(x)->hca_max_rdma_in_chan	= (y)->hca_max_rdma_in_chan;	\  	(x)->hca_max_rdma_out_chan	= (y)->hca_max_rdma_out_chan;	\  	(x)->hca_max_ipv6_chan		= (y)->hca_max_ipv6_chan;	\ -	(x)->hca_max_ether_chan 	= (y)->hca_max_ether_chan;	\ +	(x)->hca_max_ether_chan		= (y)->hca_max_ether_chan;	\  	(x)->hca_max_mcg_chans		= (y)->hca_max_mcg_chans;	\  	(x)->hca_max_mcg		= (y)->hca_max_mcg;		\  	(x)->hca_max_chan_per_mcg	= (y)->hca_max_chan_per_mcg;	\ @@ -2946,8 +2946,7 @@ out:  	    MAX_HCA_DRVNAME_LEN);					\  	(x)->hca_driver_instance	= (instance);			\  									\ -	(x)->hca_device_path = ((device_path_alloc_sz) >= (device_path_len)) \ -	    ? (device_path) : NULL;					\ +	(x)->hca_device_path		= (device_path);		\  	(x)->hca_device_path_len	= (device_path_len);		\  } @@ -2960,7 +2959,9 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  {  	int			rv = 0;  	ibnex_ctl_query_hca_t	*query_hca = NULL; +#ifdef	_MULTI_DATAMODEL  	ibnex_ctl_query_hca_32_t *query_hca_32 = NULL; +#endif  	ibt_hca_attr_t		*hca_attr = NULL;  	char			driver_name[MAX_HCA_DRVNAME_LEN];  	int			instance; @@ -2968,13 +2969,15 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  	char			*device_path;  	uint_t			device_path_alloc_sz, hca_device_path_len;  	char			*hca_device_path = NULL; +	uint_t			model;  	IBTF_DPRINTF_L4("ibnex", "\tctl_query_hca: cmd=%x, arg=%p, "  	    "mode=%x, cred=%p, rval=%p, dev=0x%x", cmd, arg, mode, credp,  	    rvalp, dev); +	switch (model = ddi_model_convert_from(mode & FMODELS)) {  #ifdef	_MULTI_DATAMODEL -	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { +	case DDI_MODEL_ILP32:  		query_hca_32 = kmem_zalloc(  		    sizeof (ibnex_ctl_query_hca_32_t), KM_SLEEP); @@ -2989,9 +2992,9 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  		hca_guid = query_hca_32->hca_guid;  		device_path = (char *)(uintptr_t)query_hca_32->hca_device_path;  		device_path_alloc_sz = query_hca_32->hca_device_path_alloc_sz; -	} else +		break;  #endif -	{ +	default:  		query_hca = kmem_zalloc(sizeof (ibnex_ctl_query_hca_t),  		    KM_SLEEP); @@ -3006,6 +3009,7 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  		hca_guid = query_hca->hca_guid;  		device_path = query_hca->hca_device_path;  		device_path_alloc_sz = query_hca->hca_device_path_alloc_sz; +		break;  	}  	hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP); @@ -3020,11 +3024,19 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  	hca_device_path_len = strlen(hca_device_path) + 1; +	switch (model) { +		char		*device_path64;  #ifdef	_MULTI_DATAMODEL -	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { +		caddr32_t	device_path32; +	case DDI_MODEL_ILP32: + +		if (device_path_alloc_sz >= hca_device_path_len) +			device_path32 = (uintptr_t)device_path; +		else +			device_path32 = (uintptr_t)NULL;  		IBNEX_CTL_CP_HCA_INFO(&query_hca_32->hca_info, hca_attr, -		    driver_name, instance, query_hca_32->hca_device_path, +		    driver_name, instance, device_path32,  		    device_path_alloc_sz, hca_device_path_len);  		/* copy hca information to the user space */ @@ -3036,11 +3048,16 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  			rv = EFAULT;  			goto out;  		} -	} else +		break;  #endif -	{ +	default: +		if (device_path_alloc_sz >= hca_device_path_len) +			device_path64 = device_path; +		else +			device_path64 = NULL; +  		IBNEX_CTL_CP_HCA_INFO(&query_hca->hca_info, hca_attr, -		    driver_name, instance, device_path, +		    driver_name, instance, device_path64,  		    device_path_alloc_sz, hca_device_path_len);  		/* copy hca information to the user space */ @@ -3052,6 +3069,7 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  			rv = EFAULT;  			goto out;  		} +		break;  	}  	if (device_path_alloc_sz >= hca_device_path_len) { @@ -3067,8 +3085,10 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode,  out:  	if (query_hca)  		kmem_free(query_hca, sizeof (ibnex_ctl_query_hca_t)); +#ifdef	_MULTI_DATAMODEL  	if (query_hca_32)  		kmem_free(query_hca_32, sizeof (ibnex_ctl_query_hca_32_t)); +#endif  	if (hca_attr)  		kmem_free(hca_attr, sizeof (ibt_hca_attr_t));  	if (hca_device_path) | 
