diff options
| -rw-r--r-- | usr/src/boot/Makefile.version | 2 | ||||
| -rw-r--r-- | usr/src/boot/lib/libstand/zfs/Makefile.inc | 5 | ||||
| -rw-r--r-- | usr/src/boot/lib/libstand/zfs/zfsimpl.c | 727 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/efi/libefi/efi_console.c | 19 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/efi/libefi/i386/Makefile | 3 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/efi/loader/Makefile.com | 5 | ||||
| -rw-r--r-- | usr/src/boot/sys/boot/i386/loader/Makefile | 5 | ||||
| -rw-r--r-- | usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h | 139 | ||||
| -rw-r--r-- | usr/src/tools/smatch/Makefile | 5 | ||||
| -rw-r--r-- | usr/src/uts/common/io/ib/ibnex/ibnex.c | 30 | ||||
| -rw-r--r-- | usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c | 50 |
11 files changed, 919 insertions, 71 deletions
diff --git a/usr/src/boot/Makefile.version b/usr/src/boot/Makefile.version index 50fbef1c79..8c381aa113 100644 --- a/usr/src/boot/Makefile.version +++ b/usr/src/boot/Makefile.version @@ -33,4 +33,4 @@ LOADER_VERSION = 1.1 # Use date like formatting here, YYYY.MM.DD.XX, without leading zeroes. # The version is processed from left to right, the version number can only # be increased. -BOOT_VERSION = $(LOADER_VERSION)-2019.08.07.1 +BOOT_VERSION = $(LOADER_VERSION)-2019.08.15.1 diff --git a/usr/src/boot/lib/libstand/zfs/Makefile.inc b/usr/src/boot/lib/libstand/zfs/Makefile.inc index 083b6f8d07..90cfb8e2f8 100644 --- a/usr/src/boot/lib/libstand/zfs/Makefile.inc +++ b/usr/src/boot/lib/libstand/zfs/Makefile.inc @@ -19,12 +19,14 @@ SRCS += $(SRC)/common/crypto/edonr/edonr.c SRCS += $(SRC)/common/crypto/skein/skein.c SRCS += $(SRC)/common/crypto/skein/skein_iv.c SRCS += $(SRC)/common/crypto/skein/skein_block.c +SRCS += $(SRC)/common/list/list.c OBJS += zfs.o OBJS += gzip.o OBJS += edonr.o OBJS += skein.o OBJS += skein_iv.o OBJS += skein_block.o +OBJS += list.o zfs.o := CPPFLAGS += -I../../common zfs.o := CPPFLAGS += -I../../../cddl/boot/zfs @@ -41,4 +43,7 @@ skein_block.o := CPPFLAGS += -DSKEIN_LOOP=111 %.o: $(SRC)/common/crypto/skein/%.c $(COMPILE.c) -o $@ $< +%.o: $(SRC)/common/list/%.c + $(COMPILE.c) -DNDEBUG $< + zfs.o: $(ZFSSRC)/zfsimpl.c diff --git a/usr/src/boot/lib/libstand/zfs/zfsimpl.c b/usr/src/boot/lib/libstand/zfs/zfsimpl.c index 8bea34b522..3958b4af4a 100644 --- a/usr/src/boot/lib/libstand/zfs/zfsimpl.c +++ b/usr/src/boot/lib/libstand/zfs/zfsimpl.c @@ -1,4 +1,4 @@ -/*- +/* * Copyright (c) 2007 Doug Rabson * All rights reserved. * @@ -33,6 +33,8 @@ #include <sys/endian.h> #include <sys/stat.h> #include <sys/stdint.h> +#include <sys/list.h> +#include <inttypes.h> #include "zfsimpl.h" #include "zfssubr.c" @@ -45,6 +47,58 @@ struct zfsmount { }; /* + * The indirect_child_t represents the vdev that we will read from, when we + * need to read all copies of the data (e.g. for scrub or reconstruction). + * For plain (non-mirror) top-level vdevs (i.e. is_vdev is not a mirror), + * ic_vdev is the same as is_vdev. However, for mirror top-level vdevs, + * ic_vdev is a child of the mirror. + */ +typedef struct indirect_child { + void *ic_data; + vdev_t *ic_vdev; +} indirect_child_t; + +/* + * The indirect_split_t represents one mapped segment of an i/o to the + * indirect vdev. For non-split (contiguously-mapped) blocks, there will be + * only one indirect_split_t, with is_split_offset==0 and is_size==io_size. + * For split blocks, there will be several of these. + */ +typedef struct indirect_split { + list_node_t is_node; /* link on iv_splits */ + + /* + * is_split_offset is the offset into the i/o. + * This is the sum of the previous splits' is_size's. + */ + uint64_t is_split_offset; + + vdev_t *is_vdev; /* top-level vdev */ + uint64_t is_target_offset; /* offset on is_vdev */ + uint64_t is_size; + int is_children; /* number of entries in is_child[] */ + + /* + * is_good_child is the child that we are currently using to + * attempt reconstruction. + */ + int is_good_child; + + indirect_child_t is_child[1]; /* variable-length */ +} indirect_split_t; + +/* + * The indirect_vsd_t is associated with each i/o to the indirect vdev. + * It is the "Vdev-Specific Data" in the zio_t's io_vsd. + */ +typedef struct indirect_vsd { + boolean_t iv_split_block; + boolean_t iv_reconstruct; + + list_t iv_splits; /* list of indirect_split_t's */ +} indirect_vsd_t; + +/* * List of all vdevs, chained through v_alllink. */ static vdev_list_t zfs_vdevs; @@ -70,6 +124,8 @@ static const char *features_for_read[] = { "com.datto:bookmark_v2", "org.zfsonlinux:allocation_classes", "com.datto:resilver_defer", + "com.delphix:device_removal", + "com.delphix:obsolete_counts", NULL }; @@ -92,6 +148,14 @@ static int zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result); static int zap_lookup(const spa_t *spa, const dnode_phys_t *dnode, const char *name, uint64_t integer_size, uint64_t num_integers, void *value); +static int objset_get_dnode(const spa_t *, const objset_phys_t *, uint64_t, + dnode_phys_t *); +static int dnode_read(const spa_t *, const dnode_phys_t *, off_t, void *, + size_t); +static int vdev_indirect_read(vdev_t *, const blkptr_t *, void *, off_t, + size_t); +static int vdev_mirror_read(vdev_t *, const blkptr_t *, void *, off_t, + size_t); static void zfs_init(void) @@ -161,7 +225,7 @@ xdr_uint64_t(const unsigned char **xdr, uint64_t *lp) static int nvlist_find(const unsigned char *nvlist, const char *name, int type, - int* elementsp, void *valuep) + int *elementsp, void *valuep) { const unsigned char *p, *pair; int junk; @@ -426,12 +490,511 @@ vdev_read_phys(vdev_t *vdev, const blkptr_t *bp, void *buf, rc = vdev->v_phys_read(vdev, vdev->v_read_priv, offset, buf, psize); if (rc) return (rc); - if (bp && zio_checksum_verify(vdev->spa, bp, buf)) - return (EIO); + if (bp != NULL) + return (zio_checksum_verify(vdev->spa, bp, buf)); return (0); } +typedef struct remap_segment { + vdev_t *rs_vd; + uint64_t rs_offset; + uint64_t rs_asize; + uint64_t rs_split_offset; + list_node_t rs_node; +} remap_segment_t; + +static remap_segment_t * +rs_alloc(vdev_t *vd, uint64_t offset, uint64_t asize, uint64_t split_offset) +{ + remap_segment_t *rs = malloc(sizeof (remap_segment_t)); + + if (rs != NULL) { + rs->rs_vd = vd; + rs->rs_offset = offset; + rs->rs_asize = asize; + rs->rs_split_offset = split_offset; + } + + return (rs); +} + +vdev_indirect_mapping_t * +vdev_indirect_mapping_open(spa_t *spa, objset_phys_t *os, + uint64_t mapping_object) +{ + vdev_indirect_mapping_t *vim; + vdev_indirect_mapping_phys_t *vim_phys; + int rc; + + vim = calloc(1, sizeof (*vim)); + if (vim == NULL) + return (NULL); + + vim->vim_dn = calloc(1, sizeof (*vim->vim_dn)); + if (vim->vim_dn == NULL) { + free(vim); + return (NULL); + } + + rc = objset_get_dnode(spa, os, mapping_object, vim->vim_dn); + if (rc != 0) { + free(vim->vim_dn); + free(vim); + return (NULL); + } + + vim->vim_spa = spa; + vim->vim_phys = malloc(sizeof (*vim->vim_phys)); + if (vim->vim_phys == NULL) { + free(vim->vim_dn); + free(vim); + return (NULL); + } + + vim_phys = (vdev_indirect_mapping_phys_t *)DN_BONUS(vim->vim_dn); + *vim->vim_phys = *vim_phys; + + vim->vim_objset = os; + vim->vim_object = mapping_object; + vim->vim_entries = NULL; + + vim->vim_havecounts = + (vim->vim_dn->dn_bonuslen > VDEV_INDIRECT_MAPPING_SIZE_V0); + + return (vim); +} + +/* + * Compare an offset with an indirect mapping entry; there are three + * possible scenarios: + * + * 1. The offset is "less than" the mapping entry; meaning the + * offset is less than the source offset of the mapping entry. In + * this case, there is no overlap between the offset and the + * mapping entry and -1 will be returned. + * + * 2. The offset is "greater than" the mapping entry; meaning the + * offset is greater than the mapping entry's source offset plus + * the entry's size. In this case, there is no overlap between + * the offset and the mapping entry and 1 will be returned. + * + * NOTE: If the offset is actually equal to the entry's offset + * plus size, this is considered to be "greater" than the entry, + * and this case applies (i.e. 1 will be returned). Thus, the + * entry's "range" can be considered to be inclusive at its + * start, but exclusive at its end: e.g. [src, src + size). + * + * 3. The last case to consider is if the offset actually falls + * within the mapping entry's range. If this is the case, the + * offset is considered to be "equal to" the mapping entry and + * 0 will be returned. + * + * NOTE: If the offset is equal to the entry's source offset, + * this case applies and 0 will be returned. If the offset is + * equal to the entry's source plus its size, this case does + * *not* apply (see "NOTE" above for scenario 2), and 1 will be + * returned. + */ +static int +dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem) +{ + const uint64_t *key = v_key; + const vdev_indirect_mapping_entry_phys_t *array_elem = + v_array_elem; + uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem); + + if (*key < src_offset) { + return (-1); + } else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) { + return (0); + } else { + return (1); + } +} + +/* + * Return array entry. + */ +static vdev_indirect_mapping_entry_phys_t * +vdev_indirect_mapping_entry(vdev_indirect_mapping_t *vim, uint64_t index) +{ + uint64_t size; + off_t offset = 0; + int rc; + + if (vim->vim_phys->vimp_num_entries == 0) + return (NULL); + + if (vim->vim_entries == NULL) { + uint64_t bsize; + + bsize = vim->vim_dn->dn_datablkszsec << SPA_MINBLOCKSHIFT; + size = vim->vim_phys->vimp_num_entries * + sizeof (*vim->vim_entries); + if (size > bsize) { + size = bsize / sizeof (*vim->vim_entries); + size *= sizeof (*vim->vim_entries); + } + vim->vim_entries = malloc(size); + if (vim->vim_entries == NULL) + return (NULL); + vim->vim_num_entries = size / sizeof (*vim->vim_entries); + offset = index * sizeof (*vim->vim_entries); + } + + /* We have data in vim_entries */ + if (offset == 0) { + if (index >= vim->vim_entry_offset && + index <= vim->vim_entry_offset + vim->vim_num_entries) { + index -= vim->vim_entry_offset; + return (&vim->vim_entries[index]); + } + offset = index * sizeof (*vim->vim_entries); + } + + vim->vim_entry_offset = index; + size = vim->vim_num_entries * sizeof (*vim->vim_entries); + rc = dnode_read(vim->vim_spa, vim->vim_dn, offset, vim->vim_entries, + size); + if (rc != 0) { + /* Read error, invalidate vim_entries. */ + free(vim->vim_entries); + vim->vim_entries = NULL; + return (NULL); + } + index -= vim->vim_entry_offset; + return (&vim->vim_entries[index]); +} + +/* + * Returns the mapping entry for the given offset. + * + * It's possible that the given offset will not be in the mapping table + * (i.e. no mapping entries contain this offset), in which case, the + * return value value depends on the "next_if_missing" parameter. + * + * If the offset is not found in the table and "next_if_missing" is + * B_FALSE, then NULL will always be returned. The behavior is intended + * to allow consumers to get the entry corresponding to the offset + * parameter, iff the offset overlaps with an entry in the table. + * + * If the offset is not found in the table and "next_if_missing" is + * B_TRUE, then the entry nearest to the given offset will be returned, + * such that the entry's source offset is greater than the offset + * passed in (i.e. the "next" mapping entry in the table is returned, if + * the offset is missing from the table). If there are no entries whose + * source offset is greater than the passed in offset, NULL is returned. + */ +static vdev_indirect_mapping_entry_phys_t * +vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim, + uint64_t offset) +{ + ASSERT(vim->vim_phys->vimp_num_entries > 0); + + vdev_indirect_mapping_entry_phys_t *entry; + + uint64_t last = vim->vim_phys->vimp_num_entries - 1; + uint64_t base = 0; + + /* + * We don't define these inside of the while loop because we use + * their value in the case that offset isn't in the mapping. + */ + uint64_t mid; + int result; + + while (last >= base) { + mid = base + ((last - base) >> 1); + + entry = vdev_indirect_mapping_entry(vim, mid); + if (entry == NULL) + break; + result = dva_mapping_overlap_compare(&offset, entry); + + if (result == 0) { + break; + } else if (result < 0) { + last = mid - 1; + } else { + base = mid + 1; + } + } + return (entry); +} + +/* + * Given an indirect vdev and an extent on that vdev, it duplicates the + * physical entries of the indirect mapping that correspond to the extent + * to a new array and returns a pointer to it. In addition, copied_entries + * is populated with the number of mapping entries that were duplicated. + * + * Finally, since we are doing an allocation, it is up to the caller to + * free the array allocated in this function. + */ +vdev_indirect_mapping_entry_phys_t * +vdev_indirect_mapping_duplicate_adjacent_entries(vdev_t *vd, uint64_t offset, + uint64_t asize, uint64_t *copied_entries) +{ + vdev_indirect_mapping_entry_phys_t *duplicate_mappings = NULL; + vdev_indirect_mapping_t *vim = vd->v_mapping; + uint64_t entries = 0; + + vdev_indirect_mapping_entry_phys_t *first_mapping = + vdev_indirect_mapping_entry_for_offset(vim, offset); + ASSERT3P(first_mapping, !=, NULL); + + vdev_indirect_mapping_entry_phys_t *m = first_mapping; + while (asize > 0) { + uint64_t size = DVA_GET_ASIZE(&m->vimep_dst); + uint64_t inner_offset = offset - DVA_MAPPING_GET_SRC_OFFSET(m); + uint64_t inner_size = MIN(asize, size - inner_offset); + + offset += inner_size; + asize -= inner_size; + entries++; + m++; + } + + size_t copy_length = entries * sizeof (*first_mapping); + duplicate_mappings = malloc(copy_length); + if (duplicate_mappings != NULL) + bcopy(first_mapping, duplicate_mappings, copy_length); + else + entries = 0; + + *copied_entries = entries; + + return (duplicate_mappings); +} + +static vdev_t * +vdev_lookup_top(spa_t *spa, uint64_t vdev) +{ + vdev_t *rvd; + + STAILQ_FOREACH(rvd, &spa->spa_vdevs, v_childlink) + if (rvd->v_id == vdev) + break; + + return (rvd); +} + +/* + * This is a callback for vdev_indirect_remap() which allocates an + * indirect_split_t for each split segment and adds it to iv_splits. + */ +static void +vdev_indirect_gather_splits(uint64_t split_offset, vdev_t *vd, uint64_t offset, + uint64_t size, void *arg) +{ + int n = 1; + zio_t *zio = arg; + indirect_vsd_t *iv = zio->io_vsd; + + if (vd->v_read == vdev_indirect_read) + return; + + if (vd->v_read == vdev_mirror_read) + n = vd->v_nchildren; + + indirect_split_t *is = + malloc(offsetof(indirect_split_t, is_child[n])); + if (is == NULL) { + zio->io_error = ENOMEM; + return; + } + bzero(is, offsetof(indirect_split_t, is_child[n])); + + is->is_children = n; + is->is_size = size; + is->is_split_offset = split_offset; + is->is_target_offset = offset; + is->is_vdev = vd; + + /* + * Note that we only consider multiple copies of the data for + * *mirror* vdevs. We don't for "replacing" or "spare" vdevs, even + * though they use the same ops as mirror, because there's only one + * "good" copy under the replacing/spare. + */ + if (vd->v_read == vdev_mirror_read) { + int i = 0; + vdev_t *kid; + + STAILQ_FOREACH(kid, &vd->v_children, v_childlink) { + is->is_child[i++].ic_vdev = kid; + } + } else { + is->is_child[0].ic_vdev = vd; + } + + list_insert_tail(&iv->iv_splits, is); +} + +static void +vdev_indirect_remap(vdev_t *vd, uint64_t offset, uint64_t asize, void *arg) +{ + list_t stack; + spa_t *spa = vd->spa; + zio_t *zio = arg; + + list_create(&stack, sizeof (remap_segment_t), + offsetof(remap_segment_t, rs_node)); + + for (remap_segment_t *rs = rs_alloc(vd, offset, asize, 0); + rs != NULL; rs = list_remove_head(&stack)) { + vdev_t *v = rs->rs_vd; + uint64_t num_entries = 0; + /* vdev_indirect_mapping_t *vim = v->v_mapping; */ + vdev_indirect_mapping_entry_phys_t *mapping = + vdev_indirect_mapping_duplicate_adjacent_entries(v, + rs->rs_offset, rs->rs_asize, &num_entries); + + for (uint64_t i = 0; i < num_entries; i++) { + vdev_indirect_mapping_entry_phys_t *m = &mapping[i]; + uint64_t size = DVA_GET_ASIZE(&m->vimep_dst); + uint64_t dst_offset = DVA_GET_OFFSET(&m->vimep_dst); + uint64_t dst_vdev = DVA_GET_VDEV(&m->vimep_dst); + uint64_t inner_offset = rs->rs_offset - + DVA_MAPPING_GET_SRC_OFFSET(m); + uint64_t inner_size = + MIN(rs->rs_asize, size - inner_offset); + vdev_t *dst_v = vdev_lookup_top(spa, dst_vdev); + + if (dst_v->v_read == vdev_indirect_read) { + list_insert_head(&stack, + rs_alloc(dst_v, dst_offset + inner_offset, + inner_size, rs->rs_split_offset)); + } + vdev_indirect_gather_splits(rs->rs_split_offset, dst_v, + dst_offset + inner_offset, + inner_size, arg); + + /* + * vdev_indirect_gather_splits can have memory + * allocation error, we can not recover from it. + */ + if (zio->io_error != 0) + break; + rs->rs_offset += inner_size; + rs->rs_asize -= inner_size; + rs->rs_split_offset += inner_size; + } + + free(mapping); + free(rs); + if (zio->io_error != 0) + break; + } + + list_destroy(&stack); +} + +static void +vdev_indirect_map_free(zio_t *zio) +{ + indirect_vsd_t *iv = zio->io_vsd; + indirect_split_t *is; + + while ((is = list_head(&iv->iv_splits)) != NULL) { + for (int c = 0; c < is->is_children; c++) { + indirect_child_t *ic = &is->is_child[c]; + free(ic->ic_data); + } + list_remove(&iv->iv_splits, is); + free(is); + } + free(iv); +} + +static int +vdev_indirect_read(vdev_t *vdev, const blkptr_t *bp, void *buf, + off_t offset, size_t bytes) +{ + zio_t zio = { 0 }; + spa_t *spa = vdev->spa; + indirect_vsd_t *iv = malloc(sizeof (*iv)); + indirect_split_t *first; + int rc = EIO; + + if (iv == NULL) + return (ENOMEM); + bzero(iv, sizeof (*iv)); + + list_create(&iv->iv_splits, + sizeof (indirect_split_t), offsetof(indirect_split_t, is_node)); + + zio.io_spa = spa; + zio.io_bp = (blkptr_t *)bp; + zio.io_data = buf; + zio.io_size = bytes; + zio.io_offset = offset; + zio.io_vd = vdev; + zio.io_vsd = iv; + + if (vdev->v_mapping == NULL) { + vdev_indirect_config_t *vic; + + vic = &vdev->vdev_indirect_config; + vdev->v_mapping = vdev_indirect_mapping_open(spa, + &spa->spa_mos, vic->vic_mapping_object); + } + + vdev_indirect_remap(vdev, offset, bytes, &zio); + if (zio.io_error != 0) + return (zio.io_error); + + first = list_head(&iv->iv_splits); + if (first->is_size == zio.io_size) { + /* + * This is not a split block; we are pointing to the entire + * data, which will checksum the same as the original data. + * Pass the BP down so that the child i/o can verify the + * checksum, and try a different location if available + * (e.g. on a mirror). + * + * While this special case could be handled the same as the + * general (split block) case, doing it this way ensures + * that the vast majority of blocks on indirect vdevs + * (which are not split) are handled identically to blocks + * on non-indirect vdevs. This allows us to be less strict + * about performance in the general (but rare) case. + */ + rc = first->is_vdev->v_read(first->is_vdev, zio.io_bp, + zio.io_data, first->is_target_offset, bytes); + } else { + iv->iv_split_block = B_TRUE; + /* + * Read one copy of each split segment, from the + * top-level vdev. Since we don't know the + * checksum of each split individually, the child + * zio can't ensure that we get the right data. + * E.g. if it's a mirror, it will just read from a + * random (healthy) leaf vdev. We have to verify + * the checksum in vdev_indirect_io_done(). + */ + for (indirect_split_t *is = list_head(&iv->iv_splits); + is != NULL; is = list_next(&iv->iv_splits, is)) { + char *ptr = zio.io_data; + + rc = is->is_vdev->v_read(is->is_vdev, zio.io_bp, + ptr + is->is_split_offset, is->is_target_offset, + is->is_size); + } + if (zio_checksum_verify(spa, zio.io_bp, zio.io_data)) + rc = ECKSUM; + else + rc = 0; + } + + vdev_indirect_map_free(&zio); + if (rc == 0) + rc = zio.io_error; + + return (rc); +} + static int vdev_disk_read(vdev_t *vdev, const blkptr_t *bp, void *buf, off_t offset, size_t bytes) @@ -498,6 +1061,7 @@ static vdev_t * vdev_create(uint64_t guid, vdev_read_t *vdev_read) { vdev_t *vdev; + vdev_indirect_config_t *vic; vdev = malloc(sizeof(vdev_t)); memset(vdev, 0, sizeof(vdev_t)); @@ -505,8 +1069,9 @@ vdev_create(uint64_t guid, vdev_read_t *vdev_read) vdev->v_guid = guid; vdev->v_state = VDEV_STATE_OFFLINE; vdev->v_read = vdev_read; - vdev->v_phys_read = 0; - vdev->v_read_priv = 0; + + vic = &vdev->vdev_indirect_config; + vic->vic_prev_indirect_vdev = UINT64_MAX; STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink); return (vdev); @@ -540,6 +1105,7 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev, && strcmp(type, VDEV_TYPE_FILE) #endif && strcmp(type, VDEV_TYPE_RAIDZ) + && strcmp(type, VDEV_TYPE_INDIRECT) && strcmp(type, VDEV_TYPE_REPLACING)) { printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); return (EIO); @@ -568,7 +1134,23 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev, vdev = vdev_create(guid, vdev_raidz_read); else if (!strcmp(type, VDEV_TYPE_REPLACING)) vdev = vdev_create(guid, vdev_replacing_read); - else + else if (!strcmp(type, VDEV_TYPE_INDIRECT)) { + vdev_indirect_config_t *vic; + + vdev = vdev_create(guid, vdev_indirect_read); + vdev->v_state = VDEV_STATE_HEALTHY; + vic = &vdev->vdev_indirect_config; + + nvlist_find(nvlist, + ZPOOL_CONFIG_INDIRECT_OBJECT, DATA_TYPE_UINT64, + NULL, &vic->vic_mapping_object); + nvlist_find(nvlist, + ZPOOL_CONFIG_INDIRECT_BIRTHS, DATA_TYPE_UINT64, + NULL, &vic->vic_births_object); + nvlist_find(nvlist, + ZPOOL_CONFIG_PREV_INDIRECT_VDEV, DATA_TYPE_UINT64, + NULL, &vic->vic_prev_indirect_vdev); + } else vdev = vdev_create(guid, vdev_disk_read); vdev->v_id = id; @@ -603,20 +1185,24 @@ vdev_init_from_nvlist(const unsigned char *nvlist, vdev_t *pvdev, vdev->v_devid = NULL; } } else { + char *name; + if (!strcmp(type, "raidz")) { - if (vdev->v_nparity == 1) - vdev->v_name = "raidz1"; - else if (vdev->v_nparity == 2) - vdev->v_name = "raidz2"; - else if (vdev->v_nparity == 3) - vdev->v_name = "raidz3"; - else { - printf("ZFS: can only boot from disk, mirror, raidz1, raidz2 and raidz3 vdevs\n"); + if (vdev->v_nparity < 1 || + vdev->v_nparity > 3) { + printf("ZFS: can only boot from disk, " + "mirror, raidz1, raidz2 and raidz3 " + "vdevs\n"); return (EIO); } + asprintf(&name, "%s%d-%" PRIu64, type, + vdev->v_nparity, id); } else { - vdev->v_name = strdup(type); + asprintf(&name, "%s-%" PRIu64, type, id); } + if (name == NULL) + return (ENOMEM); + vdev->v_name = name; } } else { is_new = 0; @@ -2261,10 +2847,48 @@ check_mos_features(const spa_t *spa) } static int -zfs_spa_init(spa_t *spa) +load_nvlist(spa_t *spa, uint64_t obj, unsigned char **value) { dnode_phys_t dir; + size_t size; int rc; + unsigned char *nv; + + *value = NULL; + if ((rc = objset_get_dnode(spa, &spa->spa_mos, obj, &dir)) != 0) + return (rc); + if (dir.dn_type != DMU_OT_PACKED_NVLIST && + dir.dn_bonustype != DMU_OT_PACKED_NVLIST_SIZE) { + return (EIO); + } + + if (dir.dn_bonuslen != sizeof (uint64_t)) + return (EIO); + + size = *(uint64_t *)DN_BONUS(&dir); + nv = malloc(size); + if (nv == NULL) + return (ENOMEM); + + rc = dnode_read(spa, &dir, 0, nv, size); + if (rc != 0) { + free(nv); + nv = NULL; + return (rc); + } + *value = nv; + return (rc); +} + +static int +zfs_spa_init(spa_t *spa) +{ + dnode_phys_t dir; + uint64_t config_object; + unsigned char *nvlist; + char *type; + const unsigned char *nv; + int nkids, rc; if (zio_read(spa, &spa->spa_uberblock.ub_rootbp, &spa->spa_mos)) { printf("ZFS: can't read MOS of pool %s\n", spa->spa_name); @@ -2289,8 +2913,77 @@ zfs_spa_init(spa_t *spa) rc = check_mos_features(spa); if (rc != 0) { printf("ZFS: pool %s is not supported\n", spa->spa_name); + return (rc); } + rc = zap_lookup(spa, &dir, DMU_POOL_CONFIG, + sizeof (config_object), 1, &config_object); + if (rc != 0) { + printf("ZFS: can not read MOS %s\n", DMU_POOL_CONFIG); + return (EIO); + } + rc = load_nvlist(spa, config_object, &nvlist); + if (rc != 0) + return (rc); + + /* Update vdevs from MOS config. */ + if (nvlist_find(nvlist + 4, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, + NULL, &nv)) { + rc = EIO; + goto done; + } + + if (nvlist_find(nv, ZPOOL_CONFIG_TYPE, DATA_TYPE_STRING, + NULL, &type)) { + printf("ZFS: can't find vdev details\n"); + rc = ENOENT; + goto done; + } + if (strcmp(type, VDEV_TYPE_ROOT) != 0) { + rc = ENOENT; + goto done; + } + + rc = nvlist_find(nv, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, + &nkids, &nv); + if (rc != 0) + goto done; + + for (int i = 0; i < nkids; i++) { + vdev_t *vd, *prev, *kid = NULL; + rc = vdev_init_from_nvlist(nv, NULL, &kid, 0); + if (rc != 0) { + printf("vdev_init_from_nvlist: %d\n", rc); + break; + } + kid->spa = spa; + prev = NULL; + STAILQ_FOREACH(vd, &spa->spa_vdevs, v_childlink) { + /* Already present? */ + if (kid->v_id == vd->v_id) { + kid = NULL; + break; + } + if (vd->v_id > kid->v_id) { + if (prev == NULL) { + STAILQ_INSERT_HEAD(&spa->spa_vdevs, + kid, v_childlink); + } else { + STAILQ_INSERT_AFTER(&spa->spa_vdevs, + prev, kid, v_childlink); + } + kid = NULL; + break; + } + prev = vd; + } + if (kid != NULL) + STAILQ_INSERT_TAIL(&spa->spa_vdevs, kid, v_childlink); + nv = nvlist_next(nv); + } + rc = 0; +done: + free(nvlist); return (rc); } diff --git a/usr/src/boot/sys/boot/efi/libefi/efi_console.c b/usr/src/boot/sys/boot/efi/libefi/efi_console.c index a4bc6a27ec..1e60d2488a 100644 --- a/usr/src/boot/sys/boot/efi/libefi/efi_console.c +++ b/usr/src/boot/sys/boot/efi/libefi/efi_console.c @@ -57,7 +57,7 @@ struct efi_console_data { }; #define KEYBUFSZ 10 -static unsigned keybuf[KEYBUFSZ]; /* keybuf for extended codes */ +static unsigned keybuf[KEYBUFSZ]; /* keybuf for extended codes */ static int key_pending; @@ -218,12 +218,12 @@ plat_cons_update_mode(int mode) } if (console_control != NULL) - (void)console_control->SetMode(console_control, console_mode); + (void) console_control->SetMode(console_control, console_mode); /* some firmware enables the cursor when switching modes */ conout->EnableCursor(conout, FALSE); if (console_mode == EfiConsoleControlScreenText) { - (void)conout->QueryMode(conout, conout->Mode->Mode, + (void) conout->QueryMode(conout, conout->Mode->Mode, &cols, &rows); devinit.version = VIS_CONS_REV; devinit.width = cols; @@ -275,7 +275,7 @@ efi_text_devinit(struct vis_devinit *data) if (console_mode != EfiConsoleControlScreenText) return (1); - (void)conout->QueryMode(conout, conout->Mode->Mode, &cols, &rows); + (void) conout->QueryMode(conout, conout->Mode->Mode, &cols, &rows); data->version = VIS_CONS_REV; data->width = cols; data->height = rows; @@ -328,7 +328,7 @@ efi_text_cons_display(struct vis_consdisplay *da) tem_char_t *data; int i; - (void)conout->QueryMode(conout, conout->Mode->Mode, &col, &row); + (void) conout->QueryMode(conout, conout->Mode->Mode, &col, &row); /* reduce clear line on bottom row by one to prevent autoscroll */ if (row - 1 == da->row && da->col == 0 && da->width == col) @@ -365,6 +365,7 @@ static void efi_cons_cursor(struct vis_conscursor *cc) case VIS_GET_CURSOR: { /* only used at startup */ uint32_t row, col; + row = col = 0; plat_tem_get_prom_pos(&row, &col); cc->row = row; cc->col = col; @@ -475,7 +476,7 @@ efi_cons_init(struct console *cp, int arg __unused) memset(keybuf, 0, KEYBUFSZ); status = BS->LocateProtocol(&ccontrol_protocol_guid, NULL, - (VOID **)&console_control); + (void **)&console_control); if (status == EFI_SUCCESS) { BOOLEAN GopUgaExists, StdInLocked; status = console_control->GetMode(console_control, @@ -485,7 +486,7 @@ efi_cons_init(struct console *cp, int arg __unused) } max_dim = best_mode = 0; - for (i = 0; i <= conout->Mode->MaxMode ; i++) { + for (i = 0; i <= conout->Mode->MaxMode; i++) { status = conout->QueryMode(conout, i, &cols, &rows); if (EFI_ERROR(status)) continue; @@ -518,7 +519,7 @@ efi_cons_init(struct console *cp, int arg __unused) } if (console_control != NULL) - (void)console_control->SetMode(console_control, console_mode); + (void) console_control->SetMode(console_control, console_mode); /* some firmware enables the cursor when switching modes */ conout->EnableCursor(conout, FALSE); @@ -742,7 +743,7 @@ efi_cons_efiputchar(int c) EFI_STATUS status; buf[0] = c; - buf[1] = 0; /* terminate string */ + buf[1] = 0; /* terminate string */ status = conout->TestString(conout, buf); if (EFI_ERROR(status)) diff --git a/usr/src/boot/sys/boot/efi/libefi/i386/Makefile b/usr/src/boot/sys/boot/efi/libefi/i386/Makefile index cc749255bd..60274fab76 100644 --- a/usr/src/boot/sys/boot/efi/libefi/i386/Makefile +++ b/usr/src/boot/sys/boot/efi/libefi/i386/Makefile @@ -24,9 +24,6 @@ include ../Makefile.com CFLAGS += -m32 -# false positive only with a 64-bit smatch -SMOFF += uninitialized - CLEANFILES += machine x86 $(OBJS): machine x86 diff --git a/usr/src/boot/sys/boot/efi/loader/Makefile.com b/usr/src/boot/sys/boot/efi/loader/Makefile.com index cc1b0f8991..9308371ae8 100644 --- a/usr/src/boot/sys/boot/efi/loader/Makefile.com +++ b/usr/src/boot/sys/boot/efi/loader/Makefile.com @@ -32,7 +32,6 @@ SRCS= \ font.c \ $(FONT).c \ framebuffer.c \ - list.c \ main.c \ memmap.c \ multiboot.S \ @@ -52,7 +51,6 @@ OBJS= \ font.o \ $(FONT).o \ framebuffer.o \ - list.o \ main.o \ memmap.o \ multiboot.o \ @@ -190,9 +188,6 @@ clean clobber: %.o: ../../../i386/libi386/%.c $(COMPILE.c) $< -%.o: $(SRC)/common/list/%.c - $(COMPILE.c) -DNDEBUG $< - %.o: $(SRC)/common/font/%.c $(COMPILE.c) $< diff --git a/usr/src/boot/sys/boot/i386/loader/Makefile b/usr/src/boot/sys/boot/i386/loader/Makefile index 0b1b0c6198..4fc97ac951 100644 --- a/usr/src/boot/sys/boot/i386/loader/Makefile +++ b/usr/src/boot/sys/boot/i386/loader/Makefile @@ -59,7 +59,7 @@ SRCS += boot.c commands.c console.c devopen.c interp.c SRCS += interp_backslash.c interp_parse.c ls.c misc.c SRCS += module.c linenoise.c multiboot2.c SRCS += zfs_cmd.c -SRCS += font.c $(FONT).c list.c tem.c +SRCS += font.c $(FONT).c tem.c tem.o := CPPFLAGS += $(DEFAULT_CONSOLE_COLOR) @@ -164,9 +164,6 @@ install: all $(ROOT_BOOT_DEFAULTS) $(ROOT_BOOT_FORTH) \ %.o: ../../common/linenoise/%.c $(COMPILE.c) -o $@ $< -%.o: $(SRC)/common/list/%.c - $(COMPILE.c) -DNDEBUG $< - %.o: $(SRC)/common/font/%.c $(COMPILE.c) $< diff --git a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h index 6c61f6e0c9..5997260616 100644 --- a/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h +++ b/usr/src/boot/sys/cddl/boot/zfs/zfsimpl.h @@ -717,6 +717,9 @@ typedef enum { #define ZPOOL_CONFIG_CHILDREN "children" #define ZPOOL_CONFIG_ID "id" #define ZPOOL_CONFIG_GUID "guid" +#define ZPOOL_CONFIG_INDIRECT_OBJECT "com.delphix:indirect_object" +#define ZPOOL_CONFIG_INDIRECT_BIRTHS "com.delphix:indirect_births" +#define ZPOOL_CONFIG_PREV_INDIRECT_VDEV "com.delphix:prev_indirect_vdev" #define ZPOOL_CONFIG_PATH "path" #define ZPOOL_CONFIG_DEVID "devid" #define ZPOOL_CONFIG_PHYS_PATH "phys_path" @@ -761,6 +764,7 @@ typedef enum { #define VDEV_TYPE_SPARE "spare" #define VDEV_TYPE_LOG "log" #define VDEV_TYPE_L2CACHE "l2cache" +#define VDEV_TYPE_INDIRECT "indirect" /* * This is needed in userland to report the minimum necessary device size. @@ -853,7 +857,7 @@ struct uberblock { */ #define DNODE_SHIFT 9 /* 512 bytes */ #define DN_MIN_INDBLKSHIFT 12 /* 4k */ -#define DN_MAX_INDBLKSHIFT 14 /* 16k */ +#define DN_MAX_INDBLKSHIFT 17 /* 128k */ #define DNODE_BLOCK_SHIFT 14 /* 16k */ #define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */ #define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */ @@ -1226,6 +1230,9 @@ typedef struct dsl_dataset_phys { #define DMU_POOL_HISTORY "history" #define DMU_POOL_PROPS "pool_props" #define DMU_POOL_CHECKSUM_SALT "org.illumos:checksum_salt" +#define DMU_POOL_REMOVING "com.delphix:removing" +#define DMU_POOL_OBSOLETE_BPOBJ "com.delphix:obsolete_bpobj" +#define DMU_POOL_CONDENSING_INDIRECT "com.delphix:condensing_indirect" #define ZAP_MAGIC 0x2F52AB2ABULL @@ -1539,6 +1546,116 @@ typedef int vdev_read_t(struct vdev *vdev, const blkptr_t *bp, typedef STAILQ_HEAD(vdev_list, vdev) vdev_list_t; +typedef struct vdev_indirect_mapping_entry_phys { + /* + * Decode with DVA_MAPPING_* macros. + * Contains: + * the source offset (low 63 bits) + * the one-bit "mark", used for garbage collection (by zdb) + */ + uint64_t vimep_src; + + /* + * Note: the DVA's asize is 24 bits, and can thus store ranges + * up to 8GB. + */ + dva_t vimep_dst; +} vdev_indirect_mapping_entry_phys_t; + +#define DVA_MAPPING_GET_SRC_OFFSET(vimep) \ + BF64_GET_SB((vimep)->vimep_src, 0, 63, SPA_MINBLOCKSHIFT, 0) +#define DVA_MAPPING_SET_SRC_OFFSET(vimep, x) \ + BF64_SET_SB((vimep)->vimep_src, 0, 63, SPA_MINBLOCKSHIFT, 0, x) + +typedef struct vdev_indirect_mapping_entry { + vdev_indirect_mapping_entry_phys_t vime_mapping; + uint32_t vime_obsolete_count; + list_node_t vime_node; +} vdev_indirect_mapping_entry_t; + +/* + * This is stored in the bonus buffer of the mapping object, see comment of + * vdev_indirect_config for more details. + */ +typedef struct vdev_indirect_mapping_phys { + uint64_t vimp_max_offset; + uint64_t vimp_bytes_mapped; + uint64_t vimp_num_entries; /* number of v_i_m_entry_phys_t's */ + + /* + * For each entry in the mapping object, this object contains an + * entry representing the number of bytes of that mapping entry + * that were no longer in use by the pool at the time this indirect + * vdev was last condensed. + */ + uint64_t vimp_counts_object; +} vdev_indirect_mapping_phys_t; + +#define VDEV_INDIRECT_MAPPING_SIZE_V0 (3 * sizeof (uint64_t)) + +typedef struct vdev_indirect_mapping { + uint64_t vim_object; + boolean_t vim_havecounts; + + /* vim_entries segment offset currently in memory. */ + uint64_t vim_entry_offset; + /* vim_entries segment size. */ + size_t vim_num_entries; + + /* Needed by dnode_read() */ + const void *vim_spa; + dnode_phys_t *vim_dn; + + /* + * An ordered array of mapping entries, sorted by source offset. + * Note that vim_entries is needed during a removal (and contains + * mappings that have been synced to disk so far) to handle frees + * from the removing device. + */ + vdev_indirect_mapping_entry_phys_t *vim_entries; + objset_phys_t *vim_objset; + vdev_indirect_mapping_phys_t *vim_phys; +} vdev_indirect_mapping_t; + +/* + * On-disk indirect vdev state. + * + * An indirect vdev is described exclusively in the MOS config of a pool. + * The config for an indirect vdev includes several fields, which are + * accessed in memory by a vdev_indirect_config_t. + */ +typedef struct vdev_indirect_config { + /* + * Object (in MOS) which contains the indirect mapping. This object + * contains an array of vdev_indirect_mapping_entry_phys_t ordered by + * vimep_src. The bonus buffer for this object is a + * vdev_indirect_mapping_phys_t. This object is allocated when a vdev + * removal is initiated. + * + * Note that this object can be empty if none of the data on the vdev + * has been copied yet. + */ + uint64_t vic_mapping_object; + + /* + * Object (in MOS) which contains the birth times for the mapping + * entries. This object contains an array of + * vdev_indirect_birth_entry_phys_t sorted by vibe_offset. The bonus + * buffer for this object is a vdev_indirect_birth_phys_t. This object + * is allocated when a vdev removal is initiated. + * + * Note that this object can be empty if none of the vdev has yet been + * copied. + */ + uint64_t vic_births_object; + +/* + * This is the vdev ID which was removed previous to this vdev, or + * UINT64_MAX if there are no previously removed vdevs. + */ + uint64_t vic_prev_indirect_vdev; +} vdev_indirect_config_t; + typedef struct vdev { STAILQ_ENTRY(vdev) v_childlink; /* link in parent's child list */ STAILQ_ENTRY(vdev) v_alllink; /* link in global vdev list */ @@ -1557,6 +1674,11 @@ typedef struct vdev { vdev_read_t *v_read; /* read from vdev */ void *v_read_priv; /* private data for read function */ struct spa *spa; /* link to spa */ + /* + * Values stored in the config for an indirect or removing vdev. + */ + vdev_indirect_config_t vdev_indirect_config; + vdev_indirect_mapping_t *v_mapping; } vdev_t; /* @@ -1578,6 +1700,21 @@ typedef struct spa { vdev_t *spa_boot_vdev; /* boot device for kernel */ } spa_t; +/* IO related arguments. */ +typedef struct zio { + spa_t *io_spa; + blkptr_t *io_bp; + void *io_data; + uint64_t io_size; + uint64_t io_offset; + + /* Stuff for the vdev stack */ + vdev_t *io_vd; + void *io_vsd; + + int io_error; +} zio_t; + static void decode_embedded_bp_compressed(const blkptr_t *, void *); #endif /* _ZFSIMPL_H */ diff --git a/usr/src/tools/smatch/Makefile b/usr/src/tools/smatch/Makefile index 821e2804f0..bd0bce00b2 100644 --- a/usr/src/tools/smatch/Makefile +++ b/usr/src/tools/smatch/Makefile @@ -27,8 +27,11 @@ include ../Makefile.tools # We have to build smatch before we can use cw i386_CC = $(GNUC_ROOT)/bin/gcc sparc_CC = $(GNUC_ROOT)/bin/gcc +# sparc doesn't recognise -msave-args +i386_SMATCHFLAGS = -msave-args +sparc_SMATCHFLAGS = -CFLAGS = -O -m64 -msave-args -D__sun -Wall -Wno-unknown-pragmas -std=gnu99 -nodefaultlibs +CFLAGS = -O -m64 $($(MACH)_SMATCHFLAGS) -D__sun -Wall -Wno-unknown-pragmas -std=gnu99 -nodefaultlibs SMATCHDATADIR = $(ROOTONBLDSHARE)/smatch diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex.c b/usr/src/uts/common/io/ib/ibnex/ibnex.c index 765f215458..e8cc157c91 100644 --- a/usr/src/uts/common/io/ib/ibnex/ibnex.c +++ b/usr/src/uts/common/io/ib/ibnex/ibnex.c @@ -78,7 +78,7 @@ dev_info_t *ibnex_commsvc_initnode(dev_info_t *, static void ibnex_delete_port_node_data(ibnex_node_data_t *); int ibnex_get_dip_from_guid(ib_guid_t, int, ib_pkey_t, dev_info_t **); -int ibnex_get_node_and_dip_from_guid(ib_guid_t, int, +int ibnex_get_node_and_dip_from_guid(ib_guid_t, int, ib_pkey_t, ibnex_node_data_t **, dev_info_t **); static ibnex_node_data_t *ibnex_is_node_data_present(ibnex_node_type_t, void *, int, ib_pkey_t); @@ -257,7 +257,7 @@ static struct cb_ops ibnex_cbops = { ddi_prop_op, /* prop_op */ NULL, /* stream */ D_MP, /* cb_flag */ - CB_REV, /* rev */ + CB_REV, /* rev */ nodev, /* int (*cb_aread)() */ nodev /* int (*cb_awrite)() */ }; @@ -526,7 +526,7 @@ ibnex_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) * power management of the phci and client */ if (ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, - "pm-want-child-notification?", NULL, NULL) != DDI_PROP_SUCCESS) { + "pm-want-child-notification?", NULL, 0) != DDI_PROP_SUCCESS) { IBTF_DPRINTF_L2("ibnex", "_attach: create pm-want-child-notification failed"); (void) ddi_remove_minor_node(dip, NULL); @@ -827,7 +827,7 @@ ibt_status_t ibnex_ibtl_callback(ibtl_ibnex_cb_args_t *cb_args) { int retval = IBT_SUCCESS; - struct dev_ops *hca_dev_ops; + struct dev_ops *hca_dev_ops; dev_info_t *clnt_dip; ibnex_node_data_t *node_data; @@ -888,7 +888,7 @@ ibnex_ibtl_callback(ibtl_ibnex_cb_args_t *cb_args) /* * ibnex_map_fault - * IOC drivers need not map memory. Return failure to fail any + * IOC drivers need not map memory. Return failure to fail any * such calls. */ /*ARGSUSED*/ @@ -903,7 +903,7 @@ ibnex_map_fault(dev_info_t *dip, dev_info_t *rdip, struct hat *hat, /* * ibnex_busctl - * bus_ctl bus_ops entry point + * bus_ctl bus_ops entry point */ /*ARGSUSED*/ int @@ -1134,7 +1134,7 @@ ibnex_bus_config(dev_info_t *parent, uint_t flag, ddi_bus_config_op_t op, void *devname, dev_info_t **child) { int ret = IBNEX_SUCCESS, len, circ, need_bus_config; - char *device_name, *cname = NULL, *caddr = NULL; + char *device_name, *cname = NULL, *caddr = NULL; dev_info_t *cdip; ibnex_node_data_t *node_data; @@ -1478,9 +1478,9 @@ void ibnex_create_vppa_nodes( dev_info_t *parent, ibdm_port_attr_t *port_attr) { - int idx, ii; + int idx, ii; int rval; - ib_pkey_t pkey; + ib_pkey_t pkey; dev_info_t *dip; IBTF_DPRINTF_L4("ibnex", "\tcreate_vppa_nodes: Begin"); @@ -1715,8 +1715,8 @@ int ibnex_get_pkey_commsvc_index_portnum(char *device_name, int *index, ib_pkey_t *pkey, uint8_t *port_num) { - char *srv, **service_name, *temp; - int ii, ncommsvcs, ret; + char *srv, **service_name, *temp; + int ii, ncommsvcs, ret; if (ibnex_devname_to_portnum(device_name, port_num) != IBNEX_SUCCESS) { @@ -1946,7 +1946,7 @@ ibnex_pseudo_config_one(ibnex_node_data_t *node_data, char *caddr, /* * This function is now called with PHCI / HCA driver * as parent. The format of devicename is : - * <driver_name>@<driver_name>,<unit_address> + * <driver_name>@<driver_name>,<unit_address> * The "caddr" part of the devicename matches the * format of pseudo_node_addr. * @@ -2975,7 +2975,7 @@ ibnex_commsvc_initnode(dev_info_t *parent, ibdm_port_attr_t *port_attr, ibnex_node_data_t *node_data; ibnex_port_node_t *port_node; char devname[MAXNAMELEN]; - int cdip_allocated = 0; + int cdip_allocated = 0; ASSERT(MUTEX_HELD(&ibnex.ibnex_mutex)); @@ -3303,7 +3303,7 @@ static int ibnex_create_port_compatible_prop(dev_info_t *child_dip, char *comm_svcp, ibdm_port_attr_t *port_attr) { - int rval, i; + int rval, i; char *temp; char *compatible[IBNEX_MAX_IBPORT_COMPAT_NAMES]; @@ -3974,7 +3974,7 @@ ib_vhci_pi_uninit(dev_info_t *vdip, mdi_pathinfo_t *pip, int flag) /*ARGSUSED*/ static int ib_vhci_pi_state_change(dev_info_t *vdip, mdi_pathinfo_t *pip, - mdi_pathinfo_state_t state, uint32_t arg1, int arg2) + mdi_pathinfo_state_t state, uint32_t arg1, int arg2) { IBTF_DPRINTF_L4("ibnex", "\tpi_state_change: dip %p pip %p state %x", vdip, pip, state); diff --git a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c index e78c3735ad..7b7457160f 100644 --- a/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c +++ b/usr/src/uts/common/io/ib/ibnex/ibnex_ioctl.c @@ -60,7 +60,7 @@ static int ibnex_fill_nodeinfo(nvlist_t **, ibnex_node_data_t *, static void ibnex_figure_ap_devstate(ibnex_node_data_t *, devctl_ap_state_t *); static void ibnex_figure_ib_apid_devstate(devctl_ap_state_t *); -static char *ibnex_get_apid(struct devctl_iocdata *); +static char *ibnex_get_apid(struct devctl_iocdata *); static int ibnex_get_dip_from_apid(char *, dev_info_t **, ibnex_node_data_t **); extern int ibnex_get_node_and_dip_from_guid(ib_guid_t, int, @@ -1010,9 +1010,9 @@ ibnex_devctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, /* * Five types of APIDs are supported: * o HCA_GUID,0,service-name (HCA-SVC device) - * o IOC_GUID (IOC device) + * o IOC_GUID (IOC device) * o PORT_GUID,0,service-name (Port device) - * o pseudo_name,unit-address, (Pseudo device) + * o pseudo_name,unit-address, (Pseudo device) * o PORT_GUID,P_Key,service-name (VPPA device) * If the apid doesn't have "," then treat it as an IOC * If the apid has one "," then it is Pseudo device @@ -1149,7 +1149,7 @@ ibnex_get_snapshot(char **buf, size_t *sz, int allow_probe) { int i, j, k, l, hca_count; nvlist_t *nvl; - ib_pkey_t pkey; + ib_pkey_t pkey; boolean_t found; ibdm_ioc_info_t *ioc_listp; ibdm_ioc_info_t *iocp; @@ -2909,7 +2909,7 @@ out: (x)->hca_max_rdma_in_chan = (y)->hca_max_rdma_in_chan; \ (x)->hca_max_rdma_out_chan = (y)->hca_max_rdma_out_chan; \ (x)->hca_max_ipv6_chan = (y)->hca_max_ipv6_chan; \ - (x)->hca_max_ether_chan = (y)->hca_max_ether_chan; \ + (x)->hca_max_ether_chan = (y)->hca_max_ether_chan; \ (x)->hca_max_mcg_chans = (y)->hca_max_mcg_chans; \ (x)->hca_max_mcg = (y)->hca_max_mcg; \ (x)->hca_max_chan_per_mcg = (y)->hca_max_chan_per_mcg; \ @@ -2946,8 +2946,7 @@ out: MAX_HCA_DRVNAME_LEN); \ (x)->hca_driver_instance = (instance); \ \ - (x)->hca_device_path = ((device_path_alloc_sz) >= (device_path_len)) \ - ? (device_path) : NULL; \ + (x)->hca_device_path = (device_path); \ (x)->hca_device_path_len = (device_path_len); \ } @@ -2960,7 +2959,9 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, { int rv = 0; ibnex_ctl_query_hca_t *query_hca = NULL; +#ifdef _MULTI_DATAMODEL ibnex_ctl_query_hca_32_t *query_hca_32 = NULL; +#endif ibt_hca_attr_t *hca_attr = NULL; char driver_name[MAX_HCA_DRVNAME_LEN]; int instance; @@ -2968,13 +2969,15 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, char *device_path; uint_t device_path_alloc_sz, hca_device_path_len; char *hca_device_path = NULL; + uint_t model; IBTF_DPRINTF_L4("ibnex", "\tctl_query_hca: cmd=%x, arg=%p, " "mode=%x, cred=%p, rval=%p, dev=0x%x", cmd, arg, mode, credp, rvalp, dev); + switch (model = ddi_model_convert_from(mode & FMODELS)) { #ifdef _MULTI_DATAMODEL - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + case DDI_MODEL_ILP32: query_hca_32 = kmem_zalloc( sizeof (ibnex_ctl_query_hca_32_t), KM_SLEEP); @@ -2989,9 +2992,9 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, hca_guid = query_hca_32->hca_guid; device_path = (char *)(uintptr_t)query_hca_32->hca_device_path; device_path_alloc_sz = query_hca_32->hca_device_path_alloc_sz; - } else + break; #endif - { + default: query_hca = kmem_zalloc(sizeof (ibnex_ctl_query_hca_t), KM_SLEEP); @@ -3006,6 +3009,7 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, hca_guid = query_hca->hca_guid; device_path = query_hca->hca_device_path; device_path_alloc_sz = query_hca->hca_device_path_alloc_sz; + break; } hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP); @@ -3020,11 +3024,19 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, hca_device_path_len = strlen(hca_device_path) + 1; + switch (model) { + char *device_path64; #ifdef _MULTI_DATAMODEL - if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { + caddr32_t device_path32; + case DDI_MODEL_ILP32: + + if (device_path_alloc_sz >= hca_device_path_len) + device_path32 = (uintptr_t)device_path; + else + device_path32 = (uintptr_t)NULL; IBNEX_CTL_CP_HCA_INFO(&query_hca_32->hca_info, hca_attr, - driver_name, instance, query_hca_32->hca_device_path, + driver_name, instance, device_path32, device_path_alloc_sz, hca_device_path_len); /* copy hca information to the user space */ @@ -3036,11 +3048,16 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, rv = EFAULT; goto out; } - } else + break; #endif - { + default: + if (device_path_alloc_sz >= hca_device_path_len) + device_path64 = device_path; + else + device_path64 = NULL; + IBNEX_CTL_CP_HCA_INFO(&query_hca->hca_info, hca_attr, - driver_name, instance, device_path, + driver_name, instance, device_path64, device_path_alloc_sz, hca_device_path_len); /* copy hca information to the user space */ @@ -3052,6 +3069,7 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, rv = EFAULT; goto out; } + break; } if (device_path_alloc_sz >= hca_device_path_len) { @@ -3067,8 +3085,10 @@ ibnex_ctl_query_hca(dev_t dev, int cmd, intptr_t arg, int mode, out: if (query_hca) kmem_free(query_hca, sizeof (ibnex_ctl_query_hca_t)); +#ifdef _MULTI_DATAMODEL if (query_hca_32) kmem_free(query_hca_32, sizeof (ibnex_ctl_query_hca_32_t)); +#endif if (hca_attr) kmem_free(hca_attr, sizeof (ibt_hca_attr_t)); if (hca_device_path) |
