summaryrefslogtreecommitdiff
path: root/usr/src/uts
diff options
context:
space:
mode:
Diffstat (limited to 'usr/src/uts')
-rw-r--r--usr/src/uts/common/io/cmlb.c58
-rw-r--r--usr/src/uts/common/io/scsi/targets/sd.c744
-rw-r--r--usr/src/uts/common/os/dumpsubr.c12
-rw-r--r--usr/src/uts/common/sys/dkio.h18
-rw-r--r--usr/src/uts/common/sys/dklabel.h4
-rw-r--r--usr/src/uts/common/sys/scsi/targets/sddef.h34
-rw-r--r--usr/src/uts/common/xen/io/xdb.c18
-rw-r--r--usr/src/uts/common/xen/io/xdb.h2
-rw-r--r--usr/src/uts/common/xen/io/xdf.c93
-rw-r--r--usr/src/uts/common/xen/io/xdf.h3
-rw-r--r--usr/src/uts/common/xen/sys/xendev.h1
-rw-r--r--usr/src/uts/sun4v/io/vdc.c164
-rw-r--r--usr/src/uts/sun4v/io/vds.c338
-rw-r--r--usr/src/uts/sun4v/sys/vdc.h10
-rw-r--r--usr/src/uts/sun4v/sys/vdsk_common.h12
15 files changed, 1205 insertions, 306 deletions
diff --git a/usr/src/uts/common/io/cmlb.c b/usr/src/uts/common/io/cmlb.c
index 75559a9b94..343b1b965c 100644
--- a/usr/src/uts/common/io/cmlb.c
+++ b/usr/src/uts/common/io/cmlb.c
@@ -1287,6 +1287,9 @@ cmlb_check_update_blockcount(struct cmlb_lun *cl, void *tg_cookie)
if ((capacity != 0) && (lbasize != 0)) {
cl->cl_blockcount = capacity;
cl->cl_tgt_blocksize = lbasize;
+ if (!cl->cl_is_removable) {
+ cl->cl_sys_blocksize = lbasize;
+ }
return (0);
} else {
return (EIO);
@@ -1592,7 +1595,7 @@ cmlb_validate_geometry(struct cmlb_lun *cl, boolean_t forcerevalid, int flags,
label_addr = (daddr_t)(cl->cl_solaris_offset + DK_LABEL_LOC);
- buffer_size = sizeof (struct dk_label);
+ buffer_size = cl->cl_sys_blocksize;
cmlb_dbg(CMLB_TRACE, cl, "cmlb_validate_geometry: "
"label_addr: 0x%x allocation size: 0x%x\n",
@@ -2199,12 +2202,6 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags,
ASSERT(mutex_owned(CMLB_MUTEX(cl)));
- if (cl->cl_tgt_blocksize != cl->cl_sys_blocksize) {
- rval = EINVAL;
- goto done_err1;
- }
-
-
lbasize = cl->cl_sys_blocksize;
cl->cl_reserved = -1;
@@ -3637,7 +3634,7 @@ cmlb_dkio_partition(struct cmlb_lun *cl, caddr_t arg, int flag,
}
buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP);
- rval = DK_TG_READ(cl, buffer, 1, DEV_BSIZE, tg_cookie);
+ rval = DK_TG_READ(cl, buffer, 1, cl->cl_sys_blocksize, tg_cookie);
if (rval != 0)
goto done_error;
@@ -4048,9 +4045,9 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
cl->cl_reserved = -1;
mutex_exit(CMLB_MUTEX(cl));
- gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP);
+ gpt = kmem_alloc(cl->cl_sys_blocksize, KM_SLEEP);
- if (DK_TG_READ(cl, gpt, 1, DEV_BSIZE, tg_cookie) != 0) {
+ if (DK_TG_READ(cl, gpt, 1, cl->cl_sys_blocksize, tg_cookie) != 0) {
goto done;
}
@@ -4059,7 +4056,8 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
if (rval == 0) {
/* clear primary */
bzero(gpt, sizeof (efi_gpt_t));
- if (rval = DK_TG_WRITE(cl, gpt, 1, EFI_LABEL_SIZE, tg_cookie)) {
+ if (rval = DK_TG_WRITE(cl, gpt, 1, cl->cl_sys_blocksize,
+ tg_cookie)) {
cmlb_dbg(CMLB_INFO, cl,
"cmlb_clear_efi: clear primary label failed\n");
}
@@ -4070,8 +4068,8 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
goto done;
}
- if ((rval = DK_TG_READ(cl, gpt, cap - 1, EFI_LABEL_SIZE, tg_cookie))
- != 0) {
+ if ((rval = DK_TG_READ(cl, gpt, cap - 1, cl->cl_sys_blocksize,
+ tg_cookie)) != 0) {
goto done;
}
cmlb_swap_efi_gpt(gpt);
@@ -4081,7 +4079,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
cmlb_dbg(CMLB_TRACE, cl,
"cmlb_clear_efi clear backup@%lu\n", cap - 1);
bzero(gpt, sizeof (efi_gpt_t));
- if ((rval = DK_TG_WRITE(cl, gpt, cap - 1, EFI_LABEL_SIZE,
+ if ((rval = DK_TG_WRITE(cl, gpt, cap - 1, cl->cl_sys_blocksize,
tg_cookie))) {
cmlb_dbg(CMLB_INFO, cl,
"cmlb_clear_efi: clear backup label failed\n");
@@ -4092,7 +4090,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
* header of this file
*/
if ((rval = DK_TG_READ(cl, gpt, cap - 2,
- EFI_LABEL_SIZE, tg_cookie)) != 0) {
+ cl->cl_sys_blocksize, tg_cookie)) != 0) {
goto done;
}
cmlb_swap_efi_gpt(gpt);
@@ -4104,7 +4102,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
cap - 2);
bzero(gpt, sizeof (efi_gpt_t));
if ((rval = DK_TG_WRITE(cl, gpt, cap - 2,
- EFI_LABEL_SIZE, tg_cookie))) {
+ cl->cl_sys_blocksize, tg_cookie))) {
cmlb_dbg(CMLB_INFO, cl,
"cmlb_clear_efi: clear legacy backup label "
"failed\n");
@@ -4113,7 +4111,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie)
}
done:
- kmem_free(gpt, sizeof (efi_gpt_t));
+ kmem_free(gpt, cl->cl_sys_blocksize);
}
/*
@@ -4210,7 +4208,7 @@ cmlb_clear_vtoc(struct cmlb_lun *cl, void *tg_cookie)
struct dk_label *dkl;
mutex_exit(CMLB_MUTEX(cl));
- dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
+ dkl = kmem_zalloc(cl->cl_sys_blocksize, KM_SLEEP);
mutex_enter(CMLB_MUTEX(cl));
/*
* cmlb_set_vtoc uses these fields in order to figure out
@@ -4223,7 +4221,7 @@ cmlb_clear_vtoc(struct cmlb_lun *cl, void *tg_cookie)
dkl->dkl_nsect = cl->cl_g.dkg_nsect;
mutex_exit(CMLB_MUTEX(cl));
(void) cmlb_set_vtoc(cl, dkl, tg_cookie);
- kmem_free(dkl, sizeof (struct dk_label));
+ kmem_free(dkl, cl->cl_sys_blocksize);
mutex_enter(CMLB_MUTEX(cl));
}
@@ -4258,7 +4256,7 @@ cmlb_write_label(struct cmlb_lun *cl, void *tg_cookie)
ASSERT(mutex_owned(CMLB_MUTEX(cl)));
mutex_exit(CMLB_MUTEX(cl));
- dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP);
+ dkl = kmem_zalloc(cl->cl_sys_blocksize, KM_SLEEP);
mutex_enter(CMLB_MUTEX(cl));
bcopy(&cl->cl_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc));
@@ -4303,7 +4301,7 @@ cmlb_write_label(struct cmlb_lun *cl, void *tg_cookie)
rval = cmlb_set_vtoc(cl, dkl, tg_cookie);
exit:
- kmem_free(dkl, sizeof (struct dk_label));
+ kmem_free(dkl, cl->cl_sys_blocksize);
mutex_enter(CMLB_MUTEX(cl));
return (rval);
}
@@ -4422,7 +4420,7 @@ cmlb_dkio_get_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie)
/*
* Read the mboot block, located at absolute block 0 on the target.
*/
- buffer_size = sizeof (struct mboot);
+ buffer_size = cl->cl_sys_blocksize;
cmlb_dbg(CMLB_TRACE, cl,
"cmlb_dkio_get_mboot: allocation size: 0x%x\n", buffer_size);
@@ -4481,18 +4479,18 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie)
return (EINVAL);
}
- mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP);
+ mboot = kmem_zalloc(cl->cl_sys_blocksize, KM_SLEEP);
if (ddi_copyin((const void *)arg, mboot,
- sizeof (struct mboot), flag) != 0) {
- kmem_free(mboot, (size_t)(sizeof (struct mboot)));
+ cl->cl_sys_blocksize, flag) != 0) {
+ kmem_free(mboot, cl->cl_sys_blocksize);
return (EFAULT);
}
/* Is this really a master boot record? */
magic = LE_16(mboot->signature);
if (magic != MBB_MAGIC) {
- kmem_free(mboot, (size_t)(sizeof (struct mboot)));
+ kmem_free(mboot, cl->cl_sys_blocksize);
return (EINVAL);
}
@@ -4508,7 +4506,7 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie)
rval = cmlb_update_fdisk_and_vtoc(cl, tg_cookie);
if ((!cl->cl_f_geometry_is_valid) || (rval != 0)) {
mutex_exit(CMLB_MUTEX(cl));
- kmem_free(mboot, (size_t)(sizeof (struct mboot)));
+ kmem_free(mboot, cl->cl_sys_blocksize);
return (rval);
}
}
@@ -4529,7 +4527,7 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie)
#endif
cl->cl_msglog_flag |= CMLB_ALLOW_2TB_WARN;
mutex_exit(CMLB_MUTEX(cl));
- kmem_free(mboot, (size_t)(sizeof (struct mboot)));
+ kmem_free(mboot, cl->cl_sys_blocksize);
return (rval);
}
@@ -5098,10 +5096,10 @@ fallback: return (ddi_prop_op(dev, dip, prop_op, mod_flags,
(diskaddr_t *)&nblocks64, NULL, NULL, NULL, tg_cookie);
/*
- * Assume partition information is in DEV_BSIZE units, compute
+ * Assume partition information is in sys_blocksize units, compute
* divisor for size(9P) property representation.
*/
- dblk = lbasize / DEV_BSIZE;
+ dblk = lbasize / cl->cl_sys_blocksize;
/* Now let ddi_prop_op_nblocks_blksize() handle the request. */
return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags,
diff --git a/usr/src/uts/common/io/scsi/targets/sd.c b/usr/src/uts/common/io/scsi/targets/sd.c
index f2cbc0df37..8cbc1310a3 100644
--- a/usr/src/uts/common/io/scsi/targets/sd.c
+++ b/usr/src/uts/common/io/scsi/targets/sd.c
@@ -1017,6 +1017,7 @@ static int sd_pm_idletime = 1;
#define sd_free_rqs ssd_free_rqs
#define sd_dump_memory ssd_dump_memory
#define sd_get_media_info ssd_get_media_info
+#define sd_get_media_info_ext ssd_get_media_info_ext
#define sd_dkio_ctrl_info ssd_dkio_ctrl_info
#define sd_nvpair_str_decode ssd_nvpair_str_decode
#define sd_strtok_r ssd_strtok_r
@@ -1093,6 +1094,7 @@ static int sd_pm_idletime = 1;
#define sd_is_lsi ssd_is_lsi
#define sd_tg_rdwr ssd_tg_rdwr
#define sd_tg_getinfo ssd_tg_getinfo
+#define sd_rmw_msg_print_handler ssd_rmw_msg_print_handler
#endif /* #if (defined(__fibre)) */
@@ -1463,7 +1465,7 @@ static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
uint32_t *lbap, int path_flag);
static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
- uint32_t *lbap, int path_flag);
+ uint32_t *lbap, uint32_t *psp, int path_flag);
static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int flag,
int path_flag);
static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
@@ -1510,6 +1512,7 @@ static void sd_panic_for_res_conflict(struct sd_lun *un);
* Disk Ioctl Function Prototypes
*/
static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
+static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
@@ -1610,6 +1613,11 @@ static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
/*
+ * Timeout handler for printing the RMW warning message in a timely manner
+ */
+static void sd_rmw_msg_print_handler(void *arg);
+
+/*
* Constants for failfast support:
*
* SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
@@ -1781,13 +1789,19 @@ static sd_chain_t sd_iostart_chain[] = {
sd_mapblockaddr_iostart, /* Index: 3 */
sd_core_iostart, /* Index: 4 */
- /* Chain for buf IO for removable-media targets (PM enabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets with RMW needed (PM enabled)
+ */
sd_mapblockaddr_iostart, /* Index: 5 */
sd_mapblocksize_iostart, /* Index: 6 */
sd_pm_iostart, /* Index: 7 */
sd_core_iostart, /* Index: 8 */
- /* Chain for buf IO for removable-media targets (PM disabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets with RMW needed (PM disabled)
+ */
sd_mapblockaddr_iostart, /* Index: 9 */
sd_mapblocksize_iostart, /* Index: 10 */
sd_core_iostart, /* Index: 11 */
@@ -1817,6 +1831,26 @@ static sd_chain_t sd_iostart_chain[] = {
/* Chain for "direct priority" USCSI commands (all targets) */
sd_core_iostart, /* Index: 25 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with RMW needed with checksumming (PM enabled)
+ */
+ sd_mapblockaddr_iostart, /* Index: 26 */
+ sd_mapblocksize_iostart, /* Index: 27 */
+ sd_checksum_iostart, /* Index: 28 */
+ sd_pm_iostart, /* Index: 29 */
+ sd_core_iostart, /* Index: 30 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with RMW needed with checksumming (PM disabled)
+ */
+ sd_mapblockaddr_iostart, /* Index: 31 */
+ sd_mapblocksize_iostart, /* Index: 32 */
+ sd_checksum_iostart, /* Index: 33 */
+ sd_core_iostart, /* Index: 34 */
+
};
/*
@@ -1825,7 +1859,9 @@ static sd_chain_t sd_iostart_chain[] = {
*/
#define SD_CHAIN_DISK_IOSTART 0
#define SD_CHAIN_DISK_IOSTART_NO_PM 3
+#define SD_CHAIN_MSS_DISK_IOSTART 5
#define SD_CHAIN_RMMEDIA_IOSTART 5
+#define SD_CHAIN_MSS_DISK_IOSTART_NO_PM 9
#define SD_CHAIN_RMMEDIA_IOSTART_NO_PM 9
#define SD_CHAIN_CHKSUM_IOSTART 12
#define SD_CHAIN_CHKSUM_IOSTART_NO_PM 16
@@ -1833,6 +1869,8 @@ static sd_chain_t sd_iostart_chain[] = {
#define SD_CHAIN_USCSI_CHKSUM_IOSTART 21
#define SD_CHAIN_DIRECT_CMD_IOSTART 24
#define SD_CHAIN_PRIORITY_CMD_IOSTART 25
+#define SD_CHAIN_MSS_CHKSUM_IOSTART 26
+#define SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM 31
/*
@@ -1859,13 +1897,19 @@ static sd_chain_t sd_iodone_chain[] = {
sd_buf_iodone, /* Index: 3 */
sd_mapblockaddr_iodone, /* Index: 4 */
- /* Chain for buf IO for removable-media targets (PM enabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets with RMW needed (PM enabled)
+ */
sd_buf_iodone, /* Index: 5 */
sd_mapblockaddr_iodone, /* Index: 6 */
sd_mapblocksize_iodone, /* Index: 7 */
sd_pm_iodone, /* Index: 8 */
- /* Chain for buf IO for removable-media targets (PM disabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets with RMW needed (PM disabled)
+ */
sd_buf_iodone, /* Index: 9 */
sd_mapblockaddr_iodone, /* Index: 10 */
sd_mapblocksize_iodone, /* Index: 11 */
@@ -1895,6 +1939,25 @@ static sd_chain_t sd_iodone_chain[] = {
/* Chain for "direct priority" USCSI commands (all targets) */
sd_uscsi_iodone, /* Index: 25 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM enabled)
+ */
+ sd_buf_iodone, /* Index: 26 */
+ sd_mapblockaddr_iodone, /* Index: 27 */
+ sd_mapblocksize_iodone, /* Index: 28 */
+ sd_checksum_iodone, /* Index: 29 */
+ sd_pm_iodone, /* Index: 30 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM disabled)
+ */
+ sd_buf_iodone, /* Index: 31 */
+ sd_mapblockaddr_iodone, /* Index: 32 */
+ sd_mapblocksize_iodone, /* Index: 33 */
+ sd_checksum_iodone, /* Index: 34 */
};
@@ -1910,14 +1973,17 @@ static sd_chain_t sd_iodone_chain[] = {
#define SD_CHAIN_DISK_IODONE 2
#define SD_CHAIN_DISK_IODONE_NO_PM 4
#define SD_CHAIN_RMMEDIA_IODONE 8
+#define SD_CHAIN_MSS_DISK_IODONE 8
#define SD_CHAIN_RMMEDIA_IODONE_NO_PM 11
+#define SD_CHAIN_MSS_DISK_IODONE_NO_PM 11
#define SD_CHAIN_CHKSUM_IODONE 15
#define SD_CHAIN_CHKSUM_IODONE_NO_PM 18
#define SD_CHAIN_USCSI_CMD_IODONE 20
#define SD_CHAIN_USCSI_CHKSUM_IODONE 22
#define SD_CHAIN_DIRECT_CMD_IODONE 24
#define SD_CHAIN_PRIORITY_CMD_IODONE 25
-
+#define SD_CHAIN_MSS_CHKSUM_IODONE 30
+#define SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM 34
@@ -1940,13 +2006,19 @@ static sd_initpkt_t sd_initpkt_map[] = {
sd_initpkt_for_buf, /* Index: 3 */
sd_initpkt_for_buf, /* Index: 4 */
- /* Chain for buf IO for removable-media targets (PM enabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets (PM enabled)
+ */
sd_initpkt_for_buf, /* Index: 5 */
sd_initpkt_for_buf, /* Index: 6 */
sd_initpkt_for_buf, /* Index: 7 */
sd_initpkt_for_buf, /* Index: 8 */
- /* Chain for buf IO for removable-media targets (PM disabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets (PM disabled)
+ */
sd_initpkt_for_buf, /* Index: 9 */
sd_initpkt_for_buf, /* Index: 10 */
sd_initpkt_for_buf, /* Index: 11 */
@@ -1977,6 +2049,24 @@ static sd_initpkt_t sd_initpkt_map[] = {
/* Chain for "direct priority" USCSI commands (all targets) */
sd_initpkt_for_uscsi, /* Index: 25 */
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM enabled)
+ */
+ sd_initpkt_for_buf, /* Index: 26 */
+ sd_initpkt_for_buf, /* Index: 27 */
+ sd_initpkt_for_buf, /* Index: 28 */
+ sd_initpkt_for_buf, /* Index: 29 */
+ sd_initpkt_for_buf, /* Index: 30 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM disabled)
+ */
+ sd_initpkt_for_buf, /* Index: 31 */
+ sd_initpkt_for_buf, /* Index: 32 */
+ sd_initpkt_for_buf, /* Index: 33 */
+ sd_initpkt_for_buf, /* Index: 34 */
};
@@ -1999,13 +2089,19 @@ static sd_destroypkt_t sd_destroypkt_map[] = {
sd_destroypkt_for_buf, /* Index: 3 */
sd_destroypkt_for_buf, /* Index: 4 */
- /* Chain for buf IO for removable-media targets (PM enabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets (PM enabled)
+ */
sd_destroypkt_for_buf, /* Index: 5 */
sd_destroypkt_for_buf, /* Index: 6 */
sd_destroypkt_for_buf, /* Index: 7 */
sd_destroypkt_for_buf, /* Index: 8 */
- /* Chain for buf IO for removable-media targets (PM disabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets (PM disabled)
+ */
sd_destroypkt_for_buf, /* Index: 9 */
sd_destroypkt_for_buf, /* Index: 10 */
sd_destroypkt_for_buf, /* Index: 11 */
@@ -2036,6 +2132,24 @@ static sd_destroypkt_t sd_destroypkt_map[] = {
/* Chain for "direct priority" USCSI commands (all targets) */
sd_destroypkt_for_uscsi, /* Index: 25 */
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM enabled)
+ */
+ sd_destroypkt_for_buf, /* Index: 26 */
+ sd_destroypkt_for_buf, /* Index: 27 */
+ sd_destroypkt_for_buf, /* Index: 28 */
+ sd_destroypkt_for_buf, /* Index: 29 */
+ sd_destroypkt_for_buf, /* Index: 30 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM disabled)
+ */
+ sd_destroypkt_for_buf, /* Index: 31 */
+ sd_destroypkt_for_buf, /* Index: 32 */
+ sd_destroypkt_for_buf, /* Index: 33 */
+ sd_destroypkt_for_buf, /* Index: 34 */
};
@@ -2066,13 +2180,19 @@ static int sd_chain_type_map[] = {
SD_CHAIN_BUFIO, /* Index: 3 */
SD_CHAIN_BUFIO, /* Index: 4 */
- /* Chain for buf IO for removable-media targets (PM enabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets (PM enabled)
+ */
SD_CHAIN_BUFIO, /* Index: 5 */
SD_CHAIN_BUFIO, /* Index: 6 */
SD_CHAIN_BUFIO, /* Index: 7 */
SD_CHAIN_BUFIO, /* Index: 8 */
- /* Chain for buf IO for removable-media targets (PM disabled) */
+ /*
+ * Chain for buf IO for removable-media or large sector size
+ * disk drive targets (PM disabled)
+ */
SD_CHAIN_BUFIO, /* Index: 9 */
SD_CHAIN_BUFIO, /* Index: 10 */
SD_CHAIN_BUFIO, /* Index: 11 */
@@ -2095,13 +2215,32 @@ static int sd_chain_type_map[] = {
/* Chain for USCSI commands (checksum targets) */
SD_CHAIN_USCSI, /* Index: 21 */
SD_CHAIN_USCSI, /* Index: 22 */
- SD_CHAIN_USCSI, /* Index: 22 */
+ SD_CHAIN_USCSI, /* Index: 23 */
/* Chain for "direct" USCSI commands (all targets) */
SD_CHAIN_DIRECT, /* Index: 24 */
/* Chain for "direct priority" USCSI commands (all targets) */
SD_CHAIN_DIRECT_PRIORITY, /* Index: 25 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM enabled)
+ */
+ SD_CHAIN_BUFIO, /* Index: 26 */
+ SD_CHAIN_BUFIO, /* Index: 27 */
+ SD_CHAIN_BUFIO, /* Index: 28 */
+ SD_CHAIN_BUFIO, /* Index: 29 */
+ SD_CHAIN_BUFIO, /* Index: 30 */
+
+ /*
+ * Chain for buf IO for large sector size disk drive targets
+ * with checksumming (PM disabled)
+ */
+ SD_CHAIN_BUFIO, /* Index: 31 */
+ SD_CHAIN_BUFIO, /* Index: 32 */
+ SD_CHAIN_BUFIO, /* Index: 33 */
+ SD_CHAIN_BUFIO, /* Index: 34 */
};
@@ -2147,6 +2286,9 @@ static struct sd_chain_index sd_chain_index_map[] = {
{ SD_CHAIN_USCSI_CHKSUM_IOSTART, SD_CHAIN_USCSI_CHKSUM_IODONE },
{ SD_CHAIN_DIRECT_CMD_IOSTART, SD_CHAIN_DIRECT_CMD_IODONE },
{ SD_CHAIN_PRIORITY_CMD_IOSTART, SD_CHAIN_PRIORITY_CMD_IODONE },
+ { SD_CHAIN_MSS_CHKSUM_IOSTART, SD_CHAIN_MSS_CHKSUM_IODONE },
+ { SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },
+
};
@@ -2158,9 +2300,13 @@ static struct sd_chain_index sd_chain_index_map[] = {
#define SD_CHAIN_INFO_DISK 0
#define SD_CHAIN_INFO_DISK_NO_PM 1
#define SD_CHAIN_INFO_RMMEDIA 2
+#define SD_CHAIN_INFO_MSS_DISK 2
#define SD_CHAIN_INFO_RMMEDIA_NO_PM 3
+#define SD_CHAIN_INFO_MSS_DSK_NO_PM 3
#define SD_CHAIN_INFO_CHKSUM 4
#define SD_CHAIN_INFO_CHKSUM_NO_PM 5
+#define SD_CHAIN_INFO_MSS_DISK_CHKSUM 10
+#define SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM 11
/* un->un_uscsi_chain_type must be set to one of these */
#define SD_CHAIN_INFO_USCSI_CMD 6
@@ -3967,6 +4113,16 @@ sd_set_properties(struct sd_lun *un, char *name, char *value)
"min throttle set to %d\n", un->un_min_throttle);
}
+ if (strcasecmp(name, "rmw-type") == 0) {
+ if (ddi_strtol(value, &endptr, 0, &val) == 0) {
+ un->un_f_rmw_type = val;
+ } else {
+ goto value_invalid;
+ }
+ SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
+ "RMW type set to %d\n", un->un_f_rmw_type);
+ }
+
/*
* Validate the throttle values.
* If any of the numbers are invalid, set everything to defaults.
@@ -4996,7 +5152,10 @@ sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
{
if (lbasize != 0) {
un->un_tgt_blocksize = lbasize;
- un->un_f_tgt_blocksize_is_valid = TRUE;
+ un->un_f_tgt_blocksize_is_valid = TRUE;
+ if (!un->un_f_has_removable_media) {
+ un->un_sys_blocksize = lbasize;
+ }
}
if (capacity != 0) {
@@ -5290,7 +5449,7 @@ sd_get_devid(sd_ssc_t *ssc)
/* Calculate the checksum */
chksum = 0;
ip = (uint_t *)dkdevid;
- for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
+ for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
i++) {
chksum ^= ip[i];
}
@@ -5386,6 +5545,7 @@ static int
sd_write_deviceid(sd_ssc_t *ssc)
{
struct dk_devid *dkdevid;
+ uchar_t *buf;
diskaddr_t blk;
uint_t *ip, chksum;
int status;
@@ -5406,7 +5566,8 @@ sd_write_deviceid(sd_ssc_t *ssc)
/* Allocate the buffer */
- dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
+ buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
+ dkdevid = (struct dk_devid *)buf;
/* Fill in the revision */
dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
@@ -5421,7 +5582,7 @@ sd_write_deviceid(sd_ssc_t *ssc)
/* Calculate the checksum */
chksum = 0;
ip = (uint_t *)dkdevid;
- for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int));
+ for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
i++) {
chksum ^= ip[i];
}
@@ -5430,12 +5591,12 @@ sd_write_deviceid(sd_ssc_t *ssc)
DKD_FORMCHKSUM(chksum, dkdevid);
/* Write the reserved sector */
- status = sd_send_scsi_WRITE(ssc, dkdevid, un->un_sys_blocksize, blk,
+ status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
SD_PATH_DIRECT);
if (status != 0)
sd_ssc_assessment(ssc, SD_FMT_IGNORE);
- kmem_free(dkdevid, un->un_sys_blocksize);
+ kmem_free(buf, un->un_sys_blocksize);
mutex_enter(SD_MUTEX(un));
return (status);
@@ -5903,6 +6064,14 @@ sd_ddi_suspend(dev_info_t *devi)
mutex_exit(&un->un_pm_mutex);
}
+ if (un->un_rmw_msg_timeid != NULL) {
+ timeout_id_t temp_id = un->un_rmw_msg_timeid;
+ un->un_rmw_msg_timeid = NULL;
+ mutex_exit(SD_MUTEX(un));
+ (void) untimeout(temp_id);
+ mutex_enter(SD_MUTEX(un));
+ }
+
if (un->un_retry_timeid != NULL) {
timeout_id_t temp_id = un->un_retry_timeid;
un->un_retry_timeid = NULL;
@@ -6217,7 +6386,7 @@ sd_pm_idletimeout_handler(void *arg)
} else {
un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
}
- un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;
+ un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;
SD_TRACE(SD_LOG_IO_PM, un,
"sd_pm_idletimeout_handler: idling device\n");
@@ -6839,6 +7008,7 @@ sd_unit_attach(dev_info_t *devi)
struct scsi_device *devp;
struct sd_lun *un;
char *variantp;
+ char name_str[48];
int reservation_flag = SD_TARGET_IS_UNRESERVED;
int instance;
int rval;
@@ -7267,6 +7437,7 @@ sd_unit_attach(dev_info_t *devi)
* meaning a non-zero value must be entered to change the default.
*/
un->un_f_disksort_disabled = FALSE;
+ un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
/*
* Retrieve the properties from the static driver table or the driver
@@ -7906,6 +8077,24 @@ sd_unit_attach(dev_info_t *devi)
un->un_f_write_cache_enabled = (wc_enabled != 0);
mutex_exit(SD_MUTEX(un));
+ if (un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
+ un->un_tgt_blocksize != DEV_BSIZE) {
+ if (!(un->un_wm_cache)) {
+ (void) snprintf(name_str, sizeof (name_str),
+ "%s%d_cache",
+ ddi_driver_name(SD_DEVINFO(un)),
+ ddi_get_instance(SD_DEVINFO(un)));
+ un->un_wm_cache = kmem_cache_create(
+ name_str, sizeof (struct sd_w_map),
+ 8, sd_wm_cache_constructor,
+ sd_wm_cache_destructor, NULL,
+ (void *)un, NULL, 0);
+ if (!(un->un_wm_cache)) {
+ goto wm_cache_failed;
+ }
+ }
+ }
+
/*
* Check the value of the NV_SUP bit and set
* un_f_suppress_cache_flush accordingly.
@@ -7994,7 +8183,7 @@ sd_unit_attach(dev_info_t *devi)
/*
* An error occurred during the attach; clean up & return failure.
*/
-
+wm_cache_failed:
devid_failed:
setup_pm_failed:
@@ -8057,6 +8246,15 @@ spinup_failed:
mutex_enter(SD_MUTEX(un));
}
+ /* Cancel rmw warning message timeouts */
+ if (un->un_rmw_msg_timeid != NULL) {
+ timeout_id_t temp_id = un->un_rmw_msg_timeid;
+ un->un_rmw_msg_timeid = NULL;
+ mutex_exit(SD_MUTEX(un));
+ (void) untimeout(temp_id);
+ mutex_enter(SD_MUTEX(un));
+ }
+
/* Cancel any pending retry timeouts */
if (un->un_retry_timeid != NULL) {
timeout_id_t temp_id = un->un_retry_timeid;
@@ -8270,6 +8468,14 @@ sd_unit_detach(dev_info_t *devi)
mutex_enter(SD_MUTEX(un));
}
+ if (un->un_rmw_msg_timeid != NULL) {
+ timeout_id_t temp_id = un->un_rmw_msg_timeid;
+ un->un_rmw_msg_timeid = NULL;
+ mutex_exit(SD_MUTEX(un));
+ (void) untimeout(temp_id);
+ mutex_enter(SD_MUTEX(un));
+ }
+
if (un->un_dcvb_timeid != NULL) {
timeout_id_t temp_id = un->un_dcvb_timeid;
un->un_dcvb_timeid = NULL;
@@ -10288,7 +10494,9 @@ sd_ready_and_valid(sd_ssc_t *ssc, int part)
* a media is changed this routine will be called and the
* block size is a function of media rather than device.
*/
- if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) {
+ if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
+ un->un_f_non_devbsize_supported) &&
+ un->un_tgt_blocksize != DEV_BSIZE) {
if (!(un->un_wm_cache)) {
(void) snprintf(name_str, sizeof (name_str),
"%s%d_cache",
@@ -10518,17 +10726,20 @@ sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
/*
* Read requests are restricted to multiples of the system block size.
*/
- secmask = un->un_sys_blocksize - 1;
+ if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
+ secmask = un->un_tgt_blocksize - 1;
+ else
+ secmask = DEV_BSIZE - 1;
if (uio->uio_loffset & ((offset_t)(secmask))) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdread: file offset not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else if (uio->uio_iov->iov_len & (secmask)) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdread: transfer length not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else {
err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
@@ -10604,17 +10815,20 @@ sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
/*
* Write requests are restricted to multiples of the system block size.
*/
- secmask = un->un_sys_blocksize - 1;
+ if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
+ secmask = un->un_tgt_blocksize - 1;
+ else
+ secmask = DEV_BSIZE - 1;
if (uio->uio_loffset & ((offset_t)(secmask))) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdwrite: file offset not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else if (uio->uio_iov->iov_len & (secmask)) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdwrite: transfer length not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else {
err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
@@ -10690,17 +10904,20 @@ sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
/*
* Read requests are restricted to multiples of the system block size.
*/
- secmask = un->un_sys_blocksize - 1;
+ if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
+ secmask = un->un_tgt_blocksize - 1;
+ else
+ secmask = DEV_BSIZE - 1;
if (uio->uio_loffset & ((offset_t)(secmask))) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdaread: file offset not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else if (uio->uio_iov->iov_len & (secmask)) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdaread: transfer length not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else {
err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
@@ -10776,17 +10993,20 @@ sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
/*
* Write requests are restricted to multiples of the system block size.
*/
- secmask = un->un_sys_blocksize - 1;
+ if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR)
+ secmask = un->un_tgt_blocksize - 1;
+ else
+ secmask = DEV_BSIZE - 1;
if (uio->uio_loffset & ((offset_t)(secmask))) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdawrite: file offset not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else if (uio->uio_iov->iov_len & (secmask)) {
SD_ERROR(SD_LOG_READ_WRITE, un,
"sdawrite: transfer length not modulo %d\n",
- un->un_sys_blocksize);
+ secmask + 1);
err = EINVAL;
} else {
err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
@@ -11012,6 +11232,7 @@ sdstrategy(struct buf *bp)
biodone(bp);
return (0);
}
+
/* As was done in the past, fail new cmds. if state is dumping. */
if (un->un_state == SD_STATE_DUMPING) {
bioerror(bp, ENXIO);
@@ -11150,6 +11371,27 @@ sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
/* FALLTHRU */
case SD_CHAIN_BUFIO:
index = un->un_buf_chain_type;
+ if ((!un->un_f_has_removable_media) &&
+ (un->un_tgt_blocksize != 0) &&
+ (un->un_tgt_blocksize != DEV_BSIZE)) {
+ int secmask = 0, blknomask = 0;
+ blknomask =
+ (un->un_tgt_blocksize / DEV_BSIZE) - 1;
+ secmask = un->un_tgt_blocksize - 1;
+
+ if ((bp->b_lblkno & (blknomask)) ||
+ (bp->b_bcount & (secmask))) {
+ if (un->un_f_rmw_type !=
+ SD_RMW_TYPE_RETURN_ERROR) {
+ if (un->un_f_pm_is_enabled == FALSE)
+ index =
+ SD_CHAIN_INFO_MSS_DSK_NO_PM;
+ else
+ index =
+ SD_CHAIN_INFO_MSS_DISK;
+ }
+ }
+ }
break;
case SD_CHAIN_USCSI:
index = un->un_uscsi_chain_type;
@@ -12039,6 +12281,20 @@ sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
* request would exceed partition range. Converts
* partition-relative block address to absolute.
*
+ * Upon exit of this function:
+ * 1. I/O is aligned:
+ * xp->xb_blkno represents the absolute sector address.
+ * 2. I/O is misaligned:
+ * xp->xb_blkno represents the absolute logical block address
+ * based on DEV_BSIZE. The logical block address will be
+ * converted to a physical sector address in
+ * sd_mapblocksize_iostart.
+ * 3. I/O is misaligned but is aligned in the "overrun" buf:
+ * xp->xb_blkno represents the absolute logical block address
+ * based on DEV_BSIZE. The logical block address will be
+ * converted to a physical sector address in
+ * sd_mapblocksize_iostart, but no RMW will be issued in this case.
+ *
* Context: Can sleep
*
* Issues: This follows what the old code did, in terms of accessing
@@ -12060,6 +12316,8 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
int partition;
diskaddr_t partition_offset;
struct sd_xbuf *xp;
+ int secmask = 0, blknomask = 0;
+ ushort_t is_aligned = TRUE;
ASSERT(un != NULL);
ASSERT(bp != NULL);
@@ -12116,6 +12374,57 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
(void) cmlb_partinfo(un->un_cmlbhandle, partition,
&nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
+ blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
+ secmask = un->un_tgt_blocksize - 1;
+
+ if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
+ is_aligned = FALSE;
+ }
+
+ if (!(NOT_DEVBSIZE(un))) {
+ /*
+ * If I/O is aligned, no need to involve RMW(Read Modify Write)
+ * Convert the logical block number to target's physical sector
+ * number.
+ */
+ if (is_aligned) {
+ xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
+ } else {
+ switch (un->un_f_rmw_type) {
+ case SD_RMW_TYPE_RETURN_ERROR:
+ bp->b_flags |= B_ERROR;
+ goto error_exit;
+
+ case SD_RMW_TYPE_DEFAULT:
+ mutex_enter(SD_MUTEX(un));
+ if (un->un_rmw_msg_timeid == NULL) {
+ scsi_log(SD_DEVINFO(un), sd_label,
+ CE_WARN, "I/O request is not "
+ "aligned with %d disk sector size. "
+ "It is handled through Read Modify "
+ "Write but the performance is "
+ "very low.\n",
+ un->un_tgt_blocksize);
+ un->un_rmw_msg_timeid =
+ timeout(sd_rmw_msg_print_handler,
+ un, SD_RMW_MSG_PRINT_TIMEOUT);
+ } else {
+ un->un_rmw_incre_count ++;
+ }
+ mutex_exit(SD_MUTEX(un));
+ break;
+
+ case SD_RMW_TYPE_NO_WARNING:
+ default:
+ break;
+ }
+
+ nblocks = SD_TGT2SYSBLOCK(un, nblocks);
+ partition_offset = SD_TGT2SYSBLOCK(un,
+ partition_offset);
+ }
+ }
+
/*
* blocknum is the starting block number of the request. At this
* point it is still relative to the start of the minor device.
@@ -12136,7 +12445,7 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
* a multiple of the system block size.
*/
if ((blocknum < 0) || (blocknum >= nblocks) ||
- ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) {
+ ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
bp->b_flags |= B_ERROR;
goto error_exit;
}
@@ -12145,11 +12454,18 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
* If the requsted # blocks exceeds the available # blocks, that
* is an overrun of the partition.
*/
- requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount);
+ if ((!NOT_DEVBSIZE(un)) && is_aligned) {
+ requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
+ } else {
+ requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
+ }
+
available_nblocks = (size_t)(nblocks - blocknum);
ASSERT(nblocks >= blocknum);
if (requested_nblocks > available_nblocks) {
+ size_t resid;
+
/*
* Allocate an "overrun" buf to allow the request to proceed
* for the amount of space available in the partition. The
@@ -12158,8 +12474,14 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
* replaces the original buf here, and the original buf
* is saved inside the overrun buf, for later use.
*/
- size_t resid = SD_SYSBLOCKS2BYTES(un,
- (offset_t)(requested_nblocks - available_nblocks));
+ if ((!NOT_DEVBSIZE(un)) && is_aligned) {
+ resid = SD_TGTBLOCKS2BYTES(un,
+ (offset_t)(requested_nblocks - available_nblocks));
+ } else {
+ resid = SD_SYSBLOCKS2BYTES(
+ (offset_t)(requested_nblocks - available_nblocks));
+ }
+
size_t count = bp->b_bcount - resid;
/*
* Note: count is an unsigned entity thus it'll NEVER
@@ -12318,7 +12640,7 @@ sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
* un->un_sys_blocksize as its block size or if bcount == 0.
* In this case there is no layer-private data block allocated.
*/
- if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
+ if ((un->un_tgt_blocksize == DEV_BSIZE) ||
(bp->b_bcount == 0)) {
goto done;
}
@@ -12333,7 +12655,7 @@ sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
"tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
- un->un_tgt_blocksize, un->un_sys_blocksize);
+ un->un_tgt_blocksize, DEV_BSIZE);
SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
"request start block:0x%x\n", xp->xb_blkno);
SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
@@ -12376,7 +12698,7 @@ sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
* Note that end_block is actually the block that follows the last
* block of the request, but that's what is needed for the computation.
*/
- first_byte = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
+ first_byte = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
end_block = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) /
un->un_tgt_blocksize;
@@ -12519,7 +12841,7 @@ sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
* There is no shadow buf or layer-private data if the target is
* using un->un_sys_blocksize as its block size or if bcount == 0.
*/
- if ((un->un_tgt_blocksize == un->un_sys_blocksize) ||
+ if ((un->un_tgt_blocksize == DEV_BSIZE) ||
(bp->b_bcount == 0)) {
goto exit;
}
@@ -15550,6 +15872,48 @@ sd_start_retry_command(void *arg)
"sd_start_retry_command: exit\n");
}
+/*
+ *    Function: sd_rmw_msg_print_handler
+ *
+ * Description: Periodic reporter for misaligned (Read Modify Write) I/O.
+ *		If any misaligned requests were counted in
+ *		un_rmw_incre_count since the previous message, log the
+ *		total, reset the counter and re-arm this handler.  If none
+ *		were counted, clear un_rmw_msg_timeid instead;
+ *		sd_mapblockaddr_iostart() tests that field against NULL to
+ *		decide whether to log directly and arm a new timeout.
+ *
+ *   Arguments: arg - pointer to associated softstate for the device.
+ *
+ *     Context: timeout(9F) thread context. May not sleep.
+ */
+static void
+sd_rmw_msg_print_handler(void *arg)
+{
+	struct sd_lun	*un = arg;
+
+	ASSERT(un != NULL);
+	ASSERT(!mutex_owned(SD_MUTEX(un)));
+
+	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
+	    "sd_rmw_msg_print_handler: entry\n");
+
+	/* The counter and the timeout id are both updated under SD_MUTEX. */
+	mutex_enter(SD_MUTEX(un));
+
+	if (un->un_rmw_incre_count == 0) {
+		/* Nothing was counted this interval: stop reporting. */
+		un->un_rmw_msg_timeid = NULL;
+	} else {
+		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
+		    "%"PRIu64" I/O requests are not aligned with %d disk "
+		    "sector size in %ld seconds. They are handled through "
+		    "Read Modify Write but the performance is very low!\n",
+		    un->un_rmw_incre_count, un->un_tgt_blocksize,
+		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
+
+		/* Start a fresh interval. */
+		un->un_rmw_incre_count = 0;
+		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
+		    un, SD_RMW_MSG_PRINT_TIMEOUT);
+	}
+
+	mutex_exit(SD_MUTEX(un));
+
+	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
+	    "sd_rmw_msg_print_handler: exit\n");
+}
/*
* Function: sd_start_direct_priority_command
@@ -19336,6 +19700,7 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
uint32_t *capacity_buf;
uint64_t capacity;
uint32_t lbasize;
+ uint32_t pbsize;
int status;
struct sd_lun *un;
@@ -19418,7 +19783,7 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
if (capacity == 0xffffffff) {
sd_ssc_assessment(ssc, SD_FMT_IGNORE);
status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
- &lbasize, path_flag);
+ &lbasize, &pbsize, path_flag);
if (status != 0) {
return (status);
}
@@ -19467,10 +19832,11 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
* on the logical unit. The actual logical block count will be
* this value plus one.
*
- * Currently the capacity is saved in terms of un->un_sys_blocksize,
- * so scale the capacity value to reflect this.
+ * Currently, for removable media, the capacity is saved in terms
+ * of un->un_sys_blocksize, so scale the capacity value to reflect this.
*/
- capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
+ if (un->un_f_has_removable_media)
+ capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize);
/*
* Copy the values from the READ CAPACITY command into the space
@@ -19504,15 +19870,19 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
* determine the device capacity in number of blocks and the
* device native block size. If this function returns a failure,
* then the values in *capp and *lbap are undefined.
- * This routine should always be called by
- * sd_send_scsi_READ_CAPACITY which will appy any device
- * specific adjustments to capacity and lbasize.
+ * This routine should be called by sd_send_scsi_READ_CAPACITY
+ * which will apply any device specific adjustments to capacity
+ * and lbasize. One exception is it is also called by
+ * sd_get_media_info_ext. In that function, there is no need to
+ * adjust the capacity and lbasize.
*
* Arguments: ssc - ssc contains ptr to soft state struct for the target
* capp - ptr to unsigned 64-bit variable to receive the
* capacity value from the command.
* lbap - ptr to unsigned 32-bit varaible to receive the
* block size value from the command
+ * psp - ptr to unsigned 32-bit variable to receive the
+ * physical block size value from the command
* path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
* the normal command waitq, or SD_PATH_DIRECT_PRIORITY
* to use the USCSI "direct" chain and bypass the normal
@@ -19533,7 +19903,7 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
static int
sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
- uint32_t *lbap, int path_flag)
+ uint32_t *lbap, uint32_t *psp, int path_flag)
{
struct scsi_extended_sense sense_buf;
struct uscsi_cmd ucmd_buf;
@@ -19541,6 +19911,8 @@ sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
uint64_t *capacity16_buf;
uint64_t capacity;
uint32_t lbasize;
+ uint32_t pbsize;
+ uint32_t lbpb_exp;
int status;
struct sd_lun *un;
@@ -19617,9 +19989,13 @@ sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
* bytes 8-11: Block length in bytes
* (MSB in byte:8 & LSB in byte:11)
*
+ * byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
*/
capacity = BE_64(capacity16_buf[0]);
lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
+	/*
+	 * capacity16_buf[1] covers response bytes 8-15.  After BE_64() byte 8
+	 * is the most significant byte, so byte 13 occupies bits 23:16 and the
+	 * LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT is its low nibble.
+	 * (Shifting by 40 would instead pick up part of the block length.)
+	 */
+	lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;
+
+	/* Physical block size = logical block size * 2^exponent. */
+	pbsize = lbasize << lbpb_exp;
/*
* Done with capacity16_buf
@@ -19666,9 +20042,11 @@ sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
*capp = capacity;
*lbap = lbasize;
+ *psp = pbsize;
SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
- "capacity:0x%llx lbasize:0x%x\n", capacity, lbasize);
+ "capacity:0x%llx lbasize:0x%x, pbsize: 0x%x\n",
+ capacity, lbasize, pbsize);
return (0);
}
@@ -21443,6 +21821,7 @@ sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
case DKIOCHOTPLUGGABLE:
case DKIOCINFO:
case DKIOCGMEDIAINFO:
+ case DKIOCGMEDIAINFOEXT:
case MHIOCENFAILFAST:
case MHIOCSTATUS:
case MHIOCTKOWN:
@@ -21509,6 +21888,11 @@ skip_ready_valid:
err = sd_get_media_info(dev, (caddr_t)arg, flag);
break;
+ case DKIOCGMEDIAINFOEXT:
+ SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
+ err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
+ break;
+
case DKIOCGGEOM:
case DKIOCGVTOC:
case DKIOCGEXTVTOC:
@@ -22609,6 +22993,205 @@ no_assessment:
return (rval);
}
+/*
+ *    Function: sd_get_media_info_ext
+ *
+ * Description: This routine is the driver entry point for handling ioctl
+ *		requests for the media type or command set profile used by the
+ *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
+ *		difference between this ioctl and DKIOCGMEDIAINFO is that the
+ *		data returned by this ioctl contains both the logical block
+ *		size and the physical block size.
+ *
+ *   Arguments: dev	- the device number
+ *		arg	- pointer to user provided dk_minfo_ext structure
+ *			  specifying the media type, logical block size,
+ *			  physical block size and disk capacity.
+ *		flag	- this argument is a pass through to ddi_copyxxx()
+ *			  directly from the mode argument of ioctl().
+ *
+ * Return Code: 0
+ *		EACCES
+ *		EFAULT
+ *		ENXIO
+ *		EIO
+ */
+
+static int
+sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
+{
+	struct sd_lun		*un = NULL;
+	struct uscsi_cmd	com;
+	struct scsi_inquiry	*sinq;
+	struct dk_minfo_ext	media_info_ext;
+	u_longlong_t		media_capacity;
+	uint64_t		capacity;
+	uint_t			lbasize;
+	uint_t			pbsize;
+	uchar_t			*out_data;
+	uchar_t			*rqbuf;
+	int			rval = 0;
+	int			rtn;
+	sd_ssc_t		*ssc;
+
+	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
+	    (un->un_state == SD_STATE_OFFLINE)) {
+		return (ENXIO);
+	}
+
+	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_ext: entry\n");
+
+	/*
+	 * The whole structure, including any compiler-inserted padding, is
+	 * copied out to the caller below.  Zero it first so that no
+	 * uninitialized kernel stack bytes are disclosed.
+	 */
+	bzero(&media_info_ext, sizeof (media_info_ext));
+
+	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
+	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
+	ssc = sd_ssc_init(un);
+
+	/* Issue a TUR to determine if the drive is ready with media present */
+	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
+	if (rval == ENXIO) {
+		goto done;
+	} else if (rval != 0) {
+		/* Any other TUR failure is not fatal for this ioctl. */
+		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
+	}
+
+	/* Now get configuration data */
+	if (ISCD(un)) {
+		media_info_ext.dki_media_type = DK_CDROM;
+
+		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
+		if (un->un_f_mmc_cap == TRUE) {
+			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
+			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
+			    SD_PATH_STANDARD);
+
+			if (rtn) {
+				/*
+				 * We ignore all failures for CD and need to
+				 * put the assessment before processing code
+				 * to avoid missing assessment for FMA.
+				 */
+				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
+				/*
+				 * Failed for other than an illegal request
+				 * or command not supported
+				 */
+				if ((com.uscsi_status == STATUS_CHECK) &&
+				    (com.uscsi_rqstatus == STATUS_GOOD)) {
+					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
+					    (rqbuf[12] != 0x20)) {
+						rval = EIO;
+						goto no_assessment;
+					}
+				}
+			} else {
+				/*
+				 * The GET CONFIGURATION command succeeded
+				 * so set the media type according to the
+				 * returned data
+				 */
+				media_info_ext.dki_media_type = out_data[6];
+				media_info_ext.dki_media_type <<= 8;
+				media_info_ext.dki_media_type |= out_data[7];
+			}
+		}
+	} else {
+		/*
+		 * The profile list is not available, so we attempt to identify
+		 * the media type based on the inquiry data
+		 */
+		sinq = un->un_sd->sd_inq;
+		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
+		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
+			/* This is a direct access device or optical disk */
+			media_info_ext.dki_media_type = DK_FIXED_DISK;
+
+			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
+			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
+				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
+					media_info_ext.dki_media_type = DK_ZIP;
+				} else if (
+				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
+					media_info_ext.dki_media_type = DK_JAZ;
+				}
+			}
+		} else {
+			/*
+			 * Not a CD, direct access or optical disk so return
+			 * unknown media
+			 */
+			media_info_ext.dki_media_type = DK_UNKNOWN;
+		}
+	}
+
+	/*
+	 * Now read the capacity so we can provide the lbasize,
+	 * pbsize and capacity.  READ CAPACITY (16) is tried first because
+	 * it is the command that reports the physical block size.
+	 */
+	rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize, &pbsize,
+	    SD_PATH_DIRECT);
+
+	if (rval != 0) {
+		/* Fall back to the plain READ CAPACITY path. */
+		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
+		    SD_PATH_DIRECT);
+
+		switch (rval) {
+		case 0:
+			/* No physical size available; report the logical one */
+			pbsize = lbasize;
+			media_capacity = capacity;
+			/*
+			 * For removable media sd_send_scsi_READ_CAPACITY()
+			 * reports capacity in un->un_sys_blocksize chunks,
+			 * so convert it into lbasize chunks.
+			 */
+			if (un->un_f_has_removable_media) {
+				media_capacity *= un->un_sys_blocksize;
+				media_capacity /= lbasize;
+			}
+			break;
+		case EACCES:
+			rval = EACCES;
+			goto done;
+		default:
+			rval = EIO;
+			goto done;
+		}
+	} else {
+		media_capacity = capacity;
+	}
+
+	/*
+	 * If lun is expanded dynamically, update the un structure.
+	 */
+	mutex_enter(SD_MUTEX(un));
+	if ((un->un_f_blockcount_is_valid == TRUE) &&
+	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
+	    (capacity > un->un_blockcount)) {
+		sd_update_block_info(un, lbasize, capacity);
+	}
+	mutex_exit(SD_MUTEX(un));
+
+	media_info_ext.dki_lbsize = lbasize;
+	media_info_ext.dki_capacity = media_capacity;
+	media_info_ext.dki_pbsize = pbsize;
+
+	if (ddi_copyout(&media_info_ext, arg, sizeof (struct dk_minfo_ext),
+	    flag)) {
+		rval = EFAULT;
+		goto no_assessment;
+	}
+done:
+	if (rval != 0) {
+		if (rval == EIO)
+			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
+		else
+			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
+	}
+no_assessment:
+	sd_ssc_fini(ssc);
+	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
+	kmem_free(rqbuf, SENSE_LENGTH);
+	return (rval);
+}
/*
* Function: sd_check_media
@@ -24700,17 +25283,51 @@ sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
partition = SDPART(dev);
SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
+ if (!(NOT_DEVBSIZE(un))) {
+ int secmask = 0;
+ int blknomask = 0;
+
+ blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
+ secmask = un->un_tgt_blocksize - 1;
+
+ if (blkno & blknomask) {
+ SD_TRACE(SD_LOG_DUMP, un,
+ "sddump: dump start block not modulo %d\n",
+ un->un_tgt_blocksize);
+ return (EINVAL);
+ }
+
+ if ((nblk * DEV_BSIZE) & secmask) {
+ SD_TRACE(SD_LOG_DUMP, un,
+ "sddump: dump length not modulo %d\n",
+ un->un_tgt_blocksize);
+ return (EINVAL);
+ }
+
+ }
+
/* Validate blocks to dump at against partition size. */
(void) cmlb_partinfo(un->un_cmlbhandle, partition,
&nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
- if ((blkno + nblk) > nblks) {
- SD_TRACE(SD_LOG_DUMP, un,
- "sddump: dump range larger than partition: "
- "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
- blkno, nblk, nblks);
- return (EINVAL);
+ if (NOT_DEVBSIZE(un)) {
+ if ((blkno + nblk) > nblks) {
+ SD_TRACE(SD_LOG_DUMP, un,
+ "sddump: dump range larger than partition: "
+ "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
+ blkno, nblk, nblks);
+ return (EINVAL);
+ }
+ } else {
+ if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
+ (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
+ SD_TRACE(SD_LOG_DUMP, un,
+ "sddump: dump range larger than partition: "
+ "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
+ blkno, nblk, nblks);
+ return (EINVAL);
+ }
}
mutex_enter(&un->un_pm_mutex);
@@ -24813,7 +25430,12 @@ sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
* Convert the partition-relative block number to a
* disk physical block number.
*/
- blkno += start_block;
+ if (NOT_DEVBSIZE(un)) {
+ blkno += start_block;
+ } else {
+ blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
+ blkno += start_block;
+ }
SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
@@ -24901,6 +25523,10 @@ sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
dma_resid = wr_bp->b_bcount;
oblkno = blkno;
+ if (!(NOT_DEVBSIZE(un))) {
+ nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
+ }
+
while (dma_resid != 0) {
for (i = 0; i < SD_NDUMP_RETRIES; i++) {
@@ -29894,7 +30520,7 @@ sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
* sys_blocksize != tgt_blocksize, need to re-adjust
* blkno and save the index to beginning of dk_label
*/
- first_byte = SD_SYSBLOCKS2BYTES(un, start_block);
+ first_byte = SD_SYSBLOCKS2BYTES(start_block);
real_addr = first_byte / un->un_tgt_blocksize;
end_block = (first_byte + reqlength +
diff --git a/usr/src/uts/common/os/dumpsubr.c b/usr/src/uts/common/os/dumpsubr.c
index 201d6d1bfd..0753cc19da 100644
--- a/usr/src/uts/common/os/dumpsubr.c
+++ b/usr/src/uts/common/os/dumpsubr.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -255,12 +255,12 @@ dumpinit(vnode_t *vp, char *name, int justchecking)
if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
size_t blk_size;
struct dk_cinfo dki;
- struct extvtoc vtoc;
+ struct dk_minfo minf;
- if (VOP_IOCTL(cdev_vp, DKIOCGEXTVTOC, (intptr_t)&vtoc,
- FKIOCTL, kcred, NULL, NULL) == 0 &&
- vtoc.v_sectorsz != 0)
- blk_size = vtoc.v_sectorsz;
+ if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
+ (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
+ == 0 && minf.dki_lbsize != 0)
+ blk_size = minf.dki_lbsize;
else
blk_size = DEV_BSIZE;
diff --git a/usr/src/uts/common/sys/dkio.h b/usr/src/uts/common/sys/dkio.h
index 18f49e513a..caf7d7976d 100644
--- a/usr/src/uts/common/sys/dkio.h
+++ b/usr/src/uts/common/sys/dkio.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -301,6 +301,11 @@ enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE };
#define DKIOCGTEMPERATURE (DKIOC|45) /* get temperature */
/*
+ * ioctl to get the media info including physical block size
+ */
+#define DKIOCGMEDIAINFOEXT (DKIOC|48)
+
+/*
* Used for providing the temperature.
*/
@@ -324,6 +329,17 @@ struct dk_minfo {
};
/*
+ * Used for Media info or the current profile info
+ * including physical block size if supported.
+ */
+struct dk_minfo_ext {
+ uint_t dki_media_type; /* Media type (DK_*) or profile info */
+ uint_t dki_lbsize; /* Logical block size of media, in bytes */
+ diskaddr_t dki_capacity; /* Capacity as # of dki_lbsize blks */
+ uint_t dki_pbsize; /* Physical block size of media, in bytes */
+};
+
+/*
* Media types or profiles known
*/
#define DK_UNKNOWN 0x00 /* Media inserted - type unknown */
diff --git a/usr/src/uts/common/sys/dklabel.h b/usr/src/uts/common/sys/dklabel.h
index 01baa7157c..457c1ecadc 100644
--- a/usr/src/uts/common/sys/dklabel.h
+++ b/usr/src/uts/common/sys/dklabel.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -207,7 +207,7 @@ struct dk_label {
uint16_t dkl_ncyl; /* # of data cylinders */
uint16_t dkl_acyl; /* # of alternate cylinders */
uint16_t dkl_nhead; /* # of heads in this partition */
- uint16_t dkl_nsect; /* # of 512 byte sectors per track */
+ uint16_t dkl_nsect; /* # of sectors per track */
uint16_t dkl_obs3; /* obsolete */
uint16_t dkl_obs4; /* obsolete */
struct dk_map32 dkl_map[NDKMAP]; /* logical partition headers */
diff --git a/usr/src/uts/common/sys/scsi/targets/sddef.h b/usr/src/uts/common/sys/scsi/targets/sddef.h
index c5bbc59ef1..90129e40c3 100644
--- a/usr/src/uts/common/sys/scsi/targets/sddef.h
+++ b/usr/src/uts/common/sys/scsi/targets/sddef.h
@@ -438,7 +438,8 @@ struct sd_lun {
/* SYNC CACHE needs to be */
/* sent in sdclose */
un_f_devid_transport_defined :1, /* devid defined by transport */
- un_f_reserved :12;
+ un_f_rmw_type :2, /* RMW type */
+ un_f_reserved :10;
/* Ptr to table of strings for ASC/ASCQ error message printing */
struct scsi_asq_key_strings *un_additional_codes;
@@ -477,6 +478,8 @@ struct sd_lun {
struct kmem_cache *un_wm_cache; /* fast alloc in non-512 write case */
uint_t un_rmw_count; /* count of read-modify-writes */
struct sd_w_map *un_wm; /* head of sd_w_map chain */
+ uint64_t un_rmw_incre_count; /* count I/O */
+ timeout_id_t un_rmw_msg_timeid; /* for RMW message control */
/* For timeout callback to issue a START STOP UNIT command */
timeout_id_t un_startstop_timeid;
@@ -560,12 +563,12 @@ struct sd_lun {
(blockcount * (un)->un_tgt_blocksize)
/* Convert a byte count to a number of system blocks */
-#define SD_BYTES2SYSBLOCKS(un, bytecount) \
- ((bytecount + (un->un_sys_blocksize - 1))/un->un_sys_blocksize)
+#define SD_BYTES2SYSBLOCKS(bytecount) \
+ ((bytecount + (DEV_BSIZE - 1))/DEV_BSIZE)
/* Convert a system block count to a number of bytes */
-#define SD_SYSBLOCKS2BYTES(un, blockcount) \
- (blockcount * (un)->un_sys_blocksize)
+#define SD_SYSBLOCKS2BYTES(blockcount) \
+ (blockcount * DEV_BSIZE)
/*
* Calculate the number of bytes needed to hold the requested number of bytes
@@ -579,13 +582,19 @@ struct sd_lun {
* to the system block location.
*/
#define SD_TGTBYTEOFFSET(un, sysblk, tgtblk) \
- (SD_SYSBLOCKS2BYTES(un, sysblk) - SD_TGTBLOCKS2BYTES(un, tgtblk))
+ (SD_SYSBLOCKS2BYTES(sysblk) - SD_TGTBLOCKS2BYTES(un, tgtblk))
/*
* Calculate the target block location from the system block location
*/
#define SD_SYS2TGTBLOCK(un, blockcnt) \
- ((blockcnt * un->un_sys_blocksize) / un->un_tgt_blocksize)
+ (blockcnt / ((un)->un_tgt_blocksize / DEV_BSIZE))
+
+/*
+ * Calculate the system block location from the target block location
+ * (the inverse of SD_SYS2TGTBLOCK): scale a count of target blocks up to
+ * the equivalent count of DEV_BSIZE system blocks.
+ */
+#define	SD_TGT2SYSBLOCK(un, blockcnt)	\
+	((blockcnt) * ((un)->un_tgt_blocksize / DEV_BSIZE))
/*
* SD_DEFAULT_MAX_XFER_SIZE is the default value to bound the max xfer
@@ -768,6 +777,12 @@ _NOTE(MUTEX_PROTECTS_DATA(sd_lun::un_fi_mutex,
#define SD_WTYPE_RMW 0x002 /* Write requires read-modify-write */
#define SD_WM_BUSY 0x100 /* write-map is busy */
+/*
+ * RMW type
+ */
+#define SD_RMW_TYPE_DEFAULT 0 /* do rmw with warning message */
+#define SD_RMW_TYPE_NO_WARNING 1 /* do rmw without warning message */
+#define SD_RMW_TYPE_RETURN_ERROR 2 /* rmw disabled */
/* Device error kstats */
struct sd_errstats {
@@ -1678,6 +1693,11 @@ struct sd_fm_internal {
#define SD_RESTART_TIMEOUT (drv_usectohz((clock_t)100000))
/*
+ * 10s misaligned I/O warning message interval
+ */
+#define SD_RMW_MSG_PRINT_TIMEOUT (drv_usectohz((clock_t)10000000))
+
+/*
* 100 msec. is what we'll wait for certain retries for fibre channel
* targets, 0 msec for parallel SCSI.
*/
diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c
index 16fd5aff9d..06551ebe85 100644
--- a/usr/src/uts/common/xen/io/xdb.c
+++ b/usr/src/uts/common/xen/io/xdb.c
@@ -1202,6 +1202,7 @@ xdb_open_device(xdb_t *vdp)
{
dev_info_t *dip = vdp->xs_dip;
uint64_t devsize;
+ int blksize;
char *nodepath;
ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
@@ -1252,7 +1253,17 @@ xdb_open_device(xdb_t *vdp)
kmem_free(nodepath, MAXPATHLEN);
return (DDI_FAILURE);
}
- vdp->xs_sectors = devsize / XB_BSIZE;
+
+ blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl,
+ DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ "blksize", DEV_BSIZE);
+ if (blksize == DEV_BSIZE)
+ blksize = ldi_prop_get_int(vdp->xs_ldi_hdl,
+ LDI_DEV_T_ANY | DDI_PROP_DONTPASS |
+ DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE);
+
+ vdp->xs_sec_size = blksize;
+ vdp->xs_sectors = devsize / blksize;
/* check if the underlying device is a CD/DVD disc */
if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
@@ -1388,13 +1399,12 @@ trans_retry:
/* If feature-barrier isn't present in xenstore, add it. */
fb_exists = xenbus_exists(xsname, XBP_FB);
- /* hard-coded 512-byte sector size */
- ssize = DEV_BSIZE;
+ ssize = (vdp->xs_sec_size == 0) ? DEV_BSIZE : vdp->xs_sec_size;
sectors = vdp->xs_sectors;
if (((!fb_exists &&
(err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) ||
(err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) ||
- (err = xenbus_printf(xbt, xsname, "sector-size", "%u", ssize)) ||
+ (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) ||
(err = xenbus_printf(xbt, xsname,
XBP_SECTORS, "%"PRIu64, sectors)) ||
(err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) ||
diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h
index f8046e8219..2173ca6ad9 100644
--- a/usr/src/uts/common/xen/io/xdb.h
+++ b/usr/src/uts/common/xen/io/xdb.h
@@ -113,6 +113,8 @@ struct xdb {
uint32_t xs_type;
/* # of total sectors */
uint64_t xs_sectors;
+ /* sector size of the underlying device, if known */
+ uint_t xs_sec_size;
/* blkif I/O request ring buffer */
xendev_ring_t *xs_ring;
/* handle to access the ring buffer */
diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c
index 109421797d..ef50b2bec7 100644
--- a/usr/src/uts/common/xen/io/xdf.c
+++ b/usr/src/uts/common/xen/io/xdf.c
@@ -478,7 +478,6 @@ vreq_setup(xdf_t *vdp, v_req_t *vreq)
if (!ALIGNED_XFER(bp)) {
if (bp->b_flags & (B_PAGEIO | B_PHYS))
bp_mapin(bp);
-
rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
@@ -1638,11 +1637,13 @@ xdf_get_flush_block(xdf_t *vdp)
/*
* Get a DEV_BSIZE aligned bufer
*/
- vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP);
+ vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP);
vdp->xdf_cache_flush_block =
- (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE);
+ (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem),
+ (int)vdp->xdf_xdev_secsize);
+
if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
- xdf_flush_block, DEV_BSIZE, NULL) != 0)
+ xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0)
return (DDI_FAILURE);
return (DDI_SUCCESS);
}
@@ -1746,7 +1747,7 @@ xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
geomp->g_acyl = 0;
geomp->g_nhead = XDF_NHEADS;
geomp->g_nsect = XDF_NSECTS;
- geomp->g_secsize = XB_BSIZE;
+ geomp->g_secsize = vdp->xdf_xdev_secsize;
geomp->g_capacity = vdp->xdf_xdev_nblocks;
geomp->g_intrlv = 0;
geomp->g_rpm = 7200;
@@ -1764,6 +1765,7 @@ xdf_setstate_connected(xdf_t *vdp)
dev_info_t *dip = vdp->xdf_dip;
cmlb_geom_t pgeom;
diskaddr_t nblocks = 0;
+ uint_t secsize = 0;
char *oename, *xsname, *str;
uint_t dinfo;
@@ -1793,6 +1795,7 @@ xdf_setstate_connected(xdf_t *vdp)
*/
if (xenbus_gather(XBT_NULL, oename,
XBP_SECTORS, "%"SCNu64, &nblocks,
+ XBP_SECTOR_SIZE, "%u", &secsize,
XBP_INFO, "%u", &dinfo,
NULL) != 0) {
cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
@@ -1808,7 +1811,10 @@ xdf_setstate_connected(xdf_t *vdp)
dinfo |= VDISK_CDROM;
strfree(str);
+ if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE)))
+ secsize = DEV_BSIZE;
vdp->xdf_xdev_nblocks = nblocks;
+ vdp->xdf_xdev_secsize = secsize;
#ifdef _ILP32
if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
@@ -2373,6 +2379,14 @@ xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
int
xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
+ int instance;
+ xdf_t *vdp;
+
+ instance = ddi_get_instance(dip);
+
+ if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
+ return (ENXIO);
+
switch (cmd) {
case TG_GETPHYGEOM:
return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
@@ -2381,7 +2395,9 @@ xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
case TG_GETCAPACITY:
return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
case TG_GETBLOCKSIZE:
- *(uint32_t *)arg = XB_BSIZE;
+ mutex_enter(&vdp->xdf_cb_lk);
+ *(uint32_t *)arg = vdp->xdf_xdev_secsize;
+ mutex_exit(&vdp->xdf_cb_lk);
return (0);
case TG_GETATTR:
return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
@@ -2404,7 +2420,8 @@ xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
/* We don't allow IO from the oe_change callback thread */
ASSERT(curthread != vdp->xdf_oe_change_thread);
- if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
+ if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
+ >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
return (EINVAL);
bp = getrbuf(KM_SLEEP);
@@ -2412,9 +2429,10 @@ xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
bp->b_flags = B_BUSY | B_READ;
else
bp->b_flags = B_BUSY | B_WRITE;
+
bp->b_un.b_addr = bufp;
bp->b_bcount = reqlen;
- bp->b_blkno = start;
+ bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
mutex_enter(&vdp->xdf_dev_lk);
@@ -2582,7 +2600,7 @@ xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
case DKIOCGMEDIAINFO: {
struct dk_minfo media_info;
- media_info.dki_lbsize = DEV_BSIZE;
+ media_info.dki_lbsize = vdp->xdf_xdev_secsize;
media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
if (XD_IS_CD(vdp))
media_info.dki_media_type = DK_CDROM;
@@ -2664,7 +2682,7 @@ xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
!xdf_barrier_flush_disable) {
rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
vdp->xdf_cache_flush_block, xdf_flush_block,
- DEV_BSIZE, (void *)dev);
+ vdp->xdf_xdev_secsize, (void *)dev);
} else {
return (ENOTTY);
}
@@ -2686,6 +2704,7 @@ xdf_strategy(struct buf *bp)
xdf_t *vdp;
minor_t minor;
diskaddr_t p_blkct, p_blkst;
+ daddr_t blkno;
ulong_t nblks;
int part;
@@ -2726,16 +2745,24 @@ xdf_strategy(struct buf *bp)
mutex_enter(&vdp->xdf_dev_lk);
}
+ /*
+ * Adjust the real blkno and bcount according to the underlying
+ * physical sector size.
+ */
+ blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);
+
/* check for a starting block beyond the disk or partition limit */
- if (bp->b_blkno > p_blkct) {
+ if (blkno > p_blkct) {
DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
- vdp->xdf_addr, (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
+ vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
+ mutex_exit(&vdp->xdf_dev_lk);
xdf_io_err(bp, EINVAL, 0);
return (0);
}
/* Legacy: don't set error flag at this case */
- if (bp->b_blkno == p_blkct) {
+ if (blkno == p_blkct) {
+ mutex_exit(&vdp->xdf_dev_lk);
bp->b_resid = bp->b_bcount;
biodone(bp);
return (0);
@@ -2747,14 +2774,29 @@ xdf_strategy(struct buf *bp)
bp->av_back = bp->av_forw = NULL;
/* Adjust for partial transfer, this will result in an error later */
- nblks = bp->b_bcount >> XB_BSHIFT;
- if ((bp->b_blkno + nblks) > p_blkct) {
- bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
+ if (vdp->xdf_xdev_secsize != 0 &&
+ vdp->xdf_xdev_secsize != XB_BSIZE) {
+ nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
+ } else {
+ nblks = bp->b_bcount >> XB_BSHIFT;
+ }
+
+ if ((blkno + nblks) > p_blkct) {
+ if (vdp->xdf_xdev_secsize != 0 &&
+ vdp->xdf_xdev_secsize != XB_BSIZE) {
+ bp->b_resid =
+ ((blkno + nblks) - p_blkct) *
+ vdp->xdf_xdev_secsize;
+ } else {
+ bp->b_resid =
+ ((blkno + nblks) - p_blkct) <<
+ XB_BSHIFT;
+ }
bp->b_bcount -= bp->b_resid;
}
DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
- vdp->xdf_addr, (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));
+ vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));
/* Fix up the buf struct */
bp->b_flags |= B_BUSY;
@@ -2792,6 +2834,9 @@ xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
NULL, NULL, NULL, NULL))
return (ENXIO);
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
+ return (ENOSPC);
+
if (U_INVAL(uiop))
return (EINVAL);
@@ -2822,7 +2867,7 @@ xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
NULL, NULL, NULL, NULL))
return (ENXIO);
- if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
return (ENOSPC);
if (U_INVAL(uiop))
@@ -2853,7 +2898,7 @@ xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
NULL, NULL, NULL, NULL))
return (ENXIO);
- if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
return (ENOSPC);
if (U_INVAL(uiop))
@@ -2884,7 +2929,7 @@ xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
NULL, NULL, NULL, NULL))
return (ENXIO);
- if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
return (ENOSPC);
if (U_INVAL(uiop))
@@ -2921,9 +2966,11 @@ xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
NULL, NULL, NULL))
return (ENXIO);
- if ((blkno + nblk) > p_blkcnt) {
+ if ((blkno + nblk) >
+ (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) {
cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
- vdp->xdf_addr, blkno + nblk, (uint64_t)p_blkcnt);
+ vdp->xdf_addr, (daddr_t)((blkno + nblk) /
+ (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt);
return (EINVAL);
}
@@ -3451,7 +3498,7 @@ xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
mutex_destroy(&vdp->xdf_cb_lk);
mutex_destroy(&vdp->xdf_dev_lk);
if (vdp->xdf_cache_flush_block != NULL)
- kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
+ kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize);
ddi_soft_state_free(xdf_ssp, instance);
return (DDI_SUCCESS);
}
diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h
index a3319f70a3..f2a2a82dd5 100644
--- a/usr/src/uts/common/xen/io/xdf.h
+++ b/usr/src/uts/common/xen/io/xdf.h
@@ -48,7 +48,7 @@ extern "C" {
#define XB_BSIZE DEV_BSIZE
#define XB_BMASK (XB_BSIZE - 1)
#define XB_BSHIFT 9
-#define XB_DTOB(bn) ((bn) << XB_BSHIFT)
+#define XB_DTOB(bn, vdp) ((bn) * (vdp)->xdf_xdev_secsize)
#define XB_MAX_SEGLEN (8 * XB_BSIZE)
#define XB_SEGOFFSET (XB_MAX_SEGLEN - 1)
@@ -222,6 +222,7 @@ typedef struct xdf {
kcondvar_t xdf_dev_cv; /* cv used in I/O path */
uint_t xdf_dinfo; /* disk info from backend xenstore */
diskaddr_t xdf_xdev_nblocks; /* total size in block */
+ uint_t xdf_xdev_secsize; /* disk blksize from backend */
cmlb_geom_t xdf_pgeom;
boolean_t xdf_pgeom_set;
boolean_t xdf_pgeom_fixed;
diff --git a/usr/src/uts/common/xen/sys/xendev.h b/usr/src/uts/common/xen/sys/xendev.h
index 8e5921dc3f..dad4ad222f 100644
--- a/usr/src/uts/common/xen/sys/xendev.h
+++ b/usr/src/uts/common/xen/sys/xendev.h
@@ -52,6 +52,7 @@ extern "C" {
/*
* Xenbus property interfaces, initialized by backend disk driver
*/
+#define XBP_SECTOR_SIZE "sector-size" /* backend prop: uint */
#define XBP_SECTORS "sectors" /* backend prop: uint64 */
#define XBP_INFO "info" /* backend prop: uint */
#define XBP_FB "feature-barrier" /* backend prop: boolean int */
diff --git a/usr/src/uts/sun4v/io/vdc.c b/usr/src/uts/sun4v/io/vdc.c
index 6c5d37b940..b7729adeed 100644
--- a/usr/src/uts/sun4v/io/vdc.c
+++ b/usr/src/uts/sun4v/io/vdc.c
@@ -150,6 +150,7 @@ static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *,
static void vdc_store_label_unk(vdc_t *vdc);
static boolean_t vdc_is_opened(vdc_t *vdc);
static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t);
+static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t);
/* handshake with vds */
static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
@@ -621,8 +622,10 @@ vdc_do_attach(dev_info_t *dip)
vdc->state = VDC_STATE_INIT;
vdc->lifecycle = VDC_LC_ATTACHING;
vdc->session_id = 0;
- vdc->block_size = DEV_BSIZE;
- vdc->max_xfer_sz = maxphys / DEV_BSIZE;
+ vdc->vdisk_bsize = DEV_BSIZE;
+ vdc->vio_bmask = 0;
+ vdc->vio_bshift = 0;
+ vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize;
/*
* We assume, for now, that the vDisk server will export 'read'
@@ -943,7 +946,7 @@ vdc_set_err_kstats(vdc_t *vdc)
stp = (vd_err_stats_t *)vdc->err_stats->ks_data;
ASSERT(stp != NULL);
- stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->block_size;
+ stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->vdisk_bsize;
(void) strcpy(stp->vd_vid.value.c, "SUN");
(void) strcpy(stp->vd_pid.value.c, "VDSK");
@@ -1124,7 +1127,7 @@ vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
name, valuep, lengthp));
}
nblocks = vdc->slice[VDCPART(dev)].nblocks;
- blksize = vdc->block_size;
+ blksize = vdc->vdisk_bsize;
mutex_exit(&vdc->lock);
return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags,
@@ -1382,6 +1385,7 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
size_t nbytes = nblk * DEV_BSIZE;
int instance = VDCUNIT(dev);
vdc_t *vdc = NULL;
+ diskaddr_t vio_blkno;
if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
@@ -1390,8 +1394,16 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
instance, nbytes, blkno, (void *)addr);
+
+ /* convert logical block to vio block */
+ if ((blkno & vdc->vio_bmask) != 0) {
+ DMSG(vdc, 0, "Misaligned block number (%lu)\n", blkno);
+ return (EINVAL);
+ }
+ vio_blkno = blkno >> vdc->vio_bshift;
+
rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
- VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
+ VDCPART(dev), vio_blkno, CB_STRATEGY, 0, VIO_write_dir);
if (rv) {
DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
return (rv);
@@ -1422,6 +1434,7 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
static int
vdc_strategy(struct buf *buf)
{
+ diskaddr_t vio_blkno;
int rv = -1;
vdc_t *vdc = NULL;
int instance = VDCUNIT(buf->b_edev);
@@ -1448,8 +1461,21 @@ vdc_strategy(struct buf *buf)
slice = VDCPART(buf->b_edev);
}
+ /*
+ * In the buf structure, b_lblkno represents a logical block number
+ * using a block size of 512 bytes. For the VIO request, this block
+ * number has to be converted to be represented with the block size
+ * used by the VIO protocol.
+ */
+ if ((buf->b_lblkno & vdc->vio_bmask) != 0) {
+ bioerror(buf, EINVAL);
+ biodone(buf);
+ return (0);
+ }
+ vio_blkno = buf->b_lblkno >> vdc->vio_bshift;
+
rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
- buf->b_bcount, slice, buf->b_lblkno,
+ buf->b_bcount, slice, vio_blkno,
CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
VIO_write_dir);
@@ -1494,8 +1520,8 @@ vdc_min(struct buf *bufp)
vdc = ddi_get_soft_state(vdc_state, instance);
VERIFY(vdc != NULL);
- if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
- bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
+ if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) {
+ bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize;
}
}
@@ -1670,7 +1696,7 @@ vdc_init_attr_negotiation(vdc_t *vdc)
pkt.tag.vio_sid = vdc->session_id;
/* fill in payload */
pkt.max_xfer_sz = vdc->max_xfer_sz;
- pkt.vdisk_block_size = vdc->block_size;
+ pkt.vdisk_block_size = vdc->vdisk_bsize;
pkt.xfer_mode = VIO_DRING_MODE_V1_0;
pkt.operations = 0; /* server will set bits of valid operations */
pkt.vdisk_type = 0; /* server will set to valid device type */
@@ -2605,13 +2631,13 @@ vdc_init_descriptor_ring(vdc_t *vdc)
* as we do not have the capability to split requests over
* multiple DRing entries.
*/
- if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) {
+ if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) {
DMSG(vdc, 0, "[%d] using minimum DRing size\n",
vdc->instance);
vdc->dring_max_cookies = maxphys / PAGESIZE;
} else {
vdc->dring_max_cookies =
- (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE;
+ (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE;
}
vdc->dring_entry_size = (sizeof (vd_dring_entry_t) +
(sizeof (ldc_mem_cookie_t) *
@@ -4864,6 +4890,17 @@ vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
vdc->instance);
attr_msg->vdisk_size = 0;
}
+
+ /* update the VIO block size */
+ if (attr_msg->vdisk_block_size > 0 &&
+ vdc_update_vio_bsize(vdc,
+ attr_msg->vdisk_block_size) != 0) {
+ DMSG(vdc, 0, "[%d] Invalid block size (%u) from vds",
+ vdc->instance, attr_msg->vdisk_block_size);
+ status = EINVAL;
+ break;
+ }
+
/* update disk, block and transfer sizes */
vdc_update_size(vdc, attr_msg->vdisk_size,
attr_msg->vdisk_block_size, attr_msg->max_xfer_sz);
@@ -4877,7 +4914,7 @@ vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n",
vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz);
DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n",
- vdc->instance, vdc->block_size,
+ vdc->instance, vdc->vdisk_bsize,
attr_msg->vdisk_block_size);
if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) ||
@@ -5266,7 +5303,7 @@ vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag)
return (EFAULT);
}
- VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
+ VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) {
return (rv);
@@ -5307,7 +5344,7 @@ vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag)
* flag - ioctl flags
*/
static int
-vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag)
+vdc_dioctl_rwcmd(vdc_t *vdc, caddr_t arg, int flag)
{
struct dadkio_rwcmd32 rwcmd32;
struct dadkio_rwcmd rwcmd;
@@ -5351,7 +5388,7 @@ vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag)
bzero((caddr_t)&auio, sizeof (struct uio));
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
- auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE;
+ auio.uio_loffset = rwcmd.blkaddr * vdc->vdisk_bsize;
auio.uio_resid = rwcmd.buflen;
auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE;
@@ -5363,7 +5400,8 @@ vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag)
*/
buf->b_private = (void *)VD_SLICE_NONE;
- status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio);
+ status = physio(vdc_strategy, buf, VD_MAKE_DEV(vdc->instance, 0),
+ rw, vdc_min, &auio);
biofini(buf);
kmem_free(buf, sizeof (buf_t));
@@ -6639,14 +6677,23 @@ vdc_check_capacity(vdc_t *vdc)
if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0)
return (rv);
- if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0)
+ if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || blk_size == 0)
return (EINVAL);
mutex_enter(&vdc->lock);
- vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz);
+ /*
+ * First try to update the VIO block size (which is the same as the
+ * vdisk block size). If this returns an error then that means that
+ * we can not use that block size so basically the vdisk is unusable
+ * and we return an error.
+ */
+ rv = vdc_update_vio_bsize(vdc, blk_size);
+ if (rv == 0)
+ vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz);
+
mutex_exit(&vdc->lock);
- return (0);
+ return (rv);
}
/*
@@ -6969,7 +7016,7 @@ vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp)
case DIOCTL_RWCMD:
{
- return (vdc_dioctl_rwcmd(dev, arg, mode));
+ return (vdc_dioctl_rwcmd(vdc, arg, mode));
}
case DKIOCGAPART:
@@ -7604,7 +7651,7 @@ vdc_create_fake_geometry(vdc_t *vdc)
(void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
(void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
- /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */
+ /* max_xfer_sz is #blocks so we don't need to divide by vdisk_bsize */
vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz;
/*
@@ -7660,7 +7707,7 @@ vdc_create_fake_geometry(vdc_t *vdc)
}
vdc->minfo->dki_capacity = vdc->vdisk_size;
- vdc->minfo->dki_lbsize = vdc->block_size;
+ vdc->minfo->dki_lbsize = vdc->vdisk_bsize;
}
static ushort_t
@@ -7692,7 +7739,7 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size)
* update anything.
*/
if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 ||
- (blk_size == vdc->block_size && dsk_size == vdc->vdisk_size &&
+ (blk_size == vdc->vdisk_bsize && dsk_size == vdc->vdisk_size &&
xfr_size == vdc->max_xfer_sz))
return;
@@ -7706,13 +7753,11 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size)
if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) {
DMSG(vdc, 0, "[%d] vds block transfer size too big;"
" using max supported by vdc", vdc->instance);
- xfr_size = maxphys / DEV_BSIZE;
- dsk_size = (dsk_size * blk_size) / DEV_BSIZE;
- blk_size = DEV_BSIZE;
+ xfr_size = maxphys / blk_size;
}
vdc->max_xfer_sz = xfr_size;
- vdc->block_size = blk_size;
+ vdc->vdisk_bsize = blk_size;
vdc->vdisk_size = dsk_size;
stp = (vd_err_stats_t *)vdc->err_stats->ks_data;
@@ -7723,6 +7768,50 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size)
}
/*
+ * Update information about the VIO block size. The VIO block size is the
+ * same as the vdisk block size which is stored in vdc->vdisk_bsize so we
+ * do not store that information again.
+ *
+ * However, buf structures will always use a logical block size of 512 bytes
+ * (DEV_BSIZE) and we will need to convert logical block numbers to VIO block
+ * numbers for each read or write operation using vdc_strategy(). To speed up
+ * this conversion, we expect the VIO block size to be a power of 2 and a
+ * multiple of 512 bytes (DEV_BSIZE), and we cache some useful information.
+ *
+ * The function returns EINVAL if the new VIO block size (blk_size) is not a
+ * power of 2 or not a multiple of 512 bytes, otherwise it returns 0.
+ */
+static int
+vdc_update_vio_bsize(vdc_t *vdc, uint32_t blk_size)
+{
+ uint32_t ratio, n;
+ int nshift = 0;
+
+ vdc->vio_bmask = 0; /* cached values are cleared even on failure */
+ vdc->vio_bshift = 0;
+
+ ASSERT(blk_size > 0); /* a zero block size is a caller error */
+
+ if ((blk_size % DEV_BSIZE) != 0)
+ return (EINVAL);
+
+ ratio = blk_size / DEV_BSIZE; /* 512-byte blocks per VIO block */
+
+ for (n = ratio; n > 1; n >>= 1) { /* halve ratio, counting the shifts */
+ if ((n & 0x1) != 0) {
+ /* blk_size is not a power of 2 */
+ return (EINVAL);
+ }
+ nshift++;
+ }
+
+ vdc->vio_bshift = nshift; /* log2(blk_size / DEV_BSIZE) */
+ vdc->vio_bmask = ratio - 1; /* bits set in a misaligned 512-byte blkno */
+
+ return (0);
+}
+
+/*
* Function:
* vdc_validate_geometry
*
@@ -7747,7 +7836,7 @@ vdc_validate_geometry(vdc_t *vdc)
buf_t *buf; /* BREAD requests need to be in a buf_t structure */
dev_t dev;
int rv, rval;
- struct dk_label label;
+ struct dk_label *label;
struct dk_geom geom;
struct extvtoc vtoc;
efi_gpt_t *gpt;
@@ -7786,7 +7875,7 @@ vdc_validate_geometry(vdc_t *vdc)
return (EIO);
}
- VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
+ VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe);
@@ -7870,14 +7959,15 @@ vdc_validate_geometry(vdc_t *vdc)
/*
* Read disk label from start of disk
*/
+ label = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP);
buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
bioinit(buf);
- buf->b_un.b_addr = (caddr_t)&label;
- buf->b_bcount = DK_LABEL_SIZE;
+ buf->b_un.b_addr = (caddr_t)label;
+ buf->b_bcount = vdc->vdisk_bsize;
buf->b_flags = B_BUSY | B_READ;
buf->b_dev = cmpdev(dev);
- rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label,
- DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir);
+ rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)label,
+ vdc->vdisk_bsize, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir);
if (rv) {
DMSG(vdc, 1, "[%d] Failed to read disk block 0\n",
vdc->instance);
@@ -7892,15 +7982,17 @@ vdc_validate_geometry(vdc_t *vdc)
biofini(buf);
kmem_free(buf, sizeof (buf_t));
- if (rv != 0 || label.dkl_magic != DKL_MAGIC ||
- label.dkl_cksum != vdc_lbl2cksum(&label)) {
+ if (rv != 0 || label->dkl_magic != DKL_MAGIC ||
+ label->dkl_cksum != vdc_lbl2cksum(label)) {
DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n",
vdc->instance);
+ kmem_free(label, vdc->vdisk_bsize);
mutex_enter(&vdc->lock);
vdc_store_label_unk(vdc);
return (EINVAL);
}
+ kmem_free(label, vdc->vdisk_bsize);
mutex_enter(&vdc->lock);
vdc_store_label_vtoc(vdc, &geom, &vtoc);
return (0);
@@ -8108,7 +8200,7 @@ vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct extvtoc *vtoc)
int i;
ASSERT(MUTEX_HELD(&vdc->lock));
- ASSERT(vdc->block_size == vtoc->v_sectorsz);
+ ASSERT(vdc->vdisk_bsize == vtoc->v_sectorsz);
vdc->vdisk_label = VD_DISK_LABEL_VTOC;
bcopy(vtoc, vdc->vtoc, sizeof (struct extvtoc));
diff --git a/usr/src/uts/sun4v/io/vds.c b/usr/src/uts/sun4v/io/vds.c
index 548fc0f048..45f4122465 100644
--- a/usr/src/uts/sun4v/io/vds.c
+++ b/usr/src/uts/sun4v/io/vds.c
@@ -119,6 +119,10 @@
#define VD_EFI_LBA_GPT 1 /* LBA of the GPT */
#define VD_EFI_LBA_GPE 2 /* LBA of the GPE */
+#define VD_EFI_DEV_SET(dev, vdsk, ioctl) \
+ VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \
+ (vdsk)->vdisk_bsize, (vdsk)->vdisk_size)
+
/*
* Flags defining the behavior for flushing asynchronous writes used to
* performed some write I/O requests.
@@ -451,13 +455,14 @@ typedef struct vd {
int open_flags; /* open flags */
uint_t nslices; /* number of slices we export */
size_t vdisk_size; /* number of blocks in vdisk */
- size_t vdisk_block_size; /* size of each vdisk block */
+ size_t vdisk_bsize; /* blk size of the vdisk */
vd_disk_type_t vdisk_type; /* slice or entire disk */
vd_disk_label_t vdisk_label; /* EFI or VTOC label */
vd_media_t vdisk_media; /* media type of backing dev. */
boolean_t is_atapi_dev; /* Is this an IDE CD-ROM dev? */
ushort_t max_xfer_sz; /* max xfer size in DEV_BSIZE */
- size_t block_size; /* blk size of actual device */
+ size_t backend_bsize; /* blk size of backend device */
+ int vio_bshift; /* shift for blk conversion */
boolean_t volume; /* is vDisk backed by volume */
boolean_t zvol; /* is vDisk backed by a zvol */
boolean_t file; /* is vDisk backed by a file? */
@@ -506,21 +511,20 @@ typedef struct vd {
* followed by a GPT (efi_gpt_t) and a GPE (efi_gpe_t).
*
*/
-#define VD_LABEL_VTOC_SIZE \
- P2ROUNDUP(sizeof (struct dk_label), DEV_BSIZE)
+#define VD_LABEL_VTOC_SIZE(lba) \
+ P2ROUNDUP(sizeof (struct dk_label), (lba))
-#define VD_LABEL_EFI_SIZE \
- P2ROUNDUP(DEV_BSIZE + sizeof (efi_gpt_t) + \
- sizeof (efi_gpe_t) * VD_MAXPART, DEV_BSIZE)
+#define VD_LABEL_EFI_SIZE(lba) \
+ P2ROUNDUP(2 * (lba) + sizeof (efi_gpe_t) * VD_MAXPART, \
+ (lba))
#define VD_LABEL_VTOC(vd) \
((struct dk_label *)(void *)((vd)->flabel))
-#define VD_LABEL_EFI_GPT(vd) \
- ((efi_gpt_t *)(void *)((vd)->flabel + DEV_BSIZE))
-#define VD_LABEL_EFI_GPE(vd) \
- ((efi_gpe_t *)(void *)((vd)->flabel + DEV_BSIZE + \
- sizeof (efi_gpt_t)))
+#define VD_LABEL_EFI_GPT(vd, lba) \
+ ((efi_gpt_t *)(void *)((vd)->flabel + (lba)))
+#define VD_LABEL_EFI_GPE(vd, lba) \
+ ((efi_gpe_t *)(void *)((vd)->flabel + 2 * (lba)))
typedef struct vds_operation {
@@ -757,6 +761,7 @@ vd_dskimg_io_params(vd_t *vd, int slice, size_t *blkp, size_t *lenp)
ASSERT(vd->file || VD_DSKIMG(vd));
ASSERT(len > 0);
+ ASSERT(vd->vdisk_bsize == DEV_BSIZE);
/*
* If a file is exported as a slice then we don't care about the vtoc.
@@ -797,7 +802,6 @@ vd_dskimg_io_params(vd_t *vd, int slice, size_t *blkp, size_t *lenp)
ASSERT(vd->vtoc.v_sectorsz == DEV_BSIZE);
} else {
ASSERT(vd->vdisk_label == VD_DISK_LABEL_EFI);
- ASSERT(vd->vdisk_block_size == DEV_BSIZE);
}
if (blk >= vd->slices[slice].nblocks) {
@@ -875,6 +879,7 @@ vd_dskimg_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t offset,
ASSERT(vd->file || VD_DSKIMG(vd));
ASSERT(len > 0);
+ ASSERT(vd->vdisk_bsize == DEV_BSIZE);
if ((status = vd_dskimg_io_params(vd, slice, &offset, &len)) != 0)
return ((status == ENODATA)? 0: -1);
@@ -941,13 +946,14 @@ vd_dskimg_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t offset,
*
* Parameters:
* disk_size - the disk size in bytes
+ * bsize - the disk block size in bytes
* label - the returned default label.
*
* Return Code:
* none.
*/
static void
-vd_build_default_label(size_t disk_size, struct dk_label *label)
+vd_build_default_label(size_t disk_size, size_t bsize, struct dk_label *label)
{
size_t size;
char unit;
@@ -1005,7 +1011,7 @@ vd_build_default_label(size_t disk_size, struct dk_label *label)
}
label->dkl_pcyl = disk_size /
- (label->dkl_nsect * label->dkl_nhead * DEV_BSIZE);
+ (label->dkl_nsect * label->dkl_nhead * bsize);
if (label->dkl_pcyl == 0)
label->dkl_pcyl = 1;
@@ -1027,7 +1033,7 @@ vd_build_default_label(size_t disk_size, struct dk_label *label)
label->dkl_nhead, label->dkl_nsect);
PR0("provided disk size: %ld bytes\n", (uint64_t)
(label->dkl_pcyl * label->dkl_nhead *
- label->dkl_nsect * DEV_BSIZE));
+ label->dkl_nsect * bsize));
vd_get_readable_size(disk_size, &size, &unit);
@@ -1230,6 +1236,8 @@ vd_dskimg_read_devid(vd_t *vd, ddi_devid_t *devid)
uint_t chksum;
int status, sz;
+ ASSERT(vd->vdisk_bsize == DEV_BSIZE);
+
if ((status = vd_dskimg_get_devid_block(vd, &blk)) != 0)
return (status);
@@ -1304,6 +1312,8 @@ vd_dskimg_write_devid(vd_t *vd, ddi_devid_t devid)
size_t blk;
int status;
+ ASSERT(vd->vdisk_bsize == DEV_BSIZE);
+
if (devid == NULL) {
/* nothing to write */
return (0);
@@ -1371,12 +1381,12 @@ vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len)
ASSERT(!vd->file);
ASSERT(!vd->volume);
- ASSERT(vd->vdisk_block_size > 0);
+ ASSERT(vd->vdisk_bsize > 0);
max_sectors = vd->max_xfer_sz;
- nblk = (len / vd->vdisk_block_size);
+ nblk = (len / vd->vdisk_bsize);
- if (len % vd->vdisk_block_size != 0)
+ if (len % vd->vdisk_bsize != 0)
return (EINVAL);
/*
@@ -1414,7 +1424,7 @@ vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len)
}
ucmd.uscsi_cdb = (caddr_t)&cdb;
ucmd.uscsi_bufaddr = data;
- ucmd.uscsi_buflen = nsectors * vd->block_size;
+ ucmd.uscsi_buflen = nsectors * vd->backend_bsize;
ucmd.uscsi_timeout = vd_scsi_rdwr_timeout;
/*
* Set flags so that the command is isolated from normal
@@ -1459,7 +1469,7 @@ vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len)
blk += nsectors;
nblk -= nsectors;
- data += nsectors * vd->vdisk_block_size; /* SECSIZE */
+ data += nsectors * vd->vdisk_bsize;
}
return (status);
@@ -1498,7 +1508,7 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen)
size_t plen; /* length of data to be read from physical device */
char *buf; /* buffer area to fit physical device's block size */
- if (vd->block_size == 0) {
+ if (vd->backend_bsize == 0) {
/*
* The block size was not available during the attach,
* try to update it now.
@@ -1514,10 +1524,10 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen)
* and adjust the block to be read from and the amount of data to
* read to correspond with the device's block size.
*/
- if (vd->vdisk_block_size == vd->block_size)
+ if (vd->vdisk_bsize == vd->backend_bsize)
return (vd_do_scsi_rdwr(vd, operation, data, vblk, vlen));
- if (vd->vdisk_block_size > vd->block_size)
+ if (vd->vdisk_bsize > vd->backend_bsize)
return (EINVAL);
/*
@@ -1540,23 +1550,23 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen)
* v v
* --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- virtual disk:
* | | | |XX|XX|XX|XX|XX|XX| | | | | | } block size is
- * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- vd->vdisk_block_size
+ * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- vd->vdisk_bsize
* : : : :
* >:==:< delta : :
* : : : :
* --+-----+-----+-----+-----+-----+-----+-----+-- physical disk:
* | |YY:YY|YYYYY|YYYYY|YY:YY| | | } block size is
- * --+-----+-----+-----+-----+-----+-----+-----+-- vd->block_size
+ * --+-----+-----+-----+-----+-----+-----+-----+-- vd->backend_bsize
* ^ ^
* |<--------------------->|
* | plen
* pblk
*/
/* END CSTYLED */
- pblk = (vblk * vd->vdisk_block_size) / vd->block_size;
- delta = (vblk * vd->vdisk_block_size) - (pblk * vd->block_size);
- pnblk = ((delta + vlen - 1) / vd->block_size) + 1;
- plen = pnblk * vd->block_size;
+ pblk = (vblk * vd->vdisk_bsize) / vd->backend_bsize;
+ delta = (vblk * vd->vdisk_bsize) - (pblk * vd->backend_bsize);
+ pnblk = ((delta + vlen - 1) / vd->backend_bsize) + 1;
+ plen = pnblk * vd->backend_bsize;
PR2("vblk %lx:pblk %lx: vlen %ld:plen %ld", vblk, pblk, vlen, plen);
@@ -1591,7 +1601,7 @@ static ssize_t
vd_slice_flabel_read(vd_t *vd, caddr_t data, size_t offset, size_t length)
{
size_t n = 0;
- uint_t limit = vd->flabel_limit * DEV_BSIZE;
+ uint_t limit = vd->flabel_limit * vd->vdisk_bsize;
ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE);
ASSERT(vd->flabel != NULL);
@@ -1646,7 +1656,7 @@ vd_slice_flabel_read(vd_t *vd, caddr_t data, size_t offset, size_t length)
static ssize_t
vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length)
{
- uint_t limit = vd->flabel_limit * DEV_BSIZE;
+ uint_t limit = vd->flabel_limit * vd->vdisk_bsize;
struct dk_label *label;
struct dk_geom geom;
struct extvtoc vtoc;
@@ -1663,7 +1673,7 @@ vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length)
* write was successful, but note that nothing is actually overwritten.
*/
if (vd->vdisk_label == VD_DISK_LABEL_VTOC &&
- offset == 0 && length == DEV_BSIZE) {
+ offset == 0 && length == vd->vdisk_bsize) {
label = (void *)data;
/* check that this is a valid label */
@@ -1721,7 +1731,7 @@ vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length)
* Return the starting block relative to the vdisk
* backend for the remaining operation.
* lengthp - pointer to the number of bytes to read or write.
- * This should be a multiple of DEV_BSIZE. Return the
+ * This should be a multiple of vdisk_bsize. Return the
* remaining number of bytes to read or write.
*
* Return Code:
@@ -1739,6 +1749,7 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
size_t ablk, asize, aoff, alen;
ssize_t n;
int sec, status;
+ size_t bsize = vd->vdisk_bsize;
ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE);
ASSERT(slice != 0);
@@ -1759,23 +1770,23 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
return (EIO);
}
- if (length % DEV_BSIZE != 0)
+ if (length % bsize != 0)
return (EINVAL);
/* handle any I/O with the fake label */
if (operation == VD_OP_BWRITE)
- n = vd_slice_flabel_write(vd, data, blk * DEV_BSIZE, length);
+ n = vd_slice_flabel_write(vd, data, blk * bsize, length);
else
- n = vd_slice_flabel_read(vd, data, blk * DEV_BSIZE, length);
+ n = vd_slice_flabel_read(vd, data, blk * bsize, length);
if (n == -1)
return (EINVAL);
- ASSERT(n % DEV_BSIZE == 0);
+ ASSERT(n % bsize == 0);
/* adjust I/O arguments */
data += n;
- blk += n / DEV_BSIZE;
+ blk += n / bsize;
length -= n;
/* check if there's something else to process */
@@ -1791,7 +1802,7 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
}
if (vd->vdisk_label == VD_DISK_LABEL_EFI) {
- asize = EFI_MIN_RESV_SIZE + 33;
+ asize = EFI_MIN_RESV_SIZE + (EFI_MIN_ARRAY_SIZE / bsize) + 1;
ablk = vd->vdisk_size - asize;
} else {
ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC);
@@ -1802,7 +1813,7 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
asize = vd->dk_geom.dkg_acyl * csize;
}
- alen = length / DEV_BSIZE;
+ alen = length / bsize;
aoff = blk;
/* if we have reached the last block then the I/O is completed */
@@ -1834,10 +1845,10 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
alen = ablk + asize - aoff;
}
- alen *= DEV_BSIZE;
+ alen *= bsize;
if (operation == VD_OP_BREAD) {
- bzero(data + (aoff - blk) * DEV_BSIZE, alen);
+ bzero(data + (aoff - blk) * bsize, alen);
if (vd->vdisk_label == VD_DISK_LABEL_VTOC) {
/* check if we read backup labels */
@@ -1848,9 +1859,9 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap,
for (sec = 1; (sec < 5 * 2 + 1); sec += 2) {
if (ablk + sec >= blk &&
- ablk + sec < blk + (length / DEV_BSIZE)) {
+ ablk + sec < blk + (length / bsize)) {
bcopy(label, data +
- (ablk + sec - blk) * DEV_BSIZE,
+ (ablk + sec - blk) * bsize,
sizeof (struct dk_label));
}
}
@@ -1899,6 +1910,8 @@ vd_bio_task(void *arg)
ssize_t resid;
int status;
+ ASSERT(vd->vdisk_bsize == DEV_BSIZE);
+
if (vd->zvol) {
status = ldi_strategy(vd->ldi_handle[0], buf);
@@ -2162,6 +2175,9 @@ vd_start_bio(vd_task_t *task)
buf->b_flags |= B_WRITE;
}
+ /* convert VIO block number to buf block number */
+ buf->b_lblkno = offset << vd->vio_bshift;
+
request->status = ldi_strategy(vd->ldi_handle[slice], buf);
}
@@ -3101,7 +3117,8 @@ vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg)
switch (cmd) {
case DKIOCGETEFI:
len = vd_slice_flabel_read(vd,
- (caddr_t)dk_ioc->dki_data, lba * DEV_BSIZE, len);
+ (caddr_t)dk_ioc->dki_data,
+ lba * vd->vdisk_bsize, len);
ASSERT(len > 0);
@@ -3237,7 +3254,8 @@ vd_dskimg_validate_geometry(vd_t *vd)
}
vd->vdisk_label = VD_DISK_LABEL_UNK;
- vd_build_default_label(vd->dskimg_size, &label);
+ vd_build_default_label(vd->dskimg_size, vd->vdisk_bsize,
+ &label);
status = EINVAL;
} else {
vd->vdisk_label = VD_DISK_LABEL_VTOC;
@@ -3835,7 +3853,7 @@ vd_get_capacity(vd_task_t *task)
request->status = 0;
- vd_cap.vdisk_block_size = vd->vdisk_block_size;
+ vd_cap.vdisk_block_size = vd->vdisk_bsize;
vd_cap.vdisk_size = vd->vdisk_size;
if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&vd_cap, 0, &nbytes,
@@ -4480,7 +4498,7 @@ vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
* Must first get the maximum transfer size in bytes.
*/
size_t max_xfer_bytes = attr_msg->vdisk_block_size ?
- attr_msg->vdisk_block_size*attr_msg->max_xfer_sz :
+ attr_msg->vdisk_block_size * attr_msg->max_xfer_sz :
attr_msg->max_xfer_sz;
size_t max_inband_msglen =
sizeof (vd_dring_inband_msg_t) +
@@ -4506,7 +4524,7 @@ vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
}
/* Return the device's block size and max transfer size to the client */
- attr_msg->vdisk_block_size = vd->vdisk_block_size;
+ attr_msg->vdisk_block_size = vd->vdisk_bsize;
attr_msg->max_xfer_sz = vd->max_xfer_sz;
attr_msg->vdisk_size = vd->vdisk_size;
@@ -5442,7 +5460,7 @@ vd_dskimg_is_iso_image(vd_t *vd)
* Standard Identifier and is set to CD001 for a CD-ROM compliant
* to the ISO 9660 standard.
*/
- sec = (ISO_VOLDESC_SEC * ISO_SECTOR_SIZE) / vd->vdisk_block_size;
+ sec = (ISO_VOLDESC_SEC * ISO_SECTOR_SIZE) / vd->vdisk_bsize;
rv = vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)iso_buf,
sec, ISO_SECTOR_SIZE);
@@ -5507,16 +5525,13 @@ vd_setup_full_disk(vd_t *vd)
ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK);
- vd->vdisk_block_size = DEV_BSIZE;
-
/* set the disk size, block size and the media type of the disk */
status = vd_backend_check_size(vd);
if (status != 0) {
if (!vd->scsi) {
/* unexpected failure */
- PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d",
- status);
+ PRN("Failed to check backend size (errno %d)", status);
return (status);
}
@@ -5526,7 +5541,8 @@ vd_setup_full_disk(vd_t *vd)
* size of the disk and the block size.
*/
vd->vdisk_size = VD_SIZE_UNKNOWN;
- vd->block_size = 0;
+ vd->vdisk_bsize = 0;
+ vd->backend_bsize = 0;
vd->vdisk_media = VD_MEDIA_FIXED;
}
@@ -5697,7 +5713,7 @@ vd_setup_partition_vtoc(vd_t *vd)
vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_size =
vd->dk_geom.dkg_ncyl * csize;
- vd_get_readable_size(vd->vdisk_size * vd->vdisk_block_size,
+ vd_get_readable_size(vd->vdisk_size * vd->vdisk_bsize,
&size, &unit);
/*
@@ -5723,7 +5739,7 @@ vd_setup_partition_vtoc(vd_t *vd)
/* create a fake label from the vtoc and geometry */
vd->flabel_limit = (uint_t)csize;
- vd->flabel_size = VD_LABEL_VTOC_SIZE;
+ vd->flabel_size = VD_LABEL_VTOC_SIZE(vd->vdisk_bsize);
vd->flabel = kmem_zalloc(vd->flabel_size, KM_SLEEP);
vd_vtocgeom_to_label(&vd->vtoc, &vd->dk_geom,
VD_LABEL_VTOC(vd));
@@ -5741,7 +5757,7 @@ vd_setup_partition_vtoc(vd_t *vd)
* as a slice without the addition of any metadata.
*
* So when exporting the disk as an EFI disk, we fake a disk with the following
- * layout:
+ * layout: (assuming the block size is 512 bytes)
*
* flabel +--- flabel_limit
* <------> v
@@ -5776,9 +5792,8 @@ vd_setup_partition_vtoc(vd_t *vd)
* - blocks 34+N+1 to P define a fake reserved partition and backup label, it
* returns 0
*
- * Note: if the backend size is not a multiple of the vdisk block size
- * (DEV_BSIZE = 512 byte) then the very end of the backend will not map to
- * any block of the virtual disk.
+ * Note: if the backend size is not a multiple of the vdisk block size then
+ * the very end of the backend will not map to any block of the virtual disk.
*/
static int
vd_setup_partition_efi(vd_t *vd)
@@ -5788,23 +5803,35 @@ vd_setup_partition_efi(vd_t *vd)
struct uuid uuid = EFI_USR;
struct uuid efi_reserved = EFI_RESERVED;
uint32_t crc;
- uint64_t s0_start, s0_end;
+ uint64_t s0_start, s0_end, first_u_lba;
+ size_t bsize;
- vd->flabel_limit = 34;
- vd->flabel_size = VD_LABEL_EFI_SIZE;
+ ASSERT(vd->vdisk_bsize > 0);
+
+ bsize = vd->vdisk_bsize;
+ /*
+ * The minimum size for the label is 16K (EFI_MIN_ARRAY_SIZE)
+ * for GPEs plus one block for the GPT and one for PMBR.
+ */
+ first_u_lba = (EFI_MIN_ARRAY_SIZE / bsize) + 2;
+ vd->flabel_limit = (uint_t)first_u_lba;
+ vd->flabel_size = VD_LABEL_EFI_SIZE(bsize);
vd->flabel = kmem_zalloc(vd->flabel_size, KM_SLEEP);
- gpt = VD_LABEL_EFI_GPT(vd);
- gpe = VD_LABEL_EFI_GPE(vd);
+ gpt = VD_LABEL_EFI_GPT(vd, bsize);
+ gpe = VD_LABEL_EFI_GPE(vd, bsize);
- /* adjust the vdisk_size, we emulate the first 34 blocks */
- vd->vdisk_size += 34;
- s0_start = 34;
+ /*
+ * Adjust the vdisk_size, we emulate the first few blocks
+ * for the disk label.
+ */
+ vd->vdisk_size += first_u_lba;
+ s0_start = first_u_lba;
s0_end = vd->vdisk_size - 1;
gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t));
- gpt->efi_gpt_FirstUsableLBA = LE_64(34ULL);
+ gpt->efi_gpt_FirstUsableLBA = LE_64(first_u_lba);
gpt->efi_gpt_PartitionEntryLBA = LE_64(2ULL);
gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t));
@@ -5834,7 +5861,8 @@ vd_setup_partition_efi(vd_t *vd)
gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1);
/* adjust the vdisk size for the backup GPT and GPE */
- vd->vdisk_size += 33;
+ vd->vdisk_size += (EFI_MIN_ARRAY_SIZE / bsize) + 1;
+ gpt->efi_gpt_AlternateLBA = LE_64(vd->vdisk_size - 1);
CRC32(crc, gpe, sizeof (efi_gpe_t) * VD_MAXPART, -1U, crc32_table);
gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
@@ -5854,7 +5882,6 @@ static int
vd_setup_backend_vnode(vd_t *vd)
{
int rval, status;
- vattr_t vattr;
dev_t dev;
char *file_path = vd->device_path;
ldi_handle_t lhandle;
@@ -5874,20 +5901,6 @@ vd_setup_backend_vnode(vd_t *vd)
*/
vd->file = B_TRUE;
- vattr.va_mask = AT_SIZE;
- if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred, NULL))
- != 0) {
- PRN("VOP_GETATTR(%s) = errno %d", file_path, status);
- return (EIO);
- }
-
- vd->dskimg_size = vattr.va_size;
-
- if (vd->file_vnode->v_flag & VNOMAP) {
- PRN("File %s cannot be mapped", file_path);
- return (EIO);
- }
-
vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */
/*
@@ -5938,10 +5951,6 @@ vd_setup_slice_image(vd_t *vd)
struct dk_label label;
int status;
- /* sector size = block size = DEV_BSIZE */
- vd->block_size = DEV_BSIZE;
- vd->vdisk_block_size = DEV_BSIZE;
- vd->vdisk_size = vd->dskimg_size / DEV_BSIZE;
vd->vdisk_media = VD_MEDIA_FIXED;
vd->vdisk_label = (vd_slice_label == VD_DISK_LABEL_UNK)?
vd_file_slice_label : vd_slice_label;
@@ -5956,7 +5965,8 @@ vd_setup_slice_image(vd_t *vd)
* adjust the vtoc so that it defines a single-slice
* disk.
*/
- vd_build_default_label(vd->dskimg_size, &label);
+ vd_build_default_label(vd->dskimg_size, vd->vdisk_bsize,
+ &label);
vd_label_to_vtocgeom(&label, &vd->vtoc, &vd->dk_geom);
status = vd_setup_partition_vtoc(vd);
}
@@ -5970,6 +5980,12 @@ vd_setup_disk_image(vd_t *vd)
int status;
char *backend_path = vd->device_path;
+ if ((status = vd_backend_check_size(vd)) != 0) {
+ PRN("Fail to check size of %s (errno %d)",
+ backend_path, status);
+ return (EIO);
+ }
+
/* size should be at least sizeof(dk_label) */
if (vd->dskimg_size < sizeof (struct dk_label)) {
PRN("Size of file has to be at least %ld bytes",
@@ -5977,11 +5993,6 @@ vd_setup_disk_image(vd_t *vd)
return (EIO);
}
- /* sector size = block size = DEV_BSIZE */
- vd->block_size = DEV_BSIZE;
- vd->vdisk_block_size = DEV_BSIZE;
- vd->vdisk_size = vd->dskimg_size / DEV_BSIZE;
-
/*
* Find and validate the geometry of a disk image.
*/
@@ -5997,7 +6008,7 @@ vd_setup_disk_image(vd_t *vd)
* of the ISO image (images for both drive types are stored
* in the ISO-9600 format). CDs can store up to just under 1Gb
*/
- if ((vd->vdisk_size * vd->vdisk_block_size) > ONE_GIGABYTE)
+ if ((vd->vdisk_size * vd->vdisk_bsize) > ONE_GIGABYTE)
vd->vdisk_media = VD_MEDIA_DVD;
else
vd->vdisk_media = VD_MEDIA_CD;
@@ -6179,14 +6190,6 @@ vd_setup_backend_ldi(vd_t *vd)
if (vd->vdisk_type == VD_DISK_TYPE_DISK) {
if (vd->volume) {
- /* get size of backing device */
- if (ldi_get_size(vd->ldi_handle[0], &vd->dskimg_size) !=
- DDI_SUCCESS) {
- PRN("ldi_get_size() failed for %s",
- device_path);
- return (EIO);
- }
-
/* setup disk image */
return (vd_setup_disk_image(vd));
}
@@ -6220,14 +6223,6 @@ vd_setup_single_slice_disk(vd_t *vd)
char *device_path = vd->device_path;
struct vtoc vtoc;
- /* Get size of backing device */
- if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) {
- PRN("ldi_get_size() failed for %s", device_path);
- return (EIO);
- }
- vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */
- vd->block_size = DEV_BSIZE;
- vd->vdisk_block_size = DEV_BSIZE;
vd->vdisk_media = VD_MEDIA_FIXED;
if (vd->volume) {
@@ -6241,6 +6236,12 @@ vd_setup_single_slice_disk(vd_t *vd)
vd->vdisk_type = VD_DISK_TYPE_SLICE;
vd->nslices = 1;
+ /* Get size of backing device */
+ if ((status = vd_backend_check_size(vd)) != 0) {
+ PRN("Fail to check size of %s (errno %d)", device_path, status);
+ return (EIO);
+ }
+
/*
* When exporting a slice or a device as a single slice disk, we don't
* care about any partitioning exposed by the backend. The goal is just
@@ -6251,7 +6252,7 @@ vd_setup_single_slice_disk(vd_t *vd)
* variable.
*/
if (vd_slice_label == VD_DISK_LABEL_EFI ||
- vd->vdisk_size >= ONE_TERABYTE / DEV_BSIZE) {
+ vd->vdisk_size >= ONE_TERABYTE / vd->vdisk_bsize) {
vd->vdisk_label = VD_DISK_LABEL_EFI;
} else {
status = ldi_ioctl(vd->ldi_handle[0], DKIOCGEXTVTOC,
@@ -6281,8 +6282,8 @@ vd_setup_single_slice_disk(vd_t *vd)
} else if (vd_slice_label == VD_DISK_LABEL_VTOC) {
vd->vdisk_label = VD_DISK_LABEL_VTOC;
- vd_build_default_label(vd->vdisk_size * DEV_BSIZE,
- &label);
+ vd_build_default_label(vd->vdisk_size * vd->vdisk_bsize,
+ vd->vdisk_bsize, &label);
vd_label_to_vtocgeom(&label, &vd->vtoc, &vd->dk_geom);
} else {
@@ -6302,13 +6303,50 @@ vd_setup_single_slice_disk(vd_t *vd)
return (status);
}
+/*
+ * This function is invoked when setting up the vdisk backend and when processing
+ * the VD_OP_GET_CAPACITY operation. It checks the backend size and sets the
+ * following attributes of the vd structure:
+ *
+ * - vdisk_bsize: block size for the virtual disk used by the VIO protocol. Its
+ * value is 512 bytes (DEV_BSIZE) when the backend is a file, a volume or a
+ * CD/DVD. When the backend is a disk or a disk slice then it has the value
+ * of the logical block size of that disk (as returned by the DKIOCGMEDIAINFO
+ * ioctl). This block size is expected to be a power of 2 and a multiple of
+ * 512.
+ *
+ * - vdisk_size: size of the virtual disk expressed as a number of vdisk_bsize
+ * blocks.
+ *
+ * vdisk_size and vdisk_bsize are sent to the vdisk client during the connection
+ * handshake and in the result of a VD_OP_GET_CAPACITY operation.
+ *
+ * - backend_bsize: block size of the backend device. backend_bsize has the same
+ * value as vdisk_bsize except when the backend is a CD/DVD. In that case,
+ * vdisk_bsize is set to 512 (DEV_BSIZE) while backend_bsize is set to the
+ * effective logical block size of the CD/DVD (usually 2048).
+ *
+ * - dskimg_size: size of the backend when the backend is a disk image. This
+ * attribute is set only when the backend is a file or a volume, otherwise it
+ * is unused.
+ *
+ * - vio_bshift: number of bits to shift to convert a VIO block number (which
+ * uses a block size of vdisk_bsize) to a buf(9s) block number (which uses a
+ * block size of 512 bytes) i.e. we have vdisk_bsize = 512 x 2 ^ vio_bshift
+ *
+ * - vdisk_media: media of the virtual disk. This function only sets this
+ * attribute for physical disk and CD/DVD. For other backend types, this
+ * attribute is set in the setup function of the backend.
+ */
static int
vd_backend_check_size(vd_t *vd)
{
- size_t backend_size, old_size, new_size;
+ size_t backend_size, backend_bsize, vdisk_bsize;
+ size_t old_size, new_size;
struct dk_minfo minfo;
vattr_t vattr;
- int rval, rv;
+ int rval, rv, media, nshift = 0;
+ uint32_t n;
if (vd->file) {
@@ -6320,20 +6358,23 @@ vd_backend_check_size(vd_t *vd)
return (rv);
}
backend_size = vattr.va_size;
+ backend_bsize = DEV_BSIZE;
+ vdisk_bsize = DEV_BSIZE;
- } else if (vd->volume || vd->vdisk_type == VD_DISK_TYPE_SLICE) {
+ } else if (vd->volume) {
- /* physical slice or volume (slice or full disk) */
+ /* volume (slice or full disk) */
rv = ldi_get_size(vd->ldi_handle[0], &backend_size);
if (rv != DDI_SUCCESS) {
PR0("ldi_get_size() failed for %s", vd->device_path);
return (EIO);
}
+ backend_bsize = DEV_BSIZE;
+ vdisk_bsize = DEV_BSIZE;
} else {
- /* physical disk */
- ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK);
+ /* physical disk or slice */
rv = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO,
(intptr_t)&minfo, (vd->open_flags | FKIOCTL),
kcred, &rval);
@@ -6342,17 +6383,58 @@ vd_backend_check_size(vd_t *vd)
vd->device_path, rv);
return (rv);
}
- backend_size = minfo.dki_capacity * minfo.dki_lbsize;
+
+ if (vd->vdisk_type == VD_DISK_TYPE_SLICE) {
+ rv = ldi_get_size(vd->ldi_handle[0], &backend_size);
+ if (rv != DDI_SUCCESS) {
+ PR0("ldi_get_size() failed for %s",
+ vd->device_path);
+ return (EIO);
+ }
+ } else {
+ ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK);
+ backend_size = minfo.dki_capacity * minfo.dki_lbsize;
+ }
+
+ backend_bsize = minfo.dki_lbsize;
+ media = DK_MEDIATYPE2VD_MEDIATYPE(minfo.dki_media_type);
+
+ /*
+ * If the device is a CD or a DVD then we force the vdisk block
+ * size to 512 bytes (DEV_BSIZE). In that case, vdisk_bsize can
+ * be different from backend_bsize.
+ */
+ if (media == VD_MEDIA_CD || media == VD_MEDIA_DVD)
+ vdisk_bsize = DEV_BSIZE;
+ else
+ vdisk_bsize = backend_bsize;
}
+ /* check vdisk block size */
+ if (vdisk_bsize == 0 || vdisk_bsize % DEV_BSIZE != 0)
+ return (EINVAL);
+
old_size = vd->vdisk_size;
- new_size = backend_size / DEV_BSIZE;
+ new_size = backend_size / vdisk_bsize;
/* check if size has changed */
- if (old_size != VD_SIZE_UNKNOWN && old_size == new_size)
+ if (old_size != VD_SIZE_UNKNOWN && old_size == new_size &&
+ vd->vdisk_bsize == vdisk_bsize)
return (0);
+ /* cache info for blk conversion */
+ for (n = vdisk_bsize / DEV_BSIZE; n > 1; n >>= 1) {
+ if ((n & 0x1) != 0) {
+ /* blk_size is not a power of 2 */
+ return (EINVAL);
+ }
+ nshift++;
+ }
+
+ vd->vio_bshift = nshift;
vd->vdisk_size = new_size;
+ vd->vdisk_bsize = vdisk_bsize;
+ vd->backend_bsize = backend_bsize;
if (vd->file || vd->volume)
vd->dskimg_size = backend_size;
@@ -6384,9 +6466,7 @@ vd_backend_check_size(vd_t *vd)
} else if (!vd->file && !vd->volume) {
/* physical disk */
ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK);
- vd->block_size = minfo.dki_lbsize;
- vd->vdisk_media =
- DK_MEDIATYPE2VD_MEDIATYPE(minfo.dki_media_type);
+ vd->vdisk_media = media;
}
return (0);
diff --git a/usr/src/uts/sun4v/sys/vdc.h b/usr/src/uts/sun4v/sys/vdc.h
index 63b76b9d27..eecaf9a30b 100644
--- a/usr/src/uts/sun4v/sys/vdc.h
+++ b/usr/src/uts/sun4v/sys/vdc.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -98,6 +98,10 @@ extern "C" {
*/
#define VD_MAKE_DEV(instance, minor) ((instance << VDCUNIT_SHIFT) | minor)
+#define VDC_EFI_DEV_SET(dev, vdsk, ioctl) \
+ VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \
+ (vdsk)->vdisk_bsize, (vdsk)->vdisk_size)
+
/*
* variables controlling how long to wait before timing out and how many
* retries to attempt before giving up when communicating with vds.
@@ -302,7 +306,9 @@ typedef struct vdc {
uint32_t vdisk_media; /* physical media type of vDisk */
uint64_t vdisk_size; /* device size in blocks */
uint64_t max_xfer_sz; /* maximum block size of a descriptor */
- uint64_t block_size; /* device block size used */
+ uint64_t vdisk_bsize; /* blk size for the virtual disk */
+ uint32_t vio_bmask; /* mask to check vio blk alignment */
+ int vio_bshift; /* shift for vio blk conversion */
uint64_t operations; /* bitmask of ops. server supports */
struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */
struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */
diff --git a/usr/src/uts/sun4v/sys/vdsk_common.h b/usr/src/uts/sun4v/sys/vdsk_common.h
index 62b45c2df4..0464964847 100644
--- a/usr/src/uts/sun4v/sys/vdsk_common.h
+++ b/usr/src/uts/sun4v/sys/vdsk_common.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -521,11 +521,11 @@ typedef struct vd_efi_dev {
vd_efi_ioctl_func vdisk_ioctl; /* vdisk ioctl function */
} vd_efi_dev_t;
-#define VD_EFI_DEV_SET(efi_dev, vdsk, ioctl) \
- (efi_dev).vdisk = vdsk; \
- (efi_dev).vdisk_ioctl = ioctl; \
- (efi_dev).block_size = (vdsk)->block_size; \
- (efi_dev).disk_size = (vdsk)->vdisk_size;
+#define VDSK_EFI_DEV_SET(efi_dev, vdsk, ioctl, bsize, dsize) \
+ (efi_dev).vdisk = vdsk; \
+ (efi_dev).vdisk_ioctl = ioctl; \
+ (efi_dev).block_size = bsize; \
+ (efi_dev).disk_size = dsize;
int vd_efi_alloc_and_read(vd_efi_dev_t *dev, efi_gpt_t **gpt, efi_gpe_t **gpe);