diff options
Diffstat (limited to 'usr/src/uts')
-rw-r--r-- | usr/src/uts/common/io/cmlb.c | 58 | ||||
-rw-r--r-- | usr/src/uts/common/io/scsi/targets/sd.c | 744 | ||||
-rw-r--r-- | usr/src/uts/common/os/dumpsubr.c | 12 | ||||
-rw-r--r-- | usr/src/uts/common/sys/dkio.h | 18 | ||||
-rw-r--r-- | usr/src/uts/common/sys/dklabel.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/sys/scsi/targets/sddef.h | 34 | ||||
-rw-r--r-- | usr/src/uts/common/xen/io/xdb.c | 18 | ||||
-rw-r--r-- | usr/src/uts/common/xen/io/xdb.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/xen/io/xdf.c | 93 | ||||
-rw-r--r-- | usr/src/uts/common/xen/io/xdf.h | 3 | ||||
-rw-r--r-- | usr/src/uts/common/xen/sys/xendev.h | 1 | ||||
-rw-r--r-- | usr/src/uts/sun4v/io/vdc.c | 164 | ||||
-rw-r--r-- | usr/src/uts/sun4v/io/vds.c | 338 | ||||
-rw-r--r-- | usr/src/uts/sun4v/sys/vdc.h | 10 | ||||
-rw-r--r-- | usr/src/uts/sun4v/sys/vdsk_common.h | 12 |
15 files changed, 1205 insertions, 306 deletions
diff --git a/usr/src/uts/common/io/cmlb.c b/usr/src/uts/common/io/cmlb.c index 75559a9b94..343b1b965c 100644 --- a/usr/src/uts/common/io/cmlb.c +++ b/usr/src/uts/common/io/cmlb.c @@ -1287,6 +1287,9 @@ cmlb_check_update_blockcount(struct cmlb_lun *cl, void *tg_cookie) if ((capacity != 0) && (lbasize != 0)) { cl->cl_blockcount = capacity; cl->cl_tgt_blocksize = lbasize; + if (!cl->cl_is_removable) { + cl->cl_sys_blocksize = lbasize; + } return (0); } else { return (EIO); @@ -1592,7 +1595,7 @@ cmlb_validate_geometry(struct cmlb_lun *cl, boolean_t forcerevalid, int flags, label_addr = (daddr_t)(cl->cl_solaris_offset + DK_LABEL_LOC); - buffer_size = sizeof (struct dk_label); + buffer_size = cl->cl_sys_blocksize; cmlb_dbg(CMLB_TRACE, cl, "cmlb_validate_geometry: " "label_addr: 0x%x allocation size: 0x%x\n", @@ -2199,12 +2202,6 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags, ASSERT(mutex_owned(CMLB_MUTEX(cl))); - if (cl->cl_tgt_blocksize != cl->cl_sys_blocksize) { - rval = EINVAL; - goto done_err1; - } - - lbasize = cl->cl_sys_blocksize; cl->cl_reserved = -1; @@ -3637,7 +3634,7 @@ cmlb_dkio_partition(struct cmlb_lun *cl, caddr_t arg, int flag, } buffer = kmem_alloc(EFI_MIN_ARRAY_SIZE, KM_SLEEP); - rval = DK_TG_READ(cl, buffer, 1, DEV_BSIZE, tg_cookie); + rval = DK_TG_READ(cl, buffer, 1, cl->cl_sys_blocksize, tg_cookie); if (rval != 0) goto done_error; @@ -4048,9 +4045,9 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) cl->cl_reserved = -1; mutex_exit(CMLB_MUTEX(cl)); - gpt = kmem_alloc(sizeof (efi_gpt_t), KM_SLEEP); + gpt = kmem_alloc(cl->cl_sys_blocksize, KM_SLEEP); - if (DK_TG_READ(cl, gpt, 1, DEV_BSIZE, tg_cookie) != 0) { + if (DK_TG_READ(cl, gpt, 1, cl->cl_sys_blocksize, tg_cookie) != 0) { goto done; } @@ -4059,7 +4056,8 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) if (rval == 0) { /* clear primary */ bzero(gpt, sizeof (efi_gpt_t)); - if (rval = DK_TG_WRITE(cl, gpt, 1, EFI_LABEL_SIZE, tg_cookie)) { + if (rval = DK_TG_WRITE(cl, 
gpt, 1, cl->cl_sys_blocksize, + tg_cookie)) { cmlb_dbg(CMLB_INFO, cl, "cmlb_clear_efi: clear primary label failed\n"); } @@ -4070,8 +4068,8 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) goto done; } - if ((rval = DK_TG_READ(cl, gpt, cap - 1, EFI_LABEL_SIZE, tg_cookie)) - != 0) { + if ((rval = DK_TG_READ(cl, gpt, cap - 1, cl->cl_sys_blocksize, + tg_cookie)) != 0) { goto done; } cmlb_swap_efi_gpt(gpt); @@ -4081,7 +4079,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) cmlb_dbg(CMLB_TRACE, cl, "cmlb_clear_efi clear backup@%lu\n", cap - 1); bzero(gpt, sizeof (efi_gpt_t)); - if ((rval = DK_TG_WRITE(cl, gpt, cap - 1, EFI_LABEL_SIZE, + if ((rval = DK_TG_WRITE(cl, gpt, cap - 1, cl->cl_sys_blocksize, tg_cookie))) { cmlb_dbg(CMLB_INFO, cl, "cmlb_clear_efi: clear backup label failed\n"); @@ -4092,7 +4090,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) * header of this file */ if ((rval = DK_TG_READ(cl, gpt, cap - 2, - EFI_LABEL_SIZE, tg_cookie)) != 0) { + cl->cl_sys_blocksize, tg_cookie)) != 0) { goto done; } cmlb_swap_efi_gpt(gpt); @@ -4104,7 +4102,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) cap - 2); bzero(gpt, sizeof (efi_gpt_t)); if ((rval = DK_TG_WRITE(cl, gpt, cap - 2, - EFI_LABEL_SIZE, tg_cookie))) { + cl->cl_sys_blocksize, tg_cookie))) { cmlb_dbg(CMLB_INFO, cl, "cmlb_clear_efi: clear legacy backup label " "failed\n"); @@ -4113,7 +4111,7 @@ cmlb_clear_efi(struct cmlb_lun *cl, void *tg_cookie) } done: - kmem_free(gpt, sizeof (efi_gpt_t)); + kmem_free(gpt, cl->cl_sys_blocksize); } /* @@ -4210,7 +4208,7 @@ cmlb_clear_vtoc(struct cmlb_lun *cl, void *tg_cookie) struct dk_label *dkl; mutex_exit(CMLB_MUTEX(cl)); - dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP); + dkl = kmem_zalloc(cl->cl_sys_blocksize, KM_SLEEP); mutex_enter(CMLB_MUTEX(cl)); /* * cmlb_set_vtoc uses these fields in order to figure out @@ -4223,7 +4221,7 @@ cmlb_clear_vtoc(struct cmlb_lun *cl, void *tg_cookie) dkl->dkl_nsect = cl->cl_g.dkg_nsect; 
mutex_exit(CMLB_MUTEX(cl)); (void) cmlb_set_vtoc(cl, dkl, tg_cookie); - kmem_free(dkl, sizeof (struct dk_label)); + kmem_free(dkl, cl->cl_sys_blocksize); mutex_enter(CMLB_MUTEX(cl)); } @@ -4258,7 +4256,7 @@ cmlb_write_label(struct cmlb_lun *cl, void *tg_cookie) ASSERT(mutex_owned(CMLB_MUTEX(cl))); mutex_exit(CMLB_MUTEX(cl)); - dkl = kmem_zalloc(sizeof (struct dk_label), KM_SLEEP); + dkl = kmem_zalloc(cl->cl_sys_blocksize, KM_SLEEP); mutex_enter(CMLB_MUTEX(cl)); bcopy(&cl->cl_vtoc, &dkl->dkl_vtoc, sizeof (struct dk_vtoc)); @@ -4303,7 +4301,7 @@ cmlb_write_label(struct cmlb_lun *cl, void *tg_cookie) rval = cmlb_set_vtoc(cl, dkl, tg_cookie); exit: - kmem_free(dkl, sizeof (struct dk_label)); + kmem_free(dkl, cl->cl_sys_blocksize); mutex_enter(CMLB_MUTEX(cl)); return (rval); } @@ -4422,7 +4420,7 @@ cmlb_dkio_get_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie) /* * Read the mboot block, located at absolute block 0 on the target. */ - buffer_size = sizeof (struct mboot); + buffer_size = cl->cl_sys_blocksize; cmlb_dbg(CMLB_TRACE, cl, "cmlb_dkio_get_mboot: allocation size: 0x%x\n", buffer_size); @@ -4481,18 +4479,18 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie) return (EINVAL); } - mboot = kmem_zalloc(sizeof (struct mboot), KM_SLEEP); + mboot = kmem_zalloc(cl->cl_sys_blocksize, KM_SLEEP); if (ddi_copyin((const void *)arg, mboot, - sizeof (struct mboot), flag) != 0) { - kmem_free(mboot, (size_t)(sizeof (struct mboot))); + cl->cl_sys_blocksize, flag) != 0) { + kmem_free(mboot, cl->cl_sys_blocksize); return (EFAULT); } /* Is this really a master boot record? 
*/ magic = LE_16(mboot->signature); if (magic != MBB_MAGIC) { - kmem_free(mboot, (size_t)(sizeof (struct mboot))); + kmem_free(mboot, cl->cl_sys_blocksize); return (EINVAL); } @@ -4508,7 +4506,7 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie) rval = cmlb_update_fdisk_and_vtoc(cl, tg_cookie); if ((!cl->cl_f_geometry_is_valid) || (rval != 0)) { mutex_exit(CMLB_MUTEX(cl)); - kmem_free(mboot, (size_t)(sizeof (struct mboot))); + kmem_free(mboot, cl->cl_sys_blocksize); return (rval); } } @@ -4529,7 +4527,7 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie) #endif cl->cl_msglog_flag |= CMLB_ALLOW_2TB_WARN; mutex_exit(CMLB_MUTEX(cl)); - kmem_free(mboot, (size_t)(sizeof (struct mboot))); + kmem_free(mboot, cl->cl_sys_blocksize); return (rval); } @@ -5098,10 +5096,10 @@ fallback: return (ddi_prop_op(dev, dip, prop_op, mod_flags, (diskaddr_t *)&nblocks64, NULL, NULL, NULL, tg_cookie); /* - * Assume partition information is in DEV_BSIZE units, compute + * Assume partition information is in sys_blocksize units, compute * divisor for size(9P) property representation. */ - dblk = lbasize / DEV_BSIZE; + dblk = lbasize / cl->cl_sys_blocksize; /* Now let ddi_prop_op_nblocks_blksize() handle the request. 
*/ return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, diff --git a/usr/src/uts/common/io/scsi/targets/sd.c b/usr/src/uts/common/io/scsi/targets/sd.c index f2cbc0df37..8cbc1310a3 100644 --- a/usr/src/uts/common/io/scsi/targets/sd.c +++ b/usr/src/uts/common/io/scsi/targets/sd.c @@ -1017,6 +1017,7 @@ static int sd_pm_idletime = 1; #define sd_free_rqs ssd_free_rqs #define sd_dump_memory ssd_dump_memory #define sd_get_media_info ssd_get_media_info +#define sd_get_media_info_ext ssd_get_media_info_ext #define sd_dkio_ctrl_info ssd_dkio_ctrl_info #define sd_nvpair_str_decode ssd_nvpair_str_decode #define sd_strtok_r ssd_strtok_r @@ -1093,6 +1094,7 @@ static int sd_pm_idletime = 1; #define sd_is_lsi ssd_is_lsi #define sd_tg_rdwr ssd_tg_rdwr #define sd_tg_getinfo ssd_tg_getinfo +#define sd_rmw_msg_print_handler ssd_rmw_msg_print_handler #endif /* #if (defined(__fibre)) */ @@ -1463,7 +1465,7 @@ static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag); static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap, int path_flag); static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, - uint32_t *lbap, int path_flag); + uint32_t *lbap, uint32_t *psp, int path_flag); static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int flag, int path_flag); static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, @@ -1510,6 +1512,7 @@ static void sd_panic_for_res_conflict(struct sd_lun *un); * Disk Ioctl Function Prototypes */ static int sd_get_media_info(dev_t dev, caddr_t arg, int flag); +static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag); static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag); static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag); @@ -1610,6 +1613,11 @@ static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr, static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie); /* + * For printing RMW warning message timely + */ +static 
void sd_rmw_msg_print_handler(void *arg); + +/* * Constants for failfast support: * * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO @@ -1781,13 +1789,19 @@ static sd_chain_t sd_iostart_chain[] = { sd_mapblockaddr_iostart, /* Index: 3 */ sd_core_iostart, /* Index: 4 */ - /* Chain for buf IO for removable-media targets (PM enabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets with RMW needed (PM enabled) + */ sd_mapblockaddr_iostart, /* Index: 5 */ sd_mapblocksize_iostart, /* Index: 6 */ sd_pm_iostart, /* Index: 7 */ sd_core_iostart, /* Index: 8 */ - /* Chain for buf IO for removable-media targets (PM disabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets with RMW needed (PM disabled) + */ sd_mapblockaddr_iostart, /* Index: 9 */ sd_mapblocksize_iostart, /* Index: 10 */ sd_core_iostart, /* Index: 11 */ @@ -1817,6 +1831,26 @@ static sd_chain_t sd_iostart_chain[] = { /* Chain for "direct priority" USCSI commands (all targets) */ sd_core_iostart, /* Index: 25 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with RMW needed with checksumming (PM enabled) + */ + sd_mapblockaddr_iostart, /* Index: 26 */ + sd_mapblocksize_iostart, /* Index: 27 */ + sd_checksum_iostart, /* Index: 28 */ + sd_pm_iostart, /* Index: 29 */ + sd_core_iostart, /* Index: 30 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with RMW needed with checksumming (PM disabled) + */ + sd_mapblockaddr_iostart, /* Index: 31 */ + sd_mapblocksize_iostart, /* Index: 32 */ + sd_checksum_iostart, /* Index: 33 */ + sd_core_iostart, /* Index: 34 */ + }; /* @@ -1825,7 +1859,9 @@ static sd_chain_t sd_iostart_chain[] = { */ #define SD_CHAIN_DISK_IOSTART 0 #define SD_CHAIN_DISK_IOSTART_NO_PM 3 +#define SD_CHAIN_MSS_DISK_IOSTART 5 #define SD_CHAIN_RMMEDIA_IOSTART 5 +#define SD_CHAIN_MSS_DISK_IOSTART_NO_PM 9 #define SD_CHAIN_RMMEDIA_IOSTART_NO_PM 9 
#define SD_CHAIN_CHKSUM_IOSTART 12 #define SD_CHAIN_CHKSUM_IOSTART_NO_PM 16 @@ -1833,6 +1869,8 @@ static sd_chain_t sd_iostart_chain[] = { #define SD_CHAIN_USCSI_CHKSUM_IOSTART 21 #define SD_CHAIN_DIRECT_CMD_IOSTART 24 #define SD_CHAIN_PRIORITY_CMD_IOSTART 25 +#define SD_CHAIN_MSS_CHKSUM_IOSTART 26 +#define SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM 31 /* @@ -1859,13 +1897,19 @@ static sd_chain_t sd_iodone_chain[] = { sd_buf_iodone, /* Index: 3 */ sd_mapblockaddr_iodone, /* Index: 4 */ - /* Chain for buf IO for removable-media targets (PM enabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets with RMW needed (PM enabled) + */ sd_buf_iodone, /* Index: 5 */ sd_mapblockaddr_iodone, /* Index: 6 */ sd_mapblocksize_iodone, /* Index: 7 */ sd_pm_iodone, /* Index: 8 */ - /* Chain for buf IO for removable-media targets (PM disabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets with RMW needed (PM disabled) + */ sd_buf_iodone, /* Index: 9 */ sd_mapblockaddr_iodone, /* Index: 10 */ sd_mapblocksize_iodone, /* Index: 11 */ @@ -1895,6 +1939,25 @@ static sd_chain_t sd_iodone_chain[] = { /* Chain for "direct priority" USCSI commands (all targets) */ sd_uscsi_iodone, /* Index: 25 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM enabled) + */ + sd_buf_iodone, /* Index: 26 */ + sd_mapblockaddr_iodone, /* Index: 27 */ + sd_mapblocksize_iodone, /* Index: 28 */ + sd_checksum_iodone, /* Index: 29 */ + sd_pm_iodone, /* Index: 30 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM disabled) + */ + sd_buf_iodone, /* Index: 31 */ + sd_mapblockaddr_iodone, /* Index: 32 */ + sd_mapblocksize_iodone, /* Index: 33 */ + sd_checksum_iodone, /* Index: 34 */ }; @@ -1910,14 +1973,17 @@ static sd_chain_t sd_iodone_chain[] = { #define SD_CHAIN_DISK_IODONE 2 #define SD_CHAIN_DISK_IODONE_NO_PM 4 #define SD_CHAIN_RMMEDIA_IODONE 8 
+#define SD_CHAIN_MSS_DISK_IODONE 8 #define SD_CHAIN_RMMEDIA_IODONE_NO_PM 11 +#define SD_CHAIN_MSS_DISK_IODONE_NO_PM 11 #define SD_CHAIN_CHKSUM_IODONE 15 #define SD_CHAIN_CHKSUM_IODONE_NO_PM 18 #define SD_CHAIN_USCSI_CMD_IODONE 20 #define SD_CHAIN_USCSI_CHKSUM_IODONE 22 #define SD_CHAIN_DIRECT_CMD_IODONE 24 #define SD_CHAIN_PRIORITY_CMD_IODONE 25 - +#define SD_CHAIN_MSS_CHKSUM_IODONE 30 +#define SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM 34 @@ -1940,13 +2006,19 @@ static sd_initpkt_t sd_initpkt_map[] = { sd_initpkt_for_buf, /* Index: 3 */ sd_initpkt_for_buf, /* Index: 4 */ - /* Chain for buf IO for removable-media targets (PM enabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets (PM enabled) + */ sd_initpkt_for_buf, /* Index: 5 */ sd_initpkt_for_buf, /* Index: 6 */ sd_initpkt_for_buf, /* Index: 7 */ sd_initpkt_for_buf, /* Index: 8 */ - /* Chain for buf IO for removable-media targets (PM disabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets (PM disabled) + */ sd_initpkt_for_buf, /* Index: 9 */ sd_initpkt_for_buf, /* Index: 10 */ sd_initpkt_for_buf, /* Index: 11 */ @@ -1977,6 +2049,24 @@ static sd_initpkt_t sd_initpkt_map[] = { /* Chain for "direct priority" USCSI commands (all targets) */ sd_initpkt_for_uscsi, /* Index: 25 */ + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM enabled) + */ + sd_initpkt_for_buf, /* Index: 26 */ + sd_initpkt_for_buf, /* Index: 27 */ + sd_initpkt_for_buf, /* Index: 28 */ + sd_initpkt_for_buf, /* Index: 29 */ + sd_initpkt_for_buf, /* Index: 30 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM disabled) + */ + sd_initpkt_for_buf, /* Index: 31 */ + sd_initpkt_for_buf, /* Index: 32 */ + sd_initpkt_for_buf, /* Index: 33 */ + sd_initpkt_for_buf, /* Index: 34 */ }; @@ -1999,13 +2089,19 @@ static sd_destroypkt_t sd_destroypkt_map[] = { sd_destroypkt_for_buf, /* Index: 3 
*/ sd_destroypkt_for_buf, /* Index: 4 */ - /* Chain for buf IO for removable-media targets (PM enabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets (PM enabled) + */ sd_destroypkt_for_buf, /* Index: 5 */ sd_destroypkt_for_buf, /* Index: 6 */ sd_destroypkt_for_buf, /* Index: 7 */ sd_destroypkt_for_buf, /* Index: 8 */ - /* Chain for buf IO for removable-media targets (PM disabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets (PM disabled) + */ sd_destroypkt_for_buf, /* Index: 9 */ sd_destroypkt_for_buf, /* Index: 10 */ sd_destroypkt_for_buf, /* Index: 11 */ @@ -2036,6 +2132,24 @@ static sd_destroypkt_t sd_destroypkt_map[] = { /* Chain for "direct priority" USCSI commands (all targets) */ sd_destroypkt_for_uscsi, /* Index: 25 */ + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM enabled) + */ + sd_destroypkt_for_buf, /* Index: 26 */ + sd_destroypkt_for_buf, /* Index: 27 */ + sd_destroypkt_for_buf, /* Index: 28 */ + sd_destroypkt_for_buf, /* Index: 29 */ + sd_destroypkt_for_buf, /* Index: 30 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM disabled) + */ + sd_destroypkt_for_buf, /* Index: 31 */ + sd_destroypkt_for_buf, /* Index: 32 */ + sd_destroypkt_for_buf, /* Index: 33 */ + sd_destroypkt_for_buf, /* Index: 34 */ }; @@ -2066,13 +2180,19 @@ static int sd_chain_type_map[] = { SD_CHAIN_BUFIO, /* Index: 3 */ SD_CHAIN_BUFIO, /* Index: 4 */ - /* Chain for buf IO for removable-media targets (PM enabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets (PM enabled) + */ SD_CHAIN_BUFIO, /* Index: 5 */ SD_CHAIN_BUFIO, /* Index: 6 */ SD_CHAIN_BUFIO, /* Index: 7 */ SD_CHAIN_BUFIO, /* Index: 8 */ - /* Chain for buf IO for removable-media targets (PM disabled) */ + /* + * Chain for buf IO for removable-media or large sector size + * disk drive targets
(PM disabled) + */ SD_CHAIN_BUFIO, /* Index: 9 */ SD_CHAIN_BUFIO, /* Index: 10 */ SD_CHAIN_BUFIO, /* Index: 11 */ @@ -2095,13 +2215,32 @@ static int sd_chain_type_map[] = { /* Chain for USCSI commands (checksum targets) */ SD_CHAIN_USCSI, /* Index: 21 */ SD_CHAIN_USCSI, /* Index: 22 */ - SD_CHAIN_USCSI, /* Index: 22 */ + SD_CHAIN_USCSI, /* Index: 23 */ /* Chain for "direct" USCSI commands (all targets) */ SD_CHAIN_DIRECT, /* Index: 24 */ /* Chain for "direct priority" USCSI commands (all targets) */ SD_CHAIN_DIRECT_PRIORITY, /* Index: 25 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM enabled) + */ + SD_CHAIN_BUFIO, /* Index: 26 */ + SD_CHAIN_BUFIO, /* Index: 27 */ + SD_CHAIN_BUFIO, /* Index: 28 */ + SD_CHAIN_BUFIO, /* Index: 29 */ + SD_CHAIN_BUFIO, /* Index: 30 */ + + /* + * Chain for buf IO for large sector size disk drive targets + * with checksumming (PM disabled) + */ + SD_CHAIN_BUFIO, /* Index: 31 */ + SD_CHAIN_BUFIO, /* Index: 32 */ + SD_CHAIN_BUFIO, /* Index: 33 */ + SD_CHAIN_BUFIO, /* Index: 34 */ }; @@ -2147,6 +2286,9 @@ static struct sd_chain_index sd_chain_index_map[] = { { SD_CHAIN_USCSI_CHKSUM_IOSTART, SD_CHAIN_USCSI_CHKSUM_IODONE }, { SD_CHAIN_DIRECT_CMD_IOSTART, SD_CHAIN_DIRECT_CMD_IODONE }, { SD_CHAIN_PRIORITY_CMD_IOSTART, SD_CHAIN_PRIORITY_CMD_IODONE }, + { SD_CHAIN_MSS_CHKSUM_IOSTART, SD_CHAIN_MSS_CHKSUM_IODONE }, + { SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM }, + }; @@ -2158,9 +2300,13 @@ static struct sd_chain_index sd_chain_index_map[] = { #define SD_CHAIN_INFO_DISK 0 #define SD_CHAIN_INFO_DISK_NO_PM 1 #define SD_CHAIN_INFO_RMMEDIA 2 +#define SD_CHAIN_INFO_MSS_DISK 2 #define SD_CHAIN_INFO_RMMEDIA_NO_PM 3 +#define SD_CHAIN_INFO_MSS_DSK_NO_PM 3 #define SD_CHAIN_INFO_CHKSUM 4 #define SD_CHAIN_INFO_CHKSUM_NO_PM 5 +#define SD_CHAIN_INFO_MSS_DISK_CHKSUM 10 +#define SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM 11 /* un->un_uscsi_chain_type must be set to one of these */ #define 
SD_CHAIN_INFO_USCSI_CMD 6 @@ -3967,6 +4113,16 @@ sd_set_properties(struct sd_lun *un, char *name, char *value) "min throttle set to %d\n", un->un_min_throttle); } + if (strcasecmp(name, "rmw-type") == 0) { + if (ddi_strtol(value, &endptr, 0, &val) == 0) { + un->un_f_rmw_type = val; + } else { + goto value_invalid; + } + SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: " + "RMW type set to %d\n", un->un_f_rmw_type); + } + /* * Validate the throttle values. * If any of the numbers are invalid, set everything to defaults. @@ -4996,7 +5152,10 @@ sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity) { if (lbasize != 0) { un->un_tgt_blocksize = lbasize; - un->un_f_tgt_blocksize_is_valid = TRUE; + un->un_f_tgt_blocksize_is_valid = TRUE; + if (!un->un_f_has_removable_media) { + un->un_sys_blocksize = lbasize; + } } if (capacity != 0) { @@ -5290,7 +5449,7 @@ sd_get_devid(sd_ssc_t *ssc) /* Calculate the checksum */ chksum = 0; ip = (uint_t *)dkdevid; - for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int)); + for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++) { chksum ^= ip[i]; } @@ -5386,6 +5545,7 @@ static int sd_write_deviceid(sd_ssc_t *ssc) { struct dk_devid *dkdevid; + uchar_t *buf; diskaddr_t blk; uint_t *ip, chksum; int status; @@ -5406,7 +5566,8 @@ sd_write_deviceid(sd_ssc_t *ssc) /* Allocate the buffer */ - dkdevid = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP); + buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP); + dkdevid = (struct dk_devid *)buf; /* Fill in the revision */ dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB; @@ -5421,7 +5582,7 @@ sd_write_deviceid(sd_ssc_t *ssc) /* Calculate the checksum */ chksum = 0; ip = (uint_t *)dkdevid; - for (i = 0; i < ((un->un_sys_blocksize - sizeof (int))/sizeof (int)); + for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++) { chksum ^= ip[i]; } @@ -5430,12 +5591,12 @@ sd_write_deviceid(sd_ssc_t *ssc) DKD_FORMCHKSUM(chksum, dkdevid); /* Write the reserved sector */ 
- status = sd_send_scsi_WRITE(ssc, dkdevid, un->un_sys_blocksize, blk, + status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk, SD_PATH_DIRECT); if (status != 0) sd_ssc_assessment(ssc, SD_FMT_IGNORE); - kmem_free(dkdevid, un->un_sys_blocksize); + kmem_free(buf, un->un_sys_blocksize); mutex_enter(SD_MUTEX(un)); return (status); @@ -5903,6 +6064,14 @@ sd_ddi_suspend(dev_info_t *devi) mutex_exit(&un->un_pm_mutex); } + if (un->un_rmw_msg_timeid != NULL) { + timeout_id_t temp_id = un->un_rmw_msg_timeid; + un->un_rmw_msg_timeid = NULL; + mutex_exit(SD_MUTEX(un)); + (void) untimeout(temp_id); + mutex_enter(SD_MUTEX(un)); + } + if (un->un_retry_timeid != NULL) { timeout_id_t temp_id = un->un_retry_timeid; un->un_retry_timeid = NULL; @@ -6217,7 +6386,7 @@ sd_pm_idletimeout_handler(void *arg) } else { un->un_buf_chain_type = SD_CHAIN_INFO_DISK; } - un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD; + un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD; SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_idletimeout_handler: idling device\n"); @@ -6839,6 +7008,7 @@ sd_unit_attach(dev_info_t *devi) struct scsi_device *devp; struct sd_lun *un; char *variantp; + char name_str[48]; int reservation_flag = SD_TARGET_IS_UNRESERVED; int instance; int rval; @@ -7267,6 +7437,7 @@ sd_unit_attach(dev_info_t *devi) * meaning a non-zero value must be entered to change the default. 
*/ un->un_f_disksort_disabled = FALSE; + un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT; /* * Retrieve the properties from the static driver table or the driver @@ -7906,6 +8077,24 @@ sd_unit_attach(dev_info_t *devi) un->un_f_write_cache_enabled = (wc_enabled != 0); mutex_exit(SD_MUTEX(un)); + if (un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR && + un->un_tgt_blocksize != DEV_BSIZE) { + if (!(un->un_wm_cache)) { + (void) snprintf(name_str, sizeof (name_str), + "%s%d_cache", + ddi_driver_name(SD_DEVINFO(un)), + ddi_get_instance(SD_DEVINFO(un))); + un->un_wm_cache = kmem_cache_create( + name_str, sizeof (struct sd_w_map), + 8, sd_wm_cache_constructor, + sd_wm_cache_destructor, NULL, + (void *)un, NULL, 0); + if (!(un->un_wm_cache)) { + goto wm_cache_failed; + } + } + } + /* * Check the value of the NV_SUP bit and set * un_f_suppress_cache_flush accordingly. @@ -7994,7 +8183,7 @@ sd_unit_attach(dev_info_t *devi) /* * An error occurred during the attach; clean up & return failure. */ - +wm_cache_failed: devid_failed: setup_pm_failed: @@ -8057,6 +8246,15 @@ spinup_failed: mutex_enter(SD_MUTEX(un)); } + /* Cancel rmw warning message timeouts */ + if (un->un_rmw_msg_timeid != NULL) { + timeout_id_t temp_id = un->un_rmw_msg_timeid; + un->un_rmw_msg_timeid = NULL; + mutex_exit(SD_MUTEX(un)); + (void) untimeout(temp_id); + mutex_enter(SD_MUTEX(un)); + } + /* Cancel any pending retry timeouts */ if (un->un_retry_timeid != NULL) { timeout_id_t temp_id = un->un_retry_timeid; @@ -8270,6 +8468,14 @@ sd_unit_detach(dev_info_t *devi) mutex_enter(SD_MUTEX(un)); } + if (un->un_rmw_msg_timeid != NULL) { + timeout_id_t temp_id = un->un_rmw_msg_timeid; + un->un_rmw_msg_timeid = NULL; + mutex_exit(SD_MUTEX(un)); + (void) untimeout(temp_id); + mutex_enter(SD_MUTEX(un)); + } + if (un->un_dcvb_timeid != NULL) { timeout_id_t temp_id = un->un_dcvb_timeid; un->un_dcvb_timeid = NULL; @@ -10288,7 +10494,9 @@ sd_ready_and_valid(sd_ssc_t *ssc, int part) * a media is changed this routine will be called and 
the * block size is a function of media rather than device. */ - if (un->un_f_non_devbsize_supported && NOT_DEVBSIZE(un)) { + if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR || + un->un_f_non_devbsize_supported) && + un->un_tgt_blocksize != DEV_BSIZE) { if (!(un->un_wm_cache)) { (void) snprintf(name_str, sizeof (name_str), "%s%d_cache", @@ -10518,17 +10726,20 @@ sdread(dev_t dev, struct uio *uio, cred_t *cred_p) /* * Read requests are restricted to multiples of the system block size. */ - secmask = un->un_sys_blocksize - 1; + if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) + secmask = un->un_tgt_blocksize - 1; + else + secmask = DEV_BSIZE - 1; if (uio->uio_loffset & ((offset_t)(secmask))) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdread: file offset not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else if (uio->uio_iov->iov_len & (secmask)) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdread: transfer length not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else { err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio); @@ -10604,17 +10815,20 @@ sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p) /* * Write requests are restricted to multiples of the system block size. */ - secmask = un->un_sys_blocksize - 1; + if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) + secmask = un->un_tgt_blocksize - 1; + else + secmask = DEV_BSIZE - 1; if (uio->uio_loffset & ((offset_t)(secmask))) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdwrite: file offset not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else if (uio->uio_iov->iov_len & (secmask)) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdwrite: transfer length not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else { err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio); @@ -10690,17 +10904,20 @@ sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p) /* * Read requests are restricted to multiples of the system block size. 
*/ - secmask = un->un_sys_blocksize - 1; + if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) + secmask = un->un_tgt_blocksize - 1; + else + secmask = DEV_BSIZE - 1; if (uio->uio_loffset & ((offset_t)(secmask))) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdaread: file offset not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else if (uio->uio_iov->iov_len & (secmask)) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdaread: transfer length not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else { err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio); @@ -10776,17 +10993,20 @@ sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p) /* * Write requests are restricted to multiples of the system block size. */ - secmask = un->un_sys_blocksize - 1; + if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) + secmask = un->un_tgt_blocksize - 1; + else + secmask = DEV_BSIZE - 1; if (uio->uio_loffset & ((offset_t)(secmask))) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdawrite: file offset not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else if (uio->uio_iov->iov_len & (secmask)) { SD_ERROR(SD_LOG_READ_WRITE, un, "sdawrite: transfer length not modulo %d\n", - un->un_sys_blocksize); + secmask + 1); err = EINVAL; } else { err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio); @@ -11012,6 +11232,7 @@ sdstrategy(struct buf *bp) biodone(bp); return (0); } + /* As was done in the past, fail new cmds. if state is dumping. 
*/ if (un->un_state == SD_STATE_DUMPING) { bioerror(bp, ENXIO); @@ -11150,6 +11371,27 @@ sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp, /* FALLTHRU */ case SD_CHAIN_BUFIO: index = un->un_buf_chain_type; + if ((!un->un_f_has_removable_media) && + (un->un_tgt_blocksize != 0) && + (un->un_tgt_blocksize != DEV_BSIZE)) { + int secmask = 0, blknomask = 0; + blknomask = + (un->un_tgt_blocksize / DEV_BSIZE) - 1; + secmask = un->un_tgt_blocksize - 1; + + if ((bp->b_lblkno & (blknomask)) || + (bp->b_bcount & (secmask))) { + if (un->un_f_rmw_type != + SD_RMW_TYPE_RETURN_ERROR) { + if (un->un_f_pm_is_enabled == FALSE) + index = + SD_CHAIN_INFO_MSS_DSK_NO_PM; + else + index = + SD_CHAIN_INFO_MSS_DISK; + } + } + } break; case SD_CHAIN_USCSI: index = un->un_uscsi_chain_type; @@ -12039,6 +12281,20 @@ sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp) * request would exceed partition range. Converts * partition-relative block address to absolute. * + * Upon exit of this function: + * 1.I/O is aligned + * xp->xb_blkno represents the absolute sector address + * 2.I/O is misaligned + * xp->xb_blkno represents the absolute logical block address + * based on DEV_BSIZE. The logical block address will be + * converted to physical sector address in sd_mapblocksize_\ + * iostart. + * 3.I/O is misaligned but is aligned in "overrun" buf + * xp->xb_blkno represents the absolute logical block address + * based on DEV_BSIZE. The logical block address will be + * converted to physical sector address in sd_mapblocksize_\ + * iostart. But no RMW will be issued in this case. 
+ * * Context: Can sleep * * Issues: This follows what the old code did, in terms of accessing @@ -12060,6 +12316,8 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp) int partition; diskaddr_t partition_offset; struct sd_xbuf *xp; + int secmask = 0, blknomask = 0; + ushort_t is_aligned = TRUE; ASSERT(un != NULL); ASSERT(bp != NULL); @@ -12116,6 +12374,57 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp) (void) cmlb_partinfo(un->un_cmlbhandle, partition, &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT); + blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1; + secmask = un->un_tgt_blocksize - 1; + + if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) { + is_aligned = FALSE; + } + + if (!(NOT_DEVBSIZE(un))) { + /* + * If I/O is aligned, no need to involve RMW(Read Modify Write) + * Convert the logical block number to target's physical sector + * number. + */ + if (is_aligned) { + xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno); + } else { + switch (un->un_f_rmw_type) { + case SD_RMW_TYPE_RETURN_ERROR: + bp->b_flags |= B_ERROR; + goto error_exit; + + case SD_RMW_TYPE_DEFAULT: + mutex_enter(SD_MUTEX(un)); + if (un->un_rmw_msg_timeid == NULL) { + scsi_log(SD_DEVINFO(un), sd_label, + CE_WARN, "I/O request is not " + "aligned with %d disk sector size. " + "It is handled through Read Modify " + "Write but the performance is " + "very low.\n", + un->un_tgt_blocksize); + un->un_rmw_msg_timeid = + timeout(sd_rmw_msg_print_handler, + un, SD_RMW_MSG_PRINT_TIMEOUT); + } else { + un->un_rmw_incre_count ++; + } + mutex_exit(SD_MUTEX(un)); + break; + + case SD_RMW_TYPE_NO_WARNING: + default: + break; + } + + nblocks = SD_TGT2SYSBLOCK(un, nblocks); + partition_offset = SD_TGT2SYSBLOCK(un, + partition_offset); + } + } + /* * blocknum is the starting block number of the request. At this * point it is still relative to the start of the minor device. 
@@ -12136,7 +12445,7 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp) * a multiple of the system block size. */ if ((blocknum < 0) || (blocknum >= nblocks) || - ((bp->b_bcount & (un->un_sys_blocksize - 1)) != 0)) { + ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) { bp->b_flags |= B_ERROR; goto error_exit; } @@ -12145,11 +12454,18 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp) * If the requsted # blocks exceeds the available # blocks, that * is an overrun of the partition. */ - requested_nblocks = SD_BYTES2SYSBLOCKS(un, bp->b_bcount); + if ((!NOT_DEVBSIZE(un)) && is_aligned) { + requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount); + } else { + requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount); + } + available_nblocks = (size_t)(nblocks - blocknum); ASSERT(nblocks >= blocknum); if (requested_nblocks > available_nblocks) { + size_t resid; + /* * Allocate an "overrun" buf to allow the request to proceed * for the amount of space available in the partition. The @@ -12158,8 +12474,14 @@ sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp) * replaces the original buf here, and the original buf * is saved inside the overrun buf, for later use. */ - size_t resid = SD_SYSBLOCKS2BYTES(un, - (offset_t)(requested_nblocks - available_nblocks)); + if ((!NOT_DEVBSIZE(un)) && is_aligned) { + resid = SD_TGTBLOCKS2BYTES(un, + (offset_t)(requested_nblocks - available_nblocks)); + } else { + resid = SD_SYSBLOCKS2BYTES( + (offset_t)(requested_nblocks - available_nblocks)); + } + size_t count = bp->b_bcount - resid; /* * Note: count is an unsigned entity thus it'll NEVER @@ -12318,7 +12640,7 @@ sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp) * un->un_sys_blocksize as its block size or if bcount == 0. * In this case there is no layer-private data block allocated. 
*/ - if ((un->un_tgt_blocksize == un->un_sys_blocksize) || + if ((un->un_tgt_blocksize == DEV_BSIZE) || (bp->b_bcount == 0)) { goto done; } @@ -12333,7 +12655,7 @@ sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp) SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: " "tgt_blocksize:0x%x sys_blocksize: 0x%x\n", - un->un_tgt_blocksize, un->un_sys_blocksize); + un->un_tgt_blocksize, DEV_BSIZE); SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: " "request start block:0x%x\n", xp->xb_blkno); SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: " @@ -12376,7 +12698,7 @@ sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp) * Note that end_block is actually the block that follows the last * block of the request, but that's what is needed for the computation. */ - first_byte = SD_SYSBLOCKS2BYTES(un, (offset_t)xp->xb_blkno); + first_byte = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno); start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize; end_block = (first_byte + bp->b_bcount + un->un_tgt_blocksize - 1) / un->un_tgt_blocksize; @@ -12519,7 +12841,7 @@ sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp) * There is no shadow buf or layer-private data if the target is * using un->un_sys_blocksize as its block size or if bcount == 0. */ - if ((un->un_tgt_blocksize == un->un_sys_blocksize) || + if ((un->un_tgt_blocksize == DEV_BSIZE) || (bp->b_bcount == 0)) { goto exit; } @@ -15550,6 +15872,48 @@ sd_start_retry_command(void *arg) "sd_start_retry_command: exit\n"); } +/* + * Function: sd_rmw_msg_print_handler + * + * Description: If RMW mode is enabled and warning message is triggered + * print I/O count during a fixed interval. + * + * Arguments: arg - pointer to associated softstate for the device. + * + * Context: timeout(9F) thread context. May not sleep. 
+ */ +static void +sd_rmw_msg_print_handler(void *arg) +{ + struct sd_lun *un = arg; + + ASSERT(un != NULL); + ASSERT(!mutex_owned(SD_MUTEX(un))); + + SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, + "sd_rmw_msg_print_handler: entry\n"); + + mutex_enter(SD_MUTEX(un)); + + if (un->un_rmw_incre_count > 0) { + scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, + "%"PRIu64" I/O requests are not aligned with %d disk " + "sector size in %ld seconds. They are handled through " + "Read Modify Write but the performance is very low!\n", + un->un_rmw_incre_count, un->un_tgt_blocksize, + drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000); + un->un_rmw_incre_count = 0; + un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler, + un, SD_RMW_MSG_PRINT_TIMEOUT); + } else { + un->un_rmw_msg_timeid = NULL; + } + + mutex_exit(SD_MUTEX(un)); + + SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, + "sd_rmw_msg_print_handler: exit\n"); +} /* * Function: sd_start_direct_priority_command @@ -19336,6 +19700,7 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap, uint32_t *capacity_buf; uint64_t capacity; uint32_t lbasize; + uint32_t pbsize; int status; struct sd_lun *un; @@ -19418,7 +19783,7 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap, if (capacity == 0xffffffff) { sd_ssc_assessment(ssc, SD_FMT_IGNORE); status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, - &lbasize, path_flag); + &lbasize, &pbsize, path_flag); if (status != 0) { return (status); } @@ -19467,10 +19832,11 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap, * on the logical unit. The actual logical block count will be * this value plus one. * - * Currently the capacity is saved in terms of un->un_sys_blocksize, - * so scale the capacity value to reflect this. + * Currently, for removable media, the capacity is saved in terms + * of un->un_sys_blocksize, so scale the capacity value to reflect this. 
*/ - capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize); + if (un->un_f_has_removable_media) + capacity = (capacity + 1) * (lbasize / un->un_sys_blocksize); /* * Copy the values from the READ CAPACITY command into the space @@ -19504,15 +19870,19 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap, * determine the device capacity in number of blocks and the * device native block size. If this function returns a failure, * then the values in *capp and *lbap are undefined. - * This routine should always be called by - * sd_send_scsi_READ_CAPACITY which will appy any device - * specific adjustments to capacity and lbasize. + * This routine should be called by sd_send_scsi_READ_CAPACITY + * which will apply any device specific adjustments to capacity + * and lbasize. One exception is it is also called by + * sd_get_media_info_ext. In that function, there is no need to + * adjust the capacity and lbasize. * * Arguments: ssc - ssc contains ptr to soft state struct for the target * capp - ptr to unsigned 64-bit variable to receive the * capacity value from the command. 
* lbap - ptr to unsigned 32-bit varaible to receive the * block size value from the command + * psp - ptr to unsigned 32-bit variable to receive the + * physical block size value from the command * path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and * the normal command waitq, or SD_PATH_DIRECT_PRIORITY * to use the USCSI "direct" chain and bypass the normal @@ -19533,7 +19903,7 @@ sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap, static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, - uint32_t *lbap, int path_flag) + uint32_t *lbap, uint32_t *psp, int path_flag) { struct scsi_extended_sense sense_buf; struct uscsi_cmd ucmd_buf; @@ -19541,6 +19911,8 @@ sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, uint64_t *capacity16_buf; uint64_t capacity; uint32_t lbasize; + uint32_t pbsize; + uint32_t lbpb_exp; int status; struct sd_lun *un; @@ -19617,9 +19989,13 @@ sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, * bytes 8-11: Block length in bytes * (MSB in byte:8 & LSB in byte:11) * + * byte 13 (bits 3-0): LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT */ capacity = BE_64(capacity16_buf[0]); lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]); + lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f; + + pbsize = lbasize << lbpb_exp; /* * Done with capacity16_buf @@ -19666,9 +20042,11 @@ sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, *capp = capacity; *lbap = lbasize; + *psp = pbsize; SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: " - "capacity:0x%llx lbasize:0x%x\n", capacity, lbasize); + "capacity:0x%llx lbasize:0x%x, pbsize: 0x%x\n", + capacity, lbasize, pbsize); return (0); } @@ -21443,6 +21821,7 @@ sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p) case DKIOCHOTPLUGGABLE: case DKIOCINFO: case DKIOCGMEDIAINFO: + case DKIOCGMEDIAINFOEXT: case MHIOCENFAILFAST: case MHIOCSTATUS: case MHIOCTKOWN: @@ -21509,6 +21888,11 @@ skip_ready_valid: err = 
sd_get_media_info(dev, (caddr_t)arg, flag); break; + case DKIOCGMEDIAINFOEXT: + SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n"); + err = sd_get_media_info_ext(dev, (caddr_t)arg, flag); + break; + case DKIOCGGEOM: case DKIOCGVTOC: case DKIOCGEXTVTOC: @@ -22609,6 +22993,205 @@ no_assessment: return (rval); } +/* + * Function: sd_get_media_info_ext + * + * Description: This routine is the driver entry point for handling ioctl + * requests for the media type or command set profile used by the + * drive to operate on the media (DKIOCGMEDIAINFOEXT). The + * difference between this ioctl and DKIOCGMEDIAINFO is that the + * result of this ioctl contains both logical block size and physical + * block size. + * + * + * Arguments: dev - the device number + * arg - pointer to user provided dk_minfo_ext structure + * specifying the media type, logical block size, + * physical block size and disk capacity. + * flag - this argument is a pass through to ddi_copyxxx() + * directly from the mode argument of ioctl(). 
+ * + * Return Code: 0 + * EACCES + * EFAULT + * ENXIO + * EIO + */ + +static int +sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag) +{ + struct sd_lun *un = NULL; + struct uscsi_cmd com; + struct scsi_inquiry *sinq; + struct dk_minfo_ext media_info_ext; + u_longlong_t media_capacity; + uint64_t capacity; + uint_t lbasize; + uint_t pbsize; + uchar_t *out_data; + uchar_t *rqbuf; + int rval = 0; + int rtn; + sd_ssc_t *ssc; + + if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL || + (un->un_state == SD_STATE_OFFLINE)) { + return (ENXIO); + } + + SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_ext: entry\n"); + + out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP); + rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP); + ssc = sd_ssc_init(un); + + /* Issue a TUR to determine if the drive is ready with media present */ + rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA); + if (rval == ENXIO) { + goto done; + } else if (rval != 0) { + sd_ssc_assessment(ssc, SD_FMT_IGNORE); + } + + /* Now get configuration data */ + if (ISCD(un)) { + media_info_ext.dki_media_type = DK_CDROM; + + /* Allow SCMD_GET_CONFIGURATION to MMC devices only */ + if (un->un_f_mmc_cap == TRUE) { + rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, + SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN, + SD_PATH_STANDARD); + + if (rtn) { + /* + * We ignore all failures for CD and need to + * put the assessment before processing code + * to avoid missing assessment for FMA. 
+ */ + sd_ssc_assessment(ssc, SD_FMT_IGNORE); + /* + * Failed for other than an illegal request + * or command not supported + */ + if ((com.uscsi_status == STATUS_CHECK) && + (com.uscsi_rqstatus == STATUS_GOOD)) { + if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) || + (rqbuf[12] != 0x20)) { + rval = EIO; + goto no_assessment; + } + } + } else { + /* + * The GET CONFIGURATION command succeeded + * so set the media type according to the + * returned data + */ + media_info_ext.dki_media_type = out_data[6]; + media_info_ext.dki_media_type <<= 8; + media_info_ext.dki_media_type |= out_data[7]; + } + } + } else { + /* + * The profile list is not available, so we attempt to identify + * the media type based on the inquiry data + */ + sinq = un->un_sd->sd_inq; + if ((sinq->inq_dtype == DTYPE_DIRECT) || + (sinq->inq_dtype == DTYPE_OPTICAL)) { + /* This is a direct access device or optical disk */ + media_info_ext.dki_media_type = DK_FIXED_DISK; + + if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) || + (bcmp(sinq->inq_vid, "iomega", 6) == 0)) { + if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) { + media_info_ext.dki_media_type = DK_ZIP; + } else if ( + (bcmp(sinq->inq_pid, "jaz", 3) == 0)) { + media_info_ext.dki_media_type = DK_JAZ; + } + } + } else { + /* + * Not a CD, direct access or optical disk so return + * unknown media + */ + media_info_ext.dki_media_type = DK_UNKNOWN; + } + } + + /* + * Now read the capacity so we can provide the lbasize, + * pbsize and capacity. + */ + rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize, &pbsize, + SD_PATH_DIRECT); + + if (rval != 0) { + rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize, + SD_PATH_DIRECT); + + switch (rval) { + case 0: + pbsize = lbasize; + media_capacity = capacity; + /* + * sd_send_scsi_READ_CAPACITY() reports capacity in + * un->un_sys_blocksize chunks. So we need to convert + * it into cap.lbsize chunks. 
+ */ + if (un->un_f_has_removable_media) { + media_capacity *= un->un_sys_blocksize; + media_capacity /= lbasize; + } + break; + case EACCES: + rval = EACCES; + goto done; + default: + rval = EIO; + goto done; + } + } else { + media_capacity = capacity; + } + + /* + * If lun is expanded dynamically, update the un structure. + */ + mutex_enter(SD_MUTEX(un)); + if ((un->un_f_blockcount_is_valid == TRUE) && + (un->un_f_tgt_blocksize_is_valid == TRUE) && + (capacity > un->un_blockcount)) { + sd_update_block_info(un, lbasize, capacity); + } + mutex_exit(SD_MUTEX(un)); + + media_info_ext.dki_lbsize = lbasize; + media_info_ext.dki_capacity = media_capacity; + media_info_ext.dki_pbsize = pbsize; + + if (ddi_copyout(&media_info_ext, arg, sizeof (struct dk_minfo_ext), + flag)) { + rval = EFAULT; + goto no_assessment; + } +done: + if (rval != 0) { + if (rval == EIO) + sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK); + else + sd_ssc_assessment(ssc, SD_FMT_IGNORE); + } +no_assessment: + sd_ssc_fini(ssc); + kmem_free(out_data, SD_PROFILE_HEADER_LEN); + kmem_free(rqbuf, SENSE_LENGTH); + return (rval); +} /* * Function: sd_check_media @@ -24700,17 +25283,51 @@ sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) partition = SDPART(dev); SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition); + if (!(NOT_DEVBSIZE(un))) { + int secmask = 0; + int blknomask = 0; + + blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1; + secmask = un->un_tgt_blocksize - 1; + + if (blkno & blknomask) { + SD_TRACE(SD_LOG_DUMP, un, + "sddump: dump start block not modulo %d\n", + un->un_tgt_blocksize); + return (EINVAL); + } + + if ((nblk * DEV_BSIZE) & secmask) { + SD_TRACE(SD_LOG_DUMP, un, + "sddump: dump length not modulo %d\n", + un->un_tgt_blocksize); + return (EINVAL); + } + + } + /* Validate blocks to dump at against partition size. 
*/ (void) cmlb_partinfo(un->un_cmlbhandle, partition, &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT); - if ((blkno + nblk) > nblks) { - SD_TRACE(SD_LOG_DUMP, un, - "sddump: dump range larger than partition: " - "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n", - blkno, nblk, nblks); - return (EINVAL); + if (NOT_DEVBSIZE(un)) { + if ((blkno + nblk) > nblks) { + SD_TRACE(SD_LOG_DUMP, un, + "sddump: dump range larger than partition: " + "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n", + blkno, nblk, nblks); + return (EINVAL); + } + } else { + if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) + + (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) { + SD_TRACE(SD_LOG_DUMP, un, + "sddump: dump range larger than partition: " + "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n", + blkno, nblk, nblks); + return (EINVAL); + } } mutex_enter(&un->un_pm_mutex); @@ -24813,7 +25430,12 @@ sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) * Convert the partition-relative block number to a * disk physical block number. 
*/ - blkno += start_block; + if (NOT_DEVBSIZE(un)) { + blkno += start_block; + } else { + blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE); + blkno += start_block; + } SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno); @@ -24901,6 +25523,10 @@ sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) dma_resid = wr_bp->b_bcount; oblkno = blkno; + if (!(NOT_DEVBSIZE(un))) { + nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE); + } + while (dma_resid != 0) { for (i = 0; i < SD_NDUMP_RETRIES; i++) { @@ -29894,7 +30520,7 @@ sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr, * sys_blocksize != tgt_blocksize, need to re-adjust * blkno and save the index to beginning of dk_label */ - first_byte = SD_SYSBLOCKS2BYTES(un, start_block); + first_byte = SD_SYSBLOCKS2BYTES(start_block); real_addr = first_byte / un->un_tgt_blocksize; end_block = (first_byte + reqlength + diff --git a/usr/src/uts/common/os/dumpsubr.c b/usr/src/uts/common/os/dumpsubr.c index 201d6d1bfd..0753cc19da 100644 --- a/usr/src/uts/common/os/dumpsubr.c +++ b/usr/src/uts/common/os/dumpsubr.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -255,12 +255,12 @@ dumpinit(vnode_t *vp, char *name, int justchecking) if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) { size_t blk_size; struct dk_cinfo dki; - struct extvtoc vtoc; + struct dk_minfo minf; - if (VOP_IOCTL(cdev_vp, DKIOCGEXTVTOC, (intptr_t)&vtoc, - FKIOCTL, kcred, NULL, NULL) == 0 && - vtoc.v_sectorsz != 0) - blk_size = vtoc.v_sectorsz; + if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO, + (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL) + == 0 && minf.dki_lbsize != 0) + blk_size = minf.dki_lbsize; else blk_size = DEV_BSIZE; diff --git a/usr/src/uts/common/sys/dkio.h b/usr/src/uts/common/sys/dkio.h index 18f49e513a..caf7d7976d 100644 --- a/usr/src/uts/common/sys/dkio.h +++ b/usr/src/uts/common/sys/dkio.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -301,6 +301,11 @@ enum dkio_state { DKIO_NONE, DKIO_EJECTED, DKIO_INSERTED, DKIO_DEV_GONE }; #define DKIOCGTEMPERATURE (DKIOC|45) /* get temperature */ /* + * ioctl to get the media info including physical block size + */ +#define DKIOCGMEDIAINFOEXT (DKIOC|48) + +/* * Used for providing the temperature. */ @@ -324,6 +329,17 @@ struct dk_minfo { }; /* + * Used for Media info or the current profile info + * including physical block size if supported. 
+ */ +struct dk_minfo_ext { + uint_t dki_media_type; /* Media type or profile info */ + uint_t dki_lbsize; /* Logical blocksize of media */ + diskaddr_t dki_capacity; /* Capacity as # of dki_lbsize blks */ + uint_t dki_pbsize; /* Physical blocksize of media */ +}; + +/* * Media types or profiles known */ #define DK_UNKNOWN 0x00 /* Media inserted - type unknown */ diff --git a/usr/src/uts/common/sys/dklabel.h b/usr/src/uts/common/sys/dklabel.h index 01baa7157c..457c1ecadc 100644 --- a/usr/src/uts/common/sys/dklabel.h +++ b/usr/src/uts/common/sys/dklabel.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -207,7 +207,7 @@ struct dk_label { uint16_t dkl_ncyl; /* # of data cylinders */ uint16_t dkl_acyl; /* # of alternate cylinders */ uint16_t dkl_nhead; /* # of heads in this partition */ - uint16_t dkl_nsect; /* # of 512 byte sectors per track */ + uint16_t dkl_nsect; /* # of sectors per track */ uint16_t dkl_obs3; /* obsolete */ uint16_t dkl_obs4; /* obsolete */ struct dk_map32 dkl_map[NDKMAP]; /* logical partition headers */ diff --git a/usr/src/uts/common/sys/scsi/targets/sddef.h b/usr/src/uts/common/sys/scsi/targets/sddef.h index c5bbc59ef1..90129e40c3 100644 --- a/usr/src/uts/common/sys/scsi/targets/sddef.h +++ b/usr/src/uts/common/sys/scsi/targets/sddef.h @@ -438,7 +438,8 @@ struct sd_lun { /* SYNC CACHE needs to be */ /* sent in sdclose */ un_f_devid_transport_defined :1, /* devid defined by transport */ - un_f_reserved :12; + un_f_rmw_type :2, /* RMW type */ + un_f_reserved :10; /* Ptr to table of strings for ASC/ASCQ error message printing */ struct scsi_asq_key_strings *un_additional_codes; @@ -477,6 +478,8 @@ struct sd_lun { struct kmem_cache *un_wm_cache; /* fast alloc in non-512 write case */ uint_t un_rmw_count; /* count of read-modify-writes */ struct sd_w_map *un_wm; /* head of sd_w_map chain */ + uint64_t 
un_rmw_incre_count; /* count I/O */ + timeout_id_t un_rmw_msg_timeid; /* for RMW message control */ /* For timeout callback to issue a START STOP UNIT command */ timeout_id_t un_startstop_timeid; @@ -560,12 +563,12 @@ struct sd_lun { (blockcount * (un)->un_tgt_blocksize) /* Convert a byte count to a number of system blocks */ -#define SD_BYTES2SYSBLOCKS(un, bytecount) \ - ((bytecount + (un->un_sys_blocksize - 1))/un->un_sys_blocksize) +#define SD_BYTES2SYSBLOCKS(bytecount) \ + (((bytecount) + (DEV_BSIZE - 1)) / DEV_BSIZE) /* Convert a system block count to a number of bytes */ -#define SD_SYSBLOCKS2BYTES(un, blockcount) \ - (blockcount * (un)->un_sys_blocksize) +#define SD_SYSBLOCKS2BYTES(blockcount) \ + ((blockcount) * DEV_BSIZE) /* * Calculate the number of bytes needed to hold the requested number of bytes @@ -579,13 +582,19 @@ struct sd_lun { * to the system block location. */ #define SD_TGTBYTEOFFSET(un, sysblk, tgtblk) \ - (SD_SYSBLOCKS2BYTES(un, sysblk) - SD_TGTBLOCKS2BYTES(un, tgtblk)) + (SD_SYSBLOCKS2BYTES(sysblk) - SD_TGTBLOCKS2BYTES(un, tgtblk)) /* * Calculate the target block location from the system block location */ #define SD_SYS2TGTBLOCK(un, blockcnt) \ - ((blockcnt * un->un_sys_blocksize) / un->un_tgt_blocksize) + ((blockcnt) / ((un)->un_tgt_blocksize / DEV_BSIZE)) + +/* + * Calculate the system block location from the target block location + */ +#define SD_TGT2SYSBLOCK(un, blockcnt) \ + ((blockcnt) * ((un)->un_tgt_blocksize / DEV_BSIZE)) /* * SD_DEFAULT_MAX_XFER_SIZE is the default value to bound the max xfer @@ -768,6 +777,12 @@ _NOTE(MUTEX_PROTECTS_DATA(sd_lun::un_fi_mutex, #define SD_WTYPE_RMW 0x002 /* Write requires read-modify-write */ #define SD_WM_BUSY 0x100 /* write-map is busy */ +/* + * RMW type + */ +#define SD_RMW_TYPE_DEFAULT 0 /* do rmw with warning message */ +#define SD_RMW_TYPE_NO_WARNING 1 /* do rmw without warning message */ +#define SD_RMW_TYPE_RETURN_ERROR 2 /* rmw disabled */ /* Device error kstats */ struct sd_errstats { @@ -1678,6 
+1693,11 @@ struct sd_fm_internal { #define SD_RESTART_TIMEOUT (drv_usectohz((clock_t)100000)) /* + * 10s misaligned I/O warning message interval + */ +#define SD_RMW_MSG_PRINT_TIMEOUT (drv_usectohz((clock_t)10000000)) + +/* * 100 msec. is what we'll wait for certain retries for fibre channel * targets, 0 msec for parallel SCSI. */ diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c index 16fd5aff9d..06551ebe85 100644 --- a/usr/src/uts/common/xen/io/xdb.c +++ b/usr/src/uts/common/xen/io/xdb.c @@ -1202,6 +1202,7 @@ xdb_open_device(xdb_t *vdp) { dev_info_t *dip = vdp->xs_dip; uint64_t devsize; + int blksize; char *nodepath; ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); @@ -1252,7 +1253,17 @@ xdb_open_device(xdb_t *vdp) kmem_free(nodepath, MAXPATHLEN); return (DDI_FAILURE); } - vdp->xs_sectors = devsize / XB_BSIZE; + + blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl, + DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + "blksize", DEV_BSIZE); + if (blksize == DEV_BSIZE) + blksize = ldi_prop_get_int(vdp->xs_ldi_hdl, + LDI_DEV_T_ANY | DDI_PROP_DONTPASS | + DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE); + + vdp->xs_sec_size = blksize; + vdp->xs_sectors = devsize / blksize; /* check if the underlying device is a CD/DVD disc */ if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS, @@ -1388,13 +1399,12 @@ trans_retry: /* If feature-barrier isn't present in xenstore, add it. */ fb_exists = xenbus_exists(xsname, XBP_FB); - /* hard-coded 512-byte sector size */ - ssize = DEV_BSIZE; + ssize = (vdp->xs_sec_size == 0) ? 
DEV_BSIZE : vdp->xs_sec_size; sectors = vdp->xs_sectors; if (((!fb_exists && (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) || (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) || - (err = xenbus_printf(xbt, xsname, "sector-size", "%u", ssize)) || + (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) || (err = xenbus_printf(xbt, xsname, XBP_SECTORS, "%"PRIu64, sectors)) || (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) || diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h index f8046e8219..2173ca6ad9 100644 --- a/usr/src/uts/common/xen/io/xdb.h +++ b/usr/src/uts/common/xen/io/xdb.h @@ -113,6 +113,8 @@ struct xdb { uint32_t xs_type; /* # of total sectors */ uint64_t xs_sectors; + /* sector size if existed */ + uint_t xs_sec_size; /* blkif I/O request ring buffer */ xendev_ring_t *xs_ring; /* handle to access the ring buffer */ diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c index 109421797d..ef50b2bec7 100644 --- a/usr/src/uts/common/xen/io/xdf.c +++ b/usr/src/uts/common/xen/io/xdf.c @@ -478,7 +478,6 @@ vreq_setup(xdf_t *vdp, v_req_t *vreq) if (!ALIGNED_XFER(bp)) { if (bp->b_flags & (B_PAGEIO | B_PHYS)) bp_mapin(bp); - rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, @@ -1638,11 +1637,13 @@ xdf_get_flush_block(xdf_t *vdp) /* * Get a DEV_BSIZE aligned bufer */ - vdp->xdf_flush_mem = kmem_alloc(DEV_BSIZE * 2, KM_SLEEP); + vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP); vdp->xdf_cache_flush_block = - (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), DEV_BSIZE); + (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem), + (int)vdp->xdf_xdev_secsize); + if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block, - xdf_flush_block, DEV_BSIZE, NULL) != 0) + xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0) return (DDI_FAILURE); return (DDI_SUCCESS); } @@ 
-1746,7 +1747,7 @@ xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) geomp->g_acyl = 0; geomp->g_nhead = XDF_NHEADS; geomp->g_nsect = XDF_NSECTS; - geomp->g_secsize = XB_BSIZE; + geomp->g_secsize = vdp->xdf_xdev_secsize; geomp->g_capacity = vdp->xdf_xdev_nblocks; geomp->g_intrlv = 0; geomp->g_rpm = 7200; @@ -1764,6 +1765,7 @@ xdf_setstate_connected(xdf_t *vdp) dev_info_t *dip = vdp->xdf_dip; cmlb_geom_t pgeom; diskaddr_t nblocks = 0; + uint_t secsize = 0; char *oename, *xsname, *str; uint_t dinfo; @@ -1793,6 +1795,7 @@ xdf_setstate_connected(xdf_t *vdp) */ if (xenbus_gather(XBT_NULL, oename, XBP_SECTORS, "%"SCNu64, &nblocks, + XBP_SECTOR_SIZE, "%u", &secsize, XBP_INFO, "%u", &dinfo, NULL) != 0) { cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " @@ -1808,7 +1811,10 @@ xdf_setstate_connected(xdf_t *vdp) dinfo |= VDISK_CDROM; strfree(str); + if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE))) + secsize = DEV_BSIZE; vdp->xdf_xdev_nblocks = nblocks; + vdp->xdf_xdev_secsize = secsize; #ifdef _ILP32 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " @@ -2373,6 +2379,14 @@ xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) int xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) { + int instance; + xdf_t *vdp; + + instance = ddi_get_instance(dip); + + if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) + return (ENXIO); + switch (cmd) { case TG_GETPHYGEOM: return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); @@ -2381,7 +2395,9 @@ xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) case TG_GETCAPACITY: return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); case TG_GETBLOCKSIZE: - *(uint32_t *)arg = XB_BSIZE; + mutex_enter(&vdp->xdf_cb_lk); + *(uint32_t *)arg = vdp->xdf_xdev_secsize; + mutex_exit(&vdp->xdf_cb_lk); return (0); case TG_GETATTR: return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); @@ -2404,7 +2420,8 @@ xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, 
/* We don't allow IO from the oe_change callback thread */ ASSERT(curthread != vdp->xdf_oe_change_thread); - if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) + if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE)) + >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) return (EINVAL); bp = getrbuf(KM_SLEEP); @@ -2412,9 +2429,10 @@ xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, bp->b_flags = B_BUSY | B_READ; else bp->b_flags = B_BUSY | B_WRITE; + bp->b_un.b_addr = bufp; bp->b_bcount = reqlen; - bp->b_blkno = start; + bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE); bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ mutex_enter(&vdp->xdf_dev_lk); @@ -2582,7 +2600,7 @@ xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, case DKIOCGMEDIAINFO: { struct dk_minfo media_info; - media_info.dki_lbsize = DEV_BSIZE; + media_info.dki_lbsize = vdp->xdf_xdev_secsize; media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; if (XD_IS_CD(vdp)) media_info.dki_media_type = DK_CDROM; @@ -2664,7 +2682,7 @@ xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, !xdf_barrier_flush_disable) { rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, vdp->xdf_cache_flush_block, xdf_flush_block, - DEV_BSIZE, (void *)dev); + vdp->xdf_xdev_secsize, (void *)dev); } else { return (ENOTTY); } @@ -2686,6 +2704,7 @@ xdf_strategy(struct buf *bp) xdf_t *vdp; minor_t minor; diskaddr_t p_blkct, p_blkst; + daddr_t blkno; ulong_t nblks; int part; @@ -2726,16 +2745,24 @@ xdf_strategy(struct buf *bp) mutex_enter(&vdp->xdf_dev_lk); } + /* + * Adjust the real blkno and bcount according to the underlying + * physical sector size. 
+ */ + blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE); + /* check for a starting block beyond the disk or partition limit */ - if (bp->b_blkno > p_blkct) { + if (blkno > p_blkct) { DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, - vdp->xdf_addr, (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); + vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct)); + mutex_exit(&vdp->xdf_dev_lk); xdf_io_err(bp, EINVAL, 0); return (0); } /* Legacy: don't set error flag at this case */ - if (bp->b_blkno == p_blkct) { + if (blkno == p_blkct) { + mutex_exit(&vdp->xdf_dev_lk); bp->b_resid = bp->b_bcount; biodone(bp); return (0); @@ -2747,14 +2774,29 @@ xdf_strategy(struct buf *bp) bp->av_back = bp->av_forw = NULL; /* Adjust for partial transfer, this will result in an error later */ - nblks = bp->b_bcount >> XB_BSHIFT; - if ((bp->b_blkno + nblks) > p_blkct) { - bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; + if (vdp->xdf_xdev_secsize != 0 && + vdp->xdf_xdev_secsize != XB_BSIZE) { + nblks = bp->b_bcount / vdp->xdf_xdev_secsize; + } else { + nblks = bp->b_bcount >> XB_BSHIFT; + } + + if ((blkno + nblks) > p_blkct) { + if (vdp->xdf_xdev_secsize != 0 && + vdp->xdf_xdev_secsize != XB_BSIZE) { + bp->b_resid = + ((blkno + nblks) - p_blkct) * + vdp->xdf_xdev_secsize; + } else { + bp->b_resid = + ((blkno + nblks) - p_blkct) << + XB_BSHIFT; + } bp->b_bcount -= bp->b_resid; } DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", - vdp->xdf_addr, (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); + vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount)); /* Fix up the buf struct */ bp->b_flags |= B_BUSY; @@ -2792,6 +2834,9 @@ xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) NULL, NULL, NULL, NULL)) return (ENXIO); + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) + return (ENOSPC); + if (U_INVAL(uiop)) return (EINVAL); @@ -2822,7 +2867,7 @@ xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) NULL, NULL, NULL, NULL)) return (ENXIO); - if 
(uiop->uio_loffset >= XB_DTOB(p_blkcnt)) + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) return (ENOSPC); if (U_INVAL(uiop)) @@ -2853,7 +2898,7 @@ xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) NULL, NULL, NULL, NULL)) return (ENXIO); - if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) return (ENOSPC); if (U_INVAL(uiop)) @@ -2884,7 +2929,7 @@ xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) NULL, NULL, NULL, NULL)) return (ENXIO); - if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp)) return (ENOSPC); if (U_INVAL(uiop)) @@ -2921,9 +2966,11 @@ xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) NULL, NULL, NULL)) return (ENXIO); - if ((blkno + nblk) > p_blkcnt) { + if ((blkno + nblk) > + (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) { cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, - vdp->xdf_addr, blkno + nblk, (uint64_t)p_blkcnt); + vdp->xdf_addr, (daddr_t)((blkno + nblk) / + (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt); return (EINVAL); } @@ -3451,7 +3498,7 @@ xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) mutex_destroy(&vdp->xdf_cb_lk); mutex_destroy(&vdp->xdf_dev_lk); if (vdp->xdf_cache_flush_block != NULL) - kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); + kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize); ddi_soft_state_free(xdf_ssp, instance); return (DDI_SUCCESS); } diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h index a3319f70a3..f2a2a82dd5 100644 --- a/usr/src/uts/common/xen/io/xdf.h +++ b/usr/src/uts/common/xen/io/xdf.h @@ -48,7 +48,7 @@ extern "C" { #define XB_BSIZE DEV_BSIZE #define XB_BMASK (XB_BSIZE - 1) #define XB_BSHIFT 9 -#define XB_DTOB(bn) ((bn) << XB_BSHIFT) +#define XB_DTOB(bn, vdp) ((bn) * (vdp)->xdf_xdev_secsize) #define XB_MAX_SEGLEN (8 * XB_BSIZE) #define XB_SEGOFFSET (XB_MAX_SEGLEN - 1) @@ -222,6 +222,7 @@ typedef struct xdf { kcondvar_t xdf_dev_cv; /* 
cv used in I/O path */ uint_t xdf_dinfo; /* disk info from backend xenstore */ diskaddr_t xdf_xdev_nblocks; /* total size in block */ + uint_t xdf_xdev_secsize; /* disk blksize from backend */ cmlb_geom_t xdf_pgeom; boolean_t xdf_pgeom_set; boolean_t xdf_pgeom_fixed; diff --git a/usr/src/uts/common/xen/sys/xendev.h b/usr/src/uts/common/xen/sys/xendev.h index 8e5921dc3f..dad4ad222f 100644 --- a/usr/src/uts/common/xen/sys/xendev.h +++ b/usr/src/uts/common/xen/sys/xendev.h @@ -52,6 +52,7 @@ extern "C" { /* * Xenbus property interfaces, initialized by backend disk driver */ +#define XBP_SECTOR_SIZE "sector-size" /* backend prop: uint */ #define XBP_SECTORS "sectors" /* backend prop: uint64 */ #define XBP_INFO "info" /* backend prop: uint */ #define XBP_FB "feature-barrier" /* backend prop: boolean int */ diff --git a/usr/src/uts/sun4v/io/vdc.c b/usr/src/uts/sun4v/io/vdc.c index 6c5d37b940..b7729adeed 100644 --- a/usr/src/uts/sun4v/io/vdc.c +++ b/usr/src/uts/sun4v/io/vdc.c @@ -150,6 +150,7 @@ static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *, static void vdc_store_label_unk(vdc_t *vdc); static boolean_t vdc_is_opened(vdc_t *vdc); static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t); +static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t); /* handshake with vds */ static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); @@ -621,8 +622,10 @@ vdc_do_attach(dev_info_t *dip) vdc->state = VDC_STATE_INIT; vdc->lifecycle = VDC_LC_ATTACHING; vdc->session_id = 0; - vdc->block_size = DEV_BSIZE; - vdc->max_xfer_sz = maxphys / DEV_BSIZE; + vdc->vdisk_bsize = DEV_BSIZE; + vdc->vio_bmask = 0; + vdc->vio_bshift = 0; + vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize; /* * We assume, for now, that the vDisk server will export 'read' @@ -943,7 +946,7 @@ vdc_set_err_kstats(vdc_t *vdc) stp = (vd_err_stats_t *)vdc->err_stats->ks_data; ASSERT(stp != NULL); - stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->block_size; + stp->vd_capacity.value.ui64 = vdc->vdisk_size 
* vdc->vdisk_bsize; (void) strcpy(stp->vd_vid.value.c, "SUN"); (void) strcpy(stp->vd_pid.value.c, "VDSK"); @@ -1124,7 +1127,7 @@ vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, name, valuep, lengthp)); } nblocks = vdc->slice[VDCPART(dev)].nblocks; - blksize = vdc->block_size; + blksize = vdc->vdisk_bsize; mutex_exit(&vdc->lock); return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags, @@ -1382,6 +1385,7 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) size_t nbytes = nblk * DEV_BSIZE; int instance = VDCUNIT(dev); vdc_t *vdc = NULL; + diskaddr_t vio_blkno; if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); @@ -1390,8 +1394,16 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", instance, nbytes, blkno, (void *)addr); + + /* convert logical block to vio block */ + if ((blkno & vdc->vio_bmask) != 0) { + DMSG(vdc, 0, "Misaligned block number (%lu)\n", blkno); + return (EINVAL); + } + vio_blkno = blkno >> vdc->vio_bshift; + rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, - VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); + VDCPART(dev), vio_blkno, CB_STRATEGY, 0, VIO_write_dir); if (rv) { DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); return (rv); @@ -1422,6 +1434,7 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) static int vdc_strategy(struct buf *buf) { + diskaddr_t vio_blkno; int rv = -1; vdc_t *vdc = NULL; int instance = VDCUNIT(buf->b_edev); @@ -1448,8 +1461,21 @@ vdc_strategy(struct buf *buf) slice = VDCPART(buf->b_edev); } + /* + * In the buf structure, b_lblkno represents a logical block number + * using a block size of 512 bytes. For the VIO request, this block + * number has to be converted to be represented with the block size + * used by the VIO protocol. 
+ */ + if ((buf->b_lblkno & vdc->vio_bmask) != 0) { + bioerror(buf, EINVAL); + biodone(buf); + return (0); + } + vio_blkno = buf->b_lblkno >> vdc->vio_bshift; + rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr, - buf->b_bcount, slice, buf->b_lblkno, + buf->b_bcount, slice, vio_blkno, CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir : VIO_write_dir); @@ -1494,8 +1520,8 @@ vdc_min(struct buf *bufp) vdc = ddi_get_soft_state(vdc_state, instance); VERIFY(vdc != NULL); - if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) { - bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size; + if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) { + bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize; } } @@ -1670,7 +1696,7 @@ vdc_init_attr_negotiation(vdc_t *vdc) pkt.tag.vio_sid = vdc->session_id; /* fill in payload */ pkt.max_xfer_sz = vdc->max_xfer_sz; - pkt.vdisk_block_size = vdc->block_size; + pkt.vdisk_block_size = vdc->vdisk_bsize; pkt.xfer_mode = VIO_DRING_MODE_V1_0; pkt.operations = 0; /* server will set bits of valid operations */ pkt.vdisk_type = 0; /* server will set to valid device type */ @@ -2605,13 +2631,13 @@ vdc_init_descriptor_ring(vdc_t *vdc) * as we do not have the capability to split requests over * multiple DRing entries. 
*/ - if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { + if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) { DMSG(vdc, 0, "[%d] using minimum DRing size\n", vdc->instance); vdc->dring_max_cookies = maxphys / PAGESIZE; } else { vdc->dring_max_cookies = - (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; + (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE; } vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + (sizeof (ldc_mem_cookie_t) * @@ -4864,6 +4890,17 @@ vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) vdc->instance); attr_msg->vdisk_size = 0; } + + /* update the VIO block size */ + if (attr_msg->vdisk_block_size > 0 && + vdc_update_vio_bsize(vdc, + attr_msg->vdisk_block_size) != 0) { + DMSG(vdc, 0, "[%d] Invalid block size (%u) from vds", + vdc->instance, attr_msg->vdisk_block_size); + status = EINVAL; + break; + } + /* update disk, block and transfer sizes */ vdc_update_size(vdc, attr_msg->vdisk_size, attr_msg->vdisk_block_size, attr_msg->max_xfer_sz); @@ -4877,7 +4914,7 @@ vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", - vdc->instance, vdc->block_size, + vdc->instance, vdc->vdisk_bsize, attr_msg->vdisk_block_size); if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) || @@ -5266,7 +5303,7 @@ vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) return (EFAULT); } - VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); + VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { return (rv); @@ -5307,7 +5344,7 @@ vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) * flag - ioctl flags */ static int -vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) +vdc_dioctl_rwcmd(vdc_t *vdc, caddr_t arg, int flag) { struct dadkio_rwcmd32 rwcmd32; struct dadkio_rwcmd rwcmd; @@ -5351,7 +5388,7 @@ vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, 
int flag) bzero((caddr_t)&auio, sizeof (struct uio)); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; + auio.uio_loffset = rwcmd.blkaddr * vdc->vdisk_bsize; auio.uio_resid = rwcmd.buflen; auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; @@ -5363,7 +5400,8 @@ vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) */ buf->b_private = (void *)VD_SLICE_NONE; - status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); + status = physio(vdc_strategy, buf, VD_MAKE_DEV(vdc->instance, 0), + rw, vdc_min, &auio); biofini(buf); kmem_free(buf, sizeof (buf_t)); @@ -6639,14 +6677,23 @@ vdc_check_capacity(vdc_t *vdc) if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0) return (rv); - if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0) + if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || blk_size == 0) return (EINVAL); mutex_enter(&vdc->lock); - vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz); + /* + * First try to update the VIO block size (which is the same as the + * vdisk block size). If this returns an error then that means that + * we can not use that block size so basically the vdisk is unusable + * and we return an error. 
+ */ + rv = vdc_update_vio_bsize(vdc, blk_size); + if (rv == 0) + vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz); + mutex_exit(&vdc->lock); - return (0); + return (rv); } /* @@ -6969,7 +7016,7 @@ vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) case DIOCTL_RWCMD: { - return (vdc_dioctl_rwcmd(dev, arg, mode)); + return (vdc_dioctl_rwcmd(vdc, arg, mode)); } case DKIOCGAPART: @@ -7604,7 +7651,7 @@ vdc_create_fake_geometry(vdc_t *vdc) (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); - /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ + /* max_xfer_sz is #blocks so we don't need to divide by vdisk_bsize */ vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; /* @@ -7660,7 +7707,7 @@ vdc_create_fake_geometry(vdc_t *vdc) } vdc->minfo->dki_capacity = vdc->vdisk_size; - vdc->minfo->dki_lbsize = vdc->block_size; + vdc->minfo->dki_lbsize = vdc->vdisk_bsize; } static ushort_t @@ -7692,7 +7739,7 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) * update anything. 
*/ if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || - (blk_size == vdc->block_size && dsk_size == vdc->vdisk_size && + (blk_size == vdc->vdisk_bsize && dsk_size == vdc->vdisk_size && xfr_size == vdc->max_xfer_sz)) return; @@ -7706,13 +7753,11 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) { DMSG(vdc, 0, "[%d] vds block transfer size too big;" " using max supported by vdc", vdc->instance); - xfr_size = maxphys / DEV_BSIZE; - dsk_size = (dsk_size * blk_size) / DEV_BSIZE; - blk_size = DEV_BSIZE; + xfr_size = maxphys / blk_size; } vdc->max_xfer_sz = xfr_size; - vdc->block_size = blk_size; + vdc->vdisk_bsize = blk_size; vdc->vdisk_size = dsk_size; stp = (vd_err_stats_t *)vdc->err_stats->ks_data; @@ -7723,6 +7768,50 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size) } /* + * Update information about the VIO block size. The VIO block size is the + * same as the vdisk block size which is stored in vdc->vdisk_bsize so we + * do not store that information again. + * + * However, buf structures will always use a logical block size of 512 bytes + * (DEV_BSIZE) and we will need to convert logical block numbers to VIO block + * numbers for each read or write operation using vdc_strategy(). To speed up + * this conversion, we expect the VIO block size to be a power of 2 and a + * multiple of 512 bytes (DEV_BSIZE), and we cache some useful information. + * + * The function returns EINVAL if the new VIO block size (blk_size) is not a + * power of 2 or not a multiple of 512 bytes, otherwise it returns 0.
+ */ +static int +vdc_update_vio_bsize(vdc_t *vdc, uint32_t blk_size) +{ + uint32_t ratio, n; + int nshift = 0; + + vdc->vio_bmask = 0; + vdc->vio_bshift = 0; + + ASSERT(blk_size > 0); + + if ((blk_size % DEV_BSIZE) != 0) + return (EINVAL); + + ratio = blk_size / DEV_BSIZE; + + for (n = ratio; n > 1; n >>= 1) { + if ((n & 0x1) != 0) { + /* blk_size is not a power of 2 */ + return (EINVAL); + } + nshift++; + } + + vdc->vio_bshift = nshift; + vdc->vio_bmask = ratio - 1; + + return (0); +} + +/* * Function: * vdc_validate_geometry * @@ -7747,7 +7836,7 @@ vdc_validate_geometry(vdc_t *vdc) buf_t *buf; /* BREAD requests need to be in a buf_t structure */ dev_t dev; int rv, rval; - struct dk_label label; + struct dk_label *label; struct dk_geom geom; struct extvtoc vtoc; efi_gpt_t *gpt; @@ -7786,7 +7875,7 @@ vdc_validate_geometry(vdc_t *vdc) return (EIO); } - VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); + VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); @@ -7870,14 +7959,15 @@ vdc_validate_geometry(vdc_t *vdc) /* * Read disk label from start of disk */ + label = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP); buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); bioinit(buf); - buf->b_un.b_addr = (caddr_t)&label; - buf->b_bcount = DK_LABEL_SIZE; + buf->b_un.b_addr = (caddr_t)label; + buf->b_bcount = vdc->vdisk_bsize; buf->b_flags = B_BUSY | B_READ; buf->b_dev = cmpdev(dev); - rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, - DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); + rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)label, + vdc->vdisk_bsize, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); if (rv) { DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", vdc->instance); @@ -7892,15 +7982,17 @@ vdc_validate_geometry(vdc_t *vdc) biofini(buf); kmem_free(buf, sizeof (buf_t)); - if (rv != 0 || label.dkl_magic != DKL_MAGIC || - label.dkl_cksum != vdc_lbl2cksum(&label)) { + if (rv != 0 || label->dkl_magic != 
DKL_MAGIC || + label->dkl_cksum != vdc_lbl2cksum(label)) { DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", vdc->instance); + kmem_free(label, vdc->vdisk_bsize); mutex_enter(&vdc->lock); vdc_store_label_unk(vdc); return (EINVAL); } + kmem_free(label, vdc->vdisk_bsize); mutex_enter(&vdc->lock); vdc_store_label_vtoc(vdc, &geom, &vtoc); return (0); @@ -8108,7 +8200,7 @@ vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct extvtoc *vtoc) int i; ASSERT(MUTEX_HELD(&vdc->lock)); - ASSERT(vdc->block_size == vtoc->v_sectorsz); + ASSERT(vdc->vdisk_bsize == vtoc->v_sectorsz); vdc->vdisk_label = VD_DISK_LABEL_VTOC; bcopy(vtoc, vdc->vtoc, sizeof (struct extvtoc)); diff --git a/usr/src/uts/sun4v/io/vds.c b/usr/src/uts/sun4v/io/vds.c index 548fc0f048..45f4122465 100644 --- a/usr/src/uts/sun4v/io/vds.c +++ b/usr/src/uts/sun4v/io/vds.c @@ -119,6 +119,10 @@ #define VD_EFI_LBA_GPT 1 /* LBA of the GPT */ #define VD_EFI_LBA_GPE 2 /* LBA of the GPE */ +#define VD_EFI_DEV_SET(dev, vdsk, ioctl) \ + VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \ + (vdsk)->vdisk_bsize, (vdsk)->vdisk_size) + /* * Flags defining the behavior for flushing asynchronous writes used to * performed some write I/O requests. @@ -451,13 +455,14 @@ typedef struct vd { int open_flags; /* open flags */ uint_t nslices; /* number of slices we export */ size_t vdisk_size; /* number of blocks in vdisk */ - size_t vdisk_block_size; /* size of each vdisk block */ + size_t vdisk_bsize; /* blk size of the vdisk */ vd_disk_type_t vdisk_type; /* slice or entire disk */ vd_disk_label_t vdisk_label; /* EFI or VTOC label */ vd_media_t vdisk_media; /* media type of backing dev. */ boolean_t is_atapi_dev; /* Is this an IDE CD-ROM dev? 
*/ ushort_t max_xfer_sz; /* max xfer size in DEV_BSIZE */ - size_t block_size; /* blk size of actual device */ + size_t backend_bsize; /* blk size of backend device */ + int vio_bshift; /* shift for blk convertion */ boolean_t volume; /* is vDisk backed by volume */ boolean_t zvol; /* is vDisk backed by a zvol */ boolean_t file; /* is vDisk backed by a file? */ @@ -506,21 +511,20 @@ typedef struct vd { * followed by a GPT (efi_gpt_t) and a GPE (efi_gpe_t). * */ -#define VD_LABEL_VTOC_SIZE \ - P2ROUNDUP(sizeof (struct dk_label), DEV_BSIZE) +#define VD_LABEL_VTOC_SIZE(lba) \ + P2ROUNDUP(sizeof (struct dk_label), (lba)) -#define VD_LABEL_EFI_SIZE \ - P2ROUNDUP(DEV_BSIZE + sizeof (efi_gpt_t) + \ - sizeof (efi_gpe_t) * VD_MAXPART, DEV_BSIZE) +#define VD_LABEL_EFI_SIZE(lba) \ + P2ROUNDUP(2 * (lba) + sizeof (efi_gpe_t) * VD_MAXPART, \ + (lba)) #define VD_LABEL_VTOC(vd) \ ((struct dk_label *)(void *)((vd)->flabel)) -#define VD_LABEL_EFI_GPT(vd) \ - ((efi_gpt_t *)(void *)((vd)->flabel + DEV_BSIZE)) -#define VD_LABEL_EFI_GPE(vd) \ - ((efi_gpe_t *)(void *)((vd)->flabel + DEV_BSIZE + \ - sizeof (efi_gpt_t))) +#define VD_LABEL_EFI_GPT(vd, lba) \ + ((efi_gpt_t *)(void *)((vd)->flabel + (lba))) +#define VD_LABEL_EFI_GPE(vd, lba) \ + ((efi_gpe_t *)(void *)((vd)->flabel + 2 * (lba))) typedef struct vds_operation { @@ -757,6 +761,7 @@ vd_dskimg_io_params(vd_t *vd, int slice, size_t *blkp, size_t *lenp) ASSERT(vd->file || VD_DSKIMG(vd)); ASSERT(len > 0); + ASSERT(vd->vdisk_bsize == DEV_BSIZE); /* * If a file is exported as a slice then we don't care about the vtoc. 
@@ -797,7 +802,6 @@ vd_dskimg_io_params(vd_t *vd, int slice, size_t *blkp, size_t *lenp) ASSERT(vd->vtoc.v_sectorsz == DEV_BSIZE); } else { ASSERT(vd->vdisk_label == VD_DISK_LABEL_EFI); - ASSERT(vd->vdisk_block_size == DEV_BSIZE); } if (blk >= vd->slices[slice].nblocks) { @@ -875,6 +879,7 @@ vd_dskimg_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t offset, ASSERT(vd->file || VD_DSKIMG(vd)); ASSERT(len > 0); + ASSERT(vd->vdisk_bsize == DEV_BSIZE); if ((status = vd_dskimg_io_params(vd, slice, &offset, &len)) != 0) return ((status == ENODATA)? 0: -1); @@ -941,13 +946,14 @@ vd_dskimg_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t offset, * * Parameters: * disk_size - the disk size in bytes + * bsize - the disk block size in bytes * label - the returned default label. * * Return Code: * none. */ static void -vd_build_default_label(size_t disk_size, struct dk_label *label) +vd_build_default_label(size_t disk_size, size_t bsize, struct dk_label *label) { size_t size; char unit; @@ -1005,7 +1011,7 @@ vd_build_default_label(size_t disk_size, struct dk_label *label) } label->dkl_pcyl = disk_size / - (label->dkl_nsect * label->dkl_nhead * DEV_BSIZE); + (label->dkl_nsect * label->dkl_nhead * bsize); if (label->dkl_pcyl == 0) label->dkl_pcyl = 1; @@ -1027,7 +1033,7 @@ vd_build_default_label(size_t disk_size, struct dk_label *label) label->dkl_nhead, label->dkl_nsect); PR0("provided disk size: %ld bytes\n", (uint64_t) (label->dkl_pcyl * label->dkl_nhead * - label->dkl_nsect * DEV_BSIZE)); + label->dkl_nsect * bsize)); vd_get_readable_size(disk_size, &size, &unit); @@ -1230,6 +1236,8 @@ vd_dskimg_read_devid(vd_t *vd, ddi_devid_t *devid) uint_t chksum; int status, sz; + ASSERT(vd->vdisk_bsize == DEV_BSIZE); + if ((status = vd_dskimg_get_devid_block(vd, &blk)) != 0) return (status); @@ -1304,6 +1312,8 @@ vd_dskimg_write_devid(vd_t *vd, ddi_devid_t devid) size_t blk; int status; + ASSERT(vd->vdisk_bsize == DEV_BSIZE); + if (devid == NULL) { /* nothing to 
write */ return (0); @@ -1371,12 +1381,12 @@ vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) ASSERT(!vd->file); ASSERT(!vd->volume); - ASSERT(vd->vdisk_block_size > 0); + ASSERT(vd->vdisk_bsize > 0); max_sectors = vd->max_xfer_sz; - nblk = (len / vd->vdisk_block_size); + nblk = (len / vd->vdisk_bsize); - if (len % vd->vdisk_block_size != 0) + if (len % vd->vdisk_bsize != 0) return (EINVAL); /* @@ -1414,7 +1424,7 @@ vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) } ucmd.uscsi_cdb = (caddr_t)&cdb; ucmd.uscsi_bufaddr = data; - ucmd.uscsi_buflen = nsectors * vd->block_size; + ucmd.uscsi_buflen = nsectors * vd->backend_bsize; ucmd.uscsi_timeout = vd_scsi_rdwr_timeout; /* * Set flags so that the command is isolated from normal @@ -1459,7 +1469,7 @@ vd_do_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len) blk += nsectors; nblk -= nsectors; - data += nsectors * vd->vdisk_block_size; /* SECSIZE */ + data += nsectors * vd->vdisk_bsize; } return (status); @@ -1498,7 +1508,7 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen) size_t plen; /* length of data to be read from physical device */ char *buf; /* buffer area to fit physical device's block size */ - if (vd->block_size == 0) { + if (vd->backend_bsize == 0) { /* * The block size was not available during the attach, * try to update it now. @@ -1514,10 +1524,10 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen) * and adjust the block to be read from and the amount of data to * read to correspond with the device's block size. 
*/ - if (vd->vdisk_block_size == vd->block_size) + if (vd->vdisk_bsize == vd->backend_bsize) return (vd_do_scsi_rdwr(vd, operation, data, vblk, vlen)); - if (vd->vdisk_block_size > vd->block_size) + if (vd->vdisk_bsize > vd->backend_bsize) return (EINVAL); /* @@ -1540,23 +1550,23 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen) * v v * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- virtual disk: * | | | |XX|XX|XX|XX|XX|XX| | | | | | } block size is - * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- vd->vdisk_block_size + * --+--+--+--+--+--+--+--+--+--+--+--+--+--+--+- vd->vdisk_bsize * : : : : * >:==:< delta : : * : : : : * --+-----+-----+-----+-----+-----+-----+-----+-- physical disk: * | |YY:YY|YYYYY|YYYYY|YY:YY| | | } block size is - * --+-----+-----+-----+-----+-----+-----+-----+-- vd->block_size + * --+-----+-----+-----+-----+-----+-----+-----+-- vd->backend_bsize * ^ ^ * |<--------------------->| * | plen * pblk */ /* END CSTYLED */ - pblk = (vblk * vd->vdisk_block_size) / vd->block_size; - delta = (vblk * vd->vdisk_block_size) - (pblk * vd->block_size); - pnblk = ((delta + vlen - 1) / vd->block_size) + 1; - plen = pnblk * vd->block_size; + pblk = (vblk * vd->vdisk_bsize) / vd->backend_bsize; + delta = (vblk * vd->vdisk_bsize) - (pblk * vd->backend_bsize); + pnblk = ((delta + vlen - 1) / vd->backend_bsize) + 1; + plen = pnblk * vd->backend_bsize; PR2("vblk %lx:pblk %lx: vlen %ld:plen %ld", vblk, pblk, vlen, plen); @@ -1591,7 +1601,7 @@ static ssize_t vd_slice_flabel_read(vd_t *vd, caddr_t data, size_t offset, size_t length) { size_t n = 0; - uint_t limit = vd->flabel_limit * DEV_BSIZE; + uint_t limit = vd->flabel_limit * vd->vdisk_bsize; ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); ASSERT(vd->flabel != NULL); @@ -1646,7 +1656,7 @@ vd_slice_flabel_read(vd_t *vd, caddr_t data, size_t offset, size_t length) static ssize_t vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length) { - uint_t limit = 
vd->flabel_limit * DEV_BSIZE; + uint_t limit = vd->flabel_limit * vd->vdisk_bsize; struct dk_label *label; struct dk_geom geom; struct extvtoc vtoc; @@ -1663,7 +1673,7 @@ vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length) * write was successful, but note that nothing is actually overwritten. */ if (vd->vdisk_label == VD_DISK_LABEL_VTOC && - offset == 0 && length == DEV_BSIZE) { + offset == 0 && length == vd->vdisk_bsize) { label = (void *)data; /* check that this is a valid label */ @@ -1721,7 +1731,7 @@ vd_slice_flabel_write(vd_t *vd, caddr_t data, size_t offset, size_t length) * Return the starting block relative to the vdisk * backend for the remaining operation. * lengthp - pointer to the number of bytes to read or write. - * This should be a multiple of DEV_BSIZE. Return the + * This should be a multiple of vdisk_bsize. Return the * remaining number of bytes to read or write. * * Return Code: @@ -1739,6 +1749,7 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap, size_t ablk, asize, aoff, alen; ssize_t n; int sec, status; + size_t bsize = vd->vdisk_bsize; ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE); ASSERT(slice != 0); @@ -1759,23 +1770,23 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap, return (EIO); } - if (length % DEV_BSIZE != 0) + if (length % bsize != 0) return (EINVAL); /* handle any I/O with the fake label */ if (operation == VD_OP_BWRITE) - n = vd_slice_flabel_write(vd, data, blk * DEV_BSIZE, length); + n = vd_slice_flabel_write(vd, data, blk * bsize, length); else - n = vd_slice_flabel_read(vd, data, blk * DEV_BSIZE, length); + n = vd_slice_flabel_read(vd, data, blk * bsize, length); if (n == -1) return (EINVAL); - ASSERT(n % DEV_BSIZE == 0); + ASSERT(n % bsize == 0); /* adjust I/O arguments */ data += n; - blk += n / DEV_BSIZE; + blk += n / bsize; length -= n; /* check if there's something else to process */ @@ -1791,7 +1802,7 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int 
operation, caddr_t *datap, } if (vd->vdisk_label == VD_DISK_LABEL_EFI) { - asize = EFI_MIN_RESV_SIZE + 33; + asize = EFI_MIN_RESV_SIZE + (EFI_MIN_ARRAY_SIZE / bsize) + 1; ablk = vd->vdisk_size - asize; } else { ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC); @@ -1802,7 +1813,7 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap, asize = vd->dk_geom.dkg_acyl * csize; } - alen = length / DEV_BSIZE; + alen = length / bsize; aoff = blk; /* if we have reached the last block then the I/O is completed */ @@ -1834,10 +1845,10 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap, alen = ablk + asize - aoff; } - alen *= DEV_BSIZE; + alen *= bsize; if (operation == VD_OP_BREAD) { - bzero(data + (aoff - blk) * DEV_BSIZE, alen); + bzero(data + (aoff - blk) * bsize, alen); if (vd->vdisk_label == VD_DISK_LABEL_VTOC) { /* check if we read backup labels */ @@ -1848,9 +1859,9 @@ vd_slice_fake_rdwr(vd_t *vd, int slice, int operation, caddr_t *datap, for (sec = 1; (sec < 5 * 2 + 1); sec += 2) { if (ablk + sec >= blk && - ablk + sec < blk + (length / DEV_BSIZE)) { + ablk + sec < blk + (length / bsize)) { bcopy(label, data + - (ablk + sec - blk) * DEV_BSIZE, + (ablk + sec - blk) * bsize, sizeof (struct dk_label)); } } @@ -1899,6 +1910,8 @@ vd_bio_task(void *arg) ssize_t resid; int status; + ASSERT(vd->vdisk_bsize == DEV_BSIZE); + if (vd->zvol) { status = ldi_strategy(vd->ldi_handle[0], buf); @@ -2162,6 +2175,9 @@ vd_start_bio(vd_task_t *task) buf->b_flags |= B_WRITE; } + /* convert VIO block number to buf block number */ + buf->b_lblkno = offset << vd->vio_bshift; + request->status = ldi_strategy(vd->ldi_handle[slice], buf); } @@ -3101,7 +3117,8 @@ vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg) switch (cmd) { case DKIOCGETEFI: len = vd_slice_flabel_read(vd, - (caddr_t)dk_ioc->dki_data, lba * DEV_BSIZE, len); + (caddr_t)dk_ioc->dki_data, + lba * vd->vdisk_bsize, len); ASSERT(len > 0); @@ -3237,7 +3254,8 @@ vd_dskimg_validate_geometry(vd_t 
*vd) } vd->vdisk_label = VD_DISK_LABEL_UNK; - vd_build_default_label(vd->dskimg_size, &label); + vd_build_default_label(vd->dskimg_size, vd->vdisk_bsize, + &label); status = EINVAL; } else { vd->vdisk_label = VD_DISK_LABEL_VTOC; @@ -3835,7 +3853,7 @@ vd_get_capacity(vd_task_t *task) request->status = 0; - vd_cap.vdisk_block_size = vd->vdisk_block_size; + vd_cap.vdisk_block_size = vd->vdisk_bsize; vd_cap.vdisk_size = vd->vdisk_size; if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&vd_cap, 0, &nbytes, @@ -4480,7 +4498,7 @@ vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) * Must first get the maximum transfer size in bytes. */ size_t max_xfer_bytes = attr_msg->vdisk_block_size ? - attr_msg->vdisk_block_size*attr_msg->max_xfer_sz : + attr_msg->vdisk_block_size * attr_msg->max_xfer_sz : attr_msg->max_xfer_sz; size_t max_inband_msglen = sizeof (vd_dring_inband_msg_t) + @@ -4506,7 +4524,7 @@ vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) } /* Return the device's block size and max transfer size to the client */ - attr_msg->vdisk_block_size = vd->vdisk_block_size; + attr_msg->vdisk_block_size = vd->vdisk_bsize; attr_msg->max_xfer_sz = vd->max_xfer_sz; attr_msg->vdisk_size = vd->vdisk_size; @@ -5442,7 +5460,7 @@ vd_dskimg_is_iso_image(vd_t *vd) * Standard Identifier and is set to CD001 for a CD-ROM compliant * to the ISO 9660 standard. 
*/ - sec = (ISO_VOLDESC_SEC * ISO_SECTOR_SIZE) / vd->vdisk_block_size; + sec = (ISO_VOLDESC_SEC * ISO_SECTOR_SIZE) / vd->vdisk_bsize; rv = vd_dskimg_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)iso_buf, sec, ISO_SECTOR_SIZE); @@ -5507,16 +5525,13 @@ vd_setup_full_disk(vd_t *vd) ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); - vd->vdisk_block_size = DEV_BSIZE; - /* set the disk size, block size and the media type of the disk */ status = vd_backend_check_size(vd); if (status != 0) { if (!vd->scsi) { /* unexpected failure */ - PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", - status); + PRN("Failed to check backend size (errno %d)", status); return (status); } @@ -5526,7 +5541,8 @@ vd_setup_full_disk(vd_t *vd) * size of the disk and the block size. */ vd->vdisk_size = VD_SIZE_UNKNOWN; - vd->block_size = 0; + vd->vdisk_bsize = 0; + vd->backend_bsize = 0; vd->vdisk_media = VD_MEDIA_FIXED; } @@ -5697,7 +5713,7 @@ vd_setup_partition_vtoc(vd_t *vd) vd->vtoc.v_part[VD_ENTIRE_DISK_SLICE].p_size = vd->dk_geom.dkg_ncyl * csize; - vd_get_readable_size(vd->vdisk_size * vd->vdisk_block_size, + vd_get_readable_size(vd->vdisk_size * vd->vdisk_bsize, &size, &unit); /* @@ -5723,7 +5739,7 @@ vd_setup_partition_vtoc(vd_t *vd) /* create a fake label from the vtoc and geometry */ vd->flabel_limit = (uint_t)csize; - vd->flabel_size = VD_LABEL_VTOC_SIZE; + vd->flabel_size = VD_LABEL_VTOC_SIZE(vd->vdisk_bsize); vd->flabel = kmem_zalloc(vd->flabel_size, KM_SLEEP); vd_vtocgeom_to_label(&vd->vtoc, &vd->dk_geom, VD_LABEL_VTOC(vd)); @@ -5741,7 +5757,7 @@ vd_setup_partition_vtoc(vd_t *vd) * as a slice without the addition of any metadata. 
* * So when exporting the disk as an EFI disk, we fake a disk with the following - * layout: + * layout: (assuming the block size is 512 bytes) * * flabel +--- flabel_limit * <------> v @@ -5776,9 +5792,8 @@ vd_setup_partition_vtoc(vd_t *vd) * - blocks 34+N+1 to P define a fake reserved partition and backup label, it * returns 0 * - * Note: if the backend size is not a multiple of the vdisk block size - * (DEV_BSIZE = 512 byte) then the very end of the backend will not map to - * any block of the virtual disk. + * Note: if the backend size is not a multiple of the vdisk block size then + * the very end of the backend will not map to any block of the virtual disk. */ static int vd_setup_partition_efi(vd_t *vd) @@ -5788,23 +5803,35 @@ vd_setup_partition_efi(vd_t *vd) struct uuid uuid = EFI_USR; struct uuid efi_reserved = EFI_RESERVED; uint32_t crc; - uint64_t s0_start, s0_end; + uint64_t s0_start, s0_end, first_u_lba; + size_t bsize; - vd->flabel_limit = 34; - vd->flabel_size = VD_LABEL_EFI_SIZE; + ASSERT(vd->vdisk_bsize > 0); + + bsize = vd->vdisk_bsize; + /* + * The minimum size for the label is 16K (EFI_MIN_ARRAY_SIZE) + * for GPEs plus one block for the GPT and one for PMBR. + */ + first_u_lba = (EFI_MIN_ARRAY_SIZE / bsize) + 2; + vd->flabel_limit = (uint_t)first_u_lba; + vd->flabel_size = VD_LABEL_EFI_SIZE(bsize); vd->flabel = kmem_zalloc(vd->flabel_size, KM_SLEEP); - gpt = VD_LABEL_EFI_GPT(vd); - gpe = VD_LABEL_EFI_GPE(vd); + gpt = VD_LABEL_EFI_GPT(vd, bsize); + gpe = VD_LABEL_EFI_GPE(vd, bsize); - /* adjust the vdisk_size, we emulate the first 34 blocks */ - vd->vdisk_size += 34; - s0_start = 34; + /* + * Adjust the vdisk_size, we emulate the first few blocks + * for the disk label. 
+ */ + vd->vdisk_size += first_u_lba; + s0_start = first_u_lba; s0_end = vd->vdisk_size - 1; gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE); gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT); gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t)); - gpt->efi_gpt_FirstUsableLBA = LE_64(34ULL); + gpt->efi_gpt_FirstUsableLBA = LE_64(first_u_lba); gpt->efi_gpt_PartitionEntryLBA = LE_64(2ULL); gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t)); @@ -5834,7 +5861,8 @@ vd_setup_partition_efi(vd_t *vd) gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1); /* adjust the vdisk size for the backup GPT and GPE */ - vd->vdisk_size += 33; + vd->vdisk_size += (EFI_MIN_ARRAY_SIZE / bsize) + 1; + gpt->efi_gpt_AlternateLBA = LE_64(vd->vdisk_size - 1); CRC32(crc, gpe, sizeof (efi_gpe_t) * VD_MAXPART, -1U, crc32_table); gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc); @@ -5854,7 +5882,6 @@ static int vd_setup_backend_vnode(vd_t *vd) { int rval, status; - vattr_t vattr; dev_t dev; char *file_path = vd->device_path; ldi_handle_t lhandle; @@ -5874,20 +5901,6 @@ vd_setup_backend_vnode(vd_t *vd) */ vd->file = B_TRUE; - vattr.va_mask = AT_SIZE; - if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred, NULL)) - != 0) { - PRN("VOP_GETATTR(%s) = errno %d", file_path, status); - return (EIO); - } - - vd->dskimg_size = vattr.va_size; - - if (vd->file_vnode->v_flag & VNOMAP) { - PRN("File %s cannot be mapped", file_path); - return (EIO); - } - vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */ /* @@ -5938,10 +5951,6 @@ vd_setup_slice_image(vd_t *vd) struct dk_label label; int status; - /* sector size = block size = DEV_BSIZE */ - vd->block_size = DEV_BSIZE; - vd->vdisk_block_size = DEV_BSIZE; - vd->vdisk_size = vd->dskimg_size / DEV_BSIZE; vd->vdisk_media = VD_MEDIA_FIXED; vd->vdisk_label = (vd_slice_label == VD_DISK_LABEL_UNK)? 
vd_file_slice_label : vd_slice_label; @@ -5956,7 +5965,8 @@ vd_setup_slice_image(vd_t *vd) * adjust the vtoc so that it defines a single-slice * disk. */ - vd_build_default_label(vd->dskimg_size, &label); + vd_build_default_label(vd->dskimg_size, vd->vdisk_bsize, + &label); vd_label_to_vtocgeom(&label, &vd->vtoc, &vd->dk_geom); status = vd_setup_partition_vtoc(vd); } @@ -5970,6 +5980,12 @@ vd_setup_disk_image(vd_t *vd) int status; char *backend_path = vd->device_path; + if ((status = vd_backend_check_size(vd)) != 0) { + PRN("Fail to check size of %s (errno %d)", + backend_path, status); + return (EIO); + } + /* size should be at least sizeof(dk_label) */ if (vd->dskimg_size < sizeof (struct dk_label)) { PRN("Size of file has to be at least %ld bytes", @@ -5977,11 +5993,6 @@ vd_setup_disk_image(vd_t *vd) return (EIO); } - /* sector size = block size = DEV_BSIZE */ - vd->block_size = DEV_BSIZE; - vd->vdisk_block_size = DEV_BSIZE; - vd->vdisk_size = vd->dskimg_size / DEV_BSIZE; - /* * Find and validate the geometry of a disk image. */ @@ -5997,7 +6008,7 @@ vd_setup_disk_image(vd_t *vd) * of the ISO image (images for both drive types are stored * in the ISO-9660 format).
CDs can store up to just under 1Gb */ - if ((vd->vdisk_size * vd->vdisk_block_size) > ONE_GIGABYTE) + if ((vd->vdisk_size * vd->vdisk_bsize) > ONE_GIGABYTE) vd->vdisk_media = VD_MEDIA_DVD; else vd->vdisk_media = VD_MEDIA_CD; @@ -6179,14 +6190,6 @@ vd_setup_backend_ldi(vd_t *vd) if (vd->vdisk_type == VD_DISK_TYPE_DISK) { if (vd->volume) { - /* get size of backing device */ - if (ldi_get_size(vd->ldi_handle[0], &vd->dskimg_size) != - DDI_SUCCESS) { - PRN("ldi_get_size() failed for %s", - device_path); - return (EIO); - } - /* setup disk image */ return (vd_setup_disk_image(vd)); } @@ -6220,14 +6223,6 @@ vd_setup_single_slice_disk(vd_t *vd) char *device_path = vd->device_path; struct vtoc vtoc; - /* Get size of backing device */ - if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) { - PRN("ldi_get_size() failed for %s", device_path); - return (EIO); - } - vd->vdisk_size = lbtodb(vd->vdisk_size); /* convert to blocks */ - vd->block_size = DEV_BSIZE; - vd->vdisk_block_size = DEV_BSIZE; vd->vdisk_media = VD_MEDIA_FIXED; if (vd->volume) { @@ -6241,6 +6236,12 @@ vd_setup_single_slice_disk(vd_t *vd) vd->vdisk_type = VD_DISK_TYPE_SLICE; vd->nslices = 1; + /* Get size of backing device */ + if ((status = vd_backend_check_size(vd)) != 0) { + PRN("Fail to check size of %s (errno %d)", device_path, status); + return (EIO); + } + /* * When exporting a slice or a device as a single slice disk, we don't * care about any partitioning exposed by the backend. The goal is just @@ -6251,7 +6252,7 @@ vd_setup_single_slice_disk(vd_t *vd) * variable. 
*/ if (vd_slice_label == VD_DISK_LABEL_EFI || - vd->vdisk_size >= ONE_TERABYTE / DEV_BSIZE) { + vd->vdisk_size >= ONE_TERABYTE / vd->vdisk_bsize) { vd->vdisk_label = VD_DISK_LABEL_EFI; } else { status = ldi_ioctl(vd->ldi_handle[0], DKIOCGEXTVTOC, @@ -6281,8 +6282,8 @@ vd_setup_single_slice_disk(vd_t *vd) } else if (vd_slice_label == VD_DISK_LABEL_VTOC) { vd->vdisk_label = VD_DISK_LABEL_VTOC; - vd_build_default_label(vd->vdisk_size * DEV_BSIZE, - &label); + vd_build_default_label(vd->vdisk_size * vd->vdisk_bsize, + vd->vdisk_bsize, &label); vd_label_to_vtocgeom(&label, &vd->vtoc, &vd->dk_geom); } else { @@ -6302,13 +6303,50 @@ vd_setup_single_slice_disk(vd_t *vd) return (status); } +/* + * This function is invoked when setting up the vdisk backend and to process + * the VD_OP_GET_CAPACITY operation. It checks the backend size and sets the + * following attributes of the vd structure: + * + * - vdisk_bsize: block size for the virtual disk used by the VIO protocol. Its + * value is 512 bytes (DEV_BSIZE) when the backend is a file, a volume or a + * CD/DVD. When the backend is a disk or a disk slice then it has the value + * of the logical block size of that disk (as returned by the DKIOCGMEDIAINFO + * ioctl). This block size is expected to be a power of 2 and a multiple of + * 512. + * + * - vdisk_size: size of the virtual disk expressed as a number of vdisk_bsize + * blocks. + * + * vdisk_size and vdisk_bsize are sent to the vdisk client during the connection + * handshake and in the result of a VD_OP_GET_CAPACITY operation. + * + * - backend_bsize: block size of the backend device. backend_bsize has the same + * value as vdisk_bsize except when the backend is a CD/DVD. In that case, + * vdisk_bsize is set to 512 (DEV_BSIZE) while backend_bsize is set to the + * effective logical block size of the CD/DVD (usually 2048). + * + * - dskimg_size: size of the backend when the backend is a disk image.
This + * attribute is set only when the backend is a file or a volume, otherwise it + * is unused. + * + * - vio_bshift: number of bits to shift to convert a VIO block number (which + * uses a block size of vdisk_bsize) to a buf(9s) block number (which uses a + * block size of 512 bytes) i.e. we have vdisk_bsize = 512 x 2 ^ vio_bshift + * + * - vdisk_media: media of the virtual disk. This function only sets this + * attribute for physical disk and CD/DVD. For other backend types, this + * attribute is set in the setup function of the backend. + */ static int vd_backend_check_size(vd_t *vd) { - size_t backend_size, old_size, new_size; + size_t backend_size, backend_bsize, vdisk_bsize; + size_t old_size, new_size; struct dk_minfo minfo; vattr_t vattr; - int rval, rv; + int rval, rv, media, nshift = 0; + uint32_t n; if (vd->file) { @@ -6320,20 +6358,23 @@ vd_backend_check_size(vd_t *vd) return (rv); } backend_size = vattr.va_size; + backend_bsize = DEV_BSIZE; + vdisk_bsize = DEV_BSIZE; - } else if (vd->volume || vd->vdisk_type == VD_DISK_TYPE_SLICE) { + } else if (vd->volume) { - /* physical slice or volume (slice or full disk) */ + /* volume (slice or full disk) */ rv = ldi_get_size(vd->ldi_handle[0], &backend_size); if (rv != DDI_SUCCESS) { PR0("ldi_get_size() failed for %s", vd->device_path); return (EIO); } + backend_bsize = DEV_BSIZE; + vdisk_bsize = DEV_BSIZE; } else { - /* physical disk */ - ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); + /* physical disk or slice */ rv = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, (intptr_t)&minfo, (vd->open_flags | FKIOCTL), kcred, &rval); @@ -6342,17 +6383,58 @@ vd_backend_check_size(vd_t *vd) vd->device_path, rv); return (rv); } - backend_size = minfo.dki_capacity * minfo.dki_lbsize; + + if (vd->vdisk_type == VD_DISK_TYPE_SLICE) { + rv = ldi_get_size(vd->ldi_handle[0], &backend_size); + if (rv != DDI_SUCCESS) { + PR0("ldi_get_size() failed for %s", + vd->device_path); + return (EIO); + } + } else { + ASSERT(vd->vdisk_type
== VD_DISK_TYPE_DISK); + backend_size = minfo.dki_capacity * minfo.dki_lbsize; + } + + backend_bsize = minfo.dki_lbsize; + media = DK_MEDIATYPE2VD_MEDIATYPE(minfo.dki_media_type); + + /* + * If the device is a CD or a DVD then we force the vdisk block + * size to 512 bytes (DEV_BSIZE). In that case, vdisk_bsize can + * be different from backend_bsize. + */ + if (media == VD_MEDIA_CD || media == VD_MEDIA_DVD) + vdisk_bsize = DEV_BSIZE; + else + vdisk_bsize = backend_bsize; } + /* check vdisk block size */ + if (vdisk_bsize == 0 || vdisk_bsize % DEV_BSIZE != 0) + return (EINVAL); + old_size = vd->vdisk_size; - new_size = backend_size / DEV_BSIZE; + new_size = backend_size / vdisk_bsize; /* check if size has changed */ - if (old_size != VD_SIZE_UNKNOWN && old_size == new_size) + if (old_size != VD_SIZE_UNKNOWN && old_size == new_size && + vd->vdisk_bsize == vdisk_bsize) return (0); + /* cache info for blk conversion */ + for (n = vdisk_bsize / DEV_BSIZE; n > 1; n >>= 1) { + if ((n & 0x1) != 0) { + /* blk_size is not a power of 2 */ + return (EINVAL); + } + nshift++; + } + + vd->vio_bshift = nshift; vd->vdisk_size = new_size; + vd->vdisk_bsize = vdisk_bsize; + vd->backend_bsize = backend_bsize; if (vd->file || vd->volume) vd->dskimg_size = backend_size; @@ -6384,9 +6466,7 @@ vd_backend_check_size(vd_t *vd) } else if (!vd->file && !vd->volume) { /* physical disk */ ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); - vd->block_size = minfo.dki_lbsize; - vd->vdisk_media = - DK_MEDIATYPE2VD_MEDIATYPE(minfo.dki_media_type); + vd->vdisk_media = media; } return (0); diff --git a/usr/src/uts/sun4v/sys/vdc.h b/usr/src/uts/sun4v/sys/vdc.h index 63b76b9d27..eecaf9a30b 100644 --- a/usr/src/uts/sun4v/sys/vdc.h +++ b/usr/src/uts/sun4v/sys/vdc.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms.
*/ @@ -98,6 +98,10 @@ extern "C" { */ #define VD_MAKE_DEV(instance, minor) ((instance << VDCUNIT_SHIFT) | minor) +#define VDC_EFI_DEV_SET(dev, vdsk, ioctl) \ + VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \ + (vdsk)->vdisk_bsize, (vdsk)->vdisk_size) + /* * variables controlling how long to wait before timing out and how many * retries to attempt before giving up when communicating with vds. @@ -302,7 +306,9 @@ typedef struct vdc { uint32_t vdisk_media; /* physical media type of vDisk */ uint64_t vdisk_size; /* device size in blocks */ uint64_t max_xfer_sz; /* maximum block size of a descriptor */ - uint64_t block_size; /* device block size used */ + uint64_t vdisk_bsize; /* blk size for the virtual disk */ + uint32_t vio_bmask; /* mask to check vio blk alignment */ + int vio_bshift; /* shift for vio blk conversion */ uint64_t operations; /* bitmask of ops. server supports */ struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ diff --git a/usr/src/uts/sun4v/sys/vdsk_common.h b/usr/src/uts/sun4v/sys/vdsk_common.h index 62b45c2df4..0464964847 100644 --- a/usr/src/uts/sun4v/sys/vdsk_common.h +++ b/usr/src/uts/sun4v/sys/vdsk_common.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -521,11 +521,11 @@ typedef struct vd_efi_dev { vd_efi_ioctl_func vdisk_ioctl; /* vdisk ioctl function */ } vd_efi_dev_t; -#define VD_EFI_DEV_SET(efi_dev, vdsk, ioctl) \ - (efi_dev).vdisk = vdsk; \ - (efi_dev).vdisk_ioctl = ioctl; \ - (efi_dev).block_size = (vdsk)->block_size; \ - (efi_dev).disk_size = (vdsk)->vdisk_size; +#define VDSK_EFI_DEV_SET(efi_dev, vdsk, ioctl, bsize, dsize) \ + (efi_dev).vdisk = vdsk; \ + (efi_dev).vdisk_ioctl = ioctl; \ + (efi_dev).block_size = bsize; \ + (efi_dev).disk_size = dsize; int vd_efi_alloc_and_read(vd_efi_dev_t *dev, efi_gpt_t **gpt, efi_gpe_t **gpe); |