Diffstat (limited to 'usr/src/uts/sun4v/io/vdc.c')
-rw-r--r--  usr/src/uts/sun4v/io/vdc.c  164
1 file changed, 128 insertions(+), 36 deletions(-)
diff --git a/usr/src/uts/sun4v/io/vdc.c b/usr/src/uts/sun4v/io/vdc.c
index 6c5d37b940..b7729adeed 100644
--- a/usr/src/uts/sun4v/io/vdc.c
+++ b/usr/src/uts/sun4v/io/vdc.c
@@ -150,6 +150,7 @@ static void vdc_store_label_vtoc(vdc_t *, struct dk_geom *,
static void vdc_store_label_unk(vdc_t *vdc);
static boolean_t vdc_is_opened(vdc_t *vdc);
static void vdc_update_size(vdc_t *vdc, size_t, size_t, size_t);
+static int vdc_update_vio_bsize(vdc_t *vdc, uint32_t);
/* handshake with vds */
static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
@@ -621,8 +622,10 @@ vdc_do_attach(dev_info_t *dip)
vdc->state = VDC_STATE_INIT;
vdc->lifecycle = VDC_LC_ATTACHING;
vdc->session_id = 0;
- vdc->block_size = DEV_BSIZE;
- vdc->max_xfer_sz = maxphys / DEV_BSIZE;
+ vdc->vdisk_bsize = DEV_BSIZE;
+ vdc->vio_bmask = 0;
+ vdc->vio_bshift = 0;
+ vdc->max_xfer_sz = maxphys / vdc->vdisk_bsize;
/*
* We assume, for now, that the vDisk server will export 'read'
@@ -943,7 +946,7 @@ vdc_set_err_kstats(vdc_t *vdc)
stp = (vd_err_stats_t *)vdc->err_stats->ks_data;
ASSERT(stp != NULL);
- stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->block_size;
+ stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->vdisk_bsize;
(void) strcpy(stp->vd_vid.value.c, "SUN");
(void) strcpy(stp->vd_pid.value.c, "VDSK");
@@ -1124,7 +1127,7 @@ vdc_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
name, valuep, lengthp));
}
nblocks = vdc->slice[VDCPART(dev)].nblocks;
- blksize = vdc->block_size;
+ blksize = vdc->vdisk_bsize;
mutex_exit(&vdc->lock);
return (ddi_prop_op_nblocks_blksize(dev, dip, prop_op, mod_flags,
@@ -1382,6 +1385,7 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
size_t nbytes = nblk * DEV_BSIZE;
int instance = VDCUNIT(dev);
vdc_t *vdc = NULL;
+ diskaddr_t vio_blkno;
if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
@@ -1390,8 +1394,16 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
instance, nbytes, blkno, (void *)addr);
+
+ /* convert logical block to vio block */
+ if ((blkno & vdc->vio_bmask) != 0) {
+ DMSG(vdc, 0, "Misaligned block number (%lu)\n", blkno);
+ return (EINVAL);
+ }
+ vio_blkno = blkno >> vdc->vio_bshift;
+
rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
- VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
+ VDCPART(dev), vio_blkno, CB_STRATEGY, 0, VIO_write_dir);
if (rv) {
DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
return (rv);
@@ -1422,6 +1434,7 @@ vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
static int
vdc_strategy(struct buf *buf)
{
+ diskaddr_t vio_blkno;
int rv = -1;
vdc_t *vdc = NULL;
int instance = VDCUNIT(buf->b_edev);
@@ -1448,8 +1461,21 @@ vdc_strategy(struct buf *buf)
slice = VDCPART(buf->b_edev);
}
+ /*
+ * In the buf structure, b_lblkno represents a logical block number
+ * using a block size of 512 bytes. For the VIO request, this block
+ * number has to be converted to be represented with the block size
+ * used by the VIO protocol.
+ */
+ if ((buf->b_lblkno & vdc->vio_bmask) != 0) {
+ bioerror(buf, EINVAL);
+ biodone(buf);
+ return (0);
+ }
+ vio_blkno = buf->b_lblkno >> vdc->vio_bshift;
+
rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
- buf->b_bcount, slice, buf->b_lblkno,
+ buf->b_bcount, slice, vio_blkno,
CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
VIO_write_dir);
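
As a concrete illustration of the conversion described in the comment above, here is a minimal user-space sketch (not driver code; the 4096-byte VIO block size and the sample block numbers are assumptions for illustration). Only b_lblkno values aligned to the VIO block size pass the mask test; aligned values are then shifted down to VIO block numbers. With vio_bshift = 3 and vio_bmask = 0x7, logical block 24 maps to VIO block 3, while logical block 10 is rejected.

#include <stdio.h>
#include <inttypes.h>

/*
 * Minimal sketch of the logical-to-VIO block conversion used by
 * vdc_strategy() and vdc_dump() above, assuming a 4096-byte VIO block
 * size (so vdc_update_vio_bsize() would cache vio_bshift = 3 and
 * vio_bmask = 0x7).
 */
int
main(void)
{
	uint64_t vio_bmask = 0x7;	/* (4096 / 512) - 1 */
	int vio_bshift = 3;		/* log2(4096 / 512) */
	uint64_t lblkno[] = { 0, 8, 24, 10 };
	int i;

	for (i = 0; i < 4; i++) {
		if ((lblkno[i] & vio_bmask) != 0) {
			/* not aligned to a VIO block boundary -> EINVAL */
			printf("lblkno %" PRIu64 ": misaligned\n", lblkno[i]);
			continue;
		}
		printf("lblkno %" PRIu64 " -> vio_blkno %" PRIu64 "\n",
		    lblkno[i], lblkno[i] >> vio_bshift);
	}
	return (0);
}
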
@@ -1494,8 +1520,8 @@ vdc_min(struct buf *bufp)
vdc = ddi_get_soft_state(vdc_state, instance);
VERIFY(vdc != NULL);
- if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
- bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
+ if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->vdisk_bsize)) {
+ bufp->b_bcount = vdc->max_xfer_sz * vdc->vdisk_bsize;
}
}
@@ -1670,7 +1696,7 @@ vdc_init_attr_negotiation(vdc_t *vdc)
pkt.tag.vio_sid = vdc->session_id;
/* fill in payload */
pkt.max_xfer_sz = vdc->max_xfer_sz;
- pkt.vdisk_block_size = vdc->block_size;
+ pkt.vdisk_block_size = vdc->vdisk_bsize;
pkt.xfer_mode = VIO_DRING_MODE_V1_0;
pkt.operations = 0; /* server will set bits of valid operations */
pkt.vdisk_type = 0; /* server will set to valid device type */
@@ -2605,13 +2631,13 @@ vdc_init_descriptor_ring(vdc_t *vdc)
* as we do not have the capability to split requests over
* multiple DRing entries.
*/
- if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) {
+ if ((vdc->max_xfer_sz * vdc->vdisk_bsize) < maxphys) {
DMSG(vdc, 0, "[%d] using minimum DRing size\n",
vdc->instance);
vdc->dring_max_cookies = maxphys / PAGESIZE;
} else {
vdc->dring_max_cookies =
- (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE;
+ (vdc->max_xfer_sz * vdc->vdisk_bsize) / PAGESIZE;
}
vdc->dring_entry_size = (sizeof (vd_dring_entry_t) +
(sizeof (ldc_mem_cookie_t) *
@@ -4864,6 +4890,17 @@ vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
vdc->instance);
attr_msg->vdisk_size = 0;
}
+
+ /* update the VIO block size */
+ if (attr_msg->vdisk_block_size > 0 &&
+ vdc_update_vio_bsize(vdc,
+ attr_msg->vdisk_block_size) != 0) {
+ DMSG(vdc, 0, "[%d] Invalid block size (%u) from vds",
+ vdc->instance, attr_msg->vdisk_block_size);
+ status = EINVAL;
+ break;
+ }
+
/* update disk, block and transfer sizes */
vdc_update_size(vdc, attr_msg->vdisk_size,
attr_msg->vdisk_block_size, attr_msg->max_xfer_sz);
@@ -4877,7 +4914,7 @@ vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n",
vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz);
DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n",
- vdc->instance, vdc->block_size,
+ vdc->instance, vdc->vdisk_bsize,
attr_msg->vdisk_block_size);
if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) ||
@@ -5266,7 +5303,7 @@ vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag)
return (EFAULT);
}
- VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
+ VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) {
return (rv);
@@ -5307,7 +5344,7 @@ vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag)
* flag - ioctl flags
*/
static int
-vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag)
+vdc_dioctl_rwcmd(vdc_t *vdc, caddr_t arg, int flag)
{
struct dadkio_rwcmd32 rwcmd32;
struct dadkio_rwcmd rwcmd;
@@ -5351,7 +5388,7 @@ vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag)
bzero((caddr_t)&auio, sizeof (struct uio));
auio.uio_iov = &aiov;
auio.uio_iovcnt = 1;
- auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE;
+ auio.uio_loffset = rwcmd.blkaddr * vdc->vdisk_bsize;
auio.uio_resid = rwcmd.buflen;
auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE;
@@ -5363,7 +5400,8 @@ vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag)
*/
buf->b_private = (void *)VD_SLICE_NONE;
- status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio);
+ status = physio(vdc_strategy, buf, VD_MAKE_DEV(vdc->instance, 0),
+ rw, vdc_min, &auio);
biofini(buf);
kmem_free(buf, sizeof (buf_t));
@@ -6639,14 +6677,23 @@ vdc_check_capacity(vdc_t *vdc)
if ((rv = vdc_get_capacity(vdc, &dsk_size, &blk_size)) != 0)
return (rv);
- if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0)
+ if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 || blk_size == 0)
return (EINVAL);
mutex_enter(&vdc->lock);
- vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz);
+ /*
+ * First try to update the VIO block size (which is the same as the
+ * vdisk block size). If this returns an error, then we cannot use
+ * that block size, the vdisk is effectively unusable, and we return
+ * an error.
+ */
+ rv = vdc_update_vio_bsize(vdc, blk_size);
+ if (rv == 0)
+ vdc_update_size(vdc, dsk_size, blk_size, vdc->max_xfer_sz);
+
mutex_exit(&vdc->lock);
- return (0);
+ return (rv);
}
/*
@@ -6969,7 +7016,7 @@ vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp)
case DIOCTL_RWCMD:
{
- return (vdc_dioctl_rwcmd(dev, arg, mode));
+ return (vdc_dioctl_rwcmd(vdc, arg, mode));
}
case DKIOCGAPART:
@@ -7604,7 +7651,7 @@ vdc_create_fake_geometry(vdc_t *vdc)
(void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
(void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
- /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */
+ /* max_xfer_sz is #blocks so we don't need to divide by vdisk_bsize */
vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz;
/*
@@ -7660,7 +7707,7 @@ vdc_create_fake_geometry(vdc_t *vdc)
}
vdc->minfo->dki_capacity = vdc->vdisk_size;
- vdc->minfo->dki_lbsize = vdc->block_size;
+ vdc->minfo->dki_lbsize = vdc->vdisk_bsize;
}
static ushort_t
@@ -7692,7 +7739,7 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size)
* update anything.
*/
if (dsk_size == VD_SIZE_UNKNOWN || dsk_size == 0 ||
- (blk_size == vdc->block_size && dsk_size == vdc->vdisk_size &&
+ (blk_size == vdc->vdisk_bsize && dsk_size == vdc->vdisk_size &&
xfr_size == vdc->max_xfer_sz))
return;
@@ -7706,13 +7753,11 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size)
if ((xfr_size * blk_size) > (PAGESIZE * DEV_BSIZE)) {
DMSG(vdc, 0, "[%d] vds block transfer size too big;"
" using max supported by vdc", vdc->instance);
- xfr_size = maxphys / DEV_BSIZE;
- dsk_size = (dsk_size * blk_size) / DEV_BSIZE;
- blk_size = DEV_BSIZE;
+ xfr_size = maxphys / blk_size;
}
vdc->max_xfer_sz = xfr_size;
- vdc->block_size = blk_size;
+ vdc->vdisk_bsize = blk_size;
vdc->vdisk_size = dsk_size;
stp = (vd_err_stats_t *)vdc->err_stats->ks_data;
@@ -7723,6 +7768,50 @@ vdc_update_size(vdc_t *vdc, size_t dsk_size, size_t blk_size, size_t xfr_size)
}
/*
+ * Update information about the VIO block size. The VIO block size is the
+ * same as the vdisk block size which is stored in vdc->vdisk_bsize so we
+ * do not store that information again.
+ *
+ * However, buf structures will always use a logical block size of 512 bytes
+ * (DEV_BSIZE) and we will need to convert logical block numbers to VIO block
+ * numbers for each read or write operation using vdc_strategy(). To speed up
+ * this conversion, we expect the VIO block size to be a power of 2 and a
+ * multiple of 512 bytes (DEV_BSIZE), and we cache some useful information.
+ *
+ * The function returns EINVAL if the new VIO block size (blk_size) is not a
+ * power of 2 or not a multiple of 512 bytes, otherwise it returns 0.
+ */
+static int
+vdc_update_vio_bsize(vdc_t *vdc, uint32_t blk_size)
+{
+ uint32_t ratio, n;
+ int nshift = 0;
+
+ vdc->vio_bmask = 0;
+ vdc->vio_bshift = 0;
+
+ ASSERT(blk_size > 0);
+
+ if ((blk_size % DEV_BSIZE) != 0)
+ return (EINVAL);
+
+ ratio = blk_size / DEV_BSIZE;
+
+ for (n = ratio; n > 1; n >>= 1) {
+ if ((n & 0x1) != 0) {
+ /* blk_size is not a power of 2 */
+ return (EINVAL);
+ }
+ nshift++;
+ }
+
+ vdc->vio_bshift = nshift;
+ vdc->vio_bmask = ratio - 1;
+
+ return (0);
+}
+
+/*
* Function:
* vdc_validate_geometry
*
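
For reference, here is a stand-alone sketch of the shift/mask derivation performed by vdc_update_vio_bsize() above (the helper and variable names are local to this example, not driver symbols): 512 and 4096 are accepted (shift 0/mask 0 and shift 3/mask 0x7 respectively), while 520 (not a multiple of 512) and 1536 (ratio 3, not a power of 2) are rejected with EINVAL.

#include <stdio.h>
#include <stdint.h>
#include <errno.h>

#define DEV_BSIZE	512

/*
 * Example-only helper mirroring the validation in vdc_update_vio_bsize():
 * the VIO block size must be a multiple of DEV_BSIZE, and its ratio to
 * DEV_BSIZE must be a power of 2.
 */
static int
compute_vio_shift(uint32_t blk_size, int *bshift, uint32_t *bmask)
{
	uint32_t ratio, n;
	int nshift = 0;

	if (blk_size == 0 || (blk_size % DEV_BSIZE) != 0)
		return (EINVAL);

	ratio = blk_size / DEV_BSIZE;
	for (n = ratio; n > 1; n >>= 1) {
		if ((n & 0x1) != 0)
			return (EINVAL);	/* ratio is not a power of 2 */
		nshift++;
	}

	*bshift = nshift;
	*bmask = ratio - 1;
	return (0);
}

int
main(void)
{
	uint32_t sizes[] = { 512, 4096, 520, 1536 };
	int i;

	for (i = 0; i < 4; i++) {
		int bshift;
		uint32_t bmask;

		if (compute_vio_shift(sizes[i], &bshift, &bmask) != 0)
			printf("%u: rejected (EINVAL)\n", sizes[i]);
		else
			printf("%u: bshift=%d bmask=0x%x\n",
			    sizes[i], bshift, bmask);
	}
	return (0);
}
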
@@ -7747,7 +7836,7 @@ vdc_validate_geometry(vdc_t *vdc)
buf_t *buf; /* BREAD requests need to be in a buf_t structure */
dev_t dev;
int rv, rval;
- struct dk_label label;
+ struct dk_label *label;
struct dk_geom geom;
struct extvtoc vtoc;
efi_gpt_t *gpt;
@@ -7786,7 +7875,7 @@ vdc_validate_geometry(vdc_t *vdc)
return (EIO);
}
- VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
+ VDC_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl);
rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe);
@@ -7870,14 +7959,15 @@ vdc_validate_geometry(vdc_t *vdc)
/*
* Read disk label from start of disk
*/
+ label = kmem_alloc(vdc->vdisk_bsize, KM_SLEEP);
buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
bioinit(buf);
- buf->b_un.b_addr = (caddr_t)&label;
- buf->b_bcount = DK_LABEL_SIZE;
+ buf->b_un.b_addr = (caddr_t)label;
+ buf->b_bcount = vdc->vdisk_bsize;
buf->b_flags = B_BUSY | B_READ;
buf->b_dev = cmpdev(dev);
- rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label,
- DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir);
+ rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)label,
+ vdc->vdisk_bsize, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir);
if (rv) {
DMSG(vdc, 1, "[%d] Failed to read disk block 0\n",
vdc->instance);
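
The hunk above sizes the label buffer to vdc->vdisk_bsize so that the VD_OP_BREAD of block 0 covers one full vdisk block rather than only DK_LABEL_SIZE bytes. A rough user-space analogue of the same idea (hypothetical device path and an assumed 4096-byte block size; not driver code):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define DK_LABEL_SIZE	512

int
main(void)
{
	const char *dev = "/dev/rdsk/c0d0s2";	/* hypothetical raw device */
	size_t bsize = 4096;			/* assumed vdisk block size */
	char *buf = malloc(bsize);
	int fd;

	if (buf == NULL)
		return (1);
	if ((fd = open(dev, O_RDONLY)) < 0) {
		perror("open");
		free(buf);
		return (1);
	}
	/* read the whole first block; the label occupies its first 512 bytes */
	if (pread(fd, buf, bsize, 0) != (ssize_t)bsize) {
		perror("pread");
	} else {
		printf("read %zu bytes; label is in the first %d bytes\n",
		    bsize, DK_LABEL_SIZE);
	}
	(void) close(fd);
	free(buf);
	return (0);
}
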
@@ -7892,15 +7982,17 @@ vdc_validate_geometry(vdc_t *vdc)
biofini(buf);
kmem_free(buf, sizeof (buf_t));
- if (rv != 0 || label.dkl_magic != DKL_MAGIC ||
- label.dkl_cksum != vdc_lbl2cksum(&label)) {
+ if (rv != 0 || label->dkl_magic != DKL_MAGIC ||
+ label->dkl_cksum != vdc_lbl2cksum(label)) {
DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n",
vdc->instance);
+ kmem_free(label, vdc->vdisk_bsize);
mutex_enter(&vdc->lock);
vdc_store_label_unk(vdc);
return (EINVAL);
}
+ kmem_free(label, vdc->vdisk_bsize);
mutex_enter(&vdc->lock);
vdc_store_label_vtoc(vdc, &geom, &vtoc);
return (0);
@@ -8108,7 +8200,7 @@ vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct extvtoc *vtoc)
int i;
ASSERT(MUTEX_HELD(&vdc->lock));
- ASSERT(vdc->block_size == vtoc->v_sectorsz);
+ ASSERT(vdc->vdisk_bsize == vtoc->v_sectorsz);
vdc->vdisk_label = VD_DISK_LABEL_VTOC;
bcopy(vtoc, vdc->vtoc, sizeof (struct extvtoc));