Diffstat (limited to 'usr/src/uts/sun4v/io/vds.c')
| -rw-r--r-- | usr/src/uts/sun4v/io/vds.c | 645 |
1 file changed, 609 insertions(+), 36 deletions(-)
diff --git a/usr/src/uts/sun4v/io/vds.c b/usr/src/uts/sun4v/io/vds.c index 9d424703e8..b864adbc5e 100644 --- a/usr/src/uts/sun4v/io/vds.c +++ b/usr/src/uts/sun4v/io/vds.c @@ -39,6 +39,7 @@ #include <sys/file.h> #include <sys/fs/hsfs_isospec.h> #include <sys/mdeg.h> +#include <sys/mhd.h> #include <sys/modhash.h> #include <sys/note.h> #include <sys/pathname.h> @@ -137,6 +138,10 @@ vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \ 0, sizeof (struct dk_label)) +/* Message for disk access rights reset failure */ +#define VD_RESET_ACCESS_FAILURE_MSG \ + "Failed to reset disk access rights for disk %s" + /* * Specification of an MD node passed to the MDEG to filter any * 'vport' nodes that do not belong to the specified node. This @@ -347,6 +352,7 @@ typedef struct vd { size_t block_size; /* blk size of actual device */ boolean_t pseudo; /* underlying pseudo dev */ boolean_t file; /* is vDisk backed by a file? */ + boolean_t scsi; /* is vDisk backed by SCSI? */ vnode_t *file_vnode; /* file vnode */ size_t file_size; /* file size */ ddi_devid_t file_devid; /* devid for disk image */ @@ -354,6 +360,7 @@ typedef struct vd { struct dk_geom dk_geom; /* synthetic for slice type */ struct dk_minfo dk_minfo; /* synthetic for slice type */ struct vtoc vtoc; /* synthetic for slice type */ + boolean_t ownership; /* disk ownership status */ ldc_status_t ldc_state; /* LDC connection state */ ldc_handle_t ldc_handle; /* handle for LDC comm */ size_t max_msglen; /* largest LDC message len */ @@ -391,7 +398,7 @@ typedef struct vd_ioctl { const char *cmd_name; /* ioctl cmd name */ void *arg; /* ioctl cmd argument */ /* convert input vd_buf to output ioctl_arg */ - void (*copyin)(void *vd_buf, void *ioctl_arg); + int (*copyin)(void *vd_buf, size_t, void *ioctl_arg); /* convert input ioctl_arg to output vd_buf */ void (*copyout)(void *ioctl_arg, void *vd_buf); /* write is true if the operation writes any data to the backend */ @@ -399,7 +406,8 @@ } vd_ioctl_t; /* Define trivial copyin/copyout conversion function flag */ -#define VD_IDENTITY ((void (*)(void *, void *))-1) +#define VD_IDENTITY_IN ((int (*)(void *, size_t, void *))-1) +#define VD_IDENTITY_OUT ((void (*)(void *, void *))-1) static int vds_ldc_retries = VDS_RETRIES; @@ -411,6 +419,31 @@ static void *vds_state; static uint_t vd_file_write_flags = VD_FILE_WRITE_FLAGS; static short vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT; +static int vd_scsi_debug = USCSI_SILENT; + +/* + * Tunable to define the behavior of the service domain if the vdisk server + * fails to reset disk exclusive access when an LDC channel is reset. When an + * LDC channel is reset, the vdisk server will try to reset disk exclusive + * access by releasing any SCSI-2 reservation or resetting the disk. If these + * actions fail then the default behavior (vd_reset_access_failure = 0) is to + * print a warning message. This default behavior can be changed by setting + * the vd_reset_access_failure variable to A_REBOOT (= 0x1), which will + * cause the service domain to reboot, or to A_DUMP (= 0x5), which will cause + * the service domain to panic. In both cases, the reset of the service domain + * should trigger a reset of the SCSI buses and hopefully clear any SCSI-2 + * reservations. + */ +static int vd_reset_access_failure = 0; + +/* + * Tunable for backward compatibility. When this variable is set to B_TRUE, + * all disk volumes (ZFS, SVM, VxVM volumes) will be exported as single + * slice disks whether or not they have the "slice" option set.
This is + * to provide a simple backward compatibility mechanism when upgrading + * the vds driver and using a domain configuration created before the + * "slice" option was available. + */ +static boolean_t vd_volume_force_slice = B_FALSE; /* * Supported protocol version pairs, from highest (newest) to lowest (oldest) @@ -426,11 +459,13 @@ static const size_t vds_num_versions = static void vd_free_dring_task(vd_t *vdp); static int vd_setup_vd(vd_t *vd); static int vd_setup_single_slice_disk(vd_t *vd); +static int vd_setup_mediainfo(vd_t *vd); static boolean_t vd_enabled(vd_t *vd); static ushort_t vd_lbl2cksum(struct dk_label *label); static int vd_file_validate_geometry(vd_t *vd); static boolean_t vd_file_is_iso_image(vd_t *vd); static void vd_set_exported_operations(vd_t *vd); +static void vd_reset_access(vd_t *vd); /* * Function: @@ -1109,6 +1144,15 @@ vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t vblk, size_t vlen) size_t plen; /* length of data to be read from physical device */ char *buf; /* buffer area to fit physical device's block size */ + if (vd->block_size == 0) { + /* + * The block size was not available during the attach, + * try to update it now. + */ + if (vd_setup_mediainfo(vd) != 0) + return (EIO); + } + /* * If the vdisk block size and the block size of the underlying device * match we can skip straight to vd_do_scsi_rdwr(), otherwise we need @@ -1419,6 +1463,9 @@ vd_reset_if_needed(vd_t *vd) if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0)) PR0("ldc_down() returned errno %d", status); + /* Reset exclusive access rights */ + vd_reset_access(vd); + vd->initialized &= ~(VD_SID | VD_SEQ_NUM | VD_DRING); vd->state = VD_STATE_INIT; vd->max_msglen = sizeof (vio_msg_t); /* baseline vio message size */ @@ -1675,16 +1722,20 @@ vd_serial_notify(void *arg) vd_notify(task); } -static void -vd_geom2dk_geom(void *vd_buf, void *ioctl_arg) +/* ARGSUSED */ +static int +vd_geom2dk_geom(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) { VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg); + return (0); } -static void -vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg) +/* ARGSUSED */ +static int +vd_vtoc2vtoc(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) { VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg); + return (0); } static void @@ -1699,15 +1750,21 @@ vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf) VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf); } -static void -vd_get_efi_in(void *vd_buf, void *ioctl_arg) +static int +vd_get_efi_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) { vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; + size_t data_len; + + data_len = vd_buf_len - (sizeof (vd_efi_t) - sizeof (uint64_t)); + if (vd_efi->length > data_len) + return (EINVAL); dk_efi->dki_lba = vd_efi->lba; dk_efi->dki_length = vd_efi->length; dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP); + return (0); } static void @@ -1722,14 +1779,20 @@ vd_get_efi_out(void *ioctl_arg, void *vd_buf) kmem_free(dk_efi->dki_data, len); } -static void -vd_set_efi_in(void *vd_buf, void *ioctl_arg) +static int +vd_set_efi_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) { vd_efi_t *vd_efi = (vd_efi_t *)vd_buf; dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg; + size_t data_len; + + data_len = vd_buf_len - (sizeof (vd_efi_t) - sizeof (uint64_t)); + if (vd_efi->length > data_len) + return (EINVAL); dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP); VD_EFI2DK_EFI(vd_efi, dk_efi); + return (0); } static void @@ 
-1741,6 +1804,123 @@ vd_set_efi_out(void *ioctl_arg, void *vd_buf) kmem_free(dk_efi->dki_data, vd_efi->length); } +static int +vd_scsicmd_in(void *vd_buf, size_t vd_buf_len, void *ioctl_arg) +{ + size_t vd_scsi_len; + vd_scsi_t *vd_scsi = (vd_scsi_t *)vd_buf; + struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl_arg; + + /* check buffer size */ + vd_scsi_len = VD_SCSI_SIZE; + vd_scsi_len += P2ROUNDUP(vd_scsi->cdb_len, sizeof (uint64_t)); + vd_scsi_len += P2ROUNDUP(vd_scsi->sense_len, sizeof (uint64_t)); + vd_scsi_len += P2ROUNDUP(vd_scsi->datain_len, sizeof (uint64_t)); + vd_scsi_len += P2ROUNDUP(vd_scsi->dataout_len, sizeof (uint64_t)); + + ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); + + if (vd_buf_len < vd_scsi_len) + return (EINVAL); + + /* set flags */ + uscsi->uscsi_flags = vd_scsi_debug; + + if (vd_scsi->options & VD_SCSI_OPT_NORETRY) { + uscsi->uscsi_flags |= USCSI_ISOLATE; + uscsi->uscsi_flags |= USCSI_DIAGNOSE; + } + + /* task attribute */ + switch (vd_scsi->task_attribute) { + case VD_SCSI_TASK_ACA: + uscsi->uscsi_flags |= USCSI_HEAD; + break; + case VD_SCSI_TASK_HQUEUE: + uscsi->uscsi_flags |= USCSI_HTAG; + break; + case VD_SCSI_TASK_ORDERED: + uscsi->uscsi_flags |= USCSI_OTAG; + break; + default: + uscsi->uscsi_flags |= USCSI_NOTAG; + break; + } + + /* timeout */ + uscsi->uscsi_timeout = vd_scsi->timeout; + + /* cdb data */ + uscsi->uscsi_cdb = (caddr_t)VD_SCSI_DATA_CDB(vd_scsi); + uscsi->uscsi_cdblen = vd_scsi->cdb_len; + + /* sense buffer */ + if (vd_scsi->sense_len != 0) { + uscsi->uscsi_flags |= USCSI_RQENABLE; + uscsi->uscsi_rqbuf = (caddr_t)VD_SCSI_DATA_SENSE(vd_scsi); + uscsi->uscsi_rqlen = vd_scsi->sense_len; + } + + if (vd_scsi->datain_len != 0 && vd_scsi->dataout_len != 0) { + /* uscsi does not support read/write request */ + return (EINVAL); + } + + /* request data-in */ + if (vd_scsi->datain_len != 0) { + uscsi->uscsi_flags |= USCSI_READ; + uscsi->uscsi_buflen = vd_scsi->datain_len; + uscsi->uscsi_bufaddr = (char *)VD_SCSI_DATA_IN(vd_scsi); + } + + /* request data-out */ + if (vd_scsi->dataout_len != 0) { + uscsi->uscsi_buflen = vd_scsi->dataout_len; + uscsi->uscsi_bufaddr = (char *)VD_SCSI_DATA_OUT(vd_scsi); + } + + return (0); +} + +static void +vd_scsicmd_out(void *ioctl_arg, void *vd_buf) +{ + vd_scsi_t *vd_scsi = (vd_scsi_t *)vd_buf; + struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl_arg; + + /* output fields */ + vd_scsi->cmd_status = uscsi->uscsi_status; + + /* sense data */ + if ((uscsi->uscsi_flags & USCSI_RQENABLE) && + (uscsi->uscsi_status == STATUS_CHECK || + uscsi->uscsi_status == STATUS_TERMINATED)) { + vd_scsi->sense_status = uscsi->uscsi_rqstatus; + if (uscsi->uscsi_rqstatus == STATUS_GOOD) + vd_scsi->sense_len -= uscsi->uscsi_resid; + else + vd_scsi->sense_len = 0; + } else { + vd_scsi->sense_len = 0; + } + + if (uscsi->uscsi_status != STATUS_GOOD) { + vd_scsi->dataout_len = 0; + vd_scsi->datain_len = 0; + return; + } + + if (uscsi->uscsi_flags & USCSI_READ) { + /* request data (read) */ + vd_scsi->datain_len -= uscsi->uscsi_resid; + vd_scsi->dataout_len = 0; + } else { + /* request data (write) */ + vd_scsi->datain_len = 0; + vd_scsi->dataout_len -= uscsi->uscsi_resid; + } +} + static vd_disk_label_t vd_read_vtoc(vd_t *vd, struct vtoc *vtoc) { @@ -2143,10 +2323,30 @@ vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) } /* Convert client's data, if necessary */ - if (ioctl->copyin == VD_IDENTITY) /* use client buffer */ + if (ioctl->copyin == VD_IDENTITY_IN) { + /* use client buffer */ ioctl->arg = buf; - else /* 
convert client vdisk operation data to ioctl data */ - (ioctl->copyin)(buf, (void *)ioctl->arg); + } else { + /* convert client vdisk operation data to ioctl data */ + status = (ioctl->copyin)(buf, nbytes, + (void *)ioctl->arg); + if (status != 0) { + request->status = status; + return (0); + } + } + } + + if (ioctl->operation == VD_OP_SCSICMD) { + struct uscsi_cmd *uscsi = (struct uscsi_cmd *)ioctl->arg; + + /* check write permission */ + if (!(vd->open_flags & FWRITE) && + !(uscsi->uscsi_flags & USCSI_READ)) { + PR0("uscsi fails because backend is opened read-only"); + request->status = EROFS; + return (0); + } } /* @@ -2176,7 +2376,19 @@ vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) if (request->status != 0) { PR0("ioctl(%s) = errno %d", ioctl->cmd_name, request->status); - return (0); + if (ioctl->operation == VD_OP_SCSICMD && + ((struct uscsi_cmd *)ioctl->arg)->uscsi_status != 0) + /* + * USCSICMD has reported an error and the uscsi_status + * field is not zero. This means that the SCSI command + * has completed but it has an error. So we should + * mark the VD operation as successfully completed + * and let clients check the SCSI status field for + * SCSI errors. + */ + request->status = 0; + else + return (0); } /* Convert data and send to client, if necessary */ @@ -2185,7 +2397,7 @@ vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl) PR1("Sending \"arg\" data to client"); /* Convert ioctl data to vdisk operation data, if necessary */ - if (ioctl->copyout != VD_IDENTITY) + if (ioctl->copyout != VD_IDENTITY_OUT) (ioctl->copyout)((void *)ioctl->arg, buf); if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes, @@ -2239,6 +2451,7 @@ vd_ioctl(vd_task_t *task) struct dk_geom dk_geom = {0}; struct vtoc vtoc = {0}; struct dk_efi dk_efi = {0}; + struct uscsi_cmd uscsi = {0}; vd_t *vd = task->vd; vd_dring_payload_t *request = task->request; vd_ioctl_t ioctl[] = { @@ -2250,7 +2463,7 @@ vd_ioctl(vd_task_t *task) /* "Get" (copy-out) operations */ {VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int), DKIOCGETWCE, STRINGIZE(DKIOCGETWCE), - NULL, VD_IDENTITY, VD_IDENTITY, B_FALSE}, + NULL, VD_IDENTITY_IN, VD_IDENTITY_OUT, B_FALSE}, {VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM), RNDSIZE(vd_geom_t), DKIOCGGEOM, STRINGIZE(DKIOCGGEOM), @@ -2265,7 +2478,7 @@ vd_ioctl(vd_task_t *task) /* "Set" (copy-in) operations */ {VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int), DKIOCSETWCE, STRINGIZE(DKIOCSETWCE), - NULL, VD_IDENTITY, VD_IDENTITY, B_TRUE}, + NULL, VD_IDENTITY_IN, VD_IDENTITY_OUT, B_TRUE}, {VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM), RNDSIZE(vd_geom_t), DKIOCSGEOM, STRINGIZE(DKIOCSGEOM), @@ -2276,6 +2489,10 @@ vd_ioctl(vd_task_t *task) {VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t), DKIOCSETEFI, STRINGIZE(DKIOCSETEFI), &dk_efi, vd_set_efi_in, vd_set_efi_out, B_TRUE}, + + {VD_OP_SCSICMD, STRINGIZE(VD_OP_SCSICMD), RNDSIZE(vd_scsi_t), + USCSICMD, STRINGIZE(USCSICMD), + &uscsi, vd_scsicmd_in, vd_scsicmd_out, B_FALSE}, }; size_t nioctls = (sizeof (ioctl))/(sizeof (ioctl[0])); @@ -2294,7 +2511,8 @@ vd_ioctl(vd_task_t *task) ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0); if (request->operation == VD_OP_GET_EFI || - request->operation == VD_OP_SET_EFI) { + request->operation == VD_OP_SET_EFI || + request->operation == VD_OP_SCSICMD) { if (request->nbytes >= ioctl[i].nbytes) break; PR0("%s: Expected at least nbytes = %lu, " @@ -2399,6 +2617,308 @@ vd_get_devid(vd_task_t *task) return (status); } +static
int +vd_scsi_reset(vd_t *vd) +{ + int rval, status; + struct uscsi_cmd uscsi = { 0 }; + + uscsi.uscsi_flags = vd_scsi_debug | USCSI_RESET; + uscsi.uscsi_timeout = vd_scsi_rdwr_timeout; + + status = ldi_ioctl(vd->ldi_handle[0], USCSICMD, (intptr_t)&uscsi, + (vd->open_flags | FKIOCTL), kcred, &rval); + + return (status); +} + +static int +vd_reset(vd_task_t *task) +{ + vd_t *vd = task->vd; + vd_dring_payload_t *request = task->request; + + ASSERT(request->operation == VD_OP_RESET); + ASSERT(vd->scsi); + + PR0("Performing VD_OP_RESET"); + + if (request->nbytes != 0) { + PR0("VD_OP_RESET: Expected nbytes = 0, got %lu", + request->nbytes); + return (EINVAL); + } + + request->status = vd_scsi_reset(vd); + + return (0); +} + +static int +vd_get_capacity(vd_task_t *task) +{ + int rv; + size_t nbytes; + vd_t *vd = task->vd; + vd_dring_payload_t *request = task->request; + vd_capacity_t vd_cap = { 0 }; + + ASSERT(request->operation == VD_OP_GET_CAPACITY); + ASSERT(vd->scsi); + + PR0("Performing VD_OP_GET_CAPACITY"); + + nbytes = request->nbytes; + + if (nbytes != RNDSIZE(vd_capacity_t)) { + PR0("VD_OP_GET_CAPACITY: Expected nbytes = %lu, got %lu", + RNDSIZE(vd_capacity_t), nbytes); + return (EINVAL); + } + + if (vd->vdisk_size == VD_SIZE_UNKNOWN) { + if (vd_setup_mediainfo(vd) != 0) + ASSERT(vd->vdisk_size == VD_SIZE_UNKNOWN); + } + + ASSERT(vd->vdisk_size != 0); + + request->status = 0; + + vd_cap.vdisk_block_size = vd->vdisk_block_size; + vd_cap.vdisk_size = vd->vdisk_size; + + if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&vd_cap, 0, &nbytes, + request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { + PR0("ldc_mem_copy() returned errno %d copying to client", rv); + return (rv); + } + + return (0); +} + +static int +vd_get_access(vd_task_t *task) +{ + uint64_t access; + int rv, rval = 0; + size_t nbytes; + vd_t *vd = task->vd; + vd_dring_payload_t *request = task->request; + + ASSERT(request->operation == VD_OP_GET_ACCESS); + ASSERT(vd->scsi); + + PR0("Performing VD_OP_GET_ACCESS"); + + nbytes = request->nbytes; + + if (nbytes != sizeof (uint64_t)) { + PR0("VD_OP_GET_ACCESS: Expected nbytes = %lu, got %lu", + sizeof (uint64_t), nbytes); + return (EINVAL); + } + + request->status = ldi_ioctl(vd->ldi_handle[request->slice], MHIOCSTATUS, + NULL, (vd->open_flags | FKIOCTL), kcred, &rval); + + if (request->status != 0) + return (0); + + access = (rval == 0)? 
VD_ACCESS_ALLOWED : VD_ACCESS_DENIED; + + if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&access, 0, &nbytes, + request->cookie, request->ncookies, LDC_COPY_OUT)) != 0) { + PR0("ldc_mem_copy() returned errno %d copying to client", rv); + return (rv); + } + + return (0); +} + +static int +vd_set_access(vd_task_t *task) +{ + uint64_t flags; + int rv, rval; + size_t nbytes; + vd_t *vd = task->vd; + vd_dring_payload_t *request = task->request; + + ASSERT(request->operation == VD_OP_SET_ACCESS); + ASSERT(vd->scsi); + + nbytes = request->nbytes; + + if (nbytes != sizeof (uint64_t)) { + PR0("VD_OP_SET_ACCESS: Expected nbytes = %lu, got %lu", + sizeof (uint64_t), nbytes); + return (EINVAL); + } + + if ((rv = ldc_mem_copy(vd->ldc_handle, (char *)&flags, 0, &nbytes, + request->cookie, request->ncookies, LDC_COPY_IN)) != 0) { + PR0("ldc_mem_copy() returned errno %d copying from client", rv); + return (rv); + } + + if (flags == VD_ACCESS_SET_CLEAR) { + PR0("Performing VD_OP_SET_ACCESS (CLEAR)"); + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCRELEASE, NULL, (vd->open_flags | FKIOCTL), kcred, + &rval); + if (request->status == 0) + vd->ownership = B_FALSE; + return (0); + } + + /* + * As per the VIO spec, the PREEMPT and PRESERVE flags are only valid + * when the EXCLUSIVE flag is set. + */ + if (!(flags & VD_ACCESS_SET_EXCLUSIVE)) { + PR0("Invalid VD_OP_SET_ACCESS flags: 0x%lx", flags); + request->status = EINVAL; + return (0); + } + + switch (flags & (VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE)) { + + case VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE: + /* + * Flags EXCLUSIVE and PREEMPT and PRESERVE. We have to + * acquire exclusive access rights, preserve them and we + * can use preemption. So we can use the MHIOCTKOWN ioctl. + */ + PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PREEMPT|PRESERVE)"); + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCTKOWN, NULL, (vd->open_flags | FKIOCTL), kcred, &rval); + break; + + case VD_ACCESS_SET_PRESERVE: + /* + * Flags EXCLUSIVE and PRESERVE. We have to acquire exclusive + * access rights and preserve them, but not preempt any other + * host. So we need to use the MHIOCTKOWN ioctl to enable the + * "preserve" feature, but we cannot call it directly + * because it uses preemption. So before that, we use the + * MHIOCQRESERVE ioctl to ensure we can get exclusive rights + * without preempting anyone. + */ + PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PRESERVE)"); + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, + &rval); + if (request->status != 0) + break; + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCTKOWN, NULL, (vd->open_flags | FKIOCTL), kcred, &rval); + break; + + case VD_ACCESS_SET_PREEMPT: + /* + * Flags EXCLUSIVE and PREEMPT. We have to acquire exclusive + * access rights and we can use preemption. So we try to do + * a SCSI reservation; if it fails, we reset the disk to clear + * any reservation and try to reserve again.
+ */ + PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE|PREEMPT)"); + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, + &rval); + if (request->status == 0) + break; + + /* reset the disk */ + (void) vd_scsi_reset(vd); + + /* try again even if the reset has failed */ + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, + &rval); + break; + + case 0: + /* Flag EXCLUSIVE only. Just issue a SCSI reservation */ + PR0("Performing VD_OP_SET_ACCESS (EXCLUSIVE)"); + request->status = ldi_ioctl(vd->ldi_handle[request->slice], + MHIOCQRESERVE, NULL, (vd->open_flags | FKIOCTL), kcred, + &rval); + break; + } + + if (request->status == 0) + vd->ownership = B_TRUE; + else + PR0("VD_OP_SET_ACCESS: error %d", request->status); + + return (0); +} + +static void +vd_reset_access(vd_t *vd) +{ + int status, rval; + + if (vd->file || !vd->ownership) + return; + + PR0("Releasing disk ownership"); + status = ldi_ioctl(vd->ldi_handle[0], MHIOCRELEASE, NULL, + (vd->open_flags | FKIOCTL), kcred, &rval); + + /* + * An EACCES failure means that there is a reservation conflict, + * so we are not the owner of the disk anymore. + */ + if (status == 0 || status == EACCES) { + vd->ownership = B_FALSE; + return; + } + + PR0("Failed to release ownership, error %d", status); + + /* + * We have failed to release the ownership, so try to reset the disk + * to release any reservations. + */ + PR0("Resetting disk"); + status = vd_scsi_reset(vd); + + if (status != 0) + PR0("Failed to reset disk, error %d", status); + + /* whatever the result of the reset is, we try the release again */ + status = ldi_ioctl(vd->ldi_handle[0], MHIOCRELEASE, NULL, + (vd->open_flags | FKIOCTL), kcred, &rval); + + if (status == 0 || status == EACCES) { + vd->ownership = B_FALSE; + return; + } + + PR0("Failed to release ownership, error %d", status); + + /* + * At this point we have done our best to try to reset the + * access rights to the disk and we don't know if we still + * own a reservation or whether any mechanism to preserve the + * ownership is still in place. The ultimate solution would + * be to reset the system, but this is usually not what we + * want to happen.
+ */ + + if (vd_reset_access_failure == A_REBOOT) { + cmn_err(CE_WARN, VD_RESET_ACCESS_FAILURE_MSG + ", rebooting the system", vd->device_path); + (void) uadmin(A_SHUTDOWN, AD_BOOT, NULL); + } else if (vd_reset_access_failure == A_DUMP) { + panic(VD_RESET_ACCESS_FAILURE_MSG, vd->device_path); + } + + cmn_err(CE_WARN, VD_RESET_ACCESS_FAILURE_MSG, vd->device_path); +} + /* * Define the supported operations once the functions for performing them have * been defined @@ -2417,6 +2937,11 @@ static const vds_operation_t vds_operation[] = { {X(VD_OP_GET_EFI), vd_ioctl, NULL}, {X(VD_OP_SET_EFI), vd_ioctl, NULL}, {X(VD_OP_GET_DEVID), vd_get_devid, NULL}, + {X(VD_OP_SCSICMD), vd_ioctl, NULL}, + {X(VD_OP_RESET), vd_reset, NULL}, + {X(VD_OP_GET_CAPACITY), vd_get_capacity, NULL}, + {X(VD_OP_SET_ACCESS), vd_set_access, NULL}, + {X(VD_OP_GET_ACCESS), vd_get_access, NULL}, #undef X }; @@ -2702,6 +3227,9 @@ vd_set_exported_operations(vd_t *vd) if (vd->open_flags & FWRITE) vd->operations |= VD_OP_MASK_WRITE; + if (vd->scsi) + vd->operations |= VD_OP_MASK_SCSI; + if (vd->file && vd_file_is_iso_image(vd)) { /* * can't write to ISO images, make sure that write @@ -2823,8 +3351,7 @@ vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen) } /* Return the device's block size and max transfer size to the client */ - attr_msg->vdisk_block_size = DEV_BSIZE; - attr_msg->vdisk_block_size = vd->block_size; + attr_msg->vdisk_block_size = vd->vdisk_block_size; attr_msg->max_xfer_sz = vd->max_xfer_sz; attr_msg->vdisk_size = vd->vdisk_size; @@ -3806,32 +4333,65 @@ vd_is_atapi_device(vd_t *vd) } static int +vd_setup_mediainfo(vd_t *vd) +{ + int status, rval; + struct dk_minfo dk_minfo; + + ASSERT(vd->ldi_handle[0] != NULL); + ASSERT(vd->vdisk_block_size != 0); + + if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, + (intptr_t)&dk_minfo, (vd->open_flags | FKIOCTL), + kcred, &rval)) != 0) + return (status); + + ASSERT(dk_minfo.dki_lbsize % vd->vdisk_block_size == 0); + + vd->block_size = dk_minfo.dki_lbsize; + vd->vdisk_size = (dk_minfo.dki_capacity * dk_minfo.dki_lbsize) / + vd->vdisk_block_size; + vd->vdisk_media = DK_MEDIATYPE2VD_MEDIATYPE(dk_minfo.dki_media_type); + return (0); +} + +static int vd_setup_full_disk(vd_t *vd) { - int rval, status; + int status; major_t major = getmajor(vd->dev[0]); minor_t minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE; - struct dk_minfo dk_minfo; ASSERT(vd->vdisk_type == VD_DISK_TYPE_DISK); + vd->vdisk_block_size = DEV_BSIZE; + /* * At this point, vdisk_size is set to the size of partition 2 but * this does not represent the size of the disk because partition 2 * may not cover the entire disk and its size does not include reserved - * blocks. So we update vdisk_size to be the size of the entire disk. + * blocks. So we call vd_setup_mediainfo to update this information and + * set the block size and the media type of the disk. */ - if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO, - (intptr_t)&dk_minfo, (vd->open_flags | FKIOCTL), - kcred, &rval)) != 0) { - PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", - status); - return (status); + status = vd_setup_mediainfo(vd); + + if (status != 0) { + if (!vd->scsi) { + /* unexpected failure */ + PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d", + status); + return (status); + } + + /* + * The function can fail for SCSI disks which are present but + * reserved by another system. In that case, we don't know the + * size of the disk or the block size.
+ */ + vd->vdisk_size = VD_SIZE_UNKNOWN; + vd->block_size = 0; + vd->vdisk_media = VD_MEDIA_FIXED; } - vd->vdisk_size = dk_minfo.dki_capacity; - vd->block_size = dk_minfo.dki_lbsize; - vd->vdisk_media = DK_MEDIATYPE2VD_MEDIATYPE(dk_minfo.dki_media_type); - vd->vdisk_block_size = DEV_BSIZE; /* Move dev number and LDI handle to entire-disk-slice array elements */ vd->dev[VD_ENTIRE_DISK_SLICE] = vd->dev[0]; @@ -4337,6 +4897,8 @@ vd_setup_backend_ldi(vd_t *vd) vd->vdisk_type == VD_DISK_TYPE_DISK) || dk_cinfo.dki_ctype == DKC_CDROM) { ASSERT(!vd->pseudo); + if (dk_cinfo.dki_ctype == DKC_SCSI_CCS) + vd->scsi = B_TRUE; return (vd_setup_full_disk(vd)); } @@ -4349,8 +4911,6 @@ vd_setup_backend_ldi(vd_t *vd) * If it is disk slice 2 or a pseudo device then it is exported as a * single slice disk only if the "slice" option is specified. */ - ASSERT(vd->vdisk_type == VD_DISK_TYPE_SLICE || - dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE); return (vd_setup_single_slice_disk(vd)); } @@ -4472,13 +5032,26 @@ vd_setup_vd(vd_t *vd) ddi_release_devi(dip); VN_RELE(vnp); + if (!vd->pseudo) { + status = vd_setup_backend_ldi(vd); + break; + } + /* * If this is a pseudo device then its usage depends if the * "slice" option is set or not. If the "slice" option is set * then the pseudo device will be exported as a single slice, * otherwise it will be exported as a full disk. + * + * For backward compatibility, if vd_volume_force_slice is set + * then we always export pseudo devices as slices. */ - if (vd->pseudo && vd->vdisk_type == VD_DISK_TYPE_DISK) + if (vd_volume_force_slice) { + vd->vdisk_type = VD_DISK_TYPE_SLICE; + vd->nslices = 1; + } + + if (vd->vdisk_type == VD_DISK_TYPE_DISK) status = vd_setup_backend_vnode(vd); else status = vd_setup_backend_ldi(vd);
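For reference, vd_scsicmd_in() above turns a VD_OP_SCSICMD request into a struct uscsi_cmd that is handed to the backend disk through the USCSICMD ioctl. The sketch below shows roughly the same kind of request issued directly from user level; the INQUIRY command and the device path are illustrative assumptions, not part of this change.

/*
 * Sketch only: a user-level USCSICMD request comparable to what
 * vd_scsicmd_in() builds for the backend. Device path is a placeholder.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stropts.h>
#include <sys/scsi/impl/uscsi.h>

int
main(void)
{
	int fd = open("/dev/rdsk/c1t1d0s2", O_RDONLY);	/* hypothetical disk */
	char inq[96];					/* INQUIRY response buffer */
	char cdb[6] = { 0x12, 0, 0, 0, sizeof (inq), 0 };	/* INQUIRY CDB */
	struct uscsi_cmd ucmd;

	if (fd == -1)
		return (1);

	(void) memset(&ucmd, 0, sizeof (ucmd));
	ucmd.uscsi_cdb = cdb;
	ucmd.uscsi_cdblen = sizeof (cdb);
	ucmd.uscsi_bufaddr = inq;
	ucmd.uscsi_buflen = sizeof (inq);
	ucmd.uscsi_flags = USCSI_READ | USCSI_SILENT;	/* data-in, no console noise */
	ucmd.uscsi_timeout = 30;

	if (ioctl(fd, USCSICMD, &ucmd) >= 0 && ucmd.uscsi_status == 0)
		(void) printf("vendor/product: %.24s\n", &inq[8]);

	(void) close(fd);
	return (0);
}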
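The access-control paths (vd_get_access(), vd_set_access(), vd_reset_access()) are built on the standard multihost disk ioctls from <sys/mhd.h>. Below is a minimal user-level sketch of the same MHIOCQRESERVE/MHIOCTKOWN/MHIOCRELEASE sequence, assuming a hypothetical raw disk path; it only illustrates the ioctls the server drives through ldi_ioctl() and is not part of the patch.

/*
 * Sketch only: acquire, check and release a SCSI-2 reservation with the
 * MHIOC ioctls used by vd_set_access() and vd_reset_access().
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <stropts.h>
#include <sys/mhd.h>

int
main(void)
{
	int fd = open("/dev/rdsk/c1t1d0s2", O_RDWR);	/* hypothetical disk */

	if (fd == -1) {
		perror("open");
		return (1);
	}

	/* EXCLUSIVE|PRESERVE: reserve without preempting, then take ownership */
	if (ioctl(fd, MHIOCQRESERVE, NULL) == 0 &&
	    ioctl(fd, MHIOCTKOWN, NULL) == 0)
		(void) printf("exclusive access acquired and preserved\n");

	/* MHIOCSTATUS: 0 means no other host holds a reservation */
	(void) printf("MHIOCSTATUS = %d\n", ioctl(fd, MHIOCSTATUS, NULL));

	/* CLEAR: drop the reservation, as done for VD_ACCESS_SET_CLEAR */
	(void) ioctl(fd, MHIOCRELEASE, NULL);
	(void) close(fd);
	return (0);
}

MHIOCQRESERVE takes a reservation without preempting another host, while MHIOCTKOWN takes ownership with preemption and keeps it preserved, which is why the EXCLUSIVE|PRESERVE case above issues MHIOCQRESERVE first.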
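Both new tunables, vd_reset_access_failure and vd_volume_force_slice, are plain module globals, so an administrator could set them persistently from /etc/system in the service domain. The stanza below is a sketch of that usage, assuming the module name vds; the values follow the comment above (0x1 = A_REBOOT, 0x5 = A_DUMP).

* Reboot the service domain if vds cannot reset disk access rights
* after an LDC channel reset (0 = warn only, 0x1 = A_REBOOT, 0x5 = A_DUMP).
set vds:vd_reset_access_failure = 0x1

* Backward compatibility: export all disk volumes as single-slice disks.
set vds:vd_volume_force_slice = 1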
