diff options
author | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-03-08 12:57:08 +0000 |
---|---|---|
committer | Jerry Jelinek <jerry.jelinek@joyent.com> | 2016-03-08 12:57:08 +0000 |
commit | 48bc46418c502790ab12bdf93d47dda7da3c1684 (patch) | |
tree | 9e89239148d783e90be150f50e5e8bb4f289c91c | |
parent | 0929ae1b21b179f0286999f40ab364fd2a370164 (diff) | |
download | illumos-joyent-48bc46418c502790ab12bdf93d47dda7da3c1684.tar.gz |
OS-4612 Want zvol block devices in LX to handle some Linux disk ioctls
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
-rw-r--r-- | usr/src/uts/common/brand/lx/os/lx_brand.c | 73 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/sys/lx_brand.h | 2 | ||||
-rw-r--r-- | usr/src/uts/common/brand/lx/syscall/lx_ioctl.c | 248 |
3 files changed, 315 insertions, 8 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c index 11aeb34dee..a2e308cc67 100644 --- a/usr/src/uts/common/brand/lx/os/lx_brand.c +++ b/usr/src/uts/common/brand/lx/os/lx_brand.c @@ -798,17 +798,22 @@ static int lx_zfs_ioctl(ldi_handle_t lh, int cmd, zfs_cmd_t *zc, size_t *dst_alloc_size) { uint64_t cookie; - size_t dstsize = 8192; + size_t dstsize; int rc, unused; cookie = zc->zc_cookie; + dstsize = (dst_alloc_size == NULL ? 0 : 8192); + again: - zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(dstsize, KM_SLEEP); - zc->zc_nvlist_dst_size = dstsize; + if (dst_alloc_size != NULL) { + zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(dstsize, + KM_SLEEP); + zc->zc_nvlist_dst_size = dstsize; + } rc = ldi_ioctl(lh, cmd, (intptr_t)zc, FKIOCTL, kcred, &unused); - if (rc == ENOMEM) { + if (rc == ENOMEM && dst_alloc_size != NULL) { /* * Our nvlist_dst buffer was too small, retry with a bigger * buffer. ZFS will tell us the exact needed size. @@ -825,10 +830,6 @@ again: if (dst_alloc_size != NULL) { *dst_alloc_size = dstsize; - } else { - /* Caller didn't want the nvlist_dst anyway */ - kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, dstsize); - zc->zc_nvlist_dst = NULL; } return (rc); @@ -853,6 +854,58 @@ lx_zvol_minor(char *znm) } /* + * We only get the relevant properties for zvols. This is because we're + * essentially iterating all of the ZFS datasets/zvols on the entire system + * when we boot the zone and there is a significant performance penalty if we + * have to retrieve all of the properties for everything. Especially since we + * don't care about any of them except the zvols actually in our delegated + * datasets. + * + * Note that the two properties we care about, volsize & volblocksize, are + * mandatory for zvols and should always be present. Also, note that the + * blocksize property value cannot change after the zvol has been created. 
+ */ +static void +lx_zvol_props(ldi_handle_t lh, zfs_cmd_t *zc, uint64_t *vsz, uint64_t *bsz) +{ + int rc; + size_t size; + nvlist_t *nv = NULL, *nv2; + /* + * Ask ZFS for the dataset's stats nvlist; 'size' receives the size of + * the destination buffer set up by lx_zfs_ioctl(). + * + * NOTE(review): on failure we return without writing *vsz or *bsz and + * without freeing zc_nvlist_dst here; confirm what lx_zfs_ioctl() leaves + * allocated when it fails and what the caller pre-initializes. + */ + rc = lx_zfs_ioctl(lh, ZFS_IOC_OBJSET_STATS, zc, &size); + if (rc != 0) + return; + /* Turn the packed buffer the ioctl filled in into an nvlist. */ + rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst, + zc->zc_nvlist_dst_size, &nv, 0); + ASSERT(rc == 0); + /* The packed copy is no longer needed; release it and clear zc. */ + kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); + zc->zc_nvlist_dst = NULL; + zc->zc_nvlist_dst_size = 0; + /* + * Each property is looked up as a nested nvlist whose value lives + * under ZPROP_VALUE. An output is only written when both lookups + * succeed. + */ + if ((rc = nvlist_lookup_nvlist(nv, "volsize", &nv2)) == 0) { + uint64_t val; + + rc = nvlist_lookup_uint64(nv2, ZPROP_VALUE, &val); + if (rc == 0) { + *vsz = val; + } + } + + if ((rc = nvlist_lookup_nvlist(nv, "volblocksize", &nv2)) == 0) { + uint64_t val; + + rc = nvlist_lookup_uint64(nv2, ZPROP_VALUE, &val); + if (rc == 0) { + *bsz = val; + } + } + + nvlist_free(nv); +} + +/* * We treat the zpool as a virtual device and any zvols as actual devices. */ static void @@ -953,6 +1006,10 @@ lx_zfs_get_devs(zone_t *zone, list_t *zvol_lst) (void) strcpy(zv->lzd_name, zc->zc_name); zv->lzd_type = LXD_ZFS_DEV_ZVOL; zv->lzd_minor = lx_zvol_minor(zc->zc_name); + + lx_zvol_props(lh, zc, &zv->lzd_volsize, + &zv->lzd_blksize); + list_insert_tail(zvol_lst, zv); } else { lx_zfs_ds_t *nds; diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h index 747e8fa012..9690547e0c 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_brand.h +++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h @@ -574,6 +574,8 @@ typedef struct lxd_zfs_dev { char lzd_name[MAXPATHLEN]; lxd_zfs_dev_type_t lzd_type; minor_t lzd_minor; + uint64_t lzd_volsize; + uint64_t lzd_blksize; } lxd_zfs_dev_t; /* diff --git a/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c index 5fb58e26ea..6c0e9648d8 100644 --- a/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c +++ b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c @@ -48,10 +48,14 @@ #include <sys/netstack.h> #include 
<inet/ip.h> #include <inet/ip_if.h> +#include <sys/dkio.h> +#include <sys/sdt.h> /* * Linux ioctl types */ +#define LX_IOC_TYPE_HD 0x03 +#define LX_IOC_TYPE_BLK 0x12 #define LX_IOC_TYPE_FD 0x54 #define LX_IOC_TYPE_DTRACE 0x68 #define LX_IOC_TYPE_SOCK 0x89 @@ -60,6 +64,10 @@ /* * Supported ioctls */ +#define LX_HDIO_GETGEO 0x0301 +#define LX_BLKGETSIZE 0x1260 +#define LX_BLKSSZGET 0x1268 +#define LX_BLKGETSIZE64 0x80081272 #define LX_TCGETS 0x5401 #define LX_TCSETS 0x5402 #define LX_TCSETSW 0x5403 @@ -500,6 +508,224 @@ ict_fionread(file_t *fp, int cmd, intptr_t arg, int lxcmd) return (0); } +/* + * Hard disk-related translators. + * + * Note that the normal disk ioctls only work for VCHR devices. See spec_ioctl + * which will return ENOTTY for a VBLK device. However, fdisk, etc. expect to + * work with block devices. + * + * We expect a zvol to be the primary block device we're interacting with and + * we use the zone's lxzd_vdisks list to handle zvols specifically. + */ + /* Geometry record that ict_hdgetgeo() fills in and copies out to the caller. */ +typedef struct lx_hd_geom { + unsigned char heads; + unsigned char sectors; + unsigned short cylinders; + unsigned long start; +} lx_hd_geom_t; + /* + * Walk the lxzd_vdisks list of the current process's zone and return the + * entry whose lzd_minor equals 'min'. Returns NULL when the zone has no + * lx zone data or when no entry matches. + */ +static lxd_zfs_dev_t * +lx_lookup_zvol(minor_t min) +{ + lx_zone_data_t *lxzdata; + lxd_zfs_dev_t *zv; + + lxzdata = ztolxzd(curproc->p_zone); + if (lxzdata == NULL) + return (NULL); + ASSERT(lxzdata->lxzd_vdisks != NULL); + /* Linear scan; the first entry with a matching minor wins. */ + zv = list_head(lxzdata->lxzd_vdisks); + while (zv != NULL) { + if (zv->lzd_minor == min) + return (zv); + + zv = list_next(lxzdata->lxzd_vdisks, zv); + } + + return (NULL); +} + +/* + * See zvol_ioctl() which always fails for DKIOCGGEOM. The geometry for a + * zvol (or really any modern disk) is made up, so we do that here as well. 
+ */ +static int +ict_hdgetgeo(file_t *fp, int cmd, intptr_t arg, int lxcmd) +{ + lx_hd_geom_t lx_geom; + + if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK) + return (set_errno(EINVAL)); + + if (getmajor(fp->f_vnode->v_rdev) == mod_name_to_major("zfs")) { + minor_t m; + lxd_zfs_dev_t *zv; + + m = getminor(fp->f_vnode->v_rdev); + if ((zv = lx_lookup_zvol(m)) == NULL) { + /* should only happen if new zvol */ + bzero(&lx_geom, sizeof (lx_geom)); + } else { + diskaddr_t tot; + + tot = zv->lzd_volsize / zv->lzd_blksize; + + /* + * Since the 'sectors' value is only one byte we make + * up heads/cylinder values to get things to fit. + * We roundup the number of heads to ensure we don't + * overflow the sectors due to truncation. + */ + lx_geom.heads = lx_geom.cylinders = (tot / 0xff) + 1; + lx_geom.sectors = tot / lx_geom.heads; + lx_geom.start = 0; + } + } else { + int res, rv; + struct dk_geom geom; + + res = VOP_IOCTL(fp->f_vnode, DKIOCGGEOM, (intptr_t)&geom, + fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL); + if (res > 0) + return (set_errno(res)); + + lx_geom.heads = geom.dkg_nhead; + lx_geom.sectors = geom.dkg_nsect; + lx_geom.cylinders = geom.dkg_ncyl; + lx_geom.start = 0; + } + + if (copyout(&lx_geom, (caddr_t)arg, sizeof (lx_geom))) + return (set_errno(EFAULT)); + return (0); +} + +/* + * Per the Linux sd(4) man page, get the number of sectors. The linux/fs.h + * header says its 512 byte blocks. 
+ */ +static int +ict_blkgetsize(file_t *fp, int cmd, intptr_t arg, int lxcmd) +{ + diskaddr_t tot; + + if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK) + return (set_errno(EINVAL)); + + if (getmajor(fp->f_vnode->v_rdev) == mod_name_to_major("zfs")) { + minor_t m; + lxd_zfs_dev_t *zv; + + m = getminor(fp->f_vnode->v_rdev); + if ((zv = lx_lookup_zvol(m)) == NULL) { + /* should only happen if new zvol */ + tot = 0; + } else { + tot = zv->lzd_volsize / 512; + } + } else { + int res, rv; + struct dk_minfo minfo; + + res = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo, + fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL); + if (res > 0) + return (set_errno(res)); + + tot = minfo.dki_capacity; + if (minfo.dki_lbsize > 512) { + uint_t bsize = minfo.dki_lbsize / 512; + + tot *= bsize; + } + } + + if (copyout(&tot, (caddr_t)arg, sizeof (long))) + return (set_errno(EFAULT)); + return (0); +} + +/* + * Get the sector size (i.e. the logical block size). + */ +static int +ict_blkgetssize(file_t *fp, int cmd, intptr_t arg, int lxcmd) +{ + uint_t bsize; + + if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK) + return (set_errno(EINVAL)); + + if (getmajor(fp->f_vnode->v_rdev) == mod_name_to_major("zfs")) { + minor_t m; + lxd_zfs_dev_t *zv; + + m = getminor(fp->f_vnode->v_rdev); + if ((zv = lx_lookup_zvol(m)) == NULL) { + /* should only happen if new zvol */ + bsize = 0; + } else { + bsize = (uint_t)zv->lzd_blksize; + } + } else { + int res, rv; + struct dk_minfo minfo; + + res = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo, + fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL); + if (res > 0) + return (set_errno(res)); + + bsize = (uint_t)minfo.dki_lbsize; + } + + if (copyout(&bsize, (caddr_t)arg, sizeof (bsize))) + return (set_errno(EFAULT)); + return (0); +} + +/* + * Get the size. The linux/fs.h header says its in bytes. 
+ */ +static int +ict_blkgetsize64(file_t *fp, int cmd, intptr_t arg, int lxcmd) +{ + uint64_t tot; + /* Only character and block devices are accepted. */ + if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK) + return (set_errno(EINVAL)); + /* zvols are answered from the zone's cached list, not via an ioctl. */ + if (getmajor(fp->f_vnode->v_rdev) == mod_name_to_major("zfs")) { + minor_t m; + lxd_zfs_dev_t *zv; + + m = getminor(fp->f_vnode->v_rdev); + if ((zv = lx_lookup_zvol(m)) == NULL) { + /* no cached entry; should only happen if new zvol */ + tot = 0; + } else { + tot = zv->lzd_volsize; + } + } else { + int res, rv; + struct dk_minfo minfo; + + res = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo, + fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL); + if (res > 0) + return (set_errno(res)); + /* byte count = dki_capacity * dki_lbsize */ + tot = minfo.dki_capacity * minfo.dki_lbsize; + } + /* The result is always copied out as a 64-bit value. */ + if (copyout(&tot, (caddr_t)arg, sizeof (uint64_t))) + return (set_errno(EFAULT)); + return (0); +} + /* Terminal-related translators */ static int @@ -1349,6 +1575,20 @@ static lx_ioc_cmd_translator_t lx_ioc_xlate_autofs[] = { LX_IOC_CMD_TRANSLATOR_END }; +static lx_ioc_cmd_translator_t lx_ioc_xlate_hd[] = { /* selected by lx_ioctl() for LX_IOC_TYPE_HD */ + LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_HDIO_GETGEO, ict_hdgetgeo) + + LX_IOC_CMD_TRANSLATOR_END +}; + +static lx_ioc_cmd_translator_t lx_ioc_xlate_blk[] = { /* selected by lx_ioctl() for LX_IOC_TYPE_BLK */ + LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKGETSIZE, ict_blkgetsize) + LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKSSZGET, ict_blkgetssize) + LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKGETSIZE64, ict_blkgetsize64) + + LX_IOC_CMD_TRANSLATOR_END +}; + static void lx_ioctl_vsd_free(void *data) { @@ -1399,6 +1639,14 @@ lx_ioctl(int fdes, int cmd, intptr_t arg) ict = lx_ioc_xlate_autofs; break; + case LX_IOC_TYPE_BLK: + ict = lx_ioc_xlate_blk; + break; + + case LX_IOC_TYPE_HD: + ict = lx_ioc_xlate_hd; + break; + default: releasef(fdes); return (set_errno(ENOTTY)); |