summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jerry Jelinek <jerry.jelinek@joyent.com> 2016-03-08 12:57:08 +0000
committer Jerry Jelinek <jerry.jelinek@joyent.com> 2016-03-08 12:57:08 +0000
commit 48bc46418c502790ab12bdf93d47dda7da3c1684 (patch)
tree 9e89239148d783e90be150f50e5e8bb4f289c91c
parent 0929ae1b21b179f0286999f40ab364fd2a370164 (diff)
download illumos-joyent-48bc46418c502790ab12bdf93d47dda7da3c1684.tar.gz
OS-4612 Want zvol block devices in LX to handle some Linux disk ioctls
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
-rw-r--r-- usr/src/uts/common/brand/lx/os/lx_brand.c 73
-rw-r--r-- usr/src/uts/common/brand/lx/sys/lx_brand.h 2
-rw-r--r-- usr/src/uts/common/brand/lx/syscall/lx_ioctl.c 248
3 files changed, 315 insertions, 8 deletions
diff --git a/usr/src/uts/common/brand/lx/os/lx_brand.c b/usr/src/uts/common/brand/lx/os/lx_brand.c
index 11aeb34dee..a2e308cc67 100644
--- a/usr/src/uts/common/brand/lx/os/lx_brand.c
+++ b/usr/src/uts/common/brand/lx/os/lx_brand.c
@@ -798,17 +798,22 @@ static int
lx_zfs_ioctl(ldi_handle_t lh, int cmd, zfs_cmd_t *zc, size_t *dst_alloc_size)
{
uint64_t cookie;
- size_t dstsize = 8192;
+ size_t dstsize;
int rc, unused;
cookie = zc->zc_cookie;
+ dstsize = (dst_alloc_size == NULL ? 0 : 8192);
+
again:
- zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(dstsize, KM_SLEEP);
- zc->zc_nvlist_dst_size = dstsize;
+ if (dst_alloc_size != NULL) {
+ zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(dstsize,
+ KM_SLEEP);
+ zc->zc_nvlist_dst_size = dstsize;
+ }
rc = ldi_ioctl(lh, cmd, (intptr_t)zc, FKIOCTL, kcred, &unused);
- if (rc == ENOMEM) {
+ if (rc == ENOMEM && dst_alloc_size != NULL) {
/*
* Our nvlist_dst buffer was too small, retry with a bigger
* buffer. ZFS will tell us the exact needed size.
@@ -825,10 +830,6 @@ again:
if (dst_alloc_size != NULL) {
*dst_alloc_size = dstsize;
- } else {
- /* Caller didn't want the nvlist_dst anyway */
- kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, dstsize);
- zc->zc_nvlist_dst = NULL;
}
return (rc);
@@ -853,6 +854,58 @@ lx_zvol_minor(char *znm)
}
/*
+ * We only get the relevant properties for zvols. This is because we're
+ * essentially iterating all of the ZFS datasets/zvols on the entire system
+ * when we boot the zone and there is a significant performance penalty if we
+ * have to retrieve all of the properties for everything. Especially since we
+ * don't care about any of them except the zvols actually in our delegated
+ * datasets.
+ *
+ * Note that the two properties we care about, volsize & volblocksize, are
+ * mandatory for zvols and should always be present. Also, note that the
+ * blocksize property value cannot change after the zvol has been created.
+ *
+ * The ZFS_IOC_OBJSET_STATS ioctl is issued on 'lh' using the caller's 'zc'.
+ * On success the "volsize" value is stored in *vsz and the "volblocksize"
+ * value in *bsz. If the ioctl fails, the result cannot be unpacked, or a
+ * property is missing, the corresponding output is left untouched. In every
+ * case the nvlist_dst buffer allocated by lx_zfs_ioctl() is released and the
+ * zc_nvlist_dst fields are cleared before returning.
+ */
+static void
+lx_zvol_props(ldi_handle_t lh, zfs_cmd_t *zc, uint64_t *vsz, uint64_t *bsz)
+{
+	nvlist_t *nv = NULL, *prop;
+	uint64_t val;
+	size_t size = 0;
+	int rc;
+
+	rc = lx_zfs_ioctl(lh, ZFS_IOC_OBJSET_STATS, zc, &size);
+	if (rc == 0) {
+		rc = nvlist_unpack((char *)(uintptr_t)zc->zc_nvlist_dst,
+		    zc->zc_nvlist_dst_size, &nv, 0);
+		ASSERT(rc == 0);
+	}
+
+	/*
+	 * lx_zfs_ioctl() hands back its destination buffer (and the size it
+	 * was allocated with) even when the ioctl itself failed, so release
+	 * it on the error path too rather than leaking it.
+	 */
+	if (size != 0)
+		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
+	zc->zc_nvlist_dst = NULL;
+	zc->zc_nvlist_dst_size = 0;
+
+	/*
+	 * ASSERT is compiled out of non-DEBUG kernels, so an ioctl or unpack
+	 * failure must also be handled explicitly.
+	 */
+	if (rc != 0)
+		return;
+
+	/* Each property is a nested nvlist holding its value. */
+	if (nvlist_lookup_nvlist(nv, "volsize", &prop) == 0 &&
+	    nvlist_lookup_uint64(prop, ZPROP_VALUE, &val) == 0) {
+		*vsz = val;
+	}
+
+	if (nvlist_lookup_nvlist(nv, "volblocksize", &prop) == 0 &&
+	    nvlist_lookup_uint64(prop, ZPROP_VALUE, &val) == 0) {
+		*bsz = val;
+	}
+
+	nvlist_free(nv);
+}
+
+/*
* We treat the zpool as a virtual device and any zvols as actual devices.
*/
static void
@@ -953,6 +1006,10 @@ lx_zfs_get_devs(zone_t *zone, list_t *zvol_lst)
(void) strcpy(zv->lzd_name, zc->zc_name);
zv->lzd_type = LXD_ZFS_DEV_ZVOL;
zv->lzd_minor = lx_zvol_minor(zc->zc_name);
+
+ lx_zvol_props(lh, zc, &zv->lzd_volsize,
+ &zv->lzd_blksize);
+
list_insert_tail(zvol_lst, zv);
} else {
lx_zfs_ds_t *nds;
diff --git a/usr/src/uts/common/brand/lx/sys/lx_brand.h b/usr/src/uts/common/brand/lx/sys/lx_brand.h
index 747e8fa012..9690547e0c 100644
--- a/usr/src/uts/common/brand/lx/sys/lx_brand.h
+++ b/usr/src/uts/common/brand/lx/sys/lx_brand.h
@@ -574,6 +574,8 @@ typedef struct lxd_zfs_dev {
char lzd_name[MAXPATHLEN];
lxd_zfs_dev_type_t lzd_type;
minor_t lzd_minor;
+ uint64_t lzd_volsize;
+ uint64_t lzd_blksize;
} lxd_zfs_dev_t;
/*
diff --git a/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
index 5fb58e26ea..6c0e9648d8 100644
--- a/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
+++ b/usr/src/uts/common/brand/lx/syscall/lx_ioctl.c
@@ -48,10 +48,14 @@
#include <sys/netstack.h>
#include <inet/ip.h>
#include <inet/ip_if.h>
+#include <sys/dkio.h>
+#include <sys/sdt.h>
/*
* Linux ioctl types
*/
+#define LX_IOC_TYPE_HD 0x03
+#define LX_IOC_TYPE_BLK 0x12
#define LX_IOC_TYPE_FD 0x54
#define LX_IOC_TYPE_DTRACE 0x68
#define LX_IOC_TYPE_SOCK 0x89
@@ -60,6 +64,10 @@
/*
* Supported ioctls
*/
+#define LX_HDIO_GETGEO 0x0301
+#define LX_BLKGETSIZE 0x1260
+#define LX_BLKSSZGET 0x1268
+#define LX_BLKGETSIZE64 0x80081272
#define LX_TCGETS 0x5401
#define LX_TCSETS 0x5402
#define LX_TCSETSW 0x5403
@@ -500,6 +508,224 @@ ict_fionread(file_t *fp, int cmd, intptr_t arg, int lxcmd)
return (0);
}
+/*
+ * hard disk-related translators
+ *
+ * Note that the normal disk ioctls only work for VCHR devices. See spec_ioctl
+ * which will return ENOTTY for a VBLK device. However, fdisk, etc. expect to
+ * work with block devices.
+ *
+ * We expect a zvol to be the primary block device we're interacting with and
+ * we use the zone's lxzd_vdisks list to handle zvols specifically.
+ */
+
+typedef struct lx_hd_geom { /* record ict_hdgetgeo() copies out to the caller */
+ unsigned char heads; /* head count; only one byte wide */
+ unsigned char sectors; /* sector count; only one byte wide */
+ unsigned short cylinders; /* cylinder count */
+ unsigned long start; /* ict_hdgetgeo() always reports 0 */
+} lx_hd_geom_t; /* NOTE(review): presumably mirrors Linux struct hd_geometry - confirm */
+
+/*
+ * Search the current process's zone lxzd_vdisks list for the entry whose
+ * lzd_minor equals 'min'. Returns that entry, or NULL when the zone has no
+ * lx zone data or no entry matches.
+ */
+static lxd_zfs_dev_t *
+lx_lookup_zvol(minor_t min)
+{
+	lx_zone_data_t *zd = ztolxzd(curproc->p_zone);
+	lxd_zfs_dev_t *dev;
+
+	if (zd == NULL)
+		return (NULL);
+	ASSERT(zd->lxzd_vdisks != NULL);
+
+	for (dev = list_head(zd->lxzd_vdisks); dev != NULL;
+	    dev = list_next(zd->lxzd_vdisks, dev)) {
+		if (dev->lzd_minor == min)
+			break;
+	}
+
+	/* dev is NULL here if we walked off the end of the list. */
+	return (dev);
+}
+
+/*
+ * See zvol_ioctl() which always fails for DKIOCGGEOM. The geometry for a
+ * zvol (or really any modern disk) is made up, so we do that here as well.
+ *
+ * On success an lx_hd_geom_t is copied out to 'arg' and 0 is returned.
+ * Otherwise errno is set: EINVAL if the file is neither a character nor a
+ * block device, the DKIOCGGEOM error for non-zfs devices, or EFAULT if the
+ * copyout fails.
+ *
+ * NOTE(review): lx_hd_geom_t uses the kernel's 'unsigned long' for 'start';
+ * confirm the copied-out size is what a 32-bit LX caller expects.
+ */
+static int
+ict_hdgetgeo(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+	lx_hd_geom_t lx_geom;
+
+	if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+		return (set_errno(EINVAL));
+
+	/*
+	 * Zero the whole structure, padding included, so that no
+	 * uninitialized kernel stack bytes reach the caller via copyout.
+	 */
+	bzero(&lx_geom, sizeof (lx_geom));
+
+	if (getmajor(fp->f_vnode->v_rdev) == mod_name_to_major("zfs")) {
+		lxd_zfs_dev_t *zv;
+
+		zv = lx_lookup_zvol(getminor(fp->f_vnode->v_rdev));
+
+		/*
+		 * An unknown zvol should only happen if it is new. A zero
+		 * block size means lx_zvol_props() could not fetch the
+		 * properties. Either way report an all-zero geometry rather
+		 * than dividing by zero.
+		 */
+		if (zv != NULL && zv->lzd_blksize != 0) {
+			diskaddr_t tot, nhead;
+
+			tot = zv->lzd_volsize / zv->lzd_blksize;
+
+			/*
+			 * Since the 'sectors' value is only one byte we make
+			 * up heads/cylinder values to get things to fit.
+			 * We roundup the number of heads to ensure we don't
+			 * overflow the sectors due to truncation.
+			 */
+			nhead = (tot / 0xff) + 1;
+
+			if (nhead <= 0xff) {
+				lx_geom.heads = (unsigned char)nhead;
+				lx_geom.cylinders = (unsigned short)nhead;
+				lx_geom.sectors = (unsigned char)(tot / nhead);
+			} else {
+				/*
+				 * The head count no longer fits in one byte;
+				 * storing it truncated could yield zero heads
+				 * (and a divide by zero) or nonsense values.
+				 * Fall back to the conventional 255 head, 63
+				 * sector translation and clamp the cylinders.
+				 */
+				diskaddr_t ncyl = tot / (0xff * 63);
+
+				lx_geom.heads = 0xff;
+				lx_geom.sectors = 63;
+				lx_geom.cylinders = (ncyl > 0xffff) ?
+				    0xffff : (unsigned short)ncyl;
+			}
+		}
+	} else {
+		int res, rv;
+		struct dk_geom geom;
+
+		res = VOP_IOCTL(fp->f_vnode, DKIOCGGEOM, (intptr_t)&geom,
+		    fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+		if (res > 0)
+			return (set_errno(res));
+
+		/* 'start' stays 0 from the bzero above. */
+		lx_geom.heads = geom.dkg_nhead;
+		lx_geom.sectors = geom.dkg_nsect;
+		lx_geom.cylinders = geom.dkg_ncyl;
+	}
+
+	if (copyout(&lx_geom, (caddr_t)arg, sizeof (lx_geom)))
+		return (set_errno(EFAULT));
+	return (0);
+}
+
+/*
+ * Per the Linux sd(4) man page, get the number of sectors. The linux/fs.h
+ * header says it's in 512 byte blocks.
+ *
+ * A zvol that is not on the zone's list reports 0. For other devices the
+ * answer is derived from DKIOCGMEDIAINFO.
+ */
+static int
+ict_blkgetsize(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+	diskaddr_t tot;
+
+	if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+		return (set_errno(EINVAL));
+
+	if (getmajor(fp->f_vnode->v_rdev) != mod_name_to_major("zfs")) {
+		struct dk_minfo minfo;
+		int err, rv;
+
+		err = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo,
+		    fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+		if (err > 0)
+			return (set_errno(err));
+
+		/* Scale the capacity up when logical blocks exceed 512. */
+		tot = minfo.dki_capacity;
+		if (minfo.dki_lbsize > 512) {
+			uint_t scale = minfo.dki_lbsize / 512;
+
+			tot *= scale;
+		}
+	} else {
+		lxd_zfs_dev_t *zv;
+
+		zv = lx_lookup_zvol(getminor(fp->f_vnode->v_rdev));
+
+		/* a miss should only happen if new zvol */
+		tot = (zv == NULL) ? 0 : zv->lzd_volsize / 512;
+	}
+
+	if (copyout(&tot, (caddr_t)arg, sizeof (long)) != 0)
+		return (set_errno(EFAULT));
+	return (0);
+}
+
+/*
+ * Get the sector size (i.e. the logical block size).
+ *
+ * For a zvol this is the cached lzd_blksize (0 if the zvol is not on the
+ * zone's list). For other devices it is dki_lbsize from DKIOCGMEDIAINFO.
+ */
+static int
+ict_blkgetssize(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+	uint_t bsize;
+
+	if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+		return (set_errno(EINVAL));
+
+	if (getmajor(fp->f_vnode->v_rdev) != mod_name_to_major("zfs")) {
+		struct dk_minfo minfo;
+		int err, rv;
+
+		err = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo,
+		    fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+		if (err > 0)
+			return (set_errno(err));
+
+		bsize = (uint_t)minfo.dki_lbsize;
+	} else {
+		lxd_zfs_dev_t *zv;
+
+		zv = lx_lookup_zvol(getminor(fp->f_vnode->v_rdev));
+
+		/* a miss should only happen if new zvol */
+		bsize = (zv == NULL) ? 0 : (uint_t)zv->lzd_blksize;
+	}
+
+	if (copyout(&bsize, (caddr_t)arg, sizeof (bsize)) != 0)
+		return (set_errno(EFAULT));
+	return (0);
+}
+
+/*
+ * Get the size. The linux/fs.h header says it's in bytes.
+ *
+ * For a zvol this is the cached lzd_volsize (0 if the zvol is not on the
+ * zone's list). For other devices it is capacity times logical block size
+ * as reported by DKIOCGMEDIAINFO.
+ */
+static int
+ict_blkgetsize64(file_t *fp, int cmd, intptr_t arg, int lxcmd)
+{
+	uint64_t tot;
+
+	if (fp->f_vnode->v_type != VCHR && fp->f_vnode->v_type != VBLK)
+		return (set_errno(EINVAL));
+
+	if (getmajor(fp->f_vnode->v_rdev) != mod_name_to_major("zfs")) {
+		struct dk_minfo minfo;
+		int err, rv;
+
+		err = VOP_IOCTL(fp->f_vnode, DKIOCGMEDIAINFO, (intptr_t)&minfo,
+		    fp->f_flag | FKIOCTL, fp->f_cred, &rv, NULL);
+		if (err > 0)
+			return (set_errno(err));
+
+		tot = minfo.dki_capacity * minfo.dki_lbsize;
+	} else {
+		lxd_zfs_dev_t *zv;
+
+		zv = lx_lookup_zvol(getminor(fp->f_vnode->v_rdev));
+
+		/* a miss should only happen if new zvol */
+		tot = (zv == NULL) ? 0 : zv->lzd_volsize;
+	}
+
+	if (copyout(&tot, (caddr_t)arg, sizeof (uint64_t)) != 0)
+		return (set_errno(EFAULT));
+	return (0);
+}
+
/* Terminal-related translators */
static int
@@ -1349,6 +1575,20 @@ static lx_ioc_cmd_translator_t lx_ioc_xlate_autofs[] = {
LX_IOC_CMD_TRANSLATOR_END
};
+static lx_ioc_cmd_translator_t lx_ioc_xlate_hd[] = { /* table lx_ioctl() selects for LX_IOC_TYPE_HD */
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_HDIO_GETGEO, ict_hdgetgeo)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
+static lx_ioc_cmd_translator_t lx_ioc_xlate_blk[] = { /* table lx_ioctl() selects for LX_IOC_TYPE_BLK */
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKGETSIZE, ict_blkgetsize)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKSSZGET, ict_blkgetssize)
+ LX_IOC_CMD_TRANSLATOR_CUSTOM(LX_BLKGETSIZE64, ict_blkgetsize64)
+
+ LX_IOC_CMD_TRANSLATOR_END
+};
+
static void
lx_ioctl_vsd_free(void *data)
{
@@ -1399,6 +1639,14 @@ lx_ioctl(int fdes, int cmd, intptr_t arg)
ict = lx_ioc_xlate_autofs;
break;
+ case LX_IOC_TYPE_BLK:
+ ict = lx_ioc_xlate_blk;
+ break;
+
+ case LX_IOC_TYPE_HD:
+ ict = lx_ioc_xlate_hd;
+ break;
+
default:
releasef(fdes);
return (set_errno(ENOTTY));